Merge branch 'master' into v3-definition

2026-04-27 01:49:07 +00:00 · 2025-07-09 03:58:09 -05:00
parent aff5271291 5612670ee4
commit 3aa2d19c70
35 changed files with 1730 additions and 477 deletions
--- a/comfy_extras/nodes_audio.py
+++ b/comfy_extras/nodes_audio.py
@@ -133,14 +133,6 @@ def save_audio(self, audio, filename_prefix="ComfyUI", format="flac", prompt=Non
            if sample_rate != audio["sample_rate"]:
                waveform = torchaudio.functional.resample(waveform, audio["sample_rate"], sample_rate)

-        # Create in-memory WAV buffer
-        wav_buffer = io.BytesIO()
-        torchaudio.save(wav_buffer, waveform, sample_rate, format="WAV")
-        wav_buffer.seek(0)  # Rewind for reading
-
-        # Use PyAV to convert and add metadata
-        input_container = av.open(wav_buffer)
-
        # Create output with specified format
        output_buffer = io.BytesIO()
        output_container = av.open(output_buffer, mode='w', format=format)
@@ -150,7 +142,6 @@ def save_audio(self, audio, filename_prefix="ComfyUI", format="flac", prompt=Non
            output_container.metadata[key] = value

        # Set up the output stream with appropriate properties
-        input_container.streams.audio[0]
        if format == "opus":
            out_stream = output_container.add_stream("libopus", rate=sample_rate)
            if quality == "64k":
@@ -175,18 +166,16 @@ def save_audio(self, audio, filename_prefix="ComfyUI", format="flac", prompt=Non
        else: #format == "flac":
            out_stream = output_container.add_stream("flac", rate=sample_rate)

-
-        # Copy frames from input to output
-        for frame in input_container.decode(audio=0):
-            frame.pts = None  # Let PyAV handle timestamps
-            output_container.mux(out_stream.encode(frame))
+        frame = av.AudioFrame.from_ndarray(waveform.movedim(0, 1).reshape(1, -1).float().numpy(), format='flt', layout='mono' if waveform.shape[0] == 1 else 'stereo')
+        frame.sample_rate = sample_rate
+        frame.pts = 0
+        output_container.mux(out_stream.encode(frame))

        # Flush encoder
        output_container.mux(out_stream.encode(None))

        # Close containers
        output_container.close()
-        input_container.close()

        # Write the output to file
        output_buffer.seek(0)
--- a/comfy_extras/nodes_custom_sampler.py
+++ b/comfy_extras/nodes_custom_sampler.py
@@ -2,6 +2,8 @@ import math
 import comfy.samplers
 import comfy.sample
 from comfy.k_diffusion import sampling as k_diffusion_sampling
+from comfy.k_diffusion import sa_solver
+from comfy.comfy_types import IO, ComfyNodeABC, InputTypeDict
 import latent_preview
 import torch
 import comfy.utils
@@ -480,6 +482,89 @@ class SamplerDPMAdaptative:
                                                              "s_noise":s_noise })
        return (sampler, )

+
+class SamplerER_SDE(ComfyNodeABC):
+    """Node that configures the ``er_sde`` k-sampler as ER-SDE, reverse-time SDE or ODE."""
+
+    RETURN_TYPES = (IO.SAMPLER,)
+    CATEGORY = "sampling/custom_sampling/samplers"
+    FUNCTION = "get_sampler"
+
+    @classmethod
+    def INPUT_TYPES(cls) -> InputTypeDict:
+        """Declare the node inputs: solver type, stage count, eta and noise scale."""
+        eta_tooltip = "Stochastic strength of reverse-time SDE.\nWhen eta=0, it reduces to deterministic ODE. This setting doesn't apply to ER-SDE solver type."
+        eta_spec = {"default": 1.0, "min": 0.0, "max": 100.0, "step": 0.01, "round": False, "tooltip": eta_tooltip}
+        required = {
+            "solver_type": (IO.COMBO, {"options": ["ER-SDE", "Reverse-time SDE", "ODE"]}),
+            "max_stage": (IO.INT, {"default": 3, "min": 1, "max": 3}),
+            "eta": (IO.FLOAT, eta_spec),
+            "s_noise": (IO.FLOAT, {"default": 1.0, "min": 0.0, "max": 100.0, "step": 0.01, "round": False}),
+        }
+        return {"required": required}
+
+    def get_sampler(self, solver_type, max_stage, eta, s_noise):
+        """Return a 1-tuple holding the ``er_sde`` sampler built from the node inputs."""
+        deterministic = solver_type == "ODE" or (solver_type == "Reverse-time SDE" and eta == 0)
+        if deterministic:
+            # ODE mode, or reverse-time SDE with eta 0: zero eta and the noise scale.
+            eta, s_noise = 0, 0
+
+        def reverse_time_sde_noise_scaler(x):
+            # Reads eta after the reset above, so the deterministic case computes x ** 1.
+            return x ** (eta + 1)
+
+        # None makes sample_er_sde() use its own default noise scaler.
+        noise_scaler = None if solver_type == "ER-SDE" else reverse_time_sde_noise_scaler
+
+        extra_options = {"s_noise": s_noise, "noise_scaler": noise_scaler, "max_stage": max_stage}
+        sampler = comfy.samplers.ksampler("er_sde", extra_options)
+        return (sampler,)
+
+
+class SamplerSASolver(ComfyNodeABC):
+    """Node that builds the ``sa_solver`` k-sampler from a model and solver settings."""
+
+    RETURN_TYPES = (IO.SAMPLER,)
+    CATEGORY = "sampling/custom_sampling/samplers"
+    FUNCTION = "get_sampler"
+
+    @classmethod
+    def INPUT_TYPES(cls) -> InputTypeDict:
+        """Declare the node inputs; every one of them is required."""
+        percent_range = {"min": 0.0, "max": 1.0, "step": 0.001}
+        required = {
+            "model": (IO.MODEL, {}),
+            "eta": (IO.FLOAT, {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01, "round": False}),
+            "sde_start_percent": (IO.FLOAT, {"default": 0.2, **percent_range}),
+            "sde_end_percent": (IO.FLOAT, {"default": 0.8, **percent_range}),
+            "s_noise": (IO.FLOAT, {"default": 1.0, "min": 0.0, "max": 100.0, "step": 0.01, "round": False}),
+            "predictor_order": (IO.INT, {"default": 3, "min": 1, "max": 6}),
+            "corrector_order": (IO.INT, {"default": 4, "min": 0, "max": 6}),
+            "use_pece": (IO.BOOLEAN, {}),
+            "simple_order_2": (IO.BOOLEAN, {}),
+        }
+        return {"required": required}
+
+    def get_sampler(self, model, eta, sde_start_percent, sde_end_percent, s_noise, predictor_order, corrector_order, use_pece, simple_order_2):
+        """Return a 1-tuple holding the ``sa_solver`` sampler.
+
+        The percent inputs are converted to sigmas via the model's ``model_sampling`` object and
+        passed with ``eta`` to ``sa_solver.get_tau_interval_func`` to obtain ``tau_func``.
+        """
+        to_sigma = model.get_model_object("model_sampling").percent_to_sigma
+        start_sigma, end_sigma = to_sigma(sde_start_percent), to_sigma(sde_end_percent)
+        extra_options = {
+            "tau_func": sa_solver.get_tau_interval_func(start_sigma, end_sigma, eta=eta),
+            "s_noise": s_noise,
+            "predictor_order": predictor_order,
+            "corrector_order": corrector_order,
+            "use_pece": use_pece,
+            "simple_order_2": simple_order_2,
+        }
+        return (comfy.samplers.ksampler("sa_solver", extra_options),)
+
+
 class Noise_EmptyNoise:
    def __init__(self):
        self.seed = 0
@@ -609,8 +694,14 @@ class Guider_DualCFG(comfy.samplers.CFGGuider):
    def predict_noise(self, x, timestep, model_options={}, seed=None):
        negative_cond = self.conds.get("negative", None)
        middle_cond = self.conds.get("middle", None)
+        positive_cond = self.conds.get("positive", None)
+        if model_options.get("disable_cfg1_optimization", False) == False:
+            if math.isclose(self.cfg2, 1.0):
+                negative_cond = None
+                if math.isclose(self.cfg1, 1.0):
+                    middle_cond = None

-        out = comfy.samplers.calc_cond_batch(self.inner_model, [negative_cond, middle_cond, self.conds.get("positive", None)], x, timestep, model_options)
+        out = comfy.samplers.calc_cond_batch(self.inner_model, [negative_cond, middle_cond, positive_cond], x, timestep, model_options)
        return comfy.samplers.cfg_function(self.inner_model, out[1], out[0], self.cfg2, x, timestep, model_options=model_options, cond=middle_cond, uncond=negative_cond) + (out[2] - out[1]) * self.cfg1

 class DualCFGGuider:
@@ -781,6 +872,8 @@ NODE_CLASS_MAPPINGS = {
    "SamplerDPMPP_SDE": SamplerDPMPP_SDE,
    "SamplerDPMPP_2S_Ancestral": SamplerDPMPP_2S_Ancestral,
    "SamplerDPMAdaptative": SamplerDPMAdaptative,
+    "SamplerER_SDE": SamplerER_SDE,
+    "SamplerSASolver": SamplerSASolver,
    "SplitSigmas": SplitSigmas,
    "SplitSigmasDenoise": SplitSigmasDenoise,
    "FlipSigmas": FlipSigmas,
--- a/comfy_extras/nodes_images.py
+++ b/comfy_extras/nodes_images.py
@@ -583,6 +583,49 @@ class GetImageSize:

        return width, height, batch_size

+class ImageRotate:
+    """Node rotating an IMAGE tensor in quarter turns within the plane of dims 1 and 2."""
+
+    RETURN_TYPES = (IO.IMAGE,)
+    FUNCTION = "rotate"
+    CATEGORY = "image/transform"
+
+    @classmethod
+    def INPUT_TYPES(s):
+        """Declare the inputs: the image and the rotation choice."""
+        choices = ["none", "90 degrees", "180 degrees", "270 degrees"]
+        return {"required": {"image": (IO.IMAGE,), "rotation": (choices,)}}
+
+    def rotate(self, image, rotation):
+        """Return a 1-tuple with ``image`` rotated as selected by ``rotation``.
+
+        The leading digits of ``rotation`` select the number of quarter turns;
+        any other value (e.g. "none") applies zero turns.
+        """
+        turns_by_prefix = (("90", 1), ("180", 2), ("270", 3))
+        quarter_turns = next((k for prefix, k in turns_by_prefix if rotation.startswith(prefix)), 0)
+        return (torch.rot90(image, k=quarter_turns, dims=[2, 1]),)
+
+class ImageFlip:
+    """Node that mirrors an IMAGE tensor along dim 1 or dim 2."""
+
+    RETURN_TYPES = (IO.IMAGE,)
+    FUNCTION = "flip"
+    CATEGORY = "image/transform"
+
+    @classmethod
+    def INPUT_TYPES(s):
+        """Declare the inputs: the image and the flip axis choice."""
+        return {"required": {"image": (IO.IMAGE,), "flip_method": (["x-axis: vertically", "y-axis: horizontally"],)}}
+
+    def flip(self, image, flip_method):
+        """Return a 1-tuple: ``image`` flipped on dim 1 ("x" methods) or dim 2 ("y" methods), else unchanged."""
+        for prefix, dim in (("x", 1), ("y", 2)):
+            if flip_method.startswith(prefix):
+                return (torch.flip(image, dims=[dim]),)
+        return (image,)
+
+
 NODE_CLASS_MAPPINGS = {
    "ImageCrop": ImageCrop,
    "RepeatImageBatch": RepeatImageBatch,
@@ -594,4 +637,6 @@ NODE_CLASS_MAPPINGS = {
    "ImageStitch": ImageStitch,
    "ResizeAndPadImage": ResizeAndPadImage,
    "GetImageSize": GetImageSize,
+    "ImageRotate": ImageRotate,
+    "ImageFlip": ImageFlip,
 }
--- a/comfy_extras/nodes_load_3d.py
+++ b/comfy_extras/nodes_load_3d.py
@@ -5,6 +5,8 @@ import os
 from comfy.comfy_types import IO
 from comfy_api.input_impl import VideoFromFile

+from pathlib import Path
+

 def normalize_path(path):
    return path.replace('\\', '/')
@@ -16,7 +18,14 @@ class Load3D():

        os.makedirs(input_dir, exist_ok=True)

-        files = [normalize_path(os.path.join("3d", f)) for f in os.listdir(input_dir) if f.endswith(('.gltf', '.glb', '.obj', '.fbx', '.stl'))]
+        input_path = Path(input_dir)
+        base_path = Path(folder_paths.get_input_directory())
+
+        files = [
+            normalize_path(str(file_path.relative_to(base_path)))
+            for file_path in input_path.rglob("*")
+            if file_path.suffix.lower() in {'.gltf', '.glb', '.obj', '.fbx', '.stl'}
+        ]

        return {"required": {
            "model_file": (sorted(files), {"file_upload": True}),
@@ -61,7 +70,14 @@ class Load3DAnimation():

        os.makedirs(input_dir, exist_ok=True)

-        files = [normalize_path(os.path.join("3d", f)) for f in os.listdir(input_dir) if f.endswith(('.gltf', '.glb', '.fbx'))]
+        input_path = Path(input_dir)
+        base_path = Path(folder_paths.get_input_directory())
+
+        files = [
+            normalize_path(str(file_path.relative_to(base_path)))
+            for file_path in input_path.rglob("*")
+            if file_path.suffix.lower() in {'.gltf', '.glb', '.fbx'}
+        ]

        return {"required": {
            "model_file": (sorted(files), {"file_upload": True}),
--- a/comfy_extras/nodes_lt.py
+++ b/comfy_extras/nodes_lt.py
@@ -134,8 +134,8 @@ class LTXVAddGuide:
        _, num_keyframes = get_keyframe_idxs(cond)
        latent_count = latent_length - num_keyframes
        frame_idx = frame_idx if frame_idx >= 0 else max((latent_count - 1) * time_scale_factor + 1 + frame_idx, 0)
-        if guide_length > 1:
-            frame_idx = frame_idx // time_scale_factor * time_scale_factor # frame index must be divisible by 8
+        if guide_length > 1 and frame_idx != 0:
+            frame_idx = (frame_idx - 1) // time_scale_factor * time_scale_factor + 1 # frame index - 1 must be divisible by 8 or frame_idx == 0

        latent_idx = (frame_idx + time_scale_factor - 1) // time_scale_factor

@@ -144,7 +144,7 @@ class LTXVAddGuide:
    def add_keyframe_index(self, cond, frame_idx, guiding_latent, scale_factors):
        keyframe_idxs, _ = get_keyframe_idxs(cond)
        _, latent_coords = self._patchifier.patchify(guiding_latent)
-        pixel_coords = latent_to_pixel_coords(latent_coords, scale_factors, True)
+        pixel_coords = latent_to_pixel_coords(latent_coords, scale_factors, causal_fix=frame_idx == 0)  # we need the causal fix only if we're placing the new latents at index 0
        pixel_coords[:, 0] += frame_idx
        if keyframe_idxs is None:
            keyframe_idxs = pixel_coords
--- a/comfy_extras/nodes_mask.py
+++ b/comfy_extras/nodes_mask.py
@@ -152,7 +152,7 @@ class ImageColorToMask:
    def image_to_mask(self, image, color):
        temp = (torch.clamp(image, 0, 1.0) * 255.0).round().to(torch.int)
        temp = torch.bitwise_left_shift(temp[:,:,:,0], 16) + torch.bitwise_left_shift(temp[:,:,:,1], 8) + temp[:,:,:,2]
-        mask = torch.where(temp == color, 255, 0).float()
+        mask = torch.where(temp == color, 1.0, 0).float()
        return (mask,)

 class SolidMask:
--- a/comfy_extras/nodes_perpneg.py
+++ b/comfy_extras/nodes_perpneg.py
@@ -4,6 +4,7 @@ import comfy.sampler_helpers
 import comfy.samplers
 import comfy.utils
 import node_helpers
+import math

 def perp_neg(x, noise_pred_pos, noise_pred_neg, noise_pred_nocond, neg_scale, cond_scale):
    pos = noise_pred_pos - noise_pred_nocond
@@ -69,8 +70,23 @@ class Guider_PerpNeg(comfy.samplers.CFGGuider):
        negative_cond = self.conds.get("negative", None)
        empty_cond = self.conds.get("empty_negative_prompt", None)

-        (noise_pred_pos, noise_pred_neg, noise_pred_empty) = \
-            comfy.samplers.calc_cond_batch(self.inner_model, [positive_cond, negative_cond, empty_cond], x, timestep, model_options)
+        if model_options.get("disable_cfg1_optimization", False) == False:
+            if math.isclose(self.neg_scale, 0.0):
+                negative_cond = None
+                if math.isclose(self.cfg, 1.0):
+                    empty_cond = None
+
+        conds = [positive_cond, negative_cond, empty_cond]
+
+        out = comfy.samplers.calc_cond_batch(self.inner_model, conds, x, timestep, model_options)
+
+        # Apply pre_cfg_functions since sampling_function() is skipped
+        for fn in model_options.get("sampler_pre_cfg_function", []):
+            args = {"conds":conds, "conds_out": out, "cond_scale": self.cfg, "timestep": timestep,
+                    "input": x, "sigma": timestep, "model": self.inner_model, "model_options": model_options}
+            out = fn(args)
+
+        noise_pred_pos, noise_pred_neg, noise_pred_empty = out
        cfg_result = perp_neg(x, noise_pred_pos, noise_pred_neg, noise_pred_empty, self.neg_scale, self.cfg)

        # normally this would be done in cfg_function, but we skipped
@@ -82,6 +98,7 @@ class Guider_PerpNeg(comfy.samplers.CFGGuider):
                "denoised": cfg_result,
                "cond": positive_cond,
                "uncond": negative_cond,
+                "cond_scale": self.cfg,
                "model": self.inner_model,
                "uncond_denoised": noise_pred_neg,
                "cond_denoised": noise_pred_pos,
--- a/comfy_extras/nodes_slg.py
+++ b/comfy_extras/nodes_slg.py
@@ -78,7 +78,75 @@ class SkipLayerGuidanceDiT:

        return (m, )

+class SkipLayerGuidanceDiTSimple:
+    '''
+    Simple version of the SkipLayerGuidanceDiT node that only modifies the uncond pass.
+
+    Within the configured sigma window the uncond prediction is computed with the
+    listed DiT double/single blocks replaced by a pass-through patch.
+    '''
+    RETURN_TYPES = ("MODEL",)
+    FUNCTION = "skip_guidance"
+    EXPERIMENTAL = True
+    DESCRIPTION = "Simple version of the SkipLayerGuidanceDiT node that only modifies the uncond pass."
+    CATEGORY = "advanced/guidance"
+
+    @classmethod
+    def INPUT_TYPES(s):
+        percent = {"min": 0.0, "max": 1.0, "step": 0.001}
+        required = {
+            "model": ("MODEL", ),
+            "double_layers": ("STRING", {"default": "7, 8, 9", "multiline": False}),
+            "single_layers": ("STRING", {"default": "7, 8, 9", "multiline": False}),
+            "start_percent": ("FLOAT", {"default": 0.0, **percent}),
+            "end_percent": ("FLOAT", {"default": 1.0, **percent}),
+        }
+        return {"required": required}
+
+    def skip_guidance(self, model, start_percent, end_percent, double_layers="", single_layers=""):
+        '''
+        Return a 1-tuple with a clone of ``model`` whose calc_cond_batch function applies the
+        layer skipping, or ``model`` itself when neither layer string contains a number.
+        '''
+        def skip(args, extra_args):
+            # Replacement block: hand the block inputs back untouched.
+            return args
+
+        model_sampling = model.get_model_object("model_sampling")
+        sigma_start = model_sampling.percent_to_sigma(start_percent)
+        sigma_end = model_sampling.percent_to_sigma(end_percent)
+
+        # Every run of digits in the text inputs is one layer index.
+        double_layers = [int(i) for i in re.findall(r'\d+', double_layers)]
+        single_layers = [int(i) for i in re.findall(r'\d+', single_layers)]
+        if not double_layers and not single_layers:
+            return (model, )
+
+        def calc_cond_batch_function(args):
+            x, inner_model, conds, sigma = args["input"], args["model"], args["conds"], args["sigma"]
+            model_options = args["model_options"]
+            # Patched copy of the options, used for the uncond pass only.
+            slg_model_options = model_options.copy()
+            for block_name, layers in (("double_block", double_layers), ("single_block", single_layers)):
+                for layer in layers:
+                    slg_model_options = comfy.model_patcher.set_model_options_patch_replace(slg_model_options, skip, "dit", block_name, layer)
+
+            cond, uncond = conds
+            sigma_ = sigma[0].item()
+            in_window = sigma_end <= sigma_ <= sigma_start
+            if not in_window or uncond is None:
+                return comfy.samplers.calc_cond_batch(inner_model, conds, x, sigma, model_options)
+
+            # cond runs with the original options, uncond with the layer-skipping ones.
+            cond_out, _ = comfy.samplers.calc_cond_batch(inner_model, [cond, None], x, sigma, model_options)
+            _, uncond_out = comfy.samplers.calc_cond_batch(inner_model, [None, uncond], x, sigma, slg_model_options)
+            return [cond_out, uncond_out]
+
+        patched = model.clone()
+        patched.set_model_sampler_calc_cond_batch_function(calc_cond_batch_function)
+        return (patched, )

 NODE_CLASS_MAPPINGS = {
    "SkipLayerGuidanceDiT": SkipLayerGuidanceDiT,
+    "SkipLayerGuidanceDiTSimple": SkipLayerGuidanceDiTSimple,
 }
--- a/comfy_extras/nodes_tcfg.py
+++ b/comfy_extras/nodes_tcfg.py
@@ -0,0 +1,71 @@
+# TCFG: Tangential Damping Classifier-free Guidance - (arXiv: https://arxiv.org/abs/2503.18137)
+
+import torch
+
+from comfy.comfy_types import IO, ComfyNodeABC, InputTypeDict
+
+
+def score_tangential_damping(cond_score: torch.Tensor, uncond_score: torch.Tensor) -> torch.Tensor:
+    """Project the uncond score onto the leading right-singular vector of the stacked scores.
+
+    Per batch item both scores are flattened and stacked into a (2, N) float matrix; the result
+    is returned with the shape and dtype of ``uncond_score``.
+    """
+    n_batch = cond_score.shape[0]
+    cond_rows, uncond_rows = (s.reshape(n_batch, 1, -1).float() for s in (cond_score, uncond_score))
+    stacked = torch.cat((uncond_rows, cond_rows), dim=1)  # (B, 2, N), uncond row first
+    try:
+        Vh = torch.linalg.svd(stacked, full_matrices=False)[2]
+    except RuntimeError:
+        # Retry on CPU when the SVD fails on the tensor's own device.
+        Vh = torch.linalg.svd(stacked.cpu(), full_matrices=False)[2]
+
+    # Keep only the component of uncond along the dominant direction (drops the tangential part).
+    principal = Vh[:, 0:1, :].to(uncond_rows.device)  # (B, 1, N)
+    coeff = uncond_rows @ principal.transpose(-2, -1)  # (B, 1, 1)
+    return (coeff * principal).reshape_as(uncond_score).to(uncond_score.dtype)
+
+
+class TCFG(ComfyNodeABC):
+    """Node that installs tangential damping as a pre-CFG function on a cloned model."""
+
+    RETURN_TYPES = (IO.MODEL,)
+    RETURN_NAMES = ("patched_model",)
+    FUNCTION = "patch"
+    CATEGORY = "advanced/guidance"
+    DESCRIPTION = "TCFG – Tangential Damping CFG (2503.18137)\n\nRefine the uncond (negative) to align with the cond (positive) for improving quality."
+
+    @classmethod
+    def INPUT_TYPES(cls) -> InputTypeDict:
+        """Declare the single required input: the model to patch."""
+        return {
+            "required": {"model": (IO.MODEL, {})},
+        }
+
+    def patch(self, model):
+        """Return a 1-tuple with a clone of ``model`` that damps the uncond prediction before CFG."""
+
+        def tangential_damping_cfg(args):
+            # Predictions are assumed to be ordered [cond, uncond, ...].
+            x, conds_out = args["input"], args["conds_out"]
+            if len(conds_out) <= 1 or None in args["conds"][:2]:
+                # Nothing to align when either cond or uncond is missing.
+                return conds_out
+
+            cond_pred, uncond_pred = conds_out[0], conds_out[1]
+            # Damping works on (input - prediction); map the damped value back the same way.
+            damped = score_tangential_damping(x - cond_pred, x - uncond_pred)
+            return [cond_pred, x - damped] + conds_out[2:]
+
+        patched = model.clone()
+        patched.set_model_sampler_pre_cfg_function(tangential_damping_cfg)
+        return (patched,)
+
+
+NODE_CLASS_MAPPINGS = {  # node identifier -> implementing class
+    "TCFG": TCFG,
+}
+
+NODE_DISPLAY_NAME_MAPPINGS = {  # node identifier -> display name
+    "TCFG": "Tangential Damping CFG",
+}
--- a/comfy_extras/nodes_train.py
+++ b/comfy_extras/nodes_train.py
@@ -75,7 +75,7 @@ class BiasDiff(torch.nn.Module):
        return self.passive_memory_usage()


-def load_and_process_images(image_files, input_dir, resize_method="None"):
+def load_and_process_images(image_files, input_dir, resize_method="None", w=None, h=None):
    """Utility function to load and process a list of images.

    Args:
@@ -90,7 +90,6 @@ def load_and_process_images(image_files, input_dir, resize_method="None"):
        raise ValueError("No valid images found in input")

    output_images = []
-    w, h = None, None

    for file in image_files:
        image_path = os.path.join(input_dir, file)
@@ -206,6 +205,103 @@ class LoadImageSetFromFolderNode:
        return (output_tensor,)


+class LoadImageTextSetFromFolderNode:
+    """Load images from an input subfolder and CLIP-encode their sidecar ``.txt`` captions.
+
+    Images sit directly in the folder or in its immediate subfolders; a subfolder whose name up
+    to the first underscore is a number (kohya-ss/sd-scripts layout) is repeated that many times.
+    """
+
+    @classmethod
+    def INPUT_TYPES(s):
+        def size_input(axis):
+            tooltip = f"The {axis} to resize the images to. -1 means use the original {axis}."
+            return (IO.INT, {"default": -1, "min": -1, "max": 10000, "step": 1, "tooltip": tooltip})
+
+        return {
+            "required": {
+                "folder": (folder_paths.get_input_subfolders(), {"tooltip": "The folder to load images from."}),
+                "clip": (IO.CLIP, {"tooltip": "The CLIP model used for encoding the text."}),
+            },
+            "optional": {
+                "resize_method": (["None", "Stretch", "Crop", "Pad"], {"default": "None"}),
+                "width": size_input("width"),
+                "height": size_input("height"),
+            },
+        }
+
+    RETURN_TYPES = ("IMAGE", IO.CONDITIONING,)
+    FUNCTION = "load_images"
+    CATEGORY = "loaders"
+    EXPERIMENTAL = True
+    DESCRIPTION = "Loads a batch of images and caption from a directory for training."
+
+    def load_images(self, folder, clip, resize_method="None", width=None, height=None):
+        """Load the images of ``folder`` and encode one caption per image.
+
+        Args:
+            folder: Subfolder of the input directory to read.
+            clip: CLIP model used to tokenize and encode the captions.
+            resize_method: Passed to ``load_and_process_images``; optional like the node input.
+            width: Target width; -1 or None is forwarded as None.
+            height: Target height; -1 or None is forwarded as None.
+
+        Returns:
+            ``(images, conditions)`` as produced by ``load_and_process_images`` and
+            ``clip.encode_from_tokens_scheduled``.
+
+        Raises:
+            RuntimeError: If ``clip`` is None.
+        """
+        if clip is None:
+            raise RuntimeError("ERROR: clip input is invalid: None\n\nIf the clip is from a checkpoint loader node your checkpoint does not contain a valid clip or text encoder model.")
+
+        logging.info(f"Loading images from folder: {folder}")
+
+        sub_input_dir = os.path.join(folder_paths.get_input_directory(), folder)
+        valid_extensions = (".png", ".jpg", ".jpeg", ".webp")
+
+        image_files = []
+        for item in os.listdir(sub_input_dir):
+            path = os.path.join(sub_input_dir, item)
+            if item.lower().endswith(valid_extensions):
+                image_files.append(path)
+            elif os.path.isdir(path):
+                # Support kohya-ss/sd-scripts folder structure: "<repeat>_<name>"
+                prefix = item.split("_")[0]
+                repeat = int(prefix) if prefix.isdigit() else 1
+                image_files.extend([
+                    os.path.join(path, f) for f in os.listdir(path) if f.lower().endswith(valid_extensions)
+                ] * repeat)
+
+        captions = []
+        for image_file in image_files:
+            # splitext swaps only the final extension (str.replace would hit every occurrence in the path).
+            caption_path = os.path.splitext(image_file)[0] + ".txt"
+            if os.path.exists(caption_path):
+                with open(caption_path, "r", encoding="utf-8") as f:
+                    captions.append(f.read().strip())
+            else:
+                captions.append("")
+
+        width = width if width != -1 else None
+        height = height if height != -1 else None
+        output_tensor = load_and_process_images(image_files, sub_input_dir, resize_method, width, height)
+
+        logging.info(f"Loaded {len(output_tensor)} images from {sub_input_dir}.")
+
+        logging.info(f"Encoding captions from {sub_input_dir}.")
+        conditions = []
+        empty_cond = clip.encode_from_tokens_scheduled(clip.tokenize(""))
+        for text in captions:
+            # An image without caption reuses the cached empty encoding instead of being
+            # encoded a second time, so each image contributes a single encoding result.
+            cond = empty_cond if text == "" else clip.encode_from_tokens_scheduled(clip.tokenize(text))
+            conditions.extend(cond)
+        logging.info(f"Encoded {len(conditions)} captions from {sub_input_dir}.")
+        return (output_tensor, conditions)
+
+
 def draw_loss_graph(loss_map, steps):
    width, height = 500, 300
    img = Image.new("RGB", (width, height), "white")
@@ -381,6 +477,13 @@ class TrainLoraNode:

        latents = latents["samples"].to(dtype)
        num_images = latents.shape[0]
+        logging.info(f"Total Images: {num_images}, Total Captions: {len(positive)}")
+        if len(positive) == 1 and num_images > 1:
+            positive = positive * num_images
+        elif len(positive) != num_images:
+            raise ValueError(
+                f"Number of positive conditions ({len(positive)}) does not match number of images ({num_images})."
+            )

        with torch.inference_mode(False):
            lora_sd = {}
@@ -474,6 +577,7 @@ class TrainLoraNode:
            # setup models
            for m in find_all_highest_child_module_with_forward(mp.model.diffusion_model):
                patch(m)
+            mp.model.requires_grad_(False)
            comfy.model_management.load_models_gpu([mp], memory_required=1e20, force_full_load=True)

            # Setup sampler and guider like in test script
@@ -486,7 +590,6 @@ class TrainLoraNode:
            )
            guider = comfy_extras.nodes_custom_sampler.Guider_Basic(mp)
            guider.set_conds(positive)  # Set conditioning from input
-            ss = comfy_extras.nodes_custom_sampler.SamplerCustomAdvanced()

            # yoland: this currently resize to the first image in the dataset

@@ -495,21 +598,21 @@ class TrainLoraNode:
            try:
                for step in (pbar:=tqdm.trange(steps, desc="Training LoRA", smoothing=0.01, disable=not comfy.utils.PROGRESS_BAR_ENABLED)):
                    # Generate random sigma
-                    sigma = mp.model.model_sampling.percent_to_sigma(
+                    sigmas = [mp.model.model_sampling.percent_to_sigma(
                        torch.rand((1,)).item()
-                    )
-                    sigma = torch.tensor([sigma])
+                    ) for _ in range(min(batch_size, num_images))]
+                    sigmas = torch.tensor(sigmas)

                    noise = comfy_extras.nodes_custom_sampler.Noise_RandomNoise(step * 1000 + seed)

                    indices = torch.randperm(num_images)[:batch_size]
-                    ss.sample(
-                        noise, guider, train_sampler, sigma, {"samples": latents[indices].clone()}
-                    )
+                    batch_latent = latents[indices].clone()
+                    guider.set_conds([positive[i] for i in indices])  # Set conditioning from input
+                    guider.sample(noise.generate_noise({"samples": batch_latent}), batch_latent, train_sampler, sigmas, seed=noise.seed)
            finally:
                for m in mp.model.modules():
                    unpatch(m)
-            del ss, train_sampler, optimizer
+            del train_sampler, optimizer
            torch.cuda.empty_cache()

            for adapter in all_weight_adapters:
@@ -697,6 +800,7 @@ NODE_CLASS_MAPPINGS = {
    "SaveLoRANode": SaveLoRA,
    "LoraModelLoader": LoraModelLoader,
    "LoadImageSetFromFolderNode": LoadImageSetFromFolderNode,
+    "LoadImageTextSetFromFolderNode": LoadImageTextSetFromFolderNode,
    "LossGraphNode": LossGraphNode,
 }

@@ -705,5 +809,6 @@ NODE_DISPLAY_NAME_MAPPINGS = {
    "SaveLoRANode": "Save LoRA Weights",
    "LoraModelLoader": "Load LoRA Model",
    "LoadImageSetFromFolderNode": "Load Image Dataset from Folder",
+    "LoadImageTextSetFromFolderNode": "Load Image and Text Dataset from Folder",
    "LossGraphNode": "Plot Loss Graph",
 }