Merge remote-tracking branch 'origin/master' into pysssss/basic-glsl-shader-node

2026-03-07 14:19:57 +00:00 · 2026-01-28 10:50:12 -08:00
parent 3da0e9c367 1711020904
commit cee092213e
53 changed files with 4204 additions and 408 deletions
--- a/comfy_extras/nodes_custom_sampler.py
+++ b/comfy_extras/nodes_custom_sampler.py
@@ -701,7 +701,14 @@ class Noise_EmptyNoise:

    def generate_noise(self, input_latent):
        latent_image = input_latent["samples"]
-        return torch.zeros(latent_image.shape, dtype=latent_image.dtype, layout=latent_image.layout, device="cpu")
+        if latent_image.is_nested:  # nested latent: build one zero tensor per unbound component
+            tensors = latent_image.unbind()
+            zeros = []
+            for t in tensors:
+                zeros.append(torch.zeros(t.shape, dtype=t.dtype, layout=t.layout, device="cpu"))  # mirrors each component's shape/dtype/layout, always on CPU
+            return comfy.nested_tensor.NestedTensor(zeros)  # re-wrap the per-component zeros as a NestedTensor
+        else:
+            return torch.zeros(latent_image.shape, dtype=latent_image.dtype, layout=latent_image.layout, device="cpu")  # non-nested latent: same call as before this change


 class Noise_RandomNoise:
@@ -741,7 +748,7 @@ class SamplerCustom(io.ComfyNode):
        latent = latent_image
        latent_image = latent["samples"]
        latent = latent.copy()
-        latent_image = comfy.sample.fix_empty_latent_channels(model, latent_image)
+        latent_image = comfy.sample.fix_empty_latent_channels(model, latent_image, latent.get("downscale_ratio_spacial", None))
        latent["samples"] = latent_image

        if not add_noise:
@@ -760,6 +767,7 @@ class SamplerCustom(io.ComfyNode):
        samples = comfy.sample.sample_custom(model, noise, cfg, sampler, sigmas, positive, negative, latent_image, noise_mask=noise_mask, callback=callback, disable_pbar=disable_pbar, seed=noise_seed)

        out = latent.copy()
+        out.pop("downscale_ratio_spacial", None)
        out["samples"] = samples
        if "x0" in x0_output:
            x0_out = model.model.process_latent_out(x0_output["x0"].cpu())
@@ -939,7 +947,7 @@ class SamplerCustomAdvanced(io.ComfyNode):
        latent = latent_image
        latent_image = latent["samples"]
        latent = latent.copy()
-        latent_image = comfy.sample.fix_empty_latent_channels(guider.model_patcher, latent_image)
+        latent_image = comfy.sample.fix_empty_latent_channels(guider.model_patcher, latent_image, latent.get("downscale_ratio_spacial", None))
        latent["samples"] = latent_image

        noise_mask = None
@@ -954,6 +962,7 @@ class SamplerCustomAdvanced(io.ComfyNode):
        samples = samples.to(comfy.model_management.intermediate_device())

        out = latent.copy()
+        out.pop("downscale_ratio_spacial", None)
        out["samples"] = samples
        if "x0" in x0_output:
            x0_out = guider.model_patcher.model.process_latent_out(x0_output["x0"].cpu())
--- a/comfy_extras/nodes_logic.py
+++ b/comfy_extras/nodes_logic.py
@@ -104,19 +104,23 @@ class CustomComboNode(io.ComfyNode):
            category="utils",
            is_experimental=True,
            inputs=[io.Combo.Input("choice", options=[])],
-            outputs=[io.String.Output()]
+            outputs=[
+                io.String.Output(display_name="STRING"),
+                io.Int.Output(display_name="INDEX"),
+            ],
+            accept_all_inputs=True,
        )

    @classmethod
-    def validate_inputs(cls, choice: io.Combo.Type) -> bool:
+    def validate_inputs(cls, choice: io.Combo.Type, index: int = 0, **kwargs) -> bool:  # index/**kwargs are accepted but unused here; presumably they cover the extra inputs allowed by accept_all_inputs=True
        # NOTE: DO NOT DO THIS unless you want to skip validation entirely on the node's inputs.
        # I am doing that here because the widgets (besides the combo dropdown) on this node are fully frontend defined.
        # I need to skip checking that the chosen combo option is in the options list, since those are defined by the user.
        return True

    @classmethod
-    def execute(cls, choice: io.Combo.Type) -> io.NodeOutput:
-        return io.NodeOutput(choice)
+    def execute(cls, choice: io.Combo.Type, index: int = 0, **kwargs) -> io.NodeOutput:  # any further keyword inputs are accepted and ignored
+        return io.NodeOutput(choice, index)  # two outputs: the chosen string, then index (defaults to 0 when not supplied)


 class DCTestNode(io.ComfyNode):
--- a/comfy_extras/nodes_lora_debug.py
+++ b/comfy_extras/nodes_lora_debug.py
@@ -0,0 +1,79 @@
+import folder_paths
+import comfy.utils
+import comfy.sd
+
+
+class LoraLoaderBypass:
+    """
+    Apply LoRA in bypass mode without modifying base model weights.
+
+    Bypass mode computes: output = base_forward(x) + lora_path(x)
+    This is useful for training and when model weights are offloaded.
+    """
+
+    def __init__(self):
+        self.loaded_lora = None  # single-entry cache: (lora_path, loaded weights) of the most recently loaded file
+
+    @classmethod
+    def INPUT_TYPES(s):
+        return {
+            "required": {
+                "model": ("MODEL", {"tooltip": "The diffusion model the LoRA will be applied to."}),
+                "clip": ("CLIP", {"tooltip": "The CLIP model the LoRA will be applied to."}),
+                "lora_name": (folder_paths.get_filename_list("loras"), {"tooltip": "The name of the LoRA."}),
+                "strength_model": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step": 0.01, "tooltip": "How strongly to modify the diffusion model. This value can be negative."}),
+                "strength_clip": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step": 0.01, "tooltip": "How strongly to modify the CLIP model. This value can be negative."}),
+            }
+        }
+
+    RETURN_TYPES = ("MODEL", "CLIP")
+    OUTPUT_TOOLTIPS = ("The modified diffusion model.", "The modified CLIP model.")
+    FUNCTION = "load_lora"  # name of the method below that implements this node
+
+    CATEGORY = "loaders"
+    DESCRIPTION = "Apply LoRA in bypass mode. Unlike regular LoRA, this doesn't modify model weights - instead it injects the LoRA computation during forward pass. Useful for training scenarios."
+    EXPERIMENTAL = True
+
+    def load_lora(self, model, clip, lora_name, strength_model, strength_clip):
+        if strength_model == 0 and strength_clip == 0:  # both strengths zero: return the inputs unchanged without touching the file
+            return (model, clip)
+
+        lora_path = folder_paths.get_full_path_or_raise("loras", lora_name)
+        lora = None
+        if self.loaded_lora is not None:
+            if self.loaded_lora[0] == lora_path:  # cache hit: same path as the previous call
+                lora = self.loaded_lora[1]
+            else:
+                self.loaded_lora = None  # different file requested: drop the stale cache entry
+
+        if lora is None:  # nothing cached for this path: load it and remember it
+            lora = comfy.utils.load_torch_file(lora_path, safe_load=True)
+            self.loaded_lora = (lora_path, lora)
+
+        model_lora, clip_lora = comfy.sd.load_bypass_lora_for_models(model, clip, lora, strength_model, strength_clip)  # the actual bypass application is delegated to comfy.sd
+        return (model_lora, clip_lora)
+
+
+class LoraLoaderBypassModelOnly(LoraLoaderBypass):  # variant with no CLIP input; returns only the model
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": { "model": ("MODEL",),
+                              "lora_name": (folder_paths.get_filename_list("loras"), ),
+                              "strength_model": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step": 0.01}),
+                              }}
+    RETURN_TYPES = ("MODEL",)
+    FUNCTION = "load_lora_model_only"
+
+    def load_lora_model_only(self, model, lora_name, strength_model):
+        return (self.load_lora(model, None, lora_name, strength_model, 0)[0],)  # reuse the parent loader with clip=None and clip strength 0, keeping only the model result
+
+
+NODE_CLASS_MAPPINGS = {  # node identifier -> implementing class
+    "LoraLoaderBypass": LoraLoaderBypass,
+    "LoraLoaderBypassModelOnly": LoraLoaderBypassModelOnly,
+}
+
+NODE_DISPLAY_NAME_MAPPINGS = {  # node identifier -> display label; NOTE(review): "(For debugging)" vs "(for debugging)" differ in capitalization
+    "LoraLoaderBypass": "Load LoRA (Bypass) (For debugging)",
+    "LoraLoaderBypassModelOnly": "Load LoRA (Bypass, Model Only) (for debugging)",
+}
--- a/comfy_extras/nodes_lt.py
+++ b/comfy_extras/nodes_lt.py
@@ -223,11 +223,24 @@ class LTXVAddGuide(io.ComfyNode):
        return frame_idx, latent_idx

    @classmethod
-    def add_keyframe_index(cls, cond, frame_idx, guiding_latent, scale_factors):
+    def add_keyframe_index(cls, cond, frame_idx, guiding_latent, scale_factors, latent_downscale_factor=1):
        keyframe_idxs, _ = get_keyframe_idxs(cond)
        _, latent_coords = cls.PATCHIFIER.patchify(guiding_latent)
        pixel_coords = latent_to_pixel_coords(latent_coords, scale_factors, causal_fix=frame_idx == 0)  # we need the causal fix only if we're placing the new latents at index 0
        pixel_coords[:, 0] += frame_idx
+
+        # The following adjusts keyframe end positions for small grid IC-LoRA.
+        # After dilation, the small grid has the same size and position as the large grid,
+        # but each token encodes a larger image patch. We adjust the end position (not start)
+        # so that RoPE represents the correct middle point of each token.
+        # keyframe_idxs dims: (batch, spatial_dim [t,h,w], token_id, [start, end])
+        # We only adjust h,w (not t) in dim 1, and only end (not start) in dim 3.
+        spatial_end_offset = (latent_downscale_factor - 1) * torch.tensor(
+            scale_factors[1:],
+            device=pixel_coords.device,
+        ).view(1, -1, 1, 1)
+        pixel_coords[:, 1:, :, 1:] += spatial_end_offset.to(pixel_coords.dtype)
+
        if keyframe_idxs is None:
            keyframe_idxs = pixel_coords
        else:
@@ -235,12 +248,12 @@ class LTXVAddGuide(io.ComfyNode):
        return node_helpers.conditioning_set_values(cond, {"keyframe_idxs": keyframe_idxs})

    @classmethod
-    def append_keyframe(cls, positive, negative, frame_idx, latent_image, noise_mask, guiding_latent, strength, scale_factors, guide_mask=None, in_channels=128):
+    def append_keyframe(cls, positive, negative, frame_idx, latent_image, noise_mask, guiding_latent, strength, scale_factors, guide_mask=None, in_channels=128, latent_downscale_factor=1):
        if latent_image.shape[1] != in_channels or guiding_latent.shape[1] != in_channels:
            raise ValueError("Adding guide to a combined AV latent is not supported.")

-        positive = cls.add_keyframe_index(positive, frame_idx, guiding_latent, scale_factors)
-        negative = cls.add_keyframe_index(negative, frame_idx, guiding_latent, scale_factors)
+        positive = cls.add_keyframe_index(positive, frame_idx, guiding_latent, scale_factors, latent_downscale_factor)
+        negative = cls.add_keyframe_index(negative, frame_idx, guiding_latent, scale_factors, latent_downscale_factor)

        if guide_mask is not None:
            target_h = max(noise_mask.shape[3], guide_mask.shape[3])
--- a/comfy_extras/nodes_sd3.py
+++ b/comfy_extras/nodes_sd3.py
@@ -55,7 +55,7 @@ class EmptySD3LatentImage(io.ComfyNode):
    @classmethod
    def execute(cls, width, height, batch_size=1) -> io.NodeOutput:
        latent = torch.zeros([batch_size, 16, height // 8, width // 8], device=comfy.model_management.intermediate_device())
-        return io.NodeOutput({"samples":latent})
+        return io.NodeOutput({"samples": latent, "downscale_ratio_spacial": 8})  # 8 matches the // 8 applied to height and width above

    generate = execute  # TODO: remove

--- a/comfy_extras/nodes_train.py
+++ b/comfy_extras/nodes_train.py
@@ -18,6 +18,7 @@ import comfy_extras.nodes_custom_sampler
 import folder_paths
 import node_helpers
 from comfy.weight_adapter import adapters, adapter_maps
+from comfy.weight_adapter.bypass import BypassInjectionManager
 from comfy_api.latest import ComfyExtension, io, ui
 from comfy.utils import ProgressBar

@@ -339,6 +340,11 @@ class TrainSampler(comfy.samplers.Sampler):
                self._train_step_multires_mode(model_wrap, cond, extra_args, noisegen, latent_image, dataset_size, pbar)

            if (i + 1) % self.grad_acc == 0:
+                for param_groups in self.optimizer.param_groups:
+                    for param in param_groups["params"]:
+                        if param.grad is None:
+                            continue
+                        param.grad.data = param.grad.data.to(param.data.dtype)
                self.optimizer.step()
                self.optimizer.zero_grad()
            ui_pbar.update(1)
@@ -498,9 +504,9 @@ def _prepare_latents_and_count(latents, dtype, bucket_mode):
        num_images = sum(t.shape[0] for t in latents)
        multi_res = False  # Not using multi_res path in bucket mode

-        logging.info(f"Bucket mode: {num_buckets} buckets, {num_images} total samples")
+        logging.debug(f"Bucket mode: {num_buckets} buckets, {num_images} total samples")
        for i, lat in enumerate(latents):
-            logging.info(f"  Bucket {i}: shape {lat.shape}")
+            logging.debug(f"  Bucket {i}: shape {lat.shape}")
        return latents, num_images, multi_res

    # Non-bucket mode
@@ -509,7 +515,7 @@ def _prepare_latents_and_count(latents, dtype, bucket_mode):
        latents = [t.to(dtype) for t in latents]
        for latent in latents:
            all_shapes.add(latent.shape)
-        logging.info(f"Latent shapes: {all_shapes}")
+        logging.debug(f"Latent shapes: {all_shapes}")
        if len(all_shapes) > 1:
            multi_res = True
        else:
@@ -545,7 +551,7 @@ def _validate_and_expand_conditioning(positive, num_images, bucket_mode):
    if bucket_mode:
        return positive  # Skip validation in bucket mode

-    logging.info(f"Total Images: {num_images}, Total Captions: {len(positive)}")
+    logging.debug(f"Total Images: {num_images}, Total Captions: {len(positive)}")
    if len(positive) == 1 and num_images > 1:
        return positive * num_images
    elif len(positive) != num_images:
@@ -596,6 +602,8 @@ def _create_weight_adapter(
    shape = module.weight.shape
    lora_params = {}

+    logging.debug(f"Creating weight adapter for {key} with shape {shape}")
+
    if len(shape) >= 2:
        alpha = float(existing_weights.get(f"{key}.alpha", 1.0))
        dora_scale = existing_weights.get(f"{key}.dora_scale", None)
@@ -690,6 +698,61 @@ def _setup_lora_adapters(mp, existing_weights, algorithm, lora_dtype, rank):
    return lora_sd, all_weight_adapters


+def _setup_lora_adapters_bypass(mp, existing_weights, algorithm, lora_dtype, rank):
+    """Setup LoRA adapters in bypass mode.
+
+    In bypass mode:
+        - Weight adapters (lora/lokr/oft) use bypass injection (forward hook)
+        - Bias/norm adapters (BiasDiff) still use weight wrapper (direct modification)
+
+    This is useful when the base model weights are quantized and cannot be
+    directly modified.
+
+    Args:
+        mp: Model patcher
+        existing_weights: Dict of existing LoRA weights
+        algorithm: Algorithm name for new adapters
+        lora_dtype: dtype for LoRA weights
+        rank: Rank for new LoRA adapters
+
+    Returns:
+        tuple: (lora_sd dict, all_weight_adapters list, bypass_manager)
+    """
+    lora_sd = {}
+    all_weight_adapters = []
+    bypass_manager = BypassInjectionManager()  # collects the adapters registered for bypass; returned to the caller
+
+    for n, m in mp.model.named_modules():  # n: module name, m: module
+        if hasattr(m, "weight_function"):  # modules without a weight_function attribute are skipped entirely
+            if m.weight is not None:
+                adapter, params = _create_weight_adapter(
+                    m, n, existing_weights, algorithm, lora_dtype, rank
+                )
+                lora_sd.update(params)
+                all_weight_adapters.append(adapter)
+
+                key = f"{n}.weight"
+                # BiasDiff (for 1D weights like norm) uses weight wrapper, not bypass
+                # Only use bypass for adapters that have h() method (lora/lokr/oft)
+                if isinstance(adapter, BiasDiff):
+                    mp.add_weight_wrapper(key, adapter)
+                    logging.debug(f"[BypassMode] Added 1D weight adapter (weight wrapper) for {key}")
+                else:
+                    bypass_manager.add_adapter(key, adapter, strength=1.0)  # registered with the manager only; nothing is added to mp here
+                    logging.debug(f"[BypassMode] Added weight adapter (bypass) for {key}")
+
+            if hasattr(m, "bias") and m.bias is not None:  # checked independently of the weight branch above
+                # Bias adapters still use weight wrapper (bias is usually not quantized)
+                bias_adapter, bias_params = _create_bias_adapter(m, n, lora_dtype)
+                lora_sd.update(bias_params)
+                key = f"{n}.bias"
+                mp.add_weight_wrapper(key, bias_adapter)
+                all_weight_adapters.append(bias_adapter)
+                logging.debug(f"[BypassMode] Added bias adapter (weight wrapper) for {key}")
+
+    return lora_sd, all_weight_adapters, bypass_manager
+
+
 def _create_optimizer(optimizer_name, parameters, learning_rate):
    """Create optimizer based on name.

@@ -884,11 +947,13 @@ class TrainLoraNode(io.ComfyNode):
                    default=False,
                    tooltip="Enable resolution bucket mode. When enabled, expects pre-bucketed latents from ResolutionBucket node.",
                ),
+                io.Boolean.Input(
+                    "bypass_mode",
+                    default=False,
+                    tooltip="Enable bypass mode for training. When enabled, adapters are applied via forward hooks instead of weight modification. Useful for quantized models where weights cannot be directly modified.",
+                ),
            ],
            outputs=[
-                io.Model.Output(
-                    display_name="model", tooltip="Model with LoRA applied"
-                ),
                io.Custom("LORA_MODEL").Output(
                    display_name="lora", tooltip="LoRA weights"
                ),
@@ -919,6 +984,7 @@ class TrainLoraNode(io.ComfyNode):
        gradient_checkpointing,
        existing_lora,
        bucket_mode,
+        bypass_mode,
    ):
        # Extract scalars from lists (due to is_input_list=True)
        model = model[0]
@@ -936,6 +1002,7 @@ class TrainLoraNode(io.ComfyNode):
        gradient_checkpointing = gradient_checkpointing[0]
        existing_lora = existing_lora[0]
        bucket_mode = bucket_mode[0]
+        bypass_mode = bypass_mode[0]

        # Process latents based on mode
        if bucket_mode:
@@ -968,9 +1035,16 @@ class TrainLoraNode(io.ComfyNode):
            existing_weights, existing_steps = _load_existing_lora(existing_lora)

            # Setup LoRA adapters
-            lora_sd, all_weight_adapters = _setup_lora_adapters(
-                mp, existing_weights, algorithm, lora_dtype, rank
-            )
+            bypass_manager = None
+            if bypass_mode:
+                logging.debug("Using bypass mode for training")
+                lora_sd, all_weight_adapters, bypass_manager = _setup_lora_adapters_bypass(
+                    mp, existing_weights, algorithm, lora_dtype, rank
+                )
+            else:
+                lora_sd, all_weight_adapters = _setup_lora_adapters(
+                    mp, existing_weights, algorithm, lora_dtype, rank
+                )

            # Create optimizer and loss function
            optimizer = _create_optimizer(
@@ -1029,6 +1103,14 @@ class TrainLoraNode(io.ComfyNode):
            guider = TrainGuider(mp)
            guider.set_conds(positive)

+            # Inject bypass hooks if bypass mode is enabled
+            bypass_injections = None
+            if bypass_manager is not None:
+                bypass_injections = bypass_manager.create_injections(mp.model)
+                for injection in bypass_injections:
+                    injection.inject(mp)
+                logging.debug(f"[BypassMode] Injected {bypass_manager.get_hook_count()} bypass hooks")
+
            # Run training loop
            try:
                _run_training_loop(
@@ -1041,6 +1123,11 @@ class TrainLoraNode(io.ComfyNode):
                    multi_res,
                )
            finally:
+                # Eject bypass hooks if they were injected
+                if bypass_injections is not None:
+                    for injection in bypass_injections:
+                        injection.eject(mp)
+                    logging.debug("[BypassMode] Ejected bypass hooks")
                for m in mp.model.modules():
                    unpatch(m)
            del train_sampler, optimizer
@@ -1052,7 +1139,9 @@ class TrainLoraNode(io.ComfyNode):
            for param in lora_sd:
                lora_sd[param] = lora_sd[param].to(lora_dtype)

-            return io.NodeOutput(mp, lora_sd, loss_map, steps + existing_steps)
+            # The model patcher (mp) in the train node is highly specialized for training;
+            # using it for inference would result in bad behavior, so we don't return it.
+            return io.NodeOutput(lora_sd, loss_map, steps + existing_steps)


 class LoraModelLoader(io.ComfyNode):#