Merge branch 'master' into deepme987/auto-register-node-replacements-json

feat: auto-register node replacements from custom node JSON files
2026-03-23 14:07:30 +00:00 · 2026-03-18 17:14:08 -07:00 · 2026-03-17 20:58:06 -07:00 · 2026-03-17 20:57:32 -07:00
25 changed files with 410 additions and 674 deletions
--- a/comfy/context_windows.py
+++ b/comfy/context_windows.py
@@ -93,50 +93,6 @@ class IndexListCallbacks:
        return {}


-def slice_cond(cond_value, window: IndexListContextWindow, x_in: torch.Tensor, device, temporal_dim: int, temporal_scale: int=1, temporal_offset: int=0, retain_index_list: list[int]=[]):
-    if not (hasattr(cond_value, "cond") and isinstance(cond_value.cond, torch.Tensor)):
-        return None
-    cond_tensor = cond_value.cond
-    if temporal_dim >= cond_tensor.ndim:
-        return None
-
-    cond_size = cond_tensor.size(temporal_dim)
-
-    if temporal_scale == 1:
-        expected_size = x_in.size(window.dim) - temporal_offset
-        if cond_size != expected_size:
-            return None
-
-    if temporal_offset == 0 and temporal_scale == 1:
-        sliced = window.get_tensor(cond_tensor, device, dim=temporal_dim, retain_index_list=retain_index_list)
-        return cond_value._copy_with(sliced)
-
-    # skip leading latent positions that have no corresponding conditioning (e.g. reference frames)
-    if temporal_offset > 0:
-        indices = [i - temporal_offset for i in window.index_list[temporal_offset:]]
-        indices = [i for i in indices if 0 <= i]
-    else:
-        indices = list(window.index_list)
-
-    if not indices:
-        return None
-
-    if temporal_scale > 1:
-        scaled = []
-        for i in indices:
-            for k in range(temporal_scale):
-                si = i * temporal_scale + k
-                if si < cond_size:
-                    scaled.append(si)
-        indices = scaled
-        if not indices:
-            return None
-
-    idx = tuple([slice(None)] * temporal_dim + [indices])
-    sliced = cond_tensor[idx].to(device)
-    return cond_value._copy_with(sliced)
-
-
@dataclass
 class ContextSchedule:
    name: str
@@ -221,17 +177,10 @@ class IndexListContextHandler(ContextHandlerABC):
                                    new_cond_item[cond_key] = result
                                    handled = True
                                    break
-                            if not handled and self._model is not None:
-                                result = self._model.resize_cond_for_context_window(
-                                    cond_key, cond_value, window, x_in, device,
-                                    retain_index_list=self.cond_retain_index_list)
-                                if result is not None:
-                                    new_cond_item[cond_key] = result
-                                    handled = True
                            if handled:
                                continue
                            if isinstance(cond_value, torch.Tensor):
-                                if (self.dim < cond_value.ndim and cond_value.size(self.dim) == x_in.size(self.dim)) or \
+                                if (self.dim < cond_value.ndim and cond_value.size(self.dim) == x_in.size(self.dim)) or \
                                   (cond_value.ndim < self.dim and cond_value.size(0) == x_in.size(self.dim)):
                                    new_cond_item[cond_key] = window.get_tensor(cond_value, device)
                            # Handle audio_embed (temporal dim is 1)
@@ -275,7 +224,6 @@ class IndexListContextHandler(ContextHandlerABC):
        return context_windows

    def execute(self, calc_cond_batch: Callable, model: BaseModel, conds: list[list[dict]], x_in: torch.Tensor, timestep: torch.Tensor, model_options: dict[str]):
-        self._model = model
        self.set_step(timestep, model_options)
        context_windows = self.get_context_windows(model, x_in, model_options)
        enumerated_context_windows = list(enumerate(context_windows))
--- a/comfy/ldm/flux/model.py
+++ b/comfy/ldm/flux/model.py
@@ -386,7 +386,7 @@ class Flux(nn.Module):
                    h = max(h, ref.shape[-2] + h_offset)
                    w = max(w, ref.shape[-1] + w_offset)

-                kontext, kontext_ids = self.process_img(ref, index=index, h_offset=h_offset, w_offset=w_offset, transformer_options=transformer_options)
+                kontext, kontext_ids = self.process_img(ref, index=index, h_offset=h_offset, w_offset=w_offset)
                img = torch.cat([img, kontext], dim=1)
                img_ids = torch.cat([img_ids, kontext_ids], dim=1)
                ref_num_tokens.append(kontext.shape[1])
--- a/comfy/ldm/lightricks/vae/causal_conv3d.py
+++ b/comfy/ldm/lightricks/vae/causal_conv3d.py
@@ -23,11 +23,6 @@ class CausalConv3d(nn.Module):
        self.in_channels = in_channels
        self.out_channels = out_channels

-        if isinstance(stride, int):
-            self.time_stride = stride
-        else:
-            self.time_stride = stride[0]
-
        kernel_size = (kernel_size, kernel_size, kernel_size)
        self.time_kernel_size = kernel_size[0]

@@ -63,23 +58,18 @@ class CausalConv3d(nn.Module):
        pieces = [ cached, x ]
        if is_end and not causal:
            pieces.append(x[:, :, -1:, :, :].repeat((1, 1, (self.time_kernel_size - 1) // 2, 1, 1)))
-        input_length = sum([piece.shape[2] for piece in pieces])
-        cache_length = (self.time_kernel_size - self.time_stride) + ((input_length - self.time_kernel_size) % self.time_stride)

        needs_caching = not is_end
-        if needs_caching and cache_length == 0:
-            self.temporal_cache_state[tid] = (x[:, :, :0, :, :], False)
+        if needs_caching and x.shape[2] >= self.time_kernel_size - 1:
            needs_caching = False
-        if needs_caching and x.shape[2] >= cache_length:
-            needs_caching = False
-            self.temporal_cache_state[tid] = (x[:, :, -cache_length:, :, :], False)
+            self.temporal_cache_state[tid] = (x[:, :, -(self.time_kernel_size - 1):, :, :], False)

        x = torch.cat(pieces, dim=2)
        del pieces
        del cached

        if needs_caching:
-            self.temporal_cache_state[tid] = (x[:, :, -cache_length:, :, :], False)
+            self.temporal_cache_state[tid] = (x[:, :, -(self.time_kernel_size - 1):, :, :], False)
        elif is_end:
            self.temporal_cache_state[tid] = (None, True)

--- a/comfy/ldm/lightricks/vae/causal_video_autoencoder.py
+++ b/comfy/ldm/lightricks/vae/causal_video_autoencoder.py
@@ -233,7 +233,10 @@ class Encoder(nn.Module):

        self.gradient_checkpointing = False

-    def _forward_chunk(self, sample: torch.FloatTensor) -> Optional[torch.FloatTensor]:
+    def forward_orig(self, sample: torch.FloatTensor) -> torch.FloatTensor:
+        r"""The forward method of the `Encoder` class."""
+
+        sample = patchify(sample, patch_size_hw=self.patch_size, patch_size_t=1)
        sample = self.conv_in(sample)

        checkpoint_fn = (
@@ -244,14 +247,10 @@ class Encoder(nn.Module):

        for down_block in self.down_blocks:
            sample = checkpoint_fn(down_block)(sample)
-            if sample is None or sample.shape[2] == 0:
-                return None

        sample = self.conv_norm_out(sample)
        sample = self.conv_act(sample)
        sample = self.conv_out(sample)
-        if sample is None or sample.shape[2] == 0:
-            return None

        if self.latent_log_var == "uniform":
            last_channel = sample[:, -1:, ...]
@@ -283,35 +282,9 @@ class Encoder(nn.Module):

        return sample

-    def forward_orig(self, sample: torch.FloatTensor, device=None) -> torch.FloatTensor:
-        r"""The forward method of the `Encoder` class."""
-
-        max_chunk_size = get_max_chunk_size(sample.device if device is None else device) * 2  # encoder is more memory-efficient than decoder
-        frame_size = sample[:, :, :1, :, :].numel() * sample.element_size()
-        frame_size = int(frame_size * (self.conv_in.out_channels / self.conv_in.in_channels))
-
-        outputs = []
-        samples = [sample[:, :, :1, :, :]]
-        if sample.shape[2] > 1:
-            chunk_t = max(2, max_chunk_size // frame_size)
-            if chunk_t < 4:
-                chunk_t = 2
-            elif chunk_t < 8:
-                chunk_t = 4
-            else:
-                chunk_t = (chunk_t // 8) * 8
-            samples += list(torch.split(sample[:, :, 1:, :, :], chunk_t, dim=2))
-        for chunk_idx, chunk in enumerate(samples):
-            if chunk_idx == len(samples) - 1:
-                mark_conv3d_ended(self)
-            chunk = patchify(chunk, patch_size_hw=self.patch_size, patch_size_t=1).to(device=device)
-            output = self._forward_chunk(chunk)
-            if output is not None:
-                outputs.append(output)
-
-        return torch_cat_if_needed(outputs, dim=2)
-
    def forward(self, *args, **kwargs):
+        # No encoder support, so just flag the end so it doesn't use the cache.
+        mark_conv3d_ended(self)
        try:
            return self.forward_orig(*args, **kwargs)
        finally:
@@ -536,53 +509,6 @@ class Decoder(nn.Module):
        c, (ts, hs, ws), to = self._output_scale
        return (input_shape[0], c, input_shape[2] * ts - to, input_shape[3] * hs, input_shape[4] * ws)

-    def run_up(self, idx, sample_ref, ended, timestep_shift_scale, scaled_timestep, checkpoint_fn, output_buffer, output_offset, max_chunk_size):
-        sample = sample_ref[0]
-        sample_ref[0] = None
-        if idx >= len(self.up_blocks):
-            sample = self.conv_norm_out(sample)
-            if timestep_shift_scale is not None:
-                shift, scale = timestep_shift_scale
-                sample = sample * (1 + scale) + shift
-            sample = self.conv_act(sample)
-            if ended:
-                mark_conv3d_ended(self.conv_out)
-            sample = self.conv_out(sample, causal=self.causal)
-            if sample is not None and sample.shape[2] > 0:
-                sample = unpatchify(sample, patch_size_hw=self.patch_size, patch_size_t=1)
-                t = sample.shape[2]
-                output_buffer[:, :, output_offset[0]:output_offset[0] + t].copy_(sample)
-                output_offset[0] += t
-            return
-
-        up_block = self.up_blocks[idx]
-        if ended:
-            mark_conv3d_ended(up_block)
-        if self.timestep_conditioning and isinstance(up_block, UNetMidBlock3D):
-            sample = checkpoint_fn(up_block)(
-                sample, causal=self.causal, timestep=scaled_timestep
-            )
-        else:
-            sample = checkpoint_fn(up_block)(sample, causal=self.causal)
-
-        if sample is None or sample.shape[2] == 0:
-            return
-
-        total_bytes = sample.numel() * sample.element_size()
-        num_chunks = (total_bytes + max_chunk_size - 1) // max_chunk_size
-
-        if num_chunks == 1:
-            # when we are not chunking, detach our x so the callee can free it as soon as they are done
-            next_sample_ref = [sample]
-            del sample
-            self.run_up(idx + 1, next_sample_ref, ended, timestep_shift_scale, scaled_timestep, checkpoint_fn, output_buffer, output_offset, max_chunk_size)
-            return
-        else:
-            samples = torch.chunk(sample, chunks=num_chunks, dim=2)
-
-            for chunk_idx, sample1 in enumerate(samples):
-                self.run_up(idx + 1, [sample1], ended and chunk_idx == len(samples) - 1, timestep_shift_scale, scaled_timestep, checkpoint_fn, output_buffer, output_offset, max_chunk_size)
-
    def forward_orig(
        self,
        sample: torch.FloatTensor,
@@ -602,7 +528,6 @@ class Decoder(nn.Module):
        )

        timestep_shift_scale = None
-        scaled_timestep = None
        if self.timestep_conditioning:
            assert (
                timestep is not None
@@ -639,7 +564,54 @@ class Decoder(nn.Module):

        max_chunk_size = get_max_chunk_size(sample.device)

-        self.run_up(0, [sample], True, timestep_shift_scale, scaled_timestep, checkpoint_fn, output_buffer, output_offset, max_chunk_size)
+        def run_up(idx, sample_ref, ended):
+            sample = sample_ref[0]
+            sample_ref[0] = None
+            if idx >= len(self.up_blocks):
+                sample = self.conv_norm_out(sample)
+                if timestep_shift_scale is not None:
+                    shift, scale = timestep_shift_scale
+                    sample = sample * (1 + scale) + shift
+                sample = self.conv_act(sample)
+                if ended:
+                    mark_conv3d_ended(self.conv_out)
+                sample = self.conv_out(sample, causal=self.causal)
+                if sample is not None and sample.shape[2] > 0:
+                    sample = unpatchify(sample, patch_size_hw=self.patch_size, patch_size_t=1)
+                    t = sample.shape[2]
+                    output_buffer[:, :, output_offset[0]:output_offset[0] + t].copy_(sample)
+                    output_offset[0] += t
+                return
+
+            up_block = self.up_blocks[idx]
+            if (ended):
+                mark_conv3d_ended(up_block)
+            if self.timestep_conditioning and isinstance(up_block, UNetMidBlock3D):
+                sample = checkpoint_fn(up_block)(
+                    sample, causal=self.causal, timestep=scaled_timestep
+                )
+            else:
+                sample = checkpoint_fn(up_block)(sample, causal=self.causal)
+
+            if sample is None or sample.shape[2] == 0:
+                return
+
+            total_bytes = sample.numel() * sample.element_size()
+            num_chunks = (total_bytes + max_chunk_size - 1) // max_chunk_size
+
+            if num_chunks == 1:
+                # when we are not chunking, detach our x so the callee can free it as soon as they are done
+                next_sample_ref = [sample]
+                del sample
+                run_up(idx + 1, next_sample_ref, ended)
+                return
+            else:
+                samples = torch.chunk(sample, chunks=num_chunks, dim=2)
+
+                for chunk_idx, sample1 in enumerate(samples):
+                    run_up(idx + 1, [sample1], ended and chunk_idx == len(samples) - 1)
+
+        run_up(0, [sample], True)

        return output_buffer

@@ -765,25 +737,12 @@ class SpaceToDepthDownsample(nn.Module):
            causal=True,
            spatial_padding_mode=spatial_padding_mode,
        )
-        self.temporal_cache_state = {}

    def forward(self, x, causal: bool = True):
-        tid = threading.get_ident()
-        cached, pad_first, cached_x, cached_input = self.temporal_cache_state.get(tid, (None, True, None, None))
-        if cached_input is not None:
-            x = torch_cat_if_needed([cached_input, x], dim=2)
-            cached_input = None
-
-        if self.stride[0] == 2 and pad_first:
+        if self.stride[0] == 2:
            x = torch.cat(
                [x[:, :, :1, :, :], x], dim=2
            )  # duplicate first frames for padding
-            pad_first = False
-
-        if x.shape[2] < self.stride[0]:
-            cached_input = x
-            self.temporal_cache_state[tid] = (cached, pad_first, cached_x, cached_input)
-            return None

        # skip connection
        x_in = rearrange(
@@ -798,26 +757,15 @@ class SpaceToDepthDownsample(nn.Module):

        # conv
        x = self.conv(x, causal=causal)
-        if self.stride[0] == 2 and x.shape[2] == 1:
-            if cached_x is not None:
-                x = torch_cat_if_needed([cached_x, x], dim=2)
-                cached_x = None
-            else:
-                cached_x = x
-                x = None
+        x = rearrange(
+            x,
+            "b c (d p1) (h p2) (w p3) -> b (c p1 p2 p3) d h w",
+            p1=self.stride[0],
+            p2=self.stride[1],
+            p3=self.stride[2],
+        )

-        if x is not None:
-            x = rearrange(
-                x,
-                "b c (d p1) (h p2) (w p3) -> b (c p1 p2 p3) d h w",
-                p1=self.stride[0],
-                p2=self.stride[1],
-                p3=self.stride[2],
-            )
-
-        cached = add_exchange_cache(x, cached, x_in, dim=2)
-
-        self.temporal_cache_state[tid] = (cached, pad_first, cached_x, cached_input)
+        x = x + x_in

        return x

@@ -1150,8 +1098,6 @@ class processor(nn.Module):
        return (x - self.get_buffer("mean-of-means").view(1, -1, 1, 1, 1).to(x)) / self.get_buffer("std-of-means").view(1, -1, 1, 1, 1).to(x)

 class VideoVAE(nn.Module):
-    comfy_has_chunked_io = True
-
    def __init__(self, version=0, config=None):
        super().__init__()

@@ -1294,9 +1240,11 @@ class VideoVAE(nn.Module):
            }
        return config

-    def encode(self, x, device=None):
-        x = x[:, :, :max(1, 1 + ((x.shape[2] - 1) // 8) * 8), :, :]
-        means, logvar = torch.chunk(self.encoder(x, device=device), 2, dim=1)
+    def encode(self, x):
+        frames_count = x.shape[2]
+        if ((frames_count - 1) % 8) != 0:
+            raise ValueError("Invalid number of frames: Encode input must have 1 + 8 * x frames (e.g., 1, 9, 17, ...). Please check your input.")
+        means, logvar = torch.chunk(self.encoder(x), 2, dim=1)
        return self.per_channel_statistics.normalize(means)

    def decode_output_shape(self, input_shape):
--- a/comfy/ldm/wan/vae.py
+++ b/comfy/ldm/wan/vae.py
@@ -360,43 +360,6 @@ class Decoder3d(nn.Module):
            RMS_norm(out_dim, images=False), nn.SiLU(),
            CausalConv3d(out_dim, output_channels, 3, padding=1))

-    def run_up(self, layer_idx, x_ref, feat_cache, feat_idx, out_chunks):
-        x = x_ref[0]
-        x_ref[0] = None
-        if layer_idx >= len(self.upsamples):
-            for layer in self.head:
-                if isinstance(layer, CausalConv3d) and feat_cache is not None:
-                    cache_x = x[:, :, -CACHE_T:, :, :]
-                    x = layer(x, feat_cache[feat_idx[0]])
-                    feat_cache[feat_idx[0]] = cache_x
-                    feat_idx[0] += 1
-                else:
-                    x = layer(x)
-            out_chunks.append(x)
-            return
-
-        layer = self.upsamples[layer_idx]
-        if feat_cache is not None:
-            x = layer(x, feat_cache, feat_idx)
-        else:
-            x = layer(x)
-
-        if isinstance(layer, Resample) and layer.mode == 'upsample3d' and x.shape[2] > 2:
-            for frame_idx in range(0, x.shape[2], 2):
-                self.run_up(
-                    layer_idx + 1,
-                    [x[:, :, frame_idx:frame_idx + 2, :, :]],
-                    feat_cache,
-                    feat_idx.copy(),
-                    out_chunks,
-                )
-            del x
-            return
-
-        next_x_ref = [x]
-        del x
-        self.run_up(layer_idx + 1, next_x_ref, feat_cache, feat_idx, out_chunks)
-
    def forward(self, x, feat_cache=None, feat_idx=[0]):
        ## conv1
        if feat_cache is not None:
@@ -417,7 +380,42 @@ class Decoder3d(nn.Module):

        out_chunks = []

-        self.run_up(0, [x], feat_cache, feat_idx, out_chunks)
+        def run_up(layer_idx, x_ref, feat_idx):
+            x = x_ref[0]
+            x_ref[0] = None
+            if layer_idx >= len(self.upsamples):
+                for layer in self.head:
+                    if isinstance(layer, CausalConv3d) and feat_cache is not None:
+                        cache_x = x[:, :, -CACHE_T:, :, :]
+                        x = layer(x, feat_cache[feat_idx[0]])
+                        feat_cache[feat_idx[0]] = cache_x
+                        feat_idx[0] += 1
+                    else:
+                        x = layer(x)
+                out_chunks.append(x)
+                return
+
+            layer = self.upsamples[layer_idx]
+            if isinstance(layer, Resample) and layer.mode == 'upsample3d' and x.shape[2] > 1:
+                for frame_idx in range(x.shape[2]):
+                    run_up(
+                        layer_idx,
+                        [x[:, :, frame_idx:frame_idx + 1, :, :]],
+                        feat_idx.copy(),
+                    )
+                del x
+                return
+
+            if feat_cache is not None:
+                x = layer(x, feat_cache, feat_idx)
+            else:
+                x = layer(x)
+
+            next_x_ref = [x]
+            del x
+            run_up(layer_idx + 1, next_x_ref, feat_idx)
+
+        run_up(0, [x], feat_idx)
        return out_chunks


--- a/comfy/memory_management.py
+++ b/comfy/memory_management.py
@@ -39,10 +39,7 @@ def read_tensor_file_slice_into(tensor, destination):
    if (destination.device.type != "cpu"
            or file_obj is None
            or threading.get_ident() != info.thread_id
-            or destination.numel() * destination.element_size() < info.size
-            or tensor.numel() * tensor.element_size() != info.size
-            or tensor.storage_offset() != 0
-            or not tensor.is_contiguous()):
+            or destination.numel() * destination.element_size() < info.size):
        return False

    if info.size == 0:
--- a/comfy/model_base.py
+++ b/comfy/model_base.py
@@ -21,7 +21,6 @@ import comfy.ldm.hunyuan3dv2_1.hunyuandit
 import torch
 import logging
 import comfy.ldm.lightricks.av_model
-import comfy.context_windows
 from comfy.ldm.modules.diffusionmodules.openaimodel import UNetModel, Timestep
 from comfy.ldm.cascade.stage_c import StageC
 from comfy.ldm.cascade.stage_b import StageB
@@ -286,12 +285,6 @@ class BaseModel(torch.nn.Module):
            return data
        return None

-    def resize_cond_for_context_window(self, cond_key, cond_value, window, x_in, device, retain_index_list=[]):
-        """Override in subclasses to handle model-specific cond slicing for context windows.
-        Return a sliced cond object, or None to fall through to default handling.
-        Use comfy.context_windows.slice_cond() for common cases."""
-        return None
-
    def extra_conds(self, **kwargs):
        out = {}
        concat_cond = self.concat_cond(**kwargs)
@@ -937,10 +930,9 @@ class LongCatImage(Flux):
        transformer_options = transformer_options.copy()
        rope_opts = transformer_options.get("rope_options", {})
        rope_opts = dict(rope_opts)
-        pe_len = float(c_crossattn.shape[1]) if c_crossattn is not None else 512.0
        rope_opts.setdefault("shift_t", 1.0)
-        rope_opts.setdefault("shift_y", pe_len)
-        rope_opts.setdefault("shift_x", pe_len)
+        rope_opts.setdefault("shift_y", 512.0)
+        rope_opts.setdefault("shift_x", 512.0)
        transformer_options["rope_options"] = rope_opts
        return super()._apply_model(x, t, c_concat, c_crossattn, control, transformer_options, **kwargs)

@@ -1383,11 +1375,6 @@ class WAN21_Vace(WAN21):
        out['vace_strength'] = comfy.conds.CONDConstant(vace_strength)
        return out

-    def resize_cond_for_context_window(self, cond_key, cond_value, window, x_in, device, retain_index_list=[]):
-        if cond_key == "vace_context":
-            return comfy.context_windows.slice_cond(cond_value, window, x_in, device, temporal_dim=3, retain_index_list=retain_index_list)
-        return super().resize_cond_for_context_window(cond_key, cond_value, window, x_in, device, retain_index_list=retain_index_list)
-
 class WAN21_Camera(WAN21):
    def __init__(self, model_config, model_type=ModelType.FLOW, image_to_video=False, device=None):
        super(WAN21, self).__init__(model_config, model_type, device=device, unet_model=comfy.ldm.wan.model.CameraWanModel)
@@ -1440,11 +1427,6 @@ class WAN21_HuMo(WAN21):

        return out

-    def resize_cond_for_context_window(self, cond_key, cond_value, window, x_in, device, retain_index_list=[]):
-        if cond_key == "audio_embed":
-            return comfy.context_windows.slice_cond(cond_value, window, x_in, device, temporal_dim=1)
-        return super().resize_cond_for_context_window(cond_key, cond_value, window, x_in, device, retain_index_list=retain_index_list)
-
 class WAN22_Animate(WAN21):
    def __init__(self, model_config, model_type=ModelType.FLOW, image_to_video=False, device=None):
        super(WAN21, self).__init__(model_config, model_type, device=device, unet_model=comfy.ldm.wan.model_animate.AnimateWanModel)
@@ -1462,13 +1444,6 @@ class WAN22_Animate(WAN21):
            out['pose_latents'] = comfy.conds.CONDRegular(self.process_latent_in(pose_latents))
        return out

-    def resize_cond_for_context_window(self, cond_key, cond_value, window, x_in, device, retain_index_list=[]):
-        if cond_key == "face_pixel_values":
-            return comfy.context_windows.slice_cond(cond_value, window, x_in, device, temporal_dim=2, temporal_scale=4, temporal_offset=1)
-        if cond_key == "pose_latents":
-            return comfy.context_windows.slice_cond(cond_value, window, x_in, device, temporal_dim=2, temporal_offset=1)
-        return super().resize_cond_for_context_window(cond_key, cond_value, window, x_in, device, retain_index_list=retain_index_list)
-
 class WAN22_S2V(WAN21):
    def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
        super(WAN21, self).__init__(model_config, model_type, device=device, unet_model=comfy.ldm.wan.model.WanModel_S2V)
@@ -1505,11 +1480,6 @@ class WAN22_S2V(WAN21):
            out['reference_motion'] = reference_motion.shape
        return out

-    def resize_cond_for_context_window(self, cond_key, cond_value, window, x_in, device, retain_index_list=[]):
-        if cond_key == "audio_embed":
-            return comfy.context_windows.slice_cond(cond_value, window, x_in, device, temporal_dim=1)
-        return super().resize_cond_for_context_window(cond_key, cond_value, window, x_in, device, retain_index_list=retain_index_list)
-
 class WAN22(WAN21):
    def __init__(self, model_config, model_type=ModelType.FLOW, image_to_video=False, device=None):
        super(WAN21, self).__init__(model_config, model_type, device=device, unet_model=comfy.ldm.wan.model.WanModel)
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -1003,7 +1003,7 @@ def text_encoder_offload_device():
 def text_encoder_device():
    if args.gpu_only:
        return get_torch_device()
-    elif vram_state in (VRAMState.HIGH_VRAM, VRAMState.NORMAL_VRAM) or comfy.memory_management.aimdo_enabled:
+    elif vram_state in (VRAMState.HIGH_VRAM, VRAMState.NORMAL_VRAM, VRAMState.SHARED) or comfy.memory_management.aimdo_enabled:
        if should_use_fp16(prioritize_performance=False):
            return get_torch_device()
        else:
--- a/comfy/sample.py
+++ b/comfy/sample.py
@@ -8,12 +8,12 @@ import comfy.nested_tensor

 def prepare_noise_inner(latent_image, generator, noise_inds=None):
    if noise_inds is None:
-        return torch.randn(latent_image.size(), dtype=torch.float32, layout=latent_image.layout, generator=generator, device="cpu").to(dtype=latent_image.dtype)
+        return torch.randn(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, generator=generator, device="cpu")

    unique_inds, inverse = np.unique(noise_inds, return_inverse=True)
    noises = []
    for i in range(unique_inds[-1]+1):
-        noise = torch.randn([1] + list(latent_image.size())[1:], dtype=torch.float32, layout=latent_image.layout, generator=generator, device="cpu").to(dtype=latent_image.dtype)
+        noise = torch.randn([1] + list(latent_image.size())[1:], dtype=latent_image.dtype, layout=latent_image.layout, generator=generator, device="cpu")
        if i in unique_inds:
            noises.append(noise)
    noises = [noises[i] for i in inverse]
--- a/comfy/samplers.py
+++ b/comfy/samplers.py
@@ -985,8 +985,8 @@ class CFGGuider:
        self.inner_model, self.conds, self.loaded_models = comfy.sampler_helpers.prepare_sampling(self.model_patcher, noise.shape, self.conds, self.model_options)
        device = self.model_patcher.load_device

-        noise = noise.to(device=device, dtype=torch.float32)
-        latent_image = latent_image.to(device=device, dtype=torch.float32)
+        noise = noise.to(device)
+        latent_image = latent_image.to(device)
        sigmas = sigmas.to(device)
        cast_to_load_options(self.model_options, device=device, dtype=self.model_patcher.model_dtype())

@@ -1028,7 +1028,6 @@ class CFGGuider:
                denoise_mask, _ = comfy.utils.pack_latents(denoise_masks)
            else:
                denoise_mask = denoise_masks[0]
-            denoise_mask = denoise_mask.float()

        self.conds = {}
        for k in self.original_conds:
--- a/comfy/sd.py
+++ b/comfy/sd.py
@@ -953,7 +953,7 @@ class VAE:

            # Pre-allocate output for VAEs that support direct buffer writes
            preallocated = False
-            if getattr(self.first_stage_model, 'comfy_has_chunked_io', False):
+            if hasattr(self.first_stage_model, 'decode_output_shape'):
                pixel_samples = torch.empty(self.first_stage_model.decode_output_shape(samples_in.shape), device=self.output_device, dtype=self.vae_output_dtype())
                preallocated = True

@@ -978,7 +978,6 @@ class VAE:
            do_tile = True

        if do_tile:
-            comfy.model_management.soft_empty_cache()
            dims = samples_in.ndim - 2
            if dims == 1 or self.extra_1d_channel is not None:
                pixel_samples = self.decode_tiled_1d(samples_in)
@@ -1039,13 +1038,8 @@ class VAE:
            batch_number = max(1, batch_number)
            samples = None
            for x in range(0, pixel_samples.shape[0], batch_number):
-                pixels_in = self.process_input(pixel_samples[x:x + batch_number]).to(self.vae_dtype)
-                if getattr(self.first_stage_model, 'comfy_has_chunked_io', False):
-                    out = self.first_stage_model.encode(pixels_in, device=self.device)
-                else:
-                    pixels_in = pixels_in.to(self.device)
-                    out = self.first_stage_model.encode(pixels_in)
-                out = out.to(self.output_device).to(dtype=self.vae_output_dtype())
+                pixels_in = self.process_input(pixel_samples[x:x + batch_number]).to(self.vae_dtype).to(self.device)
+                out = self.first_stage_model.encode(pixels_in).to(self.output_device).to(dtype=self.vae_output_dtype())
                if samples is None:
                    samples = torch.empty((pixel_samples.shape[0],) + tuple(out.shape[1:]), device=self.output_device, dtype=self.vae_output_dtype())
                samples[x:x + batch_number] = out
@@ -1060,7 +1054,6 @@ class VAE:
            do_tile = True

        if do_tile:
-            comfy.model_management.soft_empty_cache()
            if self.latent_dim == 3:
                tile = 256
                overlap = tile // 4
--- a/comfy/text_encoders/llama.py
+++ b/comfy/text_encoders/llama.py
@@ -1028,19 +1028,12 @@ class Qwen25_7BVLI(BaseLlama, BaseGenerate, torch.nn.Module):
                grid = e.get("extra", None)
                start = e.get("index")
                if position_ids is None:
-                    position_ids = torch.ones((3, embeds.shape[1]), device=embeds.device, dtype=torch.long)
+                    position_ids = torch.zeros((3, embeds.shape[1]), device=embeds.device)
                    position_ids[:, :start] = torch.arange(0, start, device=embeds.device)
                end = e.get("size") + start
                len_max = int(grid.max()) // 2
                start_next = len_max + start
-                if attention_mask is not None:
-                    # Assign compact sequential positions to attended tokens only,
-                    # skipping over padding so post-padding tokens aren't inflated.
-                    after_mask = attention_mask[0, end:]
-                    text_positions = after_mask.cumsum(0) - 1 + start_next + offset
-                    position_ids[:, end:] = torch.where(after_mask.bool(), text_positions, position_ids[0, end:])
-                else:
-                    position_ids[:, end:] = torch.arange(start_next + offset, start_next + (embeds.shape[1] - end) + offset, device=embeds.device)
+                position_ids[:, end:] = torch.arange(start_next + offset, start_next + (embeds.shape[1] - end) + offset, device=embeds.device)
                position_ids[0, start:end] = start + offset
                max_d = int(grid[0][1]) // 2
                position_ids[1, start:end] = torch.arange(start + offset, start + max_d + offset, device=embeds.device).unsqueeze(1).repeat(1, math.ceil((end - start) / max_d)).flatten(0)[:end - start]
--- a/comfy/text_encoders/longcat_image.py
+++ b/comfy/text_encoders/longcat_image.py
@@ -64,13 +64,7 @@ class LongCatImageBaseTokenizer(Qwen25_7BVLITokenizer):
        return [output]


-IMAGE_PAD_TOKEN_ID = 151655
-
 class LongCatImageTokenizer(sd1_clip.SD1Tokenizer):
-    T2I_PREFIX = "<|im_start|>system\nAs an image captioning expert, generate a descriptive text prompt based on an image content, suitable for input to a text-to-image model.<|im_end|>\n<|im_start|>user\n"
-    EDIT_PREFIX = "<|im_start|>system\nAs an image editing expert, first analyze the content and attributes of the input image(s). Then, based on the user's editing instructions, clearly and precisely determine how to modify the given image(s), ensuring that only the specified parts are altered and all other aspects remain consistent with the original(s).<|im_end|>\n<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>"
-    SUFFIX = "<|im_end|>\n<|im_start|>assistant\n"
-
    def __init__(self, embedding_directory=None, tokenizer_data={}):
        super().__init__(
            embedding_directory=embedding_directory,
@@ -78,8 +72,10 @@ class LongCatImageTokenizer(sd1_clip.SD1Tokenizer):
            name="qwen25_7b",
            tokenizer=LongCatImageBaseTokenizer,
        )
+        self.longcat_template_prefix = "<|im_start|>system\nAs an image captioning expert, generate a descriptive text prompt based on an image content, suitable for input to a text-to-image model.<|im_end|>\n<|im_start|>user\n"
+        self.longcat_template_suffix = "<|im_end|>\n<|im_start|>assistant\n"

-    def tokenize_with_weights(self, text, return_word_ids=False, images=None, **kwargs):
+    def tokenize_with_weights(self, text, return_word_ids=False, **kwargs):
        skip_template = False
        if text.startswith("<|im_start|>"):
            skip_template = True
@@ -94,14 +90,11 @@ class LongCatImageTokenizer(sd1_clip.SD1Tokenizer):
                text, return_word_ids=return_word_ids, disable_weights=True, **kwargs
            )
        else:
-            has_images = images is not None and len(images) > 0
-            template_prefix = self.EDIT_PREFIX if has_images else self.T2I_PREFIX
-
            prefix_ids = base_tok.tokenizer(
-                template_prefix, add_special_tokens=False
+                self.longcat_template_prefix, add_special_tokens=False
            )["input_ids"]
            suffix_ids = base_tok.tokenizer(
-                self.SUFFIX, add_special_tokens=False
+                self.longcat_template_suffix, add_special_tokens=False
            )["input_ids"]

            prompt_tokens = base_tok.tokenize_with_weights(
@@ -113,14 +106,6 @@ class LongCatImageTokenizer(sd1_clip.SD1Tokenizer):
            suffix_pairs = [(t, 1.0) for t in suffix_ids]

            combined = prefix_pairs + prompt_pairs + suffix_pairs
-
-            if has_images:
-                embed_count = 0
-                for i in range(len(combined)):
-                    if combined[i][0] == IMAGE_PAD_TOKEN_ID and embed_count < len(images):
-                        combined[i] = ({"type": "image", "data": images[embed_count], "original_type": "image"}, combined[i][1])
-                        embed_count += 1
-
            tokens = {"qwen25_7b": [combined]}

        return tokens
--- a/comfy/text_encoders/qwen_vl.py
+++ b/comfy/text_encoders/qwen_vl.py
@@ -425,7 +425,4 @@ class Qwen2VLVisionTransformer(nn.Module):
            hidden_states = block(hidden_states, position_embeddings, cu_seqlens_now, optimized_attention=optimized_attention)

        hidden_states = self.merger(hidden_states)
-        # Potentially important for spatially precise edits. This is present in the HF implementation.
-        reverse_indices = torch.argsort(window_index)
-        hidden_states = hidden_states[reverse_indices, :]
        return hidden_states
--- a/comfy/utils.py
+++ b/comfy/utils.py
@@ -1135,8 +1135,8 @@ def tiled_scale_multidim(samples, function, tile=(64, 64), overlap=8, upscale_am
                pbar.update(1)
            continue

-        out = output[b:b+1].zero_()
-        out_div = torch.zeros([s.shape[0], 1] + mult_list_upscale(s.shape[2:]), device=output_device)
+        out = torch.zeros([s.shape[0], out_channels] + mult_list_upscale(s.shape[2:]), device=output_device)
+        out_div = torch.zeros([s.shape[0], out_channels] + mult_list_upscale(s.shape[2:]), device=output_device)

        positions = [range(0, s.shape[d+2] - overlap[d], tile[d] - overlap[d]) if s.shape[d+2] > tile[d] else [0] for d in range(dims)]

@@ -1151,7 +1151,7 @@ def tiled_scale_multidim(samples, function, tile=(64, 64), overlap=8, upscale_am
                upscaled.append(round(get_pos(d, pos)))

            ps = function(s_in).to(output_device)
-            mask = torch.ones([1, 1] + list(ps.shape[2:]), device=output_device)
+            mask = torch.ones_like(ps)

            for d in range(2, dims + 2):
                feather = round(get_scale(d - 2, overlap[d - 2]))
@@ -1174,7 +1174,7 @@ def tiled_scale_multidim(samples, function, tile=(64, 64), overlap=8, upscale_am
            if pbar is not None:
                pbar.update(1)

-        out.div_(out_div)
+        output[b:b+1] = out/out_div
    return output

 def tiled_scale(samples, function, tile_x=64, tile_y=64, overlap = 8, upscale_amount = 4, out_channels = 3, output_device="cpu", pbar = None):
--- a/comfy_api_nodes/apis/quiver.py
+++ b/comfy_api_nodes/apis/quiver.py
@@ -1,43 +0,0 @@
-from pydantic import BaseModel, Field
-
-
-class QuiverImageObject(BaseModel):
-    url: str = Field(...)
-
-
-class QuiverTextToSVGRequest(BaseModel):
-    model: str = Field(default="arrow-preview")
-    prompt: str = Field(...)
-    instructions: str | None = Field(default=None)
-    references: list[QuiverImageObject] | None = Field(default=None, max_length=4)
-    temperature: float | None = Field(default=None, ge=0, le=2)
-    top_p: float | None = Field(default=None, ge=0, le=1)
-    presence_penalty: float | None = Field(default=None, ge=-2, le=2)
-
-
-class QuiverImageToSVGRequest(BaseModel):
-    model: str = Field(default="arrow-preview")
-    image: QuiverImageObject = Field(...)
-    auto_crop: bool | None = Field(default=None)
-    target_size: int | None = Field(default=None, ge=128, le=4096)
-    temperature: float | None = Field(default=None, ge=0, le=2)
-    top_p: float | None = Field(default=None, ge=0, le=1)
-    presence_penalty: float | None = Field(default=None, ge=-2, le=2)
-
-
-class QuiverSVGResponseItem(BaseModel):
-    svg: str = Field(...)
-    mime_type: str | None = Field(default="image/svg+xml")
-
-
-class QuiverSVGUsage(BaseModel):
-    total_tokens: int | None = Field(default=None)
-    input_tokens: int | None = Field(default=None)
-    output_tokens: int | None = Field(default=None)
-
-
-class QuiverSVGResponse(BaseModel):
-    id: str | None = Field(default=None)
-    created: int | None = Field(default=None)
-    data: list[QuiverSVGResponseItem] = Field(...)
-    usage: QuiverSVGUsage | None = Field(default=None)
--- a/comfy_api_nodes/nodes_bytedance.py
+++ b/comfy_api_nodes/nodes_bytedance.py
@@ -47,10 +47,6 @@ SEEDREAM_MODELS = {
 BYTEPLUS_TASK_ENDPOINT = "/proxy/byteplus/api/v3/contents/generations/tasks"
 BYTEPLUS_TASK_STATUS_ENDPOINT = "/proxy/byteplus/api/v3/contents/generations/tasks"  # + /{task_id}

-DEPRECATED_MODELS = {"seedance-1-0-lite-t2v-250428", "seedance-1-0-lite-i2v-250428"}
-
-logger = logging.getLogger(__name__)
-

 def get_image_url_from_response(response: ImageTaskCreationResponse) -> str:
    if response.error:
@@ -139,7 +135,6 @@ class ByteDanceImageNode(IO.ComfyNode):
            price_badge=IO.PriceBadge(
                expr="""{"type":"usd","usd":0.03}""",
            ),
-            is_deprecated=True,
        )

    @classmethod
@@ -947,7 +942,7 @@ class ByteDanceImageReferenceNode(IO.ComfyNode):
        ]
        return await process_video_task(
            cls,
-            payload=Image2VideoTaskCreationRequest(model=model, content=x, generate_audio=None),
+            payload=Image2VideoTaskCreationRequest(model=model, content=x),
            estimated_duration=max(1, math.ceil(VIDEO_TASKS_EXECUTION_TIME[model][resolution] * (duration / 10.0))),
        )

@@ -957,12 +952,6 @@ async def process_video_task(
    payload: Text2VideoTaskCreationRequest | Image2VideoTaskCreationRequest,
    estimated_duration: int | None,
 ) -> IO.NodeOutput:
-    if payload.model in DEPRECATED_MODELS:
-        logger.warning(
-            "Model '%s' is deprecated and will be deactivated on May 13, 2026. "
-            "Please switch to a newer model. Recommended: seedance-1-0-pro-fast-251015.",
-            payload.model,
-        )
    initial_response = await sync_op(
        cls,
        ApiEndpoint(path=BYTEPLUS_TASK_ENDPOINT, method="POST"),
--- a/comfy_api_nodes/nodes_quiver.py
+++ b/comfy_api_nodes/nodes_quiver.py
@@ -1,291 +0,0 @@
-from io import BytesIO
-
-from typing_extensions import override
-
-from comfy_api.latest import IO, ComfyExtension
-from comfy_api_nodes.apis.quiver import (
-    QuiverImageObject,
-    QuiverImageToSVGRequest,
-    QuiverSVGResponse,
-    QuiverTextToSVGRequest,
-)
-from comfy_api_nodes.util import (
-    ApiEndpoint,
-    sync_op,
-    upload_image_to_comfyapi,
-    validate_string,
-)
-from comfy_extras.nodes_images import SVG
-
-
-class QuiverTextToSVGNode(IO.ComfyNode):
-    @classmethod
-    def define_schema(cls):
-        return IO.Schema(
-            node_id="QuiverTextToSVGNode",
-            display_name="Quiver Text to SVG",
-            category="api node/image/Quiver",
-            description="Generate an SVG from a text prompt using Quiver AI.",
-            inputs=[
-                IO.String.Input(
-                    "prompt",
-                    multiline=True,
-                    default="",
-                    tooltip="Text description of the desired SVG output.",
-                ),
-                IO.String.Input(
-                    "instructions",
-                    multiline=True,
-                    default="",
-                    tooltip="Additional style or formatting guidance.",
-                    optional=True,
-                ),
-                IO.Autogrow.Input(
-                    "reference_images",
-                    template=IO.Autogrow.TemplatePrefix(
-                        IO.Image.Input("image"),
-                        prefix="ref_",
-                        min=0,
-                        max=4,
-                    ),
-                    tooltip="Up to 4 reference images to guide the generation.",
-                    optional=True,
-                ),
-                IO.DynamicCombo.Input(
-                    "model",
-                    options=[
-                        IO.DynamicCombo.Option(
-                            "arrow-preview",
-                            [
-                                IO.Float.Input(
-                                    "temperature",
-                                    default=1.0,
-                                    min=0.0,
-                                    max=2.0,
-                                    step=0.1,
-                                    display_mode=IO.NumberDisplay.slider,
-                                    tooltip="Randomness control. Higher values increase randomness.",
-                                    advanced=True,
-                                ),
-                                IO.Float.Input(
-                                    "top_p",
-                                    default=1.0,
-                                    min=0.05,
-                                    max=1.0,
-                                    step=0.05,
-                                    display_mode=IO.NumberDisplay.slider,
-                                    tooltip="Nucleus sampling parameter.",
-                                    advanced=True,
-                                ),
-                                IO.Float.Input(
-                                    "presence_penalty",
-                                    default=0.0,
-                                    min=-2.0,
-                                    max=2.0,
-                                    step=0.1,
-                                    display_mode=IO.NumberDisplay.slider,
-                                    tooltip="Token presence penalty.",
-                                    advanced=True,
-                                ),
-                            ],
-                        ),
-                    ],
-                    tooltip="Model to use for SVG generation.",
-                ),
-                IO.Int.Input(
-                    "seed",
-                    default=0,
-                    min=0,
-                    max=2147483647,
-                    control_after_generate=True,
-                    tooltip="Seed to determine if node should re-run; "
-                    "actual results are nondeterministic regardless of seed.",
-                ),
-            ],
-            outputs=[
-                IO.SVG.Output(),
-            ],
-            hidden=[
-                IO.Hidden.auth_token_comfy_org,
-                IO.Hidden.api_key_comfy_org,
-                IO.Hidden.unique_id,
-            ],
-            is_api_node=True,
-            price_badge=IO.PriceBadge(
-                expr="""{"type":"usd","usd":0.429}""",
-            ),
-        )
-
-    @classmethod
-    async def execute(
-        cls,
-        prompt: str,
-        model: dict,
-        seed: int,
-        instructions: str = None,
-        reference_images: IO.Autogrow.Type = None,
-    ) -> IO.NodeOutput:
-        validate_string(prompt, strip_whitespace=False, min_length=1)
-
-        references = None
-        if reference_images:
-            references = []
-            for key in reference_images:
-                url = await upload_image_to_comfyapi(cls, reference_images[key])
-                references.append(QuiverImageObject(url=url))
-            if len(references) > 4:
-                raise ValueError("Maximum 4 reference images are allowed.")
-
-        instructions_val = instructions.strip() if instructions else None
-        if instructions_val == "":
-            instructions_val = None
-
-        response = await sync_op(
-            cls,
-            ApiEndpoint(path="/proxy/quiver/v1/svgs/generations", method="POST"),
-            response_model=QuiverSVGResponse,
-            data=QuiverTextToSVGRequest(
-                model=model["model"],
-                prompt=prompt,
-                instructions=instructions_val,
-                references=references,
-                temperature=model.get("temperature"),
-                top_p=model.get("top_p"),
-                presence_penalty=model.get("presence_penalty"),
-            ),
-        )
-
-        svg_data = [BytesIO(item.svg.encode("utf-8")) for item in response.data]
-        return IO.NodeOutput(SVG(svg_data))
-
-
-class QuiverImageToSVGNode(IO.ComfyNode):
-    @classmethod
-    def define_schema(cls):
-        return IO.Schema(
-            node_id="QuiverImageToSVGNode",
-            display_name="Quiver Image to SVG",
-            category="api node/image/Quiver",
-            description="Vectorize a raster image into SVG using Quiver AI.",
-            inputs=[
-                IO.Image.Input(
-                    "image",
-                    tooltip="Input image to vectorize.",
-                ),
-                IO.Boolean.Input(
-                    "auto_crop",
-                    default=False,
-                    tooltip="Automatically crop to the dominant subject.",
-                ),
-                IO.DynamicCombo.Input(
-                    "model",
-                    options=[
-                        IO.DynamicCombo.Option(
-                            "arrow-preview",
-                            [
-                                IO.Int.Input(
-                                    "target_size",
-                                    default=1024,
-                                    min=128,
-                                    max=4096,
-                                    tooltip="Square resize target in pixels.",
-                                ),
-                                IO.Float.Input(
-                                    "temperature",
-                                    default=1.0,
-                                    min=0.0,
-                                    max=2.0,
-                                    step=0.1,
-                                    display_mode=IO.NumberDisplay.slider,
-                                    tooltip="Randomness control. Higher values increase randomness.",
-                                    advanced=True,
-                                ),
-                                IO.Float.Input(
-                                    "top_p",
-                                    default=1.0,
-                                    min=0.05,
-                                    max=1.0,
-                                    step=0.05,
-                                    display_mode=IO.NumberDisplay.slider,
-                                    tooltip="Nucleus sampling parameter.",
-                                    advanced=True,
-                                ),
-                                IO.Float.Input(
-                                    "presence_penalty",
-                                    default=0.0,
-                                    min=-2.0,
-                                    max=2.0,
-                                    step=0.1,
-                                    display_mode=IO.NumberDisplay.slider,
-                                    tooltip="Token presence penalty.",
-                                    advanced=True,
-                                ),
-                            ],
-                        ),
-                    ],
-                    tooltip="Model to use for SVG vectorization.",
-                ),
-                IO.Int.Input(
-                    "seed",
-                    default=0,
-                    min=0,
-                    max=2147483647,
-                    control_after_generate=True,
-                    tooltip="Seed to determine if node should re-run; "
-                    "actual results are nondeterministic regardless of seed.",
-                ),
-            ],
-            outputs=[
-                IO.SVG.Output(),
-            ],
-            hidden=[
-                IO.Hidden.auth_token_comfy_org,
-                IO.Hidden.api_key_comfy_org,
-                IO.Hidden.unique_id,
-            ],
-            is_api_node=True,
-            price_badge=IO.PriceBadge(
-                expr="""{"type":"usd","usd":0.429}""",
-            ),
-        )
-
-    @classmethod
-    async def execute(
-        cls,
-        image,
-        auto_crop: bool,
-        model: dict,
-        seed: int,
-    ) -> IO.NodeOutput:
-        image_url = await upload_image_to_comfyapi(cls, image)
-
-        response = await sync_op(
-            cls,
-            ApiEndpoint(path="/proxy/quiver/v1/svgs/vectorizations", method="POST"),
-            response_model=QuiverSVGResponse,
-            data=QuiverImageToSVGRequest(
-                model=model["model"],
-                image=QuiverImageObject(url=image_url),
-                auto_crop=auto_crop if auto_crop else None,
-                target_size=model.get("target_size"),
-                temperature=model.get("temperature"),
-                top_p=model.get("top_p"),
-                presence_penalty=model.get("presence_penalty"),
-            ),
-        )
-
-        svg_data = [BytesIO(item.svg.encode("utf-8")) for item in response.data]
-        return IO.NodeOutput(SVG(svg_data))
-
-
-class QuiverExtension(ComfyExtension):
-    @override
-    async def get_node_list(self) -> list[type[IO.ComfyNode]]:
-        return [
-            QuiverTextToSVGNode,
-            QuiverImageToSVGNode,
-        ]
-
-
-async def comfy_entrypoint() -> QuiverExtension:
-    return QuiverExtension()
--- a/comfy_extras/nodes_canny.py
+++ b/comfy_extras/nodes_canny.py
@@ -3,7 +3,6 @@ from typing_extensions import override

 import comfy.model_management
 from comfy_api.latest import ComfyExtension, io
-import torch


 class Canny(io.ComfyNode):
@@ -30,8 +29,8 @@ class Canny(io.ComfyNode):

    @classmethod
    def execute(cls, image, low_threshold, high_threshold) -> io.NodeOutput:
-        output = canny(image.to(device=comfy.model_management.get_torch_device(), dtype=torch.float32).movedim(-1, 1), low_threshold, high_threshold)
-        img_out = output[1].to(device=comfy.model_management.intermediate_device(), dtype=comfy.model_management.intermediate_dtype()).repeat(1, 3, 1, 1).movedim(1, -1)
+        output = canny(image.to(comfy.model_management.get_torch_device()).movedim(-1, 1), low_threshold, high_threshold)
+        img_out = output[1].to(comfy.model_management.intermediate_device()).repeat(1, 3, 1, 1).movedim(1, -1)
        return io.NodeOutput(img_out)


--- a/comfy_extras/nodes_context_windows.py
+++ b/comfy_extras/nodes_context_windows.py
@@ -27,8 +27,8 @@ class ContextWindowsManualNode(io.ComfyNode):
                io.Combo.Input("fuse_method", options=comfy.context_windows.ContextFuseMethods.LIST_STATIC, default=comfy.context_windows.ContextFuseMethods.PYRAMID, tooltip="The method to use to fuse the context windows."),
                io.Int.Input("dim", min=0, max=5, default=0, tooltip="The dimension to apply the context windows to."),
                io.Boolean.Input("freenoise", default=False, tooltip="Whether to apply FreeNoise noise shuffling, improves window blending."),
-                io.String.Input("cond_retain_index_list", default="", tooltip="List of latent indices to retain in the conditioning tensors for each window, for example setting this to '0' will use the initial start image for each window."),
-                io.Boolean.Input("split_conds_to_windows", default=False, tooltip="Whether to split multiple conditionings (created by ConditionCombine) to each window based on region index."),
+                #io.String.Input("cond_retain_index_list", default="", tooltip="List of latent indices to retain in the conditioning tensors for each window, for example setting this to '0' will use the initial start image for each window."),
+                #io.Boolean.Input("split_conds_to_windows", default=False, tooltip="Whether to split multiple conditionings (created by ConditionCombine) to each window based on region index."),
            ],
            outputs=[
                io.Model.Output(tooltip="The model with context windows applied during sampling."),
--- a/comfyui_version.py
+++ b/comfyui_version.py
@@ -1,3 +1,3 @@
 # This file is automatically generated by the build process when version is
 # updated in pyproject.toml.
-__version__ = "0.18.1"
+__version__ = "0.17.0"
--- a/main.py
+++ b/main.py
@@ -471,9 +471,6 @@ if __name__ == "__main__":
    if sys.version_info.major == 3 and sys.version_info.minor < 10:
        logging.warning("WARNING: You are using a python version older than 3.10, please upgrade to a newer one. 3.12 and above is recommended.")

-    if args.disable_dynamic_vram:
-        logging.warning("Dynamic vram disabled with argument. If you have any issues with dynamic vram enabled please give us a detailed reports as this argument will be removed soon.")
-
    event_loop, _, start_all_func = start_comfyui()
    try:
        x = start_all_func()
--- a/nodes.py
+++ b/nodes.py
@@ -1966,11 +1966,9 @@ class EmptyImage:
    CATEGORY = "image"

    def generate(self, width, height, batch_size=1, color=0):
-        dtype = comfy.model_management.intermediate_dtype()
-        device = comfy.model_management.intermediate_device()
-        r = torch.full([batch_size, height, width, 1], ((color >> 16) & 0xFF) / 0xFF, device=device, dtype=dtype)
-        g = torch.full([batch_size, height, width, 1], ((color >> 8) & 0xFF) / 0xFF, device=device, dtype=dtype)
-        b = torch.full([batch_size, height, width, 1], ((color) & 0xFF) / 0xFF, device=device, dtype=dtype)
+        r = torch.full([batch_size, height, width, 1], ((color >> 16) & 0xFF) / 0xFF)
+        g = torch.full([batch_size, height, width, 1], ((color >> 8) & 0xFF) / 0xFF)
+        b = torch.full([batch_size, height, width, 1], ((color) & 0xFF) / 0xFF)
        return (torch.cat((r, g, b), dim=-1), )

 class ImagePadForOutpaint:
@@ -2204,6 +2202,54 @@ def get_module_name(module_path: str) -> str:
    return base_path


+def load_node_replacements_json(module_dir: str, module_name: str):
+    """Register the node replacements declared in a custom node's node_replacements.json.
+
+    The optional file sits in the custom node's root directory and maps each old_node_id to a
+    list of entries shaped like NodeReplace.as_dict(). Problems are logged, never raised.
+
+    Args:
+        module_dir: Directory of the custom node that may contain the file.
+        module_name: Name of the custom node, used only in log messages.
+    """
+    replacements_path = os.path.join(module_dir, "node_replacements.json")
+    if not os.path.isfile(replacements_path):
+        return
+    try:
+        with open(replacements_path, "r", encoding="utf-8") as f:
+            data = json.load(f)
+        if not isinstance(data, dict):
+            logging.warning(f"node_replacements.json in {module_name} must be a JSON object, skipping.")
+            return
+        # Imported only once a valid file exists; an import failure is logged by the handler below.
+        from server import PromptServer
+        from comfy_api.latest._io import NodeReplace
+        manager = PromptServer.instance.node_replace_manager
+        count = 0
+        for old_node_id, replacements in data.items():
+            if not isinstance(replacements, list):
+                logging.warning(f"node_replacements.json in {module_name}: value for '{old_node_id}' must be a list, skipping.")
+                continue
+            for entry in replacements:
+                if not isinstance(entry, dict):
+                    logging.warning(f"node_replacements.json in {module_name}: entry for '{old_node_id}' must be an object, skipping.")
+                    continue
+                manager.register(NodeReplace(
+                    new_node_id=entry.get("new_node_id", ""),
+                    old_node_id=entry.get("old_node_id", old_node_id),
+                    old_widget_ids=entry.get("old_widget_ids"),
+                    input_mapping=entry.get("input_mapping"),
+                    output_mapping=entry.get("output_mapping"),
+                ))
+                count += 1
+        if count > 0:
+            logging.info(f"Loaded {count} node replacement(s) from {module_name}/node_replacements.json")
+    except json.JSONDecodeError as e:
+        logging.warning(f"Failed to parse node_replacements.json in {module_name}: {e}")
+    except Exception as e:
+        logging.warning(f"Failed to load node_replacements.json from {module_name}: {e}")
+
+
 async def load_custom_node(module_path: str, ignore=set(), module_parent="custom_nodes") -> bool:
    module_name = get_module_name(module_path)
    if os.path.isfile(module_path):
@@ -2228,6 +2274,8 @@ async def load_custom_node(module_path: str, ignore=set(), module_parent="custom

        LOADED_MODULE_DIRS[module_name] = os.path.abspath(module_dir)

+        load_node_replacements_json(module_dir, module_name)
+
        try:
            from comfy_config import config_parser

--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "ComfyUI"
-version = "0.18.1"
+version = "0.17.0"
 readme = "README.md"
 license = { file = "LICENSE" }
 requires-python = ">=3.10"
--- a/tests/test_node_replacements_json.py
+++ b/tests/test_node_replacements_json.py
@@ -0,0 +1,234 @@
+"""Tests for auto-registration of node_replacements.json from custom node directories."""
+import json
+import logging
+import os
+import tempfile
+import unittest
+from unittest.mock import MagicMock
+
+# We can't import nodes.py directly (torch dependency), so we test the
+# load_node_replacements_json logic by re-creating it from the same source.
+# This validates the JSON parsing and NodeReplace construction logic.
+# NOTE: the copy below has to be kept in sync with nodes.py by hand.
+
+
+class MockNodeReplace:
+    """Mirrors comfy_api.latest._io.NodeReplace for testing."""
+    def __init__(self, new_node_id, old_node_id, old_widget_ids=None,
+                 input_mapping=None, output_mapping=None):
+        self.new_node_id = new_node_id
+        self.old_node_id = old_node_id
+        self.old_widget_ids = old_widget_ids
+        self.input_mapping = input_mapping
+        self.output_mapping = output_mapping
+
+
+def load_node_replacements_json(module_dir, module_name, manager, NodeReplace=MockNodeReplace):
+    """Standalone version of the function from nodes.py for testing.
+
+    Reads node_replacements.json from module_dir (if present) and calls
+    manager.register once for every dict entry found in each list value. The
+    manager and the NodeReplace class are injected so no server is needed.
+    Read and parse failures are logged as warnings instead of being raised.
+    """
+    replacements_path = os.path.join(module_dir, "node_replacements.json")
+    if not os.path.isfile(replacements_path):
+        return
+
+    try:
+        with open(replacements_path, "r", encoding="utf-8") as f:
+            data = json.load(f)
+
+        if not isinstance(data, dict):
+            logging.warning(f"node_replacements.json in {module_name} must be a JSON object, skipping.")
+            return
+
+        count = 0
+        for old_node_id, replacements in data.items():
+            if not isinstance(replacements, list):
+                logging.warning(f"node_replacements.json in {module_name}: value for '{old_node_id}' must be a list, skipping.")
+                continue
+            for entry in replacements:
+                if not isinstance(entry, dict):
+                    continue
+                manager.register(NodeReplace(
+                    new_node_id=entry.get("new_node_id", ""),
+                    old_node_id=entry.get("old_node_id", old_node_id),
+                    old_widget_ids=entry.get("old_widget_ids"),
+                    input_mapping=entry.get("input_mapping"),
+                    output_mapping=entry.get("output_mapping"),
+                ))
+                count += 1
+
+        if count > 0:
+            logging.info(f"Loaded {count} node replacement(s) from {module_name}/node_replacements.json")
+    except json.JSONDecodeError as e:
+        logging.warning(f"Failed to parse node_replacements.json in {module_name}: {e}")
+    except Exception as e:
+        logging.warning(f"Failed to load node_replacements.json from {module_name}: {e}")
+
+
+class TestLoadNodeReplacementsJson(unittest.TestCase):
+    """Test auto-registration of node_replacements.json from custom node directories."""
+
+    def setUp(self):
+        # Use a self-cleaning directory so the fixture is removed after every
+        # test, including failing ones (mkdtemp alone would leak it).
+        tmp = tempfile.TemporaryDirectory()
+        self.addCleanup(tmp.cleanup)
+        self.tmpdir = tmp.name
+        self.mock_manager = MagicMock()
+
+    def _write_text(self, text):
+        """Write raw text as the fixture, in the utf-8 encoding the loader reads."""
+        path = os.path.join(self.tmpdir, "node_replacements.json")
+        with open(path, "w", encoding="utf-8") as f:
+            f.write(text)
+
+    def _write_json(self, data):
+        """Serialize data to JSON and write it as the fixture."""
+        self._write_text(json.dumps(data))
+
+    def _load(self):
+        """Run the loader on the temporary directory with the mock manager."""
+        load_node_replacements_json(self.tmpdir, "test-node-pack", self.mock_manager)
+
+    def test_no_file_does_nothing(self):
+        """No node_replacements.json — should silently do nothing."""
+        self._load()
+        self.mock_manager.register.assert_not_called()
+
+    def test_empty_object(self):
+        """Empty {} — should do nothing."""
+        self._write_json({})
+        self._load()
+        self.mock_manager.register.assert_not_called()
+
+    def test_single_replacement(self):
+        """Single replacement entry registers correctly."""
+        self._write_json({
+            "OldNode": [{
+                "new_node_id": "NewNode",
+                "old_node_id": "OldNode",
+                "input_mapping": [{"new_id": "model", "old_id": "ckpt_name"}],
+                "output_mapping": [{"new_idx": 0, "old_idx": 0}],
+            }]
+        })
+        self._load()
+        self.mock_manager.register.assert_called_once()
+        registered = self.mock_manager.register.call_args[0][0]
+        self.assertEqual(registered.new_node_id, "NewNode")
+        self.assertEqual(registered.old_node_id, "OldNode")
+        self.assertEqual(registered.input_mapping, [{"new_id": "model", "old_id": "ckpt_name"}])
+        self.assertEqual(registered.output_mapping, [{"new_idx": 0, "old_idx": 0}])
+
+    def test_multiple_replacements(self):
+        """Multiple old_node_ids each with entries."""
+        self._write_json({
+            "NodeA": [{"new_node_id": "NodeB", "old_node_id": "NodeA"}],
+            "NodeC": [{"new_node_id": "NodeD", "old_node_id": "NodeC"}],
+        })
+        self._load()
+        self.assertEqual(self.mock_manager.register.call_count, 2)
+
+    def test_multiple_alternatives_for_same_node(self):
+        """Multiple replacement options for the same old node."""
+        self._write_json({
+            "OldNode": [
+                {"new_node_id": "AltA", "old_node_id": "OldNode"},
+                {"new_node_id": "AltB", "old_node_id": "OldNode"},
+            ]
+        })
+        self._load()
+        self.assertEqual(self.mock_manager.register.call_count, 2)
+
+    def test_null_mappings(self):
+        """Null input/output mappings (trivial replacement)."""
+        self._write_json({
+            "OldNode": [{
+                "new_node_id": "NewNode",
+                "old_node_id": "OldNode",
+                "input_mapping": None,
+                "output_mapping": None,
+            }]
+        })
+        self._load()
+        registered = self.mock_manager.register.call_args[0][0]
+        self.assertIsNone(registered.input_mapping)
+        self.assertIsNone(registered.output_mapping)
+
+    def test_old_node_id_defaults_to_key(self):
+        """If old_node_id is missing from entry, uses the dict key."""
+        self._write_json({
+            "OldNode": [{"new_node_id": "NewNode"}]
+        })
+        self._load()
+        registered = self.mock_manager.register.call_args[0][0]
+        self.assertEqual(registered.old_node_id, "OldNode")
+
+    def test_invalid_json_skips(self):
+        """Invalid JSON file — should warn and skip, not crash."""
+        self._write_text("{invalid json")
+        self._load()
+        self.mock_manager.register.assert_not_called()
+
+    def test_non_object_json_skips(self):
+        """JSON array instead of object — should warn and skip."""
+        self._write_json([1, 2, 3])
+        self._load()
+        self.mock_manager.register.assert_not_called()
+
+    def test_non_list_value_skips(self):
+        """Value is not a list — should warn and skip that key."""
+        self._write_json({
+            "OldNode": "not a list",
+            "GoodNode": [{"new_node_id": "NewNode", "old_node_id": "GoodNode"}],
+        })
+        self._load()
+        self.assertEqual(self.mock_manager.register.call_count, 1)
+
+    def test_with_old_widget_ids(self):
+        """old_widget_ids are passed through."""
+        self._write_json({
+            "OldNode": [{
+                "new_node_id": "NewNode",
+                "old_node_id": "OldNode",
+                "old_widget_ids": ["width", "height"],
+            }]
+        })
+        self._load()
+        registered = self.mock_manager.register.call_args[0][0]
+        self.assertEqual(registered.old_widget_ids, ["width", "height"])
+
+    def test_set_value_in_input_mapping(self):
+        """input_mapping with set_value entries."""
+        self._write_json({
+            "OldNode": [{
+                "new_node_id": "NewNode",
+                "old_node_id": "OldNode",
+                "input_mapping": [
+                    {"new_id": "method", "set_value": "lanczos"},
+                    {"new_id": "size", "old_id": "dimension"},
+                ],
+            }]
+        })
+        self._load()
+        registered = self.mock_manager.register.call_args[0][0]
+        self.assertEqual(len(registered.input_mapping), 2)
+        self.assertEqual(registered.input_mapping[0]["set_value"], "lanczos")
+        self.assertEqual(registered.input_mapping[1]["old_id"], "dimension")
+
+    def test_non_dict_entry_skipped(self):
+        """Non-dict entries in the list are silently skipped."""
+        self._write_json({
+            "OldNode": [
+                "not a dict",
+                {"new_node_id": "NewNode", "old_node_id": "OldNode"},
+            ]
+        })
+        self._load()
+        self.assertEqual(self.mock_manager.register.call_count, 1)
+
+
+if __name__ == "__main__":
+    unittest.main()