Compare commits


1 Commit

Author: Claude
SHA1: ba988198de
fix: specify UTF-8 encoding when reading subgraph files
On Windows, Python defaults to cp1252 encoding when no encoding is
specified. JSON files containing UTF-8 characters (e.g., non-ASCII
characters) cause UnicodeDecodeError when read with cp1252.

This fixes the error that occurs when loading blueprint subgraphs
on Windows systems.

https://claude.ai/code/session_014WHi3SL9Gzsi3U6kbSjbSb
Date: 2026-02-21 22:40:45 +00:00
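
The file that actually receives the encoding change is not among the hunks rendered below. As a reference for what the commit message describes, a minimal sketch of the fix pattern, assuming a loader-style helper (the helper name and JSON handling are illustrative, not the repository's actual code):

import json

def load_subgraph(path):  # hypothetical helper, for illustration only
    # Without an explicit encoding, open() falls back to the platform
    # default (cp1252 on Windows), so UTF-8 JSON files raise
    # UnicodeDecodeError there. encoding="utf-8" makes the read behave
    # the same on every OS.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)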
10 changed files with 17 additions and 350 deletions

View File

@@ -1,7 +1,6 @@
 # yaml-language-server: $schema=https://coderabbit.ai/integrations/schema.v2.json
 language: "en-US"
 early_access: false
-tone_instructions: "Only comment on issues introduced by this PR's changes. Do not flag pre-existing problems in moved, re-indented, or reformatted code."
 reviews:
   profile: "chill"
@@ -36,14 +35,6 @@ reviews:
- "!**/*.bat"
path_instructions:
- path: "**"
instructions: |
IMPORTANT: Only comment on issues directly introduced by this PR's code changes.
Do NOT flag pre-existing issues in code that was merely moved, re-indented,
de-indented, or reformatted without logic changes. If code appears in the diff
only due to whitespace or structural reformatting (e.g., removing a `with:` block),
treat it as unchanged. Contributors should not feel obligated to address
pre-existing issues outside the scope of their contribution.
- path: "comfy/**"
instructions: |
Core ML/diffusion engine. Focus on:
@@ -83,11 +74,7 @@ reviews:
   auto_review:
     enabled: true
     auto_incremental_review: true
-    drafts: false
-    ignore_title_keywords:
-      - "WIP"
-      - "DO NOT REVIEW"
-      - "DO NOT MERGE"
+    drafts: true
   finishing_touches:
     docstrings:
@@ -97,7 +84,7 @@ reviews:
   tools:
     ruff:
-      enabled: false
+      enabled: true
     pylint:
       enabled: false
     flake8:

View File

@@ -9,7 +9,6 @@ from comfy.ldm.lightricks.model import (
     LTXVModel,
 )
 from comfy.ldm.lightricks.symmetric_patchifier import AudioPatchifier
-from comfy.ldm.lightricks.embeddings_connector import Embeddings1DConnector
 import comfy.ldm.common_dit

 class CompressedTimestep:
@@ -451,29 +450,6 @@ class LTXAVModel(LTXVModel):
             operations=self.operations,
         )
-        self.audio_embeddings_connector = Embeddings1DConnector(
-            split_rope=True,
-            double_precision_rope=True,
-            dtype=dtype,
-            device=device,
-            operations=self.operations,
-        )
-        self.video_embeddings_connector = Embeddings1DConnector(
-            split_rope=True,
-            double_precision_rope=True,
-            dtype=dtype,
-            device=device,
-            operations=self.operations,
-        )
-
-    def preprocess_text_embeds(self, context):
-        if context.shape[-1] == self.caption_channels * 2:
-            return context
-        out_vid = self.video_embeddings_connector(context)[0]
-        out_audio = self.audio_embeddings_connector(context)[0]
-        return torch.concat((out_vid, out_audio), dim=-1)
-
     def _init_transformer_blocks(self, device, dtype, **kwargs):
         """Initialize transformer blocks for LTXAV."""
         self.transformer_blocks = nn.ModuleList(

View File

@@ -157,9 +157,11 @@ class Embeddings1DConnector(nn.Module):
         self.num_learnable_registers = num_learnable_registers
         if self.num_learnable_registers:
             self.learnable_registers = nn.Parameter(
-                torch.empty(
+                torch.rand(
                     self.num_learnable_registers, inner_dim, dtype=dtype, device=device
                 )
+                * 2.0
+                - 1.0
             )

     def get_fractional_positions(self, indices_grid):
@@ -232,7 +234,7 @@ class Embeddings1DConnector(nn.Module):
         return indices

-    def precompute_freqs_cis(self, indices_grid, spacing="exp", out_dtype=None):
+    def precompute_freqs_cis(self, indices_grid, spacing="exp"):
         dim = self.inner_dim
         n_elem = 2 # 2 because of cos and sin
         freqs = self.precompute_freqs(indices_grid, spacing)
@@ -245,7 +247,7 @@ class Embeddings1DConnector(nn.Module):
             )
         else:
             cos_freq, sin_freq = interleaved_freqs_cis(freqs, dim % n_elem)
-        return cos_freq.to(dtype=out_dtype), sin_freq.to(dtype=out_dtype), self.split_rope
+        return cos_freq.to(self.dtype), sin_freq.to(self.dtype), self.split_rope

     def forward(
         self,
@@ -286,7 +288,7 @@ class Embeddings1DConnector(nn.Module):
             hidden_states.shape[1], dtype=torch.float32, device=hidden_states.device
         )
         indices_grid = indices_grid[None, None, :]
-        freqs_cis = self.precompute_freqs_cis(indices_grid, out_dtype=hidden_states.dtype)
+        freqs_cis = self.precompute_freqs_cis(indices_grid)

         # 2. Blocks
         for block_idx, block in enumerate(self.transformer_1d_blocks):

View File

@@ -988,14 +988,10 @@ class LTXAV(BaseModel):
     def extra_conds(self, **kwargs):
         out = super().extra_conds(**kwargs)
         attention_mask = kwargs.get("attention_mask", None)
-        device = kwargs["device"]
         if attention_mask is not None:
             out['attention_mask'] = comfy.conds.CONDRegular(attention_mask)
         cross_attn = kwargs.get("cross_attn", None)
         if cross_attn is not None:
-            if hasattr(self.diffusion_model, "preprocess_text_embeds"):
-                cross_attn = self.diffusion_model.preprocess_text_embeds(cross_attn.to(device=device, dtype=self.get_dtype_inference()))
             out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)
         out['frame_rate'] = comfy.conds.CONDConstant(kwargs.get("frame_rate", 25))

View File

@@ -3,6 +3,7 @@ import os
 from transformers import T5TokenizerFast
 from .spiece_tokenizer import SPieceTokenizer
 import comfy.text_encoders.genmo
+from comfy.ldm.lightricks.embeddings_connector import Embeddings1DConnector
 import torch
 import comfy.utils
 import math
@@ -101,7 +102,6 @@ class LTXAVTEModel(torch.nn.Module):
         super().__init__()
         self.dtypes = set()
         self.dtypes.add(dtype)
-        self.compat_mode = False
         self.gemma3_12b = Gemma3_12BModel(device=device, dtype=dtype_llama, model_options=model_options, layer="all", layer_idx=None)
         self.dtypes.add(dtype_llama)
@@ -109,11 +109,6 @@ class LTXAVTEModel(torch.nn.Module):
         operations = self.gemma3_12b.operations # TODO
         self.text_embedding_projection = operations.Linear(3840 * 49, 3840, bias=False, dtype=dtype, device=device)

-    def enable_compat_mode(self): # TODO: remove
-        from comfy.ldm.lightricks.embeddings_connector import Embeddings1DConnector
-        operations = self.gemma3_12b.operations
-        dtype = self.text_embedding_projection.weight.dtype
-        device = self.text_embedding_projection.weight.device
         self.audio_embeddings_connector = Embeddings1DConnector(
             split_rope=True,
             double_precision_rope=True,
@@ -129,7 +124,6 @@ class LTXAVTEModel(torch.nn.Module):
             device=device,
             operations=operations,
         )
-        self.compat_mode = True

     def set_clip_options(self, options):
         self.execution_device = options.get("execution_device", self.execution_device)
@@ -152,11 +146,9 @@ class LTXAVTEModel(torch.nn.Module):
         out = out.reshape((out.shape[0], out.shape[1], -1))
         out = self.text_embedding_projection(out)
         out = out.float()
-        if self.compat_mode:
-            out_vid = self.video_embeddings_connector(out)[0]
-            out_audio = self.audio_embeddings_connector(out)[0]
-            out = torch.concat((out_vid, out_audio), dim=-1)
+        out_vid = self.video_embeddings_connector(out)[0]
+        out_audio = self.audio_embeddings_connector(out)[0]
+        out = torch.concat((out_vid, out_audio), dim=-1)
         return out.to(out_device), pooled
@@ -167,30 +159,20 @@ class LTXAVTEModel(torch.nn.Module):
if "model.layers.47.self_attn.q_norm.weight" in sd:
return self.gemma3_12b.load_sd(sd)
else:
sdo = comfy.utils.state_dict_prefix_replace(sd, {"text_embedding_projection.aggregate_embed.weight": "text_embedding_projection.weight"}, filter_keys=True)
sdo = comfy.utils.state_dict_prefix_replace(sd, {"text_embedding_projection.aggregate_embed.weight": "text_embedding_projection.weight", "model.diffusion_model.video_embeddings_connector.": "video_embeddings_connector.", "model.diffusion_model.audio_embeddings_connector.": "audio_embeddings_connector."}, filter_keys=True)
if len(sdo) == 0:
sdo = sd
missing_all = []
unexpected_all = []
for prefix, component in [("text_embedding_projection.", self.text_embedding_projection)]:
for prefix, component in [("text_embedding_projection.", self.text_embedding_projection), ("video_embeddings_connector.", self.video_embeddings_connector), ("audio_embeddings_connector.", self.audio_embeddings_connector)]:
component_sd = {k.replace(prefix, ""): v for k, v in sdo.items() if k.startswith(prefix)}
if component_sd:
missing, unexpected = component.load_state_dict(component_sd, strict=False, assign=getattr(self, "can_assign_sd", False))
missing_all.extend([f"{prefix}{k}" for k in missing])
unexpected_all.extend([f"{prefix}{k}" for k in unexpected])
if "model.diffusion_model.audio_embeddings_connector.transformer_1d_blocks.2.attn1.to_q.bias" not in sd: # TODO: remove
ww = sd.get("model.diffusion_model.audio_embeddings_connector.transformer_1d_blocks.0.attn1.to_q.bias", None)
if ww is not None:
if ww.shape[0] == 3840:
self.enable_compat_mode()
sdv = comfy.utils.state_dict_prefix_replace(sd, {"model.diffusion_model.video_embeddings_connector.": ""}, filter_keys=True)
self.video_embeddings_connector.load_state_dict(sdv, strict=False, assign=getattr(self, "can_assign_sd", False))
sda = comfy.utils.state_dict_prefix_replace(sd, {"model.diffusion_model.audio_embeddings_connector.": ""}, filter_keys=True)
self.audio_embeddings_connector.load_state_dict(sda, strict=False, assign=getattr(self, "can_assign_sd", False))
return (missing_all, unexpected_all)
def memory_estimation_function(self, token_weight_pairs, device=None):

View File

@@ -134,13 +134,6 @@ class ImageToVideoWithAudioRequest(BaseModel):
     shot_type: str | None = Field(None)

-class KlingAvatarRequest(BaseModel):
-    image: str = Field(...)
-    sound_file: str = Field(...)
-    prompt: str | None = Field(None)
-    mode: str = Field(...)
-
 class MotionControlRequest(BaseModel):
     prompt: str = Field(...)
     image_url: str = Field(...)

View File

@@ -50,7 +50,6 @@ from comfy_api_nodes.apis import (
 )
 from comfy_api_nodes.apis.kling import (
     ImageToVideoWithAudioRequest,
-    KlingAvatarRequest,
     MotionControlRequest,
     MultiPromptEntry,
     OmniImageParamImage,
@@ -75,7 +74,6 @@ from comfy_api_nodes.util import (
     upload_image_to_comfyapi,
     upload_images_to_comfyapi,
     upload_video_to_comfyapi,
-    validate_audio_duration,
     validate_image_aspect_ratio,
     validate_image_dimensions,
     validate_string,
@@ -3141,103 +3139,6 @@ class KlingFirstLastFrameNode(IO.ComfyNode):
         return IO.NodeOutput(await download_url_to_video_output(final_response.data.task_result.videos[0].url))

-class KlingAvatarNode(IO.ComfyNode):
-    @classmethod
-    def define_schema(cls) -> IO.Schema:
-        return IO.Schema(
-            node_id="KlingAvatarNode",
-            display_name="Kling Avatar 2.0",
-            category="api node/video/Kling",
-            description="Generate broadcast-style digital human videos from a single photo and an audio file.",
-            inputs=[
-                IO.Image.Input(
-                    "image",
-                    tooltip="Avatar reference image. "
-                    "Width and height must be at least 300px. Aspect ratio must be between 1:2.5 and 2.5:1.",
-                ),
-                IO.Audio.Input(
-                    "sound_file",
-                    tooltip="Audio input. Must be between 2 and 300 seconds in duration.",
-                ),
-                IO.Combo.Input("mode", options=["std", "pro"]),
-                IO.String.Input(
-                    "prompt",
-                    multiline=True,
-                    default="",
-                    optional=True,
-                    tooltip="Optional prompt to define avatar actions, emotions, and camera movements.",
-                ),
-                IO.Int.Input(
-                    "seed",
-                    default=0,
-                    min=0,
-                    max=2147483647,
-                    display_mode=IO.NumberDisplay.number,
-                    control_after_generate=True,
-                    tooltip="Seed controls whether the node should re-run; "
-                    "results are non-deterministic regardless of seed.",
-                ),
-            ],
-            outputs=[
-                IO.Video.Output(),
-            ],
-            hidden=[
-                IO.Hidden.auth_token_comfy_org,
-                IO.Hidden.api_key_comfy_org,
-                IO.Hidden.unique_id,
-            ],
-            is_api_node=True,
-            price_badge=IO.PriceBadge(
-                depends_on=IO.PriceBadgeDepends(widgets=["mode"]),
-                expr="""
-                (
-                    $prices := {"std": 0.056, "pro": 0.112};
-                    {"type":"usd","usd": $lookup($prices, widgets.mode), "format":{"suffix":"/second"}}
-                )
-                """,
-            ),
-        )
-
-    @classmethod
-    async def execute(
-        cls,
-        image: Input.Image,
-        sound_file: Input.Audio,
-        mode: str,
-        seed: int,
-        prompt: str = "",
-    ) -> IO.NodeOutput:
-        validate_image_dimensions(image, min_width=300, min_height=300)
-        validate_image_aspect_ratio(image, (1, 2.5), (2.5, 1))
-        validate_audio_duration(sound_file, min_duration=2, max_duration=300)
-        response = await sync_op(
-            cls,
-            ApiEndpoint(path="/proxy/kling/v1/videos/avatar/image2video", method="POST"),
-            response_model=TaskStatusResponse,
-            data=KlingAvatarRequest(
-                image=await upload_image_to_comfyapi(cls, image),
-                sound_file=await upload_audio_to_comfyapi(
-                    cls, sound_file, container_format="mp3", codec_name="libmp3lame", mime_type="audio/mpeg"
-                ),
-                prompt=prompt or None,
-                mode=mode,
-            ),
-        )
-        if response.code:
-            raise RuntimeError(
-                f"Kling request failed. Code: {response.code}, Message: {response.message}, Data: {response.data}"
-            )
-        final_response = await poll_op(
-            cls,
-            ApiEndpoint(path=f"/proxy/kling/v1/videos/avatar/image2video/{response.data.task_id}"),
-            response_model=TaskStatusResponse,
-            status_extractor=lambda r: (r.data.task_status if r.data else None),
-            max_poll_attempts=800,
-        )
-        return IO.NodeOutput(await download_url_to_video_output(final_response.data.task_result.videos[0].url))
-
 class KlingExtension(ComfyExtension):
     @override
     async def get_node_list(self) -> list[type[IO.ComfyNode]]:
@@ -3266,7 +3167,6 @@ class KlingExtension(ComfyExtension):
             MotionControl,
             KlingVideoNode,
             KlingFirstLastFrameNode,
-            KlingAvatarNode,
         ]

View File

@@ -6,7 +6,6 @@ import folder_paths
 import json
 import os
 import re
-import math
 import torch
 import comfy.utils
@@ -683,172 +682,6 @@ class ImageScaleToMaxDimension(IO.ComfyNode):
     upscale = execute # TODO: remove

-class SplitImageToTileList(IO.ComfyNode):
-    @classmethod
-    def define_schema(cls):
-        return IO.Schema(
-            node_id="SplitImageToTileList",
-            category="image/batch",
-            search_aliases=["split image", "tile image", "slice image"],
-            display_name="Split Image into List of Tiles",
-            description="Splits an image into a batched list of tiles with a specified overlap.",
-            inputs=[
-                IO.Image.Input("image"),
-                IO.Int.Input("tile_width", default=1024, min=64, max=MAX_RESOLUTION),
-                IO.Int.Input("tile_height", default=1024, min=64, max=MAX_RESOLUTION),
-                IO.Int.Input("overlap", default=128, min=0, max=4096),
-            ],
-            outputs=[
-                IO.Image.Output(is_output_list=True),
-            ],
-        )
-
-    @staticmethod
-    def get_grid_coords(width, height, tile_width, tile_height, overlap):
-        coords = []
-        stride_x = max(1, tile_width - overlap)
-        stride_y = max(1, tile_height - overlap)
-        y = 0
-        while y < height:
-            x = 0
-            y_end = min(y + tile_height, height)
-            y_start = max(0, y_end - tile_height)
-            while x < width:
-                x_end = min(x + tile_width, width)
-                x_start = max(0, x_end - tile_width)
-                coords.append((x_start, y_start, x_end, y_end))
-                if x_end >= width:
-                    break
-                x += stride_x
-            if y_end >= height:
-                break
-            y += stride_y
-        return coords
-
-    @classmethod
-    def execute(cls, image, tile_width, tile_height, overlap):
-        b, h, w, c = image.shape
-        coords = cls.get_grid_coords(w, h, tile_width, tile_height, overlap)
-        output_list = []
-        for (x_start, y_start, x_end, y_end) in coords:
-            tile = image[:, y_start:y_end, x_start:x_end, :]
-            output_list.append(tile)
-        return IO.NodeOutput(output_list)
-
-class ImageMergeTileList(IO.ComfyNode):
-    @classmethod
-    def define_schema(cls):
-        return IO.Schema(
-            node_id="ImageMergeTileList",
-            display_name="Merge List of Tiles to Image",
-            category="image/batch",
-            search_aliases=["split image", "tile image", "slice image"],
-            is_input_list=True,
-            inputs=[
-                IO.Image.Input("image_list"),
-                IO.Int.Input("final_width", default=1024, min=64, max=32768),
-                IO.Int.Input("final_height", default=1024, min=64, max=32768),
-                IO.Int.Input("overlap", default=128, min=0, max=4096),
-            ],
-            outputs=[
-                IO.Image.Output(is_output_list=False),
-            ],
-        )
-
-    @staticmethod
-    def get_grid_coords(width, height, tile_width, tile_height, overlap):
-        coords = []
-        stride_x = max(1, tile_width - overlap)
-        stride_y = max(1, tile_height - overlap)
-        y = 0
-        while y < height:
-            x = 0
-            y_end = min(y + tile_height, height)
-            y_start = max(0, y_end - tile_height)
-            while x < width:
-                x_end = min(x + tile_width, width)
-                x_start = max(0, x_end - tile_width)
-                coords.append((x_start, y_start, x_end, y_end))
-                if x_end >= width:
-                    break
-                x += stride_x
-            if y_end >= height:
-                break
-            y += stride_y
-        return coords
-
-    @classmethod
-    def execute(cls, image_list, final_width, final_height, overlap):
-        w = final_width[0]
-        h = final_height[0]
-        ovlp = overlap[0]
-        feather_str = 1.0
-        first_tile = image_list[0]
-        b, t_h, t_w, c = first_tile.shape
-        device = first_tile.device
-        dtype = first_tile.dtype
-        coords = cls.get_grid_coords(w, h, t_w, t_h, ovlp)
-        canvas = torch.zeros((b, h, w, c), device=device, dtype=dtype)
-        weights = torch.zeros((b, h, w, 1), device=device, dtype=dtype)
-        if ovlp > 0:
-            y_w = torch.sin(math.pi * torch.linspace(0, 1, t_h, device=device, dtype=dtype))
-            x_w = torch.sin(math.pi * torch.linspace(0, 1, t_w, device=device, dtype=dtype))
-            y_w = torch.clamp(y_w, min=1e-5)
-            x_w = torch.clamp(x_w, min=1e-5)
-            sine_mask = (y_w.unsqueeze(1) * x_w.unsqueeze(0)).unsqueeze(0).unsqueeze(-1)
-            flat_mask = torch.ones_like(sine_mask)
-            weight_mask = torch.lerp(flat_mask, sine_mask, feather_str)
-        else:
-            weight_mask = torch.ones((1, t_h, t_w, 1), device=device, dtype=dtype)
-        for i, (x_start, y_start, x_end, y_end) in enumerate(coords):
-            if i >= len(image_list):
-                break
-            tile = image_list[i]
-            region_h = y_end - y_start
-            region_w = x_end - x_start
-            real_h = min(region_h, tile.shape[1])
-            real_w = min(region_w, tile.shape[2])
-            y_end_actual = y_start + real_h
-            x_end_actual = x_start + real_w
-            tile_crop = tile[:, :real_h, :real_w, :]
-            mask_crop = weight_mask[:, :real_h, :real_w, :]
-            canvas[:, y_start:y_end_actual, x_start:x_end_actual, :] += tile_crop * mask_crop
-            weights[:, y_start:y_end_actual, x_start:x_end_actual, :] += mask_crop
-        weights[weights == 0] = 1.0
-        merged_image = canvas / weights
-        return IO.NodeOutput(merged_image)
-
 class ImagesExtension(ComfyExtension):
     @override
     async def get_node_list(self) -> list[type[IO.ComfyNode]]:
@@ -868,8 +701,6 @@ class ImagesExtension(ComfyExtension):
             ImageRotate,
             ImageFlip,
             ImageScaleToMaxDimension,
-            SplitImageToTileList,
-            ImageMergeTileList,
         ]

View File

@@ -10,7 +10,7 @@ class NAGuidance(io.ComfyNode):
node_id="NAGuidance",
display_name="Normalized Attention Guidance",
description="Applies Normalized Attention Guidance to models, enabling negative prompts on distilled/schnell models.",
category="advanced/guidance",
category="",
is_experimental=True,
inputs=[
io.Model.Input("model", tooltip="The model to apply NAG to."),

View File

@@ -1,5 +1,5 @@
-comfyui-frontend-package==1.39.16
-comfyui-workflow-templates==0.9.2
+comfyui-frontend-package==1.39.14
+comfyui-workflow-templates==0.8.43
 comfyui-embedded-docs==0.4.1
 torch
 torchsde