fix: move _initialized flag to end of GLContext.__init__

Prevents the '_vao' attribute error that occurs when init fails partway through: because the _initialized flag was set early, subsequent calls skipped initialization.
Merge remote-tracking branch 'comfy-org/master' into test-glsl-nodes
2026-04-05 04:09:08 +00:00 · 2026-02-14 22:27:30 -08:00 · 2026-02-14 20:20:56 -08:00 · 2026-02-04 10:20:24 -08:00 · 2026-01-31 16:43:57 -08:00 · 2026-01-31 16:32:00 -08:00
179 changed files with 697 additions and 6266 deletions
--- a/.coderabbit.yaml
+++ b/.coderabbit.yaml
@@ -1,127 +0,0 @@
-# yaml-language-server: $schema=https://coderabbit.ai/integrations/schema.v2.json
-language: "en-US"
-early_access: false
-tone_instructions: "Only comment on issues introduced by this PR's changes. Do not flag pre-existing problems in moved, re-indented, or reformatted code."
-
-reviews:
-  profile: "chill"
-  request_changes_workflow: false
-  high_level_summary: false
-  poem: false
-  review_status: false
-  review_details: false
-  commit_status: true
-  collapse_walkthrough: true
-  changed_files_summary: false
-  sequence_diagrams: false
-  estimate_code_review_effort: false
-  assess_linked_issues: false
-  related_issues: false
-  related_prs: false
-  suggested_labels: false
-  auto_apply_labels: false
-  suggested_reviewers: false
-  auto_assign_reviewers: false
-  in_progress_fortune: false
-  enable_prompt_for_ai_agents: true
-
-  path_filters:
-    - "!comfy_api_nodes/apis/**"
-    - "!**/generated/*.pyi"
-    - "!.ci/**"
-    - "!script_examples/**"
-    - "!**/__pycache__/**"
-    - "!**/*.ipynb"
-    - "!**/*.png"
-    - "!**/*.bat"
-
-  path_instructions:
-    - path: "**"
-      instructions: |
-        IMPORTANT: Only comment on issues directly introduced by this PR's code changes.
-        Do NOT flag pre-existing issues in code that was merely moved, re-indented,
-        de-indented, or reformatted without logic changes. If code appears in the diff
-        only due to whitespace or structural reformatting (e.g., removing a `with:` block),
-        treat it as unchanged. Contributors should not feel obligated to address
-        pre-existing issues outside the scope of their contribution.
-    - path: "comfy/**"
-      instructions: |
-        Core ML/diffusion engine. Focus on:
-        - Backward compatibility (breaking changes affect all custom nodes)
-        - Memory management and GPU resource handling
-        - Performance implications in hot paths
-        - Thread safety for concurrent execution
-    - path: "comfy_api_nodes/**"
-      instructions: |
-        Third-party API integration nodes. Focus on:
-        - No hardcoded API keys or secrets
-        - Proper error handling for API failures (timeouts, rate limits, auth errors)
-        - Correct Pydantic model usage
-        - Security of user data passed to external APIs
-    - path: "comfy_extras/**"
-      instructions: |
-        Community-contributed extra nodes. Focus on:
-        - Consistency with node patterns (INPUT_TYPES, RETURN_TYPES, FUNCTION, CATEGORY)
-        - No breaking changes to existing node interfaces
-    - path: "comfy_execution/**"
-      instructions: |
-        Execution engine (graph execution, caching, jobs). Focus on:
-        - Caching correctness
-        - Concurrent execution safety
-        - Graph validation edge cases
-    - path: "nodes.py"
-      instructions: |
-        Core node definitions (2500+ lines). Focus on:
-        - Backward compatibility of NODE_CLASS_MAPPINGS
-        - Consistency of INPUT_TYPES return format
-    - path: "alembic_db/**"
-      instructions: |
-        Database migrations. Focus on:
-        - Migration safety and rollback support
-        - Data preservation during schema changes
-
-  auto_review:
-    enabled: true
-    auto_incremental_review: true
-    drafts: false
-    ignore_title_keywords:
-      - "WIP"
-      - "DO NOT REVIEW"
-      - "DO NOT MERGE"
-
-  finishing_touches:
-    docstrings:
-      enabled: false
-    unit_tests:
-      enabled: false
-
-  tools:
-    ruff:
-      enabled: false
-    pylint:
-      enabled: false
-    flake8:
-      enabled: false
-    gitleaks:
-      enabled: true
-    shellcheck:
-      enabled: false
-    markdownlint:
-      enabled: false
-    yamllint:
-      enabled: false
-    languagetool:
-      enabled: false
-    github-checks:
-      enabled: true
-      timeout_ms: 90000
-    ast-grep:
-      essential_rules: true
-
-chat:
-  auto_reply: true
-
-knowledge_base:
-  opt_out: false
-  learnings:
-    scope: "auto"
--- a/.github/ISSUE_TEMPLATE/bug-report.yml
+++ b/.github/ISSUE_TEMPLATE/bug-report.yml
@@ -16,7 +16,7 @@ body:

        ## Very Important

-        Please make sure that you post ALL your ComfyUI logs in the bug report **even if there is no crash**. Just paste everything. The startup log (everything before "To see the GUI go to: ...") contains critical information to developers trying to help. For a performance issue or crash, paste everything from "got prompt" to the end, including the crash. More is better - always. A bug report without logs will likely be ignored.
+        Please make sure that you post ALL your ComfyUI logs in the bug report. A bug report without logs will likely be ignored.
  - type: checkboxes
    id: custom-nodes-test
    attributes:
--- a/.gitignore
+++ b/.gitignore
@@ -11,7 +11,7 @@ extra_model_paths.yaml
 /.vs
 .vscode/
 .idea/
-venv*/
+venv/
 .venv/
 /web/extensions/*
 !/web/extensions/logging.js.example
--- a/README.md
+++ b/README.md
@@ -189,6 +189,8 @@ The portable above currently comes with python 3.13 and pytorch cuda 13.0. Updat

 [Experimental portable for AMD GPUs](https://github.com/comfyanonymous/ComfyUI/releases/latest/download/ComfyUI_windows_portable_amd.7z)

+[Portable with pytorch cuda 12.8 and python 3.12](https://github.com/comfyanonymous/ComfyUI/releases/latest/download/ComfyUI_windows_portable_nvidia_cu128.7z).
+
 [Portable with pytorch cuda 12.6 and python 3.12](https://github.com/comfyanonymous/ComfyUI/releases/latest/download/ComfyUI_windows_portable_nvidia_cu126.7z) (Supports Nvidia 10 series and older GPUs).

 #### How do I share models between another UI and ComfyUI?
@@ -227,9 +229,9 @@ AMD users can install rocm and pytorch with pip if you don't have it already ins

 ```pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm7.1```

-This is the command to install the nightly with ROCm 7.2 which might have some performance improvements:
+This is the command to install the nightly with ROCm 7.1 which might have some performance improvements:

-```pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/rocm7.2```
+```pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/rocm7.1```


 ### AMD GPUs (Experimental: Windows and Linux), RDNA 3, 3.5 and 4 only.
--- a/app/node_replace_manager.py
+++ b/app/node_replace_manager.py
@@ -1,107 +0,0 @@
-from __future__ import annotations
-
-from aiohttp import web
-
-from typing import TYPE_CHECKING, TypedDict
-if TYPE_CHECKING:
-    from comfy_api.latest._io_public import NodeReplace
-
-from comfy_execution.graph_utils import is_link
-import nodes
-
-class NodeStruct(TypedDict):
-    inputs: dict[str, str | int | float | bool | tuple[str, int]]
-    class_type: str
-    _meta: dict[str, str]
-
-def copy_node_struct(node_struct: NodeStruct, empty_inputs: bool = False) -> NodeStruct:
-    new_node_struct = node_struct.copy()
-    if empty_inputs:
-        new_node_struct["inputs"] = {}
-    else:
-        new_node_struct["inputs"] = node_struct["inputs"].copy()
-    new_node_struct["_meta"] = node_struct["_meta"].copy()
-    return new_node_struct
-
-
-class NodeReplaceManager:
-    """Manages node replacement registrations."""
-
-    def __init__(self):
-        self._replacements: dict[str, list[NodeReplace]] = {}
-
-    def register(self, node_replace: NodeReplace):
-        """Register a node replacement mapping."""
-        self._replacements.setdefault(node_replace.old_node_id, []).append(node_replace)
-
-    def get_replacement(self, old_node_id: str) -> list[NodeReplace] | None:
-        """Get replacements for an old node ID."""
-        return self._replacements.get(old_node_id)
-
-    def has_replacement(self, old_node_id: str) -> bool:
-        """Check if a replacement exists for an old node ID."""
-        return old_node_id in self._replacements
-
-    def apply_replacements(self, prompt: dict[str, NodeStruct]):
-        connections: dict[str, list[tuple[str, str, int]]] = {}
-        need_replacement: set[str] = set()
-        for node_number, node_struct in prompt.items():
-            if "class_type" not in node_struct or "inputs" not in node_struct:
-                continue
-            class_type = node_struct["class_type"]
-            # need replacement if not in NODE_CLASS_MAPPINGS and has replacement
-            if class_type not in nodes.NODE_CLASS_MAPPINGS.keys() and self.has_replacement(class_type):
-                need_replacement.add(node_number)
-            # keep track of connections
-            for input_id, input_value in node_struct["inputs"].items():
-                if is_link(input_value):
-                    conn_number = input_value[0]
-                    connections.setdefault(conn_number, []).append((node_number, input_id, input_value[1]))
-        for node_number in need_replacement:
-            node_struct = prompt[node_number]
-            class_type = node_struct["class_type"]
-            replacements = self.get_replacement(class_type)
-            if replacements is None:
-                continue
-            # just use the first replacement
-            replacement = replacements[0]
-            new_node_id = replacement.new_node_id
-            # if replacement is not a valid node, skip trying to replace it as will only cause confusion
-            if new_node_id not in nodes.NODE_CLASS_MAPPINGS.keys():
-                continue
-            # first, replace node id (class_type)
-            new_node_struct = copy_node_struct(node_struct, empty_inputs=True)
-            new_node_struct["class_type"] = new_node_id
-            # TODO: consider replacing display_name in _meta as well for error reporting purposes; would need to query node schema
-            # second, replace inputs
-            if replacement.input_mapping is not None:
-                for input_map in replacement.input_mapping:
-                    if "set_value" in input_map:
-                        new_node_struct["inputs"][input_map["new_id"]] = input_map["set_value"]
-                    elif "old_id" in input_map:
-                        new_node_struct["inputs"][input_map["new_id"]] = node_struct["inputs"][input_map["old_id"]]
-            # finalize input replacement
-            prompt[node_number] = new_node_struct
-            # third, replace outputs
-            if replacement.output_mapping is not None:
-                # re-mapping outputs requires changing the input values of nodes that receive connections from this one
-                if node_number in connections:
-                    for conns in connections[node_number]:
-                        conn_node_number, conn_input_id, old_output_idx = conns
-                        for output_map in replacement.output_mapping:
-                            if output_map["old_idx"] == old_output_idx:
-                                new_output_idx = output_map["new_idx"]
-                                previous_input = prompt[conn_node_number]["inputs"][conn_input_id]
-                                previous_input[1] = new_output_idx
-
-    def as_dict(self):
-        """Serialize all replacements to dict."""
-        return {
-            k: [v.as_dict() for v in v_list]
-            for k, v_list in self._replacements.items()
-        }
-
-    def add_routes(self, routes):
-        @routes.get("/node_replacements")
-        async def get_node_replacements(request):
-            return web.json_response(self.as_dict())
--- a/app/subgraph_manager.py
+++ b/app/subgraph_manager.py
@@ -53,7 +53,7 @@ class SubgraphManager:
        return entry_id, entry

    async def load_entry_data(self, entry: SubgraphEntry):
-        with open(entry['path'], 'r', encoding='utf-8') as f:
+        with open(entry['path'], 'r') as f:
            entry['data'] = f.read()
        return entry

--- a/blueprints/.glsl/Hue_and_Saturation_1.frag
+++ b/blueprints/.glsl/Hue_and_Saturation_1.frag
@@ -169,7 +169,7 @@ vec3 colorize(vec3 rgb, float hue, float sat, float light) {
    float lum = dot(rgb, vec3(0.299, 0.587, 0.114));
    float l = adjustLightness(lum, light);

-    vec3 hsl = vec3(fract(hue), clamp(sat, 0.0, 1.0), clamp(l, 0.0, 1.0));
+    vec3 hsl = vec3(fract(hue), clamp(abs(sat), 0.0, 1.0), clamp(l, 0.0, 1.0));
    return hsl2rgb(hsl);
 }

--- a/blueprints/Brightness
+++ b/blueprints/Brightness
--- a/(Z-Image-Turbo).json
+++ b/(Z-Image-Turbo).json
--- a/blueprints/Canny
+++ b/blueprints/Canny
--- a/blueprints/Chromatic
+++ b/blueprints/Chromatic
--- a/blueprints/Color
+++ b/blueprints/Color
--- a/(Z-Image-Turbo).json
+++ b/(Z-Image-Turbo).json
--- a/blueprints/Depth
+++ b/blueprints/Depth
--- a/blueprints/Edge-Preserving
+++ b/blueprints/Edge-Preserving
--- a/blueprints/Film
+++ b/blueprints/Film
--- a/blueprints/Glow.json
+++ b/blueprints/Glow.json
--- a/Saturation.json
+++ b/Saturation.json
--- a/blueprints/Image
+++ b/blueprints/Image
--- a/blueprints/Image
+++ b/blueprints/Image
--- a/blueprints/Image
+++ b/blueprints/Image
@@ -1 +1 @@
-{"revision": 0, "last_node_id": 29, "last_link_id": 0, "nodes": [{"id": 29, "type": "4c9d6ea4-b912-40e5-8766-6793a9758c53", "pos": [1970, -230], "size": [180, 86], "flags": {}, "order": 5, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": null}], "outputs": [{"label": "R", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": []}, {"label": "G", "localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": []}, {"label": "B", "localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": []}, {"label": "A", "localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": []}], "title": "Image Channels", "properties": {"proxyWidgets": []}, "widgets_values": []}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "4c9d6ea4-b912-40e5-8766-6793a9758c53", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 28, "lastLinkId": 39, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Image Channels", "inputNode": {"id": -10, "bounding": [1820, -185, 120, 60]}, "outputNode": {"id": -20, "bounding": [2460, -215, 120, 120]}, "inputs": [{"id": "3522932b-2d86-4a1f-a02a-cb29f3a9d7fe", "name": "images.image0", "type": "IMAGE", "linkIds": [39], "localized_name": "images.image0", "label": "image", "pos": [1920, -165]}], "outputs": [{"id": "605cb9c3-b065-4d9b-81d2-3ec331889b2b", "name": "IMAGE0", "type": "IMAGE", "linkIds": [26], "localized_name": "IMAGE0", "label": "R", "pos": [2480, -195]}, {"id": "fb44a77e-0522-43e9-9527-82e7465b3596", "name": "IMAGE1", "type": "IMAGE", "linkIds": [27], "localized_name": "IMAGE1", "label": "G", "pos": [2480, -175]}, {"id": "81460ee6-0131-402a-874f-6bf3001fc4ff", "name": "IMAGE2", "type": "IMAGE", "linkIds": [28], "localized_name": "IMAGE2", "label": "B", "pos": [2480, -155]}, {"id": "ae690246-80d4-4951-b1d9-9306d8a77417", "name": "IMAGE3", "type": "IMAGE", "linkIds": [29], "localized_name": "IMAGE3", 
"label": "A", "pos": [2480, -135]}], "widgets": [], "nodes": [{"id": 23, "type": "GLSLShader", "pos": [2000, -330], "size": [400, 172], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": 39}, {"localized_name": "fragment_shader", "name": "fragment_shader", "type": "STRING", "widget": {"name": "fragment_shader"}, "link": null}, {"localized_name": "size_mode", "name": "size_mode", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "size_mode"}, "link": null}, {"label": "image1", "localized_name": "images.image1", "name": "images.image1", "shape": 7, "type": "IMAGE", "link": null}], "outputs": [{"label": "R", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": [26]}, {"label": "G", "localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": [27]}, {"label": "B", "localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": [28]}, {"label": "A", "localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": [29]}], "properties": {"Node name for S&R": "GLSLShader"}, "widgets_values": ["#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\nlayout(location = 1) out vec4 fragColor1;\nlayout(location = 2) out vec4 fragColor2;\nlayout(location = 3) out vec4 fragColor3;\n\nvoid main() {\n  vec4 color = texture(u_image0, v_texCoord);\n  // Output each channel as grayscale to separate render targets\n  fragColor0 = vec4(vec3(color.r), 1.0);  // Red channel\n  fragColor1 = vec4(vec3(color.g), 1.0);  // Green channel\n  fragColor2 = vec4(vec3(color.b), 1.0);  // Blue channel\n  fragColor3 = vec4(vec3(color.a), 1.0);  // Alpha channel\n}\n", "from_input"]}], "groups": [], "links": [{"id": 39, "origin_id": -10, "origin_slot": 0, "target_id": 23, "target_slot": 0, "type": "IMAGE"}, {"id": 26, "origin_id": 23, "origin_slot": 0, "target_id": -20, 
"target_slot": 0, "type": "IMAGE"}, {"id": 27, "origin_id": 23, "origin_slot": 1, "target_id": -20, "target_slot": 1, "type": "IMAGE"}, {"id": 28, "origin_id": 23, "origin_slot": 2, "target_id": -20, "target_slot": 2, "type": "IMAGE"}, {"id": 29, "origin_id": 23, "origin_slot": 3, "target_id": -20, "target_slot": 3, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image Tools/Color adjust"}]}}
+{"revision": 0, "last_node_id": 29, "last_link_id": 0, "nodes": [{"id": 29, "type": "4c9d6ea4-b912-40e5-8766-6793a9758c53", "pos": [1970, -230], "size": [180, 86], "flags": {}, "order": 5, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": null}], "outputs": [{"label": "R", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": []}, {"label": "G", "localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": []}, {"label": "B", "localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": []}, {"label": "A", "localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": []}], "title": "Image Channels", "properties": {"proxyWidgets": []}, "widgets_values": []}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "4c9d6ea4-b912-40e5-8766-6793a9758c53", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 28, "lastLinkId": 39, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Image Channels", "inputNode": {"id": -10, "bounding": [1820, -185, 120, 60]}, "outputNode": {"id": -20, "bounding": [2460, -215, 120, 120]}, "inputs": [{"id": "3522932b-2d86-4a1f-a02a-cb29f3a9d7fe", "name": "images.image0", "type": "IMAGE", "linkIds": [39], "localized_name": "images.image0", "label": "image", "pos": [1920, -165]}], "outputs": [{"id": "605cb9c3-b065-4d9b-81d2-3ec331889b2b", "name": "IMAGE0", "type": "IMAGE", "linkIds": [26], "localized_name": "IMAGE0", "label": "R", "pos": [2480, -195]}, {"id": "fb44a77e-0522-43e9-9527-82e7465b3596", "name": "IMAGE1", "type": "IMAGE", "linkIds": [27], "localized_name": "IMAGE1", "label": "G", "pos": [2480, -175]}, {"id": "81460ee6-0131-402a-874f-6bf3001fc4ff", "name": "IMAGE2", "type": "IMAGE", "linkIds": [28], "localized_name": "IMAGE2", "label": "B", "pos": [2480, -155]}, {"id": "ae690246-80d4-4951-b1d9-9306d8a77417", "name": "IMAGE3", "type": "IMAGE", "linkIds": [29], "localized_name": "IMAGE3", 
"label": "A", "pos": [2480, -135]}], "widgets": [], "nodes": [{"id": 23, "type": "GLSLShader", "pos": [2000, -330], "size": [400, 172], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": 39}, {"localized_name": "fragment_shader", "name": "fragment_shader", "type": "STRING", "widget": {"name": "fragment_shader"}, "link": null}, {"localized_name": "size_mode", "name": "size_mode", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "size_mode"}, "link": null}, {"label": "image1", "localized_name": "images.image1", "name": "images.image1", "shape": 7, "type": "IMAGE", "link": null}], "outputs": [{"label": "R", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": [26]}, {"label": "G", "localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": [27]}, {"label": "B", "localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": [28]}, {"label": "A", "localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": [29]}], "properties": {"Node name for S&R": "GLSLShader"}, "widgets_values": ["#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\nlayout(location = 1) out vec4 fragColor1;\nlayout(location = 2) out vec4 fragColor2;\nlayout(location = 3) out vec4 fragColor3;\n\nvoid main() {\n  vec4 color = texture(u_image0, v_texCoord);\n  // Output each channel as grayscale to separate render targets\n  fragColor0 = vec4(vec3(color.r), 1.0);  // Red channel\n  fragColor1 = vec4(vec3(color.g), 1.0);  // Green channel\n  fragColor2 = vec4(vec3(color.b), 1.0);  // Blue channel\n  fragColor3 = vec4(vec3(color.a), 1.0);  // Alpha channel\n}\n", "from_input"]}], "groups": [], "links": [{"id": 39, "origin_id": -10, "origin_slot": 0, "target_id": 23, "target_slot": 0, "type": "IMAGE"}, {"id": 26, "origin_id": 23, "origin_slot": 0, "target_id": -20, 
"target_slot": 0, "type": "IMAGE"}, {"id": 27, "origin_id": 23, "origin_slot": 1, "target_id": -20, "target_slot": 1, "type": "IMAGE"}, {"id": 28, "origin_id": 23, "origin_slot": 2, "target_id": -20, "target_slot": 2, "type": "IMAGE"}, {"id": 29, "origin_id": 23, "origin_slot": 3, "target_id": -20, "target_slot": 3, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}}]}}
--- a/blueprints/Image
+++ b/blueprints/Image
--- a/blueprints/Image
+++ b/blueprints/Image
--- a/(Qwen-image).json
+++ b/(Qwen-image).json
--- a/blueprints/Image
+++ b/blueprints/Image
--- a/(Qwen-Image).json
+++ b/(Qwen-Image).json
--- a/Upscale(Z-image-Turbo).json
+++ b/Upscale(Z-image-Turbo).json
--- a/blueprints/Image
+++ b/blueprints/Image
--- a/blueprints/Image
+++ b/blueprints/Image
--- a/blueprints/Image
+++ b/blueprints/Image
--- a/blueprints/Image
+++ b/blueprints/Image
--- a/(Z-Image-Turbo).json
+++ b/(Z-Image-Turbo).json
--- a/blueprints/Pose
+++ b/blueprints/Pose
--- a/blueprints/Prompt
+++ b/blueprints/Prompt
@@ -1 +0,0 @@
-{"revision": 0, "last_node_id": 15, "last_link_id": 0, "nodes": [{"id": 15, "type": "24d8bbfd-39d4-4774-bff0-3de40cc7a471", "pos": [-1490, 2040], "size": [400, 260], "flags": {}, "order": 0, "mode": 0, "inputs": [{"name": "prompt", "type": "STRING", "widget": {"name": "prompt"}, "link": null}, {"label": "reference images", "name": "images", "type": "IMAGE", "link": null}], "outputs": [{"name": "STRING", "type": "STRING", "links": null}], "title": "Prompt Enhance", "properties": {"proxyWidgets": [["-1", "prompt"]], "cnr_id": "comfy-core", "ver": "0.14.1"}, "widgets_values": [""]}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "24d8bbfd-39d4-4774-bff0-3de40cc7a471", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 15, "lastLinkId": 14, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Prompt Enhance", "inputNode": {"id": -10, "bounding": [-2170, 2110, 138.876953125, 80]}, "outputNode": {"id": -20, "bounding": [-640, 2110, 120, 60]}, "inputs": [{"id": "aeab7216-00e0-4528-a09b-bba50845c5a6", "name": "prompt", "type": "STRING", "linkIds": [11], "pos": [-2051.123046875, 2130]}, {"id": "7b73fd36-aa31-4771-9066-f6c83879994b", "name": "images", "type": "IMAGE", "linkIds": [14], "label": "reference images", "pos": [-2051.123046875, 2150]}], "outputs": [{"id": "c7b0d930-68a1-48d1-b496-0519e5837064", "name": "STRING", "type": "STRING", "linkIds": [13], "pos": [-620, 2130]}], "widgets": [], "nodes": [{"id": 11, "type": "GeminiNode", "pos": [-1560, 1990], "size": [470, 470], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "shape": 7, "type": "IMAGE", "link": 14}, {"localized_name": "audio", "name": "audio", "shape": 7, "type": "AUDIO", "link": null}, {"localized_name": "video", "name": "video", "shape": 7, "type": "VIDEO", "link": null}, {"localized_name": "files", "name": "files", "shape": 7, "type": "GEMINI_INPUT_FILES", "link": null}, {"localized_name": "prompt", "name": "prompt", "type": 
"STRING", "widget": {"name": "prompt"}, "link": 11}, {"localized_name": "model", "name": "model", "type": "COMBO", "widget": {"name": "model"}, "link": null}, {"localized_name": "seed", "name": "seed", "type": "INT", "widget": {"name": "seed"}, "link": null}, {"localized_name": "system_prompt", "name": "system_prompt", "shape": 7, "type": "STRING", "widget": {"name": "system_prompt"}, "link": null}], "outputs": [{"localized_name": "STRING", "name": "STRING", "type": "STRING", "links": [13]}], "properties": {"cnr_id": "comfy-core", "ver": "0.14.1", "Node name for S&R": "GeminiNode"}, "widgets_values": ["", "gemini-3-pro-preview", 42, "randomize", "You are an expert in prompt writing.\nBased on the input, rewrite the user's input into a detailed prompt.\nincluding camera settings, lighting, composition, and style.\nReturn the prompt only"], "color": "#432", "bgcolor": "#653"}], "groups": [], "links": [{"id": 11, "origin_id": -10, "origin_slot": 0, "target_id": 11, "target_slot": 4, "type": "STRING"}, {"id": 13, "origin_id": 11, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "STRING"}, {"id": 14, "origin_id": -10, "origin_slot": 1, "target_id": 11, "target_slot": 0, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Text generation/Prompt enhance"}]}, "extra": {}}
--- a/blueprints/Sharpen.json
+++ b/blueprints/Sharpen.json
@@ -1 +1 @@
-{"revision": 0, "last_node_id": 25, "last_link_id": 0, "nodes": [{"id": 25, "type": "621ba4e2-22a8-482d-a369-023753198b7b", "pos": [4610, -790], "size": [230, 58], "flags": {}, "order": 4, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": null}], "outputs": [{"label": "IMAGE", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": []}], "title": "Sharpen", "properties": {"proxyWidgets": [["24", "value"]]}, "widgets_values": []}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "621ba4e2-22a8-482d-a369-023753198b7b", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 24, "lastLinkId": 36, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Sharpen", "inputNode": {"id": -10, "bounding": [4090, -825, 120, 60]}, "outputNode": {"id": -20, "bounding": [5150, -825, 120, 60]}, "inputs": [{"id": "37011fb7-14b7-4e0e-b1a0-6a02e8da1fd7", "name": "images.image0", "type": "IMAGE", "linkIds": [34], "localized_name": "images.image0", "label": "image", "pos": [4190, -805]}], "outputs": [{"id": "e9182b3f-635c-4cd4-a152-4b4be17ae4b9", "name": "IMAGE0", "type": "IMAGE", "linkIds": [35], "localized_name": "IMAGE0", "label": "IMAGE", "pos": [5170, -805]}], "widgets": [], "nodes": [{"id": 24, "type": "PrimitiveFloat", "pos": [4280, -1240], "size": [270, 58], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "strength", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [36]}], "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0, "max": 3, "precision": 2, "step": 0.05}, "widgets_values": [0.5]}, {"id": 23, "type": "GLSLShader", "pos": [4570, -1240], "size": [370, 192], "flags": {}, "order": 1, "mode": 0, "inputs": [{"label": "image0", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", 
"link": 34}, {"label": "image1", "localized_name": "images.image1", "name": "images.image1", "shape": 7, "type": "IMAGE", "link": null}, {"label": "u_float0", "localized_name": "floats.u_float0", "name": "floats.u_float0", "shape": 7, "type": "FLOAT", "link": 36}, {"label": "u_float1", "localized_name": "floats.u_float1", "name": "floats.u_float1", "shape": 7, "type": "FLOAT", "link": null}, {"label": "u_int0", "localized_name": "ints.u_int0", "name": "ints.u_int0", "shape": 7, "type": "INT", "link": null}, {"localized_name": "fragment_shader", "name": "fragment_shader", "type": "STRING", "widget": {"name": "fragment_shader"}, "link": null}, {"localized_name": "size_mode", "name": "size_mode", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "size_mode"}, "link": null}], "outputs": [{"localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": [35]}, {"localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": null}], "properties": {"Node name for S&R": "GLSLShader"}, "widgets_values": ["#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform vec2 u_resolution;\nuniform float u_float0;  // strength [0.0 – 2.0] typical: 0.3–1.0\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\n\nvoid main() {\n    vec2 texel = 1.0 / u_resolution;\n    \n    // Sample center and neighbors\n    vec4 center = texture(u_image0, v_texCoord);\n    vec4 top    = texture(u_image0, v_texCoord + vec2( 0.0, -texel.y));\n    vec4 bottom = texture(u_image0, v_texCoord + vec2( 0.0,  texel.y));\n    vec4 left   = texture(u_image0, v_texCoord + vec2(-texel.x,  0.0));\n    vec4 right  = texture(u_image0, v_texCoord + vec2( texel.x,  0.0));\n    \n    // Edge enhancement (Laplacian)\n    vec4 edges = center * 4.0 - top - bottom - left - right;\n    \n    // Add edges back 
scaled by strength\n    vec4 sharpened = center + edges * u_float0;\n    \n    fragColor0 = vec4(clamp(sharpened.rgb, 0.0, 1.0), center.a);\n}", "from_input"]}], "groups": [], "links": [{"id": 36, "origin_id": 24, "origin_slot": 0, "target_id": 23, "target_slot": 2, "type": "FLOAT"}, {"id": 34, "origin_id": -10, "origin_slot": 0, "target_id": 23, "target_slot": 0, "type": "IMAGE"}, {"id": 35, "origin_id": 23, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image Tools/Sharpen"}]}}
+{"revision":0,"last_node_id":25,"last_link_id":0,"nodes":[{"id":25,"type":"621ba4e2-22a8-482d-a369-023753198b7b","pos":[4610,-790],"size":[230,58],"flags":{},"order":4,"mode":0,"inputs":[{"label":"image","localized_name":"images.image0","name":"images.image0","type":"IMAGE","link":null}],"outputs":[{"label":"IMAGE","localized_name":"IMAGE0","name":"IMAGE0","type":"IMAGE","links":[]}],"title":"Sharpen","properties":{"proxyWidgets":[["24","value"]]},"widgets_values":[]}],"links":[],"version":0.4,"definitions":{"subgraphs":[{"id":"621ba4e2-22a8-482d-a369-023753198b7b","version":1,"state":{"lastGroupId":0,"lastNodeId":24,"lastLinkId":36,"lastRerouteId":0},"revision":0,"config":{},"name":"Sharpen","inputNode":{"id":-10,"bounding":[4090,-825,120,60]},"outputNode":{"id":-20,"bounding":[5150,-825,120,60]},"inputs":[{"id":"37011fb7-14b7-4e0e-b1a0-6a02e8da1fd7","name":"images.image0","type":"IMAGE","linkIds":[34],"localized_name":"images.image0","label":"image","pos":[4190,-805]}],"outputs":[{"id":"e9182b3f-635c-4cd4-a152-4b4be17ae4b9","name":"IMAGE0","type":"IMAGE","linkIds":[35],"localized_name":"IMAGE0","label":"IMAGE","pos":[5170,-805]}],"widgets":[],"nodes":[{"id":24,"type":"PrimitiveFloat","pos":[4280,-1240],"size":[270,58],"flags":{},"order":0,"mode":0,"inputs":[{"label":"strength","localized_name":"value","name":"value","type":"FLOAT","widget":{"name":"value"},"link":null}],"outputs":[{"localized_name":"FLOAT","name":"FLOAT","type":"FLOAT","links":[36]}],"properties":{"Node name for 
S&R":"PrimitiveFloat","min":0,"max":3,"precision":2,"step":0.05},"widgets_values":[0.5]},{"id":23,"type":"GLSLShader","pos":[4570,-1240],"size":[370,192],"flags":{},"order":1,"mode":0,"inputs":[{"label":"image0","localized_name":"images.image0","name":"images.image0","type":"IMAGE","link":34},{"label":"image1","localized_name":"images.image1","name":"images.image1","shape":7,"type":"IMAGE","link":null},{"label":"u_float0","localized_name":"floats.u_float0","name":"floats.u_float0","shape":7,"type":"FLOAT","link":36},{"label":"u_float1","localized_name":"floats.u_float1","name":"floats.u_float1","shape":7,"type":"FLOAT","link":null},{"label":"u_int0","localized_name":"ints.u_int0","name":"ints.u_int0","shape":7,"type":"INT","link":null},{"localized_name":"fragment_shader","name":"fragment_shader","type":"STRING","widget":{"name":"fragment_shader"},"link":null},{"localized_name":"size_mode","name":"size_mode","type":"COMFY_DYNAMICCOMBO_V3","widget":{"name":"size_mode"},"link":null}],"outputs":[{"localized_name":"IMAGE0","name":"IMAGE0","type":"IMAGE","links":[35]},{"localized_name":"IMAGE1","name":"IMAGE1","type":"IMAGE","links":null},{"localized_name":"IMAGE2","name":"IMAGE2","type":"IMAGE","links":null},{"localized_name":"IMAGE3","name":"IMAGE3","type":"IMAGE","links":null}],"properties":{"Node name for S&R":"GLSLShader"},"widgets_values":["#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform vec2 u_resolution;\nuniform float u_float0;  // strength [0.0 – 2.0] typical: 0.3–1.0\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\n\nvoid main() {\n    vec2 texel = 1.0 / u_resolution;\n    \n    // Sample center and neighbors\n    vec4 center = texture(u_image0, v_texCoord);\n    vec4 top    = texture(u_image0, v_texCoord + vec2( 0.0, -texel.y));\n    vec4 bottom = texture(u_image0, v_texCoord + vec2( 0.0,  texel.y));\n    vec4 left   = texture(u_image0, v_texCoord + vec2(-texel.x,  0.0));\n    vec4 right  = texture(u_image0, 
v_texCoord + vec2( texel.x,  0.0));\n    \n    // Edge enhancement (Laplacian)\n    vec4 edges = center * 4.0 - top - bottom - left - right;\n    \n    // Add edges back scaled by strength\n    vec4 sharpened = center + edges * u_float0;\n    \n    fragColor0 = vec4(clamp(sharpened.rgb, 0.0, 1.0), center.a);\n}","from_input"]}],"groups":[],"links":[{"id":36,"origin_id":24,"origin_slot":0,"target_id":23,"target_slot":2,"type":"FLOAT"},{"id":34,"origin_id":-10,"origin_slot":0,"target_id":23,"target_slot":0,"type":"IMAGE"},{"id":35,"origin_id":23,"origin_slot":0,"target_id":-20,"target_slot":0,"type":"IMAGE"}],"extra":{"workflowRendererVersion":"LG"}}]}}
--- a/blueprints/Text
+++ b/blueprints/Text
--- a/(Z-Image-Turbo).json
+++ b/(Z-Image-Turbo).json
--- a/blueprints/Text
+++ b/blueprints/Text
--- a/blueprints/Unsharp
+++ b/blueprints/Unsharp
--- a/blueprints/Video
+++ b/blueprints/Video
--- a/blueprints/Video
+++ b/blueprints/Video
--- a/blueprints/Video
+++ b/blueprints/Video
--- a/blueprints/Video
+++ b/blueprints/Video
@@ -1 +0,0 @@
-{"revision": 0, "last_node_id": 13, "last_link_id": 0, "nodes": [{"id": 13, "type": "cf95b747-3e17-46cb-8097-cac60ff9b2e1", "pos": [1120, 330], "size": [240, 58], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "video", "name": "video", "type": "VIDEO", "link": null}, {"name": "model_name", "type": "COMBO", "widget": {"name": "model_name"}, "link": null}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", "links": []}], "title": "Video Upscale(GAN x4)", "properties": {"proxyWidgets": [["-1", "model_name"]], "cnr_id": "comfy-core", "ver": "0.14.1"}, "widgets_values": ["RealESRGAN_x4plus.safetensors"]}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "cf95b747-3e17-46cb-8097-cac60ff9b2e1", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 13, "lastLinkId": 19, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Video Upscale(GAN x4)", "inputNode": {"id": -10, "bounding": [550, 460, 120, 80]}, "outputNode": {"id": -20, "bounding": [1490, 460, 120, 60]}, "inputs": [{"id": "666d633e-93e7-42dc-8d11-2b7b99b0f2a6", "name": "video", "type": "VIDEO", "linkIds": [10], "localized_name": "video", "pos": [650, 480]}, {"id": "2e23a087-caa8-4d65-99e6-662761aa905a", "name": "model_name", "type": "COMBO", "linkIds": [19], "pos": [650, 500]}], "outputs": [{"id": "0c1768ea-3ec2-412f-9af6-8e0fa36dae70", "name": "VIDEO", "type": "VIDEO", "linkIds": [15], "localized_name": "VIDEO", "pos": [1510, 480]}], "widgets": [], "nodes": [{"id": 2, "type": "ImageUpscaleWithModel", "pos": [1110, 450], "size": [320, 46], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "upscale_model", "name": "upscale_model", "type": "UPSCALE_MODEL", "link": 1}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 14}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [13]}], "properties": {"cnr_id": "comfy-core", "ver": "0.10.0", "Node name for S&R": 
"ImageUpscaleWithModel"}}, {"id": 11, "type": "CreateVideo", "pos": [1110, 550], "size": [320, 78], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "link": 13}, {"localized_name": "audio", "name": "audio", "shape": 7, "type": "AUDIO", "link": 16}, {"localized_name": "fps", "name": "fps", "type": "FLOAT", "widget": {"name": "fps"}, "link": 12}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", "links": [15]}], "properties": {"cnr_id": "comfy-core", "ver": "0.10.0", "Node name for S&R": "CreateVideo"}, "widgets_values": [30]}, {"id": 10, "type": "GetVideoComponents", "pos": [1110, 330], "size": [320, 70], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "video", "name": "video", "type": "VIDEO", "link": 10}], "outputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "links": [14]}, {"localized_name": "audio", "name": "audio", "type": "AUDIO", "links": [16]}, {"localized_name": "fps", "name": "fps", "type": "FLOAT", "links": [12]}], "properties": {"cnr_id": "comfy-core", "ver": "0.10.0", "Node name for S&R": "GetVideoComponents"}}, {"id": 1, "type": "UpscaleModelLoader", "pos": [750, 450], "size": [280, 60], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "model_name", "name": "model_name", "type": "COMBO", "widget": {"name": "model_name"}, "link": 19}], "outputs": [{"localized_name": "UPSCALE_MODEL", "name": "UPSCALE_MODEL", "type": "UPSCALE_MODEL", "links": [1]}], "properties": {"cnr_id": "comfy-core", "ver": "0.10.0", "Node name for S&R": "UpscaleModelLoader", "models": [{"name": "RealESRGAN_x4plus.safetensors", "url": "https://huggingface.co/Comfy-Org/Real-ESRGAN_repackaged/resolve/main/RealESRGAN_x4plus.safetensors", "directory": "upscale_models"}]}, "widgets_values": ["RealESRGAN_x4plus.safetensors"]}], "groups": [], "links": [{"id": 1, "origin_id": 1, "origin_slot": 0, "target_id": 2, "target_slot": 0, "type": 
"UPSCALE_MODEL"}, {"id": 14, "origin_id": 10, "origin_slot": 0, "target_id": 2, "target_slot": 1, "type": "IMAGE"}, {"id": 13, "origin_id": 2, "origin_slot": 0, "target_id": 11, "target_slot": 0, "type": "IMAGE"}, {"id": 16, "origin_id": 10, "origin_slot": 1, "target_id": 11, "target_slot": 1, "type": "AUDIO"}, {"id": 12, "origin_id": 10, "origin_slot": 2, "target_id": 11, "target_slot": 2, "type": "FLOAT"}, {"id": 10, "origin_id": -10, "origin_slot": 0, "target_id": 10, "target_slot": 0, "type": "VIDEO"}, {"id": 15, "origin_id": 11, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "VIDEO"}, {"id": 19, "origin_id": -10, "origin_slot": 1, "target_id": 1, "target_slot": 0, "type": "COMBO"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Video generation and editing/Enhance video"}]}, "extra": {}}
--- a/comfy/comfy_types/node_typing.py
+++ b/comfy/comfy_types/node_typing.py
@@ -176,8 +176,6 @@ class InputTypeOptions(TypedDict):
    """COMBO type only. Specifies the configuration for a multi-select widget.
    Available after ComfyUI frontend v1.13.4
    https://github.com/Comfy-Org/ComfyUI_frontend/pull/2987"""
-    gradient_stops: NotRequired[list[list[float]]]
-    """Gradient color stops for gradientslider display mode. Each stop is [offset, r, g, b] (``FLOAT``)."""


 class HiddenInputTypeDict(TypedDict):
--- a/comfy/conds.py
+++ b/comfy/conds.py
@@ -4,25 +4,6 @@ import comfy.utils
 import logging


-def is_equal(x, y):
-    if torch.is_tensor(x) and torch.is_tensor(y):
-        return torch.equal(x, y)
-    elif isinstance(x, dict) and isinstance(y, dict):
-        if x.keys() != y.keys():
-            return False
-        return all(is_equal(x[k], y[k]) for k in x)
-    elif isinstance(x, (list, tuple)) and isinstance(y, (list, tuple)):
-        if type(x) is not type(y) or len(x) != len(y):
-            return False
-        return all(is_equal(a, b) for a, b in zip(x, y))
-    else:
-        try:
-            return x == y
-        except Exception:
-            logging.warning("comparison issue with COND")
-            return False
-
-
 class CONDRegular:
    def __init__(self, cond):
        self.cond = cond
@@ -103,7 +84,7 @@ class CONDConstant(CONDRegular):
        return self._copy_with(self.cond)

    def can_concat(self, other):
-        if not is_equal(self.cond, other.cond):
+        if self.cond != other.cond:
            return False
        return True

--- a/comfy/ldm/anima/model.py
+++ b/comfy/ldm/anima/model.py
@@ -179,8 +179,8 @@ class LLMAdapter(nn.Module):
            if source_attention_mask.ndim == 2:
                source_attention_mask = source_attention_mask.unsqueeze(1).unsqueeze(1)

+        x = self.in_proj(self.embed(target_input_ids))
        context = source_hidden_states
-        x = self.in_proj(self.embed(target_input_ids, out_dtype=context.dtype))
        position_ids = torch.arange(x.shape[1], device=x.device).unsqueeze(0)
        position_ids_context = torch.arange(context.shape[1], device=x.device).unsqueeze(0)
        position_embeddings = self.rotary_emb(x, position_ids)
--- a/comfy/ldm/chroma/model.py
+++ b/comfy/ldm/chroma/model.py
@@ -152,7 +152,6 @@ class Chroma(nn.Module):
        transformer_options={},
        attn_mask: Tensor = None,
    ) -> Tensor:
-        transformer_options = transformer_options.copy()
        patches_replace = transformer_options.get("patches_replace", {})

        # running on sequences img
@@ -229,7 +228,6 @@ class Chroma(nn.Module):

        transformer_options["total_blocks"] = len(self.single_blocks)
        transformer_options["block_type"] = "single"
-        transformer_options["img_slice"] = [txt.shape[1], img.shape[1]]
        for i, block in enumerate(self.single_blocks):
            transformer_options["block_index"] = i
            if i not in self.skip_dit:
--- a/comfy/ldm/flux/layers.py
+++ b/comfy/ldm/flux/layers.py
@@ -196,9 +196,6 @@ class DoubleStreamBlock(nn.Module):
        else:
            (img_mod1, img_mod2), (txt_mod1, txt_mod2) = vec

-        transformer_patches = transformer_options.get("patches", {})
-        extra_options = transformer_options.copy()
-
        # prepare image for attention
        img_modulated = self.img_norm1(img)
        img_modulated = apply_mod(img_modulated, (1 + img_mod1.scale), img_mod1.shift, modulation_dims_img)
@@ -227,12 +224,6 @@ class DoubleStreamBlock(nn.Module):
        attn = attention(q, k, v, pe=pe, mask=attn_mask, transformer_options=transformer_options)
        del q, k, v

-        if "attn1_output_patch" in transformer_patches:
-            extra_options["img_slice"] = [txt.shape[1], attn.shape[1]]
-            patch = transformer_patches["attn1_output_patch"]
-            for p in patch:
-                attn = p(attn, extra_options)
-
        txt_attn, img_attn = attn[:, : txt.shape[1]], attn[:, txt.shape[1]:]

        # calculate the img bloks
@@ -312,9 +303,6 @@ class SingleStreamBlock(nn.Module):
        else:
            mod = vec

-        transformer_patches = transformer_options.get("patches", {})
-        extra_options = transformer_options.copy()
-
        qkv, mlp = torch.split(self.linear1(apply_mod(self.pre_norm(x), (1 + mod.scale), mod.shift, modulation_dims)), [3 * self.hidden_size, self.mlp_hidden_dim_first], dim=-1)

        q, k, v = qkv.view(qkv.shape[0], qkv.shape[1], 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
@@ -324,12 +312,6 @@ class SingleStreamBlock(nn.Module):
        # compute attention
        attn = attention(q, k, v, pe=pe, mask=attn_mask, transformer_options=transformer_options)
        del q, k, v
-
-        if "attn1_output_patch" in transformer_patches:
-            patch = transformer_patches["attn1_output_patch"]
-            for p in patch:
-                attn = p(attn, extra_options)
-
        # compute activation in mlp stream, cat again and run second linear layer
        if self.yak_mlp:
            mlp = self.mlp_act(mlp[..., self.mlp_hidden_dim_first // 2:]) * mlp[..., :self.mlp_hidden_dim_first // 2]
--- a/comfy/ldm/flux/model.py
+++ b/comfy/ldm/flux/model.py
@@ -142,7 +142,6 @@ class Flux(nn.Module):
        attn_mask: Tensor = None,
    ) -> Tensor:

-        transformer_options = transformer_options.copy()
        patches = transformer_options.get("patches", {})
        patches_replace = transformer_options.get("patches_replace", {})
        if img.ndim != 3 or txt.ndim != 3:
@@ -232,7 +231,6 @@ class Flux(nn.Module):

        transformer_options["total_blocks"] = len(self.single_blocks)
        transformer_options["block_type"] = "single"
-        transformer_options["img_slice"] = [txt.shape[1], img.shape[1]]
        for i, block in enumerate(self.single_blocks):
            transformer_options["block_index"] = i
            if ("single_block", i) in blocks_replace:
--- a/comfy/ldm/hunyuan_video/model.py
+++ b/comfy/ldm/hunyuan_video/model.py
@@ -304,7 +304,6 @@ class HunyuanVideo(nn.Module):
        control=None,
        transformer_options={},
    ) -> Tensor:
-        transformer_options = transformer_options.copy()
        patches_replace = transformer_options.get("patches_replace", {})

        initial_shape = list(img.shape)
@@ -417,7 +416,6 @@ class HunyuanVideo(nn.Module):

        transformer_options["total_blocks"] = len(self.single_blocks)
        transformer_options["block_type"] = "single"
-        transformer_options["img_slice"] = [txt.shape[1], img.shape[1]]
        for i, block in enumerate(self.single_blocks):
            transformer_options["block_index"] = i
            if ("single_block", i) in blocks_replace:
--- a/comfy/ldm/lightricks/av_model.py
+++ b/comfy/ldm/lightricks/av_model.py
@@ -9,7 +9,6 @@ from comfy.ldm.lightricks.model import (
    LTXVModel,
 )
 from comfy.ldm.lightricks.symmetric_patchifier import AudioPatchifier
-from comfy.ldm.lightricks.embeddings_connector import Embeddings1DConnector
 import comfy.ldm.common_dit

 class CompressedTimestep:
@@ -218,7 +217,7 @@ class BasicAVTransformerBlock(nn.Module):
    def forward(
        self, x: Tuple[torch.Tensor, torch.Tensor], v_context=None, a_context=None, attention_mask=None, v_timestep=None, a_timestep=None,
        v_pe=None, a_pe=None, v_cross_pe=None, a_cross_pe=None, v_cross_scale_shift_timestep=None, a_cross_scale_shift_timestep=None,
-        v_cross_gate_timestep=None, a_cross_gate_timestep=None, transformer_options=None, self_attention_mask=None,
+        v_cross_gate_timestep=None, a_cross_gate_timestep=None, transformer_options=None,
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        run_vx = transformer_options.get("run_vx", True)
        run_ax = transformer_options.get("run_ax", True)
@@ -234,7 +233,7 @@ class BasicAVTransformerBlock(nn.Module):
            vshift_msa, vscale_msa = (self.get_ada_values(self.scale_shift_table, vx.shape[0], v_timestep, slice(0, 2)))
            norm_vx = comfy.ldm.common_dit.rms_norm(vx) * (1 + vscale_msa) + vshift_msa
            del vshift_msa, vscale_msa
-            attn1_out = self.attn1(norm_vx, pe=v_pe, mask=self_attention_mask, transformer_options=transformer_options)
+            attn1_out = self.attn1(norm_vx, pe=v_pe, transformer_options=transformer_options)
            del norm_vx
            # video cross-attention
            vgate_msa = self.get_ada_values(self.scale_shift_table, vx.shape[0], v_timestep, slice(2, 3))[0]
@@ -451,29 +450,6 @@ class LTXAVModel(LTXVModel):
            operations=self.operations,
        )

-        self.audio_embeddings_connector = Embeddings1DConnector(
-            split_rope=True,
-            double_precision_rope=True,
-            dtype=dtype,
-            device=device,
-            operations=self.operations,
-        )
-
-        self.video_embeddings_connector = Embeddings1DConnector(
-            split_rope=True,
-            double_precision_rope=True,
-            dtype=dtype,
-            device=device,
-            operations=self.operations,
-        )
-
-    def preprocess_text_embeds(self, context):
-        if context.shape[-1] == self.caption_channels * 2:
-            return context
-        out_vid = self.video_embeddings_connector(context)[0]
-        out_audio = self.audio_embeddings_connector(context)[0]
-        return torch.concat((out_vid, out_audio), dim=-1)
-
    def _init_transformer_blocks(self, device, dtype, **kwargs):
        """Initialize transformer blocks for LTXAV."""
        self.transformer_blocks = nn.ModuleList(
@@ -726,7 +702,7 @@ class LTXAVModel(LTXVModel):
        return [(v_pe, av_cross_video_freq_cis), (a_pe, av_cross_audio_freq_cis)]

    def _process_transformer_blocks(
-        self, x, context, attention_mask, timestep, pe, transformer_options={}, self_attention_mask=None, **kwargs
+        self, x, context, attention_mask, timestep, pe, transformer_options={}, **kwargs
    ):
        vx = x[0]
        ax = x[1]
@@ -770,7 +746,6 @@ class LTXAVModel(LTXVModel):
                        v_cross_gate_timestep=args["v_cross_gate_timestep"],
                        a_cross_gate_timestep=args["a_cross_gate_timestep"],
                        transformer_options=args["transformer_options"],
-                        self_attention_mask=args.get("self_attention_mask"),
                    )
                    return out

@@ -791,7 +766,6 @@ class LTXAVModel(LTXVModel):
                        "v_cross_gate_timestep": av_ca_a2v_gate_noise_timestep,
                        "a_cross_gate_timestep": av_ca_v2a_gate_noise_timestep,
                        "transformer_options": transformer_options,
-                        "self_attention_mask": self_attention_mask,
                    },
                    {"original_block": block_wrap},
                )
@@ -813,7 +787,6 @@ class LTXAVModel(LTXVModel):
                    v_cross_gate_timestep=av_ca_a2v_gate_noise_timestep,
                    a_cross_gate_timestep=av_ca_v2a_gate_noise_timestep,
                    transformer_options=transformer_options,
-                    self_attention_mask=self_attention_mask,
                )

        return [vx, ax]
--- a/comfy/ldm/lightricks/embeddings_connector.py
+++ b/comfy/ldm/lightricks/embeddings_connector.py
@@ -157,9 +157,11 @@ class Embeddings1DConnector(nn.Module):
        self.num_learnable_registers = num_learnable_registers
        if self.num_learnable_registers:
            self.learnable_registers = nn.Parameter(
-                torch.empty(
+                torch.rand(
                    self.num_learnable_registers, inner_dim, dtype=dtype, device=device
                )
+                * 2.0
+                - 1.0
            )

    def get_fractional_positions(self, indices_grid):
@@ -232,7 +234,7 @@ class Embeddings1DConnector(nn.Module):

        return indices

-    def precompute_freqs_cis(self, indices_grid, spacing="exp", out_dtype=None):
+    def precompute_freqs_cis(self, indices_grid, spacing="exp"):
        dim = self.inner_dim
        n_elem = 2  # 2 because of cos and sin
        freqs = self.precompute_freqs(indices_grid, spacing)
@@ -245,7 +247,7 @@ class Embeddings1DConnector(nn.Module):
            )
        else:
            cos_freq, sin_freq = interleaved_freqs_cis(freqs, dim % n_elem)
-        return cos_freq.to(dtype=out_dtype), sin_freq.to(dtype=out_dtype), self.split_rope
+        return cos_freq.to(self.dtype), sin_freq.to(self.dtype), self.split_rope

    def forward(
        self,
@@ -286,7 +288,7 @@ class Embeddings1DConnector(nn.Module):
            hidden_states.shape[1], dtype=torch.float32, device=hidden_states.device
        )
        indices_grid = indices_grid[None, None, :]
-        freqs_cis = self.precompute_freqs_cis(indices_grid, out_dtype=hidden_states.dtype)
+        freqs_cis = self.precompute_freqs_cis(indices_grid)

        # 2. Blocks
        for block_idx, block in enumerate(self.transformer_1d_blocks):
--- a/comfy/ldm/lightricks/model.py
+++ b/comfy/ldm/lightricks/model.py
@@ -1,7 +1,6 @@
 from abc import ABC, abstractmethod
 from enum import Enum
 import functools
-import logging
 import math
 from typing import Dict, Optional, Tuple

@@ -15,8 +14,6 @@ import comfy.ldm.common_dit

 from .symmetric_patchifier import SymmetricPatchifier, latent_to_pixel_coords

-logger = logging.getLogger(__name__)
-
 def _log_base(x, base):
    return np.log(x) / np.log(base)

@@ -418,12 +415,12 @@ class BasicTransformerBlock(nn.Module):

        self.scale_shift_table = nn.Parameter(torch.empty(6, dim, device=device, dtype=dtype))

-    def forward(self, x, context=None, attention_mask=None, timestep=None, pe=None, transformer_options={}, self_attention_mask=None):
+    def forward(self, x, context=None, attention_mask=None, timestep=None, pe=None, transformer_options={}):
        shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp = (self.scale_shift_table[None, None].to(device=x.device, dtype=x.dtype) + timestep.reshape(x.shape[0], timestep.shape[1], self.scale_shift_table.shape[0], -1)).unbind(dim=2)

        attn1_input = comfy.ldm.common_dit.rms_norm(x)
        attn1_input = torch.addcmul(attn1_input, attn1_input, scale_msa).add_(shift_msa)
-        attn1_input = self.attn1(attn1_input, pe=pe, mask=self_attention_mask, transformer_options=transformer_options)
+        attn1_input = self.attn1(attn1_input, pe=pe, transformer_options=transformer_options)
        x.addcmul_(attn1_input, gate_msa)
        del attn1_input

@@ -641,16 +638,8 @@ class LTXBaseModel(torch.nn.Module, ABC):
        """Process input data. Must be implemented by subclasses."""
        pass

-    def _build_guide_self_attention_mask(self, x, transformer_options, merged_args):
-        """Build self-attention mask for per-guide attention attenuation.
-
-        Base implementation returns None (no attenuation). Subclasses that
-        support guide-based attention control should override this.
-        """
-        return None
-
    @abstractmethod
-    def _process_transformer_blocks(self, x, context, attention_mask, timestep, pe, self_attention_mask=None, **kwargs):
+    def _process_transformer_blocks(self, x, context, attention_mask, timestep, pe, **kwargs):
        """Process transformer blocks. Must be implemented by subclasses."""
        pass

@@ -799,17 +788,9 @@ class LTXBaseModel(torch.nn.Module, ABC):
        attention_mask = self._prepare_attention_mask(attention_mask, input_dtype)
        pe = self._prepare_positional_embeddings(pixel_coords, frame_rate, input_dtype)

-        # Build self-attention mask for per-guide attenuation
-        self_attention_mask = self._build_guide_self_attention_mask(
-            x, transformer_options, merged_args
-        )
-
        # Process transformer blocks
        x = self._process_transformer_blocks(
-            x, context, attention_mask, timestep, pe,
-            transformer_options=transformer_options,
-            self_attention_mask=self_attention_mask,
-            **merged_args,
+            x, context, attention_mask, timestep, pe, transformer_options=transformer_options, **merged_args
        )

        # Process output
@@ -909,243 +890,13 @@ class LTXVModel(LTXBaseModel):
            pixel_coords = pixel_coords[:, :, grid_mask, ...]

            kf_grid_mask = grid_mask[-keyframe_idxs.shape[2]:]
-
-            # Compute per-guide surviving token counts from guide_attention_entries.
-            # Each entry tracks one guide reference; they are appended in order and
-            # their pre_filter_counts partition the kf_grid_mask.
-            guide_entries = kwargs.get("guide_attention_entries", None)
-            if guide_entries:
-                total_pfc = sum(e["pre_filter_count"] for e in guide_entries)
-                if total_pfc != len(kf_grid_mask):
-                    raise ValueError(
-                        f"guide pre_filter_counts ({total_pfc}) != "
-                        f"keyframe grid mask length ({len(kf_grid_mask)})"
-                    )
-                resolved_entries = []
-                offset = 0
-                for entry in guide_entries:
-                    pfc = entry["pre_filter_count"]
-                    entry_mask = kf_grid_mask[offset:offset + pfc]
-                    surviving = int(entry_mask.sum().item())
-                    resolved_entries.append({
-                        **entry,
-                        "surviving_count": surviving,
-                    })
-                    offset += pfc
-                additional_args["resolved_guide_entries"] = resolved_entries
-
            keyframe_idxs = keyframe_idxs[..., kf_grid_mask, :]
            pixel_coords[:, :, -keyframe_idxs.shape[2]:, :] = keyframe_idxs

-            # Total surviving guide tokens (all guides)
-            additional_args["num_guide_tokens"] = keyframe_idxs.shape[2]
-
        x = self.patchify_proj(x)
        return x, pixel_coords, additional_args

-    def _build_guide_self_attention_mask(self, x, transformer_options, merged_args):
-        """Build self-attention mask for per-guide attention attenuation.
-
-        Reads resolved_guide_entries from merged_args (computed in _process_input)
-        to build a log-space additive bias mask that attenuates noisy ↔ guide
-        attention for each guide reference independently.
-
-        Returns None if no attenuation is needed (all strengths == 1.0 and no
-        spatial masks, or no guide tokens).
-        """
-        if isinstance(x, list):
-            # AV model: x = [vx, ax]; use vx for token count and device
-            total_tokens = x[0].shape[1]
-            device = x[0].device
-            dtype = x[0].dtype
-        else:
-            total_tokens = x.shape[1]
-            device = x.device
-            dtype = x.dtype
-
-        num_guide_tokens = merged_args.get("num_guide_tokens", 0)
-        if num_guide_tokens == 0:
-            return None
-
-        resolved_entries = merged_args.get("resolved_guide_entries", None)
-        if not resolved_entries:
-            return None
-
-        # Check if any attenuation is actually needed
-        needs_attenuation = any(
-            e["strength"] < 1.0 or e.get("pixel_mask") is not None
-            for e in resolved_entries
-        )
-        if not needs_attenuation:
-            return None
-
-        # Build per-guide-token weights for all tracked guide tokens.
-        # Guides are appended in order at the end of the sequence.
-        guide_start = total_tokens - num_guide_tokens
-        all_weights = []
-        total_tracked = 0
-
-        for entry in resolved_entries:
-            surviving = entry["surviving_count"]
-            if surviving == 0:
-                continue
-
-            strength = entry["strength"]
-            pixel_mask = entry.get("pixel_mask")
-            latent_shape = entry.get("latent_shape")
-
-            if pixel_mask is not None and latent_shape is not None:
-                f_lat, h_lat, w_lat = latent_shape
-                per_token = self._downsample_mask_to_latent(
-                    pixel_mask.to(device=device, dtype=dtype),
-                    f_lat, h_lat, w_lat,
-                )
-                # per_token shape: (B, f_lat*h_lat*w_lat).
-                # Collapse batch dim — the mask is assumed identical across the
-                # batch; validate and take the first element to get (1, tokens).
-                if per_token.shape[0] > 1:
-                    ref = per_token[0]
-                    for bi in range(1, per_token.shape[0]):
-                        if not torch.equal(ref, per_token[bi]):
-                            logger.warning(
-                                "pixel_mask differs across batch elements; "
-                                "using first element only."
-                            )
-                            break
-                    per_token = per_token[:1]
-                # `surviving` is the post-grid_mask token count.
-                # Clamp to surviving to handle any mismatch safely.
-                n_weights = min(per_token.shape[1], surviving)
-                weights = per_token[:, :n_weights] * strength  # (1, n_weights)
-            else:
-                weights = torch.full(
-                    (1, surviving), strength, device=device, dtype=dtype
-                )
-
-            all_weights.append(weights)
-            total_tracked += weights.shape[1]
-
-        if not all_weights:
-            return None
-
-        # Concatenate per-token weights for all tracked guides
-        tracked_weights = torch.cat(all_weights, dim=1)  # (1, total_tracked)
-
-        # Check if any weight is actually < 1.0 (otherwise no attenuation needed)
-        if (tracked_weights >= 1.0).all():
-            return None
-
-        # Build the mask: guide tokens are at the end of the sequence.
-        # Tracked guides come first (in order), untracked follow.
-        return self._build_self_attention_mask(
-            total_tokens, num_guide_tokens, total_tracked,
-            tracked_weights, guide_start, device, dtype,
-        )
-
-    @staticmethod
-    def _downsample_mask_to_latent(mask, f_lat, h_lat, w_lat):
-        """Downsample a pixel-space mask to per-token latent weights.
-
-        Args:
-            mask: (B, 1, F_pix, H_pix, W_pix) pixel-space mask with values in [0, 1].
-            f_lat: Number of latent frames (pre-dilation original count).
-            h_lat: Latent height (pre-dilation original height).
-            w_lat: Latent width (pre-dilation original width).
-
-        Returns:
-            (B, F_lat * H_lat * W_lat) flattened per-token weights.
-        """
-        b = mask.shape[0]
-        f_pix = mask.shape[2]
-
-        # Spatial downsampling: area interpolation per frame
-        spatial_down = torch.nn.functional.interpolate(
-            rearrange(mask, "b 1 f h w -> (b f) 1 h w"),
-            size=(h_lat, w_lat),
-            mode="area",
-        )
-        spatial_down = rearrange(spatial_down, "(b f) 1 h w -> b 1 f h w", b=b)
-
-        # Temporal downsampling: first pixel frame maps to first latent frame,
-        # remaining pixel frames are averaged in groups for causal temporal structure.
-        first_frame = spatial_down[:, :, :1, :, :]
-        if f_pix > 1 and f_lat > 1:
-            remaining_pix = f_pix - 1
-            remaining_lat = f_lat - 1
-            t = remaining_pix // remaining_lat
-            if t < 1:
-                # Fewer pixel frames than latent frames — upsample by repeating
-                # the available pixel frames via nearest interpolation.
-                rest_flat = rearrange(
-                    spatial_down[:, :, 1:, :, :],
-                    "b 1 f h w -> (b h w) 1 f",
-                )
-                rest_up = torch.nn.functional.interpolate(
-                    rest_flat, size=remaining_lat, mode="nearest",
-                )
-                rest = rearrange(
-                    rest_up, "(b h w) 1 f -> b 1 f h w",
-                    b=b, h=h_lat, w=w_lat,
-                )
-            else:
-                # Trim trailing pixel frames that don't fill a complete group
-                usable = remaining_lat * t
-                rest = rearrange(
-                    spatial_down[:, :, 1:1 + usable, :, :],
-                    "b 1 (f t) h w -> b 1 f t h w",
-                    t=t,
-                )
-                rest = rest.mean(dim=3)
-            latent_mask = torch.cat([first_frame, rest], dim=2)
-        elif f_lat > 1:
-            # Single pixel frame but multiple latent frames — repeat the
-            # single frame across all latent frames.
-            latent_mask = first_frame.expand(-1, -1, f_lat, -1, -1)
-        else:
-            latent_mask = first_frame
-
-        return rearrange(latent_mask, "b 1 f h w -> b (f h w)")
-
-    @staticmethod
-    def _build_self_attention_mask(total_tokens, num_guide_tokens, tracked_count,
-                                    tracked_weights, guide_start, device, dtype):
-        """Build a log-space additive self-attention bias mask.
-
-        Attenuates attention between noisy tokens and tracked guide tokens.
-        Untracked guide tokens (at the end of the guide portion) keep full attention.
-
-        Args:
-            total_tokens: Total sequence length.
-            num_guide_tokens: Total guide tokens (all guides) at end of sequence.
-            tracked_count: Number of tracked guide tokens (first in the guide portion).
-            tracked_weights: (1, tracked_count) tensor, values in [0, 1].
-            guide_start: Index where guide tokens begin in the sequence.
-            device: Target device.
-            dtype: Target dtype.
-
-        Returns:
-            (1, 1, total_tokens, total_tokens) additive bias mask.
-            0.0 = full attention, negative = attenuated, finfo.min = effectively fully masked.
-        """
-        finfo = torch.finfo(dtype)
-        mask = torch.zeros((1, 1, total_tokens, total_tokens), device=device, dtype=dtype)
-        tracked_end = guide_start + tracked_count
-
-        # Convert weights to log-space bias
-        w = tracked_weights.to(device=device, dtype=dtype)  # (1, tracked_count)
-        log_w = torch.full_like(w, finfo.min)
-        positive_mask = w > 0
-        if positive_mask.any():
-            log_w[positive_mask] = torch.log(w[positive_mask].clamp(min=finfo.tiny))
-
-        # noisy → tracked guides: each noisy row gets the same per-guide weight
-        mask[:, :, :guide_start, guide_start:tracked_end] = log_w.view(1, 1, 1, -1)
-        # tracked guides → noisy: each guide row broadcasts its weight across noisy cols
-        mask[:, :, guide_start:tracked_end, :guide_start] = log_w.view(1, 1, -1, 1)
-
-        return mask
-
-    def _process_transformer_blocks(self, x, context, attention_mask, timestep, pe, transformer_options={}, self_attention_mask=None, **kwargs):
+    def _process_transformer_blocks(self, x, context, attention_mask, timestep, pe, transformer_options={}, **kwargs):
        """Process transformer blocks for LTXV."""
        patches_replace = transformer_options.get("patches_replace", {})
        blocks_replace = patches_replace.get("dit", {})
@@ -1155,10 +906,10 @@ class LTXVModel(LTXBaseModel):

                def block_wrap(args):
                    out = {}
-                    out["img"] = block(args["img"], context=args["txt"], attention_mask=args["attention_mask"], timestep=args["vec"], pe=args["pe"], transformer_options=args["transformer_options"], self_attention_mask=args.get("self_attention_mask"))
+                    out["img"] = block(args["img"], context=args["txt"], attention_mask=args["attention_mask"], timestep=args["vec"], pe=args["pe"], transformer_options=args["transformer_options"])
                    return out

-                out = blocks_replace[("double_block", i)]({"img": x, "txt": context, "attention_mask": attention_mask, "vec": timestep, "pe": pe, "transformer_options": transformer_options, "self_attention_mask": self_attention_mask}, {"original_block": block_wrap})
+                out = blocks_replace[("double_block", i)]({"img": x, "txt": context, "attention_mask": attention_mask, "vec": timestep, "pe": pe, "transformer_options": transformer_options}, {"original_block": block_wrap})
                x = out["img"]
            else:
                x = block(
@@ -1168,7 +919,6 @@ class LTXVModel(LTXBaseModel):
                    timestep=timestep,
                    pe=pe,
                    transformer_options=transformer_options,
-                    self_attention_mask=self_attention_mask,
                )

        return x
--- a/comfy/ldm/modules/diffusionmodules/model.py
+++ b/comfy/ldm/modules/diffusionmodules/model.py
@@ -102,7 +102,28 @@ class VideoConv3d(nn.Module):
        return self.conv(x)

 def interpolate_up(x, scale_factor):
-    return torch.nn.functional.interpolate(x, scale_factor=scale_factor, mode="nearest")
+    """Upsample ``x`` with nearest-neighbour interpolation.
+
+    If the direct call raises (observed for bf16 inputs), redo the work in
+    float32 over slices of dim 1 and cast the result back to ``x.dtype``.
+    The fallback indexes ``scale_factor`` once per dim after the first two,
+    so it must be a sequence on that path.
+    """
+    try:
+        return torch.nn.functional.interpolate(x, scale_factor=scale_factor, mode="nearest")
+    except Exception:  # operation not implemented for bf16
+        orig_shape = list(x.shape)
+        out_shape = orig_shape[:2]
+        for i in range(len(orig_shape) - 2):
+            out_shape.append(round(orig_shape[i + 2] * scale_factor[i]))
+        out = torch.empty(out_shape, dtype=x.dtype, layout=x.layout, device=x.device)
+        split = 8
+        # Slice width along dim 1; clamp to 1 so range() below never gets a
+        # zero step when dim 1 has fewer than `split` entries.
+        l = max(1, out.shape[1] // split)
+        for i in range(0, out.shape[1], l):
+            out[:,i:i+l] = torch.nn.functional.interpolate(x[:,i:i+l].to(torch.float32), scale_factor=scale_factor, mode="nearest").to(x.dtype)
+        return out

 class Upsample(nn.Module):
    def __init__(self, in_channels, with_conv, conv_op=ops.Conv2d, scale_factor=2.0):
--- a/comfy/ldm/modules/diffusionmodules/openaimodel.py
+++ b/comfy/ldm/modules/diffusionmodules/openaimodel.py
@@ -18,8 +18,6 @@ import comfy.patcher_extension
 import comfy.ops
 ops = comfy.ops.disable_weight_init

-from ..sdpose import HeatmapHead
-
 class TimestepBlock(nn.Module):
    """
    Any module where forward() takes timestep embeddings as a second argument.
@@ -443,7 +441,6 @@ class UNetModel(nn.Module):
        disable_temporal_crossattention=False,
        max_ddpm_temb_period=10000,
        attn_precision=None,
-        heatmap_head=False,
        device=None,
        operations=ops,
    ):
@@ -830,9 +827,6 @@ class UNetModel(nn.Module):
            #nn.LogSoftmax(dim=1)  # change to cross_entropy and produce non-normalized logits
        )

-        if heatmap_head:
-            self.heatmap_head = HeatmapHead(device=device, dtype=self.dtype, operations=operations)
-
    def forward(self, x, timesteps=None, context=None, y=None, control=None, transformer_options={}, **kwargs):
        return comfy.patcher_extension.WrapperExecutor.new_class_executor(
            self._forward,
--- a/comfy/ldm/modules/sdpose.py
+++ b/comfy/ldm/modules/sdpose.py
@@ -1,130 +0,0 @@
-import torch
-import numpy as np
-from scipy.ndimage import gaussian_filter
-
-class HeatmapHead(torch.nn.Module):
-    def __init__(
-            self,
-            in_channels=640,
-            out_channels=133,
-            input_size=(768, 1024),
-            heatmap_scale=4,
-            deconv_out_channels=(640,),
-            deconv_kernel_sizes=(4,),
-            conv_out_channels=(640,),
-            conv_kernel_sizes=(1,),
-            final_layer_kernel_size=1,
-            device=None, dtype=None, operations=None
-        ):
-        super().__init__()
-
-        self.heatmap_size = (input_size[0] // heatmap_scale, input_size[1] // heatmap_scale)
-        self.scale_factor = ((np.array(input_size) - 1) / (np.array(self.heatmap_size) - 1)).astype(np.float32)
-
-        # Deconv layers
-        if deconv_out_channels:
-            deconv_layers = []
-            for out_ch, kernel_size in zip(deconv_out_channels, deconv_kernel_sizes):
-                if kernel_size == 4:
-                    padding, output_padding = 1, 0
-                elif kernel_size == 3:
-                    padding, output_padding = 1, 1
-                elif kernel_size == 2:
-                    padding, output_padding = 0, 0
-                else:
-                    raise ValueError(f'Unsupported kernel size {kernel_size}')
-
-                deconv_layers.extend([
-                    operations.ConvTranspose2d(in_channels, out_ch, kernel_size,
-                                     stride=2, padding=padding, output_padding=output_padding, bias=False, device=device, dtype=dtype),
-                    torch.nn.InstanceNorm2d(out_ch, device=device, dtype=dtype),
-                    torch.nn.SiLU(inplace=True)
-                ])
-                in_channels = out_ch
-            self.deconv_layers = torch.nn.Sequential(*deconv_layers)
-        else:
-            self.deconv_layers = torch.nn.Identity()
-
-        # Conv layers
-        if conv_out_channels:
-            conv_layers = []
-            for out_ch, kernel_size in zip(conv_out_channels, conv_kernel_sizes):
-                padding = (kernel_size - 1) // 2
-                conv_layers.extend([
-                    operations.Conv2d(in_channels, out_ch, kernel_size,
-                            stride=1, padding=padding, device=device, dtype=dtype),
-                    torch.nn.InstanceNorm2d(out_ch, device=device, dtype=dtype),
-                    torch.nn.SiLU(inplace=True)
-                ])
-                in_channels = out_ch
-            self.conv_layers = torch.nn.Sequential(*conv_layers)
-        else:
-            self.conv_layers = torch.nn.Identity()
-
-        self.final_layer = operations.Conv2d(in_channels, out_channels, kernel_size=final_layer_kernel_size, padding=final_layer_kernel_size // 2, device=device, dtype=dtype)
-
-    def forward(self, x): # Decode heatmaps to keypoints
-        heatmaps = self.final_layer(self.conv_layers(self.deconv_layers(x)))
-        heatmaps_np = heatmaps.float().cpu().numpy()  # (B, K, H, W)
-        B, K, H, W = heatmaps_np.shape
-
-        batch_keypoints = []
-        batch_scores = []
-
-        for b in range(B):
-            hm = heatmaps_np[b].copy()  # (K, H, W)
-
-            # --- vectorised argmax ---
-            flat = hm.reshape(K, -1)
-            idx = np.argmax(flat, axis=1)
-            scores = flat[np.arange(K), idx].copy()
-            y_locs, x_locs = np.unravel_index(idx, (H, W))
-            keypoints = np.stack([x_locs, y_locs], axis=-1).astype(np.float32)  # (K, 2) in heatmap space
-            invalid = scores <= 0.
-            keypoints[invalid] = -1
-
-            # --- DARK sub-pixel refinement (UDP) ---
-            # 1. Gaussian blur with max-preserving normalisation
-            border = 5  # (kernel-1)//2 for kernel=11
-            for k in range(K):
-                origin_max = np.max(hm[k])
-                dr = np.zeros((H + 2 * border, W + 2 * border), dtype=np.float32)
-                dr[border:-border, border:-border] = hm[k].copy()
-                dr = gaussian_filter(dr, sigma=2.0)
-                hm[k] = dr[border:-border, border:-border].copy()
-                cur_max = np.max(hm[k])
-                if cur_max > 0:
-                    hm[k] *= origin_max / cur_max
-            # 2. Log-space for Taylor expansion
-            np.clip(hm, 1e-3, 50., hm)
-            np.log(hm, hm)
-            # 3. Hessian-based Newton step
-            hm_pad = np.pad(hm, ((0, 0), (1, 1), (1, 1)), mode='edge').flatten()
-            index = keypoints[:, 0] + 1 + (keypoints[:, 1] + 1) * (W + 2)
-            index += (W + 2) * (H + 2) * np.arange(0, K)
-            index = index.astype(int).reshape(-1, 1)
-            i_       = hm_pad[index]
-            ix1      = hm_pad[index + 1]
-            iy1      = hm_pad[index + W + 2]
-            ix1y1    = hm_pad[index + W + 3]
-            ix1_y1_  = hm_pad[index - W - 3]
-            ix1_     = hm_pad[index - 1]
-            iy1_     = hm_pad[index - 2 - W]
-            dx = 0.5 * (ix1 - ix1_)
-            dy = 0.5 * (iy1 - iy1_)
-            derivative = np.concatenate([dx, dy], axis=1).reshape(K, 2, 1)
-            dxx = ix1  - 2 * i_ + ix1_
-            dyy = iy1  - 2 * i_ + iy1_
-            dxy = 0.5 * (ix1y1 - ix1 - iy1 + i_ + i_ - ix1_ - iy1_ + ix1_y1_)
-            hessian = np.concatenate([dxx, dxy, dxy, dyy], axis=1).reshape(K, 2, 2)
-            hessian = np.linalg.inv(hessian + np.finfo(np.float32).eps * np.eye(2))
-            keypoints -= np.einsum('imn,ink->imk', hessian, derivative).squeeze(axis=-1)
-
-            # --- restore to input image space ---
-            keypoints = keypoints * self.scale_factor
-            keypoints[invalid] = -1
-
-            batch_keypoints.append(keypoints)
-            batch_scores.append(scores)
-
-        return batch_keypoints, batch_scores
--- a/comfy/ldm/wan/vae.py
+++ b/comfy/ldm/wan/vae.py
@@ -459,7 +459,6 @@ class WanVAE(nn.Module):
                 attn_scales=[],
                 temperal_downsample=[True, True, False],
                 image_channels=3,
-                 conv_out_channels=3,
                 dropout=0.0):
        super().__init__()
        self.dim = dim
@@ -475,7 +474,7 @@ class WanVAE(nn.Module):
                                 attn_scales, self.temperal_downsample, dropout)
        self.conv1 = CausalConv3d(z_dim * 2, z_dim * 2, 1)
        self.conv2 = CausalConv3d(z_dim, z_dim, 1)
-        self.decoder = Decoder3d(dim, z_dim, conv_out_channels, dim_mult, num_res_blocks,
+        self.decoder = Decoder3d(dim, z_dim, image_channels, dim_mult, num_res_blocks,
                                 attn_scales, self.temperal_upsample, dropout)

    def encode(self, x):
@@ -485,7 +484,7 @@ class WanVAE(nn.Module):
        iter_ = 1 + (t - 1) // 4
        feat_map = None
        if iter_ > 1:
-            feat_map = [None] * count_conv3d(self.encoder)
+            feat_map = [None] * count_conv3d(self.decoder)
        ## 对encode输入的x，按时间拆分为1、4、4、4....
        for i in range(iter_):
            conv_idx = [0]
--- a/comfy/lora.py
+++ b/comfy/lora.py
@@ -337,7 +337,6 @@ def model_lora_keys_unet(model, key_map={}):
            if k.startswith("diffusion_model.decoder.") and k.endswith(".weight"):
                key_lora = k[len("diffusion_model.decoder."):-len(".weight")]
                key_map["base_model.model.{}".format(key_lora)] = k  # Official base model loras
-                key_map["lycoris_{}".format(key_lora.replace(".", "_"))] = k  # LyCORIS/LoKR format

    return key_map

@@ -375,31 +374,6 @@ def pad_tensor_to_shape(tensor: torch.Tensor, new_shape: list[int]) -> torch.Ten

    return padded_tensor

-def calculate_shape(patches, weight, key, original_weights=None):
-    current_shape = weight.shape
-
-    for p in patches:
-        v = p[1]
-        offset = p[3]
-
-        # Offsets restore the old shape; lists force a diff without metadata
-        if offset is not None or isinstance(v, list):
-            continue
-
-        if isinstance(v, weight_adapter.WeightAdapterBase):
-            adapter_shape = v.calculate_shape(key)
-            if adapter_shape is not None:
-                current_shape = adapter_shape
-            continue
-
-        # Standard diff logic with padding
-        if len(v) == 2:
-            patch_type, patch_data = v[0], v[1]
-            if patch_type == "diff" and len(patch_data) > 1 and patch_data[1]['pad_weight']:
-                current_shape = patch_data[0].shape
-
-    return current_shape
-
 def calculate_weight(patches, weight, key, intermediate_dtype=torch.float32, original_weights=None):
    for p in patches:
        strength = p[0]
--- a/comfy/memory_management.py
+++ b/comfy/memory_management.py
@@ -78,4 +78,4 @@ def interpret_gathered_like(tensors, gathered):

    return dest_views

-aimdo_enabled = False
+aimdo_allocator = None
--- a/comfy/model_base.py
+++ b/comfy/model_base.py
@@ -76,7 +76,6 @@ class ModelType(Enum):
    FLUX = 8
    IMG_TO_IMG = 9
    FLOW_COSMOS = 10
-    IMG_TO_IMG_FLOW = 11


 def model_sampling(model_config, model_type):
@@ -109,8 +108,6 @@ def model_sampling(model_config, model_type):
    elif model_type == ModelType.FLOW_COSMOS:
        c = comfy.model_sampling.COSMOS_RFLOW
        s = comfy.model_sampling.ModelSamplingCosmosRFlow
-    elif model_type == ModelType.IMG_TO_IMG_FLOW:
-        c = comfy.model_sampling.IMG_TO_IMG_FLOW

    class ModelSampling(s, c):
        pass
@@ -181,7 +178,10 @@ class BaseModel(torch.nn.Module):
            xc = torch.cat([xc] + [comfy.model_management.cast_to_device(c_concat, xc.device, xc.dtype)], dim=1)

        context = c_crossattn
-        dtype = self.get_dtype_inference()
+        dtype = self.get_dtype()
+
+        if self.manual_cast_dtype is not None:
+            dtype = self.manual_cast_dtype

        xc = xc.to(dtype)
        device = xc.device
@@ -218,13 +218,6 @@ class BaseModel(torch.nn.Module):
    def get_dtype(self):
        return self.diffusion_model.dtype

-    def get_dtype_inference(self):
-        dtype = self.get_dtype()
-
-        if self.manual_cast_dtype is not None:
-            dtype = self.manual_cast_dtype
-        return dtype
-
    def encode_adm(self, **kwargs):
        return None

@@ -379,7 +372,9 @@ class BaseModel(torch.nn.Module):
                    input_shapes += shape

        if comfy.model_management.xformers_enabled() or comfy.model_management.pytorch_attention_flash_attention():
-            dtype = self.get_dtype_inference()
+            dtype = self.get_dtype()
+            if self.manual_cast_dtype is not None:
+                dtype = self.manual_cast_dtype
            #TODO: this needs to be tweaked
            area = sum(map(lambda input_shape: input_shape[0] * math.prod(input_shape[2:]), input_shapes))
            return (area * comfy.model_management.dtype_size(dtype) * 0.01 * self.memory_usage_factor) * (1024 * 1024)
@@ -925,25 +920,6 @@ class Flux(BaseModel):
            out['ref_latents'] = list([1, 16, sum(map(lambda a: math.prod(a.size()[2:]), ref_latents))])
        return out

-class LongCatImage(Flux):
-    def _apply_model(self, x, t, c_concat=None, c_crossattn=None, control=None, transformer_options={}, **kwargs):
-        transformer_options = transformer_options.copy()
-        rope_opts = transformer_options.get("rope_options", {})
-        rope_opts = dict(rope_opts)
-        rope_opts.setdefault("shift_t", 1.0)
-        rope_opts.setdefault("shift_y", 512.0)
-        rope_opts.setdefault("shift_x", 512.0)
-        transformer_options["rope_options"] = rope_opts
-        return super()._apply_model(x, t, c_concat, c_crossattn, control, transformer_options, **kwargs)
-
-    def encode_adm(self, **kwargs):
-        return None
-
-    def extra_conds(self, **kwargs):
-        out = super().extra_conds(**kwargs)
-        out.pop('guidance', None)
-        return out
-
 class Flux2(Flux):
    def extra_conds(self, **kwargs):
        out = super().extra_conds(**kwargs)
@@ -993,10 +969,6 @@ class LTXV(BaseModel):
        if keyframe_idxs is not None:
            out['keyframe_idxs'] = comfy.conds.CONDRegular(keyframe_idxs)

-        guide_attention_entries = kwargs.get("guide_attention_entries", None)
-        if guide_attention_entries is not None:
-            out['guide_attention_entries'] = comfy.conds.CONDConstant(guide_attention_entries)
-
        return out

    def process_timestep(self, timestep, x, denoise_mask=None, **kwargs):
@@ -1014,14 +986,10 @@ class LTXAV(BaseModel):
    def extra_conds(self, **kwargs):
        out = super().extra_conds(**kwargs)
        attention_mask = kwargs.get("attention_mask", None)
-        device = kwargs["device"]
-
        if attention_mask is not None:
            out['attention_mask'] = comfy.conds.CONDRegular(attention_mask)
        cross_attn = kwargs.get("cross_attn", None)
        if cross_attn is not None:
-            if hasattr(self.diffusion_model, "preprocess_text_embeds"):
-                cross_attn = self.diffusion_model.preprocess_text_embeds(cross_attn.to(device=device, dtype=self.get_dtype_inference()))
            out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)

        out['frame_rate'] = comfy.conds.CONDConstant(kwargs.get("frame_rate", 25))
@@ -1049,10 +1017,6 @@ class LTXAV(BaseModel):
        if latent_shapes is not None:
            out['latent_shapes'] = comfy.conds.CONDConstant(latent_shapes)

-        guide_attention_entries = kwargs.get("guide_attention_entries", None)
-        if guide_attention_entries is not None:
-            out['guide_attention_entries'] = comfy.conds.CONDConstant(guide_attention_entries)
-
        return out

    def process_timestep(self, timestep, x, denoise_mask=None, audio_denoise_mask=None, **kwargs):
@@ -1201,7 +1165,7 @@ class Anima(BaseModel):
                t5xxl_ids = t5xxl_ids.unsqueeze(0)

                if torch.is_inference_mode_enabled():  # if not we are training
-                    cross_attn = self.diffusion_model.preprocess_text_embeds(cross_attn.to(device=device, dtype=self.get_dtype_inference()), t5xxl_ids.to(device=device), t5xxl_weights=t5xxl_weights.to(device=device, dtype=self.get_dtype_inference()))
+                    cross_attn = self.diffusion_model.preprocess_text_embeds(cross_attn.to(device=device, dtype=self.get_dtype()), t5xxl_ids.to(device=device), t5xxl_weights=t5xxl_weights.to(device=device, dtype=self.get_dtype()))
                else:
                    out['t5xxl_ids'] = comfy.conds.CONDRegular(t5xxl_ids)
                    out['t5xxl_weights'] = comfy.conds.CONDRegular(t5xxl_weights)
@@ -1496,12 +1460,6 @@ class WAN22(WAN21):
    def scale_latent_inpaint(self, sigma, noise, latent_image, **kwargs):
        return latent_image

-class WAN21_FlowRVS(WAN21):
-    def __init__(self, model_config, model_type=ModelType.IMG_TO_IMG_FLOW, image_to_video=False, device=None):
-        model_config.unet_config["model_type"] = "t2v"
-        super(WAN21, self).__init__(model_config, model_type, device=device, unet_model=comfy.ldm.wan.model.WanModel)
-        self.image_to_video = image_to_video
-
 class Hunyuan3Dv2(BaseModel):
    def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
        super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.hunyuan3d.model.Hunyuan3Dv2)
--- a/comfy/model_detection.py
+++ b/comfy/model_detection.py
@@ -279,8 +279,6 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
            dit_config["txt_norm"] = any_suffix_in(state_dict_keys, key_prefix, 'txt_norm.', ["weight", "scale"])
            if dit_config["yak_mlp"] and dit_config["txt_norm"]:  # Ovis model
                dit_config["txt_ids_dims"] = [1, 2]
-            if dit_config.get("context_in_dim") == 3584 and dit_config["vec_in_dim"] is None:  # LongCat-Image
-                dit_config["txt_ids_dims"] = [1, 2]

        return dit_config

@@ -511,9 +509,6 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
        if ref_conv_weight is not None:
            dit_config["in_dim_ref_conv"] = ref_conv_weight.shape[1]

-        if metadata is not None and "config" in metadata:
-            dit_config.update(json.loads(metadata["config"]).get("transformer", {}))
-
        return dit_config

    if '{}latent_in.weight'.format(key_prefix) in state_dict_keys:  # Hunyuan 3D
@@ -797,10 +792,6 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
        unet_config["use_temporal_resblock"] = False
        unet_config["use_temporal_attention"] = False

-    heatmap_key = '{}heatmap_head.conv_layers.0.weight'.format(key_prefix)
-    if heatmap_key in state_dict_keys:
-        unet_config["heatmap_head"] = True
-
    return unet_config

 def model_config_from_unet_config(unet_config, state_dict=None):
@@ -1021,7 +1012,7 @@ def unet_config_from_diffusers_unet(state_dict, dtype=None):

    LotusD = {'use_checkpoint': False, 'image_size': 32, 'out_channels': 4, 'use_spatial_transformer': True, 'legacy': False, 'adm_in_channels': 4,
            'dtype': dtype, 'in_channels': 4, 'model_channels': 320, 'num_res_blocks': [2, 2, 2, 2], 'transformer_depth': [1, 1, 1, 1, 1, 1, 0, 0],
-            'channel_mult': [1, 2, 4, 4], 'transformer_depth_middle': 1, 'use_linear_in_transformer': True, 'context_dim': 1024, 'num_head_channels': 64,
+            'channel_mult': [1, 2, 4, 4], 'transformer_depth_middle': 1, 'use_linear_in_transformer': True, 'context_dim': 1024, 'num_heads': 8,
            'transformer_depth_output': [1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0],
            'use_temporal_attention': False, 'use_temporal_resblock': False}

--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -350,7 +350,7 @@ AMD_ENABLE_MIOPEN_ENV = 'COMFYUI_ENABLE_MIOPEN'

 try:
    if is_amd():
-        arch = torch.cuda.get_device_properties(get_torch_device()).gcnArchName.split(':')[0]
+        arch = torch.cuda.get_device_properties(get_torch_device()).gcnArchName
        if not (any((a in arch) for a in AMD_RDNA2_AND_OLDER_ARCH)):
            if os.getenv(AMD_ENABLE_MIOPEN_ENV) != '1':
                torch.backends.cudnn.enabled = False  # Seems to improve things a lot on AMD
@@ -378,7 +378,7 @@ try:
        if args.use_split_cross_attention == False and args.use_quad_cross_attention == False:
            if aotriton_supported(arch):  # AMD efficient attention implementation depends on aotriton.
                if torch_version_numeric >= (2, 7):  # works on 2.6 but doesn't actually seem to improve much
-                    if any((a in arch) for a in ["gfx90a", "gfx942", "gfx950", "gfx1100", "gfx1101", "gfx1151"]):  # TODO: more arches, TODO: gfx950
+                    if any((a in arch) for a in ["gfx90a", "gfx942", "gfx1100", "gfx1101", "gfx1151"]):  # TODO: more arches, TODO: gfx950
                        ENABLE_PYTORCH_ATTENTION = True
                if rocm_version >= (7, 0):
                   if any((a in arch) for a in ["gfx1200", "gfx1201"]):
@@ -836,7 +836,7 @@ def unet_inital_load_device(parameters, dtype):

    mem_dev = get_free_memory(torch_dev)
    mem_cpu = get_free_memory(cpu_dev)
-    if mem_dev > mem_cpu and model_size < mem_dev and comfy.memory_management.aimdo_enabled:
+    if mem_dev > mem_cpu and model_size < mem_dev and comfy.memory_management.aimdo_allocator is None:
        return torch_dev
    else:
        return cpu_dev
@@ -1121,6 +1121,7 @@ def get_cast_buffer(offload_stream, device, size, ref):
            synchronize()
            del STREAM_CAST_BUFFERS[offload_stream]
            del cast_buffer
+            #FIXME: This doesn't work in Aimdo because mempool can't clear cache
            soft_empty_cache()
        with wf_context:
            cast_buffer = torch.empty((size), dtype=torch.int8, device=device)
--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@@ -271,7 +271,6 @@ class ModelPatcher:
        self.is_clip = False
        self.hook_mode = comfy.hooks.EnumHookMode.MaxSpeed

-        self.cached_patcher_init: tuple[Callable, tuple] | None = None
        if not hasattr(self.model, 'model_loaded_weight_memory'):
            self.model.model_loaded_weight_memory = 0

@@ -308,15 +307,8 @@ class ModelPatcher:
    def get_free_memory(self, device):
        return comfy.model_management.get_free_memory(device)

-    def clone(self, disable_dynamic=False):
-        class_ = self.__class__
-        model = self.model
-        if self.is_dynamic() and disable_dynamic:
-            class_ = ModelPatcher
-            temp_model_patcher = self.cached_patcher_init[0](*self.cached_patcher_init[1], disable_dynamic=True)
-            model = temp_model_patcher.model
-
-        n = class_(model, self.load_device, self.offload_device, self.model_size(), weight_inplace_update=self.weight_inplace_update)
+    def clone(self):
+        n = self.__class__(self.model, self.load_device, self.offload_device, self.model_size(), weight_inplace_update=self.weight_inplace_update)
        n.patches = {}
        for k in self.patches:
            n.patches[k] = self.patches[k][:]
@@ -370,8 +362,6 @@ class ModelPatcher:
        n.is_clip = self.is_clip
        n.hook_mode = self.hook_mode

-        n.cached_patcher_init = self.cached_patcher_init
-
        for callback in self.get_all_callbacks(CallbacksMP.ON_CLONE):
            callback(self, n)
        return n
@@ -416,16 +406,13 @@ class ModelPatcher:
    def memory_required(self, input_shape):
        return self.model.memory_required(input_shape=input_shape)

-    def disable_model_cfg1_optimization(self):
-        self.model_options["disable_cfg1_optimization"] = True
-
    def set_model_sampler_cfg_function(self, sampler_cfg_function, disable_cfg1_optimization=False):
        if len(inspect.signature(sampler_cfg_function).parameters) == 3:
            self.model_options["sampler_cfg_function"] = lambda args: sampler_cfg_function(args["cond"], args["uncond"], args["cond_scale"]) #Old way
        else:
            self.model_options["sampler_cfg_function"] = sampler_cfg_function
        if disable_cfg1_optimization:
-            self.disable_model_cfg1_optimization()
+            self.model_options["disable_cfg1_optimization"] = True

    def set_model_sampler_post_cfg_function(self, post_cfg_function, disable_cfg1_optimization=False):
        self.model_options = set_model_options_post_cfg_function(self.model_options, post_cfg_function, disable_cfg1_optimization)
@@ -1527,10 +1514,8 @@ class ModelPatcherDynamic(ModelPatcher):

                    weight, _, _ = get_key_weight(self.model, key)
                    if weight is None:
-                        return (False, 0)
+                        return 0
                    if key in self.patches:
-                        if comfy.lora.calculate_shape(self.patches[key], weight, key) != weight.shape:
-                            return (True, 0)
                        setattr(m, param_key + "_lowvram_function", LowVramPatch(key, self.patches))
                        num_patches += 1
                    else:
@@ -1544,13 +1529,7 @@ class ModelPatcherDynamic(ModelPatcher):
                        model_dtype = getattr(m, param_key + "_comfy_model_dtype", None) or weight.dtype
                        weight._model_dtype = model_dtype
                        geometry = comfy.memory_management.TensorGeometry(shape=weight.shape, dtype=model_dtype)
-                    return (False, comfy.memory_management.vram_aligned_size(geometry))
-
-                def force_load_param(self, param_key, device_to):
-                    key = key_param_name_to_key(n, param_key)
-                    if key in self.backup:
-                        comfy.utils.set_attr_param(self.model, key, self.backup[key].weight)
-                    self.patch_weight_to_device(key, device_to=device_to)
+                    return comfy.memory_management.vram_aligned_size(geometry)

                if hasattr(m, "comfy_cast_weights"):
                    m.comfy_cast_weights = True
@@ -1558,19 +1537,13 @@ class ModelPatcherDynamic(ModelPatcher):
                    m.seed_key = n
                    set_dirty(m, dirty)

-                    force_load, v_weight_size = setup_param(self, m, n, "weight")
-                    force_load_bias, v_weight_bias = setup_param(self, m, n, "bias")
-                    force_load = force_load or force_load_bias
-                    v_weight_size += v_weight_bias
+                    v_weight_size = 0
+                    v_weight_size += setup_param(self, m, n, "weight")
+                    v_weight_size += setup_param(self, m, n, "bias")

-                    if force_load:
-                        logging.info(f"Module {n} has resizing Lora - force loading")
-                        force_load_param(self, "weight", device_to)
-                        force_load_param(self, "bias", device_to)
-                    else:
-                        if vbar is not None and not hasattr(m, "_v"):
-                            m._v = vbar.alloc(v_weight_size)
-                        allocated_size += v_weight_size
+                    if vbar is not None and not hasattr(m, "_v"):
+                        m._v = vbar.alloc(v_weight_size)
+                    allocated_size += v_weight_size

                else:
                    for param in params:
@@ -1633,11 +1606,6 @@ class ModelPatcherDynamic(ModelPatcher):
            for m in self.model.modules():
                move_weight_functions(m, device_to)

-            keys = list(self.backup.keys())
-            for k in keys:
-                bk = self.backup[k]
-                comfy.utils.set_attr_param(self.model, k, bk.weight)
-
    def partially_load(self, device_to, extra_memory=0, force_patch_weights=False):
        assert not force_patch_weights #See above
        with self.use_ejected(skip_and_inject_on_exit_only=True):
--- a/comfy/model_sampling.py
+++ b/comfy/model_sampling.py
@@ -83,16 +83,6 @@ class IMG_TO_IMG(X0):
    def calculate_input(self, sigma, noise):
        return noise

-class IMG_TO_IMG_FLOW(CONST):
-    def calculate_denoised(self, sigma, model_output, model_input):
-        return model_output
-
-    def noise_scaling(self, sigma, noise, latent_image, max_denoise=False):
-        return latent_image
-
-    def inverse_noise_scaling(self, sigma, latent):
-        return 1.0 - latent
-
 class COSMOS_RFLOW:
    def calculate_input(self, sigma, noise):
        sigma = (sigma / (sigma + 1))
--- a/comfy/ops.py
+++ b/comfy/ops.py
@@ -19,8 +19,9 @@
 import torch
 import logging
 import comfy.model_management
-from comfy.cli_args import args, PerformanceFeature
+from comfy.cli_args import args, PerformanceFeature, enables_dynamic_vram
 import comfy.float
+import comfy.rmsnorm
 import json
 import comfy.memory_management
 import comfy.pinned_memory
@@ -79,7 +80,7 @@ def cast_to_input(weight, input, non_blocking=False, copy=True):
    return comfy.model_management.cast_to(weight, input.dtype, input.device, non_blocking=non_blocking, copy=copy)


-def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compute_dtype, want_requant):
+def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compute_dtype):
    offload_stream = None
    xfer_dest = None

@@ -167,15 +168,17 @@ def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compu
            x = to_dequant(x, dtype)
        if not resident and lowvram_fn is not None:
            x = to_dequant(x, dtype if compute_dtype is None else compute_dtype)
+            #FIXME: this is not accurate, we need to be sensitive to the compute dtype
            x = lowvram_fn(x)
-            if (want_requant and len(fns) == 0 or update_weight):
+            if (isinstance(orig, QuantizedTensor) and
+                (orig.dtype == dtype and len(fns) == 0 or update_weight)):
                seed = comfy.utils.string_to_seed(s.seed_key)
-                if isinstance(orig, QuantizedTensor):
-                    y = QuantizedTensor.from_float(x, s.layout_type, scale="recalculate", stochastic_rounding=seed)
-                else:
-                    y = comfy.float.stochastic_rounding(x, orig.dtype, seed=seed)
-            if want_requant and len(fns) == 0:
-                x = y
+                y = QuantizedTensor.from_float(x, s.layout_type, scale="recalculate", stochastic_rounding=seed)
+                if orig.dtype == dtype and len(fns) == 0:
+                    #The layer actually wants our freshly saved QT
+                    x = y
+            elif update_weight:
+                y = comfy.float.stochastic_rounding(x, orig.dtype, seed = comfy.utils.string_to_seed(s.seed_key))
            if update_weight:
                orig.copy_(y)
        for f in fns:
@@ -192,7 +195,7 @@ def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compu
    return weight, bias, (offload_stream, device if signature is not None else None, None)


-def cast_bias_weight(s, input=None, dtype=None, device=None, bias_dtype=None, offloadable=False, compute_dtype=None, want_requant=False):
+def cast_bias_weight(s, input=None, dtype=None, device=None, bias_dtype=None, offloadable=False, compute_dtype=None):
    # NOTE: offloadable=False is a a legacy and if you are a custom node author reading this please pass
    # offloadable=True and call uncast_bias_weight() after your last usage of the weight/bias. This
    # will add async-offload support to your cast and improve performance.
@@ -210,7 +213,7 @@ def cast_bias_weight(s, input=None, dtype=None, device=None, bias_dtype=None, of
    non_blocking = comfy.model_management.device_supports_non_blocking(device)

    if hasattr(s, "_v"):
-        return cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compute_dtype, want_requant)
+        return cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compute_dtype)

    if offloadable and (device != s.weight.device or
                        (s.bias is not None and device != s.bias.device)):
@@ -294,7 +297,7 @@ class disable_weight_init:
    class Linear(torch.nn.Linear, CastWeightBiasOp):

        def __init__(self, in_features, out_features, bias=True, device=None, dtype=None):
-            if not comfy.model_management.WINDOWS or not comfy.memory_management.aimdo_enabled:
+            if not comfy.model_management.WINDOWS or not enables_dynamic_vram():
                super().__init__(in_features, out_features, bias, device, dtype)
                return

@@ -315,7 +318,7 @@ class disable_weight_init:
        def _load_from_state_dict(self, state_dict, prefix, local_metadata,
                                strict, missing_keys, unexpected_keys, error_msgs):

-            if not comfy.model_management.WINDOWS or not comfy.memory_management.aimdo_enabled:
+            if not comfy.model_management.WINDOWS or not enables_dynamic_vram():
                return super()._load_from_state_dict(state_dict, prefix, local_metadata, strict,
                                                     missing_keys, unexpected_keys, error_msgs)
            assign_to_params_buffers = local_metadata.get("assign_to_params_buffers", False)
@@ -460,7 +463,7 @@ class disable_weight_init:
            else:
                return super().forward(*args, **kwargs)

-    class RMSNorm(torch.nn.RMSNorm, CastWeightBiasOp):
+    class RMSNorm(comfy.rmsnorm.RMSNorm, CastWeightBiasOp):
        def reset_parameters(self):
            self.bias = None
            return None
@@ -472,7 +475,8 @@ class disable_weight_init:
                weight = None
                bias = None
                offload_stream = None
-            x = torch.nn.functional.rms_norm(input, self.normalized_shape, weight, self.eps)
+            x = comfy.rmsnorm.rms_norm(input, weight, self.eps)  # TODO: switch to commented out line when old torch is deprecated
+            # x = torch.nn.functional.rms_norm(input, self.normalized_shape, weight, self.eps)
            uncast_bias_weight(self, weight, bias, offload_stream)
            return x

@@ -615,8 +619,7 @@ def fp8_linear(self, input):

    if input.ndim != 2:
        return None
-    lora_compute_dtype=comfy.model_management.lora_compute_dtype(input.device)
-    w, bias, offload_stream = cast_bias_weight(self, input, dtype=dtype, bias_dtype=input_dtype, offloadable=True, compute_dtype=lora_compute_dtype, want_requant=True)
+    w, bias, offload_stream = cast_bias_weight(self, input, dtype=dtype, bias_dtype=input_dtype, offloadable=True)
    scale_weight = torch.ones((), device=input.device, dtype=torch.float32)

    scale_input = torch.ones((), device=input.device, dtype=torch.float32)
@@ -826,10 +829,6 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec
                else:
                    sd = {}

-                if not hasattr(self, 'weight'):
-                    logging.warning("Warning: state dict on uninitialized op {}".format(prefix))
-                    return sd
-
                if self.bias is not None:
                    sd["{}bias".format(prefix)] = self.bias

@@ -853,8 +852,8 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec
            def _forward(self, input, weight, bias):
                return torch.nn.functional.linear(input, weight, bias)

-            def forward_comfy_cast_weights(self, input, compute_dtype=None, want_requant=False):
-                weight, bias, offload_stream = cast_bias_weight(self, input, offloadable=True, compute_dtype=compute_dtype, want_requant=want_requant)
+            def forward_comfy_cast_weights(self, input, compute_dtype=None):
+                weight, bias, offload_stream = cast_bias_weight(self, input, offloadable=True, compute_dtype=compute_dtype)
                x = self._forward(input, weight, bias)
                uncast_bias_weight(self, weight, bias, offload_stream)
                return x
@@ -884,7 +883,8 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec
                            scale = comfy.model_management.cast_to_device(scale, input.device, None)
                        input = QuantizedTensor.from_float(input_reshaped, self.layout_type, scale=scale)

-                output = self.forward_comfy_cast_weights(input, compute_dtype, want_requant=isinstance(input, QuantizedTensor))
+
+                output = self.forward_comfy_cast_weights(input, compute_dtype)

                # Reshape output back to 3D if input was 3D
                if reshaped_3d:
--- a/comfy/rmsnorm.py
+++ b/comfy/rmsnorm.py
@@ -1,10 +1,57 @@
 import torch
 import comfy.model_management
+import numbers
+import logging
+
+RMSNorm = None
+
+try:  # probe for native RMSNorm support in the installed torch
+    rms_norm_torch = torch.nn.functional.rms_norm
+    RMSNorm = torch.nn.RMSNorm
+except AttributeError:  # attribute missing on older torch; do not swallow unrelated errors
+    rms_norm_torch = None
+    logging.warning("Please update pytorch to use native RMSNorm")

-RMSNorm = torch.nn.RMSNorm

 def rms_norm(x, weight=None, eps=1e-6):
-    if weight is None:
-        return torch.nn.functional.rms_norm(x, (x.shape[-1],), eps=eps)
+    if rms_norm_torch is not None and not (torch.jit.is_tracing() or torch.jit.is_scripting()):  # native op, skipped under torch.jit
+        if weight is None:
+            return rms_norm_torch(x, (x.shape[-1],), eps=eps)  # no weight: normalize over the last dimension only
+        else:
+            return rms_norm_torch(x, weight.shape, weight=comfy.model_management.cast_to(weight, dtype=x.dtype, device=x.device), eps=eps)
    else:
-        return torch.nn.functional.rms_norm(x, weight.shape, weight=comfy.model_management.cast_to(weight, dtype=x.dtype, device=x.device), eps=eps)
+        r = x * torch.rsqrt(torch.mean(x**2, dim=-1, keepdim=True) + eps)  # manual fallback: x * rsqrt(mean(x**2) + eps) over dim=-1
+        if weight is None:
+            return r
+        else:
+            return r * comfy.model_management.cast_to(weight, dtype=x.dtype, device=x.device)  # NOTE(review): reduces over dim=-1 only, native path uses weight.shape; same only for 1-D weights - confirm
+
+
+if RMSNorm is None:  # define the fallback only when torch.nn.RMSNorm was not bound above
+    class RMSNorm(torch.nn.Module):  # stand-in module; forward() delegates to rms_norm()
+        def __init__(
+            self,
+            normalized_shape,
+            eps=1e-6,
+            elementwise_affine=True,
+            device=None,
+            dtype=None,
+        ):
+            factory_kwargs = {"device": device, "dtype": dtype}
+            super().__init__()
+            if isinstance(normalized_shape, numbers.Integral):  # accept a bare int as a 1-D shape
+                # mypy error: incompatible types in assignment
+                normalized_shape = (normalized_shape,)  # type: ignore[assignment]
+            self.normalized_shape = tuple(normalized_shape)  # type: ignore[arg-type]
+            self.eps = eps
+            self.elementwise_affine = elementwise_affine
+            if self.elementwise_affine:
+                self.weight = torch.nn.Parameter(
+                    torch.empty(self.normalized_shape, **factory_kwargs)  # NOTE(review): uninitialized values; presumably overwritten by a checkpoint load - confirm
+                )
+            else:
+                self.register_parameter("weight", None)  # keeps a "weight" attribute that is None when not affine
+            self.bias = None  # this class never creates a bias parameter
+
+        def forward(self, x):
+            return rms_norm(x, self.weight, self.eps)
--- a/comfy/sd.py
+++ b/comfy/sd.py
@@ -60,7 +60,6 @@ import comfy.text_encoders.jina_clip_2
 import comfy.text_encoders.newbie
 import comfy.text_encoders.anima
 import comfy.text_encoders.ace15
-import comfy.text_encoders.longcat_image

 import comfy.model_patcher
 import comfy.lora
@@ -424,17 +423,6 @@ class CLIP:
    def get_key_patches(self):
        return self.patcher.get_key_patches()

-    def generate(self, tokens, do_sample=True, max_length=256, temperature=1.0, top_k=50, top_p=0.95, min_p=0.0, repetition_penalty=1.0, seed=None):
-        self.cond_stage_model.reset_clip_options()
-
-        self.load_model()
-        self.cond_stage_model.set_clip_options({"layer": None})
-        self.cond_stage_model.set_clip_options({"execution_device": self.patcher.load_device})
-        return self.cond_stage_model.generate(tokens, do_sample=do_sample, max_length=max_length, temperature=temperature, top_k=top_k, top_p=top_p, min_p=min_p, repetition_penalty=repetition_penalty, seed=seed)
-
-    def decode(self, token_ids, skip_special_tokens=True):
-        return self.tokenizer.decode(token_ids, skip_special_tokens=skip_special_tokens)
-
 class VAE:
    def __init__(self, sd=None, device=None, config=None, dtype=None, metadata=None):
        if 'decoder.up_blocks.0.resnets.0.norm1.weight' in sd.keys(): #diffusers format
@@ -695,9 +683,8 @@ class VAE:
                    self.latent_dim = 3
                    self.latent_channels = 16
                    self.output_channels = sd["encoder.conv1.weight"].shape[1]
-                    self.conv_out_channels = sd["decoder.head.2.weight"].shape[0]
                    self.pad_channel_value = 1.0
-                    ddconfig = {"dim": dim, "z_dim": self.latent_channels, "dim_mult": [1, 2, 4, 4], "num_res_blocks": 2, "attn_scales": [], "temperal_downsample": [False, True, True], "image_channels": self.output_channels, "conv_out_channels": self.conv_out_channels, "dropout": 0.0}
+                    ddconfig = {"dim": dim, "z_dim": self.latent_channels, "dim_mult": [1, 2, 4, 4], "num_res_blocks": 2, "attn_scales": [], "temperal_downsample": [False, True, True], "image_channels": self.output_channels, "dropout": 0.0}
                    self.first_stage_model = comfy.ldm.wan.vae.WanVAE(**ddconfig)
                    self.working_dtypes = [torch.bfloat16, torch.float16, torch.float32]
                    self.memory_used_encode = lambda shape, dtype: (1500 if shape[2]<=4 else 6000) * shape[3] * shape[4] * model_management.dtype_size(dtype)
@@ -1161,7 +1148,6 @@ class CLIPType(Enum):
    KANDINSKY5_IMAGE = 23
    NEWBIE = 24
    FLUX2 = 25
-    LONGCAT_IMAGE = 26


 def load_clip(ckpt_paths, embedding_directory=None, clip_type=CLIPType.STABLE_DIFFUSION, model_options={}):
@@ -1196,7 +1182,6 @@ class TEModel(Enum):
    JINA_CLIP_2 = 19
    QWEN3_8B = 20
    QWEN3_06B = 21
-    GEMMA_3_4B_VISION = 22


 def detect_te_model(sd):
@@ -1225,10 +1210,7 @@ def detect_te_model(sd):
        if 'model.layers.47.self_attn.q_norm.weight' in sd:
            return TEModel.GEMMA_3_12B
        if 'model.layers.0.self_attn.q_norm.weight' in sd:
-            if 'vision_model.embeddings.patch_embedding.weight' in sd:
-                return TEModel.GEMMA_3_4B_VISION
-            else:
-                return TEModel.GEMMA_3_4B
+            return TEModel.GEMMA_3_4B
        return TEModel.GEMMA_2_2B
    if 'model.layers.0.self_attn.k_proj.bias' in sd:
        weight = sd['model.layers.0.self_attn.k_proj.bias']
@@ -1288,8 +1270,6 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip
        else:
            if "text_projection" in clip_data[i]:
                clip_data[i]["text_projection.weight"] = clip_data[i]["text_projection"].transpose(0, 1) #old models saved with the CLIPSave node
-        if "lm_head.weight" in clip_data[i]:
-            clip_data[i]["model.lm_head.weight"] = clip_data[i].pop("lm_head.weight") # prefix missing in some models

    tokenizer_data = {}
    clip_target = EmptyClass()
@@ -1355,14 +1335,6 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip
            clip_target.clip = comfy.text_encoders.lumina2.te(**llama_detect(clip_data), model_type="gemma3_4b")
            clip_target.tokenizer = comfy.text_encoders.lumina2.NTokenizer
            tokenizer_data["spiece_model"] = clip_data[0].get("spiece_model", None)
-        elif te_model == TEModel.GEMMA_3_4B_VISION:
-            clip_target.clip = comfy.text_encoders.lumina2.te(**llama_detect(clip_data), model_type="gemma3_4b_vision")
-            clip_target.tokenizer = comfy.text_encoders.lumina2.NTokenizer
-            tokenizer_data["spiece_model"] = clip_data[0].get("spiece_model", None)
-        elif te_model == TEModel.GEMMA_3_12B:
-            clip_target.clip = comfy.text_encoders.lt.gemma3_te(**llama_detect(clip_data))
-            clip_target.tokenizer = comfy.text_encoders.lt.Gemma3_12BTokenizer
-            tokenizer_data["spiece_model"] = clip_data[0].get("spiece_model", None)
        elif te_model == TEModel.LLAMA3_8:
            clip_target.clip = comfy.text_encoders.hidream.hidream_clip(**llama_detect(clip_data),
                                                                        clip_l=False, clip_g=False, t5=False, llama=True, dtype_t5=None)
@@ -1374,9 +1346,6 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip
            if clip_type == CLIPType.HUNYUAN_IMAGE:
                clip_target.clip = comfy.text_encoders.hunyuan_image.te(byt5=False, **llama_detect(clip_data))
                clip_target.tokenizer = comfy.text_encoders.hunyuan_image.HunyuanImageTokenizer
-            elif clip_type == CLIPType.LONGCAT_IMAGE:
-                clip_target.clip = comfy.text_encoders.longcat_image.te(**llama_detect(clip_data))
-                clip_target.tokenizer = comfy.text_encoders.longcat_image.LongCatImageTokenizer
            else:
                clip_target.clip = comfy.text_encoders.qwen_image.te(**llama_detect(clip_data))
                clip_target.tokenizer = comfy.text_encoders.qwen_image.QwenImageTokenizer
@@ -1536,24 +1505,14 @@ def load_checkpoint(config_path=None, ckpt_path=None, output_vae=True, output_cl

    return (model, clip, vae)

-def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, output_clipvision=False, embedding_directory=None, output_model=True, model_options={}, te_model_options={}, disable_dynamic=False):
+def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, output_clipvision=False, embedding_directory=None, output_model=True, model_options={}, te_model_options={}):
    sd, metadata = comfy.utils.load_torch_file(ckpt_path, return_metadata=True)
-    out = load_state_dict_guess_config(sd, output_vae, output_clip, output_clipvision, embedding_directory, output_model, model_options, te_model_options=te_model_options, metadata=metadata, disable_dynamic=disable_dynamic)
+    out = load_state_dict_guess_config(sd, output_vae, output_clip, output_clipvision, embedding_directory, output_model, model_options, te_model_options=te_model_options, metadata=metadata)
    if out is None:
        raise RuntimeError("ERROR: Could not detect model type of: {}\n{}".format(ckpt_path, model_detection_error_hint(ckpt_path, sd)))
-    if output_model:
-        out[0].cached_patcher_init = (load_checkpoint_guess_config_model_only, (ckpt_path, embedding_directory, model_options, te_model_options))
    return out

-def load_checkpoint_guess_config_model_only(ckpt_path, embedding_directory=None, model_options={}, te_model_options={}, disable_dynamic=False):
-    model, *_ = load_checkpoint_guess_config(ckpt_path, False, False, False,
-            embedding_directory=embedding_directory,
-            model_options=model_options,
-            te_model_options=te_model_options,
-            disable_dynamic=disable_dynamic)
-    return model
-
-def load_state_dict_guess_config(sd, output_vae=True, output_clip=True, output_clipvision=False, embedding_directory=None, output_model=True, model_options={}, te_model_options={}, metadata=None, disable_dynamic=False):
+def load_state_dict_guess_config(sd, output_vae=True, output_clip=True, output_clipvision=False, embedding_directory=None, output_model=True, model_options={}, te_model_options={}, metadata=None):
    clip = None
    clipvision = None
    vae = None
@@ -1602,8 +1561,7 @@ def load_state_dict_guess_config(sd, output_vae=True, output_clip=True, output_c
    if output_model:
        inital_load_device = model_management.unet_inital_load_device(parameters, unet_dtype)
        model = model_config.get_model(sd, diffusion_model_prefix, device=inital_load_device)
-        ModelPatcher = comfy.model_patcher.ModelPatcher if disable_dynamic else comfy.model_patcher.CoreModelPatcher
-        model_patcher = ModelPatcher(model, load_device=load_device, offload_device=model_management.unet_offload_device())
+        model_patcher = comfy.model_patcher.CoreModelPatcher(model, load_device=load_device, offload_device=model_management.unet_offload_device())
        model.load_model_weights(sd, diffusion_model_prefix, assign=model_patcher.is_dynamic())

    if output_vae:
@@ -1654,7 +1612,7 @@ def load_state_dict_guess_config(sd, output_vae=True, output_clip=True, output_c
    return (model_patcher, clip, vae, clipvision)


-def load_diffusion_model_state_dict(sd, model_options={}, metadata=None, disable_dynamic=False):
+def load_diffusion_model_state_dict(sd, model_options={}, metadata=None):
    """
    Loads a UNet diffusion model from a state dictionary, supporting both diffusers and regular formats.

@@ -1738,8 +1696,7 @@ def load_diffusion_model_state_dict(sd, model_options={}, metadata=None, disable
        model_config.optimizations["fp8"] = True

    model = model_config.get_model(new_sd, "")
-    ModelPatcher = comfy.model_patcher.ModelPatcher if disable_dynamic else comfy.model_patcher.CoreModelPatcher
-    model_patcher = ModelPatcher(model, load_device=load_device, offload_device=offload_device)
+    model_patcher = comfy.model_patcher.CoreModelPatcher(model, load_device=load_device, offload_device=offload_device)
    if not model_management.is_device_cpu(offload_device):
        model.to(offload_device)
    model.load_model_weights(new_sd, "", assign=model_patcher.is_dynamic())
@@ -1748,13 +1705,12 @@ def load_diffusion_model_state_dict(sd, model_options={}, metadata=None, disable
        logging.info("left over keys in diffusion model: {}".format(left_over))
    return model_patcher

-def load_diffusion_model(unet_path, model_options={}, disable_dynamic=False):
+def load_diffusion_model(unet_path, model_options={}):
    sd, metadata = comfy.utils.load_torch_file(unet_path, return_metadata=True)
-    model = load_diffusion_model_state_dict(sd, model_options=model_options, metadata=metadata, disable_dynamic=disable_dynamic)
+    model = load_diffusion_model_state_dict(sd, model_options=model_options, metadata=metadata)
    if model is None:
        logging.error("ERROR UNSUPPORTED DIFFUSION MODEL {}".format(unet_path))
        raise RuntimeError("ERROR: Could not detect model type of: {}\n{}".format(unet_path, model_detection_error_hint(unet_path, sd)))
-    model.cached_patcher_init = (load_diffusion_model, (unet_path, model_options))
    return model

 def load_unet(unet_path, dtype=None):
--- a/comfy/sd1_clip.py
+++ b/comfy/sd1_clip.py
@@ -308,15 +308,6 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder):
    def load_sd(self, sd):
        return self.transformer.load_state_dict(sd, strict=False, assign=getattr(self, "can_assign_sd", False))

-    def generate(self, tokens, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed):
-        if isinstance(tokens, dict):
-            tokens_only = next(iter(tokens.values())) # todo: get this better?
-        else:
-            tokens_only = tokens
-        tokens_only = [[t[0] for t in b] for b in tokens_only]
-        embeds = self.process_tokens(tokens_only, device=self.execution_device)[0]
-        return self.transformer.generate(embeds, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed)
-
 def parse_parentheses(string):
    result = []
    current_item = ""
@@ -573,8 +564,6 @@ class SDTokenizer:
        min_length = tokenizer_options.get("{}_min_length".format(self.embedding_key), self.min_length)
        min_padding = tokenizer_options.get("{}_min_padding".format(self.embedding_key), self.min_padding)

-        min_length = kwargs.get("min_length", min_length)
-
        text = escape_important(text)
        if kwargs.get("disable_weights", self.disable_weights):
            parsed_weights = [(text, 1.0)]
@@ -674,9 +663,6 @@ class SDTokenizer:
    def state_dict(self):
        return {}

-    def decode(self, token_ids, skip_special_tokens=True):
-        return self.tokenizer.decode(token_ids, skip_special_tokens=skip_special_tokens)
-
 class SD1Tokenizer:
    def __init__(self, embedding_directory=None, tokenizer_data={}, clip_name="l", tokenizer=SDTokenizer, name=None):
        if name is not None:
@@ -700,9 +686,6 @@ class SD1Tokenizer:
    def state_dict(self):
        return getattr(self, self.clip).state_dict()

-    def decode(self, token_ids, skip_special_tokens=True):
-        return getattr(self, self.clip).decode(token_ids, skip_special_tokens=skip_special_tokens)
-
 class SD1CheckpointClipModel(SDClipModel):
    def __init__(self, device="cpu", dtype=None, model_options={}):
        super().__init__(device=device, return_projected_pooled=False, dtype=dtype, model_options=model_options)
@@ -739,6 +722,3 @@ class SD1ClipModel(torch.nn.Module):

    def load_sd(self, sd):
        return getattr(self, self.clip).load_sd(sd)
-
-    def generate(self, tokens, do_sample=True, max_length=256, temperature=1.0, top_k=50, top_p=0.95, min_p=0.0, repetition_penalty=1.0, seed=None):
-        return getattr(self, self.clip).generate(tokens, do_sample=do_sample, max_length=max_length, temperature=temperature, top_k=top_k, top_p=top_p, min_p=min_p, repetition_penalty=repetition_penalty, seed=seed)
--- a/comfy/supported_models.py
+++ b/comfy/supported_models.py
@@ -25,7 +25,6 @@ import comfy.text_encoders.kandinsky5
 import comfy.text_encoders.z_image
 import comfy.text_encoders.anima
 import comfy.text_encoders.ace15
-import comfy.text_encoders.longcat_image

 from . import supported_models_base
 from . import latent_formats
@@ -526,8 +525,7 @@ class LotusD(SD20):
    }

    unet_extra_config = {
-        "num_classes": 'sequential',
-        "num_head_channels": 64,
+        "num_classes": 'sequential'
    }

    def get_model(self, state_dict, prefix="", device=None):
@@ -1258,16 +1256,6 @@ class WAN22_T2V(WAN21_T2V):
        out = model_base.WAN22(self, image_to_video=True, device=device)
        return out

-class WAN21_FlowRVS(WAN21_T2V):
-    unet_config = {
-        "image_model": "wan2.1",
-        "model_type": "flow_rvs",
-    }
-
-    def get_model(self, state_dict, prefix="", device=None):
-        out = model_base.WAN21_FlowRVS(self, image_to_video=True, device=device)
-        return out
-
 class Hunyuan3Dv2(supported_models_base.BASE):
    unet_config = {
        "image_model": "hunyuan3d2",
@@ -1679,37 +1667,6 @@ class ACEStep15(supported_models_base.BASE):
        return supported_models_base.ClipTarget(comfy.text_encoders.ace15.ACE15Tokenizer, comfy.text_encoders.ace15.te(**detect))


-class LongCatImage(supported_models_base.BASE):
-    unet_config = {
-        "image_model": "flux",
-        "guidance_embed": False,
-        "vec_in_dim": None,
-        "context_in_dim": 3584,
-        "txt_ids_dims": [1, 2],
-    }
-
-    sampling_settings = {
-    }
-
-    unet_extra_config = {}
-    latent_format = latent_formats.Flux
-
-    memory_usage_factor = 2.5
-
-    supported_inference_dtypes = [torch.bfloat16, torch.float16, torch.float32]
-
-    vae_key_prefix = ["vae."]
-    text_encoder_key_prefix = ["text_encoders."]
-
-    def get_model(self, state_dict, prefix="", device=None):
-        out = model_base.LongCatImage(self, device=device)
-        return out
-
-    def clip_target(self, state_dict={}):
-        pref = self.text_encoder_key_prefix[0]
-        hunyuan_detect = comfy.text_encoders.hunyuan_video.llama_detect(state_dict, "{}qwen25_7b.transformer.".format(pref))
-        return supported_models_base.ClipTarget(comfy.text_encoders.longcat_image.LongCatImageTokenizer, comfy.text_encoders.longcat_image.te(**hunyuan_detect))
-
-models = [LotusD, Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, LongCatImage, FluxSchnell, GenmoMochi, LTXV, LTXAV, HunyuanVideo15_SR_Distilled, HunyuanVideo15, HunyuanImage21Refiner, HunyuanImage21, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, CosmosT2IPredict2, CosmosI2VPredict2, ZImage, Lumina2, WAN22_T2V, WAN21_T2V, WAN21_I2V, WAN21_FunControl2V, WAN21_Vace, WAN21_Camera, WAN22_Camera, WAN22_S2V, WAN21_HuMo, WAN22_Animate, WAN21_FlowRVS, Hunyuan3Dv2mini, Hunyuan3Dv2, Hunyuan3Dv2_1, HiDream, Chroma, ChromaRadiance, ACEStep, ACEStep15, Omnigen2, QwenImage, Flux2, Kandinsky5Image, Kandinsky5, Anima]
+models = [LotusD, Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, FluxSchnell, GenmoMochi, LTXV, LTXAV, HunyuanVideo15_SR_Distilled, HunyuanVideo15, HunyuanImage21Refiner, HunyuanImage21, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, CosmosT2IPredict2, CosmosI2VPredict2, ZImage, Lumina2, WAN22_T2V, WAN21_T2V, WAN21_I2V, WAN21_FunControl2V, WAN21_Vace, WAN21_Camera, WAN22_Camera, WAN22_S2V, WAN21_HuMo, WAN22_Animate, Hunyuan3Dv2mini, Hunyuan3Dv2, Hunyuan3Dv2_1, HiDream, Chroma, ChromaRadiance, ACEStep, ACEStep15, Omnigen2, QwenImage, Flux2, Kandinsky5Image, Kandinsky5, Anima]

 models += [SVD_img2vid]
--- a/comfy/text_encoders/ace15.py
+++ b/comfy/text_encoders/ace15.py
@@ -328,14 +328,14 @@ class ACE15TEModel(torch.nn.Module):
                return getattr(self, self.lm_model).load_sd(sd)

    def memory_estimation_function(self, token_weight_pairs, device=None):
-        lm_metadata = token_weight_pairs.get("lm_metadata", {})
+        lm_metadata = token_weight_pairs["lm_metadata"]
        constant = self.constant
        if comfy.model_management.should_use_bf16(device):
            constant *= 0.5

        token_weight_pairs = token_weight_pairs.get("lm_prompt", [])
        num_tokens = sum(map(lambda a: len(a), token_weight_pairs))
-        num_tokens += lm_metadata.get("min_tokens", 0)
+        num_tokens += lm_metadata['min_tokens']
        return num_tokens * constant * 1024 * 1024

 def te(dtype_llama=None, llama_quantization_metadata=None, lm_model="qwen3_2b"):
--- a/comfy/text_encoders/anima.py
+++ b/comfy/text_encoders/anima.py
@@ -33,8 +33,6 @@ class AnimaTokenizer:
    def state_dict(self):
        return {}

-    def decode(self, token_ids, **kwargs):
-        return self.qwen3_06b.decode(token_ids, **kwargs)

 class Qwen3_06BModel(sd1_clip.SDClipModel):
    def __init__(self, device="cpu", layer="last", layer_idx=None, dtype=None, attention_mask=True, model_options={}):
--- a/comfy/text_encoders/llama.py
+++ b/comfy/text_encoders/llama.py
@@ -3,8 +3,6 @@ import torch.nn as nn
 from dataclasses import dataclass
 from typing import Optional, Any, Tuple
 import math
-from tqdm import tqdm
-import comfy.utils

 from comfy.ldm.modules.attention import optimized_attention_for_device
 import comfy.model_management
@@ -105,7 +103,6 @@ class Qwen3_06BConfig:
    rope_scale = None
    final_norm: bool = True
    lm_head: bool = False
-    stop_tokens = [151643, 151645]

@dataclass
 class Qwen3_06B_ACE15_Config:
@@ -129,7 +126,6 @@ class Qwen3_06B_ACE15_Config:
    rope_scale = None
    final_norm: bool = True
    lm_head: bool = False
-    stop_tokens = [151643, 151645]

@dataclass
 class Qwen3_2B_ACE15_lm_Config:
@@ -153,7 +149,6 @@ class Qwen3_2B_ACE15_lm_Config:
    rope_scale = None
    final_norm: bool = True
    lm_head: bool = False
-    stop_tokens = [151643, 151645]

@dataclass
 class Qwen3_4B_ACE15_lm_Config:
@@ -177,7 +172,6 @@ class Qwen3_4B_ACE15_lm_Config:
    rope_scale = None
    final_norm: bool = True
    lm_head: bool = False
-    stop_tokens = [151643, 151645]

@dataclass
 class Qwen3_4BConfig:
@@ -201,7 +195,6 @@ class Qwen3_4BConfig:
    rope_scale = None
    final_norm: bool = True
    lm_head: bool = False
-    stop_tokens = [151643, 151645]

@dataclass
 class Qwen3_8BConfig:
@@ -225,7 +218,6 @@ class Qwen3_8BConfig:
    rope_scale = None
    final_norm: bool = True
    lm_head: bool = False
-    stop_tokens = [151643, 151645]

@dataclass
 class Ovis25_2BConfig:
@@ -296,7 +288,6 @@ class Gemma2_2B_Config:
    rope_scale = None
    final_norm: bool = True
    lm_head: bool = False
-    stop_tokens = [1]

@dataclass
 class Gemma3_4B_Config:
@@ -321,14 +312,6 @@ class Gemma3_4B_Config:
    rope_scale = [8.0, 1.0]
    final_norm: bool = True
    lm_head: bool = False
-    stop_tokens = [1, 106]
-
-GEMMA3_VISION_CONFIG = {"num_channels": 3, "hidden_act": "gelu_pytorch_tanh", "hidden_size": 1152, "image_size": 896, "intermediate_size": 4304, "model_type": "siglip_vision_model", "num_attention_heads": 16, "num_hidden_layers": 27, "patch_size": 14}
-
-@dataclass
-class Gemma3_4B_Vision_Config(Gemma3_4B_Config):
-    vision_config = GEMMA3_VISION_CONFIG
-    mm_tokens_per_image = 256

@dataclass
 class Gemma3_12B_Config:
@@ -353,9 +336,8 @@ class Gemma3_12B_Config:
    rope_scale = [8.0, 1.0]
    final_norm: bool = True
    lm_head: bool = False
-    vision_config = GEMMA3_VISION_CONFIG
+    vision_config = {"num_channels": 3, "hidden_act": "gelu_pytorch_tanh", "hidden_size": 1152, "image_size": 896, "intermediate_size": 4304, "model_type": "siglip_vision_model", "num_attention_heads": 16, "num_hidden_layers": 27, "patch_size": 14}
    mm_tokens_per_image = 256
-    stop_tokens = [1, 106]

 class RMSNorm(nn.Module):
    def __init__(self, dim: int, eps: float = 1e-5, add=False, device=None, dtype=None):
@@ -459,10 +441,8 @@ class Attention(nn.Module):
        freqs_cis: Optional[torch.Tensor] = None,
        optimized_attention=None,
        past_key_value: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
-        sliding_window: Optional[int] = None,
    ):
        batch_size, seq_length, _ = hidden_states.shape
-
        xq = self.q_proj(hidden_states)
        xk = self.k_proj(hidden_states)
        xv = self.v_proj(hidden_states)
@@ -497,11 +477,6 @@ class Attention(nn.Module):
            else:
                present_key_value = (xk, xv, index + num_tokens)

-            if sliding_window is not None and xk.shape[2] > sliding_window:
-                xk = xk[:, :, -sliding_window:]
-                xv = xv[:, :, -sliding_window:]
-                attention_mask = attention_mask[..., -sliding_window:] if attention_mask is not None else None
-
        xk = xk.repeat_interleave(self.num_heads // self.num_kv_heads, dim=1)
        xv = xv.repeat_interleave(self.num_heads // self.num_kv_heads, dim=1)

@@ -584,12 +559,10 @@ class TransformerBlockGemma2(nn.Module):
        optimized_attention=None,
        past_key_value: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
    ):
-        sliding_window = None
        if self.transformer_type == 'gemma3':
            if self.sliding_attention:
-                sliding_window = self.sliding_attention
                if x.shape[1] > self.sliding_attention:
-                    sliding_mask = torch.full((x.shape[1], x.shape[1]), torch.finfo(x.dtype).min, device=x.device, dtype=x.dtype)
+                    sliding_mask = torch.full((x.shape[1], x.shape[1]), float("-inf"), device=x.device, dtype=x.dtype)
                    sliding_mask.tril_(diagonal=-self.sliding_attention)
                    if attention_mask is not None:
                        attention_mask = attention_mask + sliding_mask
@@ -608,7 +581,6 @@ class TransformerBlockGemma2(nn.Module):
            freqs_cis=freqs_cis,
            optimized_attention=optimized_attention,
            past_key_value=past_key_value,
-            sliding_window=sliding_window,
        )

        x = self.post_attention_layernorm(x)
@@ -793,107 +765,6 @@ class BaseLlama:
    def forward(self, input_ids, *args, **kwargs):
        return self.model(input_ids, *args, **kwargs)

-class BaseGenerate:
-    def logits(self, x):
-        input = x[:, -1:]
-        if hasattr(self.model, "lm_head"):
-            module = self.model.lm_head
-        else:
-            module = self.model.embed_tokens
-
-        offload_stream = None
-        if module.comfy_cast_weights:
-            weight, _, offload_stream = comfy.ops.cast_bias_weight(module, input, offloadable=True)
-        else:
-            weight = self.model.embed_tokens.weight.to(x)
-
-        x = torch.nn.functional.linear(input, weight, None)
-
-        comfy.ops.uncast_bias_weight(module, weight, None, offload_stream)
-        return x
-
-    def generate(self, embeds=None, do_sample=True, max_length=256, temperature=1.0, top_k=50, top_p=0.9, min_p=0.0, repetition_penalty=1.0, seed=42, stop_tokens=None, initial_tokens=[], execution_dtype=None, min_tokens=0):
-        device = embeds.device
-        model_config = self.model.config
-
-        if stop_tokens is None:
-            stop_tokens = self.model.config.stop_tokens
-
-        if execution_dtype is None:
-            if comfy.model_management.should_use_bf16(device):
-                execution_dtype = torch.bfloat16
-            else:
-                execution_dtype = torch.float32
-        embeds = embeds.to(execution_dtype)
-
-        if embeds.ndim == 2:
-            embeds = embeds.unsqueeze(0)
-
-        past_key_values = [] #kv_cache init
-        max_cache_len = embeds.shape[1] + max_length
-        for x in range(model_config.num_hidden_layers):
-            past_key_values.append((torch.empty([embeds.shape[0], model_config.num_key_value_heads, max_cache_len, model_config.head_dim], device=device, dtype=execution_dtype),
-                                    torch.empty([embeds.shape[0], model_config.num_key_value_heads, max_cache_len, model_config.head_dim], device=device, dtype=execution_dtype), 0))
-
-        generator = torch.Generator(device=device).manual_seed(seed) if do_sample else None
-
-        generated_token_ids = []
-        pbar = comfy.utils.ProgressBar(max_length)
-
-        # Generation loop
-        for step in tqdm(range(max_length), desc="Generating tokens"):
-            x, _, past_key_values = self.model.forward(None, embeds=embeds, attention_mask=None, past_key_values=past_key_values)
-            logits = self.logits(x)[:, -1]
-            next_token = self.sample_token(logits, temperature, top_k, top_p, min_p, repetition_penalty, initial_tokens + generated_token_ids, generator, do_sample=do_sample)
-            token_id = next_token[0].item()
-            generated_token_ids.append(token_id)
-
-            embeds = self.model.embed_tokens(next_token).to(execution_dtype)
-            pbar.update(1)
-
-            if token_id in stop_tokens:
-                break
-
-        return generated_token_ids
-
-    def sample_token(self, logits, temperature, top_k, top_p, min_p, repetition_penalty, token_history, generator, do_sample=True):
-
-        if not do_sample or temperature == 0.0:
-            return torch.argmax(logits, dim=-1, keepdim=True)
-
-        # Sampling mode
-        if repetition_penalty != 1.0:
-            for i in range(logits.shape[0]):
-                for token_id in set(token_history):
-                    logits[i, token_id] *= repetition_penalty if logits[i, token_id] < 0 else 1/repetition_penalty
-
-        if temperature != 1.0:
-            logits = logits / temperature
-
-        if top_k > 0:
-            indices_to_remove = logits < torch.topk(logits, top_k)[0][..., -1, None]
-            logits[indices_to_remove] = torch.finfo(logits.dtype).min
-
-        if min_p > 0.0:
-            probs_before_filter = torch.nn.functional.softmax(logits, dim=-1)
-            top_probs, _ = probs_before_filter.max(dim=-1, keepdim=True)
-            min_threshold = min_p * top_probs
-            indices_to_remove = probs_before_filter < min_threshold
-            logits[indices_to_remove] = torch.finfo(logits.dtype).min
-
-        if top_p < 1.0:
-            sorted_logits, sorted_indices = torch.sort(logits, descending=True)
-            cumulative_probs = torch.cumsum(torch.nn.functional.softmax(sorted_logits, dim=-1), dim=-1)
-            sorted_indices_to_remove = cumulative_probs > top_p
-            sorted_indices_to_remove[..., 0] = False
-            indices_to_remove = torch.zeros_like(logits, dtype=torch.bool)
-            indices_to_remove.scatter_(1, sorted_indices, sorted_indices_to_remove)
-            logits[indices_to_remove] = torch.finfo(logits.dtype).min
-
-        probs = torch.nn.functional.softmax(logits, dim=-1)
-
-        return torch.multinomial(probs, num_samples=1, generator=generator)
-
 class BaseQwen3:
    def logits(self, x):
        input = x[:, -1:]
@@ -937,7 +808,7 @@ class Qwen25_3B(BaseLlama, torch.nn.Module):
        self.model = Llama2_(config, device=device, dtype=dtype, ops=operations)
        self.dtype = dtype

-class Qwen3_06B(BaseLlama, BaseQwen3, BaseGenerate, torch.nn.Module):
+class Qwen3_06B(BaseLlama, BaseQwen3, torch.nn.Module):
    def __init__(self, config_dict, dtype, device, operations):
        super().__init__()
        config = Qwen3_06BConfig(**config_dict)
@@ -964,7 +835,7 @@ class Qwen3_2B_ACE15_lm(BaseLlama, BaseQwen3, torch.nn.Module):
        self.model = Llama2_(config, device=device, dtype=dtype, ops=operations)
        self.dtype = dtype

-class Qwen3_4B(BaseLlama, BaseQwen3, BaseGenerate, torch.nn.Module):
+class Qwen3_4B(BaseLlama, BaseQwen3, torch.nn.Module):
    def __init__(self, config_dict, dtype, device, operations):
        super().__init__()
        config = Qwen3_4BConfig(**config_dict)
@@ -982,7 +853,7 @@ class Qwen3_4B_ACE15_lm(BaseLlama, BaseQwen3, torch.nn.Module):
        self.model = Llama2_(config, device=device, dtype=dtype, ops=operations)
        self.dtype = dtype

-class Qwen3_8B(BaseLlama, BaseQwen3, BaseGenerate, torch.nn.Module):
+class Qwen3_8B(BaseLlama, BaseQwen3, torch.nn.Module):
    def __init__(self, config_dict, dtype, device, operations):
        super().__init__()
        config = Qwen3_8BConfig(**config_dict)
@@ -1000,7 +871,7 @@ class Ovis25_2B(BaseLlama, torch.nn.Module):
        self.model = Llama2_(config, device=device, dtype=dtype, ops=operations)
        self.dtype = dtype

-class Qwen25_7BVLI(BaseLlama, BaseGenerate, torch.nn.Module):
+class Qwen25_7BVLI(BaseLlama, torch.nn.Module):
    def __init__(self, config_dict, dtype, device, operations):
        super().__init__()
        config = Qwen25_7BVLI_Config(**config_dict)
@@ -1010,9 +881,6 @@ class Qwen25_7BVLI(BaseLlama, BaseGenerate, torch.nn.Module):
        self.visual = qwen_vl.Qwen2VLVisionTransformer(hidden_size=1280, output_hidden_size=config.hidden_size, device=device, dtype=dtype, ops=operations)
        self.dtype = dtype

-        # todo: should this be tied or not?
-        #self.lm_head = operations.Linear(config.hidden_size, config.vocab_size, bias=False, device=device, dtype=dtype)
-
    def preprocess_embed(self, embed, device):
        if embed["type"] == "image":
            image, grid = qwen_vl.process_qwen2vl_images(embed["data"])
@@ -1046,7 +914,7 @@ class Qwen25_7BVLI(BaseLlama, BaseGenerate, torch.nn.Module):

        return super().forward(x, attention_mask=attention_mask, embeds=embeds, num_tokens=num_tokens, intermediate_output=intermediate_output, final_layer_norm_intermediate=final_layer_norm_intermediate, dtype=dtype, position_ids=position_ids)

-class Gemma2_2B(BaseLlama, BaseGenerate, torch.nn.Module):
+class Gemma2_2B(BaseLlama, torch.nn.Module):
    def __init__(self, config_dict, dtype, device, operations):
        super().__init__()
        config = Gemma2_2B_Config(**config_dict)
@@ -1055,7 +923,7 @@ class Gemma2_2B(BaseLlama, BaseGenerate, torch.nn.Module):
        self.model = Llama2_(config, device=device, dtype=dtype, ops=operations)
        self.dtype = dtype

-class Gemma3_4B(BaseLlama, BaseGenerate, torch.nn.Module):
+class Gemma3_4B(BaseLlama, torch.nn.Module):
    def __init__(self, config_dict, dtype, device, operations):
        super().__init__()
        config = Gemma3_4B_Config(**config_dict)
@@ -1064,25 +932,7 @@ class Gemma3_4B(BaseLlama, BaseGenerate, torch.nn.Module):
        self.model = Llama2_(config, device=device, dtype=dtype, ops=operations)
        self.dtype = dtype

-class Gemma3_4B_Vision(BaseLlama, BaseGenerate, torch.nn.Module):
-    def __init__(self, config_dict, dtype, device, operations):
-        super().__init__()
-        config = Gemma3_4B_Vision_Config(**config_dict)
-        self.num_layers = config.num_hidden_layers
-
-        self.model = Llama2_(config, device=device, dtype=dtype, ops=operations)
-        self.dtype = dtype
-        self.multi_modal_projector = Gemma3MultiModalProjector(config, dtype, device, operations)
-        self.vision_model = comfy.clip_model.CLIPVision(config.vision_config, dtype, device, operations)
-        self.image_size = config.vision_config["image_size"]
-
-    def preprocess_embed(self, embed, device):
-        if embed["type"] == "image":
-            image = comfy.clip_model.clip_preprocess(embed["data"], size=self.image_size, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5], crop=True)
-            return self.multi_modal_projector(self.vision_model(image.to(device, dtype=torch.float32))[0]), None
-        return None, None
-
-class Gemma3_12B(BaseLlama, BaseGenerate, torch.nn.Module):
+class Gemma3_12B(BaseLlama, torch.nn.Module):
    def __init__(self, config_dict, dtype, device, operations):
        super().__init__()
        config = Gemma3_12B_Config(**config_dict)
--- a/comfy/text_encoders/longcat_image.py
+++ b/comfy/text_encoders/longcat_image.py
@@ -1,184 +0,0 @@
-import re
-import numbers
-import torch
-from comfy import sd1_clip
-from comfy.text_encoders.qwen_image import Qwen25_7BVLITokenizer, Qwen25_7BVLIModel
-import logging
-
-logger = logging.getLogger(__name__)
-
-QUOTE_PAIRS = [("'", "'"), ('"', '"'), ("\u2018", "\u2019"), ("\u201c", "\u201d")]
-QUOTE_PATTERN = "|".join(
-    [
-        re.escape(q1) + r"[^" + re.escape(q1 + q2) + r"]*?" + re.escape(q2)
-        for q1, q2 in QUOTE_PAIRS
-    ]
-)
-WORD_INTERNAL_QUOTE_RE = re.compile(r"[a-zA-Z]+'[a-zA-Z]+")
-
-
-def split_quotation(prompt):
-    matches = WORD_INTERNAL_QUOTE_RE.findall(prompt)
-    mapping = []
-    for i, word_src in enumerate(set(matches)):
-        word_tgt = "longcat_$##$_longcat" * (i + 1)
-        prompt = prompt.replace(word_src, word_tgt)
-        mapping.append((word_src, word_tgt))
-
-    parts = re.split(f"({QUOTE_PATTERN})", prompt)
-    result = []
-    for part in parts:
-        for word_src, word_tgt in mapping:
-            part = part.replace(word_tgt, word_src)
-        if not part:
-            continue
-        is_quoted = bool(re.match(QUOTE_PATTERN, part))
-        result.append((part, is_quoted))
-    return result
-
-
-class LongCatImageBaseTokenizer(Qwen25_7BVLITokenizer):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.max_length = 512
-
-    def tokenize_with_weights(self, text, return_word_ids=False, **kwargs):
-        parts = split_quotation(text)
-        all_tokens = []
-        for part_text, is_quoted in parts:
-            if is_quoted:
-                for char in part_text:
-                    ids = self.tokenizer(char, add_special_tokens=False)["input_ids"]
-                    all_tokens.extend(ids)
-            else:
-                ids = self.tokenizer(part_text, add_special_tokens=False)["input_ids"]
-                all_tokens.extend(ids)
-
-        if len(all_tokens) > self.max_length:
-            all_tokens = all_tokens[: self.max_length]
-            logger.warning(f"Truncated prompt to {self.max_length} tokens")
-
-        output = [(t, 1.0) for t in all_tokens]
-        # Pad to max length
-        self.pad_tokens(output, self.max_length - len(output))
-        return [output]
-
-
-class LongCatImageTokenizer(sd1_clip.SD1Tokenizer):
-    def __init__(self, embedding_directory=None, tokenizer_data={}):
-        super().__init__(
-            embedding_directory=embedding_directory,
-            tokenizer_data=tokenizer_data,
-            name="qwen25_7b",
-            tokenizer=LongCatImageBaseTokenizer,
-        )
-        self.longcat_template_prefix = "<|im_start|>system\nAs an image captioning expert, generate a descriptive text prompt based on an image content, suitable for input to a text-to-image model.<|im_end|>\n<|im_start|>user\n"
-        self.longcat_template_suffix = "<|im_end|>\n<|im_start|>assistant\n"
-
-    def tokenize_with_weights(self, text, return_word_ids=False, **kwargs):
-        skip_template = False
-        if text.startswith("<|im_start|>"):
-            skip_template = True
-        if text.startswith("<|start_header_id|>"):
-            skip_template = True
-        if text == "":
-            text = " "
-
-        base_tok = getattr(self, "qwen25_7b")
-        if skip_template:
-            tokens = super().tokenize_with_weights(
-                text, return_word_ids=return_word_ids, disable_weights=True, **kwargs
-            )
-        else:
-            prefix_ids = base_tok.tokenizer(
-                self.longcat_template_prefix, add_special_tokens=False
-            )["input_ids"]
-            suffix_ids = base_tok.tokenizer(
-                self.longcat_template_suffix, add_special_tokens=False
-            )["input_ids"]
-
-            prompt_tokens = base_tok.tokenize_with_weights(
-                text, return_word_ids=return_word_ids, **kwargs
-            )
-            prompt_pairs = prompt_tokens[0]
-
-            prefix_pairs = [(t, 1.0) for t in prefix_ids]
-            suffix_pairs = [(t, 1.0) for t in suffix_ids]
-
-            combined = prefix_pairs + prompt_pairs + suffix_pairs
-            tokens = {"qwen25_7b": [combined]}
-
-        return tokens
-
-
-class LongCatImageTEModel(sd1_clip.SD1ClipModel):
-    def __init__(self, device="cpu", dtype=None, model_options={}):
-        super().__init__(
-            device=device,
-            dtype=dtype,
-            name="qwen25_7b",
-            clip_model=Qwen25_7BVLIModel,
-            model_options=model_options,
-        )
-
-    def encode_token_weights(self, token_weight_pairs, template_end=-1):
-        out, pooled, extra = super().encode_token_weights(token_weight_pairs)
-        tok_pairs = token_weight_pairs["qwen25_7b"][0]
-        count_im_start = 0
-        if template_end == -1:
-            for i, v in enumerate(tok_pairs):
-                elem = v[0]
-                if not torch.is_tensor(elem):
-                    if isinstance(elem, numbers.Integral):
-                        if elem == 151644 and count_im_start < 2:
-                            template_end = i
-                            count_im_start += 1
-
-        if out.shape[1] > (template_end + 3):
-            if tok_pairs[template_end + 1][0] == 872:
-                if tok_pairs[template_end + 2][0] == 198:
-                    template_end += 3
-
-        if template_end == -1:
-            template_end = 0
-
-        suffix_start = None
-        for i in range(len(tok_pairs) - 1, -1, -1):
-            elem = tok_pairs[i][0]
-            if not torch.is_tensor(elem) and isinstance(elem, numbers.Integral):
-                if elem == 151645:
-                    suffix_start = i
-                    break
-
-        out = out[:, template_end:]
-
-        if "attention_mask" in extra:
-            extra["attention_mask"] = extra["attention_mask"][:, template_end:]
-            if extra["attention_mask"].sum() == torch.numel(extra["attention_mask"]):
-                extra.pop("attention_mask")
-
-        if suffix_start is not None:
-            suffix_len = len(tok_pairs) - suffix_start
-            if suffix_len > 0 and out.shape[1] > suffix_len:
-                out = out[:, :-suffix_len]
-                if "attention_mask" in extra:
-                    extra["attention_mask"] = extra["attention_mask"][:, :-suffix_len]
-                    if extra["attention_mask"].sum() == torch.numel(
-                        extra["attention_mask"]
-                    ):
-                        extra.pop("attention_mask")
-
-        return out, pooled, extra
-
-
-def te(dtype_llama=None, llama_quantization_metadata=None):
-    class LongCatImageTEModel_(LongCatImageTEModel):
-        def __init__(self, device="cpu", dtype=None, model_options={}):
-            if llama_quantization_metadata is not None:
-                model_options = model_options.copy()
-                model_options["quantization_metadata"] = llama_quantization_metadata
-            if dtype_llama is not None:
-                dtype = dtype_llama
-            super().__init__(device=device, dtype=dtype, model_options=model_options)
-
-    return LongCatImageTEModel_
--- a/comfy/text_encoders/lt.py
+++ b/comfy/text_encoders/lt.py
@@ -3,10 +3,9 @@ import os
 from transformers import T5TokenizerFast
 from .spiece_tokenizer import SPieceTokenizer
 import comfy.text_encoders.genmo
+from comfy.ldm.lightricks.embeddings_connector import Embeddings1DConnector
 import torch
 import comfy.utils
-import math
-import itertools

 class T5XXLTokenizer(sd1_clip.SDTokenizer):
    def __init__(self, embedding_directory=None, tokenizer_data={}):
@@ -23,86 +22,46 @@ def ltxv_te(*args, **kwargs):
    return comfy.text_encoders.genmo.mochi_te(*args, **kwargs)


-class Gemma3_Tokenizer():
-    def state_dict(self):
-        return {"spiece_model": self.tokenizer.serialize_model()}
-
-    def tokenize_with_weights(self, text, return_word_ids=False, image=None, llama_template=None, skip_template=True, **kwargs):
-        self.llama_template = "<start_of_turn>system\nYou are a helpful assistant.<end_of_turn>\n<start_of_turn>user\n{}<end_of_turn>\n<start_of_turn>model\n"
-        self.llama_template_images = "<start_of_turn>system\nYou are a helpful assistant.<end_of_turn>\n<start_of_turn>user\n\n<image_soft_token>{}<end_of_turn>\n\n<start_of_turn>model\n"
-
-        if image is None:
-            images = []
-        else:
-            samples = image.movedim(-1, 1)
-            total = int(896 * 896)
-
-            scale_by = math.sqrt(total / (samples.shape[3] * samples.shape[2]))
-            width = round(samples.shape[3] * scale_by)
-            height = round(samples.shape[2] * scale_by)
-
-            s = comfy.utils.common_upscale(samples, width, height, "area", "disabled").movedim(1, -1)
-            images = [s[:, :, :, :3]]
-
-        if text.startswith('<start_of_turn>'):
-            skip_template = True
-
-        if skip_template:
-            llama_text = text
-        else:
-            if llama_template is None:
-                if len(images) > 0:
-                    llama_text = self.llama_template_images.format(text)
-                else:
-                    llama_text = self.llama_template.format(text)
-            else:
-                llama_text = llama_template.format(text)
-
-        text_tokens = super().tokenize_with_weights(llama_text, return_word_ids)
-
-        if len(images) > 0:
-            embed_count = 0
-            for r in text_tokens:
-                for i, token in enumerate(r):
-                    if token[0] == 262144 and embed_count < len(images):
-                        r[i] = ({"type": "image", "data": images[embed_count]},) + token[1:]
-                        embed_count += 1
-        return text_tokens
-
-class Gemma3_12BTokenizer(Gemma3_Tokenizer, sd1_clip.SDTokenizer):
+class Gemma3_12BTokenizer(sd1_clip.SDTokenizer):
    def __init__(self, embedding_directory=None, tokenizer_data={}):
        tokenizer = tokenizer_data.get("spiece_model", None)
-        special_tokens = {"<image_soft_token>": 262144, "<end_of_turn>": 106}
-        super().__init__(tokenizer, pad_with_end=False, embedding_size=3840, embedding_key='gemma3_12b', tokenizer_class=SPieceTokenizer, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=1024, pad_left=True, disable_weights=True, tokenizer_args={"add_bos": True, "add_eos": False, "special_tokens": special_tokens}, tokenizer_data=tokenizer_data)
+        super().__init__(tokenizer, pad_with_end=False, embedding_size=3840, embedding_key='gemma3_12b', tokenizer_class=SPieceTokenizer, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=512, pad_left=True, disable_weights=True, tokenizer_args={"add_bos": True, "add_eos": False}, tokenizer_data=tokenizer_data)

+    def state_dict(self):
+        return {"spiece_model": self.tokenizer.serialize_model()}

 class LTXAVGemmaTokenizer(sd1_clip.SD1Tokenizer):
    def __init__(self, embedding_directory=None, tokenizer_data={}):
        super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, name="gemma3_12b", tokenizer=Gemma3_12BTokenizer)

-
 class Gemma3_12BModel(sd1_clip.SDClipModel):
    def __init__(self, device="cpu", layer="all", layer_idx=None, dtype=None, attention_mask=True, model_options={}):
        llama_quantization_metadata = model_options.get("llama_quantization_metadata", None)
        if llama_quantization_metadata is not None:
            model_options = model_options.copy()
            model_options["quantization_metadata"] = llama_quantization_metadata
-        self.dtypes = set()
-        self.dtypes.add(dtype)
+
        super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config={}, dtype=dtype, special_tokens={"start": 2, "pad": 0}, layer_norm_hidden_state=False, model_class=comfy.text_encoders.llama.Gemma3_12B, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options)

-    def generate(self, tokens, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed):
-        tokens_only = [[t[0] for t in b] for b in tokens]
-        embeds, _, _, embeds_info = self.process_tokens(tokens_only, self.execution_device)
-        comfy.utils.normalize_image_embeddings(embeds, embeds_info, self.transformer.model.config.hidden_size ** 0.5)
-        return self.transformer.generate(embeds, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed, stop_tokens=[106])  # 106 is <end_of_turn>
+    def tokenize_with_weights(self, text, return_word_ids=False, llama_template="{}", image_embeds=None, **kwargs):
+        text = llama_template.format(text)
+        text_tokens = super().tokenize_with_weights(text, return_word_ids)
+        embed_count = 0
+        for k in text_tokens:
+            tt = text_tokens[k]
+            for r in tt:
+                for i in range(len(r)):
+                    if r[i][0] == 262144:
+                        if image_embeds is not None and embed_count < image_embeds.shape[0]:
+                            r[i] = ({"type": "embedding", "data": image_embeds[embed_count], "original_type": "image"},) + r[i][1:]
+                            embed_count += 1
+        return text_tokens

 class LTXAVTEModel(torch.nn.Module):
    def __init__(self, dtype_llama=None, device="cpu", dtype=None, model_options={}):
        super().__init__()
        self.dtypes = set()
        self.dtypes.add(dtype)
-        self.compat_mode = False

        self.gemma3_12b = Gemma3_12BModel(device=device, dtype=dtype_llama, model_options=model_options, layer="all", layer_idx=None)
        self.dtypes.add(dtype_llama)
@@ -110,11 +69,6 @@ class LTXAVTEModel(torch.nn.Module):
        operations = self.gemma3_12b.operations # TODO
        self.text_embedding_projection = operations.Linear(3840 * 49, 3840, bias=False, dtype=dtype, device=device)

-    def enable_compat_mode(self):  # TODO: remove
-        from comfy.ldm.lightricks.embeddings_connector import Embeddings1DConnector
-        operations = self.gemma3_12b.operations
-        dtype = self.text_embedding_projection.weight.dtype
-        device = self.text_embedding_projection.weight.device
        self.audio_embeddings_connector = Embeddings1DConnector(
            split_rope=True,
            double_precision_rope=True,
@@ -130,7 +84,6 @@ class LTXAVTEModel(torch.nn.Module):
            device=device,
            operations=operations,
        )
-        self.compat_mode = True

    def set_clip_options(self, options):
        self.execution_device = options.get("execution_device", self.execution_device)
@@ -153,45 +106,30 @@ class LTXAVTEModel(torch.nn.Module):
        out = out.reshape((out.shape[0], out.shape[1], -1))
        out = self.text_embedding_projection(out)
        out = out.float()
-
-        if self.compat_mode:
-            out_vid = self.video_embeddings_connector(out)[0]
-            out_audio = self.audio_embeddings_connector(out)[0]
-            out = torch.concat((out_vid, out_audio), dim=-1)
+        out_vid = self.video_embeddings_connector(out)[0]
+        out_audio = self.audio_embeddings_connector(out)[0]
+        out = torch.concat((out_vid, out_audio), dim=-1)

        return out.to(out_device), pooled

-    def generate(self, tokens, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed):
-        return self.gemma3_12b.generate(tokens["gemma3_12b"], do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed)
-
    def load_sd(self, sd):
        if "model.layers.47.self_attn.q_norm.weight" in sd:
            return self.gemma3_12b.load_sd(sd)
        else:
-            sdo = comfy.utils.state_dict_prefix_replace(sd, {"text_embedding_projection.aggregate_embed.weight": "text_embedding_projection.weight"}, filter_keys=True)
+            sdo = comfy.utils.state_dict_prefix_replace(sd, {"text_embedding_projection.aggregate_embed.weight": "text_embedding_projection.weight", "model.diffusion_model.video_embeddings_connector.": "video_embeddings_connector.", "model.diffusion_model.audio_embeddings_connector.": "audio_embeddings_connector."}, filter_keys=True)
            if len(sdo) == 0:
                sdo = sd

            missing_all = []
            unexpected_all = []

-            for prefix, component in [("text_embedding_projection.", self.text_embedding_projection)]:
+            for prefix, component in [("text_embedding_projection.", self.text_embedding_projection), ("video_embeddings_connector.", self.video_embeddings_connector), ("audio_embeddings_connector.", self.audio_embeddings_connector)]:
                component_sd = {k.replace(prefix, ""): v for k, v in sdo.items() if k.startswith(prefix)}
                if component_sd:
                    missing, unexpected = component.load_state_dict(component_sd, strict=False, assign=getattr(self, "can_assign_sd", False))
                    missing_all.extend([f"{prefix}{k}" for k in missing])
                    unexpected_all.extend([f"{prefix}{k}" for k in unexpected])

-            if "model.diffusion_model.audio_embeddings_connector.transformer_1d_blocks.2.attn1.to_q.bias" not in sd:  # TODO: remove
-                ww = sd.get("model.diffusion_model.audio_embeddings_connector.transformer_1d_blocks.0.attn1.to_q.bias", None)
-                if ww is not None:
-                    if ww.shape[0] == 3840:
-                        self.enable_compat_mode()
-                        sdv = comfy.utils.state_dict_prefix_replace(sd, {"model.diffusion_model.video_embeddings_connector.": ""}, filter_keys=True)
-                        self.video_embeddings_connector.load_state_dict(sdv, strict=False, assign=getattr(self, "can_assign_sd", False))
-                        sda = comfy.utils.state_dict_prefix_replace(sd, {"model.diffusion_model.audio_embeddings_connector.": ""}, filter_keys=True)
-                        self.audio_embeddings_connector.load_state_dict(sda, strict=False, assign=getattr(self, "can_assign_sd", False))
-
            return (missing_all, unexpected_all)

    def memory_estimation_function(self, token_weight_pairs, device=None):
@@ -200,10 +138,8 @@ class LTXAVTEModel(torch.nn.Module):
            constant /= 2.0

        token_weight_pairs = token_weight_pairs.get("gemma3_12b", [])
-        m = min([sum(1 for _ in itertools.takewhile(lambda x: x[0] == 0, sub)) for sub in token_weight_pairs])
-
-        num_tokens = sum(map(lambda a: len(a), token_weight_pairs)) - m
-        num_tokens = max(num_tokens, 642)
+        num_tokens = sum(map(lambda a: len(a), token_weight_pairs))
+        num_tokens = max(num_tokens, 64)
        return num_tokens * constant * 1024 * 1024

 def ltxav_te(dtype_llama=None, llama_quantization_metadata=None):
@@ -216,14 +152,3 @@ def ltxav_te(dtype_llama=None, llama_quantization_metadata=None):
                dtype = dtype_llama
            super().__init__(dtype_llama=dtype_llama, device=device, dtype=dtype, model_options=model_options)
    return LTXAVTEModel_
-
-def gemma3_te(dtype_llama=None, llama_quantization_metadata=None):
-    class Gemma3_12BModel_(Gemma3_12BModel):
-        def __init__(self, device="cpu", dtype=None, model_options={}):
-            if llama_quantization_metadata is not None:
-                model_options = model_options.copy()
-                model_options["llama_quantization_metadata"] = llama_quantization_metadata
-            if dtype_llama is not None:
-                dtype = dtype_llama
-            super().__init__(device=device, dtype=dtype, model_options=model_options)
-    return Gemma3_12BModel_
--- a/comfy/text_encoders/lumina2.py
+++ b/comfy/text_encoders/lumina2.py
@@ -1,23 +1,23 @@
 from comfy import sd1_clip
 from .spiece_tokenizer import SPieceTokenizer
 import comfy.text_encoders.llama
-from comfy.text_encoders.lt import Gemma3_Tokenizer
-import comfy.utils
+

 class Gemma2BTokenizer(sd1_clip.SDTokenizer):
    def __init__(self, embedding_directory=None, tokenizer_data={}):
        tokenizer = tokenizer_data.get("spiece_model", None)
-        special_tokens = {"<end_of_turn>": 107}
-        super().__init__(tokenizer, pad_with_end=False, embedding_size=2304, embedding_key='gemma2_2b', tokenizer_class=SPieceTokenizer, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=1, tokenizer_args={"add_bos": True, "add_eos": False, "special_tokens": special_tokens}, tokenizer_data=tokenizer_data)
+        super().__init__(tokenizer, pad_with_end=False, embedding_size=2304, embedding_key='gemma2_2b', tokenizer_class=SPieceTokenizer, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=1, tokenizer_args={"add_bos": True, "add_eos": False}, tokenizer_data=tokenizer_data)

    def state_dict(self):
        return {"spiece_model": self.tokenizer.serialize_model()}

-class Gemma3_4BTokenizer(Gemma3_Tokenizer, sd1_clip.SDTokenizer):
+class Gemma3_4BTokenizer(sd1_clip.SDTokenizer):
    def __init__(self, embedding_directory=None, tokenizer_data={}):
        tokenizer = tokenizer_data.get("spiece_model", None)
-        special_tokens = {"<image_soft_token>": 262144, "<end_of_turn>": 106}
-        super().__init__(tokenizer, pad_with_end=False, embedding_size=2560, embedding_key='gemma3_4b', tokenizer_class=SPieceTokenizer, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=1, tokenizer_args={"add_bos": True, "add_eos": False, "special_tokens": special_tokens}, disable_weights=True, tokenizer_data=tokenizer_data)
+        super().__init__(tokenizer, pad_with_end=False, embedding_size=2560, embedding_key='gemma3_4b', tokenizer_class=SPieceTokenizer, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=1, tokenizer_args={"add_bos": True, "add_eos": False}, disable_weights=True, tokenizer_data=tokenizer_data)
+
+    def state_dict(self):
+        return {"spiece_model": self.tokenizer.serialize_model()}

 class LuminaTokenizer(sd1_clip.SD1Tokenizer):
    def __init__(self, embedding_directory=None, tokenizer_data={}):
@@ -40,20 +40,6 @@ class Gemma3_4BModel(sd1_clip.SDClipModel):

        super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config={}, dtype=dtype, special_tokens={"start": 2, "pad": 0}, layer_norm_hidden_state=False, model_class=comfy.text_encoders.llama.Gemma3_4B, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options)

-class Gemma3_4B_Vision_Model(sd1_clip.SDClipModel):
-    def __init__(self, device="cpu", layer="hidden", layer_idx=-2, dtype=None, attention_mask=True, model_options={}):
-        llama_quantization_metadata = model_options.get("llama_quantization_metadata", None)
-        if llama_quantization_metadata is not None:
-            model_options = model_options.copy()
-            model_options["quantization_metadata"] = llama_quantization_metadata
-
-        super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config={}, dtype=dtype, special_tokens={"start": 2, "pad": 0}, layer_norm_hidden_state=False, model_class=comfy.text_encoders.llama.Gemma3_4B_Vision, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options)
-
-    def process_tokens(self, tokens, device):
-        embeds, _, _, embeds_info = super().process_tokens(tokens, device)
-        comfy.utils.normalize_image_embeddings(embeds, embeds_info, self.transformer.model.config.hidden_size ** 0.5)
-        return embeds
-
 class LuminaModel(sd1_clip.SD1ClipModel):
    def __init__(self, device="cpu", dtype=None, model_options={}, name="gemma2_2b", clip_model=Gemma2_2BModel):
        super().__init__(device=device, dtype=dtype, name=name, clip_model=clip_model, model_options=model_options)
@@ -64,8 +50,6 @@ def te(dtype_llama=None, llama_quantization_metadata=None, model_type="gemma2_2b
        model = Gemma2_2BModel
    elif model_type == "gemma3_4b":
        model = Gemma3_4BModel
-    elif model_type == "gemma3_4b_vision":
-        model = Gemma3_4B_Vision_Model

    class LuminaTEModel_(LuminaModel):
        def __init__(self, device="cpu", dtype=None, model_options={}):
--- a/comfy/text_encoders/spiece_tokenizer.py
+++ b/comfy/text_encoders/spiece_tokenizer.py
@@ -6,10 +6,9 @@ class SPieceTokenizer:
    def from_pretrained(path, **kwargs):
        return SPieceTokenizer(path, **kwargs)

-    def __init__(self, tokenizer_path, add_bos=False, add_eos=True, special_tokens=None):
+    def __init__(self, tokenizer_path, add_bos=False, add_eos=True):
        self.add_bos = add_bos
        self.add_eos = add_eos
-        self.special_tokens = special_tokens
        import sentencepiece
        if torch.is_tensor(tokenizer_path):
            tokenizer_path = tokenizer_path.numpy().tobytes()
@@ -28,32 +27,8 @@ class SPieceTokenizer:
        return out

    def __call__(self, string):
-        if self.special_tokens is not None:
-            import re
-            special_tokens_pattern = '|'.join(re.escape(token) for token in self.special_tokens.keys())
-            if special_tokens_pattern and re.search(special_tokens_pattern, string):
-                parts = re.split(f'({special_tokens_pattern})', string)
-                result = []
-                for part in parts:
-                    if not part:
-                        continue
-                    if part in self.special_tokens:
-                        result.append(self.special_tokens[part])
-                    else:
-                        encoded = self.tokenizer.encode(part, add_bos=False, add_eos=False)
-                        result.extend(encoded)
-                return {"input_ids": result}
-
        out = self.tokenizer.encode(string)
        return {"input_ids": out}

-    def decode(self, token_ids, skip_special_tokens=False):
-
-        if skip_special_tokens and self.special_tokens:
-            special_token_ids = set(self.special_tokens.values())
-            token_ids = [tid for tid in token_ids if tid not in special_token_ids]
-
-        return self.tokenizer.decode(token_ids)
-
    def serialize_model(self):
        return torch.ByteTensor(list(self.tokenizer.serialized_model_proto()))
--- a/comfy/utils.py
+++ b/comfy/utils.py
@@ -29,7 +29,7 @@ import itertools
 from torch.nn.functional import interpolate
 from tqdm.auto import trange
 from einops import rearrange
-from comfy.cli_args import args
+from comfy.cli_args import args, enables_dynamic_vram
 import json
 import time
 import mmap
@@ -113,7 +113,7 @@ def load_torch_file(ckpt, safe_load=False, device=None, return_metadata=False):
    metadata = None
    if ckpt.lower().endswith(".safetensors") or ckpt.lower().endswith(".sft"):
        try:
-            if comfy.memory_management.aimdo_enabled:
+            if enables_dynamic_vram():
                sd, metadata = load_safetensors(ckpt)
                if not return_metadata:
                    metadata = None
@@ -1154,7 +1154,7 @@ def tiled_scale(samples, function, tile_x=64, tile_y=64, overlap = 8, upscale_am
    return tiled_scale_multidim(samples, function, (tile_y, tile_x), overlap=overlap, upscale_amount=upscale_amount, out_channels=out_channels, output_device=output_device, pbar=pbar)

 def model_trange(*args, **kwargs):
-    if not comfy.memory_management.aimdo_enabled:
+    if comfy.memory_management.aimdo_allocator is None:
        return trange(*args, **kwargs)

    pbar = trange(*args, **kwargs, smoothing=1.0)
@@ -1418,11 +1418,3 @@ def deepcopy_list_dict(obj, memo=None):

    memo[obj_id] = res
    return res
-
-def normalize_image_embeddings(embeds, embeds_info, scale_factor):
-    """Normalize image embeddings to match text embedding scale"""
-    for info in embeds_info:
-        if info.get("type") == "image":
-            start_idx = info["index"]
-            end_idx = start_idx + info["size"]
-            embeds[:, start_idx:end_idx, :] /= scale_factor
--- a/comfy/weight_adapter/base.py
+++ b/comfy/weight_adapter/base.py
@@ -49,12 +49,6 @@ class WeightAdapterBase:
        """
        raise NotImplementedError

-    def calculate_shape(
-        self,
-        key
-    ):
-        return None
-
    def calculate_weight(
        self,
        weight,
--- a/comfy/weight_adapter/lora.py
+++ b/comfy/weight_adapter/lora.py
@@ -214,13 +214,6 @@ class LoRAAdapter(WeightAdapterBase):
        else:
            return None

-    def calculate_shape(
-        self,
-        key
-    ):
-        reshape = self.weights[5]
-        return tuple(reshape) if reshape is not None else None
-
    def calculate_weight(
        self,
        weight,
--- a/comfy_api/feature_flags.py
+++ b/comfy_api/feature_flags.py
@@ -14,7 +14,6 @@ SERVER_FEATURE_FLAGS: dict[str, Any] = {
    "supports_preview_metadata": True,
    "max_upload_size": args.max_upload_size * 1024 * 1024, # Convert MB to bytes
    "extension": {"manager": {"supports_v4": True}},
-    "node_replacements": True,
 }


--- a/comfy_api/latest/init.py
+++ b/comfy_api/latest/init.py
@@ -21,17 +21,6 @@ class ComfyAPI_latest(ComfyAPIBase):
    VERSION = "latest"
    STABLE = False

-    def __init__(self):
-        super().__init__()
-        self.node_replacement = self.NodeReplacement()
-        self.execution = self.Execution()
-
-    class NodeReplacement(ProxiedSingleton):
-        async def register(self, node_replace: io.NodeReplace) -> None:
-            """Register a node replacement mapping."""
-            from server import PromptServer
-            PromptServer.instance.node_replace_manager.register(node_replace)
-
    class Execution(ProxiedSingleton):
        async def set_progress(
            self,
@@ -84,6 +73,8 @@ class ComfyAPI_latest(ComfyAPIBase):
                image=to_display,
            )

+    execution: Execution
+
 class ComfyExtension(ABC):
    async def on_load(self) -> None:
        """
--- a/comfy_api/latest/_input_impl/video_types.py
+++ b/comfy_api/latest/_input_impl/video_types.py
@@ -444,7 +444,7 @@ class VideoFromComponents(VideoInput):
            output.mux(packet)

            if audio_stream and self.__components.audio:
-                frame = av.AudioFrame.from_ndarray(waveform.float().cpu().contiguous().numpy(), format='fltp', layout=layout)
+                frame = av.AudioFrame.from_ndarray(waveform.float().cpu().numpy(), format='fltp', layout=layout)
                frame.sample_rate = audio_sample_rate
                frame.pts = 0
                output.mux(audio_stream.encode(frame))
--- a/comfy_api/latest/_io.py
+++ b/comfy_api/latest/_io.py
@@ -73,15 +73,8 @@ class RemoteOptions:
 class NumberDisplay(str, Enum):
    number = "number"
    slider = "slider"
-    gradient_slider = "gradientslider"


-class ControlAfterGenerate(str, Enum):
-    fixed = "fixed"
-    increment = "increment"
-    decrement = "decrement"
-    randomize = "randomize"
-
 class _ComfyType(ABC):
    Type = Any
    io_type: str = None
@@ -270,7 +263,7 @@ class Int(ComfyTypeIO):
    class Input(WidgetInput):
        '''Integer input.'''
        def __init__(self, id: str, display_name: str=None, optional=False, tooltip: str=None, lazy: bool=None,
-                    default: int=None, min: int=None, max: int=None, step: int=None, control_after_generate: bool | ControlAfterGenerate=None,
+                    default: int=None, min: int=None, max: int=None, step: int=None, control_after_generate: bool=None,
                    display_mode: NumberDisplay=None, socketless: bool=None, force_input: bool=None, extra_dict=None, raw_link: bool=None, advanced: bool=None):
            super().__init__(id, display_name, optional, tooltip, lazy, default, socketless, None, force_input, extra_dict, raw_link, advanced)
            self.min = min
@@ -297,15 +290,13 @@ class Float(ComfyTypeIO):
        '''Float input.'''
        def __init__(self, id: str, display_name: str=None, optional=False, tooltip: str=None, lazy: bool=None,
                    default: float=None, min: float=None, max: float=None, step: float=None, round: float=None,
-                    display_mode: NumberDisplay=None, gradient_stops: list[list[float]]=None,
-                    socketless: bool=None, force_input: bool=None, extra_dict=None, raw_link: bool=None, advanced: bool=None):
+                    display_mode: NumberDisplay=None, socketless: bool=None, force_input: bool=None, extra_dict=None, raw_link: bool=None, advanced: bool=None):
            super().__init__(id, display_name, optional, tooltip, lazy, default, socketless, None, force_input, extra_dict, raw_link, advanced)
            self.min = min
            self.max = max
            self.step = step
            self.round = round
            self.display_mode = display_mode
-            self.gradient_stops = gradient_stops
            self.default: float

        def as_dict(self):
@@ -315,7 +306,6 @@ class Float(ComfyTypeIO):
                "step": self.step,
                "round": self.round,
                "display": self.display_mode,
-                "gradient_stops": self.gradient_stops,
            })

@comfytype(io_type="STRING")
@@ -355,7 +345,7 @@ class Combo(ComfyTypeIO):
            tooltip: str=None,
            lazy: bool=None,
            default: str | int | Enum = None,
-            control_after_generate: bool | ControlAfterGenerate=None,
+            control_after_generate: bool=None,
            upload: UploadType=None,
            image_folder: FolderType=None,
            remote: RemoteOptions=None,
@@ -399,7 +389,7 @@ class MultiCombo(ComfyTypeI):
    Type = list[str]
    class Input(Combo.Input):
        def __init__(self, id: str, options: list[str], display_name: str=None, optional=False, tooltip: str=None, lazy: bool=None,
-                    default: list[str]=None, placeholder: str=None, chip: bool=None, control_after_generate: bool | ControlAfterGenerate=None,
+                    default: list[str]=None, placeholder: str=None, chip: bool=None, control_after_generate: bool=None,
                    socketless: bool=None, extra_dict=None, raw_link: bool=None, advanced: bool=None):
            super().__init__(id, options, display_name, optional, tooltip, lazy, default, control_after_generate, socketless=socketless, extra_dict=extra_dict, raw_link=raw_link, advanced=advanced)
            self.multiselect = True
@@ -1213,33 +1203,6 @@ class Color(ComfyTypeIO):
      def as_dict(self):
          return super().as_dict()

-@comfytype(io_type="BOUNDING_BOX")
-class BoundingBox(ComfyTypeIO):
-    class BoundingBoxDict(TypedDict):
-        x: int
-        y: int
-        width: int
-        height: int
-    Type = BoundingBoxDict
-
-    class Input(WidgetInput):
-        def __init__(self, id: str, display_name: str=None, optional=False, tooltip: str=None,
-                     socketless: bool=True, default: dict=None, component: str=None, force_input: bool=None):
-            super().__init__(id, display_name, optional, tooltip, None, default, socketless)
-            self.component = component
-            self.force_input = force_input
-            if default is None:
-                self.default = {"x": 0, "y": 0, "width": 512, "height": 512}
-
-        def as_dict(self):
-            d = super().as_dict()
-            if self.component:
-                d["component"] = self.component
-            if self.force_input is not None:
-                d["forceInput"] = self.force_input
-            return d
-
-
 DYNAMIC_INPUT_LOOKUP: dict[str, Callable[[dict[str, Any], dict[str, Any], tuple[str, dict[str, Any]], str, list[str] | None], None]] = {}
 def register_dynamic_input_func(io_type: str, func: Callable[[dict[str, Any], dict[str, Any], tuple[str, dict[str, Any]], str, list[str] | None], None]):
    DYNAMIC_INPUT_LOOKUP[io_type] = func
@@ -1346,7 +1309,6 @@ class NodeInfoV1:
    api_node: bool=None
    price_badge: dict | None = None
    search_aliases: list[str]=None
-    essentials_category: str=None


@dataclass
@@ -1468,8 +1430,6 @@ class Schema:
    """Flags a node as expandable, allowing NodeOutput to include 'expand' property."""
    accept_all_inputs: bool=False
    """When True, all inputs from the prompt will be passed to the node as kwargs, even if not defined in the schema."""
-    essentials_category: str | None = None
-    """Optional category for the Essentials tab. Path-based like category field (e.g., 'Basic', 'Image Tools/Editing')."""

    def validate(self):
        '''Validate the schema:
@@ -1576,7 +1536,6 @@ class Schema:
            python_module=getattr(cls, "RELATIVE_PYTHON_MODULE", "nodes"),
            price_badge=self.price_badge.as_dict(self.inputs) if self.price_badge is not None else None,
            search_aliases=self.search_aliases if self.search_aliases else None,
-            essentials_category=self.essentials_category,
        )
        return info

@@ -2071,74 +2030,11 @@ class _UIOutput(ABC):
        ...


-class InputMapOldId(TypedDict):
-    """Map an old node input to a new node input by ID."""
-    new_id: str
-    old_id: str
-
-class InputMapSetValue(TypedDict):
-    """Set a specific value for a new node input."""
-    new_id: str
-    set_value: Any
-
-InputMap = InputMapOldId | InputMapSetValue
-"""
-Input mapping for node replacement. Type is inferred by dictionary keys:
- {"new_id": str, "old_id": str} - maps old input to new input
- {"new_id": str, "set_value": Any} - sets a specific value for new input
-"""
-
-class OutputMap(TypedDict):
-    """Map outputs of node replacement via indexes."""
-    new_idx: int
-    old_idx: int
-
-class NodeReplace:
-    """
-    Defines a possible node replacement, mapping inputs and outputs of the old node to the new node.
-
-    Also supports assigning specific values to the input widgets of the new node.
-
-    Args:
-        new_node_id: The class name of the new replacement node.
-        old_node_id: The class name of the deprecated node.
-        old_widget_ids: Ordered list of input IDs for widgets that may not have an input slot
-            connected. The workflow JSON stores widget values by their relative position index,
-            not by ID. This list maps those positional indexes to input IDs, enabling the
-            replacement system to correctly identify widget values during node migration.
-        input_mapping: List of input mappings from old node to new node.
-        output_mapping: List of output mappings from old node to new node.
-    """
-    def __init__(self,
-        new_node_id: str,
-        old_node_id: str,
-        old_widget_ids: list[str] | None=None,
-        input_mapping: list[InputMap] | None=None,
-        output_mapping: list[OutputMap] | None=None,
-    ):
-        self.new_node_id = new_node_id
-        self.old_node_id = old_node_id
-        self.old_widget_ids = old_widget_ids
-        self.input_mapping = input_mapping
-        self.output_mapping = output_mapping
-
-    def as_dict(self):
-        """Create serializable representation of the node replacement."""
-        return {
-            "new_node_id": self.new_node_id,
-            "old_node_id": self.old_node_id,
-            "old_widget_ids": self.old_widget_ids,
-            "input_mapping": list(self.input_mapping) if self.input_mapping else None,
-            "output_mapping": list(self.output_mapping) if self.output_mapping else None,
-        }
-
-
 __all__ = [
    "FolderType",
    "UploadType",
    "RemoteOptions",
    "NumberDisplay",
-    "ControlAfterGenerate",

    "comfytype",
    "Custom",
@@ -2225,6 +2121,4 @@ __all__ = [
    "ImageCompare",
    "PriceBadgeDepends",
    "PriceBadge",
-    "BoundingBox",
-    "NodeReplace",
 ]
--- a/comfy_api_nodes/apis/bria.py
+++ b/comfy_api_nodes/apis/bria.py
@@ -45,55 +45,17 @@ class BriaEditImageRequest(BaseModel):
    )


-class BriaRemoveBackgroundRequest(BaseModel):
-    image: str = Field(...)
-    sync: bool = Field(False)
-    visual_input_content_moderation: bool = Field(
-        False, description="If true, returns 422 on input image moderation failure."
-    )
-    visual_output_content_moderation: bool = Field(
-        False, description="If true, returns 422 on visual output moderation failure."
-    )
-    seed: int = Field(...)
-
-
 class BriaStatusResponse(BaseModel):
    request_id: str = Field(...)
    status_url: str = Field(...)
    warning: str | None = Field(None)


-class BriaRemoveBackgroundResult(BaseModel):
-    image_url: str = Field(...)
-
-
-class BriaRemoveBackgroundResponse(BaseModel):
-    status: str = Field(...)
-    result: BriaRemoveBackgroundResult | None = Field(None)
-
-
-class BriaImageEditResult(BaseModel):
+class BriaResult(BaseModel):
    structured_prompt: str = Field(...)
    image_url: str = Field(...)


-class BriaImageEditResponse(BaseModel):
+class BriaResponse(BaseModel):
    status: str = Field(...)
-    result: BriaImageEditResult | None = Field(None)
-
-
-class BriaRemoveVideoBackgroundRequest(BaseModel):
-    video: str = Field(...)
-    background_color: str = Field(default="transparent", description="Background color for the output video.")
-    output_container_and_codec: str = Field(...)
-    preserve_audio: bool = Field(True)
-    seed: int = Field(...)
-
-
-class BriaRemoveVideoBackgroundResult(BaseModel):
-    video_url: str = Field(...)
-
-
-class BriaRemoveVideoBackgroundResponse(BaseModel):
-    status: str = Field(...)
-    result: BriaRemoveVideoBackgroundResult | None = Field(None)
+    result: BriaResult | None = Field(None)
--- a/comfy_api_nodes/apis/bytedance.py
+++ b/comfy_api_nodes/apis/bytedance.py
@@ -27,7 +27,6 @@ class Seedream4TaskCreationRequest(BaseModel):
    sequential_image_generation: str = Field("disabled")
    sequential_image_generation_options: Seedream4Options = Field(Seedream4Options(max_images=15))
    watermark: bool = Field(False)
-    output_format: str | None = None


 class ImageTaskCreationResponse(BaseModel):
@@ -107,7 +106,6 @@ RECOMMENDED_PRESETS_SEEDREAM_4 = [
    ("2496x1664 (3:2)", 2496, 1664),
    ("1664x2496 (2:3)", 1664, 2496),
    ("3024x1296 (21:9)", 3024, 1296),
-    ("3072x3072 (1:1)", 3072, 3072),
    ("4096x4096 (1:1)", 4096, 4096),
    ("Custom", None, None),
 ]
--- a/comfy_api_nodes/apis/elevenlabs.py
+++ b/comfy_api_nodes/apis/elevenlabs.py
@@ -1,88 +0,0 @@
-from pydantic import BaseModel, Field
-
-
-class SpeechToTextRequest(BaseModel):
-    model_id: str = Field(...)
-    cloud_storage_url: str = Field(...)
-    language_code: str | None = Field(None, description="ISO-639-1 or ISO-639-3 language code")
-    tag_audio_events: bool | None = Field(None, description="Annotate sounds like (laughter) in transcript")
-    num_speakers: int | None = Field(None, description="Max speakers predicted")
-    timestamps_granularity: str = Field(default="word", description="Timing precision: none, word, or character")
-    diarize: bool | None = Field(None, description="Annotate which speaker is talking")
-    diarization_threshold: float | None = Field(None, description="Speaker separation sensitivity")
-    temperature: float | None = Field(None, description="Randomness control")
-    seed: int = Field(..., description="Seed for deterministic sampling")
-
-
-class SpeechToTextWord(BaseModel):
-    text: str = Field(..., description="The word text")
-    type: str = Field(default="word", description="Type of text element (word, spacing, etc.)")
-    start: float | None = Field(None, description="Start time in seconds (when timestamps enabled)")
-    end: float | None = Field(None, description="End time in seconds (when timestamps enabled)")
-    speaker_id: str | None = Field(None, description="Speaker identifier when diarization is enabled")
-    logprob: float | None = Field(None, description="Log probability of the word")
-
-
-class SpeechToTextResponse(BaseModel):
-    language_code: str = Field(..., description="Detected or specified language code")
-    language_probability: float | None = Field(None, description="Confidence of language detection")
-    text: str = Field(..., description="Full transcript text")
-    words: list[SpeechToTextWord] | None = Field(None, description="Word-level timing information")
-
-
-class TextToSpeechVoiceSettings(BaseModel):
-    stability: float | None = Field(None, description="Voice stability")
-    similarity_boost: float | None = Field(None, description="Similarity boost")
-    style: float | None = Field(None, description="Style exaggeration")
-    use_speaker_boost: bool | None = Field(None, description="Boost similarity to original speaker")
-    speed: float | None = Field(None, description="Speech speed")
-
-
-class TextToSpeechRequest(BaseModel):
-    text: str = Field(..., description="Text to convert to speech")
-    model_id: str = Field(..., description="Model ID for TTS")
-    language_code: str | None = Field(None, description="ISO-639-1 or ISO-639-3 language code")
-    voice_settings: TextToSpeechVoiceSettings | None = Field(None, description="Voice settings")
-    seed: int = Field(..., description="Seed for deterministic sampling")
-    apply_text_normalization: str | None = Field(None, description="Text normalization mode: auto, on, off")
-
-
-class TextToSoundEffectsRequest(BaseModel):
-    text: str = Field(..., description="Text prompt to convert into a sound effect")
-    duration_seconds: float = Field(..., description="Duration of generated sound in seconds")
-    prompt_influence: float = Field(..., description="How closely generation follows the prompt")
-    loop: bool | None = Field(None, description="Whether to create a smoothly looping sound effect")
-
-
-class AddVoiceRequest(BaseModel):
-    name: str = Field(..., description="Name that identifies the voice")
-    remove_background_noise: bool = Field(..., description="Remove background noise from voice samples")
-
-
-class AddVoiceResponse(BaseModel):
-    voice_id: str = Field(..., description="The newly created voice's unique identifier")
-
-
-class SpeechToSpeechRequest(BaseModel):
-    model_id: str = Field(..., description="Model ID for speech-to-speech")
-    voice_settings: str = Field(..., description="JSON string of voice settings")
-    seed: int = Field(..., description="Seed for deterministic sampling")
-    remove_background_noise: bool = Field(..., description="Remove background noise from input audio")
-
-
-class DialogueInput(BaseModel):
-    text: str = Field(..., description="Text content to convert to speech")
-    voice_id: str = Field(..., description="Voice identifier for this dialogue segment")
-
-
-class DialogueSettings(BaseModel):
-    stability: float | None = Field(None, description="Voice stability (0-1)")
-
-
-class TextToDialogueRequest(BaseModel):
-    inputs: list[DialogueInput] = Field(..., description="List of dialogue segments")
-    model_id: str = Field(..., description="Model ID for dialogue generation")
-    language_code: str | None = Field(None, description="ISO-639-1 language code")
-    settings: DialogueSettings | None = Field(None, description="Voice settings")
-    seed: int | None = Field(None, description="Seed for deterministic sampling")
-    apply_text_normalization: str | None = Field(None, description="Text normalization mode: auto, on, off")
--- a/comfy_api_nodes/apis/gemini.py
+++ b/comfy_api_nodes/apis/gemini.py
@@ -116,26 +116,14 @@ class GeminiGenerationConfig(BaseModel):
    topP: float | None = Field(None, ge=0.0, le=1.0)


-class GeminiImageOutputOptions(BaseModel):
-    mimeType: str = Field("image/png")
-    compressionQuality: int | None = Field(None)
-
-
 class GeminiImageConfig(BaseModel):
    aspectRatio: str | None = Field(None)
    imageSize: str | None = Field(None)
-    imageOutputOptions: GeminiImageOutputOptions = Field(default_factory=GeminiImageOutputOptions)
-
-
-class GeminiThinkingConfig(BaseModel):
-    includeThoughts: bool | None = Field(None)
-    thinkingLevel: str = Field(...)


 class GeminiImageGenerationConfig(GeminiGenerationConfig):
    responseModalities: list[str] | None = Field(None)
    imageConfig: GeminiImageConfig | None = Field(None)
-    thinkingConfig: GeminiThinkingConfig | None = Field(None)


 class GeminiImageGenerateContentRequest(BaseModel):
--- a/comfy_api_nodes/apis/hunyuan3d.py
+++ b/comfy_api_nodes/apis/hunyuan3d.py
@@ -64,23 +64,3 @@ class To3DProTaskResultResponse(BaseModel):

 class To3DProTaskQueryRequest(BaseModel):
    JobId: str = Field(...)
-
-
-class To3DUVFileInput(BaseModel):
-    Type: str = Field(..., description="File type: GLB, OBJ, or FBX")
-    Url: str = Field(...)
-
-
-class To3DUVTaskRequest(BaseModel):
-    File: To3DUVFileInput = Field(...)
-
-
-class TextureEditImageInfo(BaseModel):
-    Url: str = Field(...)
-
-
-class TextureEditTaskRequest(BaseModel):
-    File3D: To3DUVFileInput = Field(...)
-    Image: TextureEditImageInfo | None = Field(None)
-    Prompt: str | None = Field(None)
-    EnablePBR: bool | None = Field(None)
--- a/comfy_api_nodes/apis/kling.py
+++ b/comfy_api_nodes/apis/kling.py
@@ -134,13 +134,6 @@ class ImageToVideoWithAudioRequest(BaseModel):
    shot_type: str | None = Field(None)


-class KlingAvatarRequest(BaseModel):
-    image: str = Field(...)
-    sound_file: str = Field(...)
-    prompt: str | None = Field(None)
-    mode: str = Field(...)
-
-
 class MotionControlRequest(BaseModel):
    prompt: str = Field(...)
    image_url: str = Field(...)
--- a/comfy_api_nodes/apis/recraft.py
+++ b/comfy_api_nodes/apis/recraft.py
@@ -198,6 +198,11 @@ dict_recraft_substyles_v3 = {
 }


+class RecraftModel(str, Enum):
+    recraftv3 = 'recraftv3'
+    recraftv2 = 'recraftv2'
+
+
 class RecraftImageSize(str, Enum):
    res_1024x1024 = '1024x1024'
    res_1365x1024 = '1365x1024'
@@ -216,41 +221,6 @@ class RecraftImageSize(str, Enum):
    res_1707x1024 = '1707x1024'


-RECRAFT_V4_SIZES = [
-    "1024x1024",
-    "1536x768",
-    "768x1536",
-    "1280x832",
-    "832x1280",
-    "1216x896",
-    "896x1216",
-    "1152x896",
-    "896x1152",
-    "832x1344",
-    "1280x896",
-    "896x1280",
-    "1344x768",
-    "768x1344",
-]
-
-RECRAFT_V4_PRO_SIZES = [
-    "2048x2048",
-    "3072x1536",
-    "1536x3072",
-    "2560x1664",
-    "1664x2560",
-    "2432x1792",
-    "1792x2432",
-    "2304x1792",
-    "1792x2304",
-    "1664x2688",
-    "1434x1024",
-    "1024x1434",
-    "2560x1792",
-    "1792x2560",
-]
-
-
 class RecraftColorObject(BaseModel):
    rgb: list[int] = Field(..., description='An array of 3 integer values in range of 0...255 defining RGB Color Model')

@@ -264,16 +234,17 @@ class RecraftControlsObject(BaseModel):

 class RecraftImageGenerationRequest(BaseModel):
    prompt: str = Field(..., description='The text prompt describing the image to generate')
-    size: str | None = Field(None, description='The size of the generated image (e.g., "1024x1024")')
+    size: RecraftImageSize | None = Field(None, description='The size of the generated image (e.g., "1024x1024")')
    n: int = Field(..., description='The number of images to generate')
    negative_prompt: str | None = Field(None, description='A text description of undesired elements on an image')
-    model: str = Field(...)
+    model: RecraftModel | None = Field(RecraftModel.recraftv3, description='The model to use for generation (e.g., "recraftv3")')
    style: str | None = Field(None, description='The style to apply to the generated image (e.g., "digital_illustration")')
    substyle: str | None = Field(None, description='The substyle to apply to the generated image, depending on the style input')
    controls: RecraftControlsObject | None = Field(None, description='A set of custom parameters to tweak generation process')
    style_id: str | None = Field(None, description='Use a previously uploaded style as a reference; UUID')
    strength: float | None = Field(None, description='Defines the difference with the original image, should lie in [0, 1], where 0 means almost identical, and 1 means miserable similarity')
    random_seed: int | None = Field(None, description="Seed for video generation")
+    # text_layout


 class RecraftReturnedObject(BaseModel):
--- a/comfy_api_nodes/nodes_bfl.py
+++ b/comfy_api_nodes/nodes_bfl.py
@@ -57,7 +57,6 @@ class FluxProUltraImageNode(IO.ComfyNode):
                    tooltip="Whether to perform upsampling on the prompt. "
                    "If active, automatically modifies the prompt for more creative generation, "
                    "but results are nondeterministic (same seed will not produce exactly the same result).",
-                    advanced=True,
                ),
                IO.Int.Input(
                    "seed",
@@ -201,7 +200,6 @@ class FluxKontextProImageNode(IO.ComfyNode):
                    "prompt_upsampling",
                    default=False,
                    tooltip="Whether to perform upsampling on the prompt. If active, automatically modifies the prompt for more creative generation, but results are nondeterministic (same seed will not produce exactly the same result).",
-                    advanced=True,
                ),
                IO.Image.Input(
                    "input_image",
@@ -298,7 +296,6 @@ class FluxProExpandNode(IO.ComfyNode):
                    tooltip="Whether to perform upsampling on the prompt. "
                    "If active, automatically modifies the prompt for more creative generation, "
                    "but results are nondeterministic (same seed will not produce exactly the same result).",
-                    advanced=True,
                ),
                IO.Int.Input(
                    "top",
@@ -436,7 +433,6 @@ class FluxProFillNode(IO.ComfyNode):
                    tooltip="Whether to perform upsampling on the prompt. "
                    "If active, automatically modifies the prompt for more creative generation, "
                    "but results are nondeterministic (same seed will not produce exactly the same result).",
-                    advanced=True,
                ),
                IO.Float.Input(
                    "guidance",
@@ -581,7 +577,6 @@ class Flux2ProImageNode(IO.ComfyNode):
                    default=True,
                    tooltip="Whether to perform upsampling on the prompt. "
                    "If active, automatically modifies the prompt for more creative generation.",
-                    advanced=True,
                ),
                IO.Image.Input("images", optional=True, tooltip="Up to 9 images to be used as references."),
            ],
--- a/comfy_api_nodes/nodes_bria.py
+++ b/comfy_api_nodes/nodes_bria.py
@@ -3,11 +3,7 @@ from typing_extensions import override
 from comfy_api.latest import IO, ComfyExtension, Input
 from comfy_api_nodes.apis.bria import (
    BriaEditImageRequest,
-    BriaRemoveBackgroundRequest,
-    BriaRemoveBackgroundResponse,
-    BriaRemoveVideoBackgroundRequest,
-    BriaRemoveVideoBackgroundResponse,
-    BriaImageEditResponse,
+    BriaResponse,
    BriaStatusResponse,
    InputModerationSettings,
 )
@@ -15,12 +11,10 @@ from comfy_api_nodes.util import (
    ApiEndpoint,
    convert_mask_to_image,
    download_url_to_image_tensor,
-    download_url_to_video_output,
+    get_number_of_images,
    poll_op,
    sync_op,
-    upload_image_to_comfyapi,
-    upload_video_to_comfyapi,
-    validate_video_duration,
+    upload_images_to_comfyapi,
 )


@@ -79,15 +73,21 @@ class BriaImageEditNode(IO.ComfyNode):
                IO.DynamicCombo.Input(
                    "moderation",
                    options=[
-                        IO.DynamicCombo.Option("false", []),
                        IO.DynamicCombo.Option(
                            "true",
                            [
-                                IO.Boolean.Input("prompt_content_moderation", default=False),
-                                IO.Boolean.Input("visual_input_moderation", default=False),
-                                IO.Boolean.Input("visual_output_moderation", default=True),
+                                IO.Boolean.Input(
+                                    "prompt_content_moderation", default=False
+                                ),
+                                IO.Boolean.Input(
+                                    "visual_input_moderation", default=False
+                                ),
+                                IO.Boolean.Input(
+                                    "visual_output_moderation", default=True
+                                ),
                            ],
                        ),
+                        IO.DynamicCombo.Option("false", []),
                    ],
                    tooltip="Moderation settings",
                ),
@@ -127,26 +127,50 @@ class BriaImageEditNode(IO.ComfyNode):
        mask: Input.Image | None = None,
    ) -> IO.NodeOutput:
        if not prompt and not structured_prompt:
-            raise ValueError("One of prompt or structured_prompt is required to be non-empty.")
+            raise ValueError(
+                "One of prompt or structured_prompt is required to be non-empty."
+            )
+        if get_number_of_images(image) != 1:
+            raise ValueError("Exactly one input image is required.")
        mask_url = None
        if mask is not None:
-            mask_url = await upload_image_to_comfyapi(cls, convert_mask_to_image(mask), wait_label="Uploading mask")
+            mask_url = (
+                await upload_images_to_comfyapi(
+                    cls,
+                    convert_mask_to_image(mask),
+                    max_images=1,
+                    mime_type="image/png",
+                    wait_label="Uploading mask",
+                )
+            )[0]
        response = await sync_op(
            cls,
            ApiEndpoint(path="proxy/bria/v2/image/edit", method="POST"),
            data=BriaEditImageRequest(
                instruction=prompt if prompt else None,
                structured_instruction=structured_prompt if structured_prompt else None,
-                images=[await upload_image_to_comfyapi(cls, image, wait_label="Uploading image")],
+                images=await upload_images_to_comfyapi(
+                    cls,
+                    image,
+                    max_images=1,
+                    mime_type="image/png",
+                    wait_label="Uploading image",
+                ),
                mask=mask_url,
                negative_prompt=negative_prompt if negative_prompt else None,
                guidance_scale=guidance_scale,
                seed=seed,
                model_version=model,
                steps_num=steps,
-                prompt_content_moderation=moderation.get("prompt_content_moderation", False),
-                visual_input_content_moderation=moderation.get("visual_input_moderation", False),
-                visual_output_content_moderation=moderation.get("visual_output_moderation", False),
+                prompt_content_moderation=moderation.get(
+                    "prompt_content_moderation", False
+                ),
+                visual_input_content_moderation=moderation.get(
+                    "visual_input_moderation", False
+                ),
+                visual_output_content_moderation=moderation.get(
+                    "visual_output_moderation", False
+                ),
            ),
            response_model=BriaStatusResponse,
        )
@@ -154,7 +178,7 @@ class BriaImageEditNode(IO.ComfyNode):
            cls,
            ApiEndpoint(path=f"/proxy/bria/v2/status/{response.request_id}"),
            status_extractor=lambda r: r.status,
-            response_model=BriaImageEditResponse,
+            response_model=BriaResponse,
        )
        return IO.NodeOutput(
            await download_url_to_image_tensor(response.result.image_url),
@@ -162,167 +186,11 @@ class BriaImageEditNode(IO.ComfyNode):
        )


-class BriaRemoveImageBackground(IO.ComfyNode):
-
-    @classmethod
-    def define_schema(cls):
-        return IO.Schema(
-            node_id="BriaRemoveImageBackground",
-            display_name="Bria Remove Image Background",
-            category="api node/image/Bria",
-            description="Remove the background from an image using Bria RMBG 2.0.",
-            inputs=[
-                IO.Image.Input("image"),
-                IO.DynamicCombo.Input(
-                    "moderation",
-                    options=[
-                        IO.DynamicCombo.Option("false", []),
-                        IO.DynamicCombo.Option(
-                            "true",
-                            [
-                                IO.Boolean.Input("visual_input_moderation", default=False),
-                                IO.Boolean.Input("visual_output_moderation", default=True),
-                            ],
-                        ),
-                    ],
-                    tooltip="Moderation settings",
-                ),
-                IO.Int.Input(
-                    "seed",
-                    default=0,
-                    min=0,
-                    max=2147483647,
-                    display_mode=IO.NumberDisplay.number,
-                    control_after_generate=True,
-                    tooltip="Seed controls whether the node should re-run; "
-                    "results are non-deterministic regardless of seed.",
-                ),
-            ],
-            outputs=[IO.Image.Output()],
-            hidden=[
-                IO.Hidden.auth_token_comfy_org,
-                IO.Hidden.api_key_comfy_org,
-                IO.Hidden.unique_id,
-            ],
-            is_api_node=True,
-            price_badge=IO.PriceBadge(
-                expr="""{"type":"usd","usd":0.018}""",
-            ),
-        )
-
-    @classmethod
-    async def execute(
-        cls,
-        image: Input.Image,
-        moderation: dict,
-        seed: int,
-    ) -> IO.NodeOutput:
-        response = await sync_op(
-            cls,
-            ApiEndpoint(path="/proxy/bria/v2/image/edit/remove_background", method="POST"),
-            data=BriaRemoveBackgroundRequest(
-                image=await upload_image_to_comfyapi(cls, image, wait_label="Uploading image"),
-                sync=False,
-                visual_input_content_moderation=moderation.get("visual_input_moderation", False),
-                visual_output_content_moderation=moderation.get("visual_output_moderation", False),
-                seed=seed,
-            ),
-            response_model=BriaStatusResponse,
-        )
-        response = await poll_op(
-            cls,
-            ApiEndpoint(path=f"/proxy/bria/v2/status/{response.request_id}"),
-            status_extractor=lambda r: r.status,
-            response_model=BriaRemoveBackgroundResponse,
-        )
-        return IO.NodeOutput(await download_url_to_image_tensor(response.result.image_url))
-
-
-class BriaRemoveVideoBackground(IO.ComfyNode):
-
-    @classmethod
-    def define_schema(cls):
-        return IO.Schema(
-            node_id="BriaRemoveVideoBackground",
-            display_name="Bria Remove Video Background",
-            category="api node/video/Bria",
-            description="Remove the background from a video using Bria. ",
-            inputs=[
-                IO.Video.Input("video"),
-                IO.Combo.Input(
-                    "background_color",
-                    options=[
-                        "Black",
-                        "White",
-                        "Gray",
-                        "Red",
-                        "Green",
-                        "Blue",
-                        "Yellow",
-                        "Cyan",
-                        "Magenta",
-                        "Orange",
-                    ],
-                    tooltip="Background color for the output video.",
-                ),
-                IO.Int.Input(
-                    "seed",
-                    default=0,
-                    min=0,
-                    max=2147483647,
-                    display_mode=IO.NumberDisplay.number,
-                    control_after_generate=True,
-                    tooltip="Seed controls whether the node should re-run; "
-                    "results are non-deterministic regardless of seed.",
-                ),
-            ],
-            outputs=[IO.Video.Output()],
-            hidden=[
-                IO.Hidden.auth_token_comfy_org,
-                IO.Hidden.api_key_comfy_org,
-                IO.Hidden.unique_id,
-            ],
-            is_api_node=True,
-            price_badge=IO.PriceBadge(
-                expr="""{"type":"usd","usd":0.14,"format":{"suffix":"/second"}}""",
-            ),
-        )
-
-    @classmethod
-    async def execute(
-        cls,
-        video: Input.Video,
-        background_color: str,
-        seed: int,
-    ) -> IO.NodeOutput:
-        validate_video_duration(video, max_duration=60.0)
-        response = await sync_op(
-            cls,
-            ApiEndpoint(path="/proxy/bria/v2/video/edit/remove_background", method="POST"),
-            data=BriaRemoveVideoBackgroundRequest(
-                video=await upload_video_to_comfyapi(cls, video),
-                background_color=background_color,
-                output_container_and_codec="mp4_h264",
-                seed=seed,
-            ),
-            response_model=BriaStatusResponse,
-        )
-        response = await poll_op(
-            cls,
-            ApiEndpoint(path=f"/proxy/bria/v2/status/{response.request_id}"),
-            status_extractor=lambda r: r.status,
-            response_model=BriaRemoveVideoBackgroundResponse,
-        )
-        return IO.NodeOutput(await download_url_to_video_output(response.result.video_url))
-
-
 class BriaExtension(ComfyExtension):
    @override
    async def get_node_list(self) -> list[type[IO.ComfyNode]]:
        return [
            BriaImageEditNode,
-            BriaRemoveImageBackground,
-            BriaRemoveVideoBackground,
        ]


--- a/comfy_api_nodes/nodes_bytedance.py
+++ b/comfy_api_nodes/nodes_bytedance.py
@@ -37,12 +37,6 @@ from comfy_api_nodes.util import (

 BYTEPLUS_IMAGE_ENDPOINT = "/proxy/byteplus/api/v3/images/generations"

-SEEDREAM_MODELS = {
-    "seedream 5.0 lite": "seedream-5-0-260128",
-    "seedream-4-5-251128": "seedream-4-5-251128",
-    "seedream-4-0-250828": "seedream-4-0-250828",
-}
-
 # Long-running tasks endpoints(e.g., video)
 BYTEPLUS_TASK_ENDPOINT = "/proxy/byteplus/api/v3/contents/generations/tasks"
 BYTEPLUS_TASK_STATUS_ENDPOINT = "/proxy/byteplus/api/v3/contents/generations/tasks"  # + /{task_id}
@@ -120,7 +114,6 @@ class ByteDanceImageNode(IO.ComfyNode):
                    default=False,
                    tooltip='Whether to add an "AI generated" watermark to the image',
                    optional=True,
-                    advanced=True,
                ),
            ],
            outputs=[
@@ -186,13 +179,14 @@ class ByteDanceSeedreamNode(IO.ComfyNode):
    def define_schema(cls):
        return IO.Schema(
            node_id="ByteDanceSeedreamNode",
-            display_name="ByteDance Seedream 4.5 & 5.0",
+            display_name="ByteDance Seedream 4.5",
            category="api node/image/ByteDance",
            description="Unified text-to-image generation and precise single-sentence editing at up to 4K resolution.",
            inputs=[
                IO.Combo.Input(
                    "model",
-                    options=list(SEEDREAM_MODELS.keys()),
+                    options=["seedream-4-5-251128", "seedream-4-0-250828"],
+                    tooltip="Model name",
                ),
                IO.String.Input(
                    "prompt",
@@ -203,7 +197,7 @@ class ByteDanceSeedreamNode(IO.ComfyNode):
                IO.Image.Input(
                    "image",
                    tooltip="Input image(s) for image-to-image generation. "
-                    "Reference image(s) for single or multi-reference generation.",
+                    "List of 1-10 images for single or multi-reference generation.",
                    optional=True,
                ),
                IO.Combo.Input(
@@ -215,8 +209,8 @@ class ByteDanceSeedreamNode(IO.ComfyNode):
                    "width",
                    default=2048,
                    min=1024,
-                    max=6240,
-                    step=2,
+                    max=4096,
+                    step=8,
                    tooltip="Custom width for image. Value is working only if `size_preset` is set to `Custom`",
                    optional=True,
                ),
@@ -224,8 +218,8 @@ class ByteDanceSeedreamNode(IO.ComfyNode):
                    "height",
                    default=2048,
                    min=1024,
-                    max=4992,
-                    step=2,
+                    max=4096,
+                    step=8,
                    tooltip="Custom height for image. Value is working only if `size_preset` is set to `Custom`",
                    optional=True,
                ),
@@ -265,14 +259,12 @@ class ByteDanceSeedreamNode(IO.ComfyNode):
                    default=False,
                    tooltip='Whether to add an "AI generated" watermark to the image.',
                    optional=True,
-                    advanced=True,
                ),
                IO.Boolean.Input(
                    "fail_on_partial",
                    default=True,
                    tooltip="If enabled, abort execution if any requested images are missing or return an error.",
                    optional=True,
-                    advanced=True,
                ),
            ],
            outputs=[
@@ -288,8 +280,7 @@ class ByteDanceSeedreamNode(IO.ComfyNode):
                depends_on=IO.PriceBadgeDepends(widgets=["model"]),
                expr="""
                (
-                  $price := $contains(widgets.model, "5.0 lite") ? 0.035 :
-                            $contains(widgets.model, "4-5") ? 0.04 : 0.03;
+                  $price := $contains(widgets.model, "seedream-4-5-251128") ? 0.04 : 0.03;
                  {
                    "type":"usd",
                    "usd": $price,
@@ -315,7 +306,6 @@ class ByteDanceSeedreamNode(IO.ComfyNode):
        watermark: bool = False,
        fail_on_partial: bool = True,
    ) -> IO.NodeOutput:
-        model = SEEDREAM_MODELS[model]
        validate_string(prompt, strip_whitespace=True, min_length=1)
        w = h = None
        for label, tw, th in RECOMMENDED_PRESETS_SEEDREAM_4:
@@ -325,12 +315,15 @@ class ByteDanceSeedreamNode(IO.ComfyNode):

        if w is None or h is None:
            w, h = width, height
-
+            if not (1024 <= w <= 4096) or not (1024 <= h <= 4096):
+                raise ValueError(
+                    f"Custom size out of range: {w}x{h}. " "Both width and height must be between 1024 and 4096 pixels."
+                )
        out_num_pixels = w * h
        mp_provided = out_num_pixels / 1_000_000.0
-        if ("seedream-4-5" in model or "seedream-5-0" in model) and out_num_pixels < 3686400:
+        if "seedream-4-5" in model and out_num_pixels < 3686400:
            raise ValueError(
-                f"Minimum image resolution for the selected model is 3.68MP, "
+                f"Minimum image resolution that Seedream 4.5 can generate is 3.68MP, "
                f"but {mp_provided:.2f}MP provided."
            )
        if "seedream-4-0" in model and out_num_pixels < 921600:
@@ -338,18 +331,9 @@ class ByteDanceSeedreamNode(IO.ComfyNode):
                f"Minimum image resolution that the selected model can generate is 0.92MP, "
                f"but {mp_provided:.2f}MP provided."
            )
-        max_pixels = 10_404_496 if "seedream-5-0" in model else 16_777_216
-        if out_num_pixels > max_pixels:
-            raise ValueError(
-                f"Maximum image resolution for the selected model is {max_pixels / 1_000_000:.2f}MP, "
-                f"but {mp_provided:.2f}MP provided."
-            )
        n_input_images = get_number_of_images(image) if image is not None else 0
-        max_num_of_images = 14 if model == "seedream-5-0-260128" else 10
-        if n_input_images > max_num_of_images:
-            raise ValueError(
-                f"Maximum of {max_num_of_images} reference images are supported, but {n_input_images} received."
-            )
+        if n_input_images > 10:
+            raise ValueError(f"Maximum of 10 reference images are supported, but {n_input_images} received.")
        if sequential_image_generation == "auto" and n_input_images + max_images > 15:
            raise ValueError(
                "The maximum number of generated images plus the number of reference images cannot exceed 15."
@@ -377,7 +361,6 @@ class ByteDanceSeedreamNode(IO.ComfyNode):
                sequential_image_generation=sequential_image_generation,
                sequential_image_generation_options=Seedream4Options(max_images=max_images),
                watermark=watermark,
-                output_format="png" if model == "seedream-5-0-260128" else None,
            ),
        )
        if len(response.data) == 1:
@@ -449,21 +432,18 @@ class ByteDanceTextToVideoNode(IO.ComfyNode):
                    tooltip="Specifies whether to fix the camera. The platform appends an instruction "
                    "to fix the camera to your prompt, but does not guarantee the actual effect.",
                    optional=True,
-                    advanced=True,
                ),
                IO.Boolean.Input(
                    "watermark",
                    default=False,
                    tooltip='Whether to add an "AI generated" watermark to the video.',
                    optional=True,
-                    advanced=True,
                ),
                IO.Boolean.Input(
                    "generate_audio",
                    default=False,
                    tooltip="This parameter is ignored for any model except seedance-1-5-pro.",
                    optional=True,
-                    advanced=True,
                ),
            ],
            outputs=[
@@ -581,21 +561,18 @@ class ByteDanceImageToVideoNode(IO.ComfyNode):
                    tooltip="Specifies whether to fix the camera. The platform appends an instruction "
                    "to fix the camera to your prompt, but does not guarantee the actual effect.",
                    optional=True,
-                    advanced=True,
                ),
                IO.Boolean.Input(
                    "watermark",
                    default=False,
                    tooltip='Whether to add an "AI generated" watermark to the video.',
                    optional=True,
-                    advanced=True,
                ),
                IO.Boolean.Input(
                    "generate_audio",
                    default=False,
                    tooltip="This parameter is ignored for any model except seedance-1-5-pro.",
                    optional=True,
-                    advanced=True,
                ),
            ],
            outputs=[
@@ -717,21 +694,18 @@ class ByteDanceFirstLastFrameNode(IO.ComfyNode):
                    tooltip="Specifies whether to fix the camera. The platform appends an instruction "
                    "to fix the camera to your prompt, but does not guarantee the actual effect.",
                    optional=True,
-                    advanced=True,
                ),
                IO.Boolean.Input(
                    "watermark",
                    default=False,
                    tooltip='Whether to add an "AI generated" watermark to the video.',
                    optional=True,
-                    advanced=True,
                ),
                IO.Boolean.Input(
                    "generate_audio",
                    default=False,
                    tooltip="This parameter is ignored for any model except seedance-1-5-pro.",
                    optional=True,
-                    advanced=True,
                ),
            ],
            outputs=[
@@ -860,7 +834,6 @@ class ByteDanceImageReferenceNode(IO.ComfyNode):
                    default=False,
                    tooltip='Whether to add an "AI generated" watermark to the video.',
                    optional=True,
-                    advanced=True,
                ),
            ],
            outputs=[
--- a/comfy_api_nodes/nodes_elevenlabs.py
+++ b/comfy_api_nodes/nodes_elevenlabs.py
@@ -1,924 +0,0 @@
-import json
-import uuid
-
-from typing_extensions import override
-
-from comfy_api.latest import IO, ComfyExtension, Input
-from comfy_api_nodes.apis.elevenlabs import (
-    AddVoiceRequest,
-    AddVoiceResponse,
-    DialogueInput,
-    DialogueSettings,
-    SpeechToSpeechRequest,
-    SpeechToTextRequest,
-    SpeechToTextResponse,
-    TextToDialogueRequest,
-    TextToSoundEffectsRequest,
-    TextToSpeechRequest,
-    TextToSpeechVoiceSettings,
-)
-from comfy_api_nodes.util import (
-    ApiEndpoint,
-    audio_bytes_to_audio_input,
-    audio_ndarray_to_bytesio,
-    audio_tensor_to_contiguous_ndarray,
-    sync_op,
-    sync_op_raw,
-    upload_audio_to_comfyapi,
-    validate_string,
-)
-
-ELEVENLABS_MUSIC_SECTIONS = "ELEVENLABS_MUSIC_SECTIONS"  # Custom type for music sections
-ELEVENLABS_COMPOSITION_PLAN = "ELEVENLABS_COMPOSITION_PLAN"  # Custom type for composition plan
-ELEVENLABS_VOICE = "ELEVENLABS_VOICE"  # Custom type for voice selection
-
-# Predefined ElevenLabs voices: (voice_id, display_name, gender, accent)
-ELEVENLABS_VOICES = [
-    ("CwhRBWXzGAHq8TQ4Fs17", "Roger", "male", "american"),
-    ("EXAVITQu4vr4xnSDxMaL", "Sarah", "female", "american"),
-    ("FGY2WhTYpPnrIDTdsKH5", "Laura", "female", "american"),
-    ("IKne3meq5aSn9XLyUdCD", "Charlie", "male", "australian"),
-    ("JBFqnCBsd6RMkjVDRZzb", "George", "male", "british"),
-    ("N2lVS1w4EtoT3dr4eOWO", "Callum", "male", "american"),
-    ("SAz9YHcvj6GT2YYXdXww", "River", "neutral", "american"),
-    ("SOYHLrjzK2X1ezoPC6cr", "Harry", "male", "american"),
-    ("TX3LPaxmHKxFdv7VOQHJ", "Liam", "male", "american"),
-    ("Xb7hH8MSUJpSbSDYk0k2", "Alice", "female", "british"),
-    ("XrExE9yKIg1WjnnlVkGX", "Matilda", "female", "american"),
-    ("bIHbv24MWmeRgasZH58o", "Will", "male", "american"),
-    ("cgSgspJ2msm6clMCkdW9", "Jessica", "female", "american"),
-    ("cjVigY5qzO86Huf0OWal", "Eric", "male", "american"),
-    ("hpp4J3VqNfWAUOO0d1Us", "Bella", "female", "american"),
-    ("iP95p4xoKVk53GoZ742B", "Chris", "male", "american"),
-    ("nPczCjzI2devNBz1zQrb", "Brian", "male", "american"),
-    ("onwK4e9ZLuTAKqWW03F9", "Daniel", "male", "british"),
-    ("pFZP5JQG7iQjIQuC4Bku", "Lily", "female", "british"),
-    ("pNInz6obpgDQGcFmaJgB", "Adam", "male", "american"),
-    ("pqHfZKP75CvOlQylNhV4", "Bill", "male", "american"),
-]
-
-ELEVENLABS_VOICE_OPTIONS = [f"{name} ({gender}, {accent})" for _, name, gender, accent in ELEVENLABS_VOICES]
-ELEVENLABS_VOICE_MAP = {
-    f"{name} ({gender}, {accent})": voice_id for voice_id, name, gender, accent in ELEVENLABS_VOICES
-}
-
-
-class ElevenLabsSpeechToText(IO.ComfyNode):
-    @classmethod
-    def define_schema(cls) -> IO.Schema:
-        return IO.Schema(
-            node_id="ElevenLabsSpeechToText",
-            display_name="ElevenLabs Speech to Text",
-            category="api node/audio/ElevenLabs",
-            description="Transcribe audio to text. "
-            "Supports automatic language detection, speaker diarization, and audio event tagging.",
-            inputs=[
-                IO.Audio.Input(
-                    "audio",
-                    tooltip="Audio to transcribe.",
-                ),
-                IO.DynamicCombo.Input(
-                    "model",
-                    options=[
-                        IO.DynamicCombo.Option(
-                            "scribe_v2",
-                            [
-                                IO.Boolean.Input(
-                                    "tag_audio_events",
-                                    default=False,
-                                    tooltip="Annotate sounds like (laughter), (music), etc. in transcript.",
-                                ),
-                                IO.Boolean.Input(
-                                    "diarize",
-                                    default=False,
-                                    tooltip="Annotate which speaker is talking.",
-                                ),
-                                IO.Float.Input(
-                                    "diarization_threshold",
-                                    default=0.22,
-                                    min=0.1,
-                                    max=0.4,
-                                    step=0.01,
-                                    display_mode=IO.NumberDisplay.slider,
-                                    tooltip="Speaker separation sensitivity. "
-                                    "Lower values are more sensitive to speaker changes.",
-                                ),
-                                IO.Float.Input(
-                                    "temperature",
-                                    default=0.0,
-                                    min=0.0,
-                                    max=2.0,
-                                    step=0.01,
-                                    display_mode=IO.NumberDisplay.slider,
-                                    tooltip="Randomness control. "
-                                    "0.0 uses model default. Higher values increase randomness.",
-                                ),
-                                IO.Combo.Input(
-                                    "timestamps_granularity",
-                                    options=["word", "character", "none"],
-                                    default="word",
-                                    tooltip="Timing precision for transcript words.",
-                                ),
-                            ],
-                        ),
-                    ],
-                    tooltip="Model to use for transcription.",
-                ),
-                IO.String.Input(
-                    "language_code",
-                    default="",
-                    tooltip="ISO-639-1 or ISO-639-3 language code (e.g., 'en', 'es', 'fra'). "
-                    "Leave empty for automatic detection.",
-                ),
-                IO.Int.Input(
-                    "num_speakers",
-                    default=0,
-                    min=0,
-                    max=32,
-                    display_mode=IO.NumberDisplay.slider,
-                    tooltip="Maximum number of speakers to predict. Set to 0 for automatic detection.",
-                ),
-                IO.Int.Input(
-                    "seed",
-                    default=1,
-                    min=0,
-                    max=2147483647,
-                    tooltip="Seed for reproducibility (determinism not guaranteed).",
-                ),
-            ],
-            outputs=[
-                IO.String.Output(display_name="text"),
-                IO.String.Output(display_name="language_code"),
-                IO.String.Output(display_name="words_json"),
-            ],
-            hidden=[
-                IO.Hidden.auth_token_comfy_org,
-                IO.Hidden.api_key_comfy_org,
-                IO.Hidden.unique_id,
-            ],
-            is_api_node=True,
-            price_badge=IO.PriceBadge(
-                expr="""{"type":"usd","usd":0.0073,"format":{"approximate":true,"suffix":"/minute"}}""",
-            ),
-        )
-
-    @classmethod
-    async def execute(
-        cls,
-        audio: Input.Audio,
-        model: dict,
-        language_code: str,
-        num_speakers: int,
-        seed: int,
-    ) -> IO.NodeOutput:
-        if model["diarize"] and num_speakers:
-            raise ValueError(
-                "Number of speakers cannot be specified when diarization is enabled. "
-                "Either disable diarization or set num_speakers to 0."
-            )
-        request = SpeechToTextRequest(
-            model_id=model["model"],
-            cloud_storage_url=await upload_audio_to_comfyapi(
-                cls, audio, container_format="mp4", codec_name="aac", mime_type="audio/mp4"
-            ),
-            language_code=language_code if language_code.strip() else None,
-            tag_audio_events=model["tag_audio_events"],
-            num_speakers=num_speakers if num_speakers > 0 else None,
-            timestamps_granularity=model["timestamps_granularity"],
-            diarize=model["diarize"],
-            diarization_threshold=model["diarization_threshold"] if model["diarize"] else None,
-            seed=seed,
-            temperature=model["temperature"],
-        )
-        response = await sync_op(
-            cls,
-            ApiEndpoint(path="/proxy/elevenlabs/v1/speech-to-text", method="POST"),
-            response_model=SpeechToTextResponse,
-            data=request,
-            content_type="multipart/form-data",
-        )
-        words_json = json.dumps(
-            [w.model_dump(exclude_none=True) for w in response.words] if response.words else [],
-            indent=2,
-        )
-        return IO.NodeOutput(response.text, response.language_code, words_json)
-
-
-class ElevenLabsVoiceSelector(IO.ComfyNode):
-    @classmethod
-    def define_schema(cls) -> IO.Schema:
-        return IO.Schema(
-            node_id="ElevenLabsVoiceSelector",
-            display_name="ElevenLabs Voice Selector",
-            category="api node/audio/ElevenLabs",
-            description="Select a predefined ElevenLabs voice for text-to-speech generation.",
-            inputs=[
-                IO.Combo.Input(
-                    "voice",
-                    options=ELEVENLABS_VOICE_OPTIONS,
-                    tooltip="Choose a voice from the predefined ElevenLabs voices.",
-                ),
-            ],
-            outputs=[
-                IO.Custom(ELEVENLABS_VOICE).Output(display_name="voice"),
-            ],
-            is_api_node=False,
-        )
-
-    @classmethod
-    def execute(cls, voice: str) -> IO.NodeOutput:
-        voice_id = ELEVENLABS_VOICE_MAP.get(voice)
-        if not voice_id:
-            raise ValueError(f"Unknown voice: {voice}")
-        return IO.NodeOutput(voice_id)
-
-
-class ElevenLabsTextToSpeech(IO.ComfyNode):
-    @classmethod
-    def define_schema(cls) -> IO.Schema:
-        return IO.Schema(
-            node_id="ElevenLabsTextToSpeech",
-            display_name="ElevenLabs Text to Speech",
-            category="api node/audio/ElevenLabs",
-            description="Convert text to speech.",
-            inputs=[
-                IO.Custom(ELEVENLABS_VOICE).Input(
-                    "voice",
-                    tooltip="Voice to use for speech synthesis. Connect from Voice Selector or Instant Voice Clone.",
-                ),
-                IO.String.Input(
-                    "text",
-                    multiline=True,
-                    default="",
-                    tooltip="The text to convert to speech.",
-                ),
-                IO.Float.Input(
-                    "stability",
-                    default=0.5,
-                    min=0.0,
-                    max=1.0,
-                    step=0.01,
-                    display_mode=IO.NumberDisplay.slider,
-                    tooltip="Voice stability. Lower values give broader emotional range, "
-                    "higher values produce more consistent but potentially monotonous speech.",
-                ),
-                IO.Combo.Input(
-                    "apply_text_normalization",
-                    options=["auto", "on", "off"],
-                    tooltip="Text normalization mode. 'auto' lets the system decide, "
-                    "'on' always applies normalization, 'off' skips it.",
-                ),
-                IO.DynamicCombo.Input(
-                    "model",
-                    options=[
-                        IO.DynamicCombo.Option(
-                            "eleven_multilingual_v2",
-                            [
-                                IO.Float.Input(
-                                    "speed",
-                                    default=1.0,
-                                    min=0.7,
-                                    max=1.3,
-                                    step=0.01,
-                                    display_mode=IO.NumberDisplay.slider,
-                                    tooltip="Speech speed. 1.0 is normal, <1.0 slower, >1.0 faster.",
-                                ),
-                                IO.Float.Input(
-                                    "similarity_boost",
-                                    default=0.75,
-                                    min=0.0,
-                                    max=1.0,
-                                    step=0.01,
-                                    display_mode=IO.NumberDisplay.slider,
-                                    tooltip="Similarity boost. Higher values make the voice more similar to the original.",
-                                ),
-                                IO.Boolean.Input(
-                                    "use_speaker_boost",
-                                    default=False,
-                                    tooltip="Boost similarity to the original speaker voice.",
-                                ),
-                                IO.Float.Input(
-                                    "style",
-                                    default=0.0,
-                                    min=0.0,
-                                    max=0.2,
-                                    step=0.01,
-                                    display_mode=IO.NumberDisplay.slider,
-                                    tooltip="Style exaggeration. Higher values increase stylistic expression "
-                                    "but may reduce stability.",
-                                ),
-                            ],
-                        ),
-                        IO.DynamicCombo.Option(
-                            "eleven_v3",
-                            [
-                                IO.Float.Input(
-                                    "speed",
-                                    default=1.0,
-                                    min=0.7,
-                                    max=1.3,
-                                    step=0.01,
-                                    display_mode=IO.NumberDisplay.slider,
-                                    tooltip="Speech speed. 1.0 is normal, <1.0 slower, >1.0 faster.",
-                                ),
-                                IO.Float.Input(
-                                    "similarity_boost",
-                                    default=0.75,
-                                    min=0.0,
-                                    max=1.0,
-                                    step=0.01,
-                                    display_mode=IO.NumberDisplay.slider,
-                                    tooltip="Similarity boost. Higher values make the voice more similar to the original.",
-                                ),
-                            ],
-                        ),
-                    ],
-                    tooltip="Model to use for text-to-speech.",
-                ),
-                IO.String.Input(
-                    "language_code",
-                    default="",
-                    tooltip="ISO-639-1 or ISO-639-3 language code (e.g., 'en', 'es', 'fra'). "
-                    "Leave empty for automatic detection.",
-                ),
-                IO.Int.Input(
-                    "seed",
-                    default=1,
-                    min=0,
-                    max=2147483647,
-                    tooltip="Seed for reproducibility (determinism not guaranteed).",
-                ),
-                IO.Combo.Input(
-                    "output_format",
-                    options=["mp3_44100_192", "opus_48000_192"],
-                    tooltip="Audio output format.",
-                ),
-            ],
-            outputs=[
-                IO.Audio.Output(),
-            ],
-            hidden=[
-                IO.Hidden.auth_token_comfy_org,
-                IO.Hidden.api_key_comfy_org,
-                IO.Hidden.unique_id,
-            ],
-            is_api_node=True,
-            price_badge=IO.PriceBadge(
-                expr="""{"type":"usd","usd":0.24,"format":{"approximate":true,"suffix":"/1K chars"}}""",
-            ),
-        )
-
-    @classmethod
-    async def execute(
-        cls,
-        voice: str,
-        text: str,
-        stability: float,
-        apply_text_normalization: str,
-        model: dict,
-        language_code: str,
-        seed: int,
-        output_format: str,
-    ) -> IO.NodeOutput:
-        validate_string(text, min_length=1)
-        request = TextToSpeechRequest(
-            text=text,
-            model_id=model["model"],
-            language_code=language_code if language_code.strip() else None,
-            voice_settings=TextToSpeechVoiceSettings(
-                stability=stability,
-                similarity_boost=model["similarity_boost"],
-                speed=model["speed"],
-                use_speaker_boost=model.get("use_speaker_boost", None),
-                style=model.get("style", None),
-            ),
-            seed=seed,
-            apply_text_normalization=apply_text_normalization,
-        )
-        response = await sync_op_raw(
-            cls,
-            ApiEndpoint(
-                path=f"/proxy/elevenlabs/v1/text-to-speech/{voice}",
-                method="POST",
-                query_params={"output_format": output_format},
-            ),
-            data=request,
-            as_binary=True,
-        )
-        return IO.NodeOutput(audio_bytes_to_audio_input(response))
-
-
-class ElevenLabsAudioIsolation(IO.ComfyNode):
-    @classmethod
-    def define_schema(cls) -> IO.Schema:
-        return IO.Schema(
-            node_id="ElevenLabsAudioIsolation",
-            display_name="ElevenLabs Voice Isolation",
-            category="api node/audio/ElevenLabs",
-            description="Remove background noise from audio, isolating vocals or speech.",
-            inputs=[
-                IO.Audio.Input(
-                    "audio",
-                    tooltip="Audio to process for background noise removal.",
-                ),
-            ],
-            outputs=[
-                IO.Audio.Output(),
-            ],
-            hidden=[
-                IO.Hidden.auth_token_comfy_org,
-                IO.Hidden.api_key_comfy_org,
-                IO.Hidden.unique_id,
-            ],
-            is_api_node=True,
-            price_badge=IO.PriceBadge(
-                expr="""{"type":"usd","usd":0.24,"format":{"approximate":true,"suffix":"/minute"}}""",
-            ),
-        )
-
-    @classmethod
-    async def execute(
-        cls,
-        audio: Input.Audio,
-    ) -> IO.NodeOutput:
-        audio_data_np = audio_tensor_to_contiguous_ndarray(audio["waveform"])
-        audio_bytes_io = audio_ndarray_to_bytesio(audio_data_np, audio["sample_rate"], "mp4", "aac")
-        response = await sync_op_raw(
-            cls,
-            ApiEndpoint(path="/proxy/elevenlabs/v1/audio-isolation", method="POST"),
-            files={"audio": ("audio.mp4", audio_bytes_io, "audio/mp4")},
-            content_type="multipart/form-data",
-            as_binary=True,
-        )
-        return IO.NodeOutput(audio_bytes_to_audio_input(response))
-
-
-class ElevenLabsTextToSoundEffects(IO.ComfyNode):
-    @classmethod
-    def define_schema(cls) -> IO.Schema:
-        return IO.Schema(
-            node_id="ElevenLabsTextToSoundEffects",
-            display_name="ElevenLabs Text to Sound Effects",
-            category="api node/audio/ElevenLabs",
-            description="Generate sound effects from text descriptions.",
-            inputs=[
-                IO.String.Input(
-                    "text",
-                    multiline=True,
-                    default="",
-                    tooltip="Text description of the sound effect to generate.",
-                ),
-                IO.DynamicCombo.Input(
-                    "model",
-                    options=[
-                        IO.DynamicCombo.Option(
-                            "eleven_sfx_v2",
-                            [
-                                IO.Float.Input(
-                                    "duration",
-                                    default=5.0,
-                                    min=0.5,
-                                    max=30.0,
-                                    step=0.1,
-                                    display_mode=IO.NumberDisplay.slider,
-                                    tooltip="Duration of generated sound in seconds.",
-                                ),
-                                IO.Boolean.Input(
-                                    "loop",
-                                    default=False,
-                                    tooltip="Create a smoothly looping sound effect.",
-                                ),
-                                IO.Float.Input(
-                                    "prompt_influence",
-                                    default=0.3,
-                                    min=0.0,
-                                    max=1.0,
-                                    step=0.01,
-                                    display_mode=IO.NumberDisplay.slider,
-                                    tooltip="How closely generation follows the prompt. "
-                                    "Higher values make the sound follow the text more closely.",
-                                ),
-                            ],
-                        ),
-                    ],
-                    tooltip="Model to use for sound effect generation.",
-                ),
-                IO.Combo.Input(
-                    "output_format",
-                    options=["mp3_44100_192", "opus_48000_192"],
-                    tooltip="Audio output format.",
-                ),
-            ],
-            outputs=[
-                IO.Audio.Output(),
-            ],
-            hidden=[
-                IO.Hidden.auth_token_comfy_org,
-                IO.Hidden.api_key_comfy_org,
-                IO.Hidden.unique_id,
-            ],
-            is_api_node=True,
-            price_badge=IO.PriceBadge(
-                expr="""{"type":"usd","usd":0.14,"format":{"approximate":true,"suffix":"/minute"}}""",
-            ),
-        )
-
-    @classmethod
-    async def execute(
-        cls,
-        text: str,
-        model: dict,
-        output_format: str,
-    ) -> IO.NodeOutput:
-        validate_string(text, min_length=1)
-        response = await sync_op_raw(
-            cls,
-            ApiEndpoint(
-                path="/proxy/elevenlabs/v1/sound-generation",
-                method="POST",
-                query_params={"output_format": output_format},
-            ),
-            data=TextToSoundEffectsRequest(
-                text=text,
-                duration_seconds=model["duration"],
-                prompt_influence=model["prompt_influence"],
-                loop=model.get("loop", None),
-            ),
-            as_binary=True,
-        )
-        return IO.NodeOutput(audio_bytes_to_audio_input(response))
-
-
-class ElevenLabsInstantVoiceClone(IO.ComfyNode):
-    @classmethod
-    def define_schema(cls) -> IO.Schema:
-        return IO.Schema(
-            node_id="ElevenLabsInstantVoiceClone",
-            display_name="ElevenLabs Instant Voice Clone",
-            category="api node/audio/ElevenLabs",
-            description="Create a cloned voice from audio samples. "
-            "Provide 1-8 audio recordings of the voice to clone.",
-            inputs=[
-                IO.Autogrow.Input(
-                    "files",
-                    template=IO.Autogrow.TemplatePrefix(
-                        IO.Audio.Input("audio"),
-                        prefix="audio",
-                        min=1,
-                        max=8,
-                    ),
-                    tooltip="Audio recordings for voice cloning.",
-                ),
-                IO.Boolean.Input(
-                    "remove_background_noise",
-                    default=False,
-                    tooltip="Remove background noise from voice samples using audio isolation.",
-                ),
-            ],
-            outputs=[
-                IO.Custom(ELEVENLABS_VOICE).Output(display_name="voice"),
-            ],
-            hidden=[
-                IO.Hidden.auth_token_comfy_org,
-                IO.Hidden.api_key_comfy_org,
-                IO.Hidden.unique_id,
-            ],
-            is_api_node=True,
-            price_badge=IO.PriceBadge(expr="""{"type":"usd","usd":0.15}"""),
-        )
-
-    @classmethod
-    async def execute(
-        cls,
-        files: IO.Autogrow.Type,
-        remove_background_noise: bool,
-    ) -> IO.NodeOutput:
-        file_tuples: list[tuple[str, tuple[str, bytes, str]]] = []
-        for key in files:
-            audio = files[key]
-            sample_rate: int = audio["sample_rate"]
-            waveform = audio["waveform"]
-            audio_data_np = audio_tensor_to_contiguous_ndarray(waveform)
-            audio_bytes_io = audio_ndarray_to_bytesio(audio_data_np, sample_rate, "mp4", "aac")
-            file_tuples.append(("files", (f"{key}.mp4", audio_bytes_io.getvalue(), "audio/mp4")))
-
-        response = await sync_op(
-            cls,
-            ApiEndpoint(path="/proxy/elevenlabs/v1/voices/add", method="POST"),
-            response_model=AddVoiceResponse,
-            data=AddVoiceRequest(
-                name=str(uuid.uuid4()),
-                remove_background_noise=remove_background_noise,
-            ),
-            files=file_tuples,
-            content_type="multipart/form-data",
-        )
-        return IO.NodeOutput(response.voice_id)
-
-
-ELEVENLABS_STS_VOICE_SETTINGS = [
-    IO.Float.Input(
-        "speed",
-        default=1.0,
-        min=0.7,
-        max=1.3,
-        step=0.01,
-        display_mode=IO.NumberDisplay.slider,
-        tooltip="Speech speed. 1.0 is normal, <1.0 slower, >1.0 faster.",
-    ),
-    IO.Float.Input(
-        "similarity_boost",
-        default=0.75,
-        min=0.0,
-        max=1.0,
-        step=0.01,
-        display_mode=IO.NumberDisplay.slider,
-        tooltip="Similarity boost. Higher values make the voice more similar to the original.",
-    ),
-    IO.Boolean.Input(
-        "use_speaker_boost",
-        default=False,
-        tooltip="Boost similarity to the original speaker voice.",
-    ),
-    IO.Float.Input(
-        "style",
-        default=0.0,
-        min=0.0,
-        max=0.2,
-        step=0.01,
-        display_mode=IO.NumberDisplay.slider,
-        tooltip="Style exaggeration. Higher values increase stylistic expression but may reduce stability.",
-    ),
-]
-
-
-class ElevenLabsSpeechToSpeech(IO.ComfyNode):
-    @classmethod
-    def define_schema(cls) -> IO.Schema:
-        return IO.Schema(
-            node_id="ElevenLabsSpeechToSpeech",
-            display_name="ElevenLabs Speech to Speech",
-            category="api node/audio/ElevenLabs",
-            description="Transform speech from one voice to another while preserving the original content and emotion.",
-            inputs=[
-                IO.Custom(ELEVENLABS_VOICE).Input(
-                    "voice",
-                    tooltip="Target voice for the transformation. "
-                    "Connect from Voice Selector or Instant Voice Clone.",
-                ),
-                IO.Audio.Input(
-                    "audio",
-                    tooltip="Source audio to transform.",
-                ),
-                IO.Float.Input(
-                    "stability",
-                    default=0.5,
-                    min=0.0,
-                    max=1.0,
-                    step=0.01,
-                    display_mode=IO.NumberDisplay.slider,
-                    tooltip="Voice stability. Lower values give broader emotional range, "
-                    "higher values produce more consistent but potentially monotonous speech.",
-                ),
-                IO.DynamicCombo.Input(
-                    "model",
-                    options=[
-                        IO.DynamicCombo.Option(
-                            "eleven_multilingual_sts_v2",
-                            ELEVENLABS_STS_VOICE_SETTINGS,
-                        ),
-                        IO.DynamicCombo.Option(
-                            "eleven_english_sts_v2",
-                            ELEVENLABS_STS_VOICE_SETTINGS,
-                        ),
-                    ],
-                    tooltip="Model to use for speech-to-speech transformation.",
-                ),
-                IO.Combo.Input(
-                    "output_format",
-                    options=["mp3_44100_192", "opus_48000_192"],
-                    tooltip="Audio output format.",
-                ),
-                IO.Int.Input(
-                    "seed",
-                    default=0,
-                    min=0,
-                    max=4294967295,
-                    tooltip="Seed for reproducibility.",
-                ),
-                IO.Boolean.Input(
-                    "remove_background_noise",
-                    default=False,
-                    tooltip="Remove background noise from input audio using audio isolation.",
-                ),
-            ],
-            outputs=[
-                IO.Audio.Output(),
-            ],
-            hidden=[
-                IO.Hidden.auth_token_comfy_org,
-                IO.Hidden.api_key_comfy_org,
-                IO.Hidden.unique_id,
-            ],
-            is_api_node=True,
-            price_badge=IO.PriceBadge(
-                expr="""{"type":"usd","usd":0.24,"format":{"approximate":true,"suffix":"/minute"}}""",
-            ),
-        )
-
-    @classmethod
-    async def execute(
-        cls,
-        voice: str,
-        audio: Input.Audio,
-        stability: float,
-        model: dict,
-        output_format: str,
-        seed: int,
-        remove_background_noise: bool,
-    ) -> IO.NodeOutput:
-        audio_data_np = audio_tensor_to_contiguous_ndarray(audio["waveform"])
-        audio_bytes_io = audio_ndarray_to_bytesio(audio_data_np, audio["sample_rate"], "mp4", "aac")
-        voice_settings = TextToSpeechVoiceSettings(
-            stability=stability,
-            similarity_boost=model["similarity_boost"],
-            style=model["style"],
-            use_speaker_boost=model["use_speaker_boost"],
-            speed=model["speed"],
-        )
-        response = await sync_op_raw(
-            cls,
-            ApiEndpoint(
-                path=f"/proxy/elevenlabs/v1/speech-to-speech/{voice}",
-                method="POST",
-                query_params={"output_format": output_format},
-            ),
-            data=SpeechToSpeechRequest(
-                model_id=model["model"],
-                voice_settings=voice_settings.model_dump_json(exclude_none=True),
-                seed=seed,
-                remove_background_noise=remove_background_noise,
-            ),
-            files={"audio": ("audio.mp4", audio_bytes_io.getvalue(), "audio/mp4")},
-            content_type="multipart/form-data",
-            as_binary=True,
-        )
-        return IO.NodeOutput(audio_bytes_to_audio_input(response))
-
-
-def _generate_dialogue_inputs(count: int) -> list:
-    """Generate input widgets for a given number of dialogue entries."""
-    inputs = []
-    for i in range(1, count + 1):
-        inputs.extend(
-            [
-                IO.String.Input(
-                    f"text{i}",
-                    multiline=True,
-                    default="",
-                    tooltip=f"Text content for dialogue entry {i}.",
-                ),
-                IO.Custom(ELEVENLABS_VOICE).Input(
-                    f"voice{i}",
-                    tooltip=f"Voice for dialogue entry {i}. Connect from Voice Selector or Instant Voice Clone.",
-                ),
-            ]
-        )
-    return inputs
-
-
-class ElevenLabsTextToDialogue(IO.ComfyNode):
-    @classmethod
-    def define_schema(cls) -> IO.Schema:
-        return IO.Schema(
-            node_id="ElevenLabsTextToDialogue",
-            display_name="ElevenLabs Text to Dialogue",
-            category="api node/audio/ElevenLabs",
-            description="Generate multi-speaker dialogue from text. Each dialogue entry has its own text and voice.",
-            inputs=[
-                IO.Float.Input(
-                    "stability",
-                    default=0.5,
-                    min=0.0,
-                    max=1.0,
-                    step=0.5,
-                    display_mode=IO.NumberDisplay.slider,
-                    tooltip="Voice stability. Lower values give broader emotional range, "
-                    "higher values produce more consistent but potentially monotonous speech.",
-                ),
-                IO.Combo.Input(
-                    "apply_text_normalization",
-                    options=["auto", "on", "off"],
-                    tooltip="Text normalization mode. 'auto' lets the system decide, "
-                    "'on' always applies normalization, 'off' skips it.",
-                ),
-                IO.Combo.Input(
-                    "model",
-                    options=["eleven_v3"],
-                    tooltip="Model to use for dialogue generation.",
-                ),
-                IO.DynamicCombo.Input(
-                    "inputs",
-                    options=[
-                        IO.DynamicCombo.Option("1", _generate_dialogue_inputs(1)),
-                        IO.DynamicCombo.Option("2", _generate_dialogue_inputs(2)),
-                        IO.DynamicCombo.Option("3", _generate_dialogue_inputs(3)),
-                        IO.DynamicCombo.Option("4", _generate_dialogue_inputs(4)),
-                        IO.DynamicCombo.Option("5", _generate_dialogue_inputs(5)),
-                        IO.DynamicCombo.Option("6", _generate_dialogue_inputs(6)),
-                        IO.DynamicCombo.Option("7", _generate_dialogue_inputs(7)),
-                        IO.DynamicCombo.Option("8", _generate_dialogue_inputs(8)),
-                        IO.DynamicCombo.Option("9", _generate_dialogue_inputs(9)),
-                        IO.DynamicCombo.Option("10", _generate_dialogue_inputs(10)),
-                    ],
-                    tooltip="Number of dialogue entries.",
-                ),
-                IO.String.Input(
-                    "language_code",
-                    default="",
-                    tooltip="ISO-639-1 or ISO-639-3 language code (e.g., 'en', 'es', 'fra'). "
-                    "Leave empty for automatic detection.",
-                ),
-                IO.Int.Input(
-                    "seed",
-                    default=1,
-                    min=0,
-                    max=4294967295,
-                    tooltip="Seed for reproducibility.",
-                ),
-                IO.Combo.Input(
-                    "output_format",
-                    options=["mp3_44100_192", "opus_48000_192"],
-                    tooltip="Audio output format.",
-                ),
-            ],
-            outputs=[
-                IO.Audio.Output(),
-            ],
-            hidden=[
-                IO.Hidden.auth_token_comfy_org,
-                IO.Hidden.api_key_comfy_org,
-                IO.Hidden.unique_id,
-            ],
-            is_api_node=True,
-            price_badge=IO.PriceBadge(
-                expr="""{"type":"usd","usd":0.24,"format":{"approximate":true,"suffix":"/1K chars"}}""",
-            ),
-        )
-
-    @classmethod
-    async def execute(
-        cls,
-        stability: float,
-        apply_text_normalization: str,
-        model: str,
-        inputs: dict,
-        language_code: str,
-        seed: int,
-        output_format: str,
-    ) -> IO.NodeOutput:
-        num_entries = int(inputs["inputs"])
-        dialogue_inputs: list[DialogueInput] = []
-        for i in range(1, num_entries + 1):
-            text = inputs[f"text{i}"]
-            voice_id = inputs[f"voice{i}"]
-            validate_string(text, min_length=1)
-            dialogue_inputs.append(DialogueInput(text=text, voice_id=voice_id))
-        request = TextToDialogueRequest(
-            inputs=dialogue_inputs,
-            model_id=model,
-            language_code=language_code if language_code.strip() else None,
-            settings=DialogueSettings(stability=stability),
-            seed=seed,
-            apply_text_normalization=apply_text_normalization,
-        )
-        response = await sync_op_raw(
-            cls,
-            ApiEndpoint(
-                path="/proxy/elevenlabs/v1/text-to-dialogue",
-                method="POST",
-                query_params={"output_format": output_format},
-            ),
-            data=request,
-            as_binary=True,
-        )
-        return IO.NodeOutput(audio_bytes_to_audio_input(response))
-
-
-class ElevenLabsExtension(ComfyExtension):
-    @override
-    async def get_node_list(self) -> list[type[IO.ComfyNode]]:
-        return [
-            ElevenLabsSpeechToText,
-            ElevenLabsVoiceSelector,
-            ElevenLabsTextToSpeech,
-            ElevenLabsAudioIsolation,
-            ElevenLabsTextToSoundEffects,
-            ElevenLabsInstantVoiceClone,
-            ElevenLabsSpeechToSpeech,
-            ElevenLabsTextToDialogue,
-        ]
-
-
-async def comfy_entrypoint() -> ElevenLabsExtension:
-    return ElevenLabsExtension()
--- a/comfy_api_nodes/nodes_gemini.py
+++ b/comfy_api_nodes/nodes_gemini.py
@@ -6,7 +6,6 @@ See: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/infer
 import base64
 import os
 from enum import Enum
-from fnmatch import fnmatch
 from io import BytesIO
 from typing import Literal

@@ -29,7 +28,6 @@ from comfy_api_nodes.apis.gemini import (
    GeminiRole,
    GeminiSystemInstructionContent,
    GeminiTextPart,
-    GeminiThinkingConfig,
    Modality,
 )
 from comfy_api_nodes.util import (
@@ -56,21 +54,6 @@ GEMINI_IMAGE_SYS_PROMPT = (
    "Prioritize generating the visual representation above any text, formatting, or conversational requests."
 )

-GEMINI_IMAGE_2_PRICE_BADGE = IO.PriceBadge(
-    depends_on=IO.PriceBadgeDepends(widgets=["model", "resolution"]),
-    expr="""
-    (
-      $m := widgets.model;
-      $r := widgets.resolution;
-      $isFlash := $contains($m, "nano banana 2");
-      $flashPrices := {"1k": 0.0696, "2k": 0.0696, "4k": 0.123};
-      $proPrices := {"1k": 0.134, "2k": 0.134, "4k": 0.24};
-      $prices := $isFlash ? $flashPrices : $proPrices;
-      {"type":"usd","usd": $lookup($prices, $r), "format":{"suffix":"/Image","approximate":true}}
-    )
-    """,
-)
-

 class GeminiModel(str, Enum):
    """
@@ -136,13 +119,6 @@ async def create_image_parts(
    return image_parts


-def _mime_matches(mime: GeminiMimeType | None, pattern: str) -> bool:
-    """Check if a MIME type matches a pattern. Supports fnmatch globs (e.g. 'image/*')."""
-    if mime is None:
-        return False
-    return fnmatch(mime.value, pattern)
-
-
 def get_parts_by_type(response: GeminiGenerateContentResponse, part_type: Literal["text"] | str) -> list[GeminiPart]:
    """
    Filter response parts by their type.
@@ -175,9 +151,9 @@ def get_parts_by_type(response: GeminiGenerateContentResponse, part_type: Litera
        for part in candidate.content.parts:
            if part_type == "text" and part.text:
                parts.append(part)
-            elif part.inlineData and _mime_matches(part.inlineData.mimeType, part_type):
+            elif part.inlineData and part.inlineData.mimeType == part_type:
                parts.append(part)
-            elif part.fileData and _mime_matches(part.fileData.mimeType, part_type):
+            elif part.fileData and part.fileData.mimeType == part_type:
                parts.append(part)

    if not parts and blocked_reasons:
@@ -202,7 +178,7 @@ def get_text_from_response(response: GeminiGenerateContentResponse) -> str:

 async def get_image_from_response(response: GeminiGenerateContentResponse) -> Input.Image:
    image_tensors: list[Input.Image] = []
-    parts = get_parts_by_type(response, "image/*")
+    parts = get_parts_by_type(response, "image/png")
    for part in parts:
        if part.inlineData:
            image_data = base64.b64decode(part.inlineData.data)
@@ -245,10 +221,6 @@ def calculate_tokens_price(response: GeminiGenerateContentResponse) -> float | N
        input_tokens_price = 2
        output_text_tokens_price = 12.0
        output_image_tokens_price = 120.0
-    elif response.modelVersion == "gemini-3.1-flash-image-preview":
-        input_tokens_price = 0.5
-        output_text_tokens_price = 3.0
-        output_image_tokens_price = 60.0
    else:
        return None
    final_price = response.usageMetadata.promptTokenCount * input_tokens_price
@@ -336,7 +308,6 @@ class GeminiNode(IO.ComfyNode):
                    default="",
                    optional=True,
                    tooltip="Foundational instructions that dictate an AI's behavior.",
-                    advanced=True,
                ),
            ],
            outputs=[
@@ -614,7 +585,6 @@ class GeminiImage(IO.ComfyNode):
                    tooltip="Choose 'IMAGE' for image-only output, or "
                    "'IMAGE+TEXT' to return both the generated image and a text response.",
                    optional=True,
-                    advanced=True,
                ),
                IO.String.Input(
                    "system_prompt",
@@ -622,7 +592,6 @@ class GeminiImage(IO.ComfyNode):
                    default=GEMINI_IMAGE_SYS_PROMPT,
                    optional=True,
                    tooltip="Foundational instructions that dictate an AI's behavior.",
-                    advanced=True,
                ),
            ],
            outputs=[
@@ -657,7 +626,7 @@ class GeminiImage(IO.ComfyNode):

        if not aspect_ratio:
            aspect_ratio = "auto"  # for backward compatability with old workflows; to-do remove this in December
-        image_config = GeminiImageConfig() if aspect_ratio == "auto" else GeminiImageConfig(aspectRatio=aspect_ratio)
+        image_config = GeminiImageConfig(aspectRatio=aspect_ratio)

        if images is not None:
            parts.extend(await create_image_parts(cls, images))
@@ -677,7 +646,7 @@ class GeminiImage(IO.ComfyNode):
                ],
                generationConfig=GeminiImageGenerationConfig(
                    responseModalities=(["IMAGE"] if response_modalities == "IMAGE" else ["TEXT", "IMAGE"]),
-                    imageConfig=image_config,
+                    imageConfig=None if aspect_ratio == "auto" else image_config,
                ),
                systemInstruction=gemini_system_prompt,
            ),
@@ -706,7 +675,7 @@ class GeminiImage2(IO.ComfyNode):
                ),
                IO.Combo.Input(
                    "model",
-                    options=["gemini-3-pro-image-preview", "Nano Banana 2 (Gemini 3.1 Flash Image)"],
+                    options=["gemini-3-pro-image-preview"],
                ),
                IO.Int.Input(
                    "seed",
@@ -737,7 +706,6 @@ class GeminiImage2(IO.ComfyNode):
                    options=["IMAGE+TEXT", "IMAGE"],
                    tooltip="Choose 'IMAGE' for image-only output, or "
                    "'IMAGE+TEXT' to return both the generated image and a text response.",
-                    advanced=True,
                ),
                IO.Image.Input(
                    "images",
@@ -757,7 +725,6 @@ class GeminiImage2(IO.ComfyNode):
                    default=GEMINI_IMAGE_SYS_PROMPT,
                    optional=True,
                    tooltip="Foundational instructions that dictate an AI's behavior.",
-                    advanced=True,
                ),
            ],
            outputs=[
@@ -770,169 +737,19 @@ class GeminiImage2(IO.ComfyNode):
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
-            price_badge=GEMINI_IMAGE_2_PRICE_BADGE,
-        )
-
-    @classmethod
-    async def execute(
-        cls,
-        prompt: str,
-        model: str,
-        seed: int,
-        aspect_ratio: str,
-        resolution: str,
-        response_modalities: str,
-        images: Input.Image | None = None,
-        files: list[GeminiPart] | None = None,
-        system_prompt: str = "",
-    ) -> IO.NodeOutput:
-        validate_string(prompt, strip_whitespace=True, min_length=1)
-        if model == "Nano Banana 2 (Gemini 3.1 Flash Image)":
-            model = "gemini-3.1-flash-image-preview"
-            if response_modalities == "IMAGE+TEXT":
-                raise ValueError("IMAGE+TEXT is not currently available for the Nano Banana 2 model.")
-
-        parts: list[GeminiPart] = [GeminiPart(text=prompt)]
-        if images is not None:
-            if get_number_of_images(images) > 14:
-                raise ValueError("The current maximum number of supported images is 14.")
-            parts.extend(await create_image_parts(cls, images))
-        if files is not None:
-            parts.extend(files)
-
-        image_config = GeminiImageConfig(imageSize=resolution)
-        if aspect_ratio != "auto":
-            image_config.aspectRatio = aspect_ratio
-
-        gemini_system_prompt = None
-        if system_prompt:
-            gemini_system_prompt = GeminiSystemInstructionContent(parts=[GeminiTextPart(text=system_prompt)], role=None)
-
-        response = await sync_op(
-            cls,
-            ApiEndpoint(path=f"/proxy/vertexai/gemini/{model}", method="POST"),
-            data=GeminiImageGenerateContentRequest(
-                contents=[
-                    GeminiContent(role=GeminiRole.user, parts=parts),
-                ],
-                generationConfig=GeminiImageGenerationConfig(
-                    responseModalities=(["IMAGE"] if response_modalities == "IMAGE" else ["TEXT", "IMAGE"]),
-                    imageConfig=image_config,
-                ),
-                systemInstruction=gemini_system_prompt,
+            price_badge=IO.PriceBadge(
+                depends_on=IO.PriceBadgeDepends(widgets=["resolution"]),
+                expr="""
+                (
+                  $r := widgets.resolution;
+                  ($contains($r,"1k") or $contains($r,"2k"))
+                    ? {"type":"usd","usd":0.134,"format":{"suffix":"/Image","approximate":true}}
+                    : $contains($r,"4k")
+                      ? {"type":"usd","usd":0.24,"format":{"suffix":"/Image","approximate":true}}
+                      : {"type":"text","text":"Token-based"}
+                )
+                """,
            ),
-            response_model=GeminiGenerateContentResponse,
-            price_extractor=calculate_tokens_price,
-        )
-        return IO.NodeOutput(await get_image_from_response(response), get_text_from_response(response))
-
-
-class GeminiNanoBanana2(IO.ComfyNode):
-
-    @classmethod
-    def define_schema(cls):
-        return IO.Schema(
-            node_id="GeminiNanoBanana2",
-            display_name="Nano Banana 2",
-            category="api node/image/Gemini",
-            description="Generate or edit images synchronously via Google Vertex API.",
-            inputs=[
-                IO.String.Input(
-                    "prompt",
-                    multiline=True,
-                    tooltip="Text prompt describing the image to generate or the edits to apply. "
-                    "Include any constraints, styles, or details the model should follow.",
-                    default="",
-                ),
-                IO.Combo.Input(
-                    "model",
-                    options=["Nano Banana 2 (Gemini 3.1 Flash Image)"],
-                ),
-                IO.Int.Input(
-                    "seed",
-                    default=42,
-                    min=0,
-                    max=0xFFFFFFFFFFFFFFFF,
-                    control_after_generate=True,
-                    tooltip="When the seed is fixed to a specific value, the model makes a best effort to provide "
-                    "the same response for repeated requests. Deterministic output isn't guaranteed. "
-                    "Also, changing the model or parameter settings, such as the temperature, "
-                    "can cause variations in the response even when you use the same seed value. "
-                    "By default, a random seed value is used.",
-                ),
-                IO.Combo.Input(
-                    "aspect_ratio",
-                    options=[
-                        "auto",
-                        "1:1",
-                        "2:3",
-                        "3:2",
-                        "3:4",
-                        "4:3",
-                        "4:5",
-                        "5:4",
-                        "9:16",
-                        "16:9",
-                        "21:9",
-                        # "1:4",
-                        # "4:1",
-                        # "8:1",
-                        # "1:8",
-                    ],
-                    default="auto",
-                    tooltip="If set to 'auto', matches your input image's aspect ratio; "
-                    "if no image is provided, a 16:9 square is usually generated.",
-                ),
-                IO.Combo.Input(
-                    "resolution",
-                    options=[
-                        # "512px",
-                        "1K",
-                        "2K",
-                        "4K",
-                    ],
-                    tooltip="Target output resolution. For 2K/4K the native Gemini upscaler is used.",
-                ),
-                IO.Combo.Input(
-                    "response_modalities",
-                    options=["IMAGE"],
-                    advanced=True,
-                ),
-                IO.Combo.Input(
-                    "thinking_level",
-                    options=["MINIMAL", "HIGH"],
-                ),
-                IO.Image.Input(
-                    "images",
-                    optional=True,
-                    tooltip="Optional reference image(s). "
-                    "To include multiple images, use the Batch Images node (up to 14).",
-                ),
-                IO.Custom("GEMINI_INPUT_FILES").Input(
-                    "files",
-                    optional=True,
-                    tooltip="Optional file(s) to use as context for the model. "
-                    "Accepts inputs from the Gemini Generate Content Input Files node.",
-                ),
-                IO.String.Input(
-                    "system_prompt",
-                    multiline=True,
-                    default=GEMINI_IMAGE_SYS_PROMPT,
-                    optional=True,
-                    tooltip="Foundational instructions that dictate an AI's behavior.",
-                    advanced=True,
-                ),
-            ],
-            outputs=[
-                IO.Image.Output(),
-            ],
-            hidden=[
-                IO.Hidden.auth_token_comfy_org,
-                IO.Hidden.api_key_comfy_org,
-                IO.Hidden.unique_id,
-            ],
-            is_api_node=True,
-            price_badge=GEMINI_IMAGE_2_PRICE_BADGE,
        )

    @classmethod
@@ -944,14 +761,11 @@ class GeminiNanoBanana2(IO.ComfyNode):
        aspect_ratio: str,
        resolution: str,
        response_modalities: str,
-        thinking_level: str,
        images: Input.Image | None = None,
        files: list[GeminiPart] | None = None,
        system_prompt: str = "",
    ) -> IO.NodeOutput:
        validate_string(prompt, strip_whitespace=True, min_length=1)
-        if model == "Nano Banana 2 (Gemini 3.1 Flash Image)":
-            model = "gemini-3.1-flash-image-preview"

        parts: list[GeminiPart] = [GeminiPart(text=prompt)]
        if images is not None:
@@ -979,7 +793,6 @@ class GeminiNanoBanana2(IO.ComfyNode):
                generationConfig=GeminiImageGenerationConfig(
                    responseModalities=(["IMAGE"] if response_modalities == "IMAGE" else ["TEXT", "IMAGE"]),
                    imageConfig=image_config,
-                    thinkingConfig=GeminiThinkingConfig(thinkingLevel=thinking_level),
                ),
                systemInstruction=gemini_system_prompt,
            ),
@@ -996,7 +809,6 @@ class GeminiExtension(ComfyExtension):
            GeminiNode,
            GeminiImage,
            GeminiImage2,
-            GeminiNanoBanana2,
            GeminiInputFiles,
        ]

--- a/comfy_api_nodes/nodes_hunyuan3d.py
+++ b/comfy_api_nodes/nodes_hunyuan3d.py
@@ -1,48 +1,31 @@
 from typing_extensions import override

-from comfy_api.latest import IO, ComfyExtension, Input, Types
+from comfy_api.latest import IO, ComfyExtension, Input
 from comfy_api_nodes.apis.hunyuan3d import (
    Hunyuan3DViewImage,
    InputGenerateType,
    ResultFile3D,
-    TextureEditTaskRequest,
    To3DProTaskCreateResponse,
    To3DProTaskQueryRequest,
    To3DProTaskRequest,
    To3DProTaskResultResponse,
-    To3DUVFileInput,
-    To3DUVTaskRequest,
 )
 from comfy_api_nodes.util import (
    ApiEndpoint,
    download_url_to_file_3d,
-    download_url_to_image_tensor,
    downscale_image_tensor_by_max_side,
    poll_op,
    sync_op,
-    upload_3d_model_to_comfyapi,
    upload_image_to_comfyapi,
    validate_image_dimensions,
    validate_string,
 )


-def _is_tencent_rate_limited(status: int, body: object) -> bool:
-    return (
-        status == 400
-        and isinstance(body, dict)
-        and "RequestLimitExceeded" in str(body.get("Response", {}).get("Error", {}).get("Code", ""))
-    )
-
-
-def get_file_from_response(
-    response_objs: list[ResultFile3D], file_type: str, raise_if_not_found: bool = True
-) -> ResultFile3D | None:
+def get_file_from_response(response_objs: list[ResultFile3D], file_type: str) -> ResultFile3D | None:
    for i in response_objs:
        if i.Type.lower() == file_type.lower():
            return i
-    if raise_if_not_found:
-        raise ValueError(f"'{file_type}' file type is not found in the response.")
    return None


@@ -52,9 +35,8 @@ class TencentTextToModelNode(IO.ComfyNode):
    def define_schema(cls):
        return IO.Schema(
            node_id="TencentTextToModelNode",
-            display_name="Hunyuan3D: Text to Model",
+            display_name="Hunyuan3D: Text to Model (Pro)",
            category="api node/3d/Tencent",
-            essentials_category="3D",
            inputs=[
                IO.Combo.Input(
                    "model",
@@ -138,7 +120,6 @@ class TencentTextToModelNode(IO.ComfyNode):
                EnablePBR=generate_type.get("pbr", None),
                PolygonType=generate_type.get("polygon_type", None),
            ),
-            is_rate_limited=_is_tencent_rate_limited,
        )
        if response.Error:
            raise ValueError(f"Task creation failed with code {response.Error.Code}: {response.Error.Message}")
@@ -150,14 +131,11 @@ class TencentTextToModelNode(IO.ComfyNode):
            response_model=To3DProTaskResultResponse,
            status_extractor=lambda r: r.Status,
        )
+        glb_result = get_file_from_response(result.ResultFile3Ds, "glb")
+        obj_result = get_file_from_response(result.ResultFile3Ds, "obj")
+        file_glb = await download_url_to_file_3d(glb_result.Url, "glb", task_id=task_id) if glb_result else None
        return IO.NodeOutput(
-            f"{task_id}.glb",
-            await download_url_to_file_3d(
-                get_file_from_response(result.ResultFile3Ds, "glb").Url, "glb", task_id=task_id
-            ),
-            await download_url_to_file_3d(
-                get_file_from_response(result.ResultFile3Ds, "obj").Url, "obj", task_id=task_id
-            ),
+            file_glb, file_glb, await download_url_to_file_3d(obj_result.Url, "obj", task_id=task_id) if obj_result else None
        )


@@ -167,9 +145,8 @@ class TencentImageToModelNode(IO.ComfyNode):
    def define_schema(cls):
        return IO.Schema(
            node_id="TencentImageToModelNode",
-            display_name="Hunyuan3D: Image(s) to Model",
+            display_name="Hunyuan3D: Image(s) to Model (Pro)",
            category="api node/3d/Tencent",
-            essentials_category="3D",
            inputs=[
                IO.Combo.Input(
                    "model",
@@ -291,7 +268,6 @@ class TencentImageToModelNode(IO.ComfyNode):
                EnablePBR=generate_type.get("pbr", None),
                PolygonType=generate_type.get("polygon_type", None),
            ),
-            is_rate_limited=_is_tencent_rate_limited,
        )
        if response.Error:
            raise ValueError(f"Task creation failed with code {response.Error.Code}: {response.Error.Message}")
@@ -303,257 +279,11 @@ class TencentImageToModelNode(IO.ComfyNode):
            response_model=To3DProTaskResultResponse,
            status_extractor=lambda r: r.Status,
        )
+        glb_result = get_file_from_response(result.ResultFile3Ds, "glb")
+        obj_result = get_file_from_response(result.ResultFile3Ds, "obj")
+        file_glb = await download_url_to_file_3d(glb_result.Url, "glb", task_id=task_id) if glb_result else None
        return IO.NodeOutput(
-            f"{task_id}.glb",
-            await download_url_to_file_3d(
-                get_file_from_response(result.ResultFile3Ds, "glb").Url, "glb", task_id=task_id
-            ),
-            await download_url_to_file_3d(
-                get_file_from_response(result.ResultFile3Ds, "obj").Url, "obj", task_id=task_id
-            ),
-        )
-
-
-class TencentModelTo3DUVNode(IO.ComfyNode):
-
-    @classmethod
-    def define_schema(cls):
-        return IO.Schema(
-            node_id="TencentModelTo3DUVNode",
-            display_name="Hunyuan3D: Model to UV",
-            category="api node/3d/Tencent",
-            description="Perform UV unfolding on a 3D model to generate UV texture. "
-            "Input model must have less than 30000 faces.",
-            inputs=[
-                IO.MultiType.Input(
-                    "model_3d",
-                    types=[IO.File3DGLB, IO.File3DOBJ, IO.File3DFBX, IO.File3DAny],
-                    tooltip="Input 3D model (GLB, OBJ, or FBX)",
-                ),
-                IO.Int.Input(
-                    "seed",
-                    default=1,
-                    min=0,
-                    max=2147483647,
-                    display_mode=IO.NumberDisplay.number,
-                    control_after_generate=True,
-                    tooltip="Seed controls whether the node should re-run; "
-                    "results are non-deterministic regardless of seed.",
-                ),
-            ],
-            outputs=[
-                IO.File3DOBJ.Output(display_name="OBJ"),
-                IO.File3DFBX.Output(display_name="FBX"),
-                IO.Image.Output(),
-            ],
-            hidden=[
-                IO.Hidden.auth_token_comfy_org,
-                IO.Hidden.api_key_comfy_org,
-                IO.Hidden.unique_id,
-            ],
-            is_api_node=True,
-            price_badge=IO.PriceBadge(expr='{"type":"usd","usd":0.2}'),
-        )
-
-    SUPPORTED_FORMATS = {"glb", "obj", "fbx"}
-
-    @classmethod
-    async def execute(
-        cls,
-        model_3d: Types.File3D,
-        seed: int,
-    ) -> IO.NodeOutput:
-        _ = seed
-        file_format = model_3d.format.lower()
-        if file_format not in cls.SUPPORTED_FORMATS:
-            raise ValueError(
-                f"Unsupported file format: '{file_format}'. "
-                f"Supported formats: {', '.join(sorted(cls.SUPPORTED_FORMATS))}."
-            )
-        response = await sync_op(
-            cls,
-            ApiEndpoint(path="/proxy/tencent/hunyuan/3d-uv", method="POST"),
-            response_model=To3DProTaskCreateResponse,
-            data=To3DUVTaskRequest(
-                File=To3DUVFileInput(
-                    Type=file_format.upper(),
-                    Url=await upload_3d_model_to_comfyapi(cls, model_3d, file_format),
-                )
-            ),
-            is_rate_limited=_is_tencent_rate_limited,
-        )
-        if response.Error:
-            raise ValueError(f"Task creation failed with code {response.Error.Code}: {response.Error.Message}")
-        result = await poll_op(
-            cls,
-            ApiEndpoint(path="/proxy/tencent/hunyuan/3d-uv/query", method="POST"),
-            data=To3DProTaskQueryRequest(JobId=response.JobId),
-            response_model=To3DProTaskResultResponse,
-            status_extractor=lambda r: r.Status,
-        )
-        return IO.NodeOutput(
-            await download_url_to_file_3d(get_file_from_response(result.ResultFile3Ds, "obj").Url, "obj"),
-            await download_url_to_file_3d(get_file_from_response(result.ResultFile3Ds, "fbx").Url, "fbx"),
-            await download_url_to_image_tensor(get_file_from_response(result.ResultFile3Ds, "image").Url),
-        )
-
-
-class Tencent3DTextureEditNode(IO.ComfyNode):
-
-    @classmethod
-    def define_schema(cls):
-        return IO.Schema(
-            node_id="Tencent3DTextureEditNode",
-            display_name="Hunyuan3D: 3D Texture Edit",
-            category="api node/3d/Tencent",
-            description="After inputting the 3D model, perform 3D model texture redrawing.",
-            inputs=[
-                IO.MultiType.Input(
-                    "model_3d",
-                    types=[IO.File3DFBX, IO.File3DAny],
-                    tooltip="3D model in FBX format. Model should have less than 100000 faces.",
-                ),
-                IO.String.Input(
-                    "prompt",
-                    multiline=True,
-                    default="",
-                    tooltip="Describes texture editing. Supports up to 1024 UTF-8 characters.",
-                ),
-                IO.Int.Input(
-                    "seed",
-                    default=0,
-                    min=0,
-                    max=2147483647,
-                    display_mode=IO.NumberDisplay.number,
-                    control_after_generate=True,
-                    tooltip="Seed controls whether the node should re-run; "
-                    "results are non-deterministic regardless of seed.",
-                ),
-            ],
-            outputs=[
-                IO.File3DGLB.Output(display_name="GLB"),
-                IO.File3DFBX.Output(display_name="FBX"),
-            ],
-            hidden=[
-                IO.Hidden.auth_token_comfy_org,
-                IO.Hidden.api_key_comfy_org,
-                IO.Hidden.unique_id,
-            ],
-            is_api_node=True,
-            price_badge=IO.PriceBadge(
-                expr="""{"type":"usd","usd": 0.6}""",
-            ),
-        )
-
-    @classmethod
-    async def execute(
-        cls,
-        model_3d: Types.File3D,
-        prompt: str,
-        seed: int,
-    ) -> IO.NodeOutput:
-        _ = seed
-        file_format = model_3d.format.lower()
-        if file_format != "fbx":
-            raise ValueError(f"Unsupported file format: '{file_format}'. Only FBX format is supported.")
-        validate_string(prompt, field_name="prompt", min_length=1, max_length=1024)
-        model_url = await upload_3d_model_to_comfyapi(cls, model_3d, file_format)
-        response = await sync_op(
-            cls,
-            ApiEndpoint(path="/proxy/tencent/hunyuan/3d-texture-edit", method="POST"),
-            response_model=To3DProTaskCreateResponse,
-            data=TextureEditTaskRequest(
-                File3D=To3DUVFileInput(Type=file_format.upper(), Url=model_url),
-                Prompt=prompt,
-                EnablePBR=True,
-            ),
-            is_rate_limited=_is_tencent_rate_limited,
-        )
-        if response.Error:
-            raise ValueError(f"Task creation failed with code {response.Error.Code}: {response.Error.Message}")
-
-        result = await poll_op(
-            cls,
-            ApiEndpoint(path="/proxy/tencent/hunyuan/3d-texture-edit/query", method="POST"),
-            data=To3DProTaskQueryRequest(JobId=response.JobId),
-            response_model=To3DProTaskResultResponse,
-            status_extractor=lambda r: r.Status,
-        )
-        return IO.NodeOutput(
-            await download_url_to_file_3d(get_file_from_response(result.ResultFile3Ds, "glb").Url, "glb"),
-            await download_url_to_file_3d(get_file_from_response(result.ResultFile3Ds, "fbx").Url, "fbx"),
-        )
-
-
-class Tencent3DPartNode(IO.ComfyNode):
-
-    @classmethod
-    def define_schema(cls):
-        return IO.Schema(
-            node_id="Tencent3DPartNode",
-            display_name="Hunyuan3D: 3D Part",
-            category="api node/3d/Tencent",
-            description="Automatically perform component identification and generation based on the model structure.",
-            inputs=[
-                IO.MultiType.Input(
-                    "model_3d",
-                    types=[IO.File3DFBX, IO.File3DAny],
-                    tooltip="3D model in FBX format. Model should have less than 30000 faces.",
-                ),
-                IO.Int.Input(
-                    "seed",
-                    default=0,
-                    min=0,
-                    max=2147483647,
-                    display_mode=IO.NumberDisplay.number,
-                    control_after_generate=True,
-                    tooltip="Seed controls whether the node should re-run; "
-                    "results are non-deterministic regardless of seed.",
-                ),
-            ],
-            outputs=[
-                IO.File3DFBX.Output(display_name="FBX"),
-            ],
-            hidden=[
-                IO.Hidden.auth_token_comfy_org,
-                IO.Hidden.api_key_comfy_org,
-                IO.Hidden.unique_id,
-            ],
-            is_api_node=True,
-            price_badge=IO.PriceBadge(expr='{"type":"usd","usd":0.6}'),
-        )
-
-    @classmethod
-    async def execute(
-        cls,
-        model_3d: Types.File3D,
-        seed: int,
-    ) -> IO.NodeOutput:
-        _ = seed
-        file_format = model_3d.format.lower()
-        if file_format != "fbx":
-            raise ValueError(f"Unsupported file format: '{file_format}'. Only FBX format is supported.")
-        model_url = await upload_3d_model_to_comfyapi(cls, model_3d, file_format)
-        response = await sync_op(
-            cls,
-            ApiEndpoint(path="/proxy/tencent/hunyuan/3d-part", method="POST"),
-            response_model=To3DProTaskCreateResponse,
-            data=To3DUVTaskRequest(
-                File=To3DUVFileInput(Type=file_format.upper(), Url=model_url),
-            ),
-            is_rate_limited=_is_tencent_rate_limited,
-        )
-        if response.Error:
-            raise ValueError(f"Task creation failed with code {response.Error.Code}: {response.Error.Message}")
-        result = await poll_op(
-            cls,
-            ApiEndpoint(path="/proxy/tencent/hunyuan/3d-part/query", method="POST"),
-            data=To3DProTaskQueryRequest(JobId=response.JobId),
-            response_model=To3DProTaskResultResponse,
-            status_extractor=lambda r: r.Status,
-        )
-        return IO.NodeOutput(
-            await download_url_to_file_3d(get_file_from_response(result.ResultFile3Ds, "fbx").Url, "fbx"),
+            file_glb, file_glb, await download_url_to_file_3d(obj_result.Url, "obj", task_id=task_id) if obj_result else None
        )


@@ -563,9 +293,6 @@ class TencentHunyuan3DExtension(ComfyExtension):
        return [
            TencentTextToModelNode,
            TencentImageToModelNode,
-            # TencentModelTo3DUVNode,
-            # Tencent3DTextureEditNode,
-            Tencent3DPartNode,
        ]


--- a/comfy_api_nodes/nodes_ideogram.py
+++ b/comfy_api_nodes/nodes_ideogram.py
@@ -261,7 +261,6 @@ class IdeogramV1(IO.ComfyNode):
                    default="AUTO",
                    tooltip="Determine if MagicPrompt should be used in generation",
                    optional=True,
-                    advanced=True,
                ),
                IO.Int.Input(
                    "seed",
@@ -395,7 +394,6 @@ class IdeogramV2(IO.ComfyNode):
                    default="AUTO",
                    tooltip="Determine if MagicPrompt should be used in generation",
                    optional=True,
-                    advanced=True,
                ),
                IO.Int.Input(
                    "seed",
@@ -413,7 +411,6 @@ class IdeogramV2(IO.ComfyNode):
                    default="NONE",
                    tooltip="Style type for generation (V2 only)",
                    optional=True,
-                    advanced=True,
                ),
                IO.String.Input(
                    "negative_prompt",
@@ -567,7 +564,6 @@ class IdeogramV3(IO.ComfyNode):
                    default="AUTO",
                    tooltip="Determine if MagicPrompt should be used in generation",
                    optional=True,
-                    advanced=True,
                ),
                IO.Int.Input(
                    "seed",
@@ -594,7 +590,6 @@ class IdeogramV3(IO.ComfyNode):
                    default="DEFAULT",
                    tooltip="Controls the trade-off between generation speed and quality",
                    optional=True,
-                    advanced=True,
                ),
                IO.Image.Input(
                    "character_image",
--- a/comfy_api_nodes/nodes_kling.py
+++ b/comfy_api_nodes/nodes_kling.py
@@ -50,7 +50,6 @@ from comfy_api_nodes.apis import (
 )
 from comfy_api_nodes.apis.kling import (
    ImageToVideoWithAudioRequest,
-    KlingAvatarRequest,
    MotionControlRequest,
    MultiPromptEntry,
    OmniImageParamImage,
@@ -75,7 +74,6 @@ from comfy_api_nodes.util import (
    upload_image_to_comfyapi,
    upload_images_to_comfyapi,
    upload_video_to_comfyapi,
-    validate_audio_duration,
    validate_image_aspect_ratio,
    validate_image_dimensions,
    validate_string,
@@ -2264,7 +2262,6 @@ class KlingLipSyncAudioToVideoNode(IO.ComfyNode):
            node_id="KlingLipSyncAudioToVideoNode",
            display_name="Kling Lip Sync Video with Audio",
            category="api node/video/Kling",
-            essentials_category="Video Generation",
            description="Kling Lip Sync Audio to Video Node. Syncs mouth movements in a video file to the audio content of an audio file. When using, ensure that the audio contains clearly distinguishable vocals and that the video contains a distinct face. The audio file should not be larger than 5MB. The video file should not be larger than 100MB, should have height/width between 720px and 1920px, and should be between 2s and 10s in length.",
            inputs=[
                IO.Video.Input("video"),
@@ -2336,7 +2333,6 @@ class KlingLipSyncTextToVideoNode(IO.ComfyNode):
                    max=2.0,
                    display_mode=IO.NumberDisplay.slider,
                    tooltip="Speech Rate. Valid range: 0.8~2.0, accurate to one decimal place.",
-                    advanced=True,
                ),
            ],
            outputs=[
@@ -2458,7 +2454,6 @@ class KlingImageGenerationNode(IO.ComfyNode):
                IO.Combo.Input(
                    "image_type",
                    options=[i.value for i in KlingImageGenImageReferenceType],
-                    advanced=True,
                ),
                IO.Float.Input(
                    "image_fidelity",
@@ -2468,7 +2463,6 @@ class KlingImageGenerationNode(IO.ComfyNode):
                    step=0.01,
                    display_mode=IO.NumberDisplay.slider,
                    tooltip="Reference intensity for user-uploaded images",
-                    advanced=True,
                ),
                IO.Float.Input(
                    "human_fidelity",
@@ -2478,7 +2472,6 @@ class KlingImageGenerationNode(IO.ComfyNode):
                    step=0.01,
                    display_mode=IO.NumberDisplay.slider,
                    tooltip="Subject reference similarity",
-                    advanced=True,
                ),
                IO.Combo.Input("model_name", options=["kling-v3", "kling-v2", "kling-v1-5"]),
                IO.Combo.Input(
@@ -2594,7 +2587,7 @@ class TextToVideoWithAudio(IO.ComfyNode):
                IO.Combo.Input("mode", options=["pro"]),
                IO.Combo.Input("aspect_ratio", options=["16:9", "9:16", "1:1"]),
                IO.Combo.Input("duration", options=[5, 10]),
-                IO.Boolean.Input("generate_audio", default=True, advanced=True),
+                IO.Boolean.Input("generate_audio", default=True),
            ],
            outputs=[
                IO.Video.Output(),
@@ -2662,7 +2655,7 @@ class ImageToVideoWithAudio(IO.ComfyNode):
                IO.String.Input("prompt", multiline=True, tooltip="Positive text prompt."),
                IO.Combo.Input("mode", options=["pro"]),
                IO.Combo.Input("duration", options=[5, 10]),
-                IO.Boolean.Input("generate_audio", default=True, advanced=True),
+                IO.Boolean.Input("generate_audio", default=True),
            ],
            outputs=[
                IO.Video.Output(),
@@ -3141,103 +3134,6 @@ class KlingFirstLastFrameNode(IO.ComfyNode):
        return IO.NodeOutput(await download_url_to_video_output(final_response.data.task_result.videos[0].url))


-class KlingAvatarNode(IO.ComfyNode):
-
-    @classmethod
-    def define_schema(cls) -> IO.Schema:
-        return IO.Schema(
-            node_id="KlingAvatarNode",
-            display_name="Kling Avatar 2.0",
-            category="api node/video/Kling",
-            description="Generate broadcast-style digital human videos from a single photo and an audio file.",
-            inputs=[
-                IO.Image.Input(
-                    "image",
-                    tooltip="Avatar reference image. "
-                    "Width and height must be at least 300px. Aspect ratio must be between 1:2.5 and 2.5:1.",
-                ),
-                IO.Audio.Input(
-                    "sound_file",
-                    tooltip="Audio input. Must be between 2 and 300 seconds in duration.",
-                ),
-                IO.Combo.Input("mode", options=["std", "pro"]),
-                IO.String.Input(
-                    "prompt",
-                    multiline=True,
-                    default="",
-                    optional=True,
-                    tooltip="Optional prompt to define avatar actions, emotions, and camera movements.",
-                ),
-                IO.Int.Input(
-                    "seed",
-                    default=0,
-                    min=0,
-                    max=2147483647,
-                    display_mode=IO.NumberDisplay.number,
-                    control_after_generate=True,
-                    tooltip="Seed controls whether the node should re-run; "
-                    "results are non-deterministic regardless of seed.",
-                ),
-            ],
-            outputs=[
-                IO.Video.Output(),
-            ],
-            hidden=[
-                IO.Hidden.auth_token_comfy_org,
-                IO.Hidden.api_key_comfy_org,
-                IO.Hidden.unique_id,
-            ],
-            is_api_node=True,
-            price_badge=IO.PriceBadge(
-                depends_on=IO.PriceBadgeDepends(widgets=["mode"]),
-                expr="""
-                (
-                  $prices := {"std": 0.056, "pro": 0.112};
-                  {"type":"usd","usd": $lookup($prices, widgets.mode), "format":{"suffix":"/second"}}
-                )
-                """,
-            ),
-        )
-
-    @classmethod
-    async def execute(
-        cls,
-        image: Input.Image,
-        sound_file: Input.Audio,
-        mode: str,
-        seed: int,
-        prompt: str = "",
-    ) -> IO.NodeOutput:
-        validate_image_dimensions(image, min_width=300, min_height=300)
-        validate_image_aspect_ratio(image, (1, 2.5), (2.5, 1))
-        validate_audio_duration(sound_file, min_duration=2, max_duration=300)
-        response = await sync_op(
-            cls,
-            ApiEndpoint(path="/proxy/kling/v1/videos/avatar/image2video", method="POST"),
-            response_model=TaskStatusResponse,
-            data=KlingAvatarRequest(
-                image=await upload_image_to_comfyapi(cls, image),
-                sound_file=await upload_audio_to_comfyapi(
-                    cls, sound_file, container_format="mp3", codec_name="libmp3lame", mime_type="audio/mpeg"
-                ),
-                prompt=prompt or None,
-                mode=mode,
-            ),
-        )
-        if response.code:
-            raise RuntimeError(
-                f"Kling request failed. Code: {response.code}, Message: {response.message}, Data: {response.data}"
-            )
-        final_response = await poll_op(
-            cls,
-            ApiEndpoint(path=f"/proxy/kling/v1/videos/avatar/image2video/{response.data.task_id}"),
-            response_model=TaskStatusResponse,
-            status_extractor=lambda r: (r.data.task_status if r.data else None),
-            max_poll_attempts=800,
-        )
-        return IO.NodeOutput(await download_url_to_video_output(final_response.data.task_result.videos[0].url))
-
-
 class KlingExtension(ComfyExtension):
    @override
    async def get_node_list(self) -> list[type[IO.ComfyNode]]:
@@ -3266,7 +3162,6 @@ class KlingExtension(ComfyExtension):
            MotionControl,
            KlingVideoNode,
            KlingFirstLastFrameNode,
-            KlingAvatarNode,
        ]


--- a/comfy_api_nodes/nodes_ltxv.py
+++ b/comfy_api_nodes/nodes_ltxv.py
@@ -74,7 +74,6 @@ class TextToVideoNode(IO.ComfyNode):
                    default=False,
                    optional=True,
                    tooltip="When true, the generated video will include AI-generated audio matching the scene.",
-                    advanced=True,
                ),
            ],
            outputs=[
@@ -152,7 +151,6 @@ class ImageToVideoNode(IO.ComfyNode):
                    default=False,
                    optional=True,
                    tooltip="When true, the generated video will include AI-generated audio matching the scene.",
-                    advanced=True,
                ),
            ],
            outputs=[
--- a/comfy_api_nodes/nodes_magnific.py
+++ b/comfy_api_nodes/nodes_magnific.py
@@ -110,13 +110,11 @@ class MagnificImageUpscalerCreativeNode(IO.ComfyNode):
                IO.Combo.Input(
                    "engine",
                    options=["automatic", "magnific_illusio", "magnific_sharpy", "magnific_sparkle"],
-                    advanced=True,
                ),
                IO.Boolean.Input(
                    "auto_downscale",
                    default=False,
                    tooltip="Automatically downscale input image if output would exceed maximum pixel limit.",
-                    advanced=True,
                ),
            ],
            outputs=[
@@ -282,7 +280,6 @@ class MagnificImageUpscalerPreciseV2Node(IO.ComfyNode):
                    "auto_downscale",
                    default=False,
                    tooltip="Automatically downscale input image if output would exceed maximum resolution.",
-                    advanced=True,
                ),
            ],
            outputs=[
@@ -443,7 +440,6 @@ class MagnificImageStyleTransferNode(IO.ComfyNode):
                        "softy",
                    ],
                    tooltip="Processing engine selection.",
-                    advanced=True,
                ),
                IO.DynamicCombo.Input(
                    "portrait_mode",
@@ -472,7 +468,6 @@ class MagnificImageStyleTransferNode(IO.ComfyNode):
                    default=True,
                    tooltip="When disabled, expect each generation to introduce a degree of randomness, "
                    "leading to more diverse outcomes.",
-                    advanced=True,
                ),
            ],
            outputs=[
@@ -587,19 +582,16 @@ class MagnificImageRelightNode(IO.ComfyNode):
                    "interpolate_from_original",
                    default=False,
                    tooltip="Restricts generation freedom to match original more closely.",
-                    advanced=True,
                ),
                IO.Boolean.Input(
                    "change_background",
                    default=True,
                    tooltip="Modifies background based on prompt/reference.",
-                    advanced=True,
                ),
                IO.Boolean.Input(
                    "preserve_details",
                    default=True,
                    tooltip="Maintains texture and fine details from original.",
-                    advanced=True,
                ),
                IO.DynamicCombo.Input(
                    "advanced_settings",
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Hunter Senft-Grupp	5332ca60e4	fix: move _initialized flag to end of GLContext.__init__ Prevents '_vao' attribute error when init fails partway through and subsequent calls skip initialization due to early _initialized flag.	2026-02-14 22:27:30 -08:00
Hunter Senft-Grupp	40b247f046	Merge remote-tracking branch 'comfy-org/master' into test-glsl-nodes	2026-02-14 20:20:56 -08:00
pythongosssss	c2d229a786	Add edge preserving blur	2026-02-04 10:20:24 -08:00
pythongosssss	3c40ee0f02	print -> logger	2026-01-31 16:43:57 -08:00
pythongosssss	43034b6881	rebuild blueprints	2026-01-31 16:32:00 -08:00
pythongosssss	bb048d4aaa	more fixes	2026-01-31 16:30:10 -08:00
pythongosssss	7c1f02d1fa	add multipass for faster blur	2026-01-31 16:30:00 -08:00
pythongosssss	292a5918f4	shader nit iteration	2026-01-31 16:03:47 -08:00
pythongosssss	0050b66a0b	add glsl shader update system	2026-01-31 13:48:59 -08:00
pythongosssss	0c313f5293	hsb	2026-01-31 12:25:38 -08:00
pythongosssss	1fcf9dca18	brightness/contrast	2026-01-31 12:25:34 -08:00
pythongosssss	3b790d24d6	Add glow	2026-01-31 10:16:51 -08:00
pythongosssss	92b2b7198a	Merge remote-tracking branch 'origin/master' into pysssss/glsl-blueprints	2026-01-30 16:27:23 -08:00
pythongosssss	309c3e4ec0	Add channels	2026-01-30 14:57:44 -08:00
pythongosssss	23591d4388	Add image operation blueprints	2026-01-30 14:53:39 -08:00
pythongosssss	c3d07bec6d	add diagnostics, update mac initialization	2026-01-30 12:26:04 -08:00
pythongosssss	59b955ff54	fix ci perf: only read required outputs	2026-01-29 20:14:26 -08:00
pythongosssss	1263d6fe88	add additional support for egl & osmesa backends	2026-01-29 20:07:40 -08:00
pythongosssss	23572c6314	tidy	2026-01-28 20:59:01 -08:00
pythongosssss	d809ef8fb1	remove cpu support	2026-01-28 20:58:04 -08:00
pythongosssss	a4317314d2	convert to using PyOpenGL and glfw	2026-01-28 20:48:20 -08:00
pythongosssss	aaea976f36	fix line endings	2026-01-28 11:02:17 -08:00
pythongosssss	cee092213e	Merge remote-tracking branch 'origin/master' into pysssss/basic-glsl-shader-node	2026-01-28 10:50:12 -08:00
pythongosssss	3da0e9c367	fix casing	2026-01-28 10:47:36 -08:00
pythongosssss	9fa8202620	Try fix build	2026-01-28 10:47:36 -08:00
pythongosssss	b4438c9baf	Support multiple outputs	2026-01-28 10:47:36 -08:00
pythongosssss	cc30293d65	tidy	2026-01-23 10:38:26 -08:00
pythongosssss	866d863128	adds support for executing simple glsl shaders using moderngl package	2026-01-23 10:37:52 -08:00
				`@@ -1 +0,0 @@`
				{"revision": 0, "last_node_id": 15, "last_link_id": 0, "nodes": [{"id": 15, "type": "24d8bbfd-39d4-4774-bff0-3de40cc7a471", "pos": [-1490, 2040], "size": [400, 260], "flags": {}, "order": 0, "mode": 0, "inputs": [{"name": "prompt", "type": "STRING", "widget": {"name": "prompt"}, "link": null}, {"label": "reference images", "name": "images", "type": "IMAGE", "link": null}], "outputs": [{"name": "STRING", "type": "STRING", "links": null}], "title": "Prompt Enhance", "properties": {"proxyWidgets": [["-1", "prompt"]], "cnr_id": "comfy-core", "ver": "0.14.1"}, "widgets_values": [""]}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "24d8bbfd-39d4-4774-bff0-3de40cc7a471", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 15, "lastLinkId": 14, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Prompt Enhance", "inputNode": {"id": -10, "bounding": [-2170, 2110, 138.876953125, 80]}, "outputNode": {"id": -20, "bounding": [-640, 2110, 120, 60]}, "inputs": [{"id": "aeab7216-00e0-4528-a09b-bba50845c5a6", "name": "prompt", "type": "STRING", "linkIds": [11], "pos": [-2051.123046875, 2130]}, {"id": "7b73fd36-aa31-4771-9066-f6c83879994b", "name": "images", "type": "IMAGE", "linkIds": [14], "label": "reference images", "pos": [-2051.123046875, 2150]}], "outputs": [{"id": "c7b0d930-68a1-48d1-b496-0519e5837064", "name": "STRING", "type": "STRING", "linkIds": [13], "pos": [-620, 2130]}], "widgets": [], "nodes": [{"id": 11, "type": "GeminiNode", "pos": [-1560, 1990], "size": [470, 470], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "shape": 7, "type": "IMAGE", "link": 14}, {"localized_name": "audio", "name": "audio", "shape": 7, "type": "AUDIO", "link": null}, {"localized_name": "video", "name": "video", "shape": 7, "type": "VIDEO", "link": null}, {"localized_name": "files", "name": "files", "shape": 7, "type": "GEMINI_INPUT_FILES", "link": null}, {"localized_name": "prompt", "name": "prompt", "type": 
"STRING", "widget": {"name": "prompt"}, "link": 11}, {"localized_name": "model", "name": "model", "type": "COMBO", "widget": {"name": "model"}, "link": null}, {"localized_name": "seed", "name": "seed", "type": "INT", "widget": {"name": "seed"}, "link": null}, {"localized_name": "system_prompt", "name": "system_prompt", "shape": 7, "type": "STRING", "widget": {"name": "system_prompt"}, "link": null}], "outputs": [{"localized_name": "STRING", "name": "STRING", "type": "STRING", "links": [13]}], "properties": {"cnr_id": "comfy-core", "ver": "0.14.1", "Node name for S&R": "GeminiNode"}, "widgets_values": ["", "gemini-3-pro-preview", 42, "randomize", "You are an expert in prompt writing.\nBased on the input, rewrite the user's input into a detailed prompt.\nincluding camera settings, lighting, composition, and style.\nReturn the prompt only"], "color": "#432", "bgcolor": "#653"}], "groups": [], "links": [{"id": 11, "origin_id": -10, "origin_slot": 0, "target_id": 11, "target_slot": 4, "type": "STRING"}, {"id": 13, "origin_id": 11, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "STRING"}, {"id": 14, "origin_id": -10, "origin_slot": 1, "target_id": 11, "target_slot": 0, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Text generation/Prompt enhance"}]}, "extra": {}}
				`@@ -1 +0,0 @@`
				{"revision": 0, "last_node_id": 13, "last_link_id": 0, "nodes": [{"id": 13, "type": "cf95b747-3e17-46cb-8097-cac60ff9b2e1", "pos": [1120, 330], "size": [240, 58], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "video", "name": "video", "type": "VIDEO", "link": null}, {"name": "model_name", "type": "COMBO", "widget": {"name": "model_name"}, "link": null}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", "links": []}], "title": "Video Upscale(GAN x4)", "properties": {"proxyWidgets": [["-1", "model_name"]], "cnr_id": "comfy-core", "ver": "0.14.1"}, "widgets_values": ["RealESRGAN_x4plus.safetensors"]}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "cf95b747-3e17-46cb-8097-cac60ff9b2e1", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 13, "lastLinkId": 19, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Video Upscale(GAN x4)", "inputNode": {"id": -10, "bounding": [550, 460, 120, 80]}, "outputNode": {"id": -20, "bounding": [1490, 460, 120, 60]}, "inputs": [{"id": "666d633e-93e7-42dc-8d11-2b7b99b0f2a6", "name": "video", "type": "VIDEO", "linkIds": [10], "localized_name": "video", "pos": [650, 480]}, {"id": "2e23a087-caa8-4d65-99e6-662761aa905a", "name": "model_name", "type": "COMBO", "linkIds": [19], "pos": [650, 500]}], "outputs": [{"id": "0c1768ea-3ec2-412f-9af6-8e0fa36dae70", "name": "VIDEO", "type": "VIDEO", "linkIds": [15], "localized_name": "VIDEO", "pos": [1510, 480]}], "widgets": [], "nodes": [{"id": 2, "type": "ImageUpscaleWithModel", "pos": [1110, 450], "size": [320, 46], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "upscale_model", "name": "upscale_model", "type": "UPSCALE_MODEL", "link": 1}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 14}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [13]}], "properties": {"cnr_id": "comfy-core", "ver": "0.10.0", "Node name for S&R": 
"ImageUpscaleWithModel"}}, {"id": 11, "type": "CreateVideo", "pos": [1110, 550], "size": [320, 78], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "link": 13}, {"localized_name": "audio", "name": "audio", "shape": 7, "type": "AUDIO", "link": 16}, {"localized_name": "fps", "name": "fps", "type": "FLOAT", "widget": {"name": "fps"}, "link": 12}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", "links": [15]}], "properties": {"cnr_id": "comfy-core", "ver": "0.10.0", "Node name for S&R": "CreateVideo"}, "widgets_values": [30]}, {"id": 10, "type": "GetVideoComponents", "pos": [1110, 330], "size": [320, 70], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "video", "name": "video", "type": "VIDEO", "link": 10}], "outputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "links": [14]}, {"localized_name": "audio", "name": "audio", "type": "AUDIO", "links": [16]}, {"localized_name": "fps", "name": "fps", "type": "FLOAT", "links": [12]}], "properties": {"cnr_id": "comfy-core", "ver": "0.10.0", "Node name for S&R": "GetVideoComponents"}}, {"id": 1, "type": "UpscaleModelLoader", "pos": [750, 450], "size": [280, 60], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "model_name", "name": "model_name", "type": "COMBO", "widget": {"name": "model_name"}, "link": 19}], "outputs": [{"localized_name": "UPSCALE_MODEL", "name": "UPSCALE_MODEL", "type": "UPSCALE_MODEL", "links": [1]}], "properties": {"cnr_id": "comfy-core", "ver": "0.10.0", "Node name for S&R": "UpscaleModelLoader", "models": [{"name": "RealESRGAN_x4plus.safetensors", "url": "https://huggingface.co/Comfy-Org/Real-ESRGAN_repackaged/resolve/main/RealESRGAN_x4plus.safetensors", "directory": "upscale_models"}]}, "widgets_values": ["RealESRGAN_x4plus.safetensors"]}], "groups": [], "links": [{"id": 1, "origin_id": 1, "origin_slot": 0, "target_id": 2, "target_slot": 0, "type": 
"UPSCALE_MODEL"}, {"id": 14, "origin_id": 10, "origin_slot": 0, "target_id": 2, "target_slot": 1, "type": "IMAGE"}, {"id": 13, "origin_id": 2, "origin_slot": 0, "target_id": 11, "target_slot": 0, "type": "IMAGE"}, {"id": 16, "origin_id": 10, "origin_slot": 1, "target_id": 11, "target_slot": 1, "type": "AUDIO"}, {"id": 12, "origin_id": 10, "origin_slot": 2, "target_id": 11, "target_slot": 2, "type": "FLOAT"}, {"id": 10, "origin_id": -10, "origin_slot": 0, "target_id": 10, "target_slot": 0, "type": "VIDEO"}, {"id": 15, "origin_id": 11, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "VIDEO"}, {"id": 19, "origin_id": -10, "origin_slot": 1, "target_id": 1, "target_slot": 0, "type": "COMBO"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Video generation and editing/Enhance video"}]}, "extra": {}}