Add comprehensive test suite for assets API

- conftest.py: Test fixtures (in-memory SQLite, mock UserManager, test image)
- schemas_test.py: 98 tests for Pydantic input validation
- helpers_test.py: 50 tests for utility functions
- queries_crud_test.py: 27 tests for core CRUD operations
- queries_filter_test.py: 28 tests for filtering/pagination
- queries_tags_test.py: 24 tests for tag operations
- routes_upload_test.py: 18 tests for upload endpoints
- routes_read_update_test.py: 21 tests for read/update endpoints
- routes_tags_delete_test.py: 17 tests for tags/delete endpoints

Total: 283 tests covering all 12 asset API endpoints

Amp-Thread-ID: https://ampcode.com/threads/T-019be932-d48b-76b9-843a-790e9d2a1f58
Co-authored-by: Amp <amp@ampcode.com>
Remove extra whitespace at end of routes.py
2026-03-12 16:49:57 +00:00 · 2026-01-22 23:15:19 -08:00 · 2026-01-16 01:04:26 -08:00 · 2026-01-16 01:02:40 -08:00 · 2026-01-16 01:01:49 -08:00 · 2026-01-16 00:50:13 -08:00
311 changed files with 5911 additions and 23011 deletions
--- a/.coderabbit.yaml
+++ b/.coderabbit.yaml
@@ -1,114 +0,0 @@
-# yaml-language-server: $schema=https://coderabbit.ai/integrations/schema.v2.json
-language: "en-US"
-early_access: false
-
-reviews:
-  profile: "chill"
-  request_changes_workflow: false
-  high_level_summary: false
-  poem: false
-  review_status: false
-  review_details: false
-  commit_status: true
-  collapse_walkthrough: true
-  changed_files_summary: false
-  sequence_diagrams: false
-  estimate_code_review_effort: false
-  assess_linked_issues: false
-  related_issues: false
-  related_prs: false
-  suggested_labels: false
-  auto_apply_labels: false
-  suggested_reviewers: false
-  auto_assign_reviewers: false
-  in_progress_fortune: false
-  enable_prompt_for_ai_agents: true
-
-  path_filters:
-    - "!comfy_api_nodes/apis/**"
-    - "!**/generated/*.pyi"
-    - "!.ci/**"
-    - "!script_examples/**"
-    - "!**/__pycache__/**"
-    - "!**/*.ipynb"
-    - "!**/*.png"
-    - "!**/*.bat"
-
-  path_instructions:
-    - path: "comfy/**"
-      instructions: |
-        Core ML/diffusion engine. Focus on:
-        - Backward compatibility (breaking changes affect all custom nodes)
-        - Memory management and GPU resource handling
-        - Performance implications in hot paths
-        - Thread safety for concurrent execution
-    - path: "comfy_api_nodes/**"
-      instructions: |
-        Third-party API integration nodes. Focus on:
-        - No hardcoded API keys or secrets
-        - Proper error handling for API failures (timeouts, rate limits, auth errors)
-        - Correct Pydantic model usage
-        - Security of user data passed to external APIs
-    - path: "comfy_extras/**"
-      instructions: |
-        Community-contributed extra nodes. Focus on:
-        - Consistency with node patterns (INPUT_TYPES, RETURN_TYPES, FUNCTION, CATEGORY)
-        - No breaking changes to existing node interfaces
-    - path: "comfy_execution/**"
-      instructions: |
-        Execution engine (graph execution, caching, jobs). Focus on:
-        - Caching correctness
-        - Concurrent execution safety
-        - Graph validation edge cases
-    - path: "nodes.py"
-      instructions: |
-        Core node definitions (2500+ lines). Focus on:
-        - Backward compatibility of NODE_CLASS_MAPPINGS
-        - Consistency of INPUT_TYPES return format
-    - path: "alembic_db/**"
-      instructions: |
-        Database migrations. Focus on:
-        - Migration safety and rollback support
-        - Data preservation during schema changes
-
-  auto_review:
-    enabled: true
-    auto_incremental_review: true
-    drafts: true
-
-  finishing_touches:
-    docstrings:
-      enabled: false
-    unit_tests:
-      enabled: false
-
-  tools:
-    ruff:
-      enabled: true
-    pylint:
-      enabled: false
-    flake8:
-      enabled: false
-    gitleaks:
-      enabled: true
-    shellcheck:
-      enabled: false
-    markdownlint:
-      enabled: false
-    yamllint:
-      enabled: false
-    languagetool:
-      enabled: false
-    github-checks:
-      enabled: true
-      timeout_ms: 90000
-    ast-grep:
-      essential_rules: true
-
-chat:
-  auto_reply: true
-
-knowledge_base:
-  opt_out: false
-  learnings:
-    scope: "auto"
--- a/.github/workflows/release-stable-all.yml
+++ b/.github/workflows/release-stable-all.yml
@@ -20,7 +20,7 @@ jobs:
      git_tag: ${{ inputs.git_tag }}
      cache_tag: "cu130"
      python_minor: "13"
-      python_patch: "11"
+      python_patch: "9"
      rel_name: "nvidia"
      rel_extra_name: ""
      test_release: true
@@ -65,11 +65,11 @@ jobs:
      contents: "write"
      packages: "write"
      pull-requests: "read"
-    name: "Release AMD ROCm 7.2"
+    name: "Release AMD ROCm 7.1.1"
    uses: ./.github/workflows/stable-release.yml
    with:
      git_tag: ${{ inputs.git_tag }}
-      cache_tag: "rocm72"
+      cache_tag: "rocm711"
      python_minor: "12"
      python_patch: "10"
      rel_name: "amd"
--- a/.github/workflows/release-webhook.yml
+++ b/.github/workflows/release-webhook.yml
@@ -7,8 +7,6 @@ on:
 jobs:
  send-webhook:
    runs-on: ubuntu-latest
-    env:
-      DESKTOP_REPO_DISPATCH_TOKEN: ${{ secrets.DESKTOP_REPO_DISPATCH_TOKEN }}
    steps:
      - name: Send release webhook
        env:
@@ -108,37 +106,3 @@ jobs:
            --fail --silent --show-error
          
          echo "✅ Release webhook sent successfully"
-
-      - name: Send repository dispatch to desktop
-        env:
-          DISPATCH_TOKEN: ${{ env.DESKTOP_REPO_DISPATCH_TOKEN }}
-          RELEASE_TAG: ${{ github.event.release.tag_name }}
-          RELEASE_URL: ${{ github.event.release.html_url }}
-        run: |
-          set -euo pipefail
-
-          if [ -z "${DISPATCH_TOKEN:-}" ]; then
-            echo "::error::DESKTOP_REPO_DISPATCH_TOKEN is required but not set."
-            exit 1
-          fi
-
-          PAYLOAD="$(jq -n \
-            --arg release_tag "$RELEASE_TAG" \
-            --arg release_url "$RELEASE_URL" \
-            '{
-              event_type: "comfyui_release_published",
-              client_payload: {
-                release_tag: $release_tag,
-                release_url: $release_url
-              }
-            }')"
-
-          curl -fsSL \
-            -X POST \
-            -H "Accept: application/vnd.github+json" \
-            -H "Content-Type: application/json" \
-            -H "Authorization: Bearer ${DISPATCH_TOKEN}" \
-            https://api.github.com/repos/Comfy-Org/desktop/dispatches \
-            -d "$PAYLOAD"
-
-          echo "✅ Dispatched ComfyUI release ${RELEASE_TAG} to Comfy-Org/desktop"
--- a/.github/workflows/windows_release_dependencies.yml
+++ b/.github/workflows/windows_release_dependencies.yml
@@ -29,7 +29,7 @@ on:
        description: 'python patch version'
        required: true
        type: string
-        default: "11"
+        default: "9"
 #  push:
 #    branches:
 #      - master
--- a/.gitignore
+++ b/.gitignore
@@ -11,7 +11,7 @@ extra_model_paths.yaml
 /.vs
 .vscode/
 .idea/
-venv*/
+venv/
 .venv/
 /web/extensions/*
 !/web/extensions/logging.js.example
--- a/README.md
+++ b/README.md
@@ -108,7 +108,7 @@ See what ComfyUI can do with the [example workflows](https://comfyanonymous.gith
 - [LCM models and Loras](https://comfyanonymous.github.io/ComfyUI_examples/lcm/)
 - Latent previews with [TAESD](#how-to-show-high-quality-previews)
 - Works fully offline: core will never download anything unless you want to.
- Optional API nodes to use paid models from external providers through the online [Comfy API](https://docs.comfy.org/tutorials/api-nodes/overview) disable with: `--disable-api-nodes`
+- Optional API nodes to use paid models from external providers through the online [Comfy API](https://docs.comfy.org/tutorials/api-nodes/overview).
 - [Config file](extra_model_paths.yaml.example) to set the search paths for models.

 Workflow examples can be found on the [Examples page](https://comfyanonymous.github.io/ComfyUI_examples/)
@@ -208,11 +208,11 @@ comfy install

 ## Manual Install (Windows, Linux)

-Python 3.14 works but some custom nodes may have issues. The free threaded variant works but some dependencies will enable the GIL so it's not fully supported.
+Python 3.14 works but you may encounter issues with the torch compile node. The free threaded variant is still missing some dependencies.

 Python 3.13 is very well supported. If you have trouble with some custom node dependencies on 3.13 you can try 3.12

-torch 2.4 and above is supported but some features and optimizations might only work on newer versions. We generally recommend using the latest major version of pytorch with the latest cuda version unless it is less than 2 weeks old.
+torch 2.4 and above is supported but some features might only work on newer versions. We generally recommend using the latest major version of pytorch with the latest cuda version unless it is less than 2 weeks old.

 ### Instructions:

@@ -227,11 +227,11 @@ Put your VAE in: models/vae

 AMD users can install rocm and pytorch with pip if you don't have it already installed, this is the command to install the stable version:

-```pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm7.1```
+```pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.4```

-This is the command to install the nightly with ROCm 7.2 which might have some performance improvements:
+This is the command to install the nightly with ROCm 7.0 which might have some performance improvements:

-```pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/rocm7.2```
+```pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/rocm7.1```


 ### AMD GPUs (Experimental: Windows and Linux), RDNA 3, 3.5 and 4 only.
@@ -240,7 +240,7 @@ These have less hardware support than the builds above but they work on windows.

 RDNA 3 (RX 7000 series):

-```pip install --pre torch torchvision torchaudio --index-url https://rocm.nightlies.amd.com/v2/gfx110X-all/```
+```pip install --pre torch torchvision torchaudio --index-url https://rocm.nightlies.amd.com/v2/gfx110X-dgpu/```

 RDNA 3.5 (Strix halo/Ryzen AI Max+ 365):

--- a/app/assets/api/routes.py
+++ b/app/assets/api/routes.py
@@ -8,10 +8,10 @@ from aiohttp import web
 from pydantic import ValidationError

 import app.assets.manager as manager
+import app.assets.scanner as scanner
 from app import user_manager
 from app.assets.api import schemas_in
 from app.assets.helpers import get_query_dict
-from app.assets.scanner import seed_assets

 import folder_paths

@@ -21,9 +21,6 @@ USER_MANAGER: user_manager.UserManager | None = None
 # UUID regex (canonical hyphenated form, case-insensitive)
 UUID_RE = r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}"

-# Note to any custom node developers reading this code:
-# The assets system is not yet fully implemented, do not rely on the code in /app/assets remaining the same.
-
 def register_assets_system(app: web.Application, user_manager_instance: user_manager.UserManager) -> None:
    global USER_MANAGER
    USER_MANAGER = user_manager_instance
@@ -71,7 +68,7 @@ async def list_assets(request: web.Request) -> web.Response:
        order=q.order,
        owner_id=USER_MANAGER.get_request_user_id(request),
    )
-    return web.json_response(payload.model_dump(mode="json", exclude_none=True))
+    return web.json_response(payload.model_dump(mode="json"))


@ROUTES.get(f"/api/assets/{{id:{UUID_RE}}}")
@@ -119,33 +116,10 @@ async def download_asset_content(request: web.Request) -> web.Response:
    quoted = (filename or "").replace("\r", "").replace("\n", "").replace('"', "'")
    cd = f'{disposition}; filename="{quoted}"; filename*=UTF-8\'\'{urllib.parse.quote(filename)}'

-    file_size = os.path.getsize(abs_path)
-    logging.info(
-        "download_asset_content: path=%s, size=%d bytes (%.2f MB), content_type=%s, filename=%s",
-        abs_path,
-        file_size,
-        file_size / (1024 * 1024),
-        content_type,
-        filename,
-    )
-
-    async def file_sender():
-        chunk_size = 64 * 1024
-        with open(abs_path, "rb") as f:
-            while True:
-                chunk = f.read(chunk_size)
-                if not chunk:
-                    break
-                yield chunk
-
-    return web.Response(
-        body=file_sender(),
-        content_type=content_type,
-        headers={
-            "Content-Disposition": cd,
-            "Content-Length": str(file_size),
-        },
-    )
+    resp = web.FileResponse(abs_path)
+    resp.content_type = content_type
+    resp.headers["Content-Disposition"] = cd
+    return resp


@ROUTES.post("/api/assets/from-hash")
@@ -173,6 +147,7 @@ async def create_asset_from_hash(request: web.Request) -> web.Response:
@ROUTES.post("/api/assets")
 async def upload_asset(request: web.Request) -> web.Response:
    """Multipart/form-data endpoint for Asset uploads."""
+
    if not (request.content_type or "").lower().startswith("multipart/"):
        return _error_response(415, "UNSUPPORTED_MEDIA_TYPE", "Use multipart/form-data for uploads.")

@@ -365,6 +340,7 @@ async def update_asset(request: web.Request) -> web.Response:
        result = manager.update_asset(
            asset_info_id=asset_info_id,
            name=body.name,
+            tags=body.tags,
            user_metadata=body.user_metadata,
            owner_id=USER_MANAGER.get_request_user_id(request),
        )
@@ -380,6 +356,34 @@ async def update_asset(request: web.Request) -> web.Response:
    return web.json_response(result.model_dump(mode="json"), status=200)


+@ROUTES.put(f"/api/assets/{{id:{UUID_RE}}}/preview")
+async def set_asset_preview(request: web.Request) -> web.Response:
+    asset_info_id = str(uuid.UUID(request.match_info["id"]))
+    try:
+        body = schemas_in.SetPreviewBody.model_validate(await request.json())
+    except ValidationError as ve:
+        return _validation_error_response("INVALID_BODY", ve)
+    except Exception:
+        return _error_response(400, "INVALID_JSON", "Request body must be valid JSON.")
+
+    try:
+        result = manager.set_asset_preview(
+            asset_info_id=asset_info_id,
+            preview_asset_id=body.preview_id,
+            owner_id=USER_MANAGER.get_request_user_id(request),
+        )
+    except (PermissionError, ValueError) as ve:
+        return _error_response(404, "ASSET_NOT_FOUND", str(ve), {"id": asset_info_id})
+    except Exception:
+        logging.exception(
+            "set_asset_preview failed for asset_info_id=%s, owner_id=%s",
+            asset_info_id,
+            USER_MANAGER.get_request_user_id(request),
+        )
+        return _error_response(500, "INTERNAL", "Unexpected server error.")
+    return web.json_response(result.model_dump(mode="json"), status=200)
+
+
@ROUTES.delete(f"/api/assets/{{id:{UUID_RE}}}")
 async def delete_asset(request: web.Request) -> web.Response:
    asset_info_id = str(uuid.UUID(request.match_info["id"]))
@@ -430,7 +434,6 @@ async def get_tags(request: web.Request) -> web.Response:
    )
    return web.json_response(result.model_dump(mode="json"))

-
@ROUTES.post(f"/api/assets/{{id:{UUID_RE}}}/tags")
 async def add_asset_tags(request: web.Request) -> web.Response:
    asset_info_id = str(uuid.UUID(request.match_info["id"]))
@@ -492,23 +495,21 @@ async def delete_asset_tags(request: web.Request) -> web.Response:
    return web.json_response(result.model_dump(mode="json"), status=200)


-@ROUTES.post("/api/assets/seed")
-async def seed_assets_endpoint(request: web.Request) -> web.Response:
-    """Trigger asset seeding for specified roots (models, input, output)."""
+@ROUTES.post("/api/assets/scan/seed")
+async def seed_assets(request: web.Request) -> web.Response:
    try:
        payload = await request.json()
-        roots = payload.get("roots", ["models", "input", "output"])
    except Exception:
-        roots = ["models", "input", "output"]
-
-    valid_roots = [r for r in roots if r in ("models", "input", "output")]
-    if not valid_roots:
-        return _error_response(400, "INVALID_BODY", "No valid roots specified")
+        payload = {}

    try:
-        seed_assets(tuple(valid_roots))
-    except Exception:
-        logging.exception("seed_assets failed for roots=%s", valid_roots)
-        return _error_response(500, "INTERNAL", "Seed operation failed")
+        body = schemas_in.ScheduleAssetScanBody.model_validate(payload)
+    except ValidationError as ve:
+        return _validation_error_response("INVALID_BODY", ve)

-    return web.json_response({"seeded": valid_roots}, status=200)
+    try:
+        scanner.seed_assets(body.roots)
+    except Exception:
+        logging.exception("seed_assets failed for roots=%s", body.roots)
+        return _error_response(500, "INTERNAL", "Unexpected server error.")
+    return web.json_response({"synced": True, "roots": body.roots}, status=200)
--- a/app/assets/api/schemas_in.py
+++ b/app/assets/api/schemas_in.py
@@ -1,4 +1,5 @@
 import json
+import uuid
 from typing import Any, Literal

 from pydantic import (
@@ -10,6 +11,8 @@ from pydantic import (
    model_validator,
 )

+from app.assets.helpers import RootType
+
 class ListAssetsQuery(BaseModel):
    include_tags: list[str] = Field(default_factory=list)
    exclude_tags: list[str] = Field(default_factory=list)
@@ -58,12 +61,16 @@ class ListAssetsQuery(BaseModel):

 class UpdateAssetBody(BaseModel):
    name: str | None = None
+    tags: list[str] | None = None
    user_metadata: dict[str, Any] | None = None

    @model_validator(mode="after")
    def _at_least_one(self):
-        if self.name is None and self.user_metadata is None:
-            raise ValueError("Provide at least one of: name, user_metadata.")
+        if self.name is None and self.tags is None and self.user_metadata is None:
+            raise ValueError("Provide at least one of: name, tags, user_metadata.")
+        if self.tags is not None:
+            if not isinstance(self.tags, list) or not all(isinstance(t, str) for t in self.tags):
+                raise ValueError("Field 'tags' must be an array of strings.")
        return self


@@ -262,3 +269,26 @@ class UploadAssetSpec(BaseModel):
            if len(self.tags) < 2:
                raise ValueError("models uploads require a category tag as the second tag")
        return self
+
+
+class SetPreviewBody(BaseModel):
+    """Set or clear the preview for an AssetInfo. Provide an Asset.id or null."""
+    preview_id: str | None = None
+
+    @field_validator("preview_id", mode="before")
+    @classmethod
+    def _norm_uuid(cls, v):
+        if v is None:
+            return None
+        s = str(v).strip()
+        if not s:
+            return None
+        try:
+            uuid.UUID(s)
+        except Exception:
+            raise ValueError("preview_id must be a UUID")
+        return s
+
+
+class ScheduleAssetScanBody(BaseModel):
+    roots: list[RootType] = Field(..., min_length=1)
--- a/app/assets/database/queries.py
+++ b/app/assets/database/queries.py
@@ -230,7 +230,6 @@ def list_asset_infos_page(
            select(AssetInfoTag.asset_info_id, Tag.name)
            .join(Tag, Tag.name == AssetInfoTag.tag_name)
            .where(AssetInfoTag.asset_info_id.in_(id_list))
-            .order_by(AssetInfoTag.added_at)
        )
        for aid, tag_name in rows.all():
            tag_map[aid].append(tag_name)
--- a/app/assets/manager.py
+++ b/app/assets/manager.py
@@ -28,6 +28,7 @@ from app.assets.database.queries import (
 )
 from app.assets.helpers import resolve_destination_from_tags, ensure_within_base
 from app.assets.database.models import Asset
+import app.assets.hashing as hashing


 def _safe_sort_field(requested: str | None) -> str:
@@ -100,6 +101,7 @@ def list_assets(
                size=int(asset.size_bytes) if asset else None,
                mime_type=asset.mime_type if asset else None,
                tags=tags,
+                preview_url=f"/api/assets/{info.id}/content",
                created_at=info.created_at,
                updated_at=info.updated_at,
                last_access_time=info.last_access_time,
@@ -158,9 +160,9 @@ def resolve_asset_content_for_download(
        touch_asset_info_by_id(session, asset_info_id=asset_info_id)
        session.commit()

-        ctype = asset.mime_type or mimetypes.guess_type(info.name or abs_path)[0] or "application/octet-stream"
-        download_name = info.name or os.path.basename(abs_path)
-        return abs_path, ctype, download_name
+    ctype = asset.mime_type or mimetypes.guess_type(info.name or abs_path)[0] or "application/octet-stream"
+    download_name = info.name or os.path.basename(abs_path)
+    return abs_path, ctype, download_name


 def upload_asset_from_temp_path(
@@ -171,12 +173,7 @@ def upload_asset_from_temp_path(
    owner_id: str = "",
    expected_asset_hash: str | None = None,
 ) -> schemas_out.AssetCreated:
-    """
-    Create new asset or update existing asset from a temporary file path.
-    """
    try:
-        # NOTE: blake3 is not required right now, so this will fail if blake3 is not installed in local environment
-        import app.assets.hashing as hashing
        digest = hashing.blake3_hash(temp_path)
    except Exception as e:
        raise RuntimeError(f"failed to hash uploaded file: {e}")
@@ -271,22 +268,21 @@ def upload_asset_from_temp_path(
            raise RuntimeError("inconsistent DB state after ingest")
        info, asset = pair
        tag_names = get_asset_tags(session, asset_info_id=info.id)
-        created_result = schemas_out.AssetCreated(
-            id=info.id,
-            name=info.name,
-            asset_hash=asset.hash,
-            size=int(asset.size_bytes),
-            mime_type=asset.mime_type,
-            tags=tag_names,
-            user_metadata=info.user_metadata or {},
-            preview_id=info.preview_id,
-            created_at=info.created_at,
-            last_access_time=info.last_access_time,
-            created_new=result["asset_created"],
-        )
        session.commit()

-    return created_result
+    return schemas_out.AssetCreated(
+        id=info.id,
+        name=info.name,
+        asset_hash=asset.hash,
+        size=int(asset.size_bytes),
+        mime_type=asset.mime_type,
+        tags=tag_names,
+        user_metadata=info.user_metadata or {},
+        preview_id=info.preview_id,
+        created_at=info.created_at,
+        last_access_time=info.last_access_time,
+        created_new=result["asset_created"],
+    )


 def update_asset(
@@ -315,17 +311,16 @@ def update_asset(
        )

        tag_names = get_asset_tags(session, asset_info_id=asset_info_id)
-        result = schemas_out.AssetUpdated(
-            id=info.id,
-            name=info.name,
-            asset_hash=info.asset.hash if info.asset else None,
-            tags=tag_names,
-            user_metadata=info.user_metadata or {},
-            updated_at=info.updated_at,
-        )
        session.commit()

-    return result
+    return schemas_out.AssetUpdated(
+        id=info.id,
+        name=info.name,
+        asset_hash=info.asset.hash if info.asset else None,
+        tags=tag_names,
+        user_metadata=info.user_metadata or {},
+        updated_at=info.updated_at,
+    )


 def set_asset_preview(
@@ -351,21 +346,20 @@ def set_asset_preview(
        if not res:
            raise RuntimeError("State changed during preview update")
        info, asset, tags = res
-        result = schemas_out.AssetDetail(
-            id=info.id,
-            name=info.name,
-            asset_hash=asset.hash if asset else None,
-            size=int(asset.size_bytes) if asset and asset.size_bytes is not None else None,
-            mime_type=asset.mime_type if asset else None,
-            tags=tags,
-            user_metadata=info.user_metadata or {},
-            preview_id=info.preview_id,
-            created_at=info.created_at,
-            last_access_time=info.last_access_time,
-        )
        session.commit()

-    return result
+    return schemas_out.AssetDetail(
+        id=info.id,
+        name=info.name,
+        asset_hash=asset.hash if asset else None,
+        size=int(asset.size_bytes) if asset and asset.size_bytes is not None else None,
+        mime_type=asset.mime_type if asset else None,
+        tags=tags,
+        user_metadata=info.user_metadata or {},
+        preview_id=info.preview_id,
+        created_at=info.created_at,
+        last_access_time=info.last_access_time,
+    )


 def delete_asset_reference(*, asset_info_id: str, owner_id: str, delete_content_if_orphan: bool = True) -> bool:
@@ -425,22 +419,21 @@ def create_asset_from_hash(
            owner_id=owner_id,
        )
        tag_names = get_asset_tags(session, asset_info_id=info.id)
-        result = schemas_out.AssetCreated(
-            id=info.id,
-            name=info.name,
-            asset_hash=asset.hash,
-            size=int(asset.size_bytes),
-            mime_type=asset.mime_type,
-            tags=tag_names,
-            user_metadata=info.user_metadata or {},
-            preview_id=info.preview_id,
-            created_at=info.created_at,
-            last_access_time=info.last_access_time,
-            created_new=False,
-        )
        session.commit()

-    return result
+    return schemas_out.AssetCreated(
+        id=info.id,
+        name=info.name,
+        asset_hash=asset.hash,
+        size=int(asset.size_bytes),
+        mime_type=asset.mime_type,
+        tags=tag_names,
+        user_metadata=info.user_metadata or {},
+        preview_id=info.preview_id,
+        created_at=info.created_at,
+        last_access_time=info.last_access_time,
+        created_new=False,
+    )


 def add_tags_to_asset(
--- a/app/assets/scanner.py
+++ b/app/assets/scanner.py
@@ -27,7 +27,6 @@ def seed_assets(roots: tuple[RootType, ...], enable_logging: bool = False) -> No
    t_start = time.perf_counter()
    created = 0
    skipped_existing = 0
-    orphans_pruned = 0
    paths: list[str] = []
    try:
        existing_paths: set[str] = set()
@@ -39,11 +38,6 @@ def seed_assets(roots: tuple[RootType, ...], enable_logging: bool = False) -> No
            except Exception as e:
                logging.exception("fast DB scan failed for %s: %s", r, e)

-        try:
-            orphans_pruned = _prune_orphaned_assets(roots)
-        except Exception as e:
-            logging.exception("orphan pruning failed: %s", e)
-
        if "models" in roots:
            paths.extend(collect_models_files())
        if "input" in roots:
@@ -91,43 +85,15 @@ def seed_assets(roots: tuple[RootType, ...], enable_logging: bool = False) -> No
    finally:
        if enable_logging:
            logging.info(
-                "Assets scan(roots=%s) completed in %.3fs (created=%d, skipped_existing=%d, orphans_pruned=%d, total_seen=%d)",
+                "Assets scan(roots=%s) completed in %.3fs (created=%d, skipped_existing=%d, total_seen=%d)",
                roots,
                time.perf_counter() - t_start,
                created,
                skipped_existing,
-                orphans_pruned,
                len(paths),
            )


-def _prune_orphaned_assets(roots: tuple[RootType, ...]) -> int:
-    """Prune cache states outside configured prefixes, then delete orphaned seed assets."""
-    all_prefixes = [os.path.abspath(p) for r in roots for p in prefixes_for_root(r)]
-    if not all_prefixes:
-        return 0
-
-    def make_prefix_condition(prefix: str):
-        base = prefix if prefix.endswith(os.sep) else prefix + os.sep
-        escaped, esc = escape_like_prefix(base)
-        return AssetCacheState.file_path.like(escaped + "%", escape=esc)
-
-    matches_valid_prefix = sqlalchemy.or_(*[make_prefix_condition(p) for p in all_prefixes])
-
-    orphan_subq = (
-        sqlalchemy.select(Asset.id)
-        .outerjoin(AssetCacheState, AssetCacheState.asset_id == Asset.id)
-        .where(Asset.hash.is_(None), AssetCacheState.id.is_(None))
-    ).scalar_subquery()
-
-    with create_session() as sess:
-        sess.execute(sqlalchemy.delete(AssetCacheState).where(~matches_valid_prefix))
-        sess.execute(sqlalchemy.delete(AssetInfo).where(AssetInfo.asset_id.in_(orphan_subq)))
-        result = sess.execute(sqlalchemy.delete(Asset).where(Asset.id.in_(orphan_subq)))
-        sess.commit()
-        return result.rowcount
-
-
 def _fast_db_consistency_pass(
    root: RootType,
    *,
--- a/app/node_replace_manager.py
+++ b/app/node_replace_manager.py
@@ -1,105 +0,0 @@
-from __future__ import annotations
-
-from aiohttp import web
-
-from typing import TYPE_CHECKING, TypedDict
-if TYPE_CHECKING:
-    from comfy_api.latest._io_public import NodeReplace
-
-from comfy_execution.graph_utils import is_link
-import nodes
-
-class NodeStruct(TypedDict):
-    inputs: dict[str, str | int | float | bool | tuple[str, int]]
-    class_type: str
-    _meta: dict[str, str]
-
-def copy_node_struct(node_struct: NodeStruct, empty_inputs: bool = False) -> NodeStruct:
-    new_node_struct = node_struct.copy()
-    if empty_inputs:
-        new_node_struct["inputs"] = {}
-    else:
-        new_node_struct["inputs"] = node_struct["inputs"].copy()
-    new_node_struct["_meta"] = node_struct["_meta"].copy()
-    return new_node_struct
-
-
-class NodeReplaceManager:
-    """Manages node replacement registrations."""
-
-    def __init__(self):
-        self._replacements: dict[str, list[NodeReplace]] = {}
-
-    def register(self, node_replace: NodeReplace):
-        """Register a node replacement mapping."""
-        self._replacements.setdefault(node_replace.old_node_id, []).append(node_replace)
-
-    def get_replacement(self, old_node_id: str) -> list[NodeReplace] | None:
-        """Get replacements for an old node ID."""
-        return self._replacements.get(old_node_id)
-
-    def has_replacement(self, old_node_id: str) -> bool:
-        """Check if a replacement exists for an old node ID."""
-        return old_node_id in self._replacements
-
-    def apply_replacements(self, prompt: dict[str, NodeStruct]):
-        connections: dict[str, list[tuple[str, str, int]]] = {}
-        need_replacement: set[str] = set()
-        for node_number, node_struct in prompt.items():
-            class_type = node_struct["class_type"]
-            # need replacement if not in NODE_CLASS_MAPPINGS and has replacement
-            if class_type not in nodes.NODE_CLASS_MAPPINGS.keys() and self.has_replacement(class_type):
-                need_replacement.add(node_number)
-            # keep track of connections
-            for input_id, input_value in node_struct["inputs"].items():
-                if is_link(input_value):
-                    conn_number = input_value[0]
-                    connections.setdefault(conn_number, []).append((node_number, input_id, input_value[1]))
-        for node_number in need_replacement:
-            node_struct = prompt[node_number]
-            class_type = node_struct["class_type"]
-            replacements = self.get_replacement(class_type)
-            if replacements is None:
-                continue
-            # just use the first replacement
-            replacement = replacements[0]
-            new_node_id = replacement.new_node_id
-            # if replacement is not a valid node, skip trying to replace it as will only cause confusion
-            if new_node_id not in nodes.NODE_CLASS_MAPPINGS.keys():
-                continue
-            # first, replace node id (class_type)
-            new_node_struct = copy_node_struct(node_struct, empty_inputs=True)
-            new_node_struct["class_type"] = new_node_id
-            # TODO: consider replacing display_name in _meta as well for error reporting purposes; would need to query node schema
-            # second, replace inputs
-            if replacement.input_mapping is not None:
-                for input_map in replacement.input_mapping:
-                    if "set_value" in input_map:
-                        new_node_struct["inputs"][input_map["new_id"]] = input_map["set_value"]
-                    elif "old_id" in input_map:
-                        new_node_struct["inputs"][input_map["new_id"]] = node_struct["inputs"][input_map["old_id"]]
-            # finalize input replacement
-            prompt[node_number] = new_node_struct
-            # third, replace outputs
-            if replacement.output_mapping is not None:
-                # re-mapping outputs requires changing the input values of nodes that receive connections from this one
-                if node_number in connections:
-                    for conns in connections[node_number]:
-                        conn_node_number, conn_input_id, old_output_idx = conns
-                        for output_map in replacement.output_mapping:
-                            if output_map["old_idx"] == old_output_idx:
-                                new_output_idx = output_map["new_idx"]
-                                previous_input = prompt[conn_node_number]["inputs"][conn_input_id]
-                                previous_input[1] = new_output_idx
-
-    def as_dict(self):
-        """Serialize all replacements to dict."""
-        return {
-            k: [v.as_dict() for v in v_list]
-            for k, v_list in self._replacements.items()
-        }
-
-    def add_routes(self, routes):
-        @routes.get("/node_replacements")
-        async def get_node_replacements(request):
-            return web.json_response(self.as_dict())
--- a/blueprints/.glsl/Brightness_and_Contrast_1.frag
+++ b/blueprints/.glsl/Brightness_and_Contrast_1.frag
@@ -1,56 +0,0 @@
-#version 300 es
-precision highp float;
-
-// Brightness / contrast adjustment performed in approximately linear light.
-// The texel is decoded with a 2.2 power curve, scaled around MID_GRAY by the
-// contrast gain, shifted by the brightness offset, clamped to 0..1 and
-// re-encoded. Alpha is copied through unchanged.
-
-uniform sampler2D u_image0; // source image
-uniform float u_float0;     // brightness slider, -100..100
-uniform float u_float1;     // contrast slider, -100..100
-
-in vec2 v_texCoord;
-out vec4 fragColor;
-
-const float MID_GRAY = 0.18;      // contrast pivot (18% reflectance)
-const float DISPLAY_GAMMA = 2.2;  // power-curve approximation of the sRGB transfer curve
-
-// Decode display-referred values to linear light. Negative inputs are floored
-// to 0 first, since pow() is undefined for a negative base.
-vec3 srgbToLinear(vec3 encoded) {
-    vec3 nonNegative = max(encoded, 0.0);
-    return pow(nonNegative, vec3(DISPLAY_GAMMA));
-}
-
-// Inverse of srgbToLinear: encode linear-light values for display.
-vec3 linearToSrgb(vec3 lin) {
-    vec3 nonNegative = max(lin, 0.0);
-    return pow(nonNegative, vec3(1.0 / DISPLAY_GAMMA));
-}
-
-// Brightness slider -100..100 -> additive offset in -1..1.
-float mapBrightness(float slider) {
-    float offset = slider / 100.0;
-    return clamp(offset, -1.0, 1.0);
-}
-
-// Contrast slider -100..100 -> gain in 0..2 (slider 0 gives gain 1, i.e. no change).
-float mapContrast(float slider) {
-    float gain = slider / 100.0 + 1.0;
-    return clamp(gain, 0.0, 2.0);
-}
-
-void main() {
-    vec4 texel = texture(u_image0, v_texCoord);
-
-    float offset = mapBrightness(u_float0);
-    float gain = mapContrast(u_float1);
-
-    // Scale around mid gray, then shift by the brightness offset.
-    vec3 lit = srgbToLinear(texel.rgb);
-    lit = (lit - MID_GRAY) * gain + offset + MID_GRAY;
-
-    // Clamp to the displayable range before converting back to sRGB.
-    fragColor = vec4(linearToSrgb(clamp(lit, 0.0, 1.0)), texel.a);
-}
--- a/blueprints/.glsl/Chromatic_Aberration_16.frag
+++ b/blueprints/.glsl/Chromatic_Aberration_16.frag
@@ -1,86 +0,0 @@
-#version 300 es
-precision highp float;
-
-// Chromatic aberration. Red is sampled at uv + shift and blue at uv - shift;
-// green and alpha come from the unshifted texel. u_int0 selects how the shift
-// vector varies across the image.
-
-uniform sampler2D u_image0;
-uniform vec2 u_resolution;
-uniform int u_int0;      // Mode (one of the MODE_* constants)
-uniform float u_float0;  // Amount (0 to 100)
-
-in vec2 v_texCoord;
-out vec4 fragColor;
-
-const int MODE_LINEAR   = 0;
-const int MODE_RADIAL   = 1;
-const int MODE_BARREL   = 2;
-const int MODE_SWIRL    = 3;
-const int MODE_DIAGONAL = 4;
-
-const float AMOUNT_SCALE = 0.0005;  // slider units -> UV units
-const float RADIAL_MULT = 4.0;
-const float BARREL_MULT = 8.0;
-const float INV_SQRT2 = 0.70710678118;
-
-void main() {
-    vec4 source = texture(u_image0, v_texCoord);
-    float strength = u_float0 * AMOUNT_SCALE;
-
-    // Effectively zero amount: pass the texel through untouched.
-    if (strength < 0.000001) {
-        fragColor = source;
-        return;
-    }
-
-    // Position relative to the image centre, with x stretched by the aspect
-    // ratio so the radial modes measure distance on a square grid.
-    float aspect = u_resolution.x / u_resolution.y;
-    vec2 fromCenter = v_texCoord - 0.5;
-    vec2 squareSpace = vec2(fromCenter.x * aspect, fromCenter.y);
-    float radius = length(squareSpace);
-
-    // Unit vector pointing away from the centre (zero at the centre itself,
-    // where the direction is undefined).
-    vec2 outward = vec2(0.0);
-    if (radius > 0.0001) {
-        outward = squareSpace / radius;
-    }
-
-    vec2 shift = vec2(0.0);
-    bool aspectCorrected = false;  // true when shift was built in square space
-
-    if (u_int0 == MODE_LINEAR) {
-        // Constant horizontal shift.
-        shift = vec2(strength, 0.0);
-    } else if (u_int0 == MODE_DIAGONAL) {
-        // Constant 45-degree shift of length `strength`.
-        shift = vec2(strength, strength) * INV_SQRT2;
-    } else if (u_int0 == MODE_RADIAL) {
-        // Away from the centre, growing linearly with radius.
-        shift = outward * radius * strength * RADIAL_MULT;
-        aspectCorrected = true;
-    } else if (u_int0 == MODE_BARREL) {
-        // Away from the centre, growing with radius squared.
-        shift = outward * radius * radius * strength * BARREL_MULT;
-        aspectCorrected = true;
-    } else if (u_int0 == MODE_SWIRL) {
-        // Perpendicular to the radial direction (rotational aberration).
-        vec2 tangent = vec2(-outward.y, outward.x);
-        shift = tangent * radius * strength * RADIAL_MULT;
-        aspectCorrected = true;
-    }
-
-    // Convert square-space shifts back to UV space.
-    if (aspectCorrected) {
-        shift.x /= aspect;
-    }
-
-    fragColor = vec4(
-        texture(u_image0, v_texCoord + shift).r,
-        source.g,
-        texture(u_image0, v_texCoord - shift).b,
-        source.a
-    );
-}
--- a/blueprints/.glsl/Color_Adjustment_15.frag
+++ b/blueprints/.glsl/Color_Adjustment_15.frag
@@ -1,95 +0,0 @@
-#version 300 es
-precision highp float;
-
-// Colour adjustment: temperature, tint, vibrance and saturation, applied in
-// that order. Alpha is passed through unchanged.
-
-uniform sampler2D u_image0;
-uniform float u_float0; // temperature (-100 to 100)
-uniform float u_float1; // tint (-100 to 100)
-uniform float u_float2; // vibrance (-100 to 100)
-uniform float u_float3; // saturation (-100 to 100)
-
-in vec2 v_texCoord;
-out vec4 fragColor;
-
-const float INPUT_SCALE = 0.01;  // slider -100..100 -> -1..1
-const float TEMP_TINT_PRIMARY = 0.3;
-const float TEMP_TINT_SECONDARY = 0.15;
-const float VIBRANCE_BOOST = 2.0;
-const float SATURATION_BOOST = 2.0;
-const float SKIN_PROTECTION = 0.5;
-const float EPSILON = 0.001;
-const vec3 LUMA_WEIGHTS = vec3(0.299, 0.587, 0.114);
-
-// Vibrance: negative amounts fade towards gray; positive amounts push colours
-// away from gray, more strongly for weakly saturated pixels and less strongly
-// for warm (r >= g >= b) tones.
-vec3 applyVibrance(vec3 rgb, float amount) {
-    if (amount == 0.0) {
-        return rgb;
-    }
-
-    float hi = max(rgb.r, max(rgb.g, rgb.b));
-    float lo = min(rgb.r, min(rgb.g, rgb.b));
-    float spread = hi - lo;  // crude saturation estimate
-    vec3 gray = vec3(dot(rgb, LUMA_WEIGHTS));
-
-    if (amount < 0.0) {
-        // -1 collapses the pixel to gray.
-        return mix(gray, rgb, 1.0 + amount);
-    }
-
-    float boost = amount * (1.0 - spread);
-
-    // Branchless skin-tone protection: `warm` is 1 only when r >= g >= b.
-    float warm = step(rgb.b, rgb.g) * step(rgb.g, rgb.r);
-    float warmth = (rgb.r - rgb.b) / max(hi, EPSILON);
-    float skin = warm * warmth * spread * (1.0 - spread);
-    boost *= (1.0 - skin * SKIN_PROTECTION);
-
-    return mix(gray, rgb, 1.0 + boost * VIBRANCE_BOOST);
-}
-
-// Saturation: -1 gives gray, 0 is a no-op, +1 triples the distance from gray.
-vec3 applySaturation(vec3 rgb, float amount) {
-    if (amount == 0.0) {
-        return rgb;
-    }
-
-    float factor;
-    if (amount < 0.0) {
-        factor = 1.0 + amount;
-    } else {
-        factor = 1.0 + amount * SATURATION_BOOST;
-    }
-    return mix(vec3(dot(rgb, LUMA_WEIGHTS)), rgb, factor);
-}
-
-void main() {
-    vec4 texel = texture(u_image0, v_texCoord);
-
-    float temperature = u_float0 * INPUT_SCALE;
-    float tint = u_float1 * INPUT_SCALE;
-    float vibrance = u_float2 * INPUT_SCALE;
-    float saturation = u_float3 * INPUT_SCALE;
-
-    vec3 rgb = texel.rgb;
-
-    // Temperature: positive warms (more red, less blue), negative cools.
-    rgb.r += temperature * TEMP_TINT_PRIMARY;
-    rgb.b -= temperature * TEMP_TINT_PRIMARY;
-
-    // Tint: positive shifts towards green, negative towards magenta.
-    rgb.g += tint * TEMP_TINT_PRIMARY;
-    rgb.r -= tint * TEMP_TINT_SECONDARY;
-    rgb.b -= tint * TEMP_TINT_SECONDARY;
-
-    // One clamp after both shifts, so they can partially cancel first.
-    rgb = clamp(rgb, 0.0, 1.0);
-
-    rgb = applyVibrance(rgb, vibrance);
-    rgb = applySaturation(rgb, saturation);
-
-    fragColor = vec4(clamp(rgb, 0.0, 1.0), texel.a);
-}
--- a/blueprints/.glsl/Edge-Preserving_Blur_128.frag
+++ b/blueprints/.glsl/Edge-Preserving_Blur_128.frag
@@ -1,107 +0,0 @@
-#version 300 es
-precision highp float;
-
-// Edge-preserving (bilateral) blur. Each neighbour inside a circular window is
-// weighted by a spatial Gaussian of its pixel distance and by a Gaussian of
-// its luma-weighted colour difference from the centre texel, so neighbours
-// that differ strongly in colour contribute little.
-
-uniform sampler2D u_image0;
-uniform float u_float0;   // Blur radius (0–20, default ~5)
-uniform float u_float1;   // Edge threshold (0–100, default ~30)
-uniform int u_int0;       // Step size (0/1 = every pixel, 2+ = skip pixels)
-
-in vec2 v_texCoord;
-out vec4 fragColor;
-
-const int MAX_RADIUS = 20;
-const float EPSILON = 0.0001;
-const vec3 LUMA_WEIGHTS = vec3(0.299, 0.587, 0.114);
-
-// Bilateral filter around `uv`.
-//   texelSize     size of one texel in UV units
-//   radius        window radius in pixels (samples beyond MAX_RADIUS are never taken)
-//   sigmaSpatial  standard deviation of the spatial Gaussian, in pixels
-//   sigmaColor    standard deviation of the colour-difference Gaussian
-// Returns the filtered RGB with the centre texel's alpha.
-vec4 bilateralFilter(vec2 uv, vec2 texelSize, int radius,
-                     float sigmaSpatial, float sigmaColor)
-{
-    vec4 center = texture(u_image0, uv);
-    vec3 centerRGB = center.rgb;
-
-    // Pre-divided exponent factors: weight = exp(squaredDistance * factor).
-    float invSpatial2 = -0.5 / (sigmaSpatial * sigmaSpatial);
-    float invColor2   = -0.5 / (sigmaColor * sigmaColor + EPSILON);
-
-    vec3 sumRGB = vec3(0.0);
-    float sumWeight = 0.0;
-
-    // Named `stride` rather than `step` so the built-in step() is not hidden.
-    int stride = max(u_int0, 1);
-    float radius2 = float(radius * radius);
-
-    // The loops run over the fixed MAX_RADIUS window and skip rows/columns
-    // outside `radius` or off the stride grid.
-    for (int dy = -MAX_RADIUS; dy <= MAX_RADIUS; dy++) {
-        if (dy < -radius || dy > radius) continue;
-        if (abs(dy) % stride != 0) continue;
-
-        for (int dx = -MAX_RADIUS; dx <= MAX_RADIUS; dx++) {
-            if (dx < -radius || dx > radius) continue;
-            if (abs(dx) % stride != 0) continue;
-
-            // Keep the window circular.
-            vec2 offset = vec2(float(dx), float(dy));
-            float dist2 = dot(offset, offset);
-            if (dist2 > radius2) continue;
-
-            vec3 sampleRGB = texture(u_image0, uv + offset * texelSize).rgb;
-
-            // Spatial Gaussian
-            float spatialWeight = exp(dist2 * invSpatial2);
-
-            // Luma-weighted squared colour distance
-            vec3 diff = sampleRGB - centerRGB;
-            float colorDist = dot(diff * diff, LUMA_WEIGHTS);
-            float colorWeight = exp(colorDist * invColor2);
-
-            float w = spatialWeight * colorWeight;
-            sumRGB += sampleRGB * w;
-            sumWeight += w;
-        }
-    }
-
-    vec3 resultRGB = sumRGB / max(sumWeight, EPSILON);
-    return vec4(resultRGB, center.a); // preserve center alpha
-}
-
-void main() {
-    vec2 texelSize = 1.0 / vec2(textureSize(u_image0, 0));
-
-    // Clamp to the supported range and round to whole pixels.
-    float radiusF = clamp(u_float0, 0.0, float(MAX_RADIUS));
-    int radius = int(radiusF + 0.5);
-
-    if (radius == 0) {
-        fragColor = texture(u_image0, v_texCoord);
-        return;
-    }
-
-    // Edge threshold → color sigma
-    // Squared curve for better low-end control
-    float t = clamp(u_float1, 0.0, 100.0) / 100.0;
-    t *= t;
-    float sigmaColor = mix(0.01, 0.5, t);
-
-    // Spatial sigma tied to radius
-    float sigmaSpatial = max(radiusF * 0.75, 0.5);
-
-    fragColor = bilateralFilter(
-        v_texCoord,
-        texelSize,
-        radius,
-        sigmaSpatial,
-        sigmaColor
-    );
-}
--- a/blueprints/.glsl/Film_Grain_15.frag
+++ b/blueprints/.glsl/Film_Grain_15.frag
@@ -1,110 +0,0 @@
-#version 300 es
-precision highp float;
-
-// Film grain. Adds hash-based noise to the image; the noise can be monochrome
-// or per-channel, blocky or interpolated, and can be faded out in bright areas.
-
-uniform sampler2D u_image0;
-uniform vec2 u_resolution;
-uniform float u_float0; // grain amount      [0.0 – 1.0]   typical: 0.2–0.8
-uniform float u_float1; // grain size        [0.3 – 3.0]   lower = finer grain
-uniform float u_float2; // color amount      [0.0 – 1.0]   0 = monochrome, 1 = RGB grain
-uniform float u_float3; // luminance bias    [0.0 – 1.0]   0 = uniform, 1 = shadows only
-uniform int   u_int0;   // noise mode        [0 or 1]      0 = smooth, 1 = grainy
-
-in vec2 v_texCoord;
-layout(location = 0) out vec4 fragColor0;
-
-// Integer hash (PCG-style): multiply-add, xorshift with a data-dependent
-// shift, multiply, final xorshift.
-uint pcg(uint v) {
-    uint state = v * 747796405u + 2891336453u;
-    uint shift = (state >> 28u) + 4u;
-    uint word = ((state >> shift) ^ state) * 277803737u;
-    return (word >> 22u) ^ word;
-}
-
-// Fold a 2D integer coordinate into a single hash value.
-uint hash2d(uvec2 p) {
-    return pcg(p.x + pcg(p.y));
-}
-
-// Hash of a cell, mapped to a float in [0, 1].
-float hashf(uvec2 p) {
-    return float(hash2d(p)) / float(0xffffffffu);
-}
-
-// As above, but re-hashed with `offset` so that different offsets give
-// different values for the same cell (used for the RGB channels).
-float hashf(uvec2 p, uint offset) {
-    uint h = pcg(hash2d(p) + offset);
-    return float(h) / float(0xffffffffu);
-}
-
-// Roughly bell-shaped noise centred on 0: the sum of four uniform samples
-// (offset .. offset + 3), re-centred and scaled.
-float toGaussian(uvec2 p, uint offset) {
-    float total = hashf(p, offset);
-    total += hashf(p, offset + 1u);
-    total += hashf(p, offset + 2u);
-    total += hashf(p, offset + 3u);
-    return (total - 2.0) * 0.7;
-}
-
-// Same as toGaussian(p, 0u).
-float toGaussian(uvec2 p) {
-    return toGaussian(p, 0u);
-}
-
-// Value noise: toGaussian() at the four corners of the cell containing `p`,
-// blended with a quintic fade curve.
-float smoothNoise(vec2 p, uint offset) {
-    uvec2 cell = uvec2(floor(p));
-    vec2 t = fract(p);
-
-    // Quintic fade 6t^5 - 15t^4 + 10t^3 (less banding than cubic).
-    t = t * t * t * (t * (t * 6.0 - 15.0) + 10.0);
-
-    float row0 = mix(toGaussian(cell, offset),
-                     toGaussian(cell + uvec2(1u, 0u), offset), t.x);
-    float row1 = mix(toGaussian(cell + uvec2(0u, 1u), offset),
-                     toGaussian(cell + uvec2(1u, 1u), offset), t.x);
-    return mix(row0, row1, t.y);
-}
-
-// Same as smoothNoise(p, 0u).
-float smoothNoise(vec2 p) {
-    return smoothNoise(p, 0u);
-}
-
-void main() {
-    vec4 color = texture(u_image0, v_texCoord);
-
-    // Grain-space coordinate: pixel position divided by the grain size.
-    vec2 grainUV = v_texCoord * u_resolution / max(u_float1, 0.01);
-
-    float mono;
-    vec3 chroma;
-    if (u_int0 == 1) {
-        // Grainy mode: one un-interpolated hash value per grain cell.
-        uvec2 cell = uvec2(grainUV);
-        mono = toGaussian(cell);
-        chroma = vec3(toGaussian(cell, 100u), toGaussian(cell, 200u), toGaussian(cell, 300u));
-    } else {
-        // Smooth mode: interpolated between grain cells.
-        mono = smoothNoise(grainUV);
-        chroma = vec3(smoothNoise(grainUV, 100u), smoothNoise(grainUV, 200u), smoothNoise(grainUV, 300u));
-    }
-
-    // Blend between monochrome and per-channel grain.
-    vec3 grain = mix(vec3(mono), chroma, clamp(u_float2, 0.0, 1.0));
-
-    // Luminance weighting (Rec.709 luma): with bias 1 the grain fades to zero at white.
-    float luma = dot(color.rgb, vec3(0.2126, 0.7152, 0.0722));
-    float lumaWeight = mix(1.0, 1.0 - luma, clamp(u_float3, 0.0, 1.0));
-
-    float strength = u_float0 * 0.15;
-
-    color.rgb += grain * strength * lumaWeight;
-    fragColor0 = vec4(clamp(color.rgb, 0.0, 1.0), color.a);
-}
--- a/blueprints/.glsl/Glow_30.frag
+++ b/blueprints/.glsl/Glow_30.frag
@@ -1,152 +0,0 @@
-#version 300 es
-// highp is required: hash() multiplies gl_FragCoord by constants in the
-// hundreds, which overflows the range GLSL ES guarantees for mediump floats.
-precision highp float;
-
-// Glow. Pixels above a soft luma threshold are gathered with a golden-angle
-// spiral of taps around each fragment, optionally tinted, and composited over
-// the original with the selected blend mode. Alpha is passed through.
-
-uniform sampler2D u_image0;
-uniform vec2 u_resolution;
-uniform int u_int0;      // Blend mode (one of the BLEND_* constants)
-uniform int u_int1;      // Color tint as 0xRRGGBB; values <= 0 disable tinting
-uniform float u_float0;  // Intensity
-uniform float u_float1;  // Radius
-uniform float u_float2;  // Threshold
-
-in vec2 v_texCoord;
-out vec4 fragColor;
-
-const int BLEND_ADD      = 0;
-const int BLEND_SCREEN   = 1;
-const int BLEND_SOFT     = 2;
-const int BLEND_OVERLAY  = 3;
-const int BLEND_LIGHTEN  = 4;
-
-const float GOLDEN_ANGLE = 2.39996323;
-const int MAX_SAMPLES = 48;
-const vec3 LUMA = vec3(0.299, 0.587, 0.114);
-
-// Per-fragment pseudo-random value in [0, 1).
-float hash(vec2 p) {
-    p = fract(p * vec2(123.34, 456.21));
-    p += dot(p, p + 45.32);
-    return fract(p.x * p.y);
-}
-
-// Unpack a 0xRRGGBB integer into an RGB colour with components in 0..1.
-vec3 hexToRgb(int h) {
-    return vec3(
-        float((h >> 16) & 255),
-        float((h >> 8) & 255),
-        float(h & 255)
-    ) * (1.0 / 255.0);
-}
-
-// Composite `glow` over `base`. Modes other than SCREEN, SOFT, OVERLAY and
-// LIGHTEN (which includes BLEND_ADD) fall through to a plain add.
-vec3 blend(vec3 base, vec3 glow, int mode) {
-    if (mode == BLEND_SCREEN) {
-        return 1.0 - (1.0 - base) * (1.0 - glow);
-    }
-    if (mode == BLEND_SOFT) {
-        return mix(
-            base - (1.0 - 2.0 * glow) * base * (1.0 - base),
-            base + (2.0 * glow - 1.0) * (sqrt(base) - base),
-            step(0.5, glow)
-        );
-    }
-    if (mode == BLEND_OVERLAY) {
-        return mix(
-            2.0 * base * glow,
-            1.0 - 2.0 * (1.0 - base) * (1.0 - glow),
-            step(0.5, base)
-        );
-    }
-    if (mode == BLEND_LIGHTEN) {
-        return max(base, glow);
-    }
-    return base + glow;
-}
-
-void main() {
-    vec4 original = texture(u_image0, v_texCoord);
-
-    float intensity = u_float0 * 0.05;
-    float radius = u_float1 * u_float1 * 0.012;  // quadratic response, in pixels
-
-    // Too weak or too small to be visible: pass the texel through.
-    if (intensity < 0.001 || radius < 0.1) {
-        fragColor = original;
-        return;
-    }
-
-    // Soft luma threshold: the mask ramps from 0 at t0 to 1 at t1.
-    float threshold = 1.0 - u_float2 * 0.01;
-    float t0 = threshold - 0.15;
-    float t1 = threshold + 0.15;
-
-    vec2 texelSize = 1.0 / u_resolution;
-    float radius2 = radius * radius;
-
-    // Small radii use fewer taps (between 35% and 100% of MAX_SAMPLES).
-    float sampleScale = clamp(radius * 0.75, 0.35, 1.0);
-    int samples = int(float(MAX_SAMPLES) * sampleScale);
-
-    // Per-fragment jitter of the spiral's start angle and overall radius.
-    float noise = hash(gl_FragCoord.xy);
-    float angleOffset = noise * GOLDEN_ANGLE;
-    float radiusJitter = 0.85 + noise * 0.3;
-
-    // The tap direction is advanced by rotating it GOLDEN_ANGLE each step.
-    float ca = cos(GOLDEN_ANGLE);
-    float sa = sin(GOLDEN_ANGLE);
-    vec2 dir = vec2(cos(angleOffset), sin(angleOffset));
-
-    vec3 glow = vec3(0.0);
-    float totalWeight = 0.0;
-
-    // Center tap
-    float centerMask = smoothstep(t0, t1, dot(original.rgb, LUMA));
-    glow += original.rgb * centerMask * 2.0;
-    totalWeight += 2.0;
-
-    for (int i = 1; i < MAX_SAMPLES; i++) {
-        if (i >= samples) break;
-
-        float fi = float(i);
-        float dist = sqrt(fi / float(samples)) * radius * radiusJitter;
-
-        vec2 offset = dir * dist * texelSize;
-        vec3 c = texture(u_image0, v_texCoord + offset).rgb;
-        float mask = smoothstep(t0, t1, dot(c, LUMA));
-
-        // Squared falloff with distance, reaching zero at radius * sqrt(1.5).
-        float w = 1.0 - (dist * dist) / (radius2 * 1.5);
-        w = max(w, 0.0);
-        w *= w;
-
-        // Masked-out taps still count towards totalWeight, so dark
-        // surroundings dilute the glow instead of being ignored.
-        glow += c * mask * w;
-        totalWeight += w;
-
-        dir = vec2(
-            dir.x * ca - dir.y * sa,
-            dir.x * sa + dir.y * ca
-        );
-    }
-
-    glow *= intensity / max(totalWeight, 0.001);
-
-    if (u_int1 > 0) {
-        glow *= hexToRgb(u_int1);
-    }
-
-    vec3 result = blend(original.rgb, glow, u_int0);
-    // +/- half an 8-bit step of noise as dither.
-    result += (noise - 0.5) * (1.0 / 255.0);
-
-    fragColor = vec4(clamp(result, 0.0, 1.0), original.a);
-}
--- a/blueprints/.glsl/Hue_and_Saturation_1.frag
+++ b/blueprints/.glsl/Hue_and_Saturation_1.frag
@@ -1,243 +0,0 @@
-#version 300 es
-precision highp float;
-
-// Hue / saturation / lightness adjustment. Works on the whole image (Master),
-// on one of six hue ranges, or as a Colorize effect that replaces hue and
-// saturation outright. Alpha is passed through unchanged.
-
-uniform sampler2D u_image0;
-uniform int u_int0;      // Mode: 0=Master, 1=Reds, 2=Yellows, 3=Greens, 4=Cyans, 5=Blues, 6=Magentas, 7=Colorize
-uniform int u_int1;      // Color Space: 0=HSL, 1=HSB/HSV
-uniform float u_float0;  // Hue (-180 to 180)
-uniform float u_float1;  // Saturation (-100 to 100)
-uniform float u_float2;  // Lightness/Brightness (-100 to 100)
-uniform float u_float3;  // Overlap (0 to 100) - feathering between adjacent color ranges
-
-in vec2 v_texCoord;
-out vec4 fragColor;
-
-// Color range modes
-const int MODE_MASTER   = 0;
-const int MODE_RED      = 1;
-const int MODE_YELLOW   = 2;
-const int MODE_GREEN    = 3;
-const int MODE_CYAN     = 4;
-const int MODE_BLUE     = 5;
-const int MODE_MAGENTA  = 6;
-const int MODE_COLORIZE = 7;
-
-// Color space modes
-const int COLORSPACE_HSL = 0;
-const int COLORSPACE_HSB = 1;
-
-const float EPSILON = 0.0001;
-
-//=============================================================================
-// RGB <-> HSL / HSB Conversions (hue is returned in 0..1, not degrees)
-//=============================================================================
-
-vec3 rgb2hsl(vec3 c) {
-    float maxC = max(max(c.r, c.g), c.b);
-    float minC = min(min(c.r, c.g), c.b);
-    float delta = maxC - minC;
-
-    float h = 0.0;
-    float s = 0.0;
-    float l = (maxC + minC) * 0.5;
-
-    // Grays (delta ~ 0) keep h = s = 0.
-    if (delta > EPSILON) {
-        s = l < 0.5
-            ? delta / (maxC + minC)
-            : delta / (2.0 - maxC - minC);
-
-        if (maxC == c.r) {
-            h = (c.g - c.b) / delta + (c.g < c.b ? 6.0 : 0.0);
-        } else if (maxC == c.g) {
-            h = (c.b - c.r) / delta + 2.0;
-        } else {
-            h = (c.r - c.g) / delta + 4.0;
-        }
-        h /= 6.0;
-    }
-
-    return vec3(h, s, l);
-}
-
-// One channel of the HSL -> RGB conversion; t is that channel's hue (wrapped to 0..1).
-float hue2rgb(float p, float q, float t) {
-    t = fract(t);
-    if (t < 1.0/6.0) return p + (q - p) * 6.0 * t;
-    if (t < 0.5)       return q;
-    if (t < 2.0/3.0)   return p + (q - p) * (2.0/3.0 - t) * 6.0;
-    return p;
-}
-
-vec3 hsl2rgb(vec3 hsl) {
-    // Zero saturation: gray at the given lightness.
-    if (hsl.y < EPSILON) return vec3(hsl.z);
-
-    float q = hsl.z < 0.5
-        ? hsl.z * (1.0 + hsl.y)
-        : hsl.z + hsl.y - hsl.z * hsl.y;
-    float p = 2.0 * hsl.z - q;
-
-    return vec3(
-        hue2rgb(p, q, hsl.x + 1.0/3.0),
-        hue2rgb(p, q, hsl.x),
-        hue2rgb(p, q, hsl.x - 1.0/3.0)
-    );
-}
-
-vec3 rgb2hsb(vec3 c) {
-    float maxC = max(max(c.r, c.g), c.b);
-    float minC = min(min(c.r, c.g), c.b);
-    float delta = maxC - minC;
-
-    float h = 0.0;
-    float s = (maxC > EPSILON) ? delta / maxC : 0.0;
-    float b = maxC;
-
-    if (delta > EPSILON) {
-        if (maxC == c.r) {
-            h = (c.g - c.b) / delta + (c.g < c.b ? 6.0 : 0.0);
-        } else if (maxC == c.g) {
-            h = (c.b - c.r) / delta + 2.0;
-        } else {
-            h = (c.r - c.g) / delta + 4.0;
-        }
-        h /= 6.0;
-    }
-
-    return vec3(h, s, b);
-}
-
-vec3 hsb2rgb(vec3 hsb) {
-    vec3 rgb = clamp(abs(mod(hsb.x * 6.0 + vec3(0.0, 4.0, 2.0), 6.0) - 3.0) - 1.0, 0.0, 1.0);
-    return hsb.z * mix(vec3(1.0), rgb, hsb.y);
-}
-
-//=============================================================================
-// Color Range Weight Calculation
-//=============================================================================
-
-// Shortest distance between two hues on the 0..1 hue circle (at most 0.5).
-float hueDistance(float a, float b) {
-    float d = abs(a - b);
-    return min(d, 1.0 - d);
-}
-
-// Weight of `hue` within the range centred on `center`: 1 inside the core
-// band (half of a 1/6 sector on each side), falling smoothly to 0 across a
-// feather whose width is `overlap` (0..1) times the sector width.
-float getHueWeight(float hue, float center, float overlap) {
-    float baseWidth = 1.0 / 6.0;
-    float feather = baseWidth * overlap;
-
-    float d = hueDistance(hue, center);
-
-    float inner = baseWidth * 0.5;
-    // smoothstep() is undefined when edge0 >= edge1, which is what a zero
-    // overlap would produce; keep the feather at least EPSILON wide.
-    float outer = inner + max(feather, EPSILON);
-
-    return 1.0 - smoothstep(inner, outer, d);
-}
-
-// Weight of `hue` for the selected mode. Master and Colorize affect every hue.
-float getModeWeight(float hue, int mode, float overlap) {
-    if (mode == MODE_MASTER || mode == MODE_COLORIZE) return 1.0;
-
-    if (mode == MODE_RED) {
-        return max(
-            getHueWeight(hue, 0.0, overlap),
-            getHueWeight(hue, 1.0, overlap)
-        );
-    }
-
-    // Yellows..Magentas are centred at 1/6 .. 5/6 of the hue circle.
-    float center = float(mode - 1) / 6.0;
-    return getHueWeight(hue, center, overlap);
-}
-
-//=============================================================================
-// Adjustment Functions
-//=============================================================================
-
-// Positive amounts move l towards 1, negative amounts scale it towards 0.
-float adjustLightness(float l, float amount) {
-    return amount > 0.0
-        ? l + (1.0 - l) * amount
-        : l + l * amount;
-}
-
-// Plain additive offset, clamped to 0..1.
-float adjustBrightness(float b, float amount) {
-    return clamp(b + amount, 0.0, 1.0);
-}
-
-// Positive amounts move s towards 1, negative amounts scale it towards 0.
-float adjustSaturation(float s, float amount) {
-    return amount > 0.0
-        ? s + (1.0 - s) * amount
-        : s + s * amount;
-}
-
-// Colorize: keep only the (adjusted) luma of `rgb` and rebuild the pixel from
-// the given hue and saturation via HSL.
-vec3 colorize(vec3 rgb, float hue, float sat, float light) {
-    float lum = dot(rgb, vec3(0.299, 0.587, 0.114));
-    float l = adjustLightness(lum, light);
-
-    vec3 hsl = vec3(fract(hue), clamp(sat, 0.0, 1.0), clamp(l, 0.0, 1.0));
-    return hsl2rgb(hsl);
-}
-
-//=============================================================================
-// Main
-//=============================================================================
-
-void main() {
-    vec4 original = texture(u_image0, v_texCoord);
-
-    float hueShift   = u_float0 / 360.0;   // -180..180 -> -0.5..0.5
-    float satAmount  = u_float1 / 100.0;   // -100..100 -> -1..1
-    float lightAmount= u_float2 / 100.0;   // -100..100 -> -1..1
-    float overlap    = u_float3 / 100.0;   // 0..100 -> 0..1
-
-    vec3 result;
-
-    if (u_int0 == MODE_COLORIZE) {
-        result = colorize(original.rgb, hueShift, satAmount, lightAmount);
-        fragColor = vec4(result, original.a);
-        return;
-    }
-
-    vec3 hsx = (u_int1 == COLORSPACE_HSL)
-        ? rgb2hsl(original.rgb)
-        : rgb2hsb(original.rgb);
-
-    float weight = getModeWeight(hsx.x, u_int0, overlap);
-
-    // Grays have no meaningful hue, so range modes leave them alone.
-    if (u_int0 != MODE_MASTER && hsx.y < EPSILON) {
-        weight = 0.0;
-    }
-
-    if (weight > EPSILON) {
-        float h = fract(hsx.x + hueShift * weight);
-        float s = clamp(adjustSaturation(hsx.y, satAmount * weight), 0.0, 1.0);
-        float v = (u_int1 == COLORSPACE_HSL)
-            ? clamp(adjustLightness(hsx.z, lightAmount * weight), 0.0, 1.0)
-            : clamp(adjustBrightness(hsx.z, lightAmount * weight), 0.0, 1.0);
-
-        vec3 adjusted = vec3(h, s, v);
-        result = (u_int1 == COLORSPACE_HSL)
-            ? hsl2rgb(adjusted)
-            : hsb2rgb(adjusted);
-    } else {
-        result = original.rgb;
-    }
-
-    fragColor = vec4(result, original.a);
-}
--- a/blueprints/.glsl/Image_Blur_1.frag
+++ b/blueprints/.glsl/Image_Blur_1.frag
@@ -1,111 +0,0 @@
-#version 300 es
-#pragma passes 2
-precision highp float;
-
-// Image blur with three modes. Gaussian and box blur are separable and use
-// both passes (u_pass 0 = horizontal, 1 = vertical); radial blur does all of
-// its work in pass 0 and copies its input through in pass 1.
-
-// Blur type constants
-const int BLUR_GAUSSIAN = 0;
-const int BLUR_BOX = 1;
-const int BLUR_RADIAL = 2;
-
-// Radial blur config
-const int RADIAL_SAMPLES = 12;
-const float RADIAL_STRENGTH = 0.0003;
-
-uniform sampler2D u_image0;
-uniform vec2 u_resolution;
-uniform int u_int0;      // Blur type (BLUR_GAUSSIAN, BLUR_BOX, BLUR_RADIAL)
-uniform float u_float0;  // Blur radius/amount
-uniform int u_pass;      // Pass index (0 = horizontal, 1 = vertical)
-
-in vec2 v_texCoord;
-layout(location = 0) out vec4 fragColor0;
-
-// Unnormalised Gaussian weight at distance x.
-float gaussian(float x, float sigma) {
-    return exp(-(x * x) / (2.0 * sigma * sigma));
-}
-
-// Rotate v by the angle whose cosine and sine are c and s.
-vec2 rotate(vec2 v, float c, float s) {
-    return vec2(v.x * c - v.y * s, v.x * s + v.y * c);
-}
-
-// Angular blur about the image centre: averages 2 * RADIAL_SAMPLES + 1 taps on
-// the circle through this fragment, with triangular weights.
-vec4 radialBlur(float radius) {
-    // Single-pass effect: the second pass is a straight copy.
-    if (u_pass > 0) {
-        return texture(u_image0, v_texCoord);
-    }
-
-    vec2 center = vec2(0.5);
-    vec2 arm = v_texCoord - center;
-    float armLength = length(arm);
-
-    // At the centre there is no direction to rotate.
-    if (armLength < 1e-4) {
-        return texture(u_image0, v_texCoord);
-    }
-    arm /= armLength;
-
-    float angleStep = radius * RADIAL_STRENGTH;
-    float cosStep = cos(angleStep);
-    float sinStep = sin(angleStep);
-
-    // Start at the most negative angle and advance one step per tap.
-    float startAngle = -float(RADIAL_SAMPLES) * angleStep;
-    vec2 ray = rotate(arm, cos(startAngle), sin(startAngle));
-
-    vec4 accum = vec4(0.0);
-    float weightSum = 0.0;
-    for (int i = -RADIAL_SAMPLES; i <= RADIAL_SAMPLES; i++) {
-        float w = 1.0 - abs(float(i)) / float(RADIAL_SAMPLES);
-        accum += texture(u_image0, center + ray * armLength) * w;
-        weightSum += w;
-        ray = rotate(ray, cosStep, sinStep);
-    }
-
-    return accum / max(weightSum, 0.001);
-}
-
-// One pass of a separable Gaussian or box blur along the axis chosen by u_pass.
-vec4 separableBlur(float radius) {
-    int reach = int(ceil(radius));
-    if (reach == 0) {
-        return texture(u_image0, v_texCoord);
-    }
-
-    vec2 texelSize = 1.0 / u_resolution;
-    vec2 axis = vec2(0.0, 1.0);
-    if (u_pass == 0) {
-        axis = vec2(1.0, 0.0);
-    }
-
-    bool isGaussian = (u_int0 == BLUR_GAUSSIAN);
-    float sigma = radius / 2.0;
-
-    vec4 accum = vec4(0.0);
-    float weightSum = 0.0;
-    for (int i = -reach; i <= reach; i++) {
-        // Box blur weights every tap equally.
-        float w = isGaussian ? gaussian(float(i), sigma) : 1.0;
-        accum += texture(u_image0, v_texCoord + axis * float(i) * texelSize) * w;
-        weightSum += w;
-    }
-
-    return accum / weightSum;
-}
-
-void main() {
-    float radius = max(u_float0, 0.0);
-
-    if (u_int0 == BLUR_RADIAL) {
-        fragColor0 = radialBlur(radius);
-    } else {
-        fragColor0 = separableBlur(radius);
-    }
-}
--- a/blueprints/.glsl/Image_Channels_23.frag
+++ b/blueprints/.glsl/Image_Channels_23.frag
@@ -1,26 +0,0 @@
-#version 300 es
-precision highp float;
-
-// Splits the input into four opaque grayscale images, one per render target:
-// location 0 = red, 1 = green, 2 = blue, 3 = alpha.
-
-uniform sampler2D u_image0;
-
-in vec2 v_texCoord;
-layout(location = 0) out vec4 fragColor0;
-layout(location = 1) out vec4 fragColor1;
-layout(location = 2) out vec4 fragColor2;
-layout(location = 3) out vec4 fragColor3;
-
-// Replicate one channel value across RGB with alpha fixed at 1.
-vec4 asGray(float value) {
-  return vec4(vec3(value), 1.0);
-}
-
-void main() {
-  vec4 texel = texture(u_image0, v_texCoord);
-  fragColor0 = asGray(texel.r);
-  fragColor1 = asGray(texel.g);
-  fragColor2 = asGray(texel.b);
-  fragColor3 = asGray(texel.a);
-}
--- a/blueprints/.glsl/Image_Levels_1.frag
+++ b/blueprints/.glsl/Image_Levels_1.frag
@@ -1,71 +0,0 @@
-#version 300 es
-precision highp float;
-
-// Levels Adjustment
-// u_int0:   channel      (0=RGB, 1=R, 2=G, 3=B)         default: 0
-// u_float0: input black  (0-255)                        default: 0
-// u_float1: input white  (0-255)                        default: 255
-// u_float2: gamma        (0.01-9.99)                    default: 1.0
-// u_float3: output black (0-255)                        default: 0
-// u_float4: output white (0-255)                        default: 255
-// Any other channel value leaves the colour unchanged. Alpha is never modified.
-
-uniform sampler2D u_image0;
-uniform int u_int0;
-uniform float u_float0;
-uniform float u_float1;
-uniform float u_float2;
-uniform float u_float3;
-uniform float u_float4;
-
-in vec2 v_texCoord;
-out vec4 fragColor;
-
-const float MIN_GAMMA = 0.01;  // lower end of the documented gamma range
-
-// Levels curve applied to each component independently: remap
-// [inBlack, inWhite] to 0..1 (clamped), raise to 1/gamma, then remap 0..1 to
-// [outBlack, outWhite]. All level arguments are in 0..1 units.
-vec3 applyLevels(vec3 color, float inBlack, float inWhite, float gamma, float outBlack, float outWhite) {
-    // Avoid dividing by zero when the input range is empty or inverted.
-    float inRange = max(inWhite - inBlack, 0.0001);
-    vec3 result = clamp((color - inBlack) / inRange, 0.0, 1.0);
-    result = pow(result, vec3(1.0 / gamma));
-    result = mix(vec3(outBlack), vec3(outWhite), result);
-    return result;
-}
-
-// Scalar convenience wrapper around applyLevels().
-float applySingleChannel(float value, float inBlack, float inWhite, float gamma, float outBlack, float outWhite) {
-    return applyLevels(vec3(value), inBlack, inWhite, gamma, outBlack, outWhite).x;
-}
-
-void main() {
-    vec4 texColor = texture(u_image0, v_texCoord);
-    vec3 color = texColor.rgb;
-
-    float inBlack = u_float0 / 255.0;
-    float inWhite = u_float1 / 255.0;
-    // 1/gamma is used as an exponent, so a zero or negative gamma would divide
-    // by zero or flip the curve; clamp to the documented minimum.
-    float gamma = max(u_float2, MIN_GAMMA);
-    float outBlack = u_float3 / 255.0;
-    float outWhite = u_float4 / 255.0;
-
-    vec3 result = color;
-
-    if (u_int0 == 0) {
-        result = applyLevels(color, inBlack, inWhite, gamma, outBlack, outWhite);
-    }
-    else if (u_int0 == 1) {
-        result.r = applySingleChannel(color.r, inBlack, inWhite, gamma, outBlack, outWhite);
-    }
-    else if (u_int0 == 2) {
-        result.g = applySingleChannel(color.g, inBlack, inWhite, gamma, outBlack, outWhite);
-    }
-    else if (u_int0 == 3) {
-        result.b = applySingleChannel(color.b, inBlack, inWhite, gamma, outBlack, outWhite);
-    }
-
-    fragColor = vec4(result, texColor.a);
-}
--- a/blueprints/.glsl/README.md
+++ b/blueprints/.glsl/README.md
@@ -1,28 +0,0 @@
-# GLSL Shader Sources
-
-This folder contains the GLSL fragment shaders extracted from blueprint JSON files for easier editing and version control.
-
-## File Naming Convention
-
-`{Blueprint_Name}_{node_id}.frag`
-
-- **Blueprint_Name**: The JSON filename with spaces/special chars replaced by underscores
-- **node_id**: The GLSLShader node ID within the subgraph
-
-## Usage
-
-```bash
-# Extract shaders from blueprint JSONs to this folder
-python update_blueprints.py extract
-
-# Patch edited shaders back into blueprint JSONs
-python update_blueprints.py patch
-```
-
-## Workflow
-
-1. Run `extract` to pull current shaders from JSONs
-2. Edit `.frag` files
-3. Run `patch` to update the blueprint JSONs
-4. Test
-5. Commit both `.frag` files and updated JSONs
--- a/blueprints/.glsl/Sharpen_23.frag
+++ b/blueprints/.glsl/Sharpen_23.frag
@@ -1,30 +0,0 @@
-#version 300 es
-precision highp float;
-
-// Sharpen: adds a scaled 4-neighbour Laplacian of the image back onto the
-// centre texel. Alpha is taken from the centre texel unchanged.
-
-uniform sampler2D u_image0;
-uniform vec2 u_resolution;
-uniform float u_float0;  // strength [0.0 – 2.0] typical: 0.3–1.0
-
-in vec2 v_texCoord;
-layout(location = 0) out vec4 fragColor0;
-
-void main() {
-    vec2 texel = 1.0 / u_resolution;
-    vec2 stepX = vec2(texel.x, 0.0);  // one texel horizontally, in UV units
-    vec2 stepY = vec2(0.0, texel.y);  // one texel vertically, in UV units
-
-    vec4 center = texture(u_image0, v_texCoord);
-
-    // Laplacian: 4 * centre minus the four direct neighbours (RGB only).
-    vec3 edges = center.rgb * 4.0;
-    edges -= texture(u_image0, v_texCoord - stepY).rgb;
-    edges -= texture(u_image0, v_texCoord + stepY).rgb;
-    edges -= texture(u_image0, v_texCoord - stepX).rgb;
-    edges -= texture(u_image0, v_texCoord + stepX).rgb;
-
-    vec3 sharpened = center.rgb + edges * u_float0;
-    fragColor0 = vec4(clamp(sharpened, 0.0, 1.0), center.a);
-}
--- a/blueprints/.glsl/Unsharp_Mask_26.frag
+++ b/blueprints/.glsl/Unsharp_Mask_26.frag
@@ -1,61 +0,0 @@
-#version 300 es
-precision highp float;
-
-uniform sampler2D u_image0;
-uniform vec2 u_resolution;
-uniform float u_float0;  // amount    [0.0 - 3.0]  typical: 0.5-1.5
-uniform float u_float1;  // radius    [0.5 - 10.0] blur radius in pixels
-uniform float u_float2;  // threshold [0.0 - 0.1]  min difference to sharpen
-
-in vec2 v_texCoord;
-layout(location = 0) out vec4 fragColor0;
-
-float gaussian(float x, float sigma) {
-    return exp(-(x * x) / (2.0 * sigma * sigma));
-}
-
-float getLuminance(vec3 color) {
-    return dot(color, vec3(0.2126, 0.7152, 0.0722));
-}
-
-void main() {
-    vec2 texel = 1.0 / u_resolution;
-    float radius = max(u_float1, 0.5);
-    float amount = u_float0;
-    float threshold = u_float2;
-
-    vec4 original = texture(u_image0, v_texCoord);
-
-    // Gaussian blur for the "unsharp" mask
-    int samples = int(ceil(radius));
-    float sigma = radius / 2.0;
-
-    vec4 blurred = vec4(0.0);
-    float totalWeight = 0.0;
-
-    for (int x = -samples; x <= samples; x++) {
-        for (int y = -samples; y <= samples; y++) {
-            vec2 offset = vec2(float(x), float(y)) * texel;
-            vec4 sample_color = texture(u_image0, v_texCoord + offset);
-
-            float dist = length(vec2(float(x), float(y)));
-            float weight = gaussian(dist, sigma);
-            blurred += sample_color * weight;
-            totalWeight += weight;
-        }
-    }
-    blurred /= totalWeight;
-
-    // Unsharp mask = original - blurred
-    vec3 mask = original.rgb - blurred.rgb;
-
-    // Luminance-based threshold with smooth falloff
-    float lumaDelta = abs(getLuminance(original.rgb) - getLuminance(blurred.rgb));
-    float thresholdScale = smoothstep(0.0, threshold, lumaDelta);
-    mask *= thresholdScale;
-
-    // Sharpen: original + mask * amount
-    vec3 sharpened = original.rgb + mask * amount;
-
-    fragColor0 = vec4(clamp(sharpened, 0.0, 1.0), original.a);
-}
--- a/blueprints/.glsl/update_blueprints.py
+++ b/blueprints/.glsl/update_blueprints.py
@@ -1,159 +0,0 @@
-#!/usr/bin/env python3
-"""
-Shader Blueprint Updater
-
-Syncs GLSL shader files between this folder and blueprint JSON files.
-
-File naming convention:
-    {Blueprint Name}_{node_id}.frag
-
-Usage:
-    python update_blueprints.py extract   # Extract shaders from JSONs to here
-    python update_blueprints.py patch     # Patch shaders back into JSONs
-    python update_blueprints.py           # Same as patch (default)
-"""
-
-import json
-import logging
-import sys
-import re
-from pathlib import Path
-
-logging.basicConfig(level=logging.INFO, format='%(message)s')
-logger = logging.getLogger(__name__)
-
-GLSL_DIR = Path(__file__).parent
-BLUEPRINTS_DIR = GLSL_DIR.parent
-
-
-def get_blueprint_files():
-    """Get all blueprint JSON files."""
-    return sorted(BLUEPRINTS_DIR.glob("*.json"))
-
-
-def sanitize_filename(name):
-    """Convert blueprint name to safe filename."""
-    return re.sub(r'[^\w\-]', '_', name)
-
-
-def extract_shaders():
-    """Extract all shaders from blueprint JSONs to this folder."""
-    extracted = 0
-    for json_path in get_blueprint_files():
-        blueprint_name = json_path.stem
-
-        try:
-            with open(json_path, 'r') as f:
-                data = json.load(f)
-        except (json.JSONDecodeError, IOError) as e:
-            logger.warning("Skipping %s: %s", json_path.name, e)
-            continue
-
-        # Find GLSLShader nodes in subgraphs
-        for subgraph in data.get('definitions', {}).get('subgraphs', []):
-            for node in subgraph.get('nodes', []):
-                if node.get('type') == 'GLSLShader':
-                    node_id = node.get('id')
-                    widgets = node.get('widgets_values', [])
-
-                    # Find shader code (first string that looks like GLSL)
-                    for widget in widgets:
-                        if isinstance(widget, str) and widget.startswith('#version'):
-                            safe_name = sanitize_filename(blueprint_name)
-                            frag_name = f"{safe_name}_{node_id}.frag"
-                            frag_path = GLSL_DIR / frag_name
-
-                            with open(frag_path, 'w') as f:
-                                f.write(widget)
-
-                            logger.info("  Extracted: %s", frag_name)
-                            extracted += 1
-                            break
-
-    logger.info("\nExtracted %d shader(s)", extracted)
-
-
-def patch_shaders():
-    """Patch shaders from this folder back into blueprint JSONs."""
-    # Build lookup: blueprint_name -> [(node_id, shader_code), ...]
-    shader_updates = {}
-
-    for frag_path in sorted(GLSL_DIR.glob("*.frag")):
-        # Parse filename: {blueprint_name}_{node_id}.frag
-        parts = frag_path.stem.rsplit('_', 1)
-        if len(parts) != 2:
-            logger.warning("Skipping %s: invalid filename format", frag_path.name)
-            continue
-
-        blueprint_name, node_id_str = parts
-
-        try:
-            node_id = int(node_id_str)
-        except ValueError:
-            logger.warning("Skipping %s: invalid node_id", frag_path.name)
-            continue
-
-        with open(frag_path, 'r') as f:
-            shader_code = f.read()
-
-        if blueprint_name not in shader_updates:
-            shader_updates[blueprint_name] = []
-        shader_updates[blueprint_name].append((node_id, shader_code))
-
-    # Apply updates to JSON files
-    patched = 0
-    for json_path in get_blueprint_files():
-        blueprint_name = sanitize_filename(json_path.stem)
-
-        if blueprint_name not in shader_updates:
-            continue
-
-        try:
-            with open(json_path, 'r') as f:
-                data = json.load(f)
-        except (json.JSONDecodeError, IOError) as e:
-            logger.error("Error reading %s: %s", json_path.name, e)
-            continue
-
-        modified = False
-        for node_id, shader_code in shader_updates[blueprint_name]:
-            # Find the node and update
-            for subgraph in data.get('definitions', {}).get('subgraphs', []):
-                for node in subgraph.get('nodes', []):
-                    if node.get('id') == node_id and node.get('type') == 'GLSLShader':
-                        widgets = node.get('widgets_values', [])
-                        if len(widgets) > 0 and widgets[0] != shader_code:
-                            widgets[0] = shader_code
-                            modified = True
-                            logger.info("  Patched: %s (node %d)", json_path.name, node_id)
-                            patched += 1
-
-        if modified:
-            with open(json_path, 'w') as f:
-                json.dump(data, f)
-
-    if patched == 0:
-        logger.info("No changes to apply.")
-    else:
-        logger.info("\nPatched %d shader(s)", patched)
-
-
-def main():
-    if len(sys.argv) < 2:
-        command = "patch"
-    else:
-        command = sys.argv[1].lower()
-
-    if command == "extract":
-        logger.info("Extracting shaders from blueprints...")
-        extract_shaders()
-    elif command in ("patch", "update", "apply"):
-        logger.info("Patching shaders into blueprints...")
-        patch_shaders()
-    else:
-        logger.info(__doc__)
-        sys.exit(1)
-
-
-if __name__ == "__main__":
-    main()
--- a/blueprints/Brightness
+++ b/blueprints/Brightness
--- a/(Z-Image-Turbo).json
+++ b/(Z-Image-Turbo).json
--- a/blueprints/Canny
+++ b/blueprints/Canny
--- a/blueprints/Chromatic
+++ b/blueprints/Chromatic
--- a/blueprints/Color
+++ b/blueprints/Color
--- a/(Z-Image-Turbo).json
+++ b/(Z-Image-Turbo).json
--- a/blueprints/Depth
+++ b/blueprints/Depth
--- a/blueprints/Edge-Preserving
+++ b/blueprints/Edge-Preserving
--- a/blueprints/Film
+++ b/blueprints/Film
--- a/blueprints/Glow.json
+++ b/blueprints/Glow.json
--- a/Saturation.json
+++ b/Saturation.json
--- a/blueprints/Image
+++ b/blueprints/Image
--- a/blueprints/Image
+++ b/blueprints/Image
--- a/blueprints/Image
+++ b/blueprints/Image
@@ -1 +0,0 @@
-{"revision": 0, "last_node_id": 29, "last_link_id": 0, "nodes": [{"id": 29, "type": "4c9d6ea4-b912-40e5-8766-6793a9758c53", "pos": [1970, -230], "size": [180, 86], "flags": {}, "order": 5, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": null}], "outputs": [{"label": "R", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": []}, {"label": "G", "localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": []}, {"label": "B", "localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": []}, {"label": "A", "localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": []}], "title": "Image Channels", "properties": {"proxyWidgets": []}, "widgets_values": []}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "4c9d6ea4-b912-40e5-8766-6793a9758c53", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 28, "lastLinkId": 39, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Image Channels", "inputNode": {"id": -10, "bounding": [1820, -185, 120, 60]}, "outputNode": {"id": -20, "bounding": [2460, -215, 120, 120]}, "inputs": [{"id": "3522932b-2d86-4a1f-a02a-cb29f3a9d7fe", "name": "images.image0", "type": "IMAGE", "linkIds": [39], "localized_name": "images.image0", "label": "image", "pos": [1920, -165]}], "outputs": [{"id": "605cb9c3-b065-4d9b-81d2-3ec331889b2b", "name": "IMAGE0", "type": "IMAGE", "linkIds": [26], "localized_name": "IMAGE0", "label": "R", "pos": [2480, -195]}, {"id": "fb44a77e-0522-43e9-9527-82e7465b3596", "name": "IMAGE1", "type": "IMAGE", "linkIds": [27], "localized_name": "IMAGE1", "label": "G", "pos": [2480, -175]}, {"id": "81460ee6-0131-402a-874f-6bf3001fc4ff", "name": "IMAGE2", "type": "IMAGE", "linkIds": [28], "localized_name": "IMAGE2", "label": "B", "pos": [2480, -155]}, {"id": "ae690246-80d4-4951-b1d9-9306d8a77417", "name": "IMAGE3", "type": "IMAGE", "linkIds": [29], "localized_name": "IMAGE3", 
"label": "A", "pos": [2480, -135]}], "widgets": [], "nodes": [{"id": 23, "type": "GLSLShader", "pos": [2000, -330], "size": [400, 172], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": 39}, {"localized_name": "fragment_shader", "name": "fragment_shader", "type": "STRING", "widget": {"name": "fragment_shader"}, "link": null}, {"localized_name": "size_mode", "name": "size_mode", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "size_mode"}, "link": null}, {"label": "image1", "localized_name": "images.image1", "name": "images.image1", "shape": 7, "type": "IMAGE", "link": null}], "outputs": [{"label": "R", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": [26]}, {"label": "G", "localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": [27]}, {"label": "B", "localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": [28]}, {"label": "A", "localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": [29]}], "properties": {"Node name for S&R": "GLSLShader"}, "widgets_values": ["#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\nlayout(location = 1) out vec4 fragColor1;\nlayout(location = 2) out vec4 fragColor2;\nlayout(location = 3) out vec4 fragColor3;\n\nvoid main() {\n  vec4 color = texture(u_image0, v_texCoord);\n  // Output each channel as grayscale to separate render targets\n  fragColor0 = vec4(vec3(color.r), 1.0);  // Red channel\n  fragColor1 = vec4(vec3(color.g), 1.0);  // Green channel\n  fragColor2 = vec4(vec3(color.b), 1.0);  // Blue channel\n  fragColor3 = vec4(vec3(color.a), 1.0);  // Alpha channel\n}\n", "from_input"]}], "groups": [], "links": [{"id": 39, "origin_id": -10, "origin_slot": 0, "target_id": 23, "target_slot": 0, "type": "IMAGE"}, {"id": 26, "origin_id": 23, "origin_slot": 0, "target_id": -20, 
"target_slot": 0, "type": "IMAGE"}, {"id": 27, "origin_id": 23, "origin_slot": 1, "target_id": -20, "target_slot": 1, "type": "IMAGE"}, {"id": 28, "origin_id": 23, "origin_slot": 2, "target_id": -20, "target_slot": 2, "type": "IMAGE"}, {"id": 29, "origin_id": 23, "origin_slot": 3, "target_id": -20, "target_slot": 3, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image Tools/Color adjust"}]}}
--- a/blueprints/Image
+++ b/blueprints/Image
--- a/blueprints/Image
+++ b/blueprints/Image
--- a/(Qwen-image).json
+++ b/(Qwen-image).json
--- a/blueprints/Image
+++ b/blueprints/Image
--- a/(Qwen-Image).json
+++ b/(Qwen-Image).json
--- a/Upscale(Z-image-Turbo).json
+++ b/Upscale(Z-image-Turbo).json
--- a/blueprints/Image
+++ b/blueprints/Image
--- a/blueprints/Image
+++ b/blueprints/Image
--- a/blueprints/Image
+++ b/blueprints/Image
--- a/blueprints/Image
+++ b/blueprints/Image
--- a/(Z-Image-Turbo).json
+++ b/(Z-Image-Turbo).json
--- a/blueprints/Pose
+++ b/blueprints/Pose
--- a/blueprints/Prompt
+++ b/blueprints/Prompt
@@ -1 +0,0 @@
-{"revision": 0, "last_node_id": 15, "last_link_id": 0, "nodes": [{"id": 15, "type": "24d8bbfd-39d4-4774-bff0-3de40cc7a471", "pos": [-1490, 2040], "size": [400, 260], "flags": {}, "order": 0, "mode": 0, "inputs": [{"name": "prompt", "type": "STRING", "widget": {"name": "prompt"}, "link": null}, {"label": "reference images", "name": "images", "type": "IMAGE", "link": null}], "outputs": [{"name": "STRING", "type": "STRING", "links": null}], "title": "Prompt Enhance", "properties": {"proxyWidgets": [["-1", "prompt"]], "cnr_id": "comfy-core", "ver": "0.14.1"}, "widgets_values": [""]}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "24d8bbfd-39d4-4774-bff0-3de40cc7a471", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 15, "lastLinkId": 14, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Prompt Enhance", "inputNode": {"id": -10, "bounding": [-2170, 2110, 138.876953125, 80]}, "outputNode": {"id": -20, "bounding": [-640, 2110, 120, 60]}, "inputs": [{"id": "aeab7216-00e0-4528-a09b-bba50845c5a6", "name": "prompt", "type": "STRING", "linkIds": [11], "pos": [-2051.123046875, 2130]}, {"id": "7b73fd36-aa31-4771-9066-f6c83879994b", "name": "images", "type": "IMAGE", "linkIds": [14], "label": "reference images", "pos": [-2051.123046875, 2150]}], "outputs": [{"id": "c7b0d930-68a1-48d1-b496-0519e5837064", "name": "STRING", "type": "STRING", "linkIds": [13], "pos": [-620, 2130]}], "widgets": [], "nodes": [{"id": 11, "type": "GeminiNode", "pos": [-1560, 1990], "size": [470, 470], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "shape": 7, "type": "IMAGE", "link": 14}, {"localized_name": "audio", "name": "audio", "shape": 7, "type": "AUDIO", "link": null}, {"localized_name": "video", "name": "video", "shape": 7, "type": "VIDEO", "link": null}, {"localized_name": "files", "name": "files", "shape": 7, "type": "GEMINI_INPUT_FILES", "link": null}, {"localized_name": "prompt", "name": "prompt", "type": 
"STRING", "widget": {"name": "prompt"}, "link": 11}, {"localized_name": "model", "name": "model", "type": "COMBO", "widget": {"name": "model"}, "link": null}, {"localized_name": "seed", "name": "seed", "type": "INT", "widget": {"name": "seed"}, "link": null}, {"localized_name": "system_prompt", "name": "system_prompt", "shape": 7, "type": "STRING", "widget": {"name": "system_prompt"}, "link": null}], "outputs": [{"localized_name": "STRING", "name": "STRING", "type": "STRING", "links": [13]}], "properties": {"cnr_id": "comfy-core", "ver": "0.14.1", "Node name for S&R": "GeminiNode"}, "widgets_values": ["", "gemini-3-pro-preview", 42, "randomize", "You are an expert in prompt writing.\nBased on the input, rewrite the user's input into a detailed prompt.\nincluding camera settings, lighting, composition, and style.\nReturn the prompt only"], "color": "#432", "bgcolor": "#653"}], "groups": [], "links": [{"id": 11, "origin_id": -10, "origin_slot": 0, "target_id": 11, "target_slot": 4, "type": "STRING"}, {"id": 13, "origin_id": 11, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "STRING"}, {"id": 14, "origin_id": -10, "origin_slot": 1, "target_id": 11, "target_slot": 0, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Text generation/Prompt enhance"}]}, "extra": {}}
--- a/blueprints/Sharpen.json
+++ b/blueprints/Sharpen.json
@@ -1 +0,0 @@
-{"revision": 0, "last_node_id": 25, "last_link_id": 0, "nodes": [{"id": 25, "type": "621ba4e2-22a8-482d-a369-023753198b7b", "pos": [4610, -790], "size": [230, 58], "flags": {}, "order": 4, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": null}], "outputs": [{"label": "IMAGE", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": []}], "title": "Sharpen", "properties": {"proxyWidgets": [["24", "value"]]}, "widgets_values": []}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "621ba4e2-22a8-482d-a369-023753198b7b", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 24, "lastLinkId": 36, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Sharpen", "inputNode": {"id": -10, "bounding": [4090, -825, 120, 60]}, "outputNode": {"id": -20, "bounding": [5150, -825, 120, 60]}, "inputs": [{"id": "37011fb7-14b7-4e0e-b1a0-6a02e8da1fd7", "name": "images.image0", "type": "IMAGE", "linkIds": [34], "localized_name": "images.image0", "label": "image", "pos": [4190, -805]}], "outputs": [{"id": "e9182b3f-635c-4cd4-a152-4b4be17ae4b9", "name": "IMAGE0", "type": "IMAGE", "linkIds": [35], "localized_name": "IMAGE0", "label": "IMAGE", "pos": [5170, -805]}], "widgets": [], "nodes": [{"id": 24, "type": "PrimitiveFloat", "pos": [4280, -1240], "size": [270, 58], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "strength", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [36]}], "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0, "max": 3, "precision": 2, "step": 0.05}, "widgets_values": [0.5]}, {"id": 23, "type": "GLSLShader", "pos": [4570, -1240], "size": [370, 192], "flags": {}, "order": 1, "mode": 0, "inputs": [{"label": "image0", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", 
"link": 34}, {"label": "image1", "localized_name": "images.image1", "name": "images.image1", "shape": 7, "type": "IMAGE", "link": null}, {"label": "u_float0", "localized_name": "floats.u_float0", "name": "floats.u_float0", "shape": 7, "type": "FLOAT", "link": 36}, {"label": "u_float1", "localized_name": "floats.u_float1", "name": "floats.u_float1", "shape": 7, "type": "FLOAT", "link": null}, {"label": "u_int0", "localized_name": "ints.u_int0", "name": "ints.u_int0", "shape": 7, "type": "INT", "link": null}, {"localized_name": "fragment_shader", "name": "fragment_shader", "type": "STRING", "widget": {"name": "fragment_shader"}, "link": null}, {"localized_name": "size_mode", "name": "size_mode", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "size_mode"}, "link": null}], "outputs": [{"localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": [35]}, {"localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": null}], "properties": {"Node name for S&R": "GLSLShader"}, "widgets_values": ["#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform vec2 u_resolution;\nuniform float u_float0;  // strength [0.0 – 2.0] typical: 0.3–1.0\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\n\nvoid main() {\n    vec2 texel = 1.0 / u_resolution;\n    \n    // Sample center and neighbors\n    vec4 center = texture(u_image0, v_texCoord);\n    vec4 top    = texture(u_image0, v_texCoord + vec2( 0.0, -texel.y));\n    vec4 bottom = texture(u_image0, v_texCoord + vec2( 0.0,  texel.y));\n    vec4 left   = texture(u_image0, v_texCoord + vec2(-texel.x,  0.0));\n    vec4 right  = texture(u_image0, v_texCoord + vec2( texel.x,  0.0));\n    \n    // Edge enhancement (Laplacian)\n    vec4 edges = center * 4.0 - top - bottom - left - right;\n    \n    // Add edges back 
scaled by strength\n    vec4 sharpened = center + edges * u_float0;\n    \n    fragColor0 = vec4(clamp(sharpened.rgb, 0.0, 1.0), center.a);\n}", "from_input"]}], "groups": [], "links": [{"id": 36, "origin_id": 24, "origin_slot": 0, "target_id": 23, "target_slot": 2, "type": "FLOAT"}, {"id": 34, "origin_id": -10, "origin_slot": 0, "target_id": 23, "target_slot": 0, "type": "IMAGE"}, {"id": 35, "origin_id": 23, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image Tools/Sharpen"}]}}
--- a/blueprints/Text
+++ b/blueprints/Text
--- a/(Z-Image-Turbo).json
+++ b/(Z-Image-Turbo).json
--- a/blueprints/Text
+++ b/blueprints/Text
--- a/blueprints/Unsharp
+++ b/blueprints/Unsharp
--- a/blueprints/Video
+++ b/blueprints/Video
--- a/blueprints/Video
+++ b/blueprints/Video
--- a/blueprints/Video
+++ b/blueprints/Video
--- a/blueprints/Video
+++ b/blueprints/Video
@@ -1 +0,0 @@
-{"revision": 0, "last_node_id": 13, "last_link_id": 0, "nodes": [{"id": 13, "type": "cf95b747-3e17-46cb-8097-cac60ff9b2e1", "pos": [1120, 330], "size": [240, 58], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "video", "name": "video", "type": "VIDEO", "link": null}, {"name": "model_name", "type": "COMBO", "widget": {"name": "model_name"}, "link": null}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", "links": []}], "title": "Video Upscale(GAN x4)", "properties": {"proxyWidgets": [["-1", "model_name"]], "cnr_id": "comfy-core", "ver": "0.14.1"}, "widgets_values": ["RealESRGAN_x4plus.safetensors"]}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "cf95b747-3e17-46cb-8097-cac60ff9b2e1", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 13, "lastLinkId": 19, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Video Upscale(GAN x4)", "inputNode": {"id": -10, "bounding": [550, 460, 120, 80]}, "outputNode": {"id": -20, "bounding": [1490, 460, 120, 60]}, "inputs": [{"id": "666d633e-93e7-42dc-8d11-2b7b99b0f2a6", "name": "video", "type": "VIDEO", "linkIds": [10], "localized_name": "video", "pos": [650, 480]}, {"id": "2e23a087-caa8-4d65-99e6-662761aa905a", "name": "model_name", "type": "COMBO", "linkIds": [19], "pos": [650, 500]}], "outputs": [{"id": "0c1768ea-3ec2-412f-9af6-8e0fa36dae70", "name": "VIDEO", "type": "VIDEO", "linkIds": [15], "localized_name": "VIDEO", "pos": [1510, 480]}], "widgets": [], "nodes": [{"id": 2, "type": "ImageUpscaleWithModel", "pos": [1110, 450], "size": [320, 46], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "upscale_model", "name": "upscale_model", "type": "UPSCALE_MODEL", "link": 1}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 14}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [13]}], "properties": {"cnr_id": "comfy-core", "ver": "0.10.0", "Node name for S&R": 
"ImageUpscaleWithModel"}}, {"id": 11, "type": "CreateVideo", "pos": [1110, 550], "size": [320, 78], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "link": 13}, {"localized_name": "audio", "name": "audio", "shape": 7, "type": "AUDIO", "link": 16}, {"localized_name": "fps", "name": "fps", "type": "FLOAT", "widget": {"name": "fps"}, "link": 12}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", "links": [15]}], "properties": {"cnr_id": "comfy-core", "ver": "0.10.0", "Node name for S&R": "CreateVideo"}, "widgets_values": [30]}, {"id": 10, "type": "GetVideoComponents", "pos": [1110, 330], "size": [320, 70], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "video", "name": "video", "type": "VIDEO", "link": 10}], "outputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "links": [14]}, {"localized_name": "audio", "name": "audio", "type": "AUDIO", "links": [16]}, {"localized_name": "fps", "name": "fps", "type": "FLOAT", "links": [12]}], "properties": {"cnr_id": "comfy-core", "ver": "0.10.0", "Node name for S&R": "GetVideoComponents"}}, {"id": 1, "type": "UpscaleModelLoader", "pos": [750, 450], "size": [280, 60], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "model_name", "name": "model_name", "type": "COMBO", "widget": {"name": "model_name"}, "link": 19}], "outputs": [{"localized_name": "UPSCALE_MODEL", "name": "UPSCALE_MODEL", "type": "UPSCALE_MODEL", "links": [1]}], "properties": {"cnr_id": "comfy-core", "ver": "0.10.0", "Node name for S&R": "UpscaleModelLoader", "models": [{"name": "RealESRGAN_x4plus.safetensors", "url": "https://huggingface.co/Comfy-Org/Real-ESRGAN_repackaged/resolve/main/RealESRGAN_x4plus.safetensors", "directory": "upscale_models"}]}, "widgets_values": ["RealESRGAN_x4plus.safetensors"]}], "groups": [], "links": [{"id": 1, "origin_id": 1, "origin_slot": 0, "target_id": 2, "target_slot": 0, "type": 
"UPSCALE_MODEL"}, {"id": 14, "origin_id": 10, "origin_slot": 0, "target_id": 2, "target_slot": 1, "type": "IMAGE"}, {"id": 13, "origin_id": 2, "origin_slot": 0, "target_id": 11, "target_slot": 0, "type": "IMAGE"}, {"id": 16, "origin_id": 10, "origin_slot": 1, "target_id": 11, "target_slot": 1, "type": "AUDIO"}, {"id": 12, "origin_id": 10, "origin_slot": 2, "target_id": 11, "target_slot": 2, "type": "FLOAT"}, {"id": 10, "origin_id": -10, "origin_slot": 0, "target_id": 10, "target_slot": 0, "type": "VIDEO"}, {"id": 15, "origin_id": 11, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "VIDEO"}, {"id": 19, "origin_id": -10, "origin_slot": 1, "target_id": 1, "target_slot": 0, "type": "COMBO"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Video generation and editing/Enhance video"}]}, "extra": {}}
--- a/comfy/audio_encoders/audio_encoders.py
+++ b/comfy/audio_encoders/audio_encoders.py
@@ -25,11 +25,11 @@ class AudioEncoderModel():
        elif model_type == "whisper3":
            self.model = WhisperLargeV3(**model_config)
        self.model.eval()
-        self.patcher = comfy.model_patcher.CoreModelPatcher(self.model, load_device=self.load_device, offload_device=offload_device)
+        self.patcher = comfy.model_patcher.ModelPatcher(self.model, load_device=self.load_device, offload_device=offload_device)
        self.model_sample_rate = 16000

    def load_sd(self, sd):
-        return self.model.load_state_dict(sd, strict=False, assign=self.patcher.is_dynamic())
+        return self.model.load_state_dict(sd, strict=False)

    def get_sd(self):
        return self.model.state_dict()
--- a/comfy/checkpoint_pickle.py
+++ b/comfy/checkpoint_pickle.py
@@ -0,0 +1,13 @@
+import pickle
+
+load = pickle.load
+
+class Empty:
+    pass
+
+class Unpickler(pickle.Unpickler):
+    def find_class(self, module, name):
+        #TODO: safe unpickle
+        if module.startswith("pytorch_lightning"):
+            return Empty
+        return super().find_class(module, name)
--- a/comfy/cli_args.py
+++ b/comfy/cli_args.py
@@ -159,7 +159,6 @@ class PerformanceFeature(enum.Enum):
    Fp8MatrixMultiplication = "fp8_matrix_mult"
    CublasOps = "cublas_ops"
    AutoTune = "autotune"
-    DynamicVRAM = "dynamic_vram"

 parser.add_argument("--fast", nargs="*", type=PerformanceFeature, help="Enable some untested and potentially quality deteriorating optimizations. This is used to test new features so using it might crash your comfyui. --fast with no arguments enables everything. You can pass a list specific optimizations if you only want to enable specific ones. Current valid optimizations: {}".format(" ".join(map(lambda c: c.value, PerformanceFeature))))

@@ -258,6 +257,3 @@ elif args.fast == []:
 # '--fast' is provided with a list of performance features, use that list
 else:
    args.fast = set(args.fast)
-
-def enables_dynamic_vram():
-    return PerformanceFeature.DynamicVRAM in args.fast and not args.highvram and not args.gpu_only
--- a/comfy/clip_vision.py
+++ b/comfy/clip_vision.py
@@ -47,10 +47,10 @@ class ClipVisionModel():
        self.model = model_class(config, self.dtype, offload_device, comfy.ops.manual_cast)
        self.model.eval()

-        self.patcher = comfy.model_patcher.CoreModelPatcher(self.model, load_device=self.load_device, offload_device=offload_device)
+        self.patcher = comfy.model_patcher.ModelPatcher(self.model, load_device=self.load_device, offload_device=offload_device)

    def load_sd(self, sd):
-        return self.model.load_state_dict(sd, strict=False, assign=self.patcher.is_dynamic())
+        return self.model.load_state_dict(sd, strict=False)

    def get_sd(self):
        return self.model.state_dict()
@@ -66,7 +66,6 @@ class ClipVisionModel():
        outputs = Output()
        outputs["last_hidden_state"] = out[0].to(comfy.model_management.intermediate_device())
        outputs["image_embeds"] = out[2].to(comfy.model_management.intermediate_device())
-        outputs["image_sizes"] = [pixel_values.shape[1:]] * pixel_values.shape[0]
        if self.return_all_hidden_states:
            all_hs = out[1].to(comfy.model_management.intermediate_device())
            outputs["penultimate_hidden_states"] = all_hs[:, -2]
--- a/comfy/comfy_types/node_typing.py
+++ b/comfy/comfy_types/node_typing.py
@@ -176,8 +176,6 @@ class InputTypeOptions(TypedDict):
    """COMBO type only. Specifies the configuration for a multi-select widget.
    Available after ComfyUI frontend v1.13.4
    https://github.com/Comfy-Org/ComfyUI_frontend/pull/2987"""
-    gradient_stops: NotRequired[list[list[float]]]
-    """Gradient color stops for gradientslider display mode. Each stop is [offset, r, g, b] (``FLOAT``)."""


 class HiddenInputTypeDict(TypedDict):
@@ -238,8 +236,6 @@ class ComfyNodeABC(ABC):
    """Flags a node as experimental, informing users that it may change or not work as expected."""
    DEPRECATED: bool
    """Flags a node as deprecated, indicating to users that they should find alternatives to this node."""
-    DEV_ONLY: bool
-    """Flags a node as dev-only, hiding it from search/menus unless dev mode is enabled."""
    API_NODE: Optional[bool]
    """Flags a node as an API node. See: https://docs.comfy.org/tutorials/api-nodes/overview."""

--- a/comfy/controlnet.py
+++ b/comfy/controlnet.py
@@ -203,7 +203,7 @@ class ControlNet(ControlBase):
        self.control_model = control_model
        self.load_device = load_device
        if control_model is not None:
-            self.control_model_wrapped = comfy.model_patcher.CoreModelPatcher(self.control_model, load_device=load_device, offload_device=comfy.model_management.unet_offload_device())
+            self.control_model_wrapped = comfy.model_patcher.ModelPatcher(self.control_model, load_device=load_device, offload_device=comfy.model_management.unet_offload_device())

        self.compression_ratio = compression_ratio
        self.global_average_pooling = global_average_pooling
@@ -297,30 +297,6 @@ class ControlNet(ControlBase):
        self.model_sampling_current = None
        super().cleanup()

-
-class QwenFunControlNet(ControlNet):
-    def get_control(self, x_noisy, t, cond, batched_number, transformer_options):
-        # Fun checkpoints are more sensitive to high strengths in the generic
-        # ControlNet merge path. Use a soft response curve so strength=1.0 stays
-        # unchanged while >1 grows more gently.
-        original_strength = self.strength
-        self.strength = math.sqrt(max(self.strength, 0.0))
-        try:
-            return super().get_control(x_noisy, t, cond, batched_number, transformer_options)
-        finally:
-            self.strength = original_strength
-
-    def pre_run(self, model, percent_to_timestep_function):
-        super().pre_run(model, percent_to_timestep_function)
-        self.set_extra_arg("base_model", model.diffusion_model)
-
-    def copy(self):
-        c = QwenFunControlNet(None, global_average_pooling=self.global_average_pooling, load_device=self.load_device, manual_cast_dtype=self.manual_cast_dtype)
-        c.control_model = self.control_model
-        c.control_model_wrapped = self.control_model_wrapped
-        self.copy_to(c)
-        return c
-
 class ControlLoraOps:
    class Linear(torch.nn.Module, comfy.ops.CastWeightBiasOp):
        def __init__(self, in_features: int, out_features: int, bias: bool = True,
@@ -584,7 +560,6 @@ def load_controlnet_hunyuandit(controlnet_data, model_options={}):
 def load_controlnet_flux_xlabs_mistoline(sd, mistoline=False, model_options={}):
    model_config, operations, load_device, unet_dtype, manual_cast_dtype, offload_device = controlnet_config(sd, model_options=model_options)
    control_model = comfy.ldm.flux.controlnet.ControlNetFlux(mistoline=mistoline, operations=operations, device=offload_device, dtype=unet_dtype, **model_config.unet_config)
-    sd = model_config.process_unet_state_dict(sd)
    control_model = controlnet_load_state_dict(control_model, sd)
    extra_conds = ['y', 'guidance']
    control = ControlNet(control_model, load_device=load_device, manual_cast_dtype=manual_cast_dtype, extra_conds=extra_conds)
@@ -630,53 +605,6 @@ def load_controlnet_qwen_instantx(sd, model_options={}):
    control = ControlNet(control_model, compression_ratio=1, latent_format=latent_format, concat_mask=concat_mask, load_device=load_device, manual_cast_dtype=manual_cast_dtype, extra_conds=extra_conds)
    return control

-
-def load_controlnet_qwen_fun(sd, model_options={}):
-    load_device = comfy.model_management.get_torch_device()
-    weight_dtype = comfy.utils.weight_dtype(sd)
-    unet_dtype = model_options.get("dtype", weight_dtype)
-    manual_cast_dtype = comfy.model_management.unet_manual_cast(unet_dtype, load_device)
-
-    operations = model_options.get("custom_operations", None)
-    if operations is None:
-        operations = comfy.ops.pick_operations(unet_dtype, manual_cast_dtype, disable_fast_fp8=True)
-
-    in_features = sd["control_img_in.weight"].shape[1]
-    inner_dim = sd["control_img_in.weight"].shape[0]
-
-    block_weight = sd["control_blocks.0.attn.to_q.weight"]
-    attention_head_dim = sd["control_blocks.0.attn.norm_q.weight"].shape[0]
-    num_attention_heads = max(1, block_weight.shape[0] // max(1, attention_head_dim))
-
-    model = comfy.ldm.qwen_image.controlnet.QwenImageFunControlNetModel(
-        control_in_features=in_features,
-        inner_dim=inner_dim,
-        num_attention_heads=num_attention_heads,
-        attention_head_dim=attention_head_dim,
-        num_control_blocks=5,
-        main_model_double=60,
-        injection_layers=(0, 12, 24, 36, 48),
-        operations=operations,
-        device=comfy.model_management.unet_offload_device(),
-        dtype=unet_dtype,
-    )
-    model = controlnet_load_state_dict(model, sd)
-
-    latent_format = comfy.latent_formats.Wan21()
-    control = QwenFunControlNet(
-        model,
-        compression_ratio=1,
-        latent_format=latent_format,
-        # Fun checkpoints already expect their own 33-channel context handling.
-        # Enabling generic concat_mask injects an extra mask channel at apply-time
-        # and breaks the intended fallback packing path.
-        concat_mask=False,
-        load_device=load_device,
-        manual_cast_dtype=manual_cast_dtype,
-        extra_conds=[],
-    )
-    return control
-
 def convert_mistoline(sd):
    return comfy.utils.state_dict_prefix_replace(sd, {"single_controlnet_blocks.": "controlnet_single_blocks."})

@@ -754,8 +682,6 @@ def load_controlnet_state_dict(state_dict, model=None, model_options={}):
            return load_controlnet_qwen_instantx(controlnet_data, model_options=model_options)
        elif "controlnet_x_embedder.weight" in controlnet_data:
            return load_controlnet_flux_instantx(controlnet_data, model_options=model_options)
-    elif "control_blocks.0.after_proj.weight" in controlnet_data and "control_img_in.weight" in controlnet_data:
-        return load_controlnet_qwen_fun(controlnet_data, model_options=model_options)

    elif "controlnet_blocks.0.linear.weight" in controlnet_data: #mistoline flux
        return load_controlnet_flux_xlabs_mistoline(convert_mistoline(controlnet_data), mistoline=True, model_options=model_options)
--- a/comfy/k_diffusion/sampling.py
+++ b/comfy/k_diffusion/sampling.py
@@ -5,7 +5,7 @@ from scipy import integrate
 import torch
 from torch import nn
 import torchsde
-from tqdm.auto import tqdm
+from tqdm.auto import trange, tqdm

 from . import utils
 from . import deis
@@ -13,9 +13,6 @@ from . import sa_solver
 import comfy.model_patcher
 import comfy.model_sampling

-import comfy.memory_management
-from comfy.utils import model_trange as trange
-
 def append_zero(x):
    return torch.cat([x, x.new_zeros([1])])

--- a/comfy/latent_formats.py
+++ b/comfy/latent_formats.py
@@ -8,7 +8,6 @@ class LatentFormat:
    latent_rgb_factors_bias = None
    latent_rgb_factors_reshape = None
    taesd_decoder_name = None
-    spacial_downscale_ratio = 8

    def process_in(self, latent):
        return latent * self.scale_factor
@@ -81,7 +80,6 @@ class SD_X4(LatentFormat):

 class SC_Prior(LatentFormat):
    latent_channels = 16
-    spacial_downscale_ratio = 42
    def __init__(self):
        self.scale_factor = 1.0
        self.latent_rgb_factors = [
@@ -104,7 +102,6 @@ class SC_Prior(LatentFormat):
        ]

 class SC_B(LatentFormat):
-    spacial_downscale_ratio = 4
    def __init__(self):
        self.scale_factor = 1.0 / 0.43
        self.latent_rgb_factors = [
@@ -184,7 +181,6 @@ class Flux(SD3):

 class Flux2(LatentFormat):
    latent_channels = 128
-    spacial_downscale_ratio = 16

    def __init__(self):
        self.latent_rgb_factors =[
@@ -276,7 +272,6 @@ class Mochi(LatentFormat):
 class LTXV(LatentFormat):
    latent_channels = 128
    latent_dimensions = 3
-    spacial_downscale_ratio = 32

    def __init__(self):
        self.latent_rgb_factors = [
@@ -520,7 +515,6 @@ class Wan21(LatentFormat):
 class Wan22(Wan21):
    latent_channels = 48
    latent_dimensions = 3
-    spacial_downscale_ratio = 16

    latent_rgb_factors = [
            [ 0.0119,  0.0103,  0.0046],
@@ -598,7 +592,6 @@ class Wan22(Wan21):
 class HunyuanImage21(LatentFormat):
    latent_channels = 64
    latent_dimensions = 2
-    spacial_downscale_ratio = 32
    scale_factor = 0.75289

    latent_rgb_factors = [
@@ -732,7 +725,6 @@ class HunyuanVideo15(LatentFormat):
    latent_rgb_factors_bias = [ 0.0456, -0.0202, -0.0644]
    latent_channels = 32
    latent_dimensions = 3
-    spacial_downscale_ratio = 16
    scale_factor = 1.03682
    taesd_decoder_name = "lighttaehy1_5"

@@ -755,13 +747,8 @@ class ACEAudio(LatentFormat):
    latent_channels = 8
    latent_dimensions = 2

-class ACEAudio15(LatentFormat):
-    latent_channels = 64
-    latent_dimensions = 1
-
 class ChromaRadiance(LatentFormat):
    latent_channels = 3
-    spacial_downscale_ratio = 1

    def __init__(self):
        self.latent_rgb_factors = [
--- a/comfy/ldm/ace/ace_step15.py
+++ b/comfy/ldm/ace/ace_step15.py
--- a/comfy/ldm/anima/model.py
+++ b/comfy/ldm/anima/model.py
@@ -1,214 +0,0 @@
-from comfy.ldm.cosmos.predict2 import MiniTrainDIT
-import torch
-from torch import nn
-import torch.nn.functional as F
-
-
-def rotate_half(x):
-    x1 = x[..., : x.shape[-1] // 2]
-    x2 = x[..., x.shape[-1] // 2 :]
-    return torch.cat((-x2, x1), dim=-1)
-
-
-def apply_rotary_pos_emb(x, cos, sin, unsqueeze_dim=1):
-    cos = cos.unsqueeze(unsqueeze_dim)
-    sin = sin.unsqueeze(unsqueeze_dim)
-    x_embed = (x * cos) + (rotate_half(x) * sin)
-    return x_embed
-
-
-class RotaryEmbedding(nn.Module):
-    def __init__(self, head_dim):
-        super().__init__()
-        self.rope_theta = 10000
-        inv_freq = 1.0 / (self.rope_theta ** (torch.arange(0, head_dim, 2, dtype=torch.int64).to(dtype=torch.float) / head_dim))
-        self.register_buffer("inv_freq", inv_freq, persistent=False)
-
-    @torch.no_grad()
-    def forward(self, x, position_ids):
-        inv_freq_expanded = self.inv_freq[None, :, None].float().expand(position_ids.shape[0], -1, 1).to(x.device)
-        position_ids_expanded = position_ids[:, None, :].float()
-
-        device_type = x.device.type if isinstance(x.device.type, str) and x.device.type != "mps" else "cpu"
-        with torch.autocast(device_type=device_type, enabled=False):  # Force float32
-            freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2)
-            emb = torch.cat((freqs, freqs), dim=-1)
-            cos = emb.cos()
-            sin = emb.sin()
-
-        return cos.to(dtype=x.dtype), sin.to(dtype=x.dtype)
-
-
-class Attention(nn.Module):
-    def __init__(self, query_dim, context_dim, n_heads, head_dim, device=None, dtype=None, operations=None):
-        super().__init__()
-
-        inner_dim = head_dim * n_heads
-        self.n_heads = n_heads
-        self.head_dim = head_dim
-        self.query_dim = query_dim
-        self.context_dim = context_dim
-
-        self.q_proj = operations.Linear(query_dim, inner_dim, bias=False, device=device, dtype=dtype)
-        self.q_norm = operations.RMSNorm(self.head_dim, eps=1e-6, device=device, dtype=dtype)
-
-        self.k_proj = operations.Linear(context_dim, inner_dim, bias=False, device=device, dtype=dtype)
-        self.k_norm = operations.RMSNorm(self.head_dim, eps=1e-6, device=device, dtype=dtype)
-
-        self.v_proj = operations.Linear(context_dim, inner_dim, bias=False, device=device, dtype=dtype)
-
-        self.o_proj = operations.Linear(inner_dim, query_dim, bias=False, device=device, dtype=dtype)
-
-    def forward(self, x, mask=None, context=None, position_embeddings=None, position_embeddings_context=None):
-        context = x if context is None else context
-        input_shape = x.shape[:-1]
-        q_shape = (*input_shape, self.n_heads, self.head_dim)
-        context_shape = context.shape[:-1]
-        kv_shape = (*context_shape, self.n_heads, self.head_dim)
-
-        query_states = self.q_norm(self.q_proj(x).view(q_shape)).transpose(1, 2)
-        key_states = self.k_norm(self.k_proj(context).view(kv_shape)).transpose(1, 2)
-        value_states = self.v_proj(context).view(kv_shape).transpose(1, 2)
-
-        if position_embeddings is not None:
-            assert position_embeddings_context is not None
-            cos, sin = position_embeddings
-            query_states = apply_rotary_pos_emb(query_states, cos, sin)
-            cos, sin = position_embeddings_context
-            key_states = apply_rotary_pos_emb(key_states, cos, sin)
-
-        attn_output = F.scaled_dot_product_attention(query_states, key_states, value_states, attn_mask=mask)
-
-        attn_output = attn_output.transpose(1, 2).reshape(*input_shape, -1).contiguous()
-        attn_output = self.o_proj(attn_output)
-        return attn_output
-
-    def init_weights(self):
-        torch.nn.init.zeros_(self.o_proj.weight)
-
-
-class TransformerBlock(nn.Module):
-    def __init__(self, source_dim, model_dim, num_heads=16, mlp_ratio=4.0, use_self_attn=False, layer_norm=False, device=None, dtype=None, operations=None):
-        super().__init__()
-        self.use_self_attn = use_self_attn
-
-        if self.use_self_attn:
-            self.norm_self_attn = operations.LayerNorm(model_dim, device=device, dtype=dtype) if layer_norm else operations.RMSNorm(model_dim, eps=1e-6, device=device, dtype=dtype)
-            self.self_attn = Attention(
-                query_dim=model_dim,
-                context_dim=model_dim,
-                n_heads=num_heads,
-                head_dim=model_dim//num_heads,
-                device=device,
-                dtype=dtype,
-                operations=operations,
-            )
-
-        self.norm_cross_attn = operations.LayerNorm(model_dim, device=device, dtype=dtype) if layer_norm else operations.RMSNorm(model_dim, eps=1e-6, device=device, dtype=dtype)
-        self.cross_attn = Attention(
-            query_dim=model_dim,
-            context_dim=source_dim,
-            n_heads=num_heads,
-            head_dim=model_dim//num_heads,
-            device=device,
-            dtype=dtype,
-            operations=operations,
-        )
-
-        self.norm_mlp = operations.LayerNorm(model_dim, device=device, dtype=dtype) if layer_norm else operations.RMSNorm(model_dim, eps=1e-6, device=device, dtype=dtype)
-        self.mlp = nn.Sequential(
-            operations.Linear(model_dim, int(model_dim * mlp_ratio), device=device, dtype=dtype),
-            nn.GELU(),
-            operations.Linear(int(model_dim * mlp_ratio), model_dim, device=device, dtype=dtype)
-        )
-
-    def forward(self, x, context, target_attention_mask=None, source_attention_mask=None, position_embeddings=None, position_embeddings_context=None):
-        if self.use_self_attn:
-            normed = self.norm_self_attn(x)
-            attn_out = self.self_attn(normed, mask=target_attention_mask, position_embeddings=position_embeddings, position_embeddings_context=position_embeddings)
-            x = x + attn_out
-
-        normed = self.norm_cross_attn(x)
-        attn_out = self.cross_attn(normed, mask=source_attention_mask, context=context, position_embeddings=position_embeddings, position_embeddings_context=position_embeddings_context)
-        x = x + attn_out
-
-        x = x + self.mlp(self.norm_mlp(x))
-        return x
-
-    def init_weights(self):
-        torch.nn.init.zeros_(self.mlp[2].weight)
-        self.cross_attn.init_weights()
-
-
-class LLMAdapter(nn.Module):
-    def __init__(
-            self,
-            source_dim=1024,
-            target_dim=1024,
-            model_dim=1024,
-            num_layers=6,
-            num_heads=16,
-            use_self_attn=True,
-            layer_norm=False,
-            device=None,
-            dtype=None,
-            operations=None,
-        ):
-        super().__init__()
-
-        self.embed = operations.Embedding(32128, target_dim, device=device, dtype=dtype)
-        if model_dim != target_dim:
-            self.in_proj = operations.Linear(target_dim, model_dim, device=device, dtype=dtype)
-        else:
-            self.in_proj = nn.Identity()
-        self.rotary_emb = RotaryEmbedding(model_dim//num_heads)
-        self.blocks = nn.ModuleList([
-            TransformerBlock(source_dim, model_dim, num_heads=num_heads, use_self_attn=use_self_attn, layer_norm=layer_norm, device=device, dtype=dtype, operations=operations) for _ in range(num_layers)
-        ])
-        self.out_proj = operations.Linear(model_dim, target_dim, device=device, dtype=dtype)
-        self.norm = operations.RMSNorm(target_dim, eps=1e-6, device=device, dtype=dtype)
-
-    def forward(self, source_hidden_states, target_input_ids, target_attention_mask=None, source_attention_mask=None):
-        if target_attention_mask is not None:
-            target_attention_mask = target_attention_mask.to(torch.bool)
-            if target_attention_mask.ndim == 2:
-                target_attention_mask = target_attention_mask.unsqueeze(1).unsqueeze(1)
-
-        if source_attention_mask is not None:
-            source_attention_mask = source_attention_mask.to(torch.bool)
-            if source_attention_mask.ndim == 2:
-                source_attention_mask = source_attention_mask.unsqueeze(1).unsqueeze(1)
-
-        context = source_hidden_states
-        x = self.in_proj(self.embed(target_input_ids, out_dtype=context.dtype))
-        position_ids = torch.arange(x.shape[1], device=x.device).unsqueeze(0)
-        position_ids_context = torch.arange(context.shape[1], device=x.device).unsqueeze(0)
-        position_embeddings = self.rotary_emb(x, position_ids)
-        position_embeddings_context = self.rotary_emb(x, position_ids_context)
-        for block in self.blocks:
-            x = block(x, context, target_attention_mask=target_attention_mask, source_attention_mask=source_attention_mask, position_embeddings=position_embeddings, position_embeddings_context=position_embeddings_context)
-        return self.norm(self.out_proj(x))
-
-
-class Anima(MiniTrainDIT):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.llm_adapter = LLMAdapter(device=kwargs.get("device"), dtype=kwargs.get("dtype"), operations=kwargs.get("operations"))
-
-    def preprocess_text_embeds(self, text_embeds, text_ids, t5xxl_weights=None):
-        if text_ids is not None:
-            out = self.llm_adapter(text_embeds, text_ids)
-            if t5xxl_weights is not None:
-                out = out * t5xxl_weights
-
-            if out.shape[1] < 512:
-                out = torch.nn.functional.pad(out, (0, 0, 0, 512 - out.shape[1]))
-            return out
-        else:
-            return text_embeds
-
-    def forward(self, x, timesteps, context, **kwargs):
-        t5xxl_ids = kwargs.pop("t5xxl_ids", None)
-        if t5xxl_ids is not None:
-            context = self.preprocess_text_embeds(context, t5xxl_ids, t5xxl_weights=kwargs.pop("t5xxl_weights", None))
-        return super().forward(x, timesteps, context, **kwargs)
--- a/comfy/ldm/chroma/layers.py
+++ b/comfy/ldm/chroma/layers.py
@@ -3,6 +3,7 @@ from torch import Tensor, nn

 from comfy.ldm.flux.layers import (
    MLPEmbedder,
+    RMSNorm,
    ModulationOut,
 )

@@ -28,7 +29,7 @@ class Approximator(nn.Module):
        super().__init__()
        self.in_proj = operations.Linear(in_dim, hidden_dim, bias=True, dtype=dtype, device=device)
        self.layers = nn.ModuleList([MLPEmbedder(hidden_dim, hidden_dim, dtype=dtype, device=device, operations=operations) for x in range( n_layers)])
-        self.norms = nn.ModuleList([operations.RMSNorm(hidden_dim, dtype=dtype, device=device) for x in range( n_layers)])
+        self.norms = nn.ModuleList([RMSNorm(hidden_dim, dtype=dtype, device=device, operations=operations) for x in range( n_layers)])
        self.out_proj = operations.Linear(hidden_dim, out_dim, dtype=dtype, device=device)

    @property
--- a/comfy/ldm/chroma/model.py
+++ b/comfy/ldm/chroma/model.py
@@ -152,7 +152,6 @@ class Chroma(nn.Module):
        transformer_options={},
        attn_mask: Tensor = None,
    ) -> Tensor:
-        transformer_options = transformer_options.copy()
        patches_replace = transformer_options.get("patches_replace", {})

        # running on sequences img
@@ -229,7 +228,6 @@ class Chroma(nn.Module):

        transformer_options["total_blocks"] = len(self.single_blocks)
        transformer_options["block_type"] = "single"
-        transformer_options["img_slice"] = [txt.shape[1], img.shape[1]]
        for i, block in enumerate(self.single_blocks):
            transformer_options["block_index"] = i
            if i not in self.skip_dit:
--- a/comfy/ldm/chroma_radiance/layers.py
+++ b/comfy/ldm/chroma_radiance/layers.py
@@ -4,6 +4,8 @@ from functools import lru_cache
 import torch
 from torch import nn

+from comfy.ldm.flux.layers import RMSNorm
+

 class NerfEmbedder(nn.Module):
    """
@@ -143,7 +145,7 @@ class NerfGLUBlock(nn.Module):
        # We now need to generate parameters for 3 matrices.
        total_params = 3 * hidden_size_x**2 * mlp_ratio
        self.param_generator = operations.Linear(hidden_size_s, total_params, dtype=dtype, device=device)
-        self.norm = operations.RMSNorm(hidden_size_x, dtype=dtype, device=device)
+        self.norm = RMSNorm(hidden_size_x, dtype=dtype, device=device, operations=operations)
        self.mlp_ratio = mlp_ratio


@@ -176,7 +178,7 @@ class NerfGLUBlock(nn.Module):
 class NerfFinalLayer(nn.Module):
    def __init__(self, hidden_size, out_channels, dtype=None, device=None, operations=None):
        super().__init__()
-        self.norm = operations.RMSNorm(hidden_size, dtype=dtype, device=device)
+        self.norm = RMSNorm(hidden_size, dtype=dtype, device=device, operations=operations)
        self.linear = operations.Linear(hidden_size, out_channels, dtype=dtype, device=device)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
@@ -188,7 +190,7 @@ class NerfFinalLayer(nn.Module):
 class NerfFinalLayerConv(nn.Module):
    def __init__(self, hidden_size: int, out_channels: int, dtype=None, device=None, operations=None):
        super().__init__()
-        self.norm = operations.RMSNorm(hidden_size, dtype=dtype, device=device)
+        self.norm = RMSNorm(hidden_size, dtype=dtype, device=device, operations=operations)
        self.conv = operations.Conv2d(
            in_channels=hidden_size,
            out_channels=out_channels,
--- a/comfy/ldm/cosmos/predict2.py
+++ b/comfy/ldm/cosmos/predict2.py
@@ -13,7 +13,6 @@ from torchvision import transforms

 import comfy.patcher_extension
 from comfy.ldm.modules.attention import optimized_attention
-import comfy.ldm.common_dit

 def apply_rotary_pos_emb(
    t: torch.Tensor,
@@ -335,7 +334,7 @@ class FinalLayer(nn.Module):
        device=None, dtype=None, operations=None
    ):
        super().__init__()
-        self.layer_norm = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6)
+        self.layer_norm = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6)
        self.linear = operations.Linear(
            hidden_size, spatial_patch_size * spatial_patch_size * temporal_patch_size * out_channels, bias=False, device=device, dtype=dtype
        )
@@ -463,8 +462,6 @@ class Block(nn.Module):
        extra_per_block_pos_emb: Optional[torch.Tensor] = None,
        transformer_options: Optional[dict] = {},
    ) -> torch.Tensor:
-        residual_dtype = x_B_T_H_W_D.dtype
-        compute_dtype = emb_B_T_D.dtype
        if extra_per_block_pos_emb is not None:
            x_B_T_H_W_D = x_B_T_H_W_D + extra_per_block_pos_emb

@@ -514,7 +511,7 @@ class Block(nn.Module):
        result_B_T_H_W_D = rearrange(
            self.self_attn(
                # normalized_x_B_T_HW_D,
-                rearrange(normalized_x_B_T_H_W_D.to(compute_dtype), "b t h w d -> b (t h w) d"),
+                rearrange(normalized_x_B_T_H_W_D, "b t h w d -> b (t h w) d"),
                None,
                rope_emb=rope_emb_L_1_1_D,
                transformer_options=transformer_options,
@@ -524,7 +521,7 @@ class Block(nn.Module):
            h=H,
            w=W,
        )
-        x_B_T_H_W_D = x_B_T_H_W_D + gate_self_attn_B_T_1_1_D.to(residual_dtype) * result_B_T_H_W_D.to(residual_dtype)
+        x_B_T_H_W_D = x_B_T_H_W_D + gate_self_attn_B_T_1_1_D * result_B_T_H_W_D

        def _x_fn(
            _x_B_T_H_W_D: torch.Tensor,
@@ -538,7 +535,7 @@ class Block(nn.Module):
            )
            _result_B_T_H_W_D = rearrange(
                self.cross_attn(
-                    rearrange(_normalized_x_B_T_H_W_D.to(compute_dtype), "b t h w d -> b (t h w) d"),
+                    rearrange(_normalized_x_B_T_H_W_D, "b t h w d -> b (t h w) d"),
                    crossattn_emb,
                    rope_emb=rope_emb_L_1_1_D,
                    transformer_options=transformer_options,
@@ -557,7 +554,7 @@ class Block(nn.Module):
            shift_cross_attn_B_T_1_1_D,
            transformer_options=transformer_options,
        )
-        x_B_T_H_W_D = result_B_T_H_W_D.to(residual_dtype) * gate_cross_attn_B_T_1_1_D.to(residual_dtype) + x_B_T_H_W_D
+        x_B_T_H_W_D = result_B_T_H_W_D * gate_cross_attn_B_T_1_1_D + x_B_T_H_W_D

        normalized_x_B_T_H_W_D = _fn(
            x_B_T_H_W_D,
@@ -565,8 +562,8 @@ class Block(nn.Module):
            scale_mlp_B_T_1_1_D,
            shift_mlp_B_T_1_1_D,
        )
-        result_B_T_H_W_D = self.mlp(normalized_x_B_T_H_W_D.to(compute_dtype))
-        x_B_T_H_W_D = x_B_T_H_W_D + gate_mlp_B_T_1_1_D.to(residual_dtype) * result_B_T_H_W_D.to(residual_dtype)
+        result_B_T_H_W_D = self.mlp(normalized_x_B_T_H_W_D)
+        x_B_T_H_W_D = x_B_T_H_W_D + gate_mlp_B_T_1_1_D * result_B_T_H_W_D
        return x_B_T_H_W_D


@@ -838,8 +835,6 @@ class MiniTrainDIT(nn.Module):
        padding_mask: Optional[torch.Tensor] = None,
        **kwargs,
    ):
-        orig_shape = list(x.shape)
-        x = comfy.ldm.common_dit.pad_to_patch_size(x, (self.patch_temporal, self.patch_spatial, self.patch_spatial))
        x_B_C_T_H_W = x
        timesteps_B_T = timesteps
        crossattn_emb = context
@@ -878,14 +873,6 @@ class MiniTrainDIT(nn.Module):
            "extra_per_block_pos_emb": extra_pos_emb_B_T_H_W_D_or_T_H_W_B_D,
            "transformer_options": kwargs.get("transformer_options", {}),
        }
-
-        # The residual stream for this model has large values. To make fp16 compute_dtype work, we keep the residual stream
-        # in fp32, but run attention and MLP modules in fp16.
-        # An alternate method that clamps fp16 values "works" in the sense that it makes coherent images, but there is noticeable
-        # quality degradation and visual artifacts.
-        if x_B_T_H_W_D.dtype == torch.float16:
-            x_B_T_H_W_D = x_B_T_H_W_D.float()
-
        for block in self.blocks:
            x_B_T_H_W_D = block(
                x_B_T_H_W_D,
@@ -894,6 +881,6 @@ class MiniTrainDIT(nn.Module):
                **block_kwargs,
            )

-        x_B_T_H_W_O = self.final_layer(x_B_T_H_W_D.to(crossattn_emb.dtype), t_embedding_B_T_D, adaln_lora_B_T_3D=adaln_lora_B_T_3D)
-        x_B_C_Tt_Hp_Wp = self.unpatchify(x_B_T_H_W_O)[:, :, :orig_shape[-3], :orig_shape[-2], :orig_shape[-1]]
+        x_B_T_H_W_O = self.final_layer(x_B_T_H_W_D, t_embedding_B_T_D, adaln_lora_B_T_3D=adaln_lora_B_T_3D)
+        x_B_C_Tt_Hp_Wp = self.unpatchify(x_B_T_H_W_O)
        return x_B_C_Tt_Hp_Wp
--- a/comfy/ldm/flux/layers.py
+++ b/comfy/ldm/flux/layers.py
@@ -5,9 +5,9 @@ import torch
 from torch import Tensor, nn

 from .math import attention, rope
+import comfy.ops
+import comfy.ldm.common_dit

-# Fix import for some custom nodes, TODO: delete eventually.
-RMSNorm = None

 class EmbedND(nn.Module):
    def __init__(self, dim: int, theta: int, axes_dim: list):
@@ -87,12 +87,20 @@ def build_mlp(hidden_size, mlp_hidden_dim, mlp_silu_act=False, yak_mlp=False, dt
            operations.Linear(mlp_hidden_dim, hidden_size, bias=True, dtype=dtype, device=device),
        )

+class RMSNorm(torch.nn.Module):
+    def __init__(self, dim: int, dtype=None, device=None, operations=None):
+        super().__init__()
+        self.scale = nn.Parameter(torch.empty((dim), dtype=dtype, device=device))
+
+    def forward(self, x: Tensor):
+        return comfy.ldm.common_dit.rms_norm(x, self.scale, 1e-6)
+

 class QKNorm(torch.nn.Module):
    def __init__(self, dim: int, dtype=None, device=None, operations=None):
        super().__init__()
-        self.query_norm = operations.RMSNorm(dim, dtype=dtype, device=device)
-        self.key_norm = operations.RMSNorm(dim, dtype=dtype, device=device)
+        self.query_norm = RMSNorm(dim, dtype=dtype, device=device, operations=operations)
+        self.key_norm = RMSNorm(dim, dtype=dtype, device=device, operations=operations)

    def forward(self, q: Tensor, k: Tensor, v: Tensor) -> tuple:
        q = self.query_norm(q)
@@ -161,7 +169,7 @@ class SiLUActivation(nn.Module):


 class DoubleStreamBlock(nn.Module):
-    def __init__(self, hidden_size: int, num_heads: int, mlp_ratio: float, qkv_bias: bool = False, modulation=True, mlp_silu_act=False, proj_bias=True, yak_mlp=False, dtype=None, device=None, operations=None):
+    def __init__(self, hidden_size: int, num_heads: int, mlp_ratio: float, qkv_bias: bool = False, flipped_img_txt=False, modulation=True, mlp_silu_act=False, proj_bias=True, yak_mlp=False, dtype=None, device=None, operations=None):
        super().__init__()

        mlp_hidden_dim = int(hidden_size * mlp_ratio)
@@ -189,6 +197,8 @@ class DoubleStreamBlock(nn.Module):

        self.txt_mlp = build_mlp(hidden_size, mlp_hidden_dim, mlp_silu_act=mlp_silu_act, yak_mlp=yak_mlp, dtype=dtype, device=device, operations=operations)

+        self.flipped_img_txt = flipped_img_txt
+
    def forward(self, img: Tensor, txt: Tensor, vec: Tensor, pe: Tensor, attn_mask=None, modulation_dims_img=None, modulation_dims_txt=None, transformer_options={}):
        if self.modulation:
            img_mod1, img_mod2 = self.img_mod(vec)
@@ -196,9 +206,6 @@ class DoubleStreamBlock(nn.Module):
        else:
            (img_mod1, img_mod2), (txt_mod1, txt_mod2) = vec

-        transformer_patches = transformer_options.get("patches", {})
-        extra_options = transformer_options.copy()
-
        # prepare image for attention
        img_modulated = self.img_norm1(img)
        img_modulated = apply_mod(img_modulated, (1 + img_mod1.scale), img_mod1.shift, modulation_dims_img)
@@ -217,23 +224,32 @@ class DoubleStreamBlock(nn.Module):
        del txt_qkv
        txt_q, txt_k = self.txt_attn.norm(txt_q, txt_k, txt_v)

-        q = torch.cat((txt_q, img_q), dim=2)
-        del txt_q, img_q
-        k = torch.cat((txt_k, img_k), dim=2)
-        del txt_k, img_k
-        v = torch.cat((txt_v, img_v), dim=2)
-        del txt_v, img_v
-        # run actual attention
-        attn = attention(q, k, v, pe=pe, mask=attn_mask, transformer_options=transformer_options)
-        del q, k, v
+        if self.flipped_img_txt:
+            q = torch.cat((img_q, txt_q), dim=2)
+            del img_q, txt_q
+            k = torch.cat((img_k, txt_k), dim=2)
+            del img_k, txt_k
+            v = torch.cat((img_v, txt_v), dim=2)
+            del img_v, txt_v
+            # run actual attention
+            attn = attention(q, k, v,
+                             pe=pe, mask=attn_mask, transformer_options=transformer_options)
+            del q, k, v

-        if "attn1_output_patch" in transformer_patches:
-            extra_options["img_slice"] = [txt.shape[1], attn.shape[1]]
-            patch = transformer_patches["attn1_output_patch"]
-            for p in patch:
-                attn = p(attn, extra_options)
+            img_attn, txt_attn = attn[:, : img.shape[1]], attn[:, img.shape[1]:]
+        else:
+            q = torch.cat((txt_q, img_q), dim=2)
+            del txt_q, img_q
+            k = torch.cat((txt_k, img_k), dim=2)
+            del txt_k, img_k
+            v = torch.cat((txt_v, img_v), dim=2)
+            del txt_v, img_v
+            # run actual attention
+            attn = attention(q, k, v,
+                             pe=pe, mask=attn_mask, transformer_options=transformer_options)
+            del q, k, v

-        txt_attn, img_attn = attn[:, : txt.shape[1]], attn[:, txt.shape[1]:]
+            txt_attn, img_attn = attn[:, : txt.shape[1]], attn[:, txt.shape[1]:]

        # calculate the img bloks
        img += apply_mod(self.img_attn.proj(img_attn), img_mod1.gate, None, modulation_dims_img)
@@ -312,9 +328,6 @@ class SingleStreamBlock(nn.Module):
        else:
            mod = vec

-        transformer_patches = transformer_options.get("patches", {})
-        extra_options = transformer_options.copy()
-
        qkv, mlp = torch.split(self.linear1(apply_mod(self.pre_norm(x), (1 + mod.scale), mod.shift, modulation_dims)), [3 * self.hidden_size, self.mlp_hidden_dim_first], dim=-1)

        q, k, v = qkv.view(qkv.shape[0], qkv.shape[1], 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
@@ -324,12 +337,6 @@ class SingleStreamBlock(nn.Module):
        # compute attention
        attn = attention(q, k, v, pe=pe, mask=attn_mask, transformer_options=transformer_options)
        del q, k, v
-
-        if "attn1_output_patch" in transformer_patches:
-            patch = transformer_patches["attn1_output_patch"]
-            for p in patch:
-                attn = p(attn, extra_options)
-
        # compute activation in mlp stream, cat again and run second linear layer
        if self.yak_mlp:
            mlp = self.mlp_act(mlp[..., self.mlp_hidden_dim_first // 2:]) * mlp[..., :self.mlp_hidden_dim_first // 2]
--- a/comfy/ldm/flux/math.py
+++ b/comfy/ldm/flux/math.py
@@ -29,34 +29,19 @@ def rope(pos: Tensor, dim: int, theta: int) -> Tensor:
    return out.to(dtype=torch.float32, device=pos.device)


-def _apply_rope1(x: Tensor, freqs_cis: Tensor):
-    x_ = x.to(dtype=freqs_cis.dtype).reshape(*x.shape[:-1], -1, 1, 2)
-
-    x_out = freqs_cis[..., 0] * x_[..., 0]
-    x_out.addcmul_(freqs_cis[..., 1], x_[..., 1])
-
-    return x_out.reshape(*x.shape).type_as(x)
-
-
-def _apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor):
-    return apply_rope1(xq, freqs_cis), apply_rope1(xk, freqs_cis)
-
-
 try:
    import comfy.quant_ops
-    q_apply_rope = comfy.quant_ops.ck.apply_rope
-    q_apply_rope1 = comfy.quant_ops.ck.apply_rope1
-    def apply_rope(xq, xk, freqs_cis):
-        if comfy.model_management.in_training:
-            return _apply_rope(xq, xk, freqs_cis)
-        else:
-            return apply_rope1(xq, freqs_cis), apply_rope1(xk, freqs_cis)
-    def apply_rope1(x, freqs_cis):
-        if comfy.model_management.in_training:
-            return _apply_rope1(x, freqs_cis)
-        else:
-            return q_apply_rope1(x, freqs_cis)
+    apply_rope = comfy.quant_ops.ck.apply_rope
+    apply_rope1 = comfy.quant_ops.ck.apply_rope1
 except:
    logging.warning("No comfy kitchen, using old apply_rope functions.")
-    apply_rope = _apply_rope
-    apply_rope1 = _apply_rope1
+    def apply_rope1(x: Tensor, freqs_cis: Tensor):
+        """Pure-PyTorch fallback that applies the rotary embedding `freqs_cis` to `x`.
+
+        The arithmetic is carried out in `freqs_cis`'s dtype; the result is
+        reshaped back to `x.shape` and cast back to `x`'s type.
+        """
+        # View the last dim of x as pairs: (..., n_pairs, 1, 2), in freqs_cis's dtype.
+        # NOTE(review): presumably freqs_cis carries a 2x2 rotation per pair in its
+        # last two dims so that it broadcasts against x_ -- confirm against rope().
+        x_ = x.to(dtype=freqs_cis.dtype).reshape(*x.shape[:-1], -1, 1, 2)
+
+        # x_out = freqs_cis[..., 0] * x_[..., 0] + freqs_cis[..., 1] * x_[..., 1];
+        # the second term is accumulated in place into the first product.
+        x_out = freqs_cis[..., 0] * x_[..., 0]
+        x_out.addcmul_(freqs_cis[..., 1], x_[..., 1])
+
+        return x_out.reshape(*x.shape).type_as(x)
+
+    def apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor):
+        """Apply `apply_rope1` with the same `freqs_cis` to `xq` and `xk`; return both as a tuple."""
+        return apply_rope1(xq, freqs_cis), apply_rope1(xk, freqs_cis)
--- a/comfy/ldm/flux/model.py
+++ b/comfy/ldm/flux/model.py
@@ -16,6 +16,7 @@ from .layers import (
    SingleStreamBlock,
    timestep_embedding,
    Modulation,
+    RMSNorm
 )

@dataclass
@@ -80,7 +81,7 @@ class Flux(nn.Module):
        self.txt_in = operations.Linear(params.context_in_dim, self.hidden_size, bias=params.ops_bias, dtype=dtype, device=device)

        if params.txt_norm:
-            self.txt_norm = operations.RMSNorm(params.context_in_dim, dtype=dtype, device=device)
+            self.txt_norm = RMSNorm(params.context_in_dim, dtype=dtype, device=device, operations=operations)
        else:
            self.txt_norm = None

@@ -142,7 +143,6 @@ class Flux(nn.Module):
        attn_mask: Tensor = None,
    ) -> Tensor:

-        transformer_options = transformer_options.copy()
        patches = transformer_options.get("patches", {})
        patches_replace = transformer_options.get("patches_replace", {})
        if img.ndim != 3 or txt.ndim != 3:
@@ -232,7 +232,6 @@ class Flux(nn.Module):

        transformer_options["total_blocks"] = len(self.single_blocks)
        transformer_options["block_type"] = "single"
-        transformer_options["img_slice"] = [txt.shape[1], img.shape[1]]
        for i, block in enumerate(self.single_blocks):
            transformer_options["block_index"] = i
            if ("single_block", i) in blocks_replace:
--- a/comfy/ldm/hunyuan_video/model.py
+++ b/comfy/ldm/hunyuan_video/model.py
@@ -241,6 +241,7 @@ class HunyuanVideo(nn.Module):
                    self.num_heads,
                    mlp_ratio=params.mlp_ratio,
                    qkv_bias=params.qkv_bias,
+                    flipped_img_txt=True,
                    dtype=dtype, device=device, operations=operations
                )
                for _ in range(params.depth)
@@ -304,7 +305,6 @@ class HunyuanVideo(nn.Module):
        control=None,
        transformer_options={},
    ) -> Tensor:
-        transformer_options = transformer_options.copy()
        patches_replace = transformer_options.get("patches_replace", {})

        initial_shape = list(img.shape)
@@ -378,14 +378,14 @@ class HunyuanVideo(nn.Module):
            extra_txt_ids = torch.zeros((txt_ids.shape[0], txt_vision_states.shape[1], txt_ids.shape[-1]), device=txt_ids.device, dtype=txt_ids.dtype)
            txt_ids = torch.cat((txt_ids, extra_txt_ids), dim=1)

-        ids = torch.cat((txt_ids, img_ids), dim=1)
+        ids = torch.cat((img_ids, txt_ids), dim=1)
        pe = self.pe_embedder(ids)

        img_len = img.shape[1]
        if txt_mask is not None:
            attn_mask_len = img_len + txt.shape[1]
            attn_mask = torch.zeros((1, 1, attn_mask_len), dtype=img.dtype, device=img.device)
-            attn_mask[:, 0, :txt.shape[1]] = txt_mask
+            attn_mask[:, 0, img_len:] = txt_mask
        else:
            attn_mask = None

@@ -413,11 +413,10 @@ class HunyuanVideo(nn.Module):
                    if add is not None:
                        img += add

-        img = torch.cat((txt, img), 1)
+        img = torch.cat((img, txt), 1)

        transformer_options["total_blocks"] = len(self.single_blocks)
        transformer_options["block_type"] = "single"
-        transformer_options["img_slice"] = [txt.shape[1], img.shape[1]]
        for i, block in enumerate(self.single_blocks):
            transformer_options["block_index"] = i
            if ("single_block", i) in blocks_replace:
@@ -436,9 +435,9 @@ class HunyuanVideo(nn.Module):
                if i < len(control_o):
                    add = control_o[i]
                    if add is not None:
-                        img[:, txt.shape[1]: img_len + txt.shape[1]] += add
+                        img[:, : img_len] += add

-        img = img[:, txt.shape[1]: img_len + txt.shape[1]]
+        img = img[:, : img_len]
        if ref_latent is not None:
            img = img[:, ref_latent.shape[1]:]

--- a/comfy/ldm/hunyuan_video/upsampler.py
+++ b/comfy/ldm/hunyuan_video/upsampler.py
@@ -109,10 +109,10 @@ class HunyuanVideo15SRModel():
        self.model_class = UPSAMPLERS.get(model_type)
        self.model = self.model_class(**config).eval()

-        self.patcher = comfy.model_patcher.CoreModelPatcher(self.model, load_device=self.load_device, offload_device=offload_device)
+        self.patcher = comfy.model_patcher.ModelPatcher(self.model, load_device=self.load_device, offload_device=offload_device)

    def load_sd(self, sd):
-        return self.model.load_state_dict(sd, strict=True, assign=self.patcher.is_dynamic())
+        return self.model.load_state_dict(sd, strict=True)

    def get_sd(self):
        return self.model.state_dict()
--- a/comfy/ldm/lightricks/av_model.py
+++ b/comfy/ldm/lightricks/av_model.py
@@ -18,12 +18,12 @@ class CompressedTimestep:
    def __init__(self, tensor: torch.Tensor, patches_per_frame: int):
        """
        tensor: [batch_size, num_tokens, feature_dim] tensor where num_tokens = num_frames * patches_per_frame
-        patches_per_frame: Number of spatial patches per frame (height * width in latent space), or None to disable compression
+        patches_per_frame: Number of spatial patches per frame (height * width in latent space)
        """
        self.batch_size, num_tokens, self.feature_dim = tensor.shape

        # Check if compression is valid (num_tokens must be divisible by patches_per_frame)
-        if patches_per_frame is not None and num_tokens % patches_per_frame == 0 and num_tokens >= patches_per_frame:
+        if num_tokens % patches_per_frame == 0 and num_tokens >= patches_per_frame:
            self.patches_per_frame = patches_per_frame
            self.num_frames = num_tokens // patches_per_frame

@@ -215,9 +215,22 @@ class BasicAVTransformerBlock(nn.Module):
        return (*scale_shift_ada_values, *gate_ada_values)

    def forward(
-        self, x: Tuple[torch.Tensor, torch.Tensor], v_context=None, a_context=None, attention_mask=None, v_timestep=None, a_timestep=None,
-        v_pe=None, a_pe=None, v_cross_pe=None, a_cross_pe=None, v_cross_scale_shift_timestep=None, a_cross_scale_shift_timestep=None,
-        v_cross_gate_timestep=None, a_cross_gate_timestep=None, transformer_options=None,
+        self,
+        x: Tuple[torch.Tensor, torch.Tensor],
+        v_context=None,
+        a_context=None,
+        attention_mask=None,
+        v_timestep=None,
+        a_timestep=None,
+        v_pe=None,
+        a_pe=None,
+        v_cross_pe=None,
+        a_cross_pe=None,
+        v_cross_scale_shift_timestep=None,
+        a_cross_scale_shift_timestep=None,
+        v_cross_gate_timestep=None,
+        a_cross_gate_timestep=None,
+        transformer_options=None,
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        run_vx = transformer_options.get("run_vx", True)
        run_ax = transformer_options.get("run_ax", True)
@@ -227,102 +240,144 @@ class BasicAVTransformerBlock(nn.Module):
        run_a2v = run_vx and transformer_options.get("a2v_cross_attn", True) and ax.numel() > 0
        run_v2a = run_ax and transformer_options.get("v2a_cross_attn", True)

-        # video
        if run_vx:
-            # video self-attention
-            vshift_msa, vscale_msa = (self.get_ada_values(self.scale_shift_table, vx.shape[0], v_timestep, slice(0, 2)))
+            vshift_msa, vscale_msa, vgate_msa = (
+                self.get_ada_values(self.scale_shift_table, vx.shape[0], v_timestep, slice(0, 3))
+            )
+
            norm_vx = comfy.ldm.common_dit.rms_norm(vx) * (1 + vscale_msa) + vshift_msa
-            del vshift_msa, vscale_msa
-            attn1_out = self.attn1(norm_vx, pe=v_pe, transformer_options=transformer_options)
-            del norm_vx
-            # video cross-attention
-            vgate_msa = self.get_ada_values(self.scale_shift_table, vx.shape[0], v_timestep, slice(2, 3))[0]
-            vx.addcmul_(attn1_out, vgate_msa)
-            del vgate_msa, attn1_out
-            vx.add_(self.attn2(comfy.ldm.common_dit.rms_norm(vx), context=v_context, mask=attention_mask, transformer_options=transformer_options))
+            vx += self.attn1(norm_vx, pe=v_pe, transformer_options=transformer_options) * vgate_msa
+            vx += self.attn2(
+                comfy.ldm.common_dit.rms_norm(vx),
+                context=v_context,
+                mask=attention_mask,
+                transformer_options=transformer_options,
+            )
+
+            del vshift_msa, vscale_msa, vgate_msa

-        # audio
        if run_ax:
-            # audio self-attention
-            ashift_msa, ascale_msa = (self.get_ada_values(self.audio_scale_shift_table, ax.shape[0], a_timestep, slice(0, 2)))
-            norm_ax = comfy.ldm.common_dit.rms_norm(ax) * (1 + ascale_msa) + ashift_msa
-            del ashift_msa, ascale_msa
-            attn1_out = self.audio_attn1(norm_ax, pe=a_pe, transformer_options=transformer_options)
-            del norm_ax
-            # audio cross-attention
-            agate_msa = self.get_ada_values(self.audio_scale_shift_table, ax.shape[0], a_timestep, slice(2, 3))[0]
-            ax.addcmul_(attn1_out, agate_msa)
-            del agate_msa, attn1_out
-            ax.add_(self.audio_attn2(comfy.ldm.common_dit.rms_norm(ax), context=a_context, mask=attention_mask, transformer_options=transformer_options))
+            ashift_msa, ascale_msa, agate_msa = (
+                self.get_ada_values(self.audio_scale_shift_table, ax.shape[0], a_timestep, slice(0, 3))
+            )

-        # video - audio cross attention.
+            norm_ax = comfy.ldm.common_dit.rms_norm(ax) * (1 + ascale_msa) + ashift_msa
+            ax += (
+                self.audio_attn1(norm_ax, pe=a_pe, transformer_options=transformer_options)
+                * agate_msa
+            )
+            ax += self.audio_attn2(
+                comfy.ldm.common_dit.rms_norm(ax),
+                context=a_context,
+                mask=attention_mask,
+                transformer_options=transformer_options,
+            )
+
+            del ashift_msa, ascale_msa, agate_msa
+
+        # Audio - Video cross attention.
        if run_a2v or run_v2a:
+            # norm3
            vx_norm3 = comfy.ldm.common_dit.rms_norm(vx)
            ax_norm3 = comfy.ldm.common_dit.rms_norm(ax)

-            # audio to video cross attention
+            (
+                scale_ca_audio_hidden_states_a2v,
+                shift_ca_audio_hidden_states_a2v,
+                scale_ca_audio_hidden_states_v2a,
+                shift_ca_audio_hidden_states_v2a,
+                gate_out_v2a,
+            ) = self.get_av_ca_ada_values(
+                self.scale_shift_table_a2v_ca_audio,
+                ax.shape[0],
+                a_cross_scale_shift_timestep,
+                a_cross_gate_timestep,
+            )
+
+            (
+                scale_ca_video_hidden_states_a2v,
+                shift_ca_video_hidden_states_a2v,
+                scale_ca_video_hidden_states_v2a,
+                shift_ca_video_hidden_states_v2a,
+                gate_out_a2v,
+            ) = self.get_av_ca_ada_values(
+                self.scale_shift_table_a2v_ca_video,
+                vx.shape[0],
+                v_cross_scale_shift_timestep,
+                v_cross_gate_timestep,
+            )
+
            if run_a2v:
-                scale_ca_audio_hidden_states_a2v, shift_ca_audio_hidden_states_a2v = self.get_ada_values(
-                    self.scale_shift_table_a2v_ca_audio[:4, :], ax.shape[0], a_cross_scale_shift_timestep)[:2]
-                scale_ca_video_hidden_states_a2v_v, shift_ca_video_hidden_states_a2v_v = self.get_ada_values(
-                    self.scale_shift_table_a2v_ca_video[:4, :], vx.shape[0], v_cross_scale_shift_timestep)[:2]
+                vx_scaled = (
+                    vx_norm3 * (1 + scale_ca_video_hidden_states_a2v)
+                    + shift_ca_video_hidden_states_a2v
+                )
+                ax_scaled = (
+                    ax_norm3 * (1 + scale_ca_audio_hidden_states_a2v)
+                    + shift_ca_audio_hidden_states_a2v
+                )
+                vx += (
+                    self.audio_to_video_attn(
+                        vx_scaled,
+                        context=ax_scaled,
+                        pe=v_cross_pe,
+                        k_pe=a_cross_pe,
+                        transformer_options=transformer_options,
+                    )
+                    * gate_out_a2v
+                )

-                vx_scaled = vx_norm3 * (1 + scale_ca_video_hidden_states_a2v_v) + shift_ca_video_hidden_states_a2v_v
-                ax_scaled = ax_norm3 * (1 + scale_ca_audio_hidden_states_a2v) + shift_ca_audio_hidden_states_a2v
-                del scale_ca_video_hidden_states_a2v_v, shift_ca_video_hidden_states_a2v_v, scale_ca_audio_hidden_states_a2v, shift_ca_audio_hidden_states_a2v
+                del gate_out_a2v
+                # Unbind the a2v modulation tensors once the a2v update is done.
+                # The last name must not end in a line continuation: a trailing
+                # backslash would join this statement with whatever line follows.
+                del scale_ca_video_hidden_states_a2v,\
+                    shift_ca_video_hidden_states_a2v,\
+                    scale_ca_audio_hidden_states_a2v,\
+                    shift_ca_audio_hidden_states_a2v

-                a2v_out = self.audio_to_video_attn(vx_scaled, context=ax_scaled, pe=v_cross_pe, k_pe=a_cross_pe, transformer_options=transformer_options)
-                del vx_scaled, ax_scaled
-
-                gate_out_a2v = self.get_ada_values(self.scale_shift_table_a2v_ca_video[4:, :], vx.shape[0], v_cross_gate_timestep)[0]
-                vx.addcmul_(a2v_out, gate_out_a2v)
-                del gate_out_a2v, a2v_out
-
-            # video to audio cross attention
            if run_v2a:
-                scale_ca_audio_hidden_states_v2a, shift_ca_audio_hidden_states_v2a = self.get_ada_values(
-                    self.scale_shift_table_a2v_ca_audio[:4, :], ax.shape[0], a_cross_scale_shift_timestep)[2:4]
-                scale_ca_video_hidden_states_v2a, shift_ca_video_hidden_states_v2a = self.get_ada_values(
-                    self.scale_shift_table_a2v_ca_video[:4, :], vx.shape[0], v_cross_scale_shift_timestep)[2:4]
+                ax_scaled = (
+                    ax_norm3 * (1 + scale_ca_audio_hidden_states_v2a)
+                    + shift_ca_audio_hidden_states_v2a
+                )
+                vx_scaled = (
+                    vx_norm3 * (1 + scale_ca_video_hidden_states_v2a)
+                    + shift_ca_video_hidden_states_v2a
+                )
+                ax += (
+                    self.video_to_audio_attn(
+                        ax_scaled,
+                        context=vx_scaled,
+                        pe=a_cross_pe,
+                        k_pe=v_cross_pe,
+                        transformer_options=transformer_options,
+                    )
+                    * gate_out_v2a
+                )

-                ax_scaled = ax_norm3 * (1 + scale_ca_audio_hidden_states_v2a) + shift_ca_audio_hidden_states_v2a
-                vx_scaled = vx_norm3 * (1 + scale_ca_video_hidden_states_v2a) + shift_ca_video_hidden_states_v2a
-                del scale_ca_video_hidden_states_v2a, shift_ca_video_hidden_states_v2a, scale_ca_audio_hidden_states_v2a, shift_ca_audio_hidden_states_v2a
+                del gate_out_v2a
+                del scale_ca_video_hidden_states_v2a,\
+                    shift_ca_video_hidden_states_v2a,\
+                    scale_ca_audio_hidden_states_v2a,\
+                    shift_ca_audio_hidden_states_v2a

-                v2a_out = self.video_to_audio_attn(ax_scaled, context=vx_scaled, pe=a_cross_pe, k_pe=v_cross_pe, transformer_options=transformer_options)
-                del ax_scaled, vx_scaled
-
-                gate_out_v2a = self.get_ada_values(self.scale_shift_table_a2v_ca_audio[4:, :], ax.shape[0], a_cross_gate_timestep)[0]
-                ax.addcmul_(v2a_out, gate_out_v2a)
-                del gate_out_v2a, v2a_out
-
-            del vx_norm3, ax_norm3
-
-        # video feedforward
        if run_vx:
-            vshift_mlp, vscale_mlp = self.get_ada_values(self.scale_shift_table, vx.shape[0], v_timestep, slice(3, 5))
+            vshift_mlp, vscale_mlp, vgate_mlp = (
+                self.get_ada_values(self.scale_shift_table, vx.shape[0], v_timestep, slice(3, None))
+            )
+
            vx_scaled = comfy.ldm.common_dit.rms_norm(vx) * (1 + vscale_mlp) + vshift_mlp
-            del vshift_mlp, vscale_mlp
+            vx += self.ff(vx_scaled) * vgate_mlp
+            del vshift_mlp, vscale_mlp, vgate_mlp

-            ff_out = self.ff(vx_scaled)
-            del vx_scaled
-
-            vgate_mlp = self.get_ada_values(self.scale_shift_table, vx.shape[0], v_timestep, slice(5, 6))[0]
-            vx.addcmul_(ff_out, vgate_mlp)
-            del vgate_mlp, ff_out
-
-        # audio feedforward
        if run_ax:
-            ashift_mlp, ascale_mlp = self.get_ada_values(self.audio_scale_shift_table, ax.shape[0], a_timestep, slice(3, 5))
+            ashift_mlp, ascale_mlp, agate_mlp = (
+                self.get_ada_values(self.audio_scale_shift_table, ax.shape[0], a_timestep, slice(3, None))
+            )
+
            ax_scaled = comfy.ldm.common_dit.rms_norm(ax) * (1 + ascale_mlp) + ashift_mlp
-            del ashift_mlp, ascale_mlp
+            ax += self.audio_ff(ax_scaled) * agate_mlp

-            ff_out = self.audio_ff(ax_scaled)
-            del ax_scaled
+            del ashift_mlp, ascale_mlp, agate_mlp

-            agate_mlp = self.get_ada_values(self.audio_scale_shift_table, ax.shape[0], a_timestep, slice(5, 6))[0]
-            ax.addcmul_(ff_out, agate_mlp)
-            del agate_mlp, ff_out

        return vx, ax

@@ -534,20 +589,9 @@ class LTXAVModel(LTXVModel):
        audio_length = kwargs.get("audio_length", 0)
        # Separate audio and video latents
        vx, ax = self.separate_audio_and_video_latents(x, audio_length)
-
-        has_spatial_mask = False
-        if denoise_mask is not None:
-            # check if any frame has spatial variation (inpainting)
-            for frame_idx in range(denoise_mask.shape[2]):
-                frame_mask = denoise_mask[0, 0, frame_idx]
-                if frame_mask.numel() > 0 and frame_mask.min() != frame_mask.max():
-                    has_spatial_mask = True
-                    break
-
        [vx, v_pixel_coords, additional_args] = super()._process_input(
            vx, keyframe_idxs, denoise_mask, **kwargs
        )
-        additional_args["has_spatial_mask"] = has_spatial_mask

        ax, a_latent_coords = self.a_patchifier.patchify(ax)
        ax = self.audio_patchify_proj(ax)
@@ -574,9 +618,8 @@ class LTXAVModel(LTXVModel):
        # Calculate patches_per_frame from orig_shape: [batch, channels, frames, height, width]
        # Video tokens are arranged as (frames * height * width), so patches_per_frame = height * width
        orig_shape = kwargs.get("orig_shape")
-        has_spatial_mask = kwargs.get("has_spatial_mask", None)
        v_patches_per_frame = None
-        if not has_spatial_mask and orig_shape is not None and len(orig_shape) == 5:
+        if orig_shape is not None and len(orig_shape) == 5:
            # orig_shape[3] = height, orig_shape[4] = width (in latent space)
            v_patches_per_frame = orig_shape[3] * orig_shape[4]

@@ -619,11 +662,10 @@ class LTXAVModel(LTXVModel):
            )

            # Compress cross-attention timesteps (only video side, audio is too small to benefit)
-            # v_patches_per_frame is None for spatial masks, set for temporal masks or no mask
            cross_av_timestep_ss = [
                av_ca_audio_scale_shift_timestep.view(batch_size, -1, av_ca_audio_scale_shift_timestep.shape[-1]),
-                CompressedTimestep(av_ca_video_scale_shift_timestep.view(batch_size, -1, av_ca_video_scale_shift_timestep.shape[-1]), v_patches_per_frame),  # video - compressed if possible
-                CompressedTimestep(av_ca_a2v_gate_noise_timestep.view(batch_size, -1, av_ca_a2v_gate_noise_timestep.shape[-1]), v_patches_per_frame),  # video - compressed if possible
+                CompressedTimestep(av_ca_video_scale_shift_timestep.view(batch_size, -1, av_ca_video_scale_shift_timestep.shape[-1]), v_patches_per_frame),  # video - compressed
+                CompressedTimestep(av_ca_a2v_gate_noise_timestep.view(batch_size, -1, av_ca_a2v_gate_noise_timestep.shape[-1]), v_patches_per_frame),  # video - compressed
                av_ca_v2a_gate_noise_timestep.view(batch_size, -1, av_ca_v2a_gate_noise_timestep.shape[-1]),
            ]

--- a/comfy/ldm/lightricks/vae/audio_vae.py
+++ b/comfy/ldm/lightricks/vae/audio_vae.py
@@ -103,10 +103,20 @@ class AudioPreprocessor:
            return waveform
        return torchaudio.functional.resample(waveform, source_rate, self.target_sample_rate)

+    @staticmethod
+    def normalize_amplitude(
+        waveform: torch.Tensor, max_amplitude: float = 0.5, eps: float = 1e-5
+    ) -> torch.Tensor:
+        """Remove the mean along dim 2 and cap the peak amplitude of `waveform`.
+
+        Args:
+            waveform: Tensor with at least 3 dims (the mean is taken over dim 2).
+                Assumes a (batch, channels, samples) layout -- TODO confirm.
+            max_amplitude: Upper bound for the peak absolute value of the result.
+            eps: Added to the measured peak so the division never hits zero.
+
+        Returns:
+            The mean-removed waveform multiplied by a single scalar `scale <= 1`.
+            The signal is only ever attenuated, never amplified.
+        """
+        # Subtract the per-row mean along dim 2.
+        waveform = waveform - waveform.mean(dim=2, keepdim=True)
+        # One peak over the whole tensor: every element shares the same scale.
+        peak = torch.max(torch.abs(waveform)) + eps
+        # min(peak, max_amplitude) / peak: 1 when already within bounds,
+        # max_amplitude / peak otherwise.
+        scale = peak.clamp(max=max_amplitude) / peak
+        return waveform * scale
+
    def waveform_to_mel(
        self, waveform: torch.Tensor, waveform_sample_rate: int, device
    ) -> torch.Tensor:
        waveform = self.resample(waveform, waveform_sample_rate)
+        waveform = self.normalize_amplitude(waveform)

        mel_transform = torchaudio.transforms.MelSpectrogram(
            sample_rate=self.target_sample_rate,
@@ -179,12 +189,9 @@ class AudioVAE(torch.nn.Module):
        waveform = self.device_manager.move_to_load_device(waveform)
        expected_channels = self.autoencoder.encoder.in_channels
        if waveform.shape[1] != expected_channels:
-            if waveform.shape[1] == 1:
-                waveform = waveform.expand(-1, expected_channels, *waveform.shape[2:])
-            else:
-                raise ValueError(
-                    f"Input audio must have {expected_channels} channels, got {waveform.shape[1]}"
-                )
+            raise ValueError(
+                f"Input audio must have {expected_channels} channels, got {waveform.shape[1]}"
+            )

        mel_spec = self.preprocessor.waveform_to_mel(
            waveform, waveform_sample_rate, device=self.device_manager.load_device
--- a/comfy/ldm/lightricks/vae/causal_conv3d.py
+++ b/comfy/ldm/lightricks/vae/causal_conv3d.py
@@ -1,11 +1,11 @@
 from typing import Tuple, Union

-import threading
 import torch
 import torch.nn as nn
 import comfy.ops
 ops = comfy.ops.disable_weight_init

+
 class CausalConv3d(nn.Module):
    def __init__(
        self,
@@ -42,34 +42,23 @@ class CausalConv3d(nn.Module):
            padding_mode=spatial_padding_mode,
            groups=groups,
        )
-        self.temporal_cache_state={}

    def forward(self, x, causal: bool = True):
-        tid = threading.get_ident()
-
-        cached, is_end = self.temporal_cache_state.get(tid, (None, False))
-        if cached is None:
-            padding_length = self.time_kernel_size - 1
-            if not causal:
-                padding_length = padding_length // 2
-            if x.shape[2] == 0:
-                return x
-            cached = x[:, :, :1, :, :].repeat((1, 1, padding_length, 1, 1))
-        pieces = [ cached, x ]
-        if is_end and not causal:
-            pieces.append(x[:, :, -1:, :, :].repeat((1, 1, (self.time_kernel_size - 1) // 2, 1, 1)))
-
-        needs_caching = not is_end
-        if needs_caching and x.shape[2] >= self.time_kernel_size - 1:
-            needs_caching = False
-            self.temporal_cache_state[tid] = (x[:, :, -(self.time_kernel_size - 1):, :, :], False)
-
-        x = torch.cat(pieces, dim=2)
-
-        if needs_caching:
-            self.temporal_cache_state[tid] = (x[:, :, -(self.time_kernel_size - 1):, :, :], False)
-
-        return self.conv(x) if x.shape[2] >= self.time_kernel_size else x[:, :, :0, :, :]
+        """Pad `x` along dim 2 with repeated edge frames, then apply `self.conv`.
+
+        `x` is indexed as a 5-D tensor; dim 2 is presumably the time axis
+        (it is padded by `time_kernel_size`) -- confirm against callers.
+
+        Args:
+            x: 5-D input tensor.
+            causal: If True, pad only the front with `time_kernel_size - 1`
+                copies of the first frame. If False, pad both ends with
+                `(time_kernel_size - 1) // 2` copies of the first and last
+                frame respectively.
+
+        Returns:
+            The output of `self.conv` on the padded tensor.
+        """
+        if causal:
+            # Front-only padding: replicate the first frame.
+            first_frame_pad = x[:, :, :1, :, :].repeat(
+                (1, 1, self.time_kernel_size - 1, 1, 1)
+            )
+            x = torch.concatenate((first_frame_pad, x), dim=2)
+        else:
+            # Two-sided padding: replicate the first and last frames equally.
+            first_frame_pad = x[:, :, :1, :, :].repeat(
+                (1, 1, (self.time_kernel_size - 1) // 2, 1, 1)
+            )
+            last_frame_pad = x[:, :, -1:, :, :].repeat(
+                (1, 1, (self.time_kernel_size - 1) // 2, 1, 1)
+            )
+            x = torch.concatenate((first_frame_pad, x, last_frame_pad), dim=2)
+        x = self.conv(x)
+        return x

    @property
    def weight(self):
--- a/comfy/ldm/lightricks/vae/causal_video_autoencoder.py
+++ b/comfy/ldm/lightricks/vae/causal_video_autoencoder.py
@@ -1,5 +1,4 @@
 from __future__ import annotations
-import threading
 import torch
 from torch import nn
 from functools import partial
@@ -7,35 +6,12 @@ import math
 from einops import rearrange
 from typing import List, Optional, Tuple, Union
 from .conv_nd_factory import make_conv_nd, make_linear_nd
-from .causal_conv3d import CausalConv3d
 from .pixel_norm import PixelNorm
 from ..model import PixArtAlphaCombinedTimestepSizeEmbeddings
 import comfy.ops
-from comfy.ldm.modules.diffusionmodules.model import torch_cat_if_needed

 ops = comfy.ops.disable_weight_init

-def mark_conv3d_ended(module):
-    tid = threading.get_ident()
-    for _, m in module.named_modules():
-        if isinstance(m, CausalConv3d):
-            current = m.temporal_cache_state.get(tid, (None, False))
-            m.temporal_cache_state[tid] = (current[0], True)
-
-def split2(tensor, split_point, dim=2):
-    return torch.split(tensor, [split_point, tensor.shape[dim] - split_point], dim=dim)
-
-def add_exchange_cache(dest, cache_in, new_input, dim=2):
-    if dest is not None:
-        if cache_in is not None:
-            cache_to_dest = min(dest.shape[dim], cache_in.shape[dim])
-            lead_in_dest, dest = split2(dest, cache_to_dest, dim=dim)
-            lead_in_source, cache_in = split2(cache_in, cache_to_dest, dim=dim)
-            lead_in_dest.add_(lead_in_source)
-        body, new_input = split2(new_input, dest.shape[dim], dim)
-        dest.add_(body)
-    return torch_cat_if_needed([cache_in, new_input], dim=dim)
-
 class Encoder(nn.Module):
    r"""
    The `Encoder` layer of a variational autoencoder that encodes its input into a latent representation.
@@ -229,7 +205,7 @@ class Encoder(nn.Module):

        self.gradient_checkpointing = False

-    def forward_orig(self, sample: torch.FloatTensor) -> torch.FloatTensor:
+    def forward(self, sample: torch.FloatTensor) -> torch.FloatTensor:
        r"""The forward method of the `Encoder` class."""

        sample = patchify(sample, patch_size_hw=self.patch_size, patch_size_t=1)
@@ -278,22 +254,6 @@ class Encoder(nn.Module):

        return sample

-    def forward(self, *args, **kwargs):
-        #No encoder support so just flag the end so it doesnt use the cache.
-        mark_conv3d_ended(self)
-        try:
-            return self.forward_orig(*args, **kwargs)
-        finally:
-            tid = threading.get_ident()
-            for _, module in self.named_modules():
-                # ComfyUI doesn't thread this kind of stuff today, but just in case
-                # we key on the thread to make it thread safe.
-                tid = threading.get_ident()
-                if hasattr(module, "temporal_cache_state"):
-                    module.temporal_cache_state.pop(tid, None)
-
-
-MAX_CHUNK_SIZE=(128 * 1024 ** 2)

 class Decoder(nn.Module):
    r"""
@@ -381,6 +341,18 @@ class Decoder(nn.Module):
                    timestep_conditioning=timestep_conditioning,
                    spatial_padding_mode=spatial_padding_mode,
                )
+            elif block_name == "attn_res_x":
+                block = UNetMidBlock3D(
+                    dims=dims,
+                    in_channels=input_channel,
+                    num_layers=block_params["num_layers"],
+                    resnet_groups=norm_num_groups,
+                    norm_layer=norm_layer,
+                    inject_noise=block_params.get("inject_noise", False),
+                    timestep_conditioning=timestep_conditioning,
+                    attention_head_dim=block_params["attention_head_dim"],
+                    spatial_padding_mode=spatial_padding_mode,
+                )
            elif block_name == "res_x_y":
                output_channel = output_channel // block_params.get("multiplier", 2)
                block = ResnetBlock3D(
@@ -456,9 +428,8 @@ class Decoder(nn.Module):
            )
            self.last_scale_shift_table = nn.Parameter(torch.empty(2, output_channel))

-
    # def forward(self, sample: torch.FloatTensor, target_shape) -> torch.FloatTensor:
-    def forward_orig(
+    def forward(
        self,
        sample: torch.FloatTensor,
        timestep: Optional[torch.Tensor] = None,
@@ -466,7 +437,6 @@ class Decoder(nn.Module):
        r"""The forward method of the `Decoder` class."""
        batch_size = sample.shape[0]

-        mark_conv3d_ended(self.conv_in)
        sample = self.conv_in(sample, causal=self.causal)

        checkpoint_fn = (
@@ -475,12 +445,24 @@ class Decoder(nn.Module):
            else lambda x: x
        )

-        timestep_shift_scale = None
+        scaled_timestep = None
        if self.timestep_conditioning:
            assert (
                timestep is not None
            ), "should pass timestep with timestep_conditioning=True"
            scaled_timestep = timestep * self.timestep_scale_multiplier.to(dtype=sample.dtype, device=sample.device)
+
+        for up_block in self.up_blocks:
+            if self.timestep_conditioning and isinstance(up_block, UNetMidBlock3D):
+                sample = checkpoint_fn(up_block)(
+                    sample, causal=self.causal, timestep=scaled_timestep
+                )
+            else:
+                sample = checkpoint_fn(up_block)(sample, causal=self.causal)
+
+        sample = self.conv_norm_out(sample)
+
+        if self.timestep_conditioning:
            embedded_timestep = self.last_time_embedder(
                timestep=scaled_timestep.flatten(),
                resolution=None,
@@ -501,62 +483,16 @@ class Decoder(nn.Module):
                embedded_timestep.shape[-2],
                embedded_timestep.shape[-1],
            )
-            timestep_shift_scale = ada_values.unbind(dim=1)
+            shift, scale = ada_values.unbind(dim=1)
+            sample = sample * (1 + scale) + shift

-        output = []
-
-        def run_up(idx, sample, ended):
-            if idx >= len(self.up_blocks):
-                sample = self.conv_norm_out(sample)
-                if timestep_shift_scale is not None:
-                    shift, scale = timestep_shift_scale
-                    sample = sample * (1 + scale) + shift
-                sample = self.conv_act(sample)
-                if ended:
-                    mark_conv3d_ended(self.conv_out)
-                sample = self.conv_out(sample, causal=self.causal)
-                if sample is not None and sample.shape[2] > 0:
-                    output.append(sample)
-                return
-
-            up_block = self.up_blocks[idx]
-            if (ended):
-                mark_conv3d_ended(up_block)
-            if self.timestep_conditioning and isinstance(up_block, UNetMidBlock3D):
-                sample = checkpoint_fn(up_block)(
-                    sample, causal=self.causal, timestep=scaled_timestep
-                )
-            else:
-                sample = checkpoint_fn(up_block)(sample, causal=self.causal)
-
-            if sample is None or sample.shape[2] == 0:
-                return
-
-            total_bytes = sample.numel() * sample.element_size()
-            num_chunks = (total_bytes + MAX_CHUNK_SIZE - 1) // MAX_CHUNK_SIZE
-            samples = torch.chunk(sample, chunks=num_chunks, dim=2)
-
-            for chunk_idx, sample1 in enumerate(samples):
-                run_up(idx + 1, sample1, ended and chunk_idx == len(samples) - 1)
-
-        run_up(0, sample, True)
-        sample = torch.cat(output, dim=2)
+        sample = self.conv_act(sample)
+        sample = self.conv_out(sample, causal=self.causal)

        sample = unpatchify(sample, patch_size_hw=self.patch_size, patch_size_t=1)

        return sample

-    def forward(self, *args, **kwargs):
-        try:
-            return self.forward_orig(*args, **kwargs)
-        finally:
-            for _, module in self.named_modules():
-                #ComfyUI doesn't thread this kind of stuff today, but just incase
-                #we key on the thread to make it thread safe.
-                tid = threading.get_ident()
-                if hasattr(module, "temporal_cache_state"):
-                    module.temporal_cache_state.pop(tid, None)
-

 class UNetMidBlock3D(nn.Module):
    """
@@ -727,22 +663,8 @@ class DepthToSpaceUpsample(nn.Module):
        )
        self.residual = residual
        self.out_channels_reduction_factor = out_channels_reduction_factor
-        self.temporal_cache_state = {}

    def forward(self, x, causal: bool = True, timestep: Optional[torch.Tensor] = None):
-        tid = threading.get_ident()
-        cached, drop_first_conv, drop_first_res = self.temporal_cache_state.get(tid, (None, True, True))
-        y = self.conv(x, causal=causal)
-        y = rearrange(
-            y,
-            "b (c p1 p2 p3) d h w -> b c (d p1) (h p2) (w p3)",
-            p1=self.stride[0],
-            p2=self.stride[1],
-            p3=self.stride[2],
-        )
-        if self.stride[0] == 2 and y.shape[2] > 0 and drop_first_conv:
-            y = y[:, :, 1:, :, :]
-            drop_first_conv = False
        if self.residual:
            # Reshape and duplicate the input to match the output shape
            x_in = rearrange(
@@ -754,20 +676,21 @@ class DepthToSpaceUpsample(nn.Module):
            )
            num_repeat = math.prod(self.stride) // self.out_channels_reduction_factor
            x_in = x_in.repeat(1, num_repeat, 1, 1, 1)
-            if self.stride[0] == 2 and x_in.shape[2] > 0 and drop_first_res:
+            if self.stride[0] == 2:
                x_in = x_in[:, :, 1:, :, :]
-                drop_first_res = False
-
-            if y.shape[2] == 0:
-                y = None
-
-            cached = add_exchange_cache(y, cached, x_in, dim=2)
-            self.temporal_cache_state[tid] = (cached, drop_first_conv, drop_first_res)
-
-        else:
-            self.temporal_cache_state[tid] = (None, drop_first_conv, False)
-
-        return y
+        x = self.conv(x, causal=causal)
+        x = rearrange(
+            x,
+            "b (c p1 p2 p3) d h w -> b c (d p1) (h p2) (w p3)",
+            p1=self.stride[0],
+            p2=self.stride[1],
+            p3=self.stride[2],
+        )
+        if self.stride[0] == 2:
+            x = x[:, :, 1:, :, :]
+        if self.residual:
+            x = x + x_in
+        return x

 class LayerNorm(nn.Module):
    def __init__(self, dim, eps, elementwise_affine=True) -> None:
@@ -884,8 +807,6 @@ class ResnetBlock3D(nn.Module):
                torch.randn(4, in_channels) / in_channels**0.5
            )

-        self.temporal_cache_state={}
-
    def _feed_spatial_noise(
        self, hidden_states: torch.FloatTensor, per_channel_scale: torch.FloatTensor
    ) -> torch.FloatTensor:
@@ -959,12 +880,9 @@ class ResnetBlock3D(nn.Module):

        input_tensor = self.conv_shortcut(input_tensor)

-        tid = threading.get_ident()
-        cached = self.temporal_cache_state.get(tid, None)
-        cached = add_exchange_cache(hidden_states, cached, input_tensor, dim=2)
-        self.temporal_cache_state[tid] = cached
+        output_tensor = input_tensor + hidden_states

-        return hidden_states
+        return output_tensor


 def patchify(x, patch_size_hw, patch_size_t=1):
--- a/comfy/ldm/lumina/model.py
+++ b/comfy/ldm/lumina/model.py
@@ -13,53 +13,10 @@ from comfy.ldm.modules.attention import optimized_attention_masked
 from comfy.ldm.flux.layers import EmbedND
 from comfy.ldm.flux.math import apply_rope
 import comfy.patcher_extension
-import comfy.utils


-def invert_slices(slices, length):
-    sorted_slices = sorted(slices)
-    result = []
-    current = 0
-
-    for start, end in sorted_slices:
-        if current < start:
-            result.append((current, start))
-        current = max(current, end)
-
-    if current < length:
-        result.append((current, length))
-
-    return result
-
-
-def modulate(x, scale, timestep_zero_index=None):
-    if timestep_zero_index is None:
-        return x * (1 + scale.unsqueeze(1))
-    else:
-        scale = (1 + scale.unsqueeze(1))
-        actual_batch = scale.size(0) // 2
-        slices = timestep_zero_index
-        invert = invert_slices(timestep_zero_index, x.shape[1])
-        for s in slices:
-            x[:, s[0]:s[1]] *= scale[actual_batch:]
-        for s in invert:
-            x[:, s[0]:s[1]] *= scale[:actual_batch]
-        return x
-
-
-def apply_gate(gate, x, timestep_zero_index=None):
-    if timestep_zero_index is None:
-        return gate * x
-    else:
-        actual_batch = gate.size(0) // 2
-
-        slices = timestep_zero_index
-        invert = invert_slices(timestep_zero_index, x.shape[1])
-        for s in slices:
-            x[:, s[0]:s[1]] *= gate[actual_batch:]
-        for s in invert:
-            x[:, s[0]:s[1]] *= gate[:actual_batch]
-        return x
+def modulate(x, scale):  # scale x by (1 + scale); a zero scale leaves x unchanged
+    return x * (1 + scale.unsqueeze(1))  # unsqueeze(1) adds a size-1 axis at dim 1 so scale broadcasts over x's dim 1 (presumably the token axis; confirm)

 #############################################################################
 #                               Core NextDiT Model                              #
@@ -301,7 +258,6 @@ class JointTransformerBlock(nn.Module):
        x_mask: torch.Tensor,
        freqs_cis: torch.Tensor,
        adaln_input: Optional[torch.Tensor]=None,
-        timestep_zero_index=None,
        transformer_options={},
    ):
        """
@@ -320,18 +276,18 @@ class JointTransformerBlock(nn.Module):
            assert adaln_input is not None
            scale_msa, gate_msa, scale_mlp, gate_mlp = self.adaLN_modulation(adaln_input).chunk(4, dim=1)

-            x = x + apply_gate(gate_msa.unsqueeze(1).tanh(), self.attention_norm2(
+            x = x + gate_msa.unsqueeze(1).tanh() * self.attention_norm2(
                clamp_fp16(self.attention(
-                    modulate(self.attention_norm1(x), scale_msa, timestep_zero_index=timestep_zero_index),
+                    modulate(self.attention_norm1(x), scale_msa),
                    x_mask,
                    freqs_cis,
                    transformer_options=transformer_options,
-                ))), timestep_zero_index=timestep_zero_index
+                ))
            )
-            x = x + apply_gate(gate_mlp.unsqueeze(1).tanh(), self.ffn_norm2(
+            x = x + gate_mlp.unsqueeze(1).tanh() * self.ffn_norm2(
                clamp_fp16(self.feed_forward(
-                    modulate(self.ffn_norm1(x), scale_mlp, timestep_zero_index=timestep_zero_index),
-                ))), timestep_zero_index=timestep_zero_index
+                    modulate(self.ffn_norm1(x), scale_mlp),
+                ))
            )
        else:
            assert adaln_input is None
@@ -389,37 +345,13 @@ class FinalLayer(nn.Module):
            ),
        )

-    def forward(self, x, c, timestep_zero_index=None):
+    def forward(self, x, c):  # final projection: x is normalised, scaled using c, then passed through self.linear
        scale = self.adaLN_modulation(c)
-        x = modulate(self.norm_final(x), scale, timestep_zero_index=timestep_zero_index)
+        x = modulate(self.norm_final(x), scale)  # modulate() multiplies by (1 + scale), per its definition added earlier in this patch
        x = self.linear(x)
        return x


-def pad_zimage(feats, pad_token, pad_tokens_multiple):
-    pad_extra = (-feats.shape[1]) % pad_tokens_multiple
-    return torch.cat((feats, pad_token.to(device=feats.device, dtype=feats.dtype, copy=True).unsqueeze(0).repeat(feats.shape[0], pad_extra, 1)), dim=1), pad_extra
-
-
-def pos_ids_x(start_t, H_tokens, W_tokens, batch_size, device, transformer_options={}):
-    rope_options = transformer_options.get("rope_options", None)
-    h_scale = 1.0
-    w_scale = 1.0
-    h_start = 0
-    w_start = 0
-    if rope_options is not None:
-        h_scale = rope_options.get("scale_y", 1.0)
-        w_scale = rope_options.get("scale_x", 1.0)
-
-        h_start = rope_options.get("shift_y", 0.0)
-        w_start = rope_options.get("shift_x", 0.0)
-    x_pos_ids = torch.zeros((batch_size, H_tokens * W_tokens, 3), dtype=torch.float32, device=device)
-    x_pos_ids[:, :, 0] = start_t
-    x_pos_ids[:, :, 1] = (torch.arange(H_tokens, dtype=torch.float32, device=device) * h_scale + h_start).view(-1, 1).repeat(1, W_tokens).flatten()
-    x_pos_ids[:, :, 2] = (torch.arange(W_tokens, dtype=torch.float32, device=device) * w_scale + w_start).view(1, -1).repeat(H_tokens, 1).flatten()
-    return x_pos_ids
-
-
 class NextDiT(nn.Module):
    """
    Diffusion model with a Transformer backbone.
@@ -446,12 +378,10 @@ class NextDiT(nn.Module):
        time_scale=1.0,
        pad_tokens_multiple=None,
        clip_text_dim=None,
-        siglip_feat_dim=None,
        image_model=None,
        device=None,
        dtype=None,
        operations=None,
-        **kwargs,
    ) -> None:
        super().__init__()
        self.dtype = dtype
@@ -561,41 +491,6 @@ class NextDiT(nn.Module):
                for layer_id in range(n_layers)
            ]
        )
-
-        if siglip_feat_dim is not None:
-            self.siglip_embedder = nn.Sequential(
-                operation_settings.get("operations").RMSNorm(siglip_feat_dim, eps=norm_eps, elementwise_affine=True, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")),
-                operation_settings.get("operations").Linear(
-                    siglip_feat_dim,
-                    dim,
-                    bias=True,
-                    device=operation_settings.get("device"),
-                    dtype=operation_settings.get("dtype"),
-                ),
-            )
-            self.siglip_refiner = nn.ModuleList(
-                [
-                    JointTransformerBlock(
-                        layer_id,
-                        dim,
-                        n_heads,
-                        n_kv_heads,
-                        multiple_of,
-                        ffn_dim_multiplier,
-                        norm_eps,
-                        qk_norm,
-                        modulation=False,
-                        operation_settings=operation_settings,
-                    )
-                    for layer_id in range(n_refiner_layers)
-                ]
-            )
-            self.siglip_pad_token = nn.Parameter(torch.empty((1, dim), device=device, dtype=dtype))
-        else:
-            self.siglip_embedder = None
-            self.siglip_refiner = None
-            self.siglip_pad_token = None
-
        # This norm final is in the lumina 2.0 code but isn't actually used for anything.
        # self.norm_final = operation_settings.get("operations").RMSNorm(dim, eps=norm_eps, elementwise_affine=True, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
        self.final_layer = FinalLayer(dim, patch_size, self.out_channels, z_image_modulation=z_image_modulation, operation_settings=operation_settings)
@@ -636,168 +531,70 @@ class NextDiT(nn.Module):
            imgs = torch.stack(imgs, dim=0)
        return imgs

-    def embed_cap(self, cap_feats=None, offset=0, bsz=1, device=None, dtype=None):
-        if cap_feats is not None:
-            cap_feats = self.cap_embedder(cap_feats)
-            cap_feats_len = cap_feats.shape[1]
-            if self.pad_tokens_multiple is not None:
-                cap_feats, _ = pad_zimage(cap_feats, self.cap_pad_token, self.pad_tokens_multiple)
-        else:
-            cap_feats_len = 0
-            cap_feats = self.cap_pad_token.to(device=device, dtype=dtype, copy=True).unsqueeze(0).repeat(bsz, self.pad_tokens_multiple, 1)
+    def patchify_and_embed(
+        self, x: List[torch.Tensor] | torch.Tensor, cap_feats: torch.Tensor, cap_mask: torch.Tensor, t: torch.Tensor, num_tokens, transformer_options={}
+    ) -> Tuple[torch.Tensor, torch.Tensor, List[Tuple[int, int]], List[int], torch.Tensor]:
+        bsz = len(x)
+        pH = pW = self.patch_size
+        device = x[0].device
+        orig_x = x
+
+        if self.pad_tokens_multiple is not None:
+            pad_extra = (-cap_feats.shape[1]) % self.pad_tokens_multiple
+            cap_feats = torch.cat((cap_feats, self.cap_pad_token.to(device=cap_feats.device, dtype=cap_feats.dtype, copy=True).unsqueeze(0).repeat(cap_feats.shape[0], pad_extra, 1)), dim=1)

        cap_pos_ids = torch.zeros(bsz, cap_feats.shape[1], 3, dtype=torch.float32, device=device)
-        cap_pos_ids[:, :, 0] = torch.arange(cap_feats.shape[1], dtype=torch.float32, device=device) + 1.0 + offset
-        embeds = (cap_feats,)
-        freqs_cis = (self.rope_embedder(cap_pos_ids).movedim(1, 2),)
-        return embeds, freqs_cis, cap_feats_len
-
-    def embed_all(self, x, cap_feats=None, siglip_feats=None, offset=0, omni=False, transformer_options={}):
-        bsz = 1
-        pH = pW = self.patch_size
-        device = x.device
-        embeds, freqs_cis, cap_feats_len = self.embed_cap(cap_feats, offset=offset, bsz=bsz, device=device, dtype=x.dtype)
-
-        if (not omni) or self.siglip_embedder is None:
-            cap_feats_len = embeds[0].shape[1] + offset
-            embeds += (None,)
-            freqs_cis += (None,)
-        else:
-            cap_feats_len += offset
-            if siglip_feats is not None:
-                b, h, w, c = siglip_feats.shape
-                siglip_feats = siglip_feats.permute(0, 3, 1, 2).reshape(b, h * w, c)
-                siglip_feats = self.siglip_embedder(siglip_feats)
-                siglip_pos_ids = torch.zeros((bsz, siglip_feats.shape[1], 3), dtype=torch.float32, device=device)
-                siglip_pos_ids[:, :, 0] = cap_feats_len + 2
-                siglip_pos_ids[:, :, 1] = (torch.linspace(0, h * 8 - 1, steps=h, dtype=torch.float32, device=device).floor()).view(-1, 1).repeat(1, w).flatten()
-                siglip_pos_ids[:, :, 2] = (torch.linspace(0, w * 8 - 1, steps=w, dtype=torch.float32, device=device).floor()).view(1, -1).repeat(h, 1).flatten()
-                if self.siglip_pad_token is not None:
-                    siglip_feats, pad_extra = pad_zimage(siglip_feats, self.siglip_pad_token, self.pad_tokens_multiple)  # TODO: double check
-                    siglip_pos_ids = torch.nn.functional.pad(siglip_pos_ids, (0, 0, 0, pad_extra))
-            else:
-                if self.siglip_pad_token is not None:
-                    siglip_feats = self.siglip_pad_token.to(device=device, dtype=x.dtype, copy=True).unsqueeze(0).repeat(bsz, self.pad_tokens_multiple, 1)
-                    siglip_pos_ids = torch.zeros((bsz, siglip_feats.shape[1], 3), dtype=torch.float32, device=device)
-
-            if siglip_feats is None:
-                embeds += (None,)
-                freqs_cis += (None,)
-            else:
-                embeds += (siglip_feats,)
-                freqs_cis += (self.rope_embedder(siglip_pos_ids).movedim(1, 2),)
+        cap_pos_ids[:, :, 0] = torch.arange(cap_feats.shape[1], dtype=torch.float32, device=device) + 1.0

        B, C, H, W = x.shape
        x = self.x_embedder(x.view(B, C, H // pH, pH, W // pW, pW).permute(0, 2, 4, 3, 5, 1).flatten(3).flatten(1, 2))
-        x_pos_ids = pos_ids_x(cap_feats_len + 1, H // pH, W // pW, bsz, device, transformer_options=transformer_options)
+
+        rope_options = transformer_options.get("rope_options", None)
+        h_scale = 1.0
+        w_scale = 1.0
+        h_start = 0
+        w_start = 0
+        if rope_options is not None:
+            h_scale = rope_options.get("scale_y", 1.0)
+            w_scale = rope_options.get("scale_x", 1.0)
+
+            h_start = rope_options.get("shift_y", 0.0)
+            w_start = rope_options.get("shift_x", 0.0)
+
+        H_tokens, W_tokens = H // pH, W // pW
+        x_pos_ids = torch.zeros((bsz, x.shape[1], 3), dtype=torch.float32, device=device)
+        x_pos_ids[:, :, 0] = cap_feats.shape[1] + 1
+        x_pos_ids[:, :, 1] = (torch.arange(H_tokens, dtype=torch.float32, device=device) * h_scale + h_start).view(-1, 1).repeat(1, W_tokens).flatten()
+        x_pos_ids[:, :, 2] = (torch.arange(W_tokens, dtype=torch.float32, device=device) * w_scale + w_start).view(1, -1).repeat(H_tokens, 1).flatten()
+
        if self.pad_tokens_multiple is not None:
-            x, pad_extra = pad_zimage(x, self.x_pad_token, self.pad_tokens_multiple)
+            pad_extra = (-x.shape[1]) % self.pad_tokens_multiple
+            x = torch.cat((x, self.x_pad_token.to(device=x.device, dtype=x.dtype, copy=True).unsqueeze(0).repeat(x.shape[0], pad_extra, 1)), dim=1)
            x_pos_ids = torch.nn.functional.pad(x_pos_ids, (0, 0, 0, pad_extra))

-        embeds += (x,)
-        freqs_cis += (self.rope_embedder(x_pos_ids).movedim(1, 2),)
-        return embeds, freqs_cis, cap_feats_len + len(freqs_cis) - 1
-
-
-    def patchify_and_embed(
-        self, x: torch.Tensor, cap_feats: torch.Tensor, cap_mask: torch.Tensor, t: torch.Tensor, num_tokens, ref_latents=[], ref_contexts=[], siglip_feats=[], transformer_options={}
-    ) -> Tuple[torch.Tensor, torch.Tensor, List[Tuple[int, int]], List[int], torch.Tensor]:
-        bsz = x.shape[0]
-        cap_mask = None  # TODO?
-        main_siglip = None
-        orig_x = x
-
-        embeds = ([], [], [])
-        freqs_cis = ([], [], [])
-        leftover_cap = []
-
-        start_t = 0
-        omni = len(ref_latents) > 0
-        if omni:
-            for i, ref in enumerate(ref_latents):
-                if i < len(ref_contexts):
-                    ref_con = ref_contexts[i]
-                else:
-                    ref_con = None
-                if i < len(siglip_feats):
-                    sig_feat = siglip_feats[i]
-                else:
-                    sig_feat = None
-
-                out = self.embed_all(ref, ref_con, sig_feat, offset=start_t, omni=omni, transformer_options=transformer_options)
-                for i, e in enumerate(out[0]):
-                    if e is not None:
-                        embeds[i].append(comfy.utils.repeat_to_batch_size(e, bsz))
-                        freqs_cis[i].append(out[1][i])
-                start_t = out[2]
-            leftover_cap = ref_contexts[len(ref_latents):]
-
-        H, W = x.shape[-2], x.shape[-1]
-        img_sizes = [(H, W)] * bsz
-        out = self.embed_all(x, cap_feats, main_siglip, offset=start_t, omni=omni, transformer_options=transformer_options)
-        img_len = out[0][-1].shape[1]
-        cap_len = out[0][0].shape[1]
-        for i, e in enumerate(out[0]):
-            if e is not None:
-                e = comfy.utils.repeat_to_batch_size(e, bsz)
-                embeds[i].append(e)
-                freqs_cis[i].append(out[1][i])
-        start_t = out[2]
-
-        for cap in leftover_cap:
-            out = self.embed_cap(cap, offset=start_t, bsz=bsz, device=x.device, dtype=x.dtype)
-            cap_len += out[0][0].shape[1]
-            embeds[0].append(comfy.utils.repeat_to_batch_size(out[0][0], bsz))
-            freqs_cis[0].append(out[1][0])
-            start_t += out[2]
+        freqs_cis = self.rope_embedder(torch.cat((cap_pos_ids, x_pos_ids), dim=1)).movedim(1, 2)

        patches = transformer_options.get("patches", {})

        # refine context
-        cap_feats = torch.cat(embeds[0], dim=1)
-        cap_freqs_cis = torch.cat(freqs_cis[0], dim=1)
        for layer in self.context_refiner:
-            cap_feats = layer(cap_feats, cap_mask, cap_freqs_cis, transformer_options=transformer_options)
-
-        feats = (cap_feats,)
-        fc = (cap_freqs_cis,)
-
-        if omni and len(embeds[1]) > 0:
-            siglip_mask = None
-            siglip_feats_combined = torch.cat(embeds[1], dim=1)
-            siglip_feats_freqs_cis = torch.cat(freqs_cis[1], dim=1)
-            if self.siglip_refiner is not None:
-                for layer in self.siglip_refiner:
-                    siglip_feats_combined = layer(siglip_feats_combined, siglip_mask, siglip_feats_freqs_cis, transformer_options=transformer_options)
-            feats += (siglip_feats_combined,)
-            fc += (siglip_feats_freqs_cis,)
+            cap_feats = layer(cap_feats, cap_mask, freqs_cis[:, :cap_pos_ids.shape[1]], transformer_options=transformer_options)

        padded_img_mask = None
-        x = torch.cat(embeds[-1], dim=1)
-        fc_x = torch.cat(freqs_cis[-1], dim=1)
-        if omni:
-            timestep_zero_index = [(x.shape[1] - img_len, x.shape[1])]
-        else:
-            timestep_zero_index = None
-
        x_input = x
        for i, layer in enumerate(self.noise_refiner):
-            x = layer(x, padded_img_mask, fc_x, t, timestep_zero_index=timestep_zero_index, transformer_options=transformer_options)
+            x = layer(x, padded_img_mask, freqs_cis[:, cap_pos_ids.shape[1]:], t, transformer_options=transformer_options)
            if "noise_refiner" in patches:
                for p in patches["noise_refiner"]:
-                    out = p({"img": x, "img_input": x_input, "txt": cap_feats, "pe": fc_x, "vec": t, "x": orig_x, "block_index": i, "transformer_options": transformer_options, "block_type": "noise_refiner"})
+                    out = p({"img": x, "img_input": x_input, "txt": cap_feats, "pe": freqs_cis[:, cap_pos_ids.shape[1]:], "vec": t, "x": orig_x, "block_index": i, "transformer_options": transformer_options, "block_type": "noise_refiner"})
                    if "img" in out:
                        x = out["img"]

-        padded_full_embed = torch.cat(feats + (x,), dim=1)
-        if timestep_zero_index is not None:
-            ind = padded_full_embed.shape[1] - x.shape[1]
-            timestep_zero_index = [(ind + x.shape[1] - img_len, ind + x.shape[1])]
-            timestep_zero_index.append((feats[0].shape[1] - cap_len, feats[0].shape[1]))
-
+        padded_full_embed = torch.cat((cap_feats, x), dim=1)
        mask = None
-        l_effective_cap_len = [padded_full_embed.shape[1] - img_len] * bsz
-        return padded_full_embed, mask, img_sizes, l_effective_cap_len, torch.cat(fc + (fc_x,), dim=1), timestep_zero_index
+        img_sizes = [(H, W)] * bsz
+        l_effective_cap_len = [cap_feats.shape[1]] * bsz
+        return padded_full_embed, mask, img_sizes, l_effective_cap_len, freqs_cis

    def forward(self, x, timesteps, context, num_tokens, attention_mask=None, **kwargs):
        return comfy.patcher_extension.WrapperExecutor.new_class_executor(
@@ -807,11 +604,7 @@ class NextDiT(nn.Module):
        ).execute(x, timesteps, context, num_tokens, attention_mask, **kwargs)

    # def forward(self, x, t, cap_feats, cap_mask):
-    def _forward(self, x, timesteps, context, num_tokens, attention_mask=None, ref_latents=[], ref_contexts=[], siglip_feats=[], transformer_options={}, **kwargs):
-        omni = len(ref_latents) > 0
-        if omni:
-            timesteps = torch.cat([timesteps * 0, timesteps], dim=0)
-
+    def _forward(self, x, timesteps, context, num_tokens, attention_mask=None, transformer_options={}, **kwargs):
        t = 1.0 - timesteps
        cap_feats = context
        cap_mask = attention_mask
@@ -826,6 +619,8 @@ class NextDiT(nn.Module):
        t = self.t_embedder(t * self.time_scale, dtype=x.dtype)  # (N, D)
        adaln_input = t

+        cap_feats = self.cap_embedder(cap_feats)  # (N, L, D)  # todo check if able to batchify w.o. redundant compute
+
        if self.clip_text_pooled_proj is not None:
            pooled = kwargs.get("clip_text_pooled", None)
            if pooled is not None:
@@ -837,7 +632,7 @@ class NextDiT(nn.Module):

        patches = transformer_options.get("patches", {})
        x_is_tensor = isinstance(x, torch.Tensor)
-        img, mask, img_size, cap_size, freqs_cis, timestep_zero_index = self.patchify_and_embed(x, cap_feats, cap_mask, adaln_input, num_tokens, ref_latents=ref_latents, ref_contexts=ref_contexts, siglip_feats=siglip_feats, transformer_options=transformer_options)
+        img, mask, img_size, cap_size, freqs_cis = self.patchify_and_embed(x, cap_feats, cap_mask, adaln_input, num_tokens, transformer_options=transformer_options)
        freqs_cis = freqs_cis.to(img.device)

        transformer_options["total_blocks"] = len(self.layers)
@@ -845,7 +640,7 @@ class NextDiT(nn.Module):
        img_input = img
        for i, layer in enumerate(self.layers):
            transformer_options["block_index"] = i
-            img = layer(img, mask, freqs_cis, adaln_input, timestep_zero_index=timestep_zero_index, transformer_options=transformer_options)
+            img = layer(img, mask, freqs_cis, adaln_input, transformer_options=transformer_options)
            if "double_block" in patches:
                for p in patches["double_block"]:
                    out = p({"img": img[:, cap_size[0]:], "img_input": img_input[:, cap_size[0]:], "txt": img[:, :cap_size[0]], "pe": freqs_cis[:, cap_size[0]:], "vec": adaln_input, "x": x, "block_index": i, "transformer_options": transformer_options})
@@ -854,7 +649,8 @@ class NextDiT(nn.Module):
                    if "txt" in out:
                        img[:, :cap_size[0]] = out["txt"]

-        img = self.final_layer(img, adaln_input, timestep_zero_index=timestep_zero_index)
+        img = self.final_layer(img, adaln_input)
        img = self.unpatchify(img, img_size, cap_size, return_tensor=x_is_tensor)[:, :, :h, :w]
+
        return -img

--- a/comfy/ldm/modules/attention.py
+++ b/comfy/ldm/modules/attention.py
@@ -524,9 +524,6 @@ def attention_pytorch(q, k, v, heads, mask=None, attn_precision=None, skip_resha

@wrap_attn
 def attention_sage(q, k, v, heads, mask=None, attn_precision=None, skip_reshape=False, skip_output_reshape=False, **kwargs):
-    if kwargs.get("low_precision_attention", True) is False:
-        return attention_pytorch(q, k, v, heads, mask=mask, skip_reshape=skip_reshape, skip_output_reshape=skip_output_reshape, **kwargs)
-
    exception_fallback = False
    if skip_reshape:
        b, _, _, dim_head = q.shape
--- a/comfy/ldm/modules/diffusionmodules/model.py
+++ b/comfy/ldm/modules/diffusionmodules/model.py
@@ -14,13 +14,10 @@ if model_management.xformers_enabled_vae():
    import xformers.ops

 def torch_cat_if_needed(xl, dim):
-    xl = [x for x in xl if x is not None and x.shape[dim] > 0]
    if len(xl) > 1:
        return torch.cat(xl, dim)
-    elif len(xl) == 1:
-        return xl[0]
    else:
-        return None
+        return xl[0]  # one tensor: returned as-is, no concat; assumes xl is non-empty (the removed version returned None for an empty list)

 def get_timestep_embedding(timesteps, embedding_dim):
    """
@@ -102,7 +99,19 @@ class VideoConv3d(nn.Module):
        return self.conv(x)

 def interpolate_up(x, scale_factor):
-    return torch.nn.functional.interpolate(x, scale_factor=scale_factor, mode="nearest")
+    """Nearest-neighbour upsample of x; retries in float32, a few channels at a time, if the direct call fails."""
+    try:
+        return torch.nn.functional.interpolate(x, scale_factor=scale_factor, mode="nearest")
+    except Exception:  # e.g. operation not implemented for bf16; unlike a bare except, Ctrl-C still propagates
+        # Fallback output shape: first two dims kept, each later dim scaled (scale_factor is indexed per dim here).
+        out_shape = list(x.shape[:2]) + [round(x.shape[i + 2] * scale_factor[i]) for i in range(x.ndim - 2)]
+        out = torch.empty(out_shape, dtype=x.dtype, layout=x.layout, device=x.device)
+        # Aim for ~8 channel slices so only a fraction of x is upcast at once; step must be >= 1 because
+        # range() rejects a zero step (which out.shape[1] // 8 yields for fewer than 8 channels).
+        step = max(1, out.shape[1] // 8)
+        for i in range(0, out.shape[1], step):
+            out[:, i:i + step] = torch.nn.functional.interpolate(x[:, i:i + step].to(torch.float32), scale_factor=scale_factor, mode="nearest").to(x.dtype)
+        return out

 class Upsample(nn.Module):
    def __init__(self, in_channels, with_conv, conv_op=ops.Conv2d, scale_factor=2.0):
--- a/comfy/ldm/qwen_image/controlnet.py
+++ b/comfy/ldm/qwen_image/controlnet.py
@@ -2,196 +2,6 @@ import torch
 import math

 from .model import QwenImageTransformer2DModel
-from .model import QwenImageTransformerBlock
-
-
-class QwenImageFunControlBlock(QwenImageTransformerBlock):
-    def __init__(self, dim, num_attention_heads, attention_head_dim, has_before_proj=False, dtype=None, device=None, operations=None):
-        super().__init__(
-            dim=dim,
-            num_attention_heads=num_attention_heads,
-            attention_head_dim=attention_head_dim,
-            dtype=dtype,
-            device=device,
-            operations=operations,
-        )
-        self.has_before_proj = has_before_proj
-        if has_before_proj:
-            self.before_proj = operations.Linear(dim, dim, device=device, dtype=dtype)
-        self.after_proj = operations.Linear(dim, dim, device=device, dtype=dtype)
-
-
-class QwenImageFunControlNetModel(torch.nn.Module):
-    def __init__(
-        self,
-        control_in_features=132,
-        inner_dim=3072,
-        num_attention_heads=24,
-        attention_head_dim=128,
-        num_control_blocks=5,
-        main_model_double=60,
-        injection_layers=(0, 12, 24, 36, 48),
-        dtype=None,
-        device=None,
-        operations=None,
-    ):
-        super().__init__()
-        self.dtype = dtype
-        self.main_model_double = main_model_double
-        self.injection_layers = tuple(injection_layers)
-        # Keep base hint scaling at 1.0 so user-facing strength behaves similarly
-        # to the reference Gen2/VideoX implementation around strength=1.
-        self.hint_scale = 1.0
-        self.control_img_in = operations.Linear(control_in_features, inner_dim, device=device, dtype=dtype)
-
-        self.control_blocks = torch.nn.ModuleList([])
-        for i in range(num_control_blocks):
-            self.control_blocks.append(
-                QwenImageFunControlBlock(
-                    dim=inner_dim,
-                    num_attention_heads=num_attention_heads,
-                    attention_head_dim=attention_head_dim,
-                    has_before_proj=(i == 0),
-                    dtype=dtype,
-                    device=device,
-                    operations=operations,
-                )
-            )
-
-    def _process_hint_tokens(self, hint):
-        if hint is None:
-            return None
-        if hint.ndim == 4:
-            hint = hint.unsqueeze(2)
-
-        # Fun checkpoints are trained with 33 latent channels before 2x2 packing:
-        # [control_latent(16), mask(1), inpaint_latent(16)] -> 132 features.
-        # Default behavior (no inpaint input in stock Apply ControlNet) should use
-        # zeros for mask/inpaint branches, matching VideoX fallback semantics.
-        expected_c = self.control_img_in.weight.shape[1] // 4
-        if hint.shape[1] == 16 and expected_c == 33:
-            zeros_mask = torch.zeros_like(hint[:, :1])
-            zeros_inpaint = torch.zeros_like(hint)
-            hint = torch.cat([hint, zeros_mask, zeros_inpaint], dim=1)
-
-        bs, c, t, h, w = hint.shape
-        hidden_states = torch.nn.functional.pad(hint, (0, w % 2, 0, h % 2))
-        orig_shape = hidden_states.shape
-        hidden_states = hidden_states.view(
-            orig_shape[0],
-            orig_shape[1],
-            orig_shape[-3],
-            orig_shape[-2] // 2,
-            2,
-            orig_shape[-1] // 2,
-            2,
-        )
-        hidden_states = hidden_states.permute(0, 2, 3, 5, 1, 4, 6)
-        hidden_states = hidden_states.reshape(
-            bs,
-            t * ((h + 1) // 2) * ((w + 1) // 2),
-            c * 4,
-        )
-
-        expected_in = self.control_img_in.weight.shape[1]
-        cur_in = hidden_states.shape[-1]
-        if cur_in < expected_in:
-            pad = torch.zeros(
-                (hidden_states.shape[0], hidden_states.shape[1], expected_in - cur_in),
-                device=hidden_states.device,
-                dtype=hidden_states.dtype,
-            )
-            hidden_states = torch.cat([hidden_states, pad], dim=-1)
-        elif cur_in > expected_in:
-            hidden_states = hidden_states[:, :, :expected_in]
-
-        return hidden_states
-
-    def forward(
-        self,
-        x,
-        timesteps,
-        context,
-        attention_mask=None,
-        guidance: torch.Tensor = None,
-        hint=None,
-        transformer_options={},
-        base_model=None,
-        **kwargs,
-    ):
-        if base_model is None:
-            raise RuntimeError("Qwen Fun ControlNet requires a QwenImage base model at runtime.")
-
-        encoder_hidden_states_mask = attention_mask
-        # Keep attention mask disabled inside Fun control blocks to mirror
-        # VideoX behavior (they rely on seq lengths for RoPE, not masked attention).
-        encoder_hidden_states_mask = None
-
-        hidden_states, img_ids, _ = base_model.process_img(x)
-        hint_tokens = self._process_hint_tokens(hint)
-        if hint_tokens is None:
-            raise RuntimeError("Qwen Fun ControlNet requires a control hint image.")
-
-        if hint_tokens.shape[1] != hidden_states.shape[1]:
-            max_tokens = min(hint_tokens.shape[1], hidden_states.shape[1])
-            hint_tokens = hint_tokens[:, :max_tokens]
-            hidden_states = hidden_states[:, :max_tokens]
-            img_ids = img_ids[:, :max_tokens]
-
-        txt_start = round(
-            max(
-                ((x.shape[-1] + (base_model.patch_size // 2)) // base_model.patch_size) // 2,
-                ((x.shape[-2] + (base_model.patch_size // 2)) // base_model.patch_size) // 2,
-            )
-        )
-        txt_ids = torch.arange(txt_start, txt_start + context.shape[1], device=x.device).reshape(1, -1, 1).repeat(x.shape[0], 1, 3)
-        ids = torch.cat((txt_ids, img_ids), dim=1)
-        image_rotary_emb = base_model.pe_embedder(ids).to(x.dtype).contiguous()
-
-        hidden_states = base_model.img_in(hidden_states)
-        encoder_hidden_states = base_model.txt_norm(context)
-        encoder_hidden_states = base_model.txt_in(encoder_hidden_states)
-
-        if guidance is not None:
-            guidance = guidance * 1000
-
-        temb = (
-            base_model.time_text_embed(timesteps, hidden_states)
-            if guidance is None
-            else base_model.time_text_embed(timesteps, guidance, hidden_states)
-        )
-
-        c = self.control_img_in(hint_tokens)
-
-        for i, block in enumerate(self.control_blocks):
-            if i == 0:
-                c_in = block.before_proj(c) + hidden_states
-                all_c = []
-            else:
-                all_c = list(torch.unbind(c, dim=0))
-                c_in = all_c.pop(-1)
-
-            encoder_hidden_states, c_out = block(
-                hidden_states=c_in,
-                encoder_hidden_states=encoder_hidden_states,
-                encoder_hidden_states_mask=encoder_hidden_states_mask,
-                temb=temb,
-                image_rotary_emb=image_rotary_emb,
-                transformer_options=transformer_options,
-            )
-
-            c_skip = block.after_proj(c_out) * self.hint_scale
-            all_c += [c_skip, c_out]
-            c = torch.stack(all_c, dim=0)
-
-        hints = torch.unbind(c, dim=0)[:-1]
-
-        controlnet_block_samples = [None] * self.main_model_double
-        for local_idx, base_idx in enumerate(self.injection_layers):
-            if local_idx < len(hints) and base_idx < len(controlnet_block_samples):
-                controlnet_block_samples[base_idx] = hints[local_idx]
-
-        return {"input": controlnet_block_samples}


 class QwenImageControlNetModel(QwenImageTransformer2DModel):
--- a/comfy/ldm/qwen_image/model.py
+++ b/comfy/ldm/qwen_image/model.py
@@ -170,14 +170,8 @@ class Attention(nn.Module):
        joint_query = apply_rope1(joint_query, image_rotary_emb)
        joint_key = apply_rope1(joint_key, image_rotary_emb)

-        if encoder_hidden_states_mask is not None:
-            attn_mask = torch.zeros((batch_size, 1, seq_txt + seq_img), dtype=hidden_states.dtype, device=hidden_states.device)
-            attn_mask[:, 0, :seq_txt] = encoder_hidden_states_mask
-        else:
-            attn_mask = None
-
        joint_hidden_states = optimized_attention_masked(joint_query, joint_key, joint_value, self.heads,
-                                                         attn_mask, transformer_options=transformer_options,
+                                                         attention_mask, transformer_options=transformer_options,
                                                         skip_reshape=True)

        txt_attn_output = joint_hidden_states[:, :seq_txt, :]
@@ -436,9 +430,6 @@ class QwenImageTransformer2DModel(nn.Module):
        encoder_hidden_states = context
        encoder_hidden_states_mask = attention_mask

-        if encoder_hidden_states_mask is not None and not torch.is_floating_point(encoder_hidden_states_mask):
-            encoder_hidden_states_mask = (encoder_hidden_states_mask - 1).to(x.dtype) * torch.finfo(x.dtype).max
-
        hidden_states, img_ids, orig_shape = self.process_img(x)
        num_embeds = hidden_states.shape[1]

--- a/comfy/ldm/wan/model.py
+++ b/comfy/ldm/wan/model.py
@@ -62,8 +62,6 @@ class WanSelfAttention(nn.Module):
            x(Tensor): Shape [B, L, num_heads, C / num_heads]
            freqs(Tensor): Rope freqs, shape [1024, C / num_heads / 2]
        """
-        patches = transformer_options.get("patches", {})
-
        b, s, n, d = *x.shape[:2], self.num_heads, self.head_dim

        def qkv_fn_q(x):
@@ -88,10 +86,6 @@ class WanSelfAttention(nn.Module):
            transformer_options=transformer_options,
        )

-        if "attn1_patch" in patches:
-            for p in patches["attn1_patch"]:
-                x = p({"x": x, "q": q, "k": k, "transformer_options": transformer_options})
-
        x = self.o(x)
        return x

@@ -231,8 +225,6 @@ class WanAttentionBlock(nn.Module):
        """
        # assert e.dtype == torch.float32

-        patches = transformer_options.get("patches", {})
-
        if e.ndim < 4:
            e = (comfy.model_management.cast_to(self.modulation, dtype=x.dtype, device=x.device) + e).chunk(6, dim=1)
        else:
@@ -250,11 +242,6 @@ class WanAttentionBlock(nn.Module):

        # cross-attention & ffn
        x = x + self.cross_attn(self.norm3(x), context, context_img_len=context_img_len, transformer_options=transformer_options)
-
-        if "attn2_patch" in patches:
-            for p in patches["attn2_patch"]:
-                x = p({"x": x, "transformer_options": transformer_options})
-
        y = self.ffn(torch.addcmul(repeat_e(e[3], x), self.norm2(x), 1 + repeat_e(e[4], x)))
        x = torch.addcmul(x, y, repeat_e(e[5], x))
        return x
@@ -501,7 +488,7 @@ class WanModel(torch.nn.Module):
        self.blocks = nn.ModuleList([
            wan_attn_block_class(cross_attn_type, dim, ffn_dim, num_heads,
                                 window_size, qk_norm, cross_attn_norm, eps, operation_settings=operation_settings)
-            for i in range(num_layers)
+            for _ in range(num_layers)
        ])

        # head
@@ -554,7 +541,6 @@ class WanModel(torch.nn.Module):
        # embeddings
        x = self.patch_embedding(x.float()).to(x.dtype)
        grid_sizes = x.shape[2:]
-        transformer_options["grid_sizes"] = grid_sizes
        x = x.flatten(2).transpose(1, 2)

        # time embeddings
@@ -752,7 +738,6 @@ class VaceWanModel(WanModel):
        # embeddings
        x = self.patch_embedding(x.float()).to(x.dtype)
        grid_sizes = x.shape[2:]
-        transformer_options["grid_sizes"] = grid_sizes
        x = x.flatten(2).transpose(1, 2)

        # time embeddings
--- a/comfy/ldm/wan/model_multitalk.py
+++ b/comfy/ldm/wan/model_multitalk.py
@@ -1,500 +0,0 @@
-import torch
-from einops import rearrange, repeat
-import comfy
-from comfy.ldm.modules.attention import optimized_attention
-
-
-def calculate_x_ref_attn_map(visual_q, ref_k, ref_target_masks, split_num=8):
-    scale = 1.0 / visual_q.shape[-1] ** 0.5
-    visual_q = visual_q.transpose(1, 2) * scale
-
-    B, H, x_seqlens, K = visual_q.shape
-
-    x_ref_attn_maps = []
-    for class_idx, ref_target_mask in enumerate(ref_target_masks):
-        ref_target_mask = ref_target_mask.view(1, 1, 1, -1)
-
-        x_ref_attnmap = torch.zeros(B, H, x_seqlens, device=visual_q.device, dtype=visual_q.dtype)
-        chunk_size = min(max(x_seqlens // split_num, 1), x_seqlens)
-
-        for i in range(0, x_seqlens, chunk_size):
-            end_i = min(i + chunk_size, x_seqlens)
-
-            attn_chunk = visual_q[:, :, i:end_i] @ ref_k.permute(0, 2, 3, 1)  # B, H, chunk, ref_seqlens
-
-            # Apply softmax
-            attn_max = attn_chunk.max(dim=-1, keepdim=True).values
-            attn_chunk = (attn_chunk - attn_max).exp()
-            attn_sum = attn_chunk.sum(dim=-1, keepdim=True)
-            attn_chunk = attn_chunk / (attn_sum + 1e-8)
-
-            # Apply mask and sum
-            masked_attn = attn_chunk * ref_target_mask
-            x_ref_attnmap[:, :, i:end_i] = masked_attn.sum(-1) / (ref_target_mask.sum() + 1e-8)
-
-            del attn_chunk, masked_attn
-
-        # Average across heads
-        x_ref_attnmap = x_ref_attnmap.mean(dim=1)  # B, x_seqlens
-        x_ref_attn_maps.append(x_ref_attnmap)
-
-    del visual_q, ref_k
-
-    return torch.cat(x_ref_attn_maps, dim=0)
-
-def get_attn_map_with_target(visual_q, ref_k, shape, ref_target_masks=None, split_num=2):
-    """Args:
-        query (torch.tensor): B M H K
-        key (torch.tensor): B M H K
-        shape (tuple): (N_t, N_h, N_w)
-        ref_target_masks: [B, N_h * N_w]
-    """
-
-    N_t, N_h, N_w = shape
-
-    x_seqlens = N_h * N_w
-    ref_k     = ref_k[:, :x_seqlens]
-    _, seq_lens, heads, _ = visual_q.shape
-    class_num, _ = ref_target_masks.shape
-    x_ref_attn_maps = torch.zeros(class_num, seq_lens).to(visual_q)
-
-    split_chunk = heads // split_num
-
-    for i in range(split_num):
-        x_ref_attn_maps_perhead = calculate_x_ref_attn_map(
-            visual_q[:, :, i*split_chunk:(i+1)*split_chunk, :],
-            ref_k[:, :, i*split_chunk:(i+1)*split_chunk, :],
-            ref_target_masks
-            )
-        x_ref_attn_maps += x_ref_attn_maps_perhead
-
-    return x_ref_attn_maps / split_num
-
-
-def normalize_and_scale(column, source_range, target_range, epsilon=1e-8):
-    source_min, source_max = source_range
-    new_min, new_max = target_range
-    normalized = (column - source_min) / (source_max - source_min + epsilon)
-    scaled = normalized * (new_max - new_min) + new_min
-    return scaled
-
-
-def rotate_half(x):
-    x = rearrange(x, "... (d r) -> ... d r", r=2)
-    x1, x2 = x.unbind(dim=-1)
-    x = torch.stack((-x2, x1), dim=-1)
-    return rearrange(x, "... d r -> ... (d r)")
-
-
-def get_audio_embeds(encoded_audio, audio_start, audio_end):
-    audio_embs = []
-    human_num = len(encoded_audio)
-    audio_frames = encoded_audio[0].shape[0]
-
-    indices = (torch.arange(4 + 1) - 2) * 1
-
-    for human_idx in range(human_num):
-        if audio_end > audio_frames: # in case of not enough audio for current window, pad with first audio frame as that's most likely silence
-            pad_len = audio_end - audio_frames
-            pad_shape = list(encoded_audio[human_idx].shape)
-            pad_shape[0] = pad_len
-            pad_tensor = encoded_audio[human_idx][:1].repeat(pad_len, *([1] * (encoded_audio[human_idx].dim() - 1)))
-            encoded_audio_in = torch.cat([encoded_audio[human_idx], pad_tensor], dim=0)
-        else:
-            encoded_audio_in = encoded_audio[human_idx]
-        center_indices = torch.arange(audio_start, audio_end, 1).unsqueeze(1) + indices.unsqueeze(0)
-        center_indices = torch.clamp(center_indices, min=0, max=encoded_audio_in.shape[0] - 1)
-        audio_emb = encoded_audio_in[center_indices].unsqueeze(0)
-        audio_embs.append(audio_emb)
-
-    return torch.cat(audio_embs, dim=0)
-
-
-def project_audio_features(audio_proj, encoded_audio, audio_start, audio_end):
-    audio_embs = get_audio_embeds(encoded_audio, audio_start, audio_end)
-
-    first_frame_audio_emb_s = audio_embs[:, :1, ...]
-    latter_frame_audio_emb = audio_embs[:, 1:, ...]
-    latter_frame_audio_emb = rearrange(latter_frame_audio_emb, "b (n_t n) w s c -> b n_t n w s c", n=4)
-
-    middle_index = audio_proj.seq_len // 2
-
-    latter_first_frame_audio_emb = latter_frame_audio_emb[:, :, :1, :middle_index+1, ...]
-    latter_first_frame_audio_emb = rearrange(latter_first_frame_audio_emb, "b n_t n w s c -> b n_t (n w) s c")
-    latter_last_frame_audio_emb = latter_frame_audio_emb[:, :, -1:, middle_index:, ...]
-    latter_last_frame_audio_emb = rearrange(latter_last_frame_audio_emb, "b n_t n w s c -> b n_t (n w) s c")
-    latter_middle_frame_audio_emb = latter_frame_audio_emb[:, :, 1:-1, middle_index:middle_index+1, ...]
-    latter_middle_frame_audio_emb = rearrange(latter_middle_frame_audio_emb, "b n_t n w s c -> b n_t (n w) s c")
-    latter_frame_audio_emb_s = torch.cat([latter_first_frame_audio_emb, latter_middle_frame_audio_emb, latter_last_frame_audio_emb], dim=2)
-
-    audio_emb = audio_proj(first_frame_audio_emb_s, latter_frame_audio_emb_s)
-    audio_emb = torch.cat(audio_emb.split(1), dim=2)
-
-    return audio_emb
-
-
-class RotaryPositionalEmbedding1D(torch.nn.Module):
-    def __init__(self,
-                 head_dim,
-                 ):
-        super().__init__()
-        self.head_dim = head_dim
-        self.base = 10000
-
-    def precompute_freqs_cis_1d(self, pos_indices):
-        freqs = 1.0 / (self.base ** (torch.arange(0, self.head_dim, 2)[: (self.head_dim // 2)].float() / self.head_dim))
-        freqs = freqs.to(pos_indices.device)
-        freqs = torch.einsum("..., f -> ... f", pos_indices.float(), freqs)
-        freqs = repeat(freqs, "... n -> ... (n r)", r=2)
-        return freqs
-
-    def forward(self, x, pos_indices):
-        freqs_cis = self.precompute_freqs_cis_1d(pos_indices)
-
-        x_ = x.float()
-
-        freqs_cis = freqs_cis.float().to(x.device)
-        cos, sin = freqs_cis.cos(), freqs_cis.sin()
-        cos, sin = rearrange(cos, 'n d -> 1 1 n d'), rearrange(sin, 'n d -> 1 1 n d')
-        x_ = (x_ * cos) + (rotate_half(x_) * sin)
-
-        return x_.type_as(x)
-
-class SingleStreamAttention(torch.nn.Module):
-    def __init__(
-        self,
-        dim: int,
-        encoder_hidden_states_dim: int,
-        num_heads: int,
-        qkv_bias: bool,
-        device=None, dtype=None, operations=None
-    ) -> None:
-        super().__init__()
-        self.dim = dim
-        self.encoder_hidden_states_dim = encoder_hidden_states_dim
-        self.num_heads = num_heads
-        self.head_dim = dim // num_heads
-
-        self.q_linear = operations.Linear(dim, dim, bias=qkv_bias, device=device, dtype=dtype)
-        self.proj = operations.Linear(dim, dim, device=device, dtype=dtype)
-        self.kv_linear = operations.Linear(encoder_hidden_states_dim, dim * 2, bias=qkv_bias, device=device, dtype=dtype)
-
-    def forward(self, x: torch.Tensor, encoder_hidden_states: torch.Tensor, shape=None) -> torch.Tensor:
-        N_t, N_h, N_w = shape
-
-        expected_tokens = N_t * N_h * N_w
-        actual_tokens = x.shape[1]
-        x_extra = None
-
-        if actual_tokens != expected_tokens:
-            x_extra = x[:, -N_h * N_w:, :]
-            x = x[:, :-N_h * N_w, :]
-            N_t = N_t - 1
-
-        B = x.shape[0]
-        S = N_h * N_w
-        x = x.view(B * N_t, S, self.dim)
-
-        # get q for hidden_state
-        q = self.q_linear(x).view(B * N_t, S, self.num_heads, self.head_dim)
-
-        # get kv from encoder_hidden_states # shape: (B, N, num_heads, head_dim)
-        kv = self.kv_linear(encoder_hidden_states)
-        encoder_k, encoder_v = kv.view(B * N_t, encoder_hidden_states.shape[1], 2, self.num_heads, self.head_dim).unbind(2)
-
-        #print("q.shape", q.shape) #torch.Size([21, 1024, 40, 128])
-        x = optimized_attention(
-            q.transpose(1, 2),
-            encoder_k.transpose(1, 2),
-            encoder_v.transpose(1, 2),
-            heads=self.num_heads, skip_reshape=True, skip_output_reshape=True).transpose(1, 2)
-
-        # linear transform
-        x = self.proj(x.reshape(B * N_t, S, self.dim))
-        x = x.view(B, N_t * S, self.dim)
-
-        if x_extra is not None:
-            x = torch.cat([x, torch.zeros_like(x_extra)], dim=1)
-
-        return x
-
-class SingleStreamMultiAttention(SingleStreamAttention):
-    def __init__(
-        self,
-        dim: int,
-        encoder_hidden_states_dim: int,
-        num_heads: int,
-        qkv_bias: bool,
-        class_range: int = 24,
-        class_interval: int = 4,
-        device=None, dtype=None, operations=None
-    ) -> None:
-        super().__init__(
-            dim=dim,
-            encoder_hidden_states_dim=encoder_hidden_states_dim,
-            num_heads=num_heads,
-            qkv_bias=qkv_bias,
-            device=device,
-            dtype=dtype,
-            operations=operations
-        )
-
-        # Rotary-embedding layout parameters
-        self.class_interval = class_interval
-        self.class_range = class_range
-        self.max_humans = self.class_range // self.class_interval
-
-        # Constant bucket used for background tokens
-        self.rope_bak = int(self.class_range // 2)
-
-        self.rope_1d = RotaryPositionalEmbedding1D(self.head_dim)
-
-    def forward(
-        self,
-        x: torch.Tensor,
-        encoder_hidden_states: torch.Tensor,
-        shape=None,
-        x_ref_attn_map=None
-    ) -> torch.Tensor:
-        encoder_hidden_states = encoder_hidden_states.squeeze(0).to(x.device)
-        human_num = x_ref_attn_map.shape[0] if x_ref_attn_map is not None else 1
-        # Single-speaker fall-through
-        if human_num <= 1:
-            return super().forward(x, encoder_hidden_states, shape)
-
-        N_t, N_h, N_w = shape
-
-        x_extra = None
-        if x.shape[0] * N_t != encoder_hidden_states.shape[0]:
-            x_extra = x[:, -N_h * N_w:, :]
-            x = x[:, :-N_h * N_w, :]
-            N_t = N_t - 1
-        x = rearrange(x, "B (N_t S) C -> (B N_t) S C", N_t=N_t)
-
-        # Query projection
-        B, N, C = x.shape
-        q = self.q_linear(x)
-        q = q.view(B, N, self.num_heads, self.head_dim).permute(0, 2, 1, 3)
-
-        # Use `class_range` logic for 2 speakers
-        rope_h1 = (0, self.class_interval)
-        rope_h2 = (self.class_range - self.class_interval, self.class_range)
-        rope_bak = int(self.class_range // 2)
-
-        # Normalize and scale attention maps for each speaker
-        max_values = x_ref_attn_map.max(1).values[:, None, None]
-        min_values = x_ref_attn_map.min(1).values[:, None, None]
-        max_min_values = torch.cat([max_values, min_values], dim=2)
-
-        human1_max_value, human1_min_value = max_min_values[0, :, 0].max(), max_min_values[0, :, 1].min()
-        human2_max_value, human2_min_value = max_min_values[1, :, 0].max(), max_min_values[1, :, 1].min()
-
-        human1 = normalize_and_scale(x_ref_attn_map[0], (human1_min_value, human1_max_value), rope_h1)
-        human2 = normalize_and_scale(x_ref_attn_map[1], (human2_min_value, human2_max_value), rope_h2)
-        back = torch.full((x_ref_attn_map.size(1),), rope_bak, dtype=human1.dtype, device=human1.device)
-
-        # Token-wise speaker dominance
-        max_indices = x_ref_attn_map.argmax(dim=0)
-        normalized_map = torch.stack([human1, human2, back], dim=1)
-        normalized_pos = normalized_map[torch.arange(x_ref_attn_map.size(1)), max_indices]
-
-        # Apply rotary to Q
-        q = rearrange(q, "(B N_t) H S C -> B H (N_t S) C", N_t=N_t)
-        q = self.rope_1d(q, normalized_pos)
-        q = rearrange(q, "B H (N_t S) C -> (B N_t) H S C", N_t=N_t)
-
-        # Keys / Values
-        _, N_a, _ = encoder_hidden_states.shape
-        encoder_kv = self.kv_linear(encoder_hidden_states)
-        encoder_kv = encoder_kv.view(B, N_a, 2, self.num_heads, self.head_dim).permute(2, 0, 3, 1, 4)
-        encoder_k, encoder_v = encoder_kv.unbind(0)
-
-        # Rotary for keys – assign centre of each speaker bucket to its context tokens
-        per_frame = torch.zeros(N_a, dtype=encoder_k.dtype, device=encoder_k.device)
-        per_frame[: per_frame.size(0) // 2] = (rope_h1[0] + rope_h1[1]) / 2
-        per_frame[per_frame.size(0) // 2 :] = (rope_h2[0] + rope_h2[1]) / 2
-        encoder_pos = torch.cat([per_frame] * N_t, dim=0)
-
-        encoder_k = rearrange(encoder_k, "(B N_t) H S C -> B H (N_t S) C", N_t=N_t)
-        encoder_k = self.rope_1d(encoder_k, encoder_pos)
-        encoder_k = rearrange(encoder_k, "B H (N_t S) C -> (B N_t) H S C", N_t=N_t)
-
-        # Final attention
-        q = rearrange(q, "B H M K -> B M H K")
-        encoder_k = rearrange(encoder_k, "B H M K -> B M H K")
-        encoder_v = rearrange(encoder_v, "B H M K -> B M H K")
-
-        x = optimized_attention(
-            q.transpose(1, 2),
-            encoder_k.transpose(1, 2),
-            encoder_v.transpose(1, 2),
-            heads=self.num_heads, skip_reshape=True, skip_output_reshape=True).transpose(1, 2)
-
-        # Linear projection
-        x = x.reshape(B, N, C)
-        x = self.proj(x)
-
-        # Restore original layout
-        x = rearrange(x, "(B N_t) S C -> B (N_t S) C", N_t=N_t)
-        if x_extra is not None:
-            x = torch.cat([x, torch.zeros_like(x_extra)], dim=1)
-
-        return x
-
-
-class MultiTalkAudioProjModel(torch.nn.Module):
-    def __init__(
-        self,
-        seq_len: int = 5,
-        seq_len_vf: int = 12,
-        blocks: int = 12,
-        channels: int = 768,
-        intermediate_dim: int = 512,
-        out_dim: int = 768,
-        context_tokens: int = 32,
-        device=None, dtype=None, operations=None
-    ):
-        super().__init__()
-
-        self.seq_len = seq_len
-        self.blocks = blocks
-        self.channels = channels
-        self.input_dim = seq_len * blocks * channels
-        self.input_dim_vf = seq_len_vf * blocks * channels
-        self.intermediate_dim = intermediate_dim
-        self.context_tokens = context_tokens
-        self.out_dim = out_dim
-
-        # define multiple linear layers
-        self.proj1 = operations.Linear(self.input_dim, intermediate_dim, device=device, dtype=dtype)
-        self.proj1_vf = operations.Linear(self.input_dim_vf, intermediate_dim, device=device, dtype=dtype)
-        self.proj2 = operations.Linear(intermediate_dim, intermediate_dim, device=device, dtype=dtype)
-        self.proj3 = operations.Linear(intermediate_dim, context_tokens * out_dim, device=device, dtype=dtype)
-        self.norm = operations.LayerNorm(out_dim, device=device, dtype=dtype)
-
-    def forward(self, audio_embeds, audio_embeds_vf):
-        video_length = audio_embeds.shape[1] + audio_embeds_vf.shape[1]
-        B, _, _, S, C = audio_embeds.shape
-
-        # process audio of first frame
-        audio_embeds = rearrange(audio_embeds, "bz f w b c -> (bz f) w b c")
-        batch_size, window_size, blocks, channels = audio_embeds.shape
-        audio_embeds = audio_embeds.view(batch_size, window_size * blocks * channels)
-
-        # process audio of latter frame
-        audio_embeds_vf = rearrange(audio_embeds_vf, "bz f w b c -> (bz f) w b c")
-        batch_size_vf, window_size_vf, blocks_vf, channels_vf = audio_embeds_vf.shape
-        audio_embeds_vf = audio_embeds_vf.view(batch_size_vf, window_size_vf * blocks_vf * channels_vf)
-
-        # first projection
-        audio_embeds = torch.relu(self.proj1(audio_embeds))
-        audio_embeds_vf = torch.relu(self.proj1_vf(audio_embeds_vf))
-        audio_embeds = rearrange(audio_embeds, "(bz f) c -> bz f c", bz=B)
-        audio_embeds_vf = rearrange(audio_embeds_vf, "(bz f) c -> bz f c", bz=B)
-        audio_embeds_c = torch.concat([audio_embeds, audio_embeds_vf], dim=1)
-        batch_size_c, N_t, C_a = audio_embeds_c.shape
-        audio_embeds_c = audio_embeds_c.view(batch_size_c*N_t, C_a)
-
-        # second projection
-        audio_embeds_c = torch.relu(self.proj2(audio_embeds_c))
-
-        context_tokens = self.proj3(audio_embeds_c).reshape(batch_size_c*N_t, self.context_tokens, self.out_dim)
-
-        # normalization and reshape
-        context_tokens = self.norm(context_tokens)
-        context_tokens = rearrange(context_tokens, "(bz f) m c -> bz f m c", f=video_length)
-
-        return context_tokens
-
-
-class WanMultiTalkAttentionBlock(torch.nn.Module):
-    def __init__(self, in_dim=5120, out_dim=768, device=None, dtype=None, operations=None):
-        super().__init__()
-        self.audio_cross_attn = SingleStreamMultiAttention(in_dim, out_dim, num_heads=40, qkv_bias=True, device=device, dtype=dtype, operations=operations)
-        self.norm_x = operations.LayerNorm(in_dim, device=device, dtype=dtype, elementwise_affine=True)
-
-
-class MultiTalkGetAttnMapPatch:
-    def __init__(self, ref_target_masks=None):
-        self.ref_target_masks = ref_target_masks
-
-    def __call__(self, kwargs):
-        transformer_options = kwargs.get("transformer_options", {})
-        x = kwargs["x"]
-
-        if self.ref_target_masks is not None:
-            x_ref_attn_map = get_attn_map_with_target(kwargs["q"], kwargs["k"], transformer_options["grid_sizes"], ref_target_masks=self.ref_target_masks.to(x.device))
-            transformer_options["x_ref_attn_map"] = x_ref_attn_map
-        return x
-
-
-class MultiTalkCrossAttnPatch:
-    def __init__(self, model_patch, audio_scale=1.0, ref_target_masks=None):
-        self.model_patch = model_patch
-        self.audio_scale = audio_scale
-        self.ref_target_masks = ref_target_masks
-
-    def __call__(self, kwargs):
-        transformer_options = kwargs.get("transformer_options", {})
-        block_idx = transformer_options.get("block_index", None)
-        x = kwargs["x"]
-        if block_idx is None:
-            return torch.zeros_like(x)
-
-        audio_embeds = transformer_options.get("audio_embeds")
-        x_ref_attn_map = transformer_options.pop("x_ref_attn_map", None)
-
-        norm_x = self.model_patch.model.blocks[block_idx].norm_x(x)
-        x_audio = self.model_patch.model.blocks[block_idx].audio_cross_attn(
-            norm_x, audio_embeds.to(x.dtype),
-            shape=transformer_options["grid_sizes"],
-            x_ref_attn_map=x_ref_attn_map
-        )
-        x = x + x_audio * self.audio_scale
-        return x
-
-    def models(self):
-        return [self.model_patch]
-
-class MultiTalkApplyModelWrapper:
-    def __init__(self, init_latents):
-        self.init_latents = init_latents
-
-    def __call__(self, executor, x, *args, **kwargs):
-        x[:, :, :self.init_latents.shape[2]] = self.init_latents.to(x)
-        samples = executor(x, *args, **kwargs)
-        return samples
-
-
-class InfiniteTalkOuterSampleWrapper:
-    def __init__(self, motion_frames_latent, model_patch, is_extend=False):
-        self.motion_frames_latent = motion_frames_latent
-        self.model_patch = model_patch
-        self.is_extend = is_extend
-
-    def __call__(self, executor, *args, **kwargs):
-        model_patcher = executor.class_obj.model_patcher
-        model_options = executor.class_obj.model_options
-        process_latent_in = model_patcher.model.process_latent_in
-
-        # for InfiniteTalk, model input first latent(s) need to always be replaced on every step
-        if self.motion_frames_latent is not None:
-            wrappers = model_options["transformer_options"]["wrappers"]
-            w = wrappers.setdefault(comfy.patcher_extension.WrappersMP.APPLY_MODEL, {})
-            w["MultiTalk_apply_model"] = [MultiTalkApplyModelWrapper(process_latent_in(self.motion_frames_latent))]
-
-        # run the sampling process
-        result = executor(*args, **kwargs)
-
-        # insert motion frames before decoding
-        if self.is_extend:
-            overlap = self.motion_frames_latent.shape[2]
-            result = torch.cat([self.motion_frames_latent.to(result), result[:, :, overlap:]], dim=2)
-
-        return result
-
-    def to(self, device_or_dtype):
-        if isinstance(device_or_dtype, torch.device):
-            if self.motion_frames_latent is not None:
-                self.motion_frames_latent = self.motion_frames_latent.to(device_or_dtype)
-        return self
--- a/comfy/ldm/wan/vae.py
+++ b/comfy/ldm/wan/vae.py
@@ -5,7 +5,7 @@ import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from einops import rearrange
-from comfy.ldm.modules.diffusionmodules.model import vae_attention, torch_cat_if_needed
+from comfy.ldm.modules.diffusionmodules.model import vae_attention

 import comfy.ops
 ops = comfy.ops.disable_weight_init
@@ -20,29 +20,22 @@ class CausalConv3d(ops.Conv3d):

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
-        self._padding = 2 * self.padding[0]
-        self.padding = (0, self.padding[1], self.padding[2])
+        self._padding = (self.padding[2], self.padding[2], self.padding[1],
+                         self.padding[1], 2 * self.padding[0], 0)
+        self.padding = (0, 0, 0)

    def forward(self, x, cache_x=None, cache_list=None, cache_idx=None):
        if cache_list is not None:
            cache_x = cache_list[cache_idx]
            cache_list[cache_idx] = None

-        if cache_x is None and x.shape[2] == 1:
-            #Fast path - the op will pad for use by truncating the weight
-            #and save math on a pile of zeros.
-            return super().forward(x, autopad="causal_zero")
-
-        if self._padding > 0:
-            padding_needed = self._padding
-            if cache_x is not None:
-                cache_x = cache_x.to(x.device)
-                padding_needed = max(0, padding_needed - cache_x.shape[2])
-            padding_shape = list(x.shape)
-            padding_shape[2] = padding_needed
-            padding = torch.zeros(padding_shape, device=x.device, dtype=x.dtype)
-            x = torch_cat_if_needed([padding, cache_x, x], dim=2)
+        padding = list(self._padding)
+        if cache_x is not None and self._padding[4] > 0:
+            cache_x = cache_x.to(x.device)
+            x = torch.cat([cache_x, x], dim=2)
+            padding[4] -= cache_x.shape[2]
            del cache_x
+        x = F.pad(x, padding)

        return super().forward(x)

@@ -479,12 +472,10 @@ class WanVAE(nn.Module):

    def encode(self, x):
        conv_idx = [0]
+        feat_map = [None] * count_conv3d(self.decoder)
        ## cache
        t = x.shape[2]
        iter_ = 1 + (t - 1) // 4
-        feat_map = None
-        if iter_ > 1:
-            feat_map = [None] * count_conv3d(self.decoder)
        ## 对encode输入的x，按时间拆分为1、4、4、4....
        for i in range(iter_):
            conv_idx = [0]
@@ -504,11 +495,10 @@ class WanVAE(nn.Module):

    def decode(self, z):
        conv_idx = [0]
+        feat_map = [None] * count_conv3d(self.decoder)
        # z: [b,c,t,h,w]
+
        iter_ = z.shape[2]
-        feat_map = None
-        if iter_ > 1:
-            feat_map = [None] * count_conv3d(self.decoder)
        x = self.conv2(z)
        for i in range(iter_):
            conv_idx = [0]
--- a/comfy/lora.py
+++ b/comfy/lora.py
@@ -260,7 +260,6 @@ def model_lora_keys_unet(model, key_map={}):
                key_map["transformer.{}".format(k[:-len(".weight")])] = to #simpletrainer and probably regular diffusers flux lora format
                key_map["lycoris_{}".format(k[:-len(".weight")].replace(".", "_"))] = to #simpletrainer lycoris
                key_map["lora_transformer_{}".format(k[:-len(".weight")].replace(".", "_"))] = to #onetrainer
-                key_map[k[:-len(".weight")]] = to #DiffSynth lora format
        for k in sdk:
            hidden_size = model.model_config.unet_config.get("hidden_size", 0)
            if k.endswith(".weight") and ".linear1." in k:
@@ -332,12 +331,6 @@ def model_lora_keys_unet(model, key_map={}):
                key_map["{}".format(key_lora)] = k
                key_map["transformer.{}".format(key_lora)] = k

-    if isinstance(model, comfy.model_base.ACEStep15):
-        for k in sdk:
-            if k.startswith("diffusion_model.decoder.") and k.endswith(".weight"):
-                key_lora = k[len("diffusion_model.decoder."):-len(".weight")]
-                key_map["base_model.model.{}".format(key_lora)] = k  # Official base model loras
-
    return key_map


@@ -374,31 +367,6 @@ def pad_tensor_to_shape(tensor: torch.Tensor, new_shape: list[int]) -> torch.Ten

    return padded_tensor

-def calculate_shape(patches, weight, key, original_weights=None):
-    current_shape = weight.shape
-
-    for p in patches:
-        v = p[1]
-        offset = p[3]
-
-        # Offsets restore the old shape; lists force a diff without metadata
-        if offset is not None or isinstance(v, list):
-            continue
-
-        if isinstance(v, weight_adapter.WeightAdapterBase):
-            adapter_shape = v.calculate_shape(key)
-            if adapter_shape is not None:
-                current_shape = adapter_shape
-            continue
-
-        # Standard diff logic with padding
-        if len(v) == 2:
-            patch_type, patch_data = v[0], v[1]
-            if patch_type == "diff" and len(patch_data) > 1 and patch_data[1]['pad_weight']:
-                current_shape = patch_data[0].shape
-
-    return current_shape
-
 def calculate_weight(patches, weight, key, intermediate_dtype=torch.float32, original_weights=None):
    for p in patches:
        strength = p[0]
--- a/comfy/lora_convert.py
+++ b/comfy/lora_convert.py
@@ -5,7 +5,7 @@ import comfy.utils
 def convert_lora_bfl_control(sd): #BFL loras for Flux
    sd_out = {}
    for k in sd:
-        k_to = "diffusion_model.{}".format(k.replace(".lora_B.bias", ".diff_b").replace("_norm.scale", "_norm.set_weight"))
+        k_to = "diffusion_model.{}".format(k.replace(".lora_B.bias", ".diff_b").replace("_norm.scale", "_norm.scale.set_weight"))
        sd_out[k_to] = sd[k]

    sd_out["diffusion_model.img_in.reshape_weight"] = torch.tensor([sd["img_in.lora_B.weight"].shape[0], sd["img_in.lora_A.weight"].shape[1]])
--- a/comfy/memory_management.py
+++ b/comfy/memory_management.py
@@ -1,81 +0,0 @@
-import math
-import torch
-from typing import NamedTuple
-
-from comfy.quant_ops import QuantizedTensor
-
-class TensorGeometry(NamedTuple):
-    shape: any
-    dtype: torch.dtype
-
-    def element_size(self):
-        info = torch.finfo(self.dtype) if self.dtype.is_floating_point else torch.iinfo(self.dtype)
-        return info.bits // 8
-
-    def numel(self):
-        return math.prod(self.shape)
-
-def tensors_to_geometries(tensors, dtype=None):
-    geometries = []
-    for t in tensors:
-        if t is None or isinstance(t, QuantizedTensor):
-            geometries.append(t)
-            continue
-        tdtype = t.dtype
-        if hasattr(t, "_model_dtype"):
-            tdtype = t._model_dtype
-        if dtype is not None:
-            tdtype = dtype
-        geometries.append(TensorGeometry(shape=t.shape, dtype=tdtype))
-    return geometries
-
-def vram_aligned_size(tensor):
-    if isinstance(tensor, list):
-        return sum([vram_aligned_size(t) for t in tensor])
-
-    if isinstance(tensor, QuantizedTensor):
-        inner_tensors, _ = tensor.__tensor_flatten__()
-        return vram_aligned_size([ getattr(tensor, attr) for attr in inner_tensors ])
-
-    if tensor is None:
-        return 0
-
-    size = tensor.numel() * tensor.element_size()
-    aligment_req = 1024
-    return (size + aligment_req - 1) // aligment_req * aligment_req
-
-def interpret_gathered_like(tensors, gathered):
-    offset = 0
-    dest_views = []
-
-    if gathered.dim() != 1 or gathered.element_size() != 1:
-        raise ValueError(f"Buffer must be 1D and single-byte (got {gathered.dim()}D {gathered.dtype})")
-
-    for tensor in tensors:
-
-        if tensor is None:
-            dest_views.append(None)
-            continue
-
-        if isinstance(tensor, QuantizedTensor):
-            inner_tensors, qt_ctx = tensor.__tensor_flatten__()
-            templates = { attr: getattr(tensor, attr) for attr in inner_tensors }
-        else:
-            templates = { "data": tensor }
-
-        actuals = {}
-        for attr, template in templates.items():
-            size = template.numel() * template.element_size()
-            if offset + size > gathered.numel():
-                raise ValueError(f"Buffer too small: needs {offset + size} bytes, but only has {gathered.numel()}. ")
-            actuals[attr] = gathered[offset:offset+size].view(dtype=template.dtype).view(template.shape)
-            offset += vram_aligned_size(template)
-
-        if isinstance(tensor, QuantizedTensor):
-            dest_views.append(QuantizedTensor.__tensor_unflatten__(actuals, qt_ctx, 0, 0))
-        else:
-            dest_views.append(actuals["data"])
-
-    return dest_views
-
-aimdo_allocator = None
--- a/comfy/model_base.py
+++ b/comfy/model_base.py
@@ -49,8 +49,6 @@ import comfy.ldm.ace.model
 import comfy.ldm.omnigen.omnigen2
 import comfy.ldm.qwen_image.model
 import comfy.ldm.kandinsky5.model
-import comfy.ldm.anima.model
-import comfy.ldm.ace.ace_step15

 import comfy.model_management
 import comfy.patcher_extension
@@ -147,8 +145,6 @@ class BaseModel(torch.nn.Module):
                self.diffusion_model.to(memory_format=torch.channels_last)
                logging.debug("using channels last mode for diffusion model")
            logging.info("model weight dtype {}, manual cast: {}".format(self.get_dtype(), self.manual_cast_dtype))
-            comfy.model_management.archive_model_dtypes(self.diffusion_model)
-
        self.model_type = model_type
        self.model_sampling = model_sampling(model_config, model_type)

@@ -178,7 +174,10 @@ class BaseModel(torch.nn.Module):
            xc = torch.cat([xc] + [comfy.model_management.cast_to_device(c_concat, xc.device, xc.dtype)], dim=1)

        context = c_crossattn
-        dtype = self.get_dtype_inference()
+        dtype = self.get_dtype()
+
+        if self.manual_cast_dtype is not None:
+            dtype = self.manual_cast_dtype

        xc = xc.to(dtype)
        device = xc.device
@@ -215,13 +214,6 @@ class BaseModel(torch.nn.Module):
    def get_dtype(self):
        return self.diffusion_model.dtype

-    def get_dtype_inference(self):
-        dtype = self.get_dtype()
-
-        if self.manual_cast_dtype is not None:
-            dtype = self.manual_cast_dtype
-        return dtype
-
    def encode_adm(self, **kwargs):
        return None

@@ -306,7 +298,7 @@ class BaseModel(torch.nn.Module):

        return out

-    def load_model_weights(self, sd, unet_prefix="", assign=False):
+    def load_model_weights(self, sd, unet_prefix=""):
        to_load = {}
        keys = list(sd.keys())
        for k in keys:
@@ -314,7 +306,7 @@ class BaseModel(torch.nn.Module):
                to_load[k[len(unet_prefix):]] = sd.pop(k)

        to_load = self.model_config.process_unet_state_dict(to_load)
-        m, u = self.diffusion_model.load_state_dict(to_load, strict=False, assign=assign)
+        m, u = self.diffusion_model.load_state_dict(to_load, strict=False)
        if len(m) > 0:
            logging.warning("unet missing: {}".format(m))

@@ -329,7 +321,7 @@ class BaseModel(torch.nn.Module):
    def process_latent_out(self, latent):
        return self.latent_format.process_out(latent)

-    def state_dict_for_saving(self, unet_state_dict, clip_state_dict=None, vae_state_dict=None, clip_vision_state_dict=None):
+    def state_dict_for_saving(self, clip_state_dict=None, vae_state_dict=None, clip_vision_state_dict=None):
        extra_sds = []
        if clip_state_dict is not None:
            extra_sds.append(self.model_config.process_clip_state_dict_for_saving(clip_state_dict))
@@ -337,7 +329,10 @@ class BaseModel(torch.nn.Module):
            extra_sds.append(self.model_config.process_vae_state_dict_for_saving(vae_state_dict))
        if clip_vision_state_dict is not None:
            extra_sds.append(self.model_config.process_clip_vision_state_dict_for_saving(clip_vision_state_dict))
+
+        unet_state_dict = self.diffusion_model.state_dict()
        unet_state_dict = self.model_config.process_unet_state_dict_for_saving(unet_state_dict)
+
        if self.model_type == ModelType.V_PREDICTION:
            unet_state_dict["v_pred"] = torch.tensor([])

@@ -376,7 +371,9 @@ class BaseModel(torch.nn.Module):
                    input_shapes += shape

        if comfy.model_management.xformers_enabled() or comfy.model_management.pytorch_attention_flash_attention():
-            dtype = self.get_dtype_inference()
+            dtype = self.get_dtype()
+            if self.manual_cast_dtype is not None:
+                dtype = self.manual_cast_dtype
            #TODO: this needs to be tweaked
            area = sum(map(lambda input_shape: input_shape[0] * math.prod(input_shape[2:]), input_shapes))
            return (area * comfy.model_management.dtype_size(dtype) * 0.01 * self.memory_usage_factor) * (1024 * 1024)
@@ -778,8 +775,8 @@ class StableAudio1(BaseModel):
            out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)
        return out

-    def state_dict_for_saving(self, unet_state_dict, clip_state_dict=None, vae_state_dict=None, clip_vision_state_dict=None):
-        sd = super().state_dict_for_saving(unet_state_dict, clip_state_dict=clip_state_dict, vae_state_dict=vae_state_dict, clip_vision_state_dict=clip_vision_state_dict)
+    def state_dict_for_saving(self, clip_state_dict=None, vae_state_dict=None, clip_vision_state_dict=None):
+        sd = super().state_dict_for_saving(clip_state_dict=clip_state_dict, vae_state_dict=vae_state_dict, clip_vision_state_dict=clip_vision_state_dict)
        d = {"conditioner.conditioners.seconds_start.": self.seconds_start_embedder.state_dict(), "conditioner.conditioners.seconds_total.": self.seconds_total_embedder.state_dict()}
        for k in d:
            s = d[k]
@@ -1150,35 +1147,9 @@ class CosmosPredict2(BaseModel):
        sigma = (sigma / (sigma + 1))
        return latent_image / (1.0 - sigma)

-class Anima(BaseModel):
-    def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
-        super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.anima.model.Anima)
-
-    def extra_conds(self, **kwargs):
-        out = super().extra_conds(**kwargs)
-        cross_attn = kwargs.get("cross_attn", None)
-        t5xxl_ids = kwargs.get("t5xxl_ids", None)
-        t5xxl_weights = kwargs.get("t5xxl_weights", None)
-        device = kwargs["device"]
-        if cross_attn is not None:
-            if t5xxl_ids is not None:
-                if t5xxl_weights is not None:
-                    t5xxl_weights = t5xxl_weights.unsqueeze(0).unsqueeze(-1).to(cross_attn)
-                t5xxl_ids = t5xxl_ids.unsqueeze(0)
-
-                if torch.is_inference_mode_enabled():  # if not we are training
-                    cross_attn = self.diffusion_model.preprocess_text_embeds(cross_attn.to(device=device, dtype=self.get_dtype_inference()), t5xxl_ids.to(device=device), t5xxl_weights=t5xxl_weights.to(device=device, dtype=self.get_dtype_inference()))
-                else:
-                    out['t5xxl_ids'] = comfy.conds.CONDRegular(t5xxl_ids)
-                    out['t5xxl_weights'] = comfy.conds.CONDRegular(t5xxl_weights)
-
-            out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)
-        return out
-
 class Lumina2(BaseModel):
    def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
        super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.lumina.model.NextDiT)
-        self.memory_usage_factor_conds = ("ref_latents",)

    def extra_conds(self, **kwargs):
        out = super().extra_conds(**kwargs)
@@ -1198,35 +1169,6 @@ class Lumina2(BaseModel):
        if clip_text_pooled is not None:
            out['clip_text_pooled'] = comfy.conds.CONDRegular(clip_text_pooled)

-        clip_vision_outputs = kwargs.get("clip_vision_outputs", list(map(lambda a: a.get("clip_vision_output"), kwargs.get("unclip_conditioning", [{}]))))  # Z Image omni
-        if clip_vision_outputs is not None and len(clip_vision_outputs) > 0:
-            sigfeats = []
-            for clip_vision_output in clip_vision_outputs:
-                if clip_vision_output is not None:
-                    image_size = clip_vision_output.image_sizes[0]
-                    shape = clip_vision_output.last_hidden_state.shape
-                    sigfeats.append(clip_vision_output.last_hidden_state.reshape(shape[0], image_size[1] // 16, image_size[2] // 16, shape[-1]))
-            if len(sigfeats) > 0:
-                out['siglip_feats'] = comfy.conds.CONDList(sigfeats)
-
-        ref_latents = kwargs.get("reference_latents", None)
-        if ref_latents is not None:
-            latents = []
-            for lat in ref_latents:
-                latents.append(self.process_latent_in(lat))
-            out['ref_latents'] = comfy.conds.CONDList(latents)
-
-        ref_contexts = kwargs.get("reference_latents_text_embeds", None)
-        if ref_contexts is not None:
-            out['ref_contexts'] = comfy.conds.CONDList(ref_contexts)
-
-        return out
-
-    def extra_conds_shapes(self, **kwargs):
-        out = {}
-        ref_latents = kwargs.get("reference_latents", None)
-        if ref_latents is not None:
-            out['ref_latents'] = list([1, 16, sum(map(lambda a: math.prod(a.size()[2:]), ref_latents))])
        return out

 class WAN21(BaseModel):
@@ -1547,49 +1489,6 @@ class ACEStep(BaseModel):
        out['lyrics_strength'] = comfy.conds.CONDConstant(kwargs.get("lyrics_strength", 1.0))
        return out

-class ACEStep15(BaseModel):
-    def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
-        super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.ace.ace_step15.AceStepConditionGenerationModel)
-
-    def extra_conds(self, **kwargs):
-        out = super().extra_conds(**kwargs)
-        device = kwargs["device"]
-        noise = kwargs["noise"]
-
-        cross_attn = kwargs.get("cross_attn", None)
-        if cross_attn is not None:
-            if torch.count_nonzero(cross_attn) == 0:
-                out['replace_with_null_embeds'] = comfy.conds.CONDConstant(True)
-            out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)
-
-        conditioning_lyrics = kwargs.get("conditioning_lyrics", None)
-        if cross_attn is not None:
-            out['lyric_embed'] = comfy.conds.CONDRegular(conditioning_lyrics)
-
-        refer_audio = kwargs.get("reference_audio_timbre_latents", None)
-        if refer_audio is None or len(refer_audio) == 0:
-            refer_audio = comfy.ldm.ace.ace_step15.get_silence_latent(noise.shape[2], device)
-            pass_audio_codes = True
-        else:
-            refer_audio = refer_audio[-1][:, :, :noise.shape[2]]
-            out['is_covers'] = comfy.conds.CONDConstant(True)
-            pass_audio_codes = False
-
-        if pass_audio_codes:
-            audio_codes = kwargs.get("audio_codes", None)
-            if audio_codes is not None:
-                out['audio_codes'] = comfy.conds.CONDRegular(torch.tensor(audio_codes, device=device))
-                refer_audio = refer_audio[:, :, :750]
-            else:
-                out['is_covers'] = comfy.conds.CONDConstant(False)
-
-        if refer_audio.shape[2] < noise.shape[2]:
-            pad = comfy.ldm.ace.ace_step15.get_silence_latent(noise.shape[2], device)
-            refer_audio = torch.cat([refer_audio.to(pad), pad[:, :, refer_audio.shape[2]:]], dim=2)
-
-        out['refer_audio'] = comfy.conds.CONDRegular(refer_audio)
-        return out
-
 class Omnigen2(BaseModel):
    def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
        super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.omnigen.omnigen2.OmniGen2Transformer2DModel)
@@ -1627,9 +1526,6 @@ class QwenImage(BaseModel):

    def extra_conds(self, **kwargs):
        out = super().extra_conds(**kwargs)
-        attention_mask = kwargs.get("attention_mask", None)
-        if attention_mask is not None:
-            out['attention_mask'] = comfy.conds.CONDRegular(attention_mask)
        cross_attn = kwargs.get("cross_attn", None)
        if cross_attn is not None:
            out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)
--- a/comfy/model_detection.py
+++ b/comfy/model_detection.py
@@ -19,12 +19,6 @@ def count_blocks(state_dict_keys, prefix_string):
        count += 1
    return count

-def any_suffix_in(keys, prefix, main, suffix_list=[]):
-    for x in suffix_list:
-        if "{}{}{}".format(prefix, main, x) in keys:
-            return True
-    return False
-
 def calculate_transformer_depth(prefix, state_dict_keys, state_dict):
    context_dim = None
    use_linear_in_transformer = False
@@ -192,7 +186,7 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
            dit_config["meanflow_sum"] = False
        return dit_config

-    if any_suffix_in(state_dict_keys, key_prefix, 'double_blocks.0.img_attn.norm.key_norm.', ["weight", "scale"]) and ('{}img_in.weight'.format(key_prefix) in state_dict_keys or any_suffix_in(state_dict_keys, key_prefix, 'distilled_guidance_layer.norms.0.', ["weight", "scale"])): #Flux, Chroma or Chroma Radiance (has no img_in.weight)
+    if '{}double_blocks.0.img_attn.norm.key_norm.scale'.format(key_prefix) in state_dict_keys and ('{}img_in.weight'.format(key_prefix) in state_dict_keys or f"{key_prefix}distilled_guidance_layer.norms.0.scale" in state_dict_keys): #Flux, Chroma or Chroma Radiance (has no img_in.weight)
        dit_config = {}
        if '{}double_stream_modulation_img.lin.weight'.format(key_prefix) in state_dict_keys:
            dit_config["image_model"] = "flux2"
@@ -247,8 +241,7 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):

        dit_config["depth"] = count_blocks(state_dict_keys, '{}double_blocks.'.format(key_prefix) + '{}.')
        dit_config["depth_single_blocks"] = count_blocks(state_dict_keys, '{}single_blocks.'.format(key_prefix) + '{}.')
-
-        if any_suffix_in(state_dict_keys, key_prefix, 'distilled_guidance_layer.0.norms.0.', ["weight", "scale"]) or any_suffix_in(state_dict_keys, key_prefix, 'distilled_guidance_layer.norms.0.', ["weight", "scale"]): #Chroma
+        if '{}distilled_guidance_layer.0.norms.0.scale'.format(key_prefix) in state_dict_keys or '{}distilled_guidance_layer.norms.0.scale'.format(key_prefix) in state_dict_keys: #Chroma
            dit_config["image_model"] = "chroma"
            dit_config["in_channels"] = 64
            dit_config["out_channels"] = 64
@@ -256,18 +249,17 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
            dit_config["out_dim"] = 3072
            dit_config["hidden_dim"] = 5120
            dit_config["n_layers"] = 5
-
-            if any_suffix_in(state_dict_keys, key_prefix, 'nerf_blocks.0.norm.', ["weight", "scale"]): #Chroma Radiance
+            if f"{key_prefix}nerf_blocks.0.norm.scale" in state_dict_keys: #Chroma Radiance
                dit_config["image_model"] = "chroma_radiance"
                dit_config["in_channels"] = 3
                dit_config["out_channels"] = 3
-                dit_config["patch_size"] = state_dict.get('{}img_in_patch.weight'.format(key_prefix)).size(dim=-1)
+                dit_config["patch_size"] = 16
                dit_config["nerf_hidden_size"] = 64
                dit_config["nerf_mlp_ratio"] = 4
                dit_config["nerf_depth"] = 4
                dit_config["nerf_max_freqs"] = 8
                dit_config["nerf_tile_size"] = 512
-                dit_config["nerf_final_head_type"] = "conv" if any_suffix_in(state_dict_keys, key_prefix, 'nerf_final_layer_conv.norm.', ["weight", "scale"]) else "linear"
+                dit_config["nerf_final_head_type"] = "conv" if f"{key_prefix}nerf_final_layer_conv.norm.scale" in state_dict_keys else "linear"
                dit_config["nerf_embedder_dtype"] = torch.float32
                if "{}__x0__".format(key_prefix) in state_dict_keys: # x0 pred
                    dit_config["use_x0"] = True
@@ -276,7 +268,7 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
        else:
            dit_config["guidance_embed"] = "{}guidance_in.in_layer.weight".format(key_prefix) in state_dict_keys
            dit_config["yak_mlp"] = '{}double_blocks.0.img_mlp.gate_proj.weight'.format(key_prefix) in state_dict_keys
-            dit_config["txt_norm"] = any_suffix_in(state_dict_keys, key_prefix, 'txt_norm.', ["weight", "scale"])
+            dit_config["txt_norm"] = "{}txt_norm.scale".format(key_prefix) in state_dict_keys
            if dit_config["yak_mlp"] and dit_config["txt_norm"]:  # Ovis model
                dit_config["txt_ids_dims"] = [1, 2]

@@ -452,15 +444,8 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
            dit_config["ffn_dim_multiplier"] = (8.0 / 3.0)
            dit_config["z_image_modulation"] = True
            dit_config["time_scale"] = 1000.0
-            try:
-                dit_config["allow_fp16"] = torch.std(state_dict['{}layers.{}.ffn_norm1.weight'.format(key_prefix, dit_config["n_layers"] - 2)], unbiased=False).item() < 0.42
-            except Exception:
-                pass
            if '{}cap_pad_token'.format(key_prefix) in state_dict_keys:
                dit_config["pad_tokens_multiple"] = 32
-            sig_weight = state_dict.get('{}siglip_embedder.0.weight'.format(key_prefix), None)
-            if sig_weight is not None:
-                dit_config["siglip_feat_dim"] = sig_weight.shape[0]

        return dit_config

@@ -562,8 +547,6 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
    if '{}blocks.0.mlp.layer1.weight'.format(key_prefix) in state_dict_keys:  # Cosmos predict2
        dit_config = {}
        dit_config["image_model"] = "cosmos_predict2"
-        if "{}llm_adapter.blocks.0.cross_attn.q_proj.weight".format(key_prefix) in state_dict_keys:
-            dit_config["image_model"] = "anima"
        dit_config["max_img_h"] = 240
        dit_config["max_img_w"] = 240
        dit_config["max_frames"] = 128
@@ -663,11 +646,6 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
        dit_config["num_visual_blocks"] = count_blocks(state_dict_keys, '{}visual_transformer_blocks.'.format(key_prefix) + '{}.')
        return dit_config

-    if '{}encoder.lyric_encoder.layers.0.input_layernorm.weight'.format(key_prefix) in state_dict_keys:
-        dit_config = {}
-        dit_config["audio_model"] = "ace1.5"
-        return dit_config
-
    if '{}input_blocks.0.0.weight'.format(key_prefix) not in state_dict_keys:
        return None

--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -20,20 +20,12 @@ import psutil
 import logging
 from enum import Enum
 from comfy.cli_args import args, PerformanceFeature
-import threading
 import torch
 import sys
 import platform
 import weakref
 import gc
 import os
-from contextlib import nullcontext
-import comfy.memory_management
-import comfy.utils
-import comfy.quant_ops
-
-import comfy_aimdo.torch
-import comfy_aimdo.model_vbar

 class VRAMState(Enum):
    DISABLED = 0    #No vram present: no need to move models to vram
@@ -55,11 +47,6 @@ cpu_state = CPUState.GPU

 total_vram = 0

-
-# Training Related State
-in_training = False
-
-
 def get_supported_float8_types():
    float8_types = []
    try:
@@ -591,15 +578,9 @@ WINDOWS = any(platform.win32_ver())

 EXTRA_RESERVED_VRAM = 400 * 1024 * 1024
 if WINDOWS:
-    import comfy.windows
    EXTRA_RESERVED_VRAM = 600 * 1024 * 1024 #Windows is higher because of the shared vram issue
    if total_vram > (15 * 1024):  # more extra reserved vram on 16GB+ cards
        EXTRA_RESERVED_VRAM += 100 * 1024 * 1024
-    def get_free_ram():
-        return comfy.windows.get_free_ram()
-else:
-    def get_free_ram():
-        return psutil.virtual_memory().available

 if args.reserve_vram is not None:
    EXTRA_RESERVED_VRAM = args.reserve_vram * 1024 * 1024 * 1024
@@ -611,7 +592,7 @@ def extra_reserved_memory():
 def minimum_inference_memory():
    return (1024 * 1024 * 1024) * 0.8 + extra_reserved_memory()

-def free_memory(memory_required, device, keep_loaded=[], for_dynamic=False, ram_required=0):
+def free_memory(memory_required, device, keep_loaded=[]):
    cleanup_models_gc()
    unloaded_model = []
    can_unload = []
@@ -626,23 +607,15 @@ def free_memory(memory_required, device, keep_loaded=[], for_dynamic=False, ram_

    for x in sorted(can_unload):
        i = x[-1]
-        memory_to_free = 1e32
-        ram_to_free = 1e32
+        memory_to_free = None
        if not DISABLE_SMART_MEMORY:
-            memory_to_free = memory_required - get_free_memory(device)
-            ram_to_free = ram_required - get_free_ram()
-
-        if current_loaded_models[i].model.is_dynamic() and for_dynamic:
-            #don't actually unload dynamic models for the sake of other dynamic models
-            #as that works on-demand.
-            memory_required -= current_loaded_models[i].model.loaded_size()
-            memory_to_free = 0
-        if memory_to_free > 0 and current_loaded_models[i].model_unload(memory_to_free):
-            logging.debug(f"Unloading {current_loaded_models[i].model.model.__class__.__name__}")
+            free_mem = get_free_memory(device)
+            if free_mem > memory_required:
+                break
+            memory_to_free = memory_required - free_mem
+        logging.debug(f"Unloading {current_loaded_models[i].model.model.__class__.__name__}")
+        if current_loaded_models[i].model_unload(memory_to_free):
            unloaded_model.append(i)
-        if ram_to_free > 0:
-            logging.debug(f"RAM Unloading {current_loaded_models[i].model.model.__class__.__name__}")
-            current_loaded_models[i].model.partially_unload_ram(ram_to_free)

    for i in sorted(unloaded_model, reverse=True):
        unloaded_models.append(current_loaded_models.pop(i))
@@ -677,10 +650,7 @@ def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimu

    models_to_load = []

-    free_for_dynamic=True
    for x in models:
-        if not x.is_dynamic():
-            free_for_dynamic = False
        loaded_model = LoadedModel(x)
        try:
            loaded_model_index = current_loaded_models.index(loaded_model)
@@ -706,25 +676,19 @@ def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimu
            model_to_unload.model.detach(unpatch_all=False)
            model_to_unload.model_finalizer.detach()

-
    total_memory_required = {}
-    total_ram_required = {}
    for loaded_model in models_to_load:
        total_memory_required[loaded_model.device] = total_memory_required.get(loaded_model.device, 0) + loaded_model.model_memory_required(loaded_model.device)
-        #x2, one to make sure the OS can fit the model for loading in disk cache, and for us to do any pinning we
-        #want to do.
-        #FIXME: This should subtract off the to_load current pin consumption.
-        total_ram_required[loaded_model.device] = total_ram_required.get(loaded_model.device, 0) + loaded_model.model_memory() * 2

    for device in total_memory_required:
        if device != torch.device("cpu"):
-            free_memory(total_memory_required[device] * 1.1 + extra_mem, device, for_dynamic=free_for_dynamic, ram_required=total_ram_required[device])
+            free_memory(total_memory_required[device] * 1.1 + extra_mem, device)

    for device in total_memory_required:
        if device != torch.device("cpu"):
            free_mem = get_free_memory(device)
            if free_mem < minimum_memory_required:
-                models_l = free_memory(minimum_memory_required, device, for_dynamic=free_for_dynamic)
+                models_l = free_memory(minimum_memory_required, device)
                logging.info("{} models unloaded.".format(len(models_l)))

    for loaded_model in models_to_load:
@@ -768,9 +732,6 @@ def loaded_models(only_currently_used=False):

 def cleanup_models_gc():
    do_gc = False
-
-    reset_cast_buffers()
-
    for i in range(len(current_loaded_models)):
        cur = current_loaded_models[i]
        if cur.is_dead():
@@ -788,11 +749,6 @@ def cleanup_models_gc():
                logging.warning("WARNING, memory leak with model {}. Please make sure it is not being referenced from somewhere.".format(cur.real_model().__class__.__name__))


-def archive_model_dtypes(model):
-    for name, module in model.named_modules():
-        for param_name, param in module.named_parameters(recurse=False):
-            setattr(module, f"{param_name}_comfy_model_dtype", param.dtype)
-

 def cleanup_models():
    to_delete = []
@@ -836,7 +792,7 @@ def unet_inital_load_device(parameters, dtype):

    mem_dev = get_free_memory(torch_dev)
    mem_cpu = get_free_memory(cpu_dev)
-    if mem_dev > mem_cpu and model_size < mem_dev and comfy.memory_management.aimdo_allocator is None:
+    if mem_dev > mem_cpu and model_size < mem_dev:
        return torch_dev
    else:
        return cpu_dev
@@ -1095,51 +1051,6 @@ def current_stream(device):
        return None

 stream_counters = {}
-
-STREAM_CAST_BUFFERS = {}
-LARGEST_CASTED_WEIGHT = (None, 0)
-
-def get_cast_buffer(offload_stream, device, size, ref):
-    global LARGEST_CASTED_WEIGHT
-
-    if offload_stream is not None:
-        wf_context = offload_stream
-        if hasattr(wf_context, "as_context"):
-            wf_context = wf_context.as_context(offload_stream)
-    else:
-        wf_context = nullcontext()
-
-    cast_buffer = STREAM_CAST_BUFFERS.get(offload_stream, None)
-    if cast_buffer is None or cast_buffer.numel() < size:
-        if ref is LARGEST_CASTED_WEIGHT[0]:
-            #If there is one giant weight we do not want both streams to
-            #allocate a buffer for it. It's up to the caster to get the other
-            #offload stream in this corner case
-            return None
-        if cast_buffer is not None and cast_buffer.numel() > 50 * (1024 ** 2):
-            #I want my wrongly sized 50MB+ of VRAM back from the caching allocator right now
-            synchronize()
-            del STREAM_CAST_BUFFERS[offload_stream]
-            del cast_buffer
-            #FIXME: This doesn't work in Aimdo because mempool cant clear cache
-            soft_empty_cache()
-        with wf_context:
-            cast_buffer = torch.empty((size), dtype=torch.int8, device=device)
-            STREAM_CAST_BUFFERS[offload_stream] = cast_buffer
-
-        if  size > LARGEST_CASTED_WEIGHT[1]:
-            LARGEST_CASTED_WEIGHT = (ref, size)
-
-    return cast_buffer
-
-def reset_cast_buffers():
-    global LARGEST_CASTED_WEIGHT
-    LARGEST_CASTED_WEIGHT = (None, 0)
-    for offload_stream in STREAM_CAST_BUFFERS:
-        offload_stream.synchronize()
-    STREAM_CAST_BUFFERS.clear()
-    soft_empty_cache()
-
 def get_offload_stream(device):
    stream_counter = stream_counters.get(device, 0)
    if NUM_STREAMS == 0:
@@ -1182,61 +1093,7 @@ def sync_stream(device, stream):
        return
    current_stream(device).wait_stream(stream)

-
-def cast_to_gathered(tensors, r, non_blocking=False, stream=None):
-    wf_context = nullcontext()
-    if stream is not None:
-       wf_context = stream
-       if hasattr(wf_context, "as_context"):
-           wf_context = wf_context.as_context(stream)
-
-    dest_views = comfy.memory_management.interpret_gathered_like(tensors, r)
-    with wf_context:
-        for tensor in tensors:
-            dest_view = dest_views.pop(0)
-            if tensor is None:
-                continue
-            dest_view.copy_(tensor, non_blocking=non_blocking)
-
-
-def cast_to(weight, dtype=None, device=None, non_blocking=False, copy=False, stream=None, r=None):
-    if hasattr(weight, "_v"):
-        #Unexpected usage patterns. There is no reason these don't work but they
-        #have no testing and no callers do this.
-        assert r is None
-        assert stream is None
-
-        cast_geometry = comfy.memory_management.tensors_to_geometries([ weight ])
-
-        if dtype is None:
-            dtype = weight._model_dtype
-
-        signature = comfy_aimdo.model_vbar.vbar_fault(weight._v)
-        if signature is not None:
-            if comfy_aimdo.model_vbar.vbar_signature_compare(signature, weight._v_signature):
-                v_tensor = weight._v_tensor
-            else:
-                raw_tensor = comfy_aimdo.torch.aimdo_to_tensor(weight._v, device)
-                v_tensor = comfy.memory_management.interpret_gathered_like(cast_geometry, raw_tensor)[0]
-                weight._v_tensor = v_tensor
-                weight._v_signature = signature
-                #Send it over
-                v_tensor.copy_(weight, non_blocking=non_blocking)
-            return v_tensor.to(dtype=dtype)
-
-        r = torch.empty_like(weight, dtype=dtype, device=device)
-
-        if weight.dtype != r.dtype and weight.dtype != weight._model_dtype:
-            #Offloaded casting could skip this, however it would make the quantizations
-            #inconsistent between loaded and offloaded weights. So force the double casting
-            #that would happen in regular flow to make offload deterministic.
-            cast_buffer = torch.empty_like(weight, dtype=weight._model_dtype, device=device)
-            cast_buffer.copy_(weight, non_blocking=non_blocking)
-            weight = cast_buffer
-        r.copy_(weight, non_blocking=non_blocking)
-
-        return r
-
+def cast_to(weight, dtype=None, device=None, non_blocking=False, copy=False, stream=None):
    if device is None or weight.device == device:
        if not copy:
            if dtype is None or weight.dtype == dtype:
@@ -1255,12 +1112,10 @@ def cast_to(weight, dtype=None, device=None, non_blocking=False, copy=False, str
        if hasattr(wf_context, "as_context"):
            wf_context = wf_context.as_context(stream)
        with wf_context:
-            if r is None:
-                r = torch.empty_like(weight, dtype=dtype, device=device)
+            r = torch.empty_like(weight, dtype=dtype, device=device)
            r.copy_(weight, non_blocking=non_blocking)
    else:
-        if r is None:
-            r = torch.empty_like(weight, dtype=dtype, device=device)
+        r = torch.empty_like(weight, dtype=dtype, device=device)
        r.copy_(weight, non_blocking=non_blocking)
    return r

@@ -1280,14 +1135,14 @@ if not args.disable_pinned_memory:
            MAX_PINNED_MEMORY = get_total_memory(torch.device("cpu")) * 0.95
        logging.info("Enabled pinned memory {}".format(MAX_PINNED_MEMORY // (1024 * 1024)))

-PINNING_ALLOWED_TYPES = set(["Tensor", "Parameter", "QuantizedTensor"])
+PINNING_ALLOWED_TYPES = set(["Parameter", "QuantizedTensor"])

 def discard_cuda_async_error():
    try:
        a = torch.tensor([1], dtype=torch.uint8, device=get_torch_device())
        b = torch.tensor([1], dtype=torch.uint8, device=get_torch_device())
        _ = a + b
-        synchronize()
+        torch.cuda.synchronize()
    except torch.AcceleratorError:
        #Dump it! We already know about it from the synchronous return
        pass
@@ -1691,12 +1546,6 @@ def lora_compute_dtype(device):
    LORA_COMPUTE_DTYPES[device] = dtype
    return dtype

-def synchronize():
-    if is_intel_xpu():
-        torch.xpu.synchronize()
-    elif torch.cuda.is_available():
-        torch.cuda.synchronize()
-
 def soft_empty_cache(force=False):
    global cpu_state
    if cpu_state == CPUState.MPS:
@@ -1708,7 +1557,6 @@ def soft_empty_cache(force=False):
    elif is_mlu():
        torch.mlu.empty_cache()
    elif torch.cuda.is_available():
-        torch.cuda.synchronize()
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()

@@ -1720,6 +1568,9 @@ def debug_memory_summary():
        return torch.cuda.memory.memory_summary()
    return ""

+#TODO: might be cleaner to put this somewhere else
+import threading
+
 class InterruptProcessingException(Exception):
    pass

--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@@ -19,6 +19,7 @@
 from __future__ import annotations

 import collections
+import copy
 import inspect
 import logging
 import math
@@ -37,7 +38,19 @@ from comfy.comfy_types import UnetWrapperFunction
 from comfy.quant_ops import QuantizedTensor
 from comfy.patcher_extension import CallbacksMP, PatcherInjection, WrappersMP

-import comfy_aimdo.model_vbar
+
+def string_to_seed(data):
+    crc = 0xFFFFFFFF
+    for byte in data:
+        if isinstance(byte, str):
+            byte = ord(byte)
+        crc ^= byte
+        for _ in range(8):
+            if crc & 1:
+                crc = (crc >> 1) ^ 0xEDB88320
+            else:
+                crc >>= 1
+    return crc ^ 0xFFFFFFFF

 def set_model_options_patch_replace(model_options, patch, name, block_name, number, transformer_index=None):
    to = model_options["transformer_options"].copy()
@@ -110,10 +123,6 @@ def move_weight_functions(m, device):
                memory += f.move_to(device=device)
    return memory

-def string_to_seed(data):
-    logging.warning("WARNING: string_to_seed has moved from comfy.model_patcher to comfy.utils")
-    return comfy.utils.string_to_seed(data)
-
 class LowVramPatch:
    def __init__(self, key, patches, convert_func=None, set_func=None):
        self.key = key
@@ -160,11 +169,6 @@ def get_key_weight(model, key):

    return weight, set_func, convert_func

-def key_param_name_to_key(key, param):
-    if len(key) == 0:
-        return param
-    return "{}.{}".format(key, param)
-
 class AutoPatcherEjector:
    def __init__(self, model: 'ModelPatcher', skip_and_inject_on_exit_only=False):
        self.model = model
@@ -208,27 +212,6 @@ class MemoryCounter:
    def decrement(self, used: int):
        self.value -= used

-CustomTorchDevice = collections.namedtuple("FakeDevice", ["type", "index"])("comfy-lazy-caster", 0)
-
-class LazyCastingParam(torch.nn.Parameter):
-    def __new__(cls, model, key, tensor):
-        return super().__new__(cls, tensor)
-
-    def __init__(self, model, key, tensor):
-        self.model = model
-        self.key = key
-
-    @property
-    def device(self):
-        return CustomTorchDevice
-
-    #safetensors will .to() us to the cpu which we catch here to cast on demand. The returned tensor is
-    #then just a short lived thing in the safetensors serialization logic inside its big for loop over
-    #all weights getting garbage collected per-weight
-    def to(self, *args, **kwargs):
-        return self.model.patch_weight_to_device(self.key, device_to=self.model.load_device, return_weight=True).to("cpu")
-
-
 class ModelPatcher:
    def __init__(self, model, load_device, offload_device, size=0, weight_inplace_update=False):
        self.size = size
@@ -286,9 +269,6 @@ class ModelPatcher:
        if not hasattr(self.model, 'model_offload_buffer_memory'):
            self.model.model_offload_buffer_memory = 0

-    def is_dynamic(self):
-        return False
-
    def model_size(self):
        if self.size > 0:
            return self.size
@@ -304,9 +284,6 @@ class ModelPatcher:
    def lowvram_patch_counter(self):
        return self.model.lowvram_patch_counter

-    def get_free_memory(self, device):
-        return comfy.model_management.get_free_memory(device)
-
    def clone(self):
        n = self.__class__(self.model, self.load_device, self.offload_device, self.model_size(), weight_inplace_update=self.weight_inplace_update)
        n.patches = {}
@@ -316,7 +293,7 @@ class ModelPatcher:

        n.object_patches = self.object_patches.copy()
        n.weight_wrapper_patches = self.weight_wrapper_patches.copy()
-        n.model_options = comfy.utils.deepcopy_list_dict(self.model_options)
+        n.model_options = copy.deepcopy(self.model_options)
        n.backup = self.backup
        n.object_patches_backup = self.object_patches_backup
        n.parent = self
@@ -406,16 +383,13 @@ class ModelPatcher:
    def memory_required(self, input_shape):
        return self.model.memory_required(input_shape=input_shape)

-    def disable_model_cfg1_optimization(self):
-        self.model_options["disable_cfg1_optimization"] = True
-
    def set_model_sampler_cfg_function(self, sampler_cfg_function, disable_cfg1_optimization=False):
        if len(inspect.signature(sampler_cfg_function).parameters) == 3:
            self.model_options["sampler_cfg_function"] = lambda args: sampler_cfg_function(args["cond"], args["uncond"], args["cond_scale"]) #Old way
        else:
            self.model_options["sampler_cfg_function"] = sampler_cfg_function
        if disable_cfg1_optimization:
-            self.disable_model_cfg1_optimization()
+            self.model_options["disable_cfg1_optimization"] = True

    def set_model_sampler_post_cfg_function(self, post_cfg_function, disable_cfg1_optimization=False):
        self.model_options = set_model_options_post_cfg_function(self.model_options, post_cfg_function, disable_cfg1_optimization)
@@ -637,14 +611,14 @@ class ModelPatcher:
                        sd.pop(k)
            return sd

-    def patch_weight_to_device(self, key, device_to=None, inplace_update=False, return_weight=False):
-        weight, set_func, convert_func = get_key_weight(self.model, key)
+    def patch_weight_to_device(self, key, device_to=None, inplace_update=False):
        if key not in self.patches:
-            return weight
+            return

+        weight, set_func, convert_func = get_key_weight(self.model, key)
        inplace_update = self.weight_inplace_update or inplace_update

-        if key not in self.backup and not return_weight:
+        if key not in self.backup:
            self.backup[key] = collections.namedtuple('Dimension', ['weight', 'inplace_update'])(weight.to(device=self.offload_device, copy=inplace_update), inplace_update)

        temp_dtype = comfy.model_management.lora_compute_dtype(device_to)
@@ -657,15 +631,13 @@ class ModelPatcher:

        out_weight = comfy.lora.calculate_weight(self.patches[key], temp_weight, key)
        if set_func is None:
-            out_weight = comfy.float.stochastic_rounding(out_weight, weight.dtype, seed=comfy.utils.string_to_seed(key))
-            if return_weight:
-                return out_weight
-            elif inplace_update:
+            out_weight = comfy.float.stochastic_rounding(out_weight, weight.dtype, seed=string_to_seed(key))
+            if inplace_update:
                comfy.utils.copy_to_param(self.model, key, out_weight)
            else:
                comfy.utils.set_attr_param(self.model, key, out_weight)
        else:
-            return set_func(out_weight, inplace_update=inplace_update, seed=comfy.utils.string_to_seed(key), return_weight=return_weight)
+            set_func(out_weight, inplace_update=inplace_update, seed=string_to_seed(key))

    def pin_weight_to_device(self, key):
        weight, set_func, convert_func = get_key_weight(self.model, key)
@@ -682,19 +654,18 @@ class ModelPatcher:
        for key in list(self.pinned):
            self.unpin_weight(key)

-    def _load_list(self, prio_comfy_cast_weights=False, default_device=None):
+    def _load_list(self):
        loading = []
        for n, m in self.model.named_modules():
-            default = False
-            params = { name: param for name, param in m.named_parameters(recurse=False) }
+            params = []
+            skip = False
+            for name, param in m.named_parameters(recurse=False):
+                params.append(name)
            for name, param in m.named_parameters(recurse=True):
                if name not in params:
-                    default = True # default random weights in non leaf modules
+                    skip = True # skip random weights in non leaf modules
                    break
-            if default and default_device is not None:
-                for param in params.values():
-                    param.data = param.data.to(device=default_device)
-            if not default and (hasattr(m, "comfy_cast_weights") or len(params) > 0):
+            if not skip and (hasattr(m, "comfy_cast_weights") or len(params) > 0):
                module_mem = comfy.model_management.module_size(m)
                module_offload_mem = module_mem
                if hasattr(m, "comfy_cast_weights"):
@@ -710,8 +681,7 @@ class ModelPatcher:
                        return 0
                    module_offload_mem += check_module_offload_mem("{}.weight".format(n))
                    module_offload_mem += check_module_offload_mem("{}.bias".format(n))
-                prepend = (not hasattr(m, "comfy_cast_weights"),) if prio_comfy_cast_weights else ()
-                loading.append(prepend + (module_offload_mem, module_mem, n, m, params))
+                loading.append((module_offload_mem, module_mem, n, m, params))
        return loading

    def load(self, device_to=None, lowvram_model_memory=0, force_patch_weights=False, full_load=False):
@@ -803,7 +773,7 @@ class ModelPatcher:
                        continue

                for param in params:
-                    key = key_param_name_to_key(n, param)
+                    key = "{}.{}".format(n, param)
                    self.unpin_weight(key)
                    self.patch_weight_to_device(key, device_to=device_to)
                if comfy.model_management.is_device_cuda(device_to):
@@ -819,7 +789,7 @@ class ModelPatcher:
                n = x[1]
                params = x[3]
                for param in params:
-                    self.pin_weight_to_device(key_param_name_to_key(n, param))
+                    self.pin_weight_to_device("{}.{}".format(n, param))

            usable_stat = "{:.2f} MB usable,".format(lowvram_model_memory / (1024 * 1024)) if lowvram_model_memory < 1e32 else ""
            if lowvram_counter > 0:
@@ -925,7 +895,7 @@ class ModelPatcher:
                if hasattr(m, "comfy_patched_weights") and m.comfy_patched_weights == True:
                    move_weight = True
                    for param in params:
-                        key = key_param_name_to_key(n, param)
+                        key = "{}.{}".format(n, param)
                        bk = self.backup.get(key, None)
                        if bk is not None:
                            if not lowvram_possible:
@@ -976,7 +946,7 @@ class ModelPatcher:
                        logging.debug("freed {}".format(n))

                        for param in params:
-                            self.pin_weight_to_device(key_param_name_to_key(n, param))
+                            self.pin_weight_to_device("{}.{}".format(n, param))


            self.model.model_lowvram = True
@@ -1014,9 +984,6 @@ class ModelPatcher:

            return self.model.model_loaded_weight_memory - current_used

-    def partially_unload_ram(self, ram_to_unload):
-        pass
-
    def detach(self, unpatch_all=True):
        self.eject_model()
        self.model_patches_to(self.offload_device)
@@ -1350,10 +1317,10 @@ class ModelPatcher:
                                                 key, original_weights=original_weights)
        del original_weights[key]
        if set_func is None:
-            out_weight = comfy.float.stochastic_rounding(out_weight, weight.dtype, seed=comfy.utils.string_to_seed(key))
+            out_weight = comfy.float.stochastic_rounding(out_weight, weight.dtype, seed=string_to_seed(key))
            comfy.utils.copy_to_param(self.model, key, out_weight)
        else:
-            set_func(out_weight, inplace_update=True, seed=comfy.utils.string_to_seed(key))
+            set_func(out_weight, inplace_update=True, seed=string_to_seed(key))
        if self.hook_mode == comfy.hooks.EnumHookMode.MaxSpeed:
            # TODO: disable caching if not enough system RAM to do so
            target_device = self.offload_device
@@ -1388,275 +1355,7 @@ class ModelPatcher:
        self.unpatch_hooks()
        self.clear_cached_hook_weights()

-    def state_dict_for_saving(self, clip_state_dict=None, vae_state_dict=None, clip_vision_state_dict=None):
-        unet_state_dict = self.model.diffusion_model.state_dict()
-        for k, v in unet_state_dict.items():
-            op_keys = k.rsplit('.', 1)
-            if (len(op_keys) < 2) or op_keys[1] not in ["weight", "bias"]:
-                continue
-            try:
-                op = comfy.utils.get_attr(self.model.diffusion_model, op_keys[0])
-            except:
-                continue
-            if not op or not hasattr(op, "comfy_cast_weights") or \
-                (hasattr(op, "comfy_patched_weights") and op.comfy_patched_weights == True):
-                continue
-            key = "diffusion_model." + k
-            unet_state_dict[k] = LazyCastingParam(self, key, comfy.utils.get_attr(self.model, key))
-        return self.model.state_dict_for_saving(unet_state_dict, clip_state_dict=clip_state_dict, vae_state_dict=vae_state_dict, clip_vision_state_dict=clip_vision_state_dict)
-
    def __del__(self):
        self.unpin_all_weights()
        self.detach(unpatch_all=False)

-class ModelPatcherDynamic(ModelPatcher):
-
-    def __new__(cls, model=None, load_device=None, offload_device=None, size=0, weight_inplace_update=False):
-        if load_device is not None and comfy.model_management.is_device_cpu(load_device):
-            #reroute to default MP for CPUs
-            return ModelPatcher(model, load_device, offload_device, size, weight_inplace_update)
-        return super().__new__(cls)
-
-    def __init__(self, model, load_device, offload_device, size=0, weight_inplace_update=False):
-        super().__init__(model, load_device, offload_device, size, weight_inplace_update)
-        #this is now way more dynamic and we dont support the same base model for both Dynamic
-        #and non-dynamic patchers.
-        if hasattr(self.model, "model_loaded_weight_memory"):
-            del self.model.model_loaded_weight_memory
-        if not hasattr(self.model, "dynamic_vbars"):
-            self.model.dynamic_vbars = {}
-        assert load_device is not None
-
-    def is_dynamic(self):
-        return True
-
-    def _vbar_get(self, create=False):
-        if self.load_device == torch.device("cpu"):
-            return None
-        vbar = self.model.dynamic_vbars.get(self.load_device, None)
-        if create and vbar is None:
-            # x10. We dont know what model defined type casts we have in the vbar, but virtual address
-            # space is pretty free. This will cover someone casting an entire model from FP4 to FP32
-            # with some left over.
-            vbar = comfy_aimdo.model_vbar.ModelVBAR(self.model_size() * 10, self.load_device.index)
-            self.model.dynamic_vbars[self.load_device] = vbar
-        return vbar
-
-    def loaded_size(self):
-        vbar = self._vbar_get()
-        if vbar is None:
-            return 0
-        return vbar.loaded_size()
-
-    def get_free_memory(self, device):
-        #NOTE: on high condition / batch counts, estimate should have already vacated
-        #all non-dynamic models so this is safe even if its not 100% true that this
-        #would all be avaiable for inference use.
-        return comfy.model_management.get_total_memory(device) - self.model_size()
-
-    #Pinning is deferred to ops time. Assert against this API to avoid pin leaks.
-
-    def pin_weight_to_device(self, key):
-        raise RuntimeError("pin_weight_to_device invalid for dymamic weight loading")
-
-    def unpin_weight(self, key):
-        raise RuntimeError("unpin_weight invalid for dymamic weight loading")
-
-    def unpin_all_weights(self):
-        self.partially_unload_ram(1e32)
-
-    def memory_required(self, input_shape):
-        #Pad this significantly. We are trying to get away from precise estimates. This
-        #estimate is only used when using the ModelPatcherDynamic after ModelPatcher. If you
-        #use all ModelPatcherDynamic this is ignored and its all done dynamically.
-        return super().memory_required(input_shape=input_shape) * 1.3 + (1024 ** 3)
-
-
-    def load(self, device_to=None, lowvram_model_memory=0, force_patch_weights=False, full_load=False, dirty=False):
-
-        #Force patching doesn't make sense in Dynamic loading, as you dont know what does and
-        #doesn't need to be forced at this stage. The only thing you could do would be patch
-        #it all on CPU which consumes huge RAM.
-        assert not force_patch_weights
-
-        #Full load doesn't make sense as we dont actually have any loader capability here and
-        #now.
-        assert not full_load
-
-        assert device_to == self.load_device
-
-        num_patches = 0
-        allocated_size = 0
-
-        with self.use_ejected():
-            self.unpatch_hooks()
-
-            vbar = self._vbar_get(create=True)
-            if vbar is not None:
-                vbar.prioritize()
-
-            #We force reserve VRAM for the non comfy-weight so we dont have to deal
-            #with pin and unpin syncrhonization which can be expensive for small weights
-            #with a high layer rate (e.g. autoregressive LLMs).
-            #prioritize the non-comfy weights (note the order reverse).
-            loading = self._load_list(prio_comfy_cast_weights=True, default_device=device_to)
-            loading.sort(reverse=True)
-
-            for x in loading:
-                _, _, _, n, m, params = x
-
-                def set_dirty(item, dirty):
-                    if dirty or not hasattr(item, "_v_signature"):
-                        item._v_signature = None
-
-                def setup_param(self, m, n, param_key):
-                    nonlocal num_patches
-                    key = key_param_name_to_key(n, param_key)
-
-                    weight_function = []
-
-                    weight, _, _ = get_key_weight(self.model, key)
-                    if weight is None:
-                        return (False, 0)
-                    if key in self.patches:
-                        if comfy.lora.calculate_shape(self.patches[key], weight, key) != weight.shape:
-                            return (True, 0)
-                        setattr(m, param_key + "_lowvram_function", LowVramPatch(key, self.patches))
-                        num_patches += 1
-                    else:
-                        setattr(m, param_key + "_lowvram_function", None)
-
-                    if key in self.weight_wrapper_patches:
-                        weight_function.extend(self.weight_wrapper_patches[key])
-                    setattr(m, param_key + "_function", weight_function)
-                    geometry = weight
-                    if not isinstance(weight, QuantizedTensor):
-                        model_dtype = getattr(m, param_key + "_comfy_model_dtype", None) or weight.dtype
-                        weight._model_dtype = model_dtype
-                        geometry = comfy.memory_management.TensorGeometry(shape=weight.shape, dtype=model_dtype)
-                    return (False, comfy.memory_management.vram_aligned_size(geometry))
-
-                def force_load_param(self, param_key, device_to):
-                    key = key_param_name_to_key(n, param_key)
-                    if key in self.backup:
-                        comfy.utils.set_attr_param(self.model, key, self.backup[key].weight)
-                    self.patch_weight_to_device(key, device_to=device_to)
-
-                if hasattr(m, "comfy_cast_weights"):
-                    m.comfy_cast_weights = True
-                    m.pin_failed = False
-                    m.seed_key = n
-                    set_dirty(m, dirty)
-
-                    force_load, v_weight_size = setup_param(self, m, n, "weight")
-                    force_load_bias, v_weight_bias = setup_param(self, m, n, "bias")
-                    force_load = force_load or force_load_bias
-                    v_weight_size += v_weight_bias
-
-                    if force_load:
-                        logging.info(f"Module {n} has resizing Lora - force loading")
-                        force_load_param(self, "weight", device_to)
-                        force_load_param(self, "bias", device_to)
-                    else:
-                        if vbar is not None and not hasattr(m, "_v"):
-                            m._v = vbar.alloc(v_weight_size)
-                        allocated_size += v_weight_size
-
-                else:
-                    for param in params:
-                        key = key_param_name_to_key(n, param)
-                        weight, _, _ = get_key_weight(self.model, key)
-                        weight.seed_key = key
-                        set_dirty(weight, dirty)
-                        geometry = weight
-                        model_dtype = getattr(m, param + "_comfy_model_dtype", None) or weight.dtype
-                        geometry = comfy.memory_management.TensorGeometry(shape=weight.shape, dtype=model_dtype)
-                        weight_size = geometry.numel() * geometry.element_size()
-                        if vbar is not None and not hasattr(weight, "_v"):
-                            weight._v = vbar.alloc(weight_size)
-                            weight._model_dtype = model_dtype
-                        allocated_size += weight_size
-                    vbar.set_watermark_limit(allocated_size)
-
-                move_weight_functions(m, device_to)
-
-            logging.info(f"Model {self.model.__class__.__name__} prepared for dynamic VRAM loading. {allocated_size // (1024 ** 2)}MB Staged. {num_patches} patches attached.")
-
-            self.model.device = device_to
-            self.model.current_weight_patches_uuid = self.patches_uuid
-
-            for callback in self.get_all_callbacks(CallbacksMP.ON_LOAD):
-                #These are all super dangerous. Who knows what the custom nodes actually do here...
-                callback(self, device_to, lowvram_model_memory, force_patch_weights, full_load)
-
-            self.apply_hooks(self.forced_hooks, force_apply=True)
-
-    def partially_unload(self, device_to, memory_to_free=0, force_patch_weights=False):
-        assert not force_patch_weights #See above
-        assert self.load_device != torch.device("cpu")
-
-        vbar = self._vbar_get()
-        return 0 if vbar is None else vbar.free_memory(memory_to_free)
-
-    def partially_unload_ram(self, ram_to_unload):
-        loading = self._load_list(prio_comfy_cast_weights=True, default_device=self.offload_device)
-        for x in loading:
-            _, _, _, _, m, _ = x
-            ram_to_unload -= comfy.pinned_memory.unpin_memory(m)
-            if ram_to_unload <= 0:
-                return
-
-    def patch_model(self, device_to=None, lowvram_model_memory=0, load_weights=True, force_patch_weights=False):
-        #This isn't used by the core at all and can only be to load a model out of
-        #the control of proper model_managment. If you are a custom node author reading
-        #this, the correct pattern is to call load_models_gpu() to get a proper
-        #managed load of your model.
-        assert not load_weights
-        return super().patch_model(load_weights=load_weights, force_patch_weights=force_patch_weights)
-
-    def unpatch_model(self, device_to=None, unpatch_weights=True):
-        super().unpatch_model(device_to=None, unpatch_weights=False)
-
-        if unpatch_weights:
-            self.partially_unload_ram(1e32)
-            self.partially_unload(None, 1e32)
-            for m in self.model.modules():
-                move_weight_functions(m, device_to)
-
-            keys = list(self.backup.keys())
-            for k in keys:
-                bk = self.backup[k]
-                comfy.utils.set_attr_param(self.model, k, bk.weight)
-
-    def partially_load(self, device_to, extra_memory=0, force_patch_weights=False):
-        assert not force_patch_weights #See above
-        with self.use_ejected(skip_and_inject_on_exit_only=True):
-            dirty = self.model.current_weight_patches_uuid is not None and (self.model.current_weight_patches_uuid != self.patches_uuid)
-
-            self.unpatch_model(self.offload_device, unpatch_weights=False)
-            self.patch_model(load_weights=False)
-
-            try:
-                self.load(device_to, dirty=dirty)
-            except Exception as e:
-                self.detach()
-                raise e
-            #ModelPatcher::partially_load returns a number on what got loaded but
-            #nothing in core uses this and we have no data in the Dynamic world. Hit
-            #the custom node devs with a None rather than a 0 that would mislead any
-            #logic they might have.
-            return None
-
-    def patch_cached_hook_weights(self, cached_weights: dict, key: str, memory_counter: MemoryCounter):
-        assert False #Should be unreachable - we dont ever cache in the new implementation
-
-    def patch_hook_weight_to_device(self, hooks: comfy.hooks.HookGroup, combined_patches: dict, key: str, original_weights: dict, memory_counter: MemoryCounter):
-        if key not in combined_patches:
-            return
-
-        raise RuntimeError("Hooks not implemented in ModelPatcherDynamic. Please remove --fast arguments form ComfyUI startup")
-
-    def unpatch_hooks(self, whitelist_keys_set: set[str]=None) -> None:
-        pass
-
-CoreModelPatcher = ModelPatcher
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
bymyself	1ad4b76b55	Add comprehensive test suite for assets API - conftest.py: Test fixtures (in-memory SQLite, mock UserManager, test image) - schemas_test.py: 98 tests for Pydantic input validation - helpers_test.py: 50 tests for utility functions - queries_crud_test.py: 27 tests for core CRUD operations - queries_filter_test.py: 28 tests for filtering/pagination - queries_tags_test.py: 24 tests for tag operations - routes_upload_test.py: 18 tests for upload endpoints - routes_read_update_test.py: 21 tests for read/update endpoints - routes_tags_delete_test.py: 17 tests for tags/delete endpoints Total: 283 tests covering all 12 asset API endpoints Amp-Thread-ID: https://ampcode.com/threads/T-019be932-d48b-76b9-843a-790e9d2a1f58 Co-authored-by: Amp <amp@ampcode.com>	2026-01-22 23:15:19 -08:00
Jedrzej Kosinski	facda426b4	Remove extra whitespace at end of routes.py	2026-01-16 01:04:26 -08:00
Jedrzej Kosinski	65a5992f2d	Remove unnecessary logging statement used for testing	2026-01-16 01:02:40 -08:00
Jedrzej Kosinski	287da646e5	Finished @ROUTES.post("/api/assets/scan/seed")	2026-01-16 01:01:49 -08:00
Jedrzej Kosinski	63f9f1b11b	Finish @ROUTES.delete(f"/api/assets/{{id:{UUID_RE}}}/tags")	2026-01-16 00:50:13 -08:00
Jedrzej Kosinski	9e3f559189	Finished @ROUTES.post(f"/api/assets/{{id:{UUID_RE}}}/tags")	2026-01-16 00:45:36 -08:00
Jedrzej Kosinski	63c98d0c75	Finished @ROUTES.delete(f"/api/assets/{{id:{UUID_RE}}}")	2026-01-16 00:31:06 -08:00
Jedrzej Kosinski	e69a5aa1be	Finished @ROUTES.put(f"/api/assets/{{id:{UUID_RE}}}/preview")	2026-01-16 00:14:03 -08:00
Jedrzej Kosinski	e0c063f93e	Finished @ROUTES.put(f"/api/assets/{{id:{UUID_RE}}}")	2026-01-15 23:57:23 -08:00
Jedrzej Kosinski	6db4f4e3f1	Finished @ROUTES.post("/api/assets")	2026-01-15 23:41:19 -08:00
Jedrzej Kosinski	41d364030b	Finished @ROUTES.post("/api/assets/from-hash")	2026-01-15 23:09:54 -08:00
Jedrzej Kosinski	fab9b71f5d	Finished @ROUTES.head("/api/assets/hash/{hash}")	2026-01-15 21:13:34 -08:00
Jedrzej Kosinski	e5c1de4777	Finished @ROUTES.get(f"/api/assets/{{id:{UUID_RE}}}/content")	2026-01-15 21:00:35 -08:00
Jedrzej Kosinski	a5ed151e51	Merge branch 'master' into assets-redo-part2	2026-01-15 20:34:44 -08:00
Jedrzej Kosinski	e527b72b09	more progress	2026-01-15 18:16:00 -08:00
Jedrzej Kosinski	f14129947c	in progress GET /api/assets/{uuid}/content endpoint support	2026-01-14 22:54:21 -08:00
				`@@ -1 +0,0 @@`
				{"revision": 0, "last_node_id": 29, "last_link_id": 0, "nodes": [{"id": 29, "type": "4c9d6ea4-b912-40e5-8766-6793a9758c53", "pos": [1970, -230], "size": [180, 86], "flags": {}, "order": 5, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": null}], "outputs": [{"label": "R", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": []}, {"label": "G", "localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": []}, {"label": "B", "localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": []}, {"label": "A", "localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": []}], "title": "Image Channels", "properties": {"proxyWidgets": []}, "widgets_values": []}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "4c9d6ea4-b912-40e5-8766-6793a9758c53", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 28, "lastLinkId": 39, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Image Channels", "inputNode": {"id": -10, "bounding": [1820, -185, 120, 60]}, "outputNode": {"id": -20, "bounding": [2460, -215, 120, 120]}, "inputs": [{"id": "3522932b-2d86-4a1f-a02a-cb29f3a9d7fe", "name": "images.image0", "type": "IMAGE", "linkIds": [39], "localized_name": "images.image0", "label": "image", "pos": [1920, -165]}], "outputs": [{"id": "605cb9c3-b065-4d9b-81d2-3ec331889b2b", "name": "IMAGE0", "type": "IMAGE", "linkIds": [26], "localized_name": "IMAGE0", "label": "R", "pos": [2480, -195]}, {"id": "fb44a77e-0522-43e9-9527-82e7465b3596", "name": "IMAGE1", "type": "IMAGE", "linkIds": [27], "localized_name": "IMAGE1", "label": "G", "pos": [2480, -175]}, {"id": "81460ee6-0131-402a-874f-6bf3001fc4ff", "name": "IMAGE2", "type": "IMAGE", "linkIds": [28], "localized_name": "IMAGE2", "label": "B", "pos": [2480, -155]}, {"id": "ae690246-80d4-4951-b1d9-9306d8a77417", "name": "IMAGE3", "type": "IMAGE", "linkIds": [29], "localized_name": "IMAGE3", 
"label": "A", "pos": [2480, -135]}], "widgets": [], "nodes": [{"id": 23, "type": "GLSLShader", "pos": [2000, -330], "size": [400, 172], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": 39}, {"localized_name": "fragment_shader", "name": "fragment_shader", "type": "STRING", "widget": {"name": "fragment_shader"}, "link": null}, {"localized_name": "size_mode", "name": "size_mode", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "size_mode"}, "link": null}, {"label": "image1", "localized_name": "images.image1", "name": "images.image1", "shape": 7, "type": "IMAGE", "link": null}], "outputs": [{"label": "R", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": [26]}, {"label": "G", "localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": [27]}, {"label": "B", "localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": [28]}, {"label": "A", "localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": [29]}], "properties": {"Node name for S&R": "GLSLShader"}, "widgets_values": ["#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\nlayout(location = 1) out vec4 fragColor1;\nlayout(location = 2) out vec4 fragColor2;\nlayout(location = 3) out vec4 fragColor3;\n\nvoid main() {\n vec4 color = texture(u_image0, v_texCoord);\n // Output each channel as grayscale to separate render targets\n fragColor0 = vec4(vec3(color.r), 1.0); // Red channel\n fragColor1 = vec4(vec3(color.g), 1.0); // Green channel\n fragColor2 = vec4(vec3(color.b), 1.0); // Blue channel\n fragColor3 = vec4(vec3(color.a), 1.0); // Alpha channel\n}\n", "from_input"]}], "groups": [], "links": [{"id": 39, "origin_id": -10, "origin_slot": 0, "target_id": 23, "target_slot": 0, "type": "IMAGE"}, {"id": 26, "origin_id": 23, "origin_slot": 0, "target_id": -20, "target_slot": 
0, "type": "IMAGE"}, {"id": 27, "origin_id": 23, "origin_slot": 1, "target_id": -20, "target_slot": 1, "type": "IMAGE"}, {"id": 28, "origin_id": 23, "origin_slot": 2, "target_id": -20, "target_slot": 2, "type": "IMAGE"}, {"id": 29, "origin_id": 23, "origin_slot": 3, "target_id": -20, "target_slot": 3, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image Tools/Color adjust"}]}}
				`@@ -1 +0,0 @@`
				{"revision": 0, "last_node_id": 15, "last_link_id": 0, "nodes": [{"id": 15, "type": "24d8bbfd-39d4-4774-bff0-3de40cc7a471", "pos": [-1490, 2040], "size": [400, 260], "flags": {}, "order": 0, "mode": 0, "inputs": [{"name": "prompt", "type": "STRING", "widget": {"name": "prompt"}, "link": null}, {"label": "reference images", "name": "images", "type": "IMAGE", "link": null}], "outputs": [{"name": "STRING", "type": "STRING", "links": null}], "title": "Prompt Enhance", "properties": {"proxyWidgets": [["-1", "prompt"]], "cnr_id": "comfy-core", "ver": "0.14.1"}, "widgets_values": [""]}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "24d8bbfd-39d4-4774-bff0-3de40cc7a471", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 15, "lastLinkId": 14, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Prompt Enhance", "inputNode": {"id": -10, "bounding": [-2170, 2110, 138.876953125, 80]}, "outputNode": {"id": -20, "bounding": [-640, 2110, 120, 60]}, "inputs": [{"id": "aeab7216-00e0-4528-a09b-bba50845c5a6", "name": "prompt", "type": "STRING", "linkIds": [11], "pos": [-2051.123046875, 2130]}, {"id": "7b73fd36-aa31-4771-9066-f6c83879994b", "name": "images", "type": "IMAGE", "linkIds": [14], "label": "reference images", "pos": [-2051.123046875, 2150]}], "outputs": [{"id": "c7b0d930-68a1-48d1-b496-0519e5837064", "name": "STRING", "type": "STRING", "linkIds": [13], "pos": [-620, 2130]}], "widgets": [], "nodes": [{"id": 11, "type": "GeminiNode", "pos": [-1560, 1990], "size": [470, 470], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "shape": 7, "type": "IMAGE", "link": 14}, {"localized_name": "audio", "name": "audio", "shape": 7, "type": "AUDIO", "link": null}, {"localized_name": "video", "name": "video", "shape": 7, "type": "VIDEO", "link": null}, {"localized_name": "files", "name": "files", "shape": 7, "type": "GEMINI_INPUT_FILES", "link": null}, {"localized_name": "prompt", "name": "prompt", "type": 
"STRING", "widget": {"name": "prompt"}, "link": 11}, {"localized_name": "model", "name": "model", "type": "COMBO", "widget": {"name": "model"}, "link": null}, {"localized_name": "seed", "name": "seed", "type": "INT", "widget": {"name": "seed"}, "link": null}, {"localized_name": "system_prompt", "name": "system_prompt", "shape": 7, "type": "STRING", "widget": {"name": "system_prompt"}, "link": null}], "outputs": [{"localized_name": "STRING", "name": "STRING", "type": "STRING", "links": [13]}], "properties": {"cnr_id": "comfy-core", "ver": "0.14.1", "Node name for S&R": "GeminiNode"}, "widgets_values": ["", "gemini-3-pro-preview", 42, "randomize", "You are an expert in prompt writing.\nBased on the input, rewrite the user's input into a detailed prompt.\nincluding camera settings, lighting, composition, and style.\nReturn the prompt only"], "color": "#432", "bgcolor": "#653"}], "groups": [], "links": [{"id": 11, "origin_id": -10, "origin_slot": 0, "target_id": 11, "target_slot": 4, "type": "STRING"}, {"id": 13, "origin_id": 11, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "STRING"}, {"id": 14, "origin_id": -10, "origin_slot": 1, "target_id": 11, "target_slot": 0, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Text generation/Prompt enhance"}]}, "extra": {}}
				`@@ -1 +0,0 @@`
				{"revision": 0, "last_node_id": 25, "last_link_id": 0, "nodes": [{"id": 25, "type": "621ba4e2-22a8-482d-a369-023753198b7b", "pos": [4610, -790], "size": [230, 58], "flags": {}, "order": 4, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": null}], "outputs": [{"label": "IMAGE", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": []}], "title": "Sharpen", "properties": {"proxyWidgets": [["24", "value"]]}, "widgets_values": []}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "621ba4e2-22a8-482d-a369-023753198b7b", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 24, "lastLinkId": 36, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Sharpen", "inputNode": {"id": -10, "bounding": [4090, -825, 120, 60]}, "outputNode": {"id": -20, "bounding": [5150, -825, 120, 60]}, "inputs": [{"id": "37011fb7-14b7-4e0e-b1a0-6a02e8da1fd7", "name": "images.image0", "type": "IMAGE", "linkIds": [34], "localized_name": "images.image0", "label": "image", "pos": [4190, -805]}], "outputs": [{"id": "e9182b3f-635c-4cd4-a152-4b4be17ae4b9", "name": "IMAGE0", "type": "IMAGE", "linkIds": [35], "localized_name": "IMAGE0", "label": "IMAGE", "pos": [5170, -805]}], "widgets": [], "nodes": [{"id": 24, "type": "PrimitiveFloat", "pos": [4280, -1240], "size": [270, 58], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "strength", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [36]}], "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0, "max": 3, "precision": 2, "step": 0.05}, "widgets_values": [0.5]}, {"id": 23, "type": "GLSLShader", "pos": [4570, -1240], "size": [370, 192], "flags": {}, "order": 1, "mode": 0, "inputs": [{"label": "image0", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", 
"link": 34}, {"label": "image1", "localized_name": "images.image1", "name": "images.image1", "shape": 7, "type": "IMAGE", "link": null}, {"label": "u_float0", "localized_name": "floats.u_float0", "name": "floats.u_float0", "shape": 7, "type": "FLOAT", "link": 36}, {"label": "u_float1", "localized_name": "floats.u_float1", "name": "floats.u_float1", "shape": 7, "type": "FLOAT", "link": null}, {"label": "u_int0", "localized_name": "ints.u_int0", "name": "ints.u_int0", "shape": 7, "type": "INT", "link": null}, {"localized_name": "fragment_shader", "name": "fragment_shader", "type": "STRING", "widget": {"name": "fragment_shader"}, "link": null}, {"localized_name": "size_mode", "name": "size_mode", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "size_mode"}, "link": null}], "outputs": [{"localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": [35]}, {"localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": null}], "properties": {"Node name for S&R": "GLSLShader"}, "widgets_values": ["#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform vec2 u_resolution;\nuniform float u_float0; // strength [0.0 – 2.0] typical: 0.3–1.0\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\n\nvoid main() {\n vec2 texel = 1.0 / u_resolution;\n \n // Sample center and neighbors\n vec4 center = texture(u_image0, v_texCoord);\n vec4 top = texture(u_image0, v_texCoord + vec2( 0.0, -texel.y));\n vec4 bottom = texture(u_image0, v_texCoord + vec2( 0.0, texel.y));\n vec4 left = texture(u_image0, v_texCoord + vec2(-texel.x, 0.0));\n vec4 right = texture(u_image0, v_texCoord + vec2( texel.x, 0.0));\n \n // Edge enhancement (Laplacian)\n vec4 edges = center * 4.0 - top - bottom - left - right;\n \n // Add edges back scaled by strength\n vec4 sharpened = center + edges * 
u_float0;\n \n fragColor0 = vec4(clamp(sharpened.rgb, 0.0, 1.0), center.a);\n}", "from_input"]}], "groups": [], "links": [{"id": 36, "origin_id": 24, "origin_slot": 0, "target_id": 23, "target_slot": 2, "type": "FLOAT"}, {"id": 34, "origin_id": -10, "origin_slot": 0, "target_id": 23, "target_slot": 0, "type": "IMAGE"}, {"id": 35, "origin_id": 23, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image Tools/Sharpen"}]}}
				`@@ -1 +0,0 @@`
				{"revision": 0, "last_node_id": 13, "last_link_id": 0, "nodes": [{"id": 13, "type": "cf95b747-3e17-46cb-8097-cac60ff9b2e1", "pos": [1120, 330], "size": [240, 58], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "video", "name": "video", "type": "VIDEO", "link": null}, {"name": "model_name", "type": "COMBO", "widget": {"name": "model_name"}, "link": null}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", "links": []}], "title": "Video Upscale(GAN x4)", "properties": {"proxyWidgets": [["-1", "model_name"]], "cnr_id": "comfy-core", "ver": "0.14.1"}, "widgets_values": ["RealESRGAN_x4plus.safetensors"]}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "cf95b747-3e17-46cb-8097-cac60ff9b2e1", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 13, "lastLinkId": 19, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Video Upscale(GAN x4)", "inputNode": {"id": -10, "bounding": [550, 460, 120, 80]}, "outputNode": {"id": -20, "bounding": [1490, 460, 120, 60]}, "inputs": [{"id": "666d633e-93e7-42dc-8d11-2b7b99b0f2a6", "name": "video", "type": "VIDEO", "linkIds": [10], "localized_name": "video", "pos": [650, 480]}, {"id": "2e23a087-caa8-4d65-99e6-662761aa905a", "name": "model_name", "type": "COMBO", "linkIds": [19], "pos": [650, 500]}], "outputs": [{"id": "0c1768ea-3ec2-412f-9af6-8e0fa36dae70", "name": "VIDEO", "type": "VIDEO", "linkIds": [15], "localized_name": "VIDEO", "pos": [1510, 480]}], "widgets": [], "nodes": [{"id": 2, "type": "ImageUpscaleWithModel", "pos": [1110, 450], "size": [320, 46], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "upscale_model", "name": "upscale_model", "type": "UPSCALE_MODEL", "link": 1}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 14}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [13]}], "properties": {"cnr_id": "comfy-core", "ver": "0.10.0", "Node name for S&R": 
"ImageUpscaleWithModel"}}, {"id": 11, "type": "CreateVideo", "pos": [1110, 550], "size": [320, 78], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "link": 13}, {"localized_name": "audio", "name": "audio", "shape": 7, "type": "AUDIO", "link": 16}, {"localized_name": "fps", "name": "fps", "type": "FLOAT", "widget": {"name": "fps"}, "link": 12}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", "links": [15]}], "properties": {"cnr_id": "comfy-core", "ver": "0.10.0", "Node name for S&R": "CreateVideo"}, "widgets_values": [30]}, {"id": 10, "type": "GetVideoComponents", "pos": [1110, 330], "size": [320, 70], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "video", "name": "video", "type": "VIDEO", "link": 10}], "outputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "links": [14]}, {"localized_name": "audio", "name": "audio", "type": "AUDIO", "links": [16]}, {"localized_name": "fps", "name": "fps", "type": "FLOAT", "links": [12]}], "properties": {"cnr_id": "comfy-core", "ver": "0.10.0", "Node name for S&R": "GetVideoComponents"}}, {"id": 1, "type": "UpscaleModelLoader", "pos": [750, 450], "size": [280, 60], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "model_name", "name": "model_name", "type": "COMBO", "widget": {"name": "model_name"}, "link": 19}], "outputs": [{"localized_name": "UPSCALE_MODEL", "name": "UPSCALE_MODEL", "type": "UPSCALE_MODEL", "links": [1]}], "properties": {"cnr_id": "comfy-core", "ver": "0.10.0", "Node name for S&R": "UpscaleModelLoader", "models": [{"name": "RealESRGAN_x4plus.safetensors", "url": "https://huggingface.co/Comfy-Org/Real-ESRGAN_repackaged/resolve/main/RealESRGAN_x4plus.safetensors", "directory": "upscale_models"}]}, "widgets_values": ["RealESRGAN_x4plus.safetensors"]}], "groups": [], "links": [{"id": 1, "origin_id": 1, "origin_slot": 0, "target_id": 2, "target_slot": 0, "type": 
"UPSCALE_MODEL"}, {"id": 14, "origin_id": 10, "origin_slot": 0, "target_id": 2, "target_slot": 1, "type": "IMAGE"}, {"id": 13, "origin_id": 2, "origin_slot": 0, "target_id": 11, "target_slot": 0, "type": "IMAGE"}, {"id": 16, "origin_id": 10, "origin_slot": 1, "target_id": 11, "target_slot": 1, "type": "AUDIO"}, {"id": 12, "origin_id": 10, "origin_slot": 2, "target_id": 11, "target_slot": 2, "type": "FLOAT"}, {"id": 10, "origin_id": -10, "origin_slot": 0, "target_id": 10, "target_slot": 0, "type": "VIDEO"}, {"id": 15, "origin_id": 11, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "VIDEO"}, {"id": 19, "origin_id": -10, "origin_slot": 1, "target_id": 1, "target_slot": 0, "type": "COMBO"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Video generation and editing/Enhance video"}]}, "extra": {}}