Fix some fp8 scaled checkpoints no longer working.

Update README with note on model support (#13235)
Added note about additional supported models in ComfyUI.
2026-04-02 10:49:08 +00:00 · 2026-03-31 11:21:49 -04:00 · 2026-03-30 23:11:02 -04:00 · 2026-03-29 21:02:44 -07:00 · 2026-03-29 19:07:38 -07:00 · 2026-03-29 16:43:24 -07:00
210 changed files with 504108 additions and 16653 deletions
--- a/.github/scripts/check-ai-co-authors.sh
+++ b/.github/scripts/check-ai-co-authors.sh
@@ -0,0 +1,103 @@
+#!/usr/bin/env bash
+# Checks pull request commits for AI agent Co-authored-by trailers.
+# Exits non-zero when any are found and prints fix instructions.
+set -euo pipefail
+
+base_sha="${1:?usage: check-ai-co-authors.sh <base_sha> <head_sha>}"
+head_sha="${2:?usage: check-ai-co-authors.sh <base_sha> <head_sha>}"
+
+# Known AI coding-agent trailer patterns (case-insensitive).
+# Each entry is an extended-regex fragment matched against Co-authored-by lines.
+AGENT_PATTERNS=(
+    # Anthropic — Claude Code / Amp
+    'noreply@anthropic\.com'
+    # Cursor
+    'cursoragent@cursor\.com'
+    # GitHub Copilot
+    'copilot-swe-agent\[bot\]'
+    'copilot@github\.com'
+    # OpenAI Codex
+    'noreply@openai\.com'
+    'codex@openai\.com'
+    # Aider
+    'aider@aider\.chat'
+    # Google — Gemini / Jules
+    'gemini@google\.com'
+    'jules@google\.com'
+    # Windsurf / Codeium
+    '@codeium\.com'
+    # Devin
+    'devin-ai-integration\[bot\]'
+    'devin@cognition\.ai'
+    'devin@cognition-labs\.com'
+    # Amazon Q Developer
+    'amazon-q-developer'
+    '@amazon\.com.*[Qq].[Dd]eveloper'
+    # Cline
+    'cline-bot'
+    'cline@cline\.ai'
+    # Continue
+    'continue-agent'
+    'continue@continue\.dev'
+    # Sourcegraph
+    'noreply@sourcegraph\.com'
+    # Generic catch-alls for common agent name patterns
+    'Co-authored-by:.*\b[Cc]laude\b'
+    'Co-authored-by:.*\b[Cc]opilot\b'
+    'Co-authored-by:.*\b[Cc]ursor\b'
+    'Co-authored-by:.*\b[Cc]odex\b'
+    'Co-authored-by:.*\b[Gg]emini\b'
+    'Co-authored-by:.*\b[Aa]ider\b'
+    'Co-authored-by:.*\b[Dd]evin\b'
+    'Co-authored-by:.*\b[Ww]indsurf\b'
+    'Co-authored-by:.*\b[Cc]line\b'
+    'Co-authored-by:.*\b[Aa]mazon Q\b'
+    'Co-authored-by:.*\b[Jj]ules\b'
+    'Co-authored-by:.*\bOpenCode\b'
+)
+
+# Build a single alternation regex from all patterns.
+regex=""
+for pattern in "${AGENT_PATTERNS[@]}"; do
+    if [[ -n "$regex" ]]; then
+        regex="${regex}|${pattern}"
+    else
+        regex="$pattern"
+    fi
+done
+
+# Collect Co-authored-by lines from every commit in the PR range.
+violations=""
+while IFS= read -r sha; do
+    message="$(git log -1 --format='%B' "$sha")"
+    matched_lines="$(echo "$message" | grep -iE "^Co-authored-by:" || true)"
+    if [[ -z "$matched_lines" ]]; then
+        continue
+    fi
+
+    while IFS= read -r line; do
+        if echo "$line" | grep -iqE "$regex"; then
+            short="$(git log -1 --format='%h' "$sha")"
+            violations="${violations}  ${short}: ${line}"$'\n'
+        fi
+    done <<< "$matched_lines"
+done < <(git rev-list "${base_sha}..${head_sha}")
+
+if [[ -n "$violations" ]]; then
+    echo "::error::AI agent Co-authored-by trailers detected in PR commits."
+    echo ""
+    echo "The following commits contain Co-authored-by trailers from AI coding agents:"
+    echo ""
+    echo "$violations"
+    echo "These trailers should be removed before merging."
+    echo ""
+    echo "To fix, rewrite the commit messages with:"
+    echo "  git rebase -i ${base_sha}"
+    echo ""
+    echo "and remove the Co-authored-by lines, then force-push your branch."
+    echo ""
+    echo "If you believe this is a false positive, please open an issue."
+    exit 1
+fi
+
+echo "No AI agent Co-authored-by trailers found."
--- a/.github/workflows/check-ai-co-authors.yml
+++ b/.github/workflows/check-ai-co-authors.yml
@@ -0,0 +1,19 @@
+name: Check AI Co-Authors
+
+on:
+  pull_request:
+    # '**' matches every base branch, including names that contain '/'.
+    # A bare '*' does not match '/', so PRs into e.g. release/x were skipped.
+    branches: ['**']
+
+jobs:
+  check-ai-co-authors:
+    name: Check for AI agent co-author trailers
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          # Full history is needed so both base and head SHAs are reachable.
+          fetch-depth: 0
+
+      - name: Check commits for AI co-author trailers
+        run: bash .github/scripts/check-ai-co-authors.sh "${{ github.event.pull_request.base.sha }}" "${{ github.event.pull_request.head.sha }}"
--- a/.gitignore
+++ b/.gitignore
@@ -24,4 +24,3 @@ web_custom_versions/
 openapi.yaml
 filtered-openapi.yaml
 uv.lock
-.pyisolate_venvs/
--- a/README.md
+++ b/README.md
@@ -38,6 +38,8 @@ ComfyUI lets you design and execute advanced stable diffusion pipelines using a

 ## Get Started

+### Local
+
 #### [Desktop Application](https://www.comfy.org/download)
 - The easiest way to get started.
 - Available on Windows & macOS.
@@ -49,11 +51,17 @@ ComfyUI lets you design and execute advanced stable diffusion pipelines using a
 #### [Manual Install](#manual-install-windows-linux)
 Supports all operating systems and GPU types (NVIDIA, AMD, Intel, Apple Silicon, Ascend).

-## [Examples](https://comfyanonymous.github.io/ComfyUI_examples/)
-See what ComfyUI can do with the [example workflows](https://comfyanonymous.github.io/ComfyUI_examples/).
+### Cloud
+
+#### [Comfy Cloud](https://www.comfy.org/cloud)
+- Our official paid cloud version for those who can't afford local hardware.
+
+## Examples
+See what ComfyUI can do with the [newer template workflows](https://comfy.org/workflows) or old [example workflows](https://comfyanonymous.github.io/ComfyUI_examples/).

 ## Features
 - Nodes/graph/flowchart interface to experiment and create complex Stable Diffusion workflows without needing to code anything.
+- NOTE: Many more models are supported than the list below shows; to see everything that is supported, check the templates list inside ComfyUI.
 - Image Models
   - SD1.x, SD2.x ([unCLIP](https://comfyanonymous.github.io/ComfyUI_examples/unclip/))
   - [SDXL](https://comfyanonymous.github.io/ComfyUI_examples/sdxl/), [SDXL Turbo](https://comfyanonymous.github.io/ComfyUI_examples/sdturbo/)
@@ -225,7 +233,7 @@ Put your VAE in: models/vae

 AMD users can install rocm and pytorch with pip if you don't have it already installed, this is the command to install the stable version:

-```pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm7.1```
+```pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm7.2```

 This is the command to install the nightly with ROCm 7.2 which might have some performance improvements:

--- a/alembic_db/env.py
+++ b/alembic_db/env.py
@@ -8,7 +8,7 @@ from alembic import context
 config = context.config


-from app.database.models import Base
+from app.database.models import Base, NAMING_CONVENTION
 target_metadata = Base.metadata

 # other values from the config, defined by the needs of env.py,
@@ -51,7 +51,10 @@ def run_migrations_online() -> None:

    with connectable.connect() as connection:
        context.configure(
-            connection=connection, target_metadata=target_metadata
+            connection=connection,
+            target_metadata=target_metadata,
+            render_as_batch=True,
+            naming_convention=NAMING_CONVENTION,
        )

        with context.begin_transaction():
--- a/alembic_db/versions/0003_add_metadata_job_id.py
+++ b/alembic_db/versions/0003_add_metadata_job_id.py
@@ -0,0 +1,98 @@
+"""
+Add system_metadata and job_id columns to asset_references.
+Change preview_id FK from assets.id to asset_references.id.
+
+Revision ID: 0003_add_metadata_job_id
+Revises: 0002_merge_to_asset_references
+Create Date: 2026-03-09
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+from app.database.models import NAMING_CONVENTION
+
+revision = "0003_add_metadata_job_id"
+down_revision = "0002_merge_to_asset_references"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Apply revision 0003 to ``asset_references`` and ``asset_reference_meta``.
+
+    Steps, in order:
+    1. Add nullable ``system_metadata`` (JSON) and ``job_id`` (String(36))
+       columns to ``asset_references``.
+    2. Null out every ``preview_id``, then replace its foreign key to
+       ``assets`` with a self-referential one to ``asset_references.id``
+       (``ON DELETE SET NULL``) and index the column.
+    3. Delete ``asset_reference_meta`` rows whose four value columns are all
+       NULL, then add a CHECK constraint requiring at least one value.
+    """
+    with op.batch_alter_table("asset_references") as batch_op:
+        batch_op.add_column(
+            sa.Column("system_metadata", sa.JSON(), nullable=True)
+        )
+        batch_op.add_column(
+            sa.Column("job_id", sa.String(length=36), nullable=True)
+        )
+
+    # Change preview_id FK from assets.id to asset_references.id (self-ref).
+    # Existing values are asset-content IDs that won't match reference IDs,
+    # so null them out first.
+    op.execute("UPDATE asset_references SET preview_id = NULL WHERE preview_id IS NOT NULL")
+    # The naming convention is passed so the existing FK can be addressed by
+    # its conventional name in drop_constraint below.
+    with op.batch_alter_table(
+        "asset_references", naming_convention=NAMING_CONVENTION
+    ) as batch_op:
+        batch_op.drop_constraint(
+            "fk_asset_references_preview_id_assets", type_="foreignkey"
+        )
+        batch_op.create_foreign_key(
+            "fk_asset_references_preview_id_asset_references",
+            "asset_references",
+            ["preview_id"],
+            ["id"],
+            ondelete="SET NULL",
+        )
+        batch_op.create_index(
+            "ix_asset_references_preview_id", ["preview_id"]
+        )
+
+    # Purge any all-null meta rows before adding the constraint
+    op.execute(
+        "DELETE FROM asset_reference_meta"
+        " WHERE val_str IS NULL AND val_num IS NULL AND val_bool IS NULL AND val_json IS NULL"
+    )
+    with op.batch_alter_table("asset_reference_meta") as batch_op:
+        batch_op.create_check_constraint(
+            "ck_asset_reference_meta_has_value",
+            "val_str IS NOT NULL OR val_num IS NOT NULL OR val_bool IS NOT NULL OR val_json IS NOT NULL",
+        )
+
+
+def downgrade() -> None:
+    # SQLite doesn't reflect CHECK constraints, so we must declare it
+    # explicitly via table_args for the batch recreate to find it.
+    # Use the fully-rendered constraint name to avoid the naming convention
+    # doubling the prefix.
+    with op.batch_alter_table(
+        "asset_reference_meta",
+        table_args=[
+            sa.CheckConstraint(
+                "val_str IS NOT NULL OR val_num IS NOT NULL OR val_bool IS NOT NULL OR val_json IS NOT NULL",
+                name="ck_asset_reference_meta_has_value",
+            ),
+        ],
+    ) as batch_op:
+        batch_op.drop_constraint(
+            "ck_asset_reference_meta_has_value", type_="check"
+        )
+
+    with op.batch_alter_table(
+        "asset_references", naming_convention=NAMING_CONVENTION
+    ) as batch_op:
+        batch_op.drop_index("ix_asset_references_preview_id")
+        batch_op.drop_constraint(
+            "fk_asset_references_preview_id_asset_references", type_="foreignkey"
+        )
+        batch_op.create_foreign_key(
+            "fk_asset_references_preview_id_assets",
+            "assets",
+            ["preview_id"],
+            ["id"],
+            ondelete="SET NULL",
+        )
+
+    with op.batch_alter_table("asset_references") as batch_op:
+        batch_op.drop_column("job_id")
+        batch_op.drop_column("system_metadata")
--- a/app/assets/api/routes.py
+++ b/app/assets/api/routes.py
@@ -13,6 +13,7 @@ from pydantic import ValidationError
 import folder_paths
 from app import user_manager
 from app.assets.api import schemas_in, schemas_out
+from app.assets.services import schemas
 from app.assets.api.schemas_in import (
    AssetValidationError,
    UploadError,
@@ -38,6 +39,7 @@ from app.assets.services import (
    update_asset_metadata,
    upload_from_temp_path,
 )
+from app.assets.services.tagging import list_tag_histogram

 ROUTES = web.RouteTableDef()
 USER_MANAGER: user_manager.UserManager | None = None
@@ -122,6 +124,61 @@ def _validate_sort_field(requested: str | None) -> str:
    return "created_at"


+def _build_preview_url_from_view(tags: list[str], user_metadata: dict[str, Any] | None) -> str | None:
+    """Build a /api/view preview URL from asset tags and user_metadata filename."""
+    if not user_metadata:
+        return None
+    filename = user_metadata.get("filename")
+    if not filename:
+        return None
+
+    if "input" in tags:
+        view_type = "input"
+    elif "output" in tags:
+        view_type = "output"
+    else:
+        return None
+
+    subfolder = ""
+    if "/" in filename:
+        subfolder, filename = filename.rsplit("/", 1)
+
+    encoded_filename = urllib.parse.quote(filename, safe="")
+    url = f"/api/view?type={view_type}&filename={encoded_filename}"
+    if subfolder:
+        url += f"&subfolder={urllib.parse.quote(subfolder, safe='')}"
+    return url
+
+
+def _build_asset_response(result: schemas.AssetDetailResult | schemas.UploadResult) -> schemas_out.Asset:
+    """Build an Asset response from a service result."""
+    if result.ref.preview_id:
+        preview_detail = get_asset_detail(result.ref.preview_id)
+        if preview_detail:
+            preview_url = _build_preview_url_from_view(preview_detail.tags, preview_detail.ref.user_metadata)
+        else:
+            preview_url = None
+    else:
+        preview_url = _build_preview_url_from_view(result.tags, result.ref.user_metadata)
+    return schemas_out.Asset(
+        id=result.ref.id,
+        name=result.ref.name,
+        asset_hash=result.asset.hash if result.asset else None,
+        size=int(result.asset.size_bytes) if result.asset else None,
+        mime_type=result.asset.mime_type if result.asset else None,
+        tags=result.tags,
+        preview_url=preview_url,
+        preview_id=result.ref.preview_id,
+        user_metadata=result.ref.user_metadata or {},
+        metadata=result.ref.system_metadata,
+        job_id=result.ref.job_id,
+        prompt_id=result.ref.job_id,  # deprecated: mirrors job_id for cloud compat
+        created_at=result.ref.created_at,
+        updated_at=result.ref.updated_at,
+        last_access_time=result.ref.last_access_time,
+    )
+
+
@ROUTES.head("/api/assets/hash/{hash}")
@_require_assets_feature_enabled
 async def head_asset_by_hash(request: web.Request) -> web.Response:
@@ -164,20 +221,7 @@ async def list_assets_route(request: web.Request) -> web.Response:
        order=order,
    )

-    summaries = [
-        schemas_out.AssetSummary(
-            id=item.ref.id,
-            name=item.ref.name,
-            asset_hash=item.asset.hash if item.asset else None,
-            size=int(item.asset.size_bytes) if item.asset else None,
-            mime_type=item.asset.mime_type if item.asset else None,
-            tags=item.tags,
-            created_at=item.ref.created_at,
-            updated_at=item.ref.updated_at,
-            last_access_time=item.ref.last_access_time,
-        )
-        for item in result.items
-    ]
+    summaries = [_build_asset_response(item) for item in result.items]

    payload = schemas_out.AssetsList(
        assets=summaries,
@@ -207,18 +251,7 @@ async def get_asset_route(request: web.Request) -> web.Response:
                {"id": reference_id},
            )

-        payload = schemas_out.AssetDetail(
-            id=result.ref.id,
-            name=result.ref.name,
-            asset_hash=result.asset.hash if result.asset else None,
-            size=int(result.asset.size_bytes) if result.asset else None,
-            mime_type=result.asset.mime_type if result.asset else None,
-            tags=result.tags,
-            user_metadata=result.ref.user_metadata or {},
-            preview_id=result.ref.preview_id,
-            created_at=result.ref.created_at,
-            last_access_time=result.ref.last_access_time,
-        )
+        payload = _build_asset_response(result)
    except ValueError as e:
        return _build_error_response(
            404, "ASSET_NOT_FOUND", str(e), {"id": reference_id}
@@ -230,7 +263,7 @@ async def get_asset_route(request: web.Request) -> web.Response:
            USER_MANAGER.get_request_user_id(request),
        )
        return _build_error_response(500, "INTERNAL", "Unexpected server error.")
-    return web.json_response(payload.model_dump(mode="json"), status=200)
+    return web.json_response(payload.model_dump(mode="json", exclude_none=True), status=200)


@ROUTES.get(f"/api/assets/{{id:{UUID_RE}}}/content")
@@ -312,32 +345,31 @@ async def create_asset_from_hash_route(request: web.Request) -> web.Response:
            400, "INVALID_JSON", "Request body must be valid JSON."
        )

+    # Derive name from hash if not provided
+    name = body.name
+    if name is None:
+        name = body.hash.split(":", 1)[1] if ":" in body.hash else body.hash
+
    result = create_from_hash(
        hash_str=body.hash,
-        name=body.name,
+        name=name,
        tags=body.tags,
        user_metadata=body.user_metadata,
        owner_id=USER_MANAGER.get_request_user_id(request),
+        mime_type=body.mime_type,
+        preview_id=body.preview_id,
    )
    if result is None:
        return _build_error_response(
            404, "ASSET_NOT_FOUND", f"Asset content {body.hash} does not exist"
        )

+    asset = _build_asset_response(result)
    payload_out = schemas_out.AssetCreated(
-        id=result.ref.id,
-        name=result.ref.name,
-        asset_hash=result.asset.hash,
-        size=int(result.asset.size_bytes),
-        mime_type=result.asset.mime_type,
-        tags=result.tags,
-        user_metadata=result.ref.user_metadata or {},
-        preview_id=result.ref.preview_id,
-        created_at=result.ref.created_at,
-        last_access_time=result.ref.last_access_time,
+        **asset.model_dump(),
        created_new=result.created_new,
    )
-    return web.json_response(payload_out.model_dump(mode="json"), status=201)
+    return web.json_response(payload_out.model_dump(mode="json", exclude_none=True), status=201)


@ROUTES.post("/api/assets")
@@ -358,6 +390,8 @@ async def upload_asset(request: web.Request) -> web.Response:
                "name": parsed.provided_name,
                "user_metadata": parsed.user_metadata_raw,
                "hash": parsed.provided_hash,
+                "mime_type": parsed.provided_mime_type,
+                "preview_id": parsed.provided_preview_id,
            }
        )
    except ValidationError as ve:
@@ -386,6 +420,8 @@ async def upload_asset(request: web.Request) -> web.Response:
                tags=spec.tags,
                user_metadata=spec.user_metadata or {},
                owner_id=owner_id,
+                mime_type=spec.mime_type,
+                preview_id=spec.preview_id,
            )
            if result is None:
                delete_temp_file_if_exists(parsed.tmp_path)
@@ -410,6 +446,8 @@ async def upload_asset(request: web.Request) -> web.Response:
                client_filename=parsed.file_client_name,
                owner_id=owner_id,
                expected_hash=spec.hash,
+                mime_type=spec.mime_type,
+                preview_id=spec.preview_id,
            )
    except AssetValidationError as e:
        delete_temp_file_if_exists(parsed.tmp_path)
@@ -428,21 +466,13 @@ async def upload_asset(request: web.Request) -> web.Response:
        logging.exception("upload_asset failed for owner_id=%s", owner_id)
        return _build_error_response(500, "INTERNAL", "Unexpected server error.")

-    payload = schemas_out.AssetCreated(
-        id=result.ref.id,
-        name=result.ref.name,
-        asset_hash=result.asset.hash,
-        size=int(result.asset.size_bytes),
-        mime_type=result.asset.mime_type,
-        tags=result.tags,
-        user_metadata=result.ref.user_metadata or {},
-        preview_id=result.ref.preview_id,
-        created_at=result.ref.created_at,
-        last_access_time=result.ref.last_access_time,
+    asset = _build_asset_response(result)
+    payload_out = schemas_out.AssetCreated(
+        **asset.model_dump(),
        created_new=result.created_new,
    )
    status = 201 if result.created_new else 200
-    return web.json_response(payload.model_dump(mode="json"), status=status)
+    return web.json_response(payload_out.model_dump(mode="json", exclude_none=True), status=status)


@ROUTES.put(f"/api/assets/{{id:{UUID_RE}}}")
@@ -464,15 +494,9 @@ async def update_asset_route(request: web.Request) -> web.Response:
            name=body.name,
            user_metadata=body.user_metadata,
            owner_id=USER_MANAGER.get_request_user_id(request),
+            preview_id=body.preview_id,
        )
-        payload = schemas_out.AssetUpdated(
-            id=result.ref.id,
-            name=result.ref.name,
-            asset_hash=result.asset.hash if result.asset else None,
-            tags=result.tags,
-            user_metadata=result.ref.user_metadata or {},
-            updated_at=result.ref.updated_at,
-        )
+        payload = _build_asset_response(result)
    except PermissionError as pe:
        return _build_error_response(403, "FORBIDDEN", str(pe), {"id": reference_id})
    except ValueError as ve:
@@ -486,7 +510,7 @@ async def update_asset_route(request: web.Request) -> web.Response:
            USER_MANAGER.get_request_user_id(request),
        )
        return _build_error_response(500, "INTERNAL", "Unexpected server error.")
-    return web.json_response(payload.model_dump(mode="json"), status=200)
+    return web.json_response(payload.model_dump(mode="json", exclude_none=True), status=200)


@ROUTES.delete(f"/api/assets/{{id:{UUID_RE}}}")
@@ -555,7 +579,7 @@ async def get_tags(request: web.Request) -> web.Response:
    payload = schemas_out.TagsList(
        tags=tags, total=total, has_more=(query.offset + len(tags)) < total
    )
-    return web.json_response(payload.model_dump(mode="json"))
+    return web.json_response(payload.model_dump(mode="json", exclude_none=True))


@ROUTES.post(f"/api/assets/{{id:{UUID_RE}}}/tags")
@@ -603,7 +627,7 @@ async def add_asset_tags(request: web.Request) -> web.Response:
        )
        return _build_error_response(500, "INTERNAL", "Unexpected server error.")

-    return web.json_response(payload.model_dump(mode="json"), status=200)
+    return web.json_response(payload.model_dump(mode="json", exclude_none=True), status=200)


@ROUTES.delete(f"/api/assets/{{id:{UUID_RE}}}/tags")
@@ -650,7 +674,29 @@ async def delete_asset_tags(request: web.Request) -> web.Response:
        )
        return _build_error_response(500, "INTERNAL", "Unexpected server error.")

-    return web.json_response(payload.model_dump(mode="json"), status=200)
+    return web.json_response(payload.model_dump(mode="json", exclude_none=True), status=200)
+
+
+@ROUTES.get("/api/assets/tags/refine")
+@_require_assets_feature_enabled
+async def get_tags_refine(request: web.Request) -> web.Response:
+    """GET request to get tag histogram for filtered assets."""
+    query_dict = get_query_dict(request)
+    try:
+        q = schemas_in.TagsRefineQuery.model_validate(query_dict)
+    except ValidationError as ve:
+        return _build_validation_error_response("INVALID_QUERY", ve)
+
+    tag_counts = list_tag_histogram(
+        owner_id=USER_MANAGER.get_request_user_id(request),
+        include_tags=q.include_tags,
+        exclude_tags=q.exclude_tags,
+        name_contains=q.name_contains,
+        metadata_filter=q.metadata_filter,
+        limit=q.limit,
+    )
+    payload = schemas_out.TagHistogram(tag_counts=tag_counts)
+    return web.json_response(payload.model_dump(mode="json", exclude_none=True), status=200)


@ROUTES.post("/api/assets/seed")
--- a/app/assets/api/schemas_in.py
+++ b/app/assets/api/schemas_in.py
@@ -45,6 +45,8 @@ class ParsedUpload:
    user_metadata_raw: str | None
    provided_hash: str | None
    provided_hash_exists: bool | None
+    provided_mime_type: str | None = None
+    provided_preview_id: str | None = None


 class ListAssetsQuery(BaseModel):
@@ -98,11 +100,17 @@ class ListAssetsQuery(BaseModel):
 class UpdateAssetBody(BaseModel):
    name: str | None = None
    user_metadata: dict[str, Any] | None = None
+    preview_id: str | None = None  # references an asset_reference id, not an asset id

    @model_validator(mode="after")
    def _validate_at_least_one_field(self):
-        if self.name is None and self.user_metadata is None:
-            raise ValueError("Provide at least one of: name, user_metadata.")
+        if all(
+            v is None
+            for v in (self.name, self.user_metadata, self.preview_id)
+        ):
+            raise ValueError(
+                "Provide at least one of: name, user_metadata, preview_id."
+            )
        return self


@@ -110,9 +118,11 @@ class CreateFromHashBody(BaseModel):
    model_config = ConfigDict(extra="ignore", str_strip_whitespace=True)

    hash: str
-    name: str
+    name: str | None = None
    tags: list[str] = Field(default_factory=list)
    user_metadata: dict[str, Any] = Field(default_factory=dict)
+    mime_type: str | None = None
+    preview_id: str | None = None  # references an asset_reference id, not an asset id

    @field_validator("hash")
    @classmethod
@@ -138,6 +148,44 @@ class CreateFromHashBody(BaseModel):
        return []


+class TagsRefineQuery(BaseModel):
+    include_tags: list[str] = Field(default_factory=list)
+    exclude_tags: list[str] = Field(default_factory=list)
+    name_contains: str | None = None
+    metadata_filter: dict[str, Any] | None = None
+    limit: conint(ge=1, le=1000) = 100
+
+    @field_validator("include_tags", "exclude_tags", mode="before")
+    @classmethod
+    def _split_csv_tags(cls, v):
+        if v is None:
+            return []
+        if isinstance(v, str):
+            return [t.strip() for t in v.split(",") if t.strip()]
+        if isinstance(v, list):
+            out: list[str] = []
+            for item in v:
+                if isinstance(item, str):
+                    out.extend([t.strip() for t in item.split(",") if t.strip()])
+            return out
+        return v
+
+    @field_validator("metadata_filter", mode="before")
+    @classmethod
+    def _parse_metadata_json(cls, v):
+        if v is None or isinstance(v, dict):
+            return v
+        if isinstance(v, str) and v.strip():
+            try:
+                parsed = json.loads(v)
+            except Exception as e:
+                raise ValueError(f"metadata_filter must be JSON: {e}") from e
+            if not isinstance(parsed, dict):
+                raise ValueError("metadata_filter must be a JSON object")
+            return parsed
+        return None
+
+
 class TagsListQuery(BaseModel):
    model_config = ConfigDict(extra="ignore", str_strip_whitespace=True)

@@ -186,21 +234,25 @@ class TagsRemove(TagsAdd):
 class UploadAssetSpec(BaseModel):
    """Upload Asset operation.

-    - tags: ordered; first is root ('models'|'input'|'output');
+    - tags: optional list; if provided, first is root ('models'|'input'|'output');
            if root == 'models', second must be a valid category
    - name: display name
    - user_metadata: arbitrary JSON object (optional)
    - hash: optional canonical 'blake3:<hex>' for validation / fast-path
+    - mime_type: optional MIME type override
+    - preview_id: optional asset_reference ID for preview

    Files are stored using the content hash as filename stem.
    """

    model_config = ConfigDict(extra="ignore", str_strip_whitespace=True)

-    tags: list[str] = Field(..., min_length=1)
+    tags: list[str] = Field(default_factory=list)
    name: str | None = Field(default=None, max_length=512, description="Display Name")
    user_metadata: dict[str, Any] = Field(default_factory=dict)
    hash: str | None = Field(default=None)
+    mime_type: str | None = Field(default=None)
+    preview_id: str | None = Field(default=None)  # references an asset_reference id

    @field_validator("hash", mode="before")
    @classmethod
@@ -279,7 +331,7 @@ class UploadAssetSpec(BaseModel):
    @model_validator(mode="after")
    def _validate_order(self):
        if not self.tags:
-            raise ValueError("tags must be provided and non-empty")
+            raise ValueError("at least one tag is required for uploads")
        root = self.tags[0]
        if root not in {"models", "input", "output"}:
            raise ValueError("first tag must be one of: models, input, output")
--- a/app/assets/api/schemas_out.py
+++ b/app/assets/api/schemas_out.py
@@ -4,7 +4,10 @@ from typing import Any
 from pydantic import BaseModel, ConfigDict, Field, field_serializer


-class AssetSummary(BaseModel):
+class Asset(BaseModel):
+    """API view of an asset. Maps to DB ``AssetReference`` joined with its ``Asset`` blob;
+    ``id`` here is the AssetReference id, not the content-addressed Asset id."""
+
    id: str
    name: str
    asset_hash: str | None = None
@@ -12,8 +15,14 @@ class AssetSummary(BaseModel):
    mime_type: str | None = None
    tags: list[str] = Field(default_factory=list)
    preview_url: str | None = None
-    created_at: datetime | None = None
-    updated_at: datetime | None = None
+    preview_id: str | None = None  # references an asset_reference id, not an asset id
+    user_metadata: dict[str, Any] = Field(default_factory=dict)
+    is_immutable: bool = False
+    metadata: dict[str, Any] | None = None
+    job_id: str | None = None
+    prompt_id: str | None = None  # deprecated: use job_id
+    created_at: datetime
+    updated_at: datetime
    last_access_time: datetime | None = None

    model_config = ConfigDict(from_attributes=True)
@@ -23,50 +32,16 @@ class AssetSummary(BaseModel):
        return v.isoformat() if v else None


+class AssetCreated(Asset):
+    """``Asset`` response extended with a required ``created_new`` flag."""
+
+    # NOTE(review): presumably True when the call created a new record rather
+    # than reusing an existing one — confirm against the service layer.
+    created_new: bool
+
+
 class AssetsList(BaseModel):
-    assets: list[AssetSummary]
+    assets: list[Asset]
    total: int
    has_more: bool


-class AssetUpdated(BaseModel):
-    id: str
-    name: str
-    asset_hash: str | None = None
-    tags: list[str] = Field(default_factory=list)
-    user_metadata: dict[str, Any] = Field(default_factory=dict)
-    updated_at: datetime | None = None
-
-    model_config = ConfigDict(from_attributes=True)
-
-    @field_serializer("updated_at")
-    def _serialize_updated_at(self, v: datetime | None, _info):
-        return v.isoformat() if v else None
-
-
-class AssetDetail(BaseModel):
-    id: str
-    name: str
-    asset_hash: str | None = None
-    size: int | None = None
-    mime_type: str | None = None
-    tags: list[str] = Field(default_factory=list)
-    user_metadata: dict[str, Any] = Field(default_factory=dict)
-    preview_id: str | None = None
-    created_at: datetime | None = None
-    last_access_time: datetime | None = None
-
-    model_config = ConfigDict(from_attributes=True)
-
-    @field_serializer("created_at", "last_access_time")
-    def _serialize_datetime(self, v: datetime | None, _info):
-        return v.isoformat() if v else None
-
-
-class AssetCreated(AssetDetail):
-    created_new: bool
-
-
 class TagUsage(BaseModel):
    name: str
    count: int
@@ -91,3 +66,7 @@ class TagsRemove(BaseModel):
    removed: list[str] = Field(default_factory=list)
    not_present: list[str] = Field(default_factory=list)
    total_tags: list[str] = Field(default_factory=list)
+
+
+class TagHistogram(BaseModel):
+    """Response model holding a single ``tag_counts`` mapping."""
+
+    # String keys to integer values; presumably tag name -> number of matching
+    # assets — confirm against list_tag_histogram.
+    tag_counts: dict[str, int]
--- a/app/assets/api/upload.py
+++ b/app/assets/api/upload.py
@@ -52,6 +52,8 @@ async def parse_multipart_upload(
    user_metadata_raw: str | None = None
    provided_hash: str | None = None
    provided_hash_exists: bool | None = None
+    provided_mime_type: str | None = None
+    provided_preview_id: str | None = None

    file_written = 0
    tmp_path: str | None = None
@@ -128,6 +130,16 @@ async def parse_multipart_upload(
            provided_name = (await field.text()) or None
        elif fname == "user_metadata":
            user_metadata_raw = (await field.text()) or None
+        elif fname == "id":
+            raise UploadError(
+                400,
+                "UNSUPPORTED_FIELD",
+                "Client-provided 'id' is not supported. Asset IDs are assigned by the server.",
+            )
+        elif fname == "mime_type":
+            provided_mime_type = ((await field.text()) or "").strip() or None
+        elif fname == "preview_id":
+            provided_preview_id = ((await field.text()) or "").strip() or None

    if not file_present and not (provided_hash and provided_hash_exists):
        raise UploadError(
@@ -152,6 +164,8 @@ async def parse_multipart_upload(
        user_metadata_raw=user_metadata_raw,
        provided_hash=provided_hash,
        provided_hash_exists=provided_hash_exists,
+        provided_mime_type=provided_mime_type,
+        provided_preview_id=provided_preview_id,
    )


--- a/app/assets/database/models.py
+++ b/app/assets/database/models.py
@@ -45,13 +45,7 @@ class Asset(Base):
        passive_deletes=True,
    )

-    preview_of: Mapped[list[AssetReference]] = relationship(
-        "AssetReference",
-        back_populates="preview_asset",
-        primaryjoin=lambda: Asset.id == foreign(AssetReference.preview_id),
-        foreign_keys=lambda: [AssetReference.preview_id],
-        viewonly=True,
-    )
+    # preview_id on AssetReference is a self-referential FK to asset_references.id

    __table_args__ = (
        Index("uq_assets_hash", "hash", unique=True),
@@ -91,11 +85,15 @@ class AssetReference(Base):
    owner_id: Mapped[str] = mapped_column(String(128), nullable=False, default="")
    name: Mapped[str] = mapped_column(String(512), nullable=False)
    preview_id: Mapped[str | None] = mapped_column(
-        String(36), ForeignKey("assets.id", ondelete="SET NULL")
+        String(36), ForeignKey("asset_references.id", ondelete="SET NULL")
    )
    user_metadata: Mapped[dict[str, Any] | None] = mapped_column(
        JSON(none_as_null=True)
    )
+    system_metadata: Mapped[dict[str, Any] | None] = mapped_column(
+        JSON(none_as_null=True), nullable=True, default=None
+    )
+    job_id: Mapped[str | None] = mapped_column(String(36), nullable=True, default=None)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=False), nullable=False, default=get_utc_now
    )
@@ -115,10 +113,10 @@ class AssetReference(Base):
        foreign_keys=[asset_id],
        lazy="selectin",
    )
-    preview_asset: Mapped[Asset | None] = relationship(
-        "Asset",
-        back_populates="preview_of",
+    preview_ref: Mapped[AssetReference | None] = relationship(
+        "AssetReference",
        foreign_keys=[preview_id],
+        remote_side=lambda: [AssetReference.id],
    )

    metadata_entries: Mapped[list[AssetReferenceMeta]] = relationship(
@@ -152,6 +150,7 @@ class AssetReference(Base):
        Index("ix_asset_references_created_at", "created_at"),
        Index("ix_asset_references_last_access_time", "last_access_time"),
        Index("ix_asset_references_deleted_at", "deleted_at"),
+        Index("ix_asset_references_preview_id", "preview_id"),
        Index("ix_asset_references_owner_name", "owner_id", "name"),
        CheckConstraint(
            "(mtime_ns IS NULL) OR (mtime_ns >= 0)", name="ck_ar_mtime_nonneg"
@@ -192,6 +191,10 @@ class AssetReferenceMeta(Base):
        Index("ix_asset_reference_meta_key_val_str", "key", "val_str"),
        Index("ix_asset_reference_meta_key_val_num", "key", "val_num"),
        Index("ix_asset_reference_meta_key_val_bool", "key", "val_bool"),
+        CheckConstraint(
+            "val_str IS NOT NULL OR val_num IS NOT NULL OR val_bool IS NOT NULL OR val_json IS NOT NULL",
+            name="has_value",
+        ),
    )


--- a/app/assets/database/queries/__init__.py
+++ b/app/assets/database/queries/__init__.py
@@ -1,6 +1,7 @@
 from app.assets.database.queries.asset import (
    asset_exists_by_hash,
    bulk_insert_assets,
+    create_stub_asset,
    get_asset_by_hash,
    get_existing_asset_ids,
    reassign_asset_references,
@@ -12,6 +13,7 @@ from app.assets.database.queries.asset_reference import (
    UnenrichedReferenceRow,
    bulk_insert_references_ignore_conflicts,
    bulk_update_enrichment_level,
+    count_active_siblings,
    bulk_update_is_missing,
    bulk_update_needs_verify,
    convert_metadata_to_rows,
@@ -31,16 +33,21 @@ from app.assets.database.queries.asset_reference import (
    get_unenriched_references,
    get_unreferenced_unhashed_asset_ids,
    insert_reference,
+    list_all_file_paths_by_asset_id,
    list_references_by_asset_id,
    list_references_page,
    mark_references_missing_outside_prefixes,
+    rebuild_metadata_projection,
+    reference_exists,
    reference_exists_for_asset_id,
    restore_references_by_paths,
    set_reference_metadata,
    set_reference_preview,
+    set_reference_system_metadata,
    soft_delete_reference_by_id,
    update_reference_access_time,
    update_reference_name,
+    update_is_missing_by_asset_id,
    update_reference_timestamps,
    update_reference_updated_at,
    upsert_reference,
@@ -54,6 +61,7 @@ from app.assets.database.queries.tags import (
    bulk_insert_tags_and_meta,
    ensure_tags_exist,
    get_reference_tags,
+    list_tag_counts_for_filtered_assets,
    list_tags_with_usage,
    remove_missing_tag_for_asset_id,
    remove_tags_from_reference,
@@ -74,6 +82,8 @@ __all__ = [
    "bulk_insert_references_ignore_conflicts",
    "bulk_insert_tags_and_meta",
    "bulk_update_enrichment_level",
+    "count_active_siblings",
+    "create_stub_asset",
    "bulk_update_is_missing",
    "bulk_update_needs_verify",
    "convert_metadata_to_rows",
@@ -97,20 +107,26 @@ __all__ = [
    "get_unenriched_references",
    "get_unreferenced_unhashed_asset_ids",
    "insert_reference",
+    "list_all_file_paths_by_asset_id",
    "list_references_by_asset_id",
    "list_references_page",
+    "list_tag_counts_for_filtered_assets",
    "list_tags_with_usage",
    "mark_references_missing_outside_prefixes",
    "reassign_asset_references",
+    "rebuild_metadata_projection",
+    "reference_exists",
    "reference_exists_for_asset_id",
    "remove_missing_tag_for_asset_id",
    "remove_tags_from_reference",
    "restore_references_by_paths",
    "set_reference_metadata",
    "set_reference_preview",
+    "set_reference_system_metadata",
    "soft_delete_reference_by_id",
    "set_reference_tags",
    "update_asset_hash_and_mime",
+    "update_is_missing_by_asset_id",
    "update_reference_access_time",
    "update_reference_name",
    "update_reference_timestamps",
--- a/app/assets/database/queries/asset.py
+++ b/app/assets/database/queries/asset.py
@@ -69,7 +69,7 @@ def upsert_asset(
        if asset.size_bytes != int(size_bytes) and int(size_bytes) > 0:
            asset.size_bytes = int(size_bytes)
            changed = True
-        if mime_type and asset.mime_type != mime_type:
+        if mime_type and not asset.mime_type:
            asset.mime_type = mime_type
            changed = True
        if changed:
@@ -78,6 +78,18 @@ def upsert_asset(
    return asset, created, updated


+def create_stub_asset(
+    session: Session,
+    size_bytes: int,
+    mime_type: str | None = None,
+) -> Asset:
+    """Create a new asset with no hash (stub for later enrichment)."""
+    asset = Asset(size_bytes=size_bytes, mime_type=mime_type, hash=None)
+    session.add(asset)
+    session.flush()
+    return asset
+
+
 def bulk_insert_assets(
    session: Session,
    rows: list[dict],
@@ -118,7 +130,7 @@ def update_asset_hash_and_mime(
        return False
    if asset_hash is not None:
        asset.hash = asset_hash
-    if mime_type is not None:
+    if mime_type is not None and not asset.mime_type:
        asset.mime_type = mime_type
    return True

--- a/app/assets/database/queries/asset_reference.py
+++ b/app/assets/database/queries/asset_reference.py
@@ -10,7 +10,7 @@ from decimal import Decimal
 from typing import NamedTuple, Sequence

 import sqlalchemy as sa
-from sqlalchemy import delete, exists, select
+from sqlalchemy import delete, select
 from sqlalchemy.dialects import sqlite
 from sqlalchemy.exc import IntegrityError
 from sqlalchemy.orm import Session, noload
@@ -24,12 +24,14 @@ from app.assets.database.models import (
 )
 from app.assets.database.queries.common import (
    MAX_BIND_PARAMS,
+    apply_metadata_filter,
+    apply_tag_filters,
    build_prefix_like_conditions,
    build_visible_owner_clause,
    calculate_rows_per_statement,
    iter_chunks,
 )
-from app.assets.helpers import escape_sql_like_string, get_utc_now, normalize_tags
+from app.assets.helpers import escape_sql_like_string, get_utc_now


 def _check_is_scalar(v):
@@ -44,15 +46,6 @@ def _check_is_scalar(v):

 def _scalar_to_row(key: str, ordinal: int, value) -> dict:
    """Convert a scalar value to a typed projection row."""
-    if value is None:
-        return {
-            "key": key,
-            "ordinal": ordinal,
-            "val_str": None,
-            "val_num": None,
-            "val_bool": None,
-            "val_json": None,
-        }
    if isinstance(value, bool):
        return {"key": key, "ordinal": ordinal, "val_bool": bool(value)}
    if isinstance(value, (int, float, Decimal)):
@@ -66,96 +59,19 @@ def _scalar_to_row(key: str, ordinal: int, value) -> dict:
 def convert_metadata_to_rows(key: str, value) -> list[dict]:
    """Turn a metadata key/value into typed projection rows."""
    if value is None:
-        return [_scalar_to_row(key, 0, None)]
+        return []

    if _check_is_scalar(value):
        return [_scalar_to_row(key, 0, value)]

    if isinstance(value, list):
        if all(_check_is_scalar(x) for x in value):
-            return [_scalar_to_row(key, i, x) for i, x in enumerate(value)]
-        return [{"key": key, "ordinal": i, "val_json": x} for i, x in enumerate(value)]
+            return [_scalar_to_row(key, i, x) for i, x in enumerate(value) if x is not None]
+        return [{"key": key, "ordinal": i, "val_json": x} for i, x in enumerate(value) if x is not None]

    return [{"key": key, "ordinal": 0, "val_json": value}]


-def _apply_tag_filters(
-    stmt: sa.sql.Select,
-    include_tags: Sequence[str] | None = None,
-    exclude_tags: Sequence[str] | None = None,
-) -> sa.sql.Select:
-    """include_tags: every tag must be present; exclude_tags: none may be present."""
-    include_tags = normalize_tags(include_tags)
-    exclude_tags = normalize_tags(exclude_tags)
-
-    if include_tags:
-        for tag_name in include_tags:
-            stmt = stmt.where(
-                exists().where(
-                    (AssetReferenceTag.asset_reference_id == AssetReference.id)
-                    & (AssetReferenceTag.tag_name == tag_name)
-                )
-            )
-
-    if exclude_tags:
-        stmt = stmt.where(
-            ~exists().where(
-                (AssetReferenceTag.asset_reference_id == AssetReference.id)
-                & (AssetReferenceTag.tag_name.in_(exclude_tags))
-            )
-        )
-    return stmt
-
-
-def _apply_metadata_filter(
-    stmt: sa.sql.Select,
-    metadata_filter: dict | None = None,
-) -> sa.sql.Select:
-    """Apply filters using asset_reference_meta projection table."""
-    if not metadata_filter:
-        return stmt
-
-    def _exists_for_pred(key: str, *preds) -> sa.sql.ClauseElement:
-        return sa.exists().where(
-            AssetReferenceMeta.asset_reference_id == AssetReference.id,
-            AssetReferenceMeta.key == key,
-            *preds,
-        )
-
-    def _exists_clause_for_value(key: str, value) -> sa.sql.ClauseElement:
-        if value is None:
-            no_row_for_key = sa.not_(
-                sa.exists().where(
-                    AssetReferenceMeta.asset_reference_id == AssetReference.id,
-                    AssetReferenceMeta.key == key,
-                )
-            )
-            null_row = _exists_for_pred(
-                key,
-                AssetReferenceMeta.val_json.is_(None),
-                AssetReferenceMeta.val_str.is_(None),
-                AssetReferenceMeta.val_num.is_(None),
-                AssetReferenceMeta.val_bool.is_(None),
-            )
-            return sa.or_(no_row_for_key, null_row)
-
-        if isinstance(value, bool):
-            return _exists_for_pred(key, AssetReferenceMeta.val_bool == bool(value))
-        if isinstance(value, (int, float, Decimal)):
-            num = value if isinstance(value, Decimal) else Decimal(str(value))
-            return _exists_for_pred(key, AssetReferenceMeta.val_num == num)
-        if isinstance(value, str):
-            return _exists_for_pred(key, AssetReferenceMeta.val_str == value)
-        return _exists_for_pred(key, AssetReferenceMeta.val_json == value)
-
-    for k, v in metadata_filter.items():
-        if isinstance(v, list):
-            ors = [_exists_clause_for_value(k, elem) for elem in v]
-            if ors:
-                stmt = stmt.where(sa.or_(*ors))
-        else:
-            stmt = stmt.where(_exists_clause_for_value(k, v))
-    return stmt


 def get_reference_by_id(
@@ -198,6 +114,23 @@ def get_reference_by_file_path(
    )


+def count_active_siblings(
+    session: Session,
+    asset_id: str,
+    exclude_reference_id: str,
+) -> int:
+    """Count active (non-deleted) references to an asset, excluding one reference."""
+    return (
+        session.query(AssetReference)
+        .filter(
+            AssetReference.asset_id == asset_id,
+            AssetReference.id != exclude_reference_id,
+            AssetReference.deleted_at.is_(None),
+        )
+        .count()
+    )
+
+
 def reference_exists_for_asset_id(
    session: Session,
    asset_id: str,
@@ -212,6 +145,21 @@ def reference_exists_for_asset_id(
    return session.execute(q).first() is not None


+def reference_exists(
+    session: Session,
+    reference_id: str,
+) -> bool:
+    """Return True if a reference with the given ID exists (not soft-deleted)."""
+    q = (
+        select(sa.literal(True))
+        .select_from(AssetReference)
+        .where(AssetReference.id == reference_id)
+        .where(AssetReference.deleted_at.is_(None))
+        .limit(1)
+    )
+    return session.execute(q).first() is not None
+
+
 def insert_reference(
    session: Session,
    asset_id: str,
@@ -336,8 +284,8 @@ def list_references_page(
        escaped, esc = escape_sql_like_string(name_contains)
        base = base.where(AssetReference.name.ilike(f"%{escaped}%", escape=esc))

-    base = _apply_tag_filters(base, include_tags, exclude_tags)
-    base = _apply_metadata_filter(base, metadata_filter)
+    base = apply_tag_filters(base, include_tags, exclude_tags)
+    base = apply_metadata_filter(base, metadata_filter)

    sort = (sort or "created_at").lower()
    order = (order or "desc").lower()
@@ -366,8 +314,8 @@ def list_references_page(
        count_stmt = count_stmt.where(
            AssetReference.name.ilike(f"%{escaped}%", escape=esc)
        )
-    count_stmt = _apply_tag_filters(count_stmt, include_tags, exclude_tags)
-    count_stmt = _apply_metadata_filter(count_stmt, metadata_filter)
+    count_stmt = apply_tag_filters(count_stmt, include_tags, exclude_tags)
+    count_stmt = apply_metadata_filter(count_stmt, metadata_filter)

    total = int(session.execute(count_stmt).scalar_one() or 0)
    refs = session.execute(base).unique().scalars().all()
@@ -379,7 +327,7 @@ def list_references_page(
            select(AssetReferenceTag.asset_reference_id, Tag.name)
            .join(Tag, Tag.name == AssetReferenceTag.tag_name)
            .where(AssetReferenceTag.asset_reference_id.in_(id_list))
-            .order_by(AssetReferenceTag.added_at)
+            .order_by(AssetReferenceTag.tag_name.asc())
        )
        for ref_id, tag_name in rows.all():
            tag_map[ref_id].append(tag_name)
@@ -492,6 +440,42 @@ def update_reference_updated_at(
    )


+def rebuild_metadata_projection(session: Session, ref: AssetReference) -> None:
+    """Delete and rebuild AssetReferenceMeta rows from merged system+user metadata.
+
+    The merged dict is ``{**system_metadata, **user_metadata}`` so user keys
+    override system keys of the same name.
+    """
+    session.execute(
+        delete(AssetReferenceMeta).where(
+            AssetReferenceMeta.asset_reference_id == ref.id
+        )
+    )
+    session.flush()
+
+    merged = {**(ref.system_metadata or {}), **(ref.user_metadata or {})}
+    if not merged:
+        return
+
+    rows: list[AssetReferenceMeta] = []
+    for k, v in merged.items():
+        for r in convert_metadata_to_rows(k, v):
+            rows.append(
+                AssetReferenceMeta(
+                    asset_reference_id=ref.id,
+                    key=r["key"],
+                    ordinal=int(r["ordinal"]),
+                    val_str=r.get("val_str"),
+                    val_num=r.get("val_num"),
+                    val_bool=r.get("val_bool"),
+                    val_json=r.get("val_json"),
+                )
+            )
+    if rows:
+        session.add_all(rows)
+        session.flush()
+
+
 def set_reference_metadata(
    session: Session,
    reference_id: str,
@@ -505,33 +489,24 @@ def set_reference_metadata(
    ref.updated_at = get_utc_now()
    session.flush()

-    session.execute(
-        delete(AssetReferenceMeta).where(
-            AssetReferenceMeta.asset_reference_id == reference_id
-        )
-    )
+    rebuild_metadata_projection(session, ref)
+
+
+def set_reference_system_metadata(
+    session: Session,
+    reference_id: str,
+    system_metadata: dict | None = None,
+) -> None:
+    """Set system_metadata on a reference and rebuild the merged projection."""
+    ref = session.get(AssetReference, reference_id)
+    if not ref:
+        raise ValueError(f"AssetReference {reference_id} not found")
+
+    ref.system_metadata = system_metadata or {}
+    ref.updated_at = get_utc_now()
    session.flush()

-    if not user_metadata:
-        return
-
-    rows: list[AssetReferenceMeta] = []
-    for k, v in user_metadata.items():
-        for r in convert_metadata_to_rows(k, v):
-            rows.append(
-                AssetReferenceMeta(
-                    asset_reference_id=reference_id,
-                    key=r["key"],
-                    ordinal=int(r["ordinal"]),
-                    val_str=r.get("val_str"),
-                    val_num=r.get("val_num"),
-                    val_bool=r.get("val_bool"),
-                    val_json=r.get("val_json"),
-                )
-            )
-    if rows:
-        session.add_all(rows)
-        session.flush()
+    rebuild_metadata_projection(session, ref)


 def delete_reference_by_id(
@@ -571,19 +546,19 @@ def soft_delete_reference_by_id(
 def set_reference_preview(
    session: Session,
    reference_id: str,
-    preview_asset_id: str | None = None,
+    preview_reference_id: str | None = None,
 ) -> None:
    """Set or clear preview_id and bump updated_at. Raises on unknown IDs."""
    ref = session.get(AssetReference, reference_id)
    if not ref:
        raise ValueError(f"AssetReference {reference_id} not found")

-    if preview_asset_id is None:
+    if preview_reference_id is None:
        ref.preview_id = None
    else:
-        if not session.get(Asset, preview_asset_id):
-            raise ValueError(f"Preview Asset {preview_asset_id} not found")
-        ref.preview_id = preview_asset_id
+        if not session.get(AssetReference, preview_reference_id):
+            raise ValueError(f"Preview AssetReference {preview_reference_id} not found")
+        ref.preview_id = preview_reference_id

    ref.updated_at = get_utc_now()
    session.flush()
@@ -609,6 +584,8 @@ def list_references_by_asset_id(
        session.execute(
            select(AssetReference)
            .where(AssetReference.asset_id == asset_id)
+            .where(AssetReference.is_missing == False)  # noqa: E712
+            .where(AssetReference.deleted_at.is_(None))
            .order_by(AssetReference.id.asc())
        )
        .scalars()
@@ -616,6 +593,25 @@ def list_references_by_asset_id(
    )


+def list_all_file_paths_by_asset_id(
+    session: Session,
+    asset_id: str,
+) -> list[str]:
+    """Return every file_path for an asset, including soft-deleted/missing refs.
+
+    Used for orphan cleanup where all on-disk files must be removed.
+    """
+    return list(
+        session.execute(
+            select(AssetReference.file_path)
+            .where(AssetReference.asset_id == asset_id)
+            .where(AssetReference.file_path.isnot(None))
+        )
+        .scalars()
+        .all()
+    )
+
+
 def upsert_reference(
    session: Session,
    asset_id: str,
@@ -855,6 +851,22 @@ def bulk_update_is_missing(
    return total


+def update_is_missing_by_asset_id(
+    session: Session, asset_id: str, value: bool
+) -> int:
+    """Set is_missing flag for all non-deleted references belonging to an asset.
+
+    Returns: Number of rows updated
+    """
+    result = session.execute(
+        sa.update(AssetReference)
+        .where(AssetReference.asset_id == asset_id)
+        .where(AssetReference.deleted_at.is_(None))
+        .values(is_missing=value)
+    )
+    return result.rowcount
+
+
 def delete_references_by_ids(session: Session, reference_ids: list[str]) -> int:
    """Delete references by their IDs.

--- a/app/assets/database/queries/common.py
+++ b/app/assets/database/queries/common.py
@@ -1,12 +1,14 @@
 """Shared utilities for database query modules."""

 import os
-from typing import Iterable
+from decimal import Decimal
+from typing import Iterable, Sequence

 import sqlalchemy as sa
+from sqlalchemy import exists

-from app.assets.database.models import AssetReference
-from app.assets.helpers import escape_sql_like_string
+from app.assets.database.models import AssetReference, AssetReferenceMeta, AssetReferenceTag
+from app.assets.helpers import escape_sql_like_string, normalize_tags

 MAX_BIND_PARAMS = 800

@@ -52,3 +54,74 @@ def build_prefix_like_conditions(
        escaped, esc = escape_sql_like_string(base)
        conds.append(AssetReference.file_path.like(escaped + "%", escape=esc))
    return conds
+
+
+def apply_tag_filters(
+    stmt: sa.sql.Select,
+    include_tags: Sequence[str] | None = None,
+    exclude_tags: Sequence[str] | None = None,
+) -> sa.sql.Select:
+    """include_tags: every tag must be present; exclude_tags: none may be present."""
+    include_tags = normalize_tags(include_tags)
+    exclude_tags = normalize_tags(exclude_tags)
+
+    if include_tags:
+        for tag_name in include_tags:
+            stmt = stmt.where(
+                exists().where(
+                    (AssetReferenceTag.asset_reference_id == AssetReference.id)
+                    & (AssetReferenceTag.tag_name == tag_name)
+                )
+            )
+
+    if exclude_tags:
+        stmt = stmt.where(
+            ~exists().where(
+                (AssetReferenceTag.asset_reference_id == AssetReference.id)
+                & (AssetReferenceTag.tag_name.in_(exclude_tags))
+            )
+        )
+    return stmt
+
+
+def apply_metadata_filter(
+    stmt: sa.sql.Select,
+    metadata_filter: dict | None = None,
+) -> sa.sql.Select:
+    """Apply filters using asset_reference_meta projection table."""
+    if not metadata_filter:
+        return stmt
+
+    def _exists_for_pred(key: str, *preds) -> sa.sql.ClauseElement:
+        return sa.exists().where(
+            AssetReferenceMeta.asset_reference_id == AssetReference.id,
+            AssetReferenceMeta.key == key,
+            *preds,
+        )
+
+    def _exists_clause_for_value(key: str, value) -> sa.sql.ClauseElement:
+        if value is None:
+            return sa.not_(
+                sa.exists().where(
+                    AssetReferenceMeta.asset_reference_id == AssetReference.id,
+                    AssetReferenceMeta.key == key,
+                )
+            )
+
+        if isinstance(value, bool):
+            return _exists_for_pred(key, AssetReferenceMeta.val_bool == bool(value))
+        if isinstance(value, (int, float, Decimal)):
+            num = value if isinstance(value, Decimal) else Decimal(str(value))
+            return _exists_for_pred(key, AssetReferenceMeta.val_num == num)
+        if isinstance(value, str):
+            return _exists_for_pred(key, AssetReferenceMeta.val_str == value)
+        return _exists_for_pred(key, AssetReferenceMeta.val_json == value)
+
+    for k, v in metadata_filter.items():
+        if isinstance(v, list):
+            ors = [_exists_clause_for_value(k, elem) for elem in v]
+            if ors:
+                stmt = stmt.where(sa.or_(*ors))
+        else:
+            stmt = stmt.where(_exists_clause_for_value(k, v))
+    return stmt
--- a/app/assets/database/queries/tags.py
+++ b/app/assets/database/queries/tags.py
@@ -8,12 +8,15 @@ from sqlalchemy.exc import IntegrityError
 from sqlalchemy.orm import Session

 from app.assets.database.models import (
+    Asset,
    AssetReference,
    AssetReferenceMeta,
    AssetReferenceTag,
    Tag,
 )
 from app.assets.database.queries.common import (
+    apply_metadata_filter,
+    apply_tag_filters,
    build_visible_owner_clause,
    iter_row_chunks,
 )
@@ -72,9 +75,9 @@ def get_reference_tags(session: Session, reference_id: str) -> list[str]:
        tag_name
        for (tag_name,) in (
            session.execute(
-                select(AssetReferenceTag.tag_name).where(
-                    AssetReferenceTag.asset_reference_id == reference_id
-                )
+                select(AssetReferenceTag.tag_name)
+                .where(AssetReferenceTag.asset_reference_id == reference_id)
+                .order_by(AssetReferenceTag.tag_name.asc())
            )
        ).all()
    ]
@@ -117,7 +120,7 @@ def set_reference_tags(
        )
        session.flush()

-    return SetTagsResult(added=to_add, removed=to_remove, total=desired)
+    return SetTagsResult(added=sorted(to_add), removed=sorted(to_remove), total=sorted(desired))


 def add_tags_to_reference(
@@ -272,6 +275,12 @@ def list_tags_with_usage(
        .select_from(AssetReferenceTag)
        .join(AssetReference, AssetReference.id == AssetReferenceTag.asset_reference_id)
        .where(build_visible_owner_clause(owner_id))
+        .where(
+            sa.or_(
+                AssetReference.is_missing == False,  # noqa: E712
+                AssetReferenceTag.tag_name == "missing",
+            )
+        )
        .where(AssetReference.deleted_at.is_(None))
        .group_by(AssetReferenceTag.tag_name)
        .subquery()
@@ -308,6 +317,12 @@ def list_tags_with_usage(
            select(AssetReferenceTag.tag_name)
            .join(AssetReference, AssetReference.id == AssetReferenceTag.asset_reference_id)
            .where(build_visible_owner_clause(owner_id))
+            .where(
+                sa.or_(
+                    AssetReference.is_missing == False,  # noqa: E712
+                    AssetReferenceTag.tag_name == "missing",
+                )
+            )
            .where(AssetReference.deleted_at.is_(None))
            .group_by(AssetReferenceTag.tag_name)
        )
@@ -320,6 +335,53 @@ def list_tags_with_usage(
    return rows_norm, int(total or 0)


+def list_tag_counts_for_filtered_assets(
+    session: Session,
+    owner_id: str = "",
+    include_tags: Sequence[str] | None = None,
+    exclude_tags: Sequence[str] | None = None,
+    name_contains: str | None = None,
+    metadata_filter: dict | None = None,
+    limit: int = 100,
+) -> dict[str, int]:
+    """Return tag counts for assets matching the given filters.
+
+    Uses the same filtering logic as list_references_page but returns the
+    ``limit`` most frequent {tag_name: count} pairs instead of paginated references.
+    """
+    # Build a subquery of matching reference IDs
+    ref_sq = (
+        select(AssetReference.id)
+        .join(Asset, Asset.id == AssetReference.asset_id)
+        .where(build_visible_owner_clause(owner_id))
+        .where(AssetReference.is_missing == False)  # noqa: E712
+        .where(AssetReference.deleted_at.is_(None))
+    )
+
+    if name_contains:
+        escaped, esc = escape_sql_like_string(name_contains)
+        ref_sq = ref_sq.where(AssetReference.name.ilike(f"%{escaped}%", escape=esc))
+
+    ref_sq = apply_tag_filters(ref_sq, include_tags, exclude_tags)
+    ref_sq = apply_metadata_filter(ref_sq, metadata_filter)
+    ref_sq = ref_sq.subquery()
+
+    # Count tags across those references
+    q = (
+        select(
+            AssetReferenceTag.tag_name,
+            func.count(AssetReferenceTag.asset_reference_id).label("cnt"),
+        )
+        .where(AssetReferenceTag.asset_reference_id.in_(select(ref_sq.c.id)))
+        .group_by(AssetReferenceTag.tag_name)
+        .order_by(func.count(AssetReferenceTag.asset_reference_id).desc(), AssetReferenceTag.tag_name.asc())
+        .limit(limit)
+    )
+
+    rows = session.execute(q).all()
+    return {tag_name: int(cnt) for tag_name, cnt in rows}
+
+
 def bulk_insert_tags_and_meta(
    session: Session,
    tag_rows: list[dict],
--- a/app/assets/scanner.py
+++ b/app/assets/scanner.py
@@ -13,12 +13,13 @@ from app.assets.database.queries import (
    delete_references_by_ids,
    ensure_tags_exist,
    get_asset_by_hash,
+    get_reference_by_id,
    get_references_for_prefixes,
    get_unenriched_references,
    mark_references_missing_outside_prefixes,
    reassign_asset_references,
    remove_missing_tag_for_asset_id,
-    set_reference_metadata,
+    set_reference_system_metadata,
    update_asset_hash_and_mime,
 )
 from app.assets.services.bulk_ingest import (
@@ -338,6 +339,7 @@ def build_asset_specs(
                "metadata": metadata,
                "hash": asset_hash,
                "mime_type": mime_type,
+                "job_id": None,
            }
        )
        tag_pool.update(tags)
@@ -426,6 +428,7 @@ def enrich_asset(
    except OSError:
        return new_level

+    initial_mtime_ns = get_mtime_ns(stat_p)
    rel_fname = compute_relative_filename(file_path)
    mime_type: str | None = None
    metadata = None
@@ -489,9 +492,21 @@ def enrich_asset(
        except Exception as e:
            logging.warning("Failed to hash %s: %s", file_path, e)

+    # Optimistic guard: if the reference is gone, or its mtime_ns no longer
+    # matches the mtime we read from stat_p at the start (e.g. ingest_existing_file
+    # updated it), our results are stale — discard them to avoid overwriting fresh registration data.
+    ref = get_reference_by_id(session, reference_id)
+    if ref is None or ref.mtime_ns != initial_mtime_ns:
+        session.rollback()
+        logging.info(
+            "Ref %s mtime changed during enrichment, discarding stale result",
+            reference_id,
+        )
+        return ENRICHMENT_STUB
+
    if extract_metadata and metadata:
-        user_metadata = metadata.to_user_metadata()
-        set_reference_metadata(session, reference_id, user_metadata)
+        system_metadata = metadata.to_user_metadata()
+        set_reference_system_metadata(session, reference_id, system_metadata)

    if full_hash:
        existing = get_asset_by_hash(session, full_hash)
--- a/app/assets/seeder.py
+++ b/app/assets/seeder.py
@@ -77,7 +77,9 @@ class _AssetSeeder:
    """

    def __init__(self) -> None:
-        self._lock = threading.Lock()
+        # RLock is required because _run_scan() drains pending work while
+        # holding _lock and re-enters start() which also acquires _lock.
+        self._lock = threading.RLock()
        self._state = State.IDLE
        self._progress: Progress | None = None
        self._last_progress: Progress | None = None
@@ -92,6 +94,7 @@ class _AssetSeeder:
        self._prune_first: bool = False
        self._progress_callback: ProgressCallback | None = None
        self._disabled: bool = False
+        self._pending_enrich: dict | None = None

    def disable(self) -> None:
        """Disable the asset seeder, preventing any scans from starting."""
@@ -196,6 +199,42 @@ class _AssetSeeder:
            compute_hashes=compute_hashes,
        )

+    def enqueue_enrich(
+        self,
+        roots: tuple[RootType, ...] = ("models", "input", "output"),
+        compute_hashes: bool = False,
+    ) -> bool:
+        """Start an enrichment scan now, or queue it for after the current scan.
+
+        If the seeder is idle, starts immediately. Otherwise the request is
+        merged into any already-pending one (union of roots, OR of
+        compute_hashes) and runs when the current scan finishes.
+
+        Args:
+            roots: Tuple of root types to scan
+            compute_hashes: If True, compute blake3 hashes
+
+        Returns:
+            True if started immediately, False if queued for later
+        """
+        with self._lock:
+            if self.start_enrich(roots=roots, compute_hashes=compute_hashes):
+                return True
+            if self._pending_enrich is not None:
+                # Ordered union; a set would make the root order arbitrary.
+                merged_roots = dict.fromkeys((*self._pending_enrich["roots"], *roots))
+                self._pending_enrich["roots"] = tuple(merged_roots)
+                self._pending_enrich["compute_hashes"] = (
+                    self._pending_enrich["compute_hashes"] or compute_hashes
+                )
+            else:
+                self._pending_enrich = {
+                    "roots": roots,
+                    "compute_hashes": compute_hashes,
+                }
+            logging.info("Enrich scan queued (roots=%s)", self._pending_enrich["roots"])
+        return False
+
    def cancel(self) -> bool:
        """Request cancellation of the current scan.

@@ -381,9 +420,13 @@ class _AssetSeeder:
            return marked
        finally:
            with self._lock:
-                self._last_progress = self._progress
-                self._state = State.IDLE
-                self._progress = None
+                self._reset_to_idle()
+
+    def _reset_to_idle(self) -> None:
+        """Move _progress to _last_progress and set state IDLE. Caller must hold _lock."""
+        self._last_progress = self._progress
+        self._state = State.IDLE
+        self._progress = None

    def _is_cancelled(self) -> bool:
        """Check if cancellation has been requested."""
@@ -594,9 +637,18 @@ class _AssetSeeder:
                    },
                )
            with self._lock:
-                self._last_progress = self._progress
-                self._state = State.IDLE
-                self._progress = None
+                self._reset_to_idle()
+                pending = self._pending_enrich
+                if pending is not None:
+                    self._pending_enrich = None
+                    if not self.start_enrich(
+                        roots=pending["roots"],
+                        compute_hashes=pending["compute_hashes"],
+                    ):
+                        logging.warning(
+                            "Pending enrich scan could not start (roots=%s)",
+                            pending["roots"],
+                        )

    def _run_fast_phase(self, roots: tuple[RootType, ...]) -> tuple[int, int, int]:
        """Run phase 1: fast scan to create stub records.
--- a/app/assets/services/init.py
+++ b/app/assets/services/init.py
@@ -23,6 +23,8 @@ from app.assets.services.ingest import (
    DependencyMissingError,
    HashMismatchError,
    create_from_hash,
+    ingest_existing_file,
+    register_output_files,
    upload_from_temp_path,
 )
 from app.assets.database.queries import (
@@ -72,6 +74,8 @@ __all__ = [
    "delete_asset_reference",
    "get_asset_by_hash",
    "get_asset_detail",
+    "ingest_existing_file",
+    "register_output_files",
    "get_mtime_ns",
    "get_size_and_mtime_ns",
    "list_assets_page",
--- a/app/assets/services/asset_management.py
+++ b/app/assets/services/asset_management.py
@@ -16,10 +16,12 @@ from app.assets.database.queries import (
    get_reference_by_id,
    get_reference_with_owner_check,
    list_references_page,
+    list_all_file_paths_by_asset_id,
    list_references_by_asset_id,
    set_reference_metadata,
    set_reference_preview,
    set_reference_tags,
+    update_asset_hash_and_mime,
    update_reference_access_time,
    update_reference_name,
    update_reference_updated_at,
@@ -67,6 +69,8 @@ def update_asset_metadata(
    user_metadata: UserMetadata = None,
    tag_origin: str = "manual",
    owner_id: str = "",
+    mime_type: str | None = None,
+    preview_id: str | None = None,
 ) -> AssetDetailResult:
    with create_session() as session:
        ref = get_reference_with_owner_check(session, reference_id, owner_id)
@@ -103,6 +107,21 @@ def update_asset_metadata(
            )
            touched = True

+        if mime_type is not None:
+            updated = update_asset_hash_and_mime(
+                session, asset_id=ref.asset_id, mime_type=mime_type
+            )
+            if updated:
+                touched = True
+
+        if preview_id is not None:
+            set_reference_preview(
+                session,
+                reference_id=reference_id,
+                preview_reference_id=preview_id,
+            )
+            touched = True
+
        if touched and user_metadata is None:
            update_reference_updated_at(session, reference_id=reference_id)

@@ -159,11 +178,9 @@ def delete_asset_reference(
            session.commit()
            return True

-        # Orphaned asset - delete it and its files
-        refs = list_references_by_asset_id(session, asset_id=asset_id)
-        file_paths = [
-            r.file_path for r in (refs or []) if getattr(r, "file_path", None)
-        ]
+        # Orphaned asset - gather ALL file paths (including
+        # soft-deleted / missing refs) so their on-disk files get cleaned up.
+        file_paths = list_all_file_paths_by_asset_id(session, asset_id=asset_id)
        # Also include the just-deleted file path
        if file_path:
            file_paths.append(file_path)
@@ -185,7 +202,7 @@ def delete_asset_reference(

 def set_asset_preview(
    reference_id: str,
-    preview_asset_id: str | None = None,
+    preview_reference_id: str | None = None,
    owner_id: str = "",
 ) -> AssetDetailResult:
    with create_session() as session:
@@ -194,7 +211,7 @@ def set_asset_preview(
        set_reference_preview(
            session,
            reference_id=reference_id,
-            preview_asset_id=preview_asset_id,
+            preview_reference_id=preview_reference_id,
        )

        result = fetch_reference_asset_and_tags(
@@ -263,6 +280,47 @@ def list_assets_page(
        return ListAssetsResult(items=items, total=total)


+def resolve_hash_to_path(
+    asset_hash: str,
+    owner_id: str = "",
+) -> DownloadResolutionResult | None:
+    """Resolve a blake3 hash to an on-disk file path.
+
+    Only references visible to *owner_id* (or owner-less) are considered.
+    The download name is the matching reference's name, else the basename;
+    the content type is the asset's mime_type, else a guess from that name,
+    else "application/octet-stream".
+    Returns a DownloadResolutionResult, or None if no asset or live path.
+    """
+    with create_session() as session:
+        asset = queries_get_asset_by_hash(session, asset_hash)
+        if not asset:
+            return None
+        refs = list_references_by_asset_id(session, asset_id=asset.id)
+        visible = [
+            r for r in refs
+            if r.owner_id == "" or r.owner_id == owner_id
+        ]
+        abs_path = select_best_live_path(visible)
+        if not abs_path:
+            return None
+        display_name = os.path.basename(abs_path)
+        for ref in visible:
+            if ref.file_path == abs_path and ref.name:
+                display_name = ref.name
+                break
+        ctype = (
+            asset.mime_type
+            or mimetypes.guess_type(display_name)[0]
+            or "application/octet-stream"
+        )
+    return DownloadResolutionResult(
+        abs_path=abs_path,
+        content_type=ctype,
+        download_name=display_name,
+    )
+
+
 def resolve_asset_for_download(
    reference_id: str,
    owner_id: str = "",
--- a/app/assets/services/bulk_ingest.py
+++ b/app/assets/services/bulk_ingest.py
@@ -37,6 +37,7 @@ class SeedAssetSpec(TypedDict):
    metadata: ExtractedMetadata | None
    hash: str | None
    mime_type: str | None
+    job_id: str | None


 class AssetRow(TypedDict):
@@ -60,6 +61,7 @@ class ReferenceRow(TypedDict):
    name: str
    preview_id: str | None
    user_metadata: dict[str, Any] | None
+    job_id: str | None
    created_at: datetime
    updated_at: datetime
    last_access_time: datetime
@@ -167,6 +169,7 @@ def batch_insert_seed_assets(
                "name": spec["info_name"],
                "preview_id": None,
                "user_metadata": user_metadata,
+                "job_id": spec.get("job_id"),
                "created_at": current_time,
                "updated_at": current_time,
                "last_access_time": current_time,
--- a/app/assets/services/ingest.py
+++ b/app/assets/services/ingest.py
@@ -9,23 +9,29 @@ from sqlalchemy.orm import Session
 import app.assets.services.hashing as hashing
 from app.assets.database.queries import (
    add_tags_to_reference,
+    count_active_siblings,
+    create_stub_asset,
+    ensure_tags_exist,
    fetch_reference_and_asset,
    get_asset_by_hash,
-    get_existing_asset_ids,
    get_reference_by_file_path,
    get_reference_tags,
    get_or_create_reference,
+    reference_exists,
    remove_missing_tag_for_asset_id,
    set_reference_metadata,
    set_reference_tags,
+    update_asset_hash_and_mime,
    upsert_asset,
    upsert_reference,
    validate_tags_exist,
 )
-from app.assets.helpers import normalize_tags
+from app.assets.helpers import get_utc_now, normalize_tags
+from app.assets.services.bulk_ingest import batch_insert_seed_assets
 from app.assets.services.file_utils import get_size_and_mtime_ns
 from app.assets.services.path_utils import (
    compute_relative_filename,
+    get_name_and_tags_from_asset_path,
    resolve_destination_from_tags,
    validate_path_within_base,
 )
@@ -65,7 +71,7 @@ def _ingest_file_from_path(

    with create_session() as session:
        if preview_id:
-            if preview_id not in get_existing_asset_ids(session, [preview_id]):
+            if not reference_exists(session, preview_id):
                preview_id = None

        asset, asset_created, asset_updated = upsert_asset(
@@ -128,6 +134,102 @@ def _ingest_file_from_path(
    )


+def register_output_files(
+    file_paths: Sequence[str],
+    user_metadata: UserMetadata = None,
+    job_id: str | None = None,
+) -> int:
+    """Register output files as assets; return how many were registered.
+
+    Non-file paths are skipped; per-file errors are logged, not raised.
+    """
+    registered = 0
+    for abs_path in file_paths:
+        if not os.path.isfile(abs_path):
+            continue
+        try:
+            if ingest_existing_file(
+                abs_path, user_metadata=user_metadata, job_id=job_id
+            ):
+                registered += 1
+        except Exception:
+            logging.exception("Failed to register output: %s", abs_path)
+    return registered
+
+
+def ingest_existing_file(
+    abs_path: str,
+    user_metadata: UserMetadata = None,  # NOTE(review): currently unused in this body
+    extra_tags: Sequence[str] = (),
+    owner_id: str = "",
+    job_id: str | None = None,
+) -> bool:
+    """Register an existing on-disk file as an asset stub.
+
+    If a reference already exists for this path, updates mtime_ns, job_id,
+    size_bytes, and resets enrichment so the enricher will re-hash it.
+
+    For brand-new paths, inserts a stub record (hash=NULL) for immediate
+    UX visibility.
+
+    Returns True if a row was inserted or updated, False otherwise.
+    """
+    locator = os.path.abspath(abs_path)
+    size_bytes, mtime_ns = get_size_and_mtime_ns(abs_path)
+    mime_type = mimetypes.guess_type(abs_path, strict=False)[0]
+    name, path_tags = get_name_and_tags_from_asset_path(abs_path)
+    tags = list(dict.fromkeys(path_tags + list(extra_tags)))  # dedupe, keep order
+
+    with create_session() as session:
+        existing_ref = get_reference_by_file_path(session, locator)
+        if existing_ref is not None:
+            now = get_utc_now()
+            existing_ref.mtime_ns = mtime_ns
+            existing_ref.job_id = job_id
+            existing_ref.is_missing = False
+            existing_ref.deleted_at = None
+            existing_ref.updated_at = now
+            existing_ref.enrichment_level = 0
+
+            asset = existing_ref.asset
+            if asset:
+                # If other refs share this asset, detach to a new stub
+                # instead of mutating the shared row.
+                siblings = count_active_siblings(session, asset.id, existing_ref.id)
+                if siblings > 0:
+                    new_asset = create_stub_asset(
+                        session,
+                        size_bytes=size_bytes,
+                        mime_type=mime_type or asset.mime_type,
+                    )
+                    existing_ref.asset_id = new_asset.id
+                else:
+                    asset.hash = None
+                    asset.size_bytes = size_bytes
+                    if mime_type:
+                        asset.mime_type = mime_type
+            session.commit()
+            return True
+
+        spec = {
+            "abs_path": locator,  # same normalized path used for the lookup above
+            "size_bytes": size_bytes,
+            "mtime_ns": mtime_ns,
+            "info_name": name,
+            "tags": tags,
+            "fname": os.path.basename(abs_path),
+            "metadata": None,
+            "hash": None,
+            "mime_type": mime_type,
+            "job_id": job_id,
+        }
+        if tags:
+            ensure_tags_exist(session, tags)
+        result = batch_insert_seed_assets(session, [spec], owner_id=owner_id)
+        session.commit()
+        return result.won_paths > 0
+
+
 def _register_existing_asset(
    asset_hash: str,
    name: str,
@@ -135,6 +237,8 @@ def _register_existing_asset(
    tags: list[str] | None = None,
    tag_origin: str = "manual",
    owner_id: str = "",
+    mime_type: str | None = None,
+    preview_id: str | None = None,
 ) -> RegisterAssetResult:
    user_metadata = user_metadata or {}

@@ -143,14 +247,25 @@ def _register_existing_asset(
        if not asset:
            raise ValueError(f"No asset with hash {asset_hash}")

+        if mime_type and not asset.mime_type:
+            update_asset_hash_and_mime(session, asset_id=asset.id, mime_type=mime_type)
+
+        if preview_id:
+            if not reference_exists(session, preview_id):
+                preview_id = None
+
        ref, ref_created = get_or_create_reference(
            session,
            asset_id=asset.id,
            owner_id=owner_id,
            name=name,
+            preview_id=preview_id,
        )

        if not ref_created:
+            if preview_id and ref.preview_id != preview_id:
+                ref.preview_id = preview_id
+
            tag_names = get_reference_tags(session, reference_id=ref.id)
            result = RegisterAssetResult(
                ref=extract_reference_data(ref),
@@ -242,6 +357,8 @@ def upload_from_temp_path(
    client_filename: str | None = None,
    owner_id: str = "",
    expected_hash: str | None = None,
+    mime_type: str | None = None,
+    preview_id: str | None = None,
 ) -> UploadResult:
    try:
        digest, _ = hashing.compute_blake3_hash(temp_path)
@@ -270,6 +387,8 @@ def upload_from_temp_path(
            tags=tags or [],
            tag_origin="manual",
            owner_id=owner_id,
+            mime_type=mime_type,
+            preview_id=preview_id,
        )
        return UploadResult(
            ref=result.ref,
@@ -291,7 +410,7 @@ def upload_from_temp_path(
    dest_abs = os.path.abspath(os.path.join(dest_dir, hashed_basename))
    validate_path_within_base(dest_abs, base_dir)

-    content_type = (
+    content_type = mime_type or (
        mimetypes.guess_type(os.path.basename(src_for_ext), strict=False)[0]
        or mimetypes.guess_type(hashed_basename, strict=False)[0]
        or "application/octet-stream"
@@ -315,7 +434,7 @@ def upload_from_temp_path(
        mime_type=content_type,
        info_name=_sanitize_filename(name or client_filename, fallback=digest),
        owner_id=owner_id,
-        preview_id=None,
+        preview_id=preview_id,
        user_metadata=user_metadata or {},
        tags=tags,
        tag_origin="manual",
@@ -342,30 +461,99 @@ def upload_from_temp_path(
    )


+def register_file_in_place(
+    abs_path: str,
+    name: str,
+    tags: list[str],
+    owner_id: str = "",
+    mime_type: str | None = None,
+) -> UploadResult:
+    """Register an already-saved file in the asset database without moving it.
+
+    Tags are derived from the filesystem path (root category + subfolder names),
+    merged with any caller-provided tags, matching the behavior of the scanner.
+    If the path is not under a known root, only the caller-provided tags are used.
+    """
+    try:
+        _, path_tags = get_name_and_tags_from_asset_path(abs_path)
+    except ValueError:
+        path_tags = []
+    merged_tags = normalize_tags([*path_tags, *tags])
+
+    try:
+        digest, _ = hashing.compute_blake3_hash(abs_path)
+    except ImportError as e:
+        raise DependencyMissingError(str(e)) from e
+    except Exception as e:
+        raise RuntimeError(f"failed to hash file: {e}") from e
+    asset_hash = "blake3:" + digest
+
+    size_bytes, mtime_ns = get_size_and_mtime_ns(abs_path)
+    content_type = mime_type or (
+        mimetypes.guess_type(abs_path, strict=False)[0]
+        or "application/octet-stream"
+    )
+
+    ingest_result = _ingest_file_from_path(
+        abs_path=abs_path,
+        asset_hash=asset_hash,
+        size_bytes=size_bytes,
+        mtime_ns=mtime_ns,
+        mime_type=content_type,
+        info_name=_sanitize_filename(name, fallback=digest),
+        owner_id=owner_id,
+        tags=merged_tags,
+        tag_origin="upload",
+        require_existing_tags=False,
+    )
+    reference_id = ingest_result.reference_id
+    if not reference_id:
+        raise RuntimeError("failed to create asset reference")
+
+    with create_session() as session:
+        pair = fetch_reference_and_asset(
+            session, reference_id=reference_id, owner_id=owner_id
+        )
+        if not pair:
+            raise RuntimeError("inconsistent DB state after ingest")
+        ref, asset = pair
+        tag_names = get_reference_tags(session, reference_id=ref.id)
+
+    return UploadResult(
+        ref=extract_reference_data(ref),
+        asset=extract_asset_data(asset),
+        tags=tag_names,
+        created_new=ingest_result.asset_created,
+    )
+
+
 def create_from_hash(
    hash_str: str,
    name: str,
    tags: list[str] | None = None,
    user_metadata: dict | None = None,
    owner_id: str = "",
+    mime_type: str | None = None,
+    preview_id: str | None = None,
 ) -> UploadResult | None:
    canonical = hash_str.strip().lower()

-    with create_session() as session:
-        asset = get_asset_by_hash(session, asset_hash=canonical)
-        if not asset:
-            return None
-
-    result = _register_existing_asset(
-        asset_hash=canonical,
-        name=_sanitize_filename(
-            name, fallback=canonical.split(":", 1)[1] if ":" in canonical else canonical
-        ),
-        user_metadata=user_metadata or {},
-        tags=tags or [],
-        tag_origin="manual",
-        owner_id=owner_id,
-    )
+    try:
+        result = _register_existing_asset(
+            asset_hash=canonical,
+            name=_sanitize_filename(
+                name, fallback=canonical.split(":", 1)[1] if ":" in canonical else canonical
+            ),
+            user_metadata=user_metadata or {},
+            tags=tags or [],
+            tag_origin="manual",
+            owner_id=owner_id,
+            mime_type=mime_type,
+            preview_id=preview_id,
+        )
+    except ValueError:
+        logging.warning("create_from_hash: no asset found for hash %s", canonical)
+        return None

    return UploadResult(
        ref=result.ref,
--- a/app/assets/services/path_utils.py
+++ b/app/assets/services/path_utils.py
@@ -93,12 +93,13 @@ def compute_relative_filename(file_path: str) -> str | None:

 def get_asset_category_and_relative_path(
    file_path: str,
-) -> tuple[Literal["input", "output", "models"], str]:
+) -> tuple[Literal["input", "output", "temp", "models"], str]:
    """Determine which root category a file path belongs to.

    Categories:
      - 'input': under folder_paths.get_input_directory()
      - 'output': under folder_paths.get_output_directory()
+      - 'temp': under folder_paths.get_temp_directory()
      - 'models': under any base path from get_comfy_models_folders()

    Returns:
@@ -129,7 +130,12 @@ def get_asset_category_and_relative_path(
    if _check_is_within(fp_abs, output_base):
        return "output", _compute_relative(fp_abs, output_base)

-    # 3) models (check deepest matching base to avoid ambiguity)
+    # 3) temp
+    temp_base = os.path.abspath(folder_paths.get_temp_directory())
+    if _check_is_within(fp_abs, temp_base):
+        return "temp", _compute_relative(fp_abs, temp_base)
+
+    # 4) models (check deepest matching base to avoid ambiguity)
    best: tuple[int, str, str] | None = None  # (base_len, bucket, rel_inside_bucket)
    for bucket, bases in get_comfy_models_folders():
        for b in bases:
@@ -146,7 +152,7 @@ def get_asset_category_and_relative_path(
        return "models", os.path.relpath(os.path.join(os.sep, combined), os.sep)

    raise ValueError(
-        f"Path is not within input, output, or configured model bases: {file_path}"
+        f"Path is not within input, output, temp, or configured model bases: {file_path}"
    )


--- a/app/assets/services/schemas.py
+++ b/app/assets/services/schemas.py
@@ -25,7 +25,9 @@ class ReferenceData:
    preview_id: str | None
    created_at: datetime
    updated_at: datetime
-    last_access_time: datetime | None
+    system_metadata: dict[str, Any] | None = None
+    job_id: str | None = None
+    last_access_time: datetime | None = None


@dataclass(frozen=True)
@@ -93,6 +95,8 @@ def extract_reference_data(ref: AssetReference) -> ReferenceData:
        file_path=ref.file_path,
        user_metadata=ref.user_metadata,
        preview_id=ref.preview_id,
+        system_metadata=ref.system_metadata,
+        job_id=ref.job_id,
        created_at=ref.created_at,
        updated_at=ref.updated_at,
        last_access_time=ref.last_access_time,
--- a/app/assets/services/tagging.py
+++ b/app/assets/services/tagging.py
@@ -1,3 +1,5 @@
+from typing import Sequence
+
 from app.assets.database.queries import (
    AddTagsResult,
    RemoveTagsResult,
@@ -6,6 +8,7 @@ from app.assets.database.queries import (
    list_tags_with_usage,
    remove_tags_from_reference,
 )
+from app.assets.database.queries.tags import list_tag_counts_for_filtered_assets
 from app.assets.services.schemas import TagUsage
 from app.database.db import create_session

@@ -73,3 +76,23 @@ def list_tags(
        )

    return [TagUsage(name, tag_type, count) for name, tag_type, count in rows], total
+
+
+def list_tag_histogram(
+    owner_id: str = "",
+    include_tags: Sequence[str] | None = None,
+    exclude_tags: Sequence[str] | None = None,
+    name_contains: str | None = None,
+    metadata_filter: dict | None = None,
+    limit: int = 100,
+) -> dict[str, int]:  # presumably {tag name: count}; verify in queries.tags
+    with create_session() as session:  # thin wrapper: forwards all filters unchanged
+        return list_tag_counts_for_filtered_assets(
+            session,
+            owner_id=owner_id,
+            include_tags=include_tags,
+            exclude_tags=exclude_tags,
+            name_contains=name_contains,
+            metadata_filter=metadata_filter,
+            limit=limit,
+        )
--- a/app/database/models.py
+++ b/app/database/models.py
@@ -1,9 +1,18 @@
 from typing import Any
 from datetime import datetime
+from sqlalchemy import MetaData
 from sqlalchemy.orm import DeclarativeBase

+NAMING_CONVENTION = {
+    "ix": "ix_%(table_name)s_%(column_0_N_name)s",
+    "uq": "uq_%(table_name)s_%(column_0_N_name)s",
+    "ck": "ck_%(table_name)s_%(constraint_name)s",
+    "fk": "fk_%(table_name)s_%(column_0_name)s_%(referred_table_name)s",
+    "pk": "pk_%(table_name)s",
+}
+
 class Base(DeclarativeBase):
-    pass
+    metadata = MetaData(naming_convention=NAMING_CONVENTION)

 def to_dict(obj: Any, include_none: bool = False) -> dict[str, Any]:
    fields = obj.__table__.columns.keys()
--- a/app/user_manager.py
+++ b/app/user_manager.py
@@ -6,6 +6,7 @@ import uuid
 import glob
 import shutil
 import logging
+import tempfile
 from aiohttp import web
 from urllib import parse
 from comfy.cli_args import args
@@ -377,8 +378,15 @@ class UserManager():
            try:
                body = await request.read()

-                with open(path, "wb") as f:
-                    f.write(body)
+                dir_name = os.path.dirname(path)
+                fd, tmp_path = tempfile.mkstemp(dir=dir_name)
+                try:
+                    with os.fdopen(fd, "wb") as f:
+                        f.write(body)
+                    os.replace(tmp_path, path)
+                except:
+                    os.unlink(tmp_path)
+                    raise
            except OSError as e:
                logging.warning(f"Error saving file '{path}': {e}")
                return web.Response(
--- a/blueprints/.glsl/Color_Balance_15.frag
+++ b/blueprints/.glsl/Color_Balance_15.frag
@@ -0,0 +1,90 @@
+#version 300 es
+precision highp float;
+
+uniform sampler2D u_image0;  // source image
+uniform float u_float0;  // shadows R (all nine offsets are scaled by 0.01 in main)
+uniform float u_float1;  // shadows G
+uniform float u_float2;  // shadows B
+uniform float u_float3;  // midtones R
+uniform float u_float4;  // midtones G
+uniform float u_float5;  // midtones B
+uniform float u_float6;  // highlights R
+uniform float u_float7;  // highlights G
+uniform float u_float8;  // highlights B
+uniform bool u_bool0;  // if true, restore the input pixel's HSL lightness
+
+in vec2 v_texCoord;
+out vec4 fragColor;
+
+vec3 rgb2hsl(vec3 c) {  // RGB -> vec3(hue, saturation, lightness)
+    float maxC = max(c.r, max(c.g, c.b));
+    float minC = min(c.r, min(c.g, c.b));
+    float l = (maxC + minC) * 0.5;
+    if (maxC == minC) return vec3(0.0, 0.0, l);  // gray: hue and saturation are 0
+    float d = maxC - minC;
+    float s = l > 0.5 ? d / (2.0 - maxC - minC) : d / (maxC + minC);
+    float h;  // hue in sextants until the division below
+    if (maxC == c.r) {
+        h = (c.g - c.b) / d + (c.g < c.b ? 6.0 : 0.0);  // +6 keeps h non-negative
+    } else if (maxC == c.g) {
+        h = (c.b - c.r) / d + 2.0;
+    } else {
+        h = (c.r - c.g) / d + 4.0;
+    }
+    h /= 6.0;  // sextants -> fraction of a full turn
+    return vec3(h, s, l);
+}
+
+float hue2rgb(float p, float q, float t) {  // one channel value; called by hsl2rgb
+    if (t < 0.0) t += 1.0;  // wrap the hue offset back into 0..1
+    if (t > 1.0) t -= 1.0;
+    if (t < 1.0 / 6.0) return p + (q - p) * 6.0 * t;  // rising ramp from p to q
+    if (t < 1.0 / 2.0) return q;  // plateau at q
+    if (t < 2.0 / 3.0) return p + (q - p) * (2.0 / 3.0 - t) * 6.0;  // falling ramp
+    return p;  // floor at p
+}
+
+vec3 hsl2rgb(vec3 hsl) {  // vec3(hue, saturation, lightness) -> RGB
+    float h = hsl.x, s = hsl.y, l = hsl.z;
+    if (s == 0.0) return vec3(l);  // no saturation: gray at lightness l
+    float q = l < 0.5 ? l * (1.0 + s) : l + s - l * s;
+    float p = 2.0 * l - q;
+    return vec3(
+        hue2rgb(p, q, h + 1.0 / 3.0),  // red: hue shifted by +1/3
+        hue2rgb(p, q, h),  // green
+        hue2rgb(p, q, h - 1.0 / 3.0)  // blue: hue shifted by -1/3
+    );
+}
+
+void main() {
+    vec4 tex = texture(u_image0, v_texCoord);
+    vec3 color = tex.rgb;
+
+    vec3 shadows = vec3(u_float0, u_float1, u_float2) * 0.01;  // per-channel RGB offsets
+    vec3 midtones = vec3(u_float3, u_float4, u_float5) * 0.01;
+    vec3 highlights = vec3(u_float6, u_float7, u_float8) * 0.01;
+
+    float maxC = max(color.r, max(color.g, color.b));
+    float minC = min(color.r, min(color.g, color.b));
+    float lightness = (maxC + minC) * 0.5;  // HSL lightness of the unmodified pixel
+
+    // GIMP weight curves: linear ramps with constants a=0.25, b=0.333, scale=0.7
+    const float a = 0.25;
+    const float b = 0.333;
+    const float scale = 0.7;
+
+    float sw = clamp((lightness - b) / -a + 0.5, 0.0, 1.0) * scale;  // shadows weight
+    float mw = clamp((lightness - b) / a + 0.5, 0.0, 1.0) *
+               clamp((lightness + b - 1.0) / -a + 0.5, 0.0, 1.0) * scale;  // midtones weight
+    float hw = clamp((lightness + b - 1.0) / a + 0.5, 0.0, 1.0) * scale;  // highlights weight
+
+    color += sw * shadows + mw * midtones + hw * highlights;
+
+    if (u_bool0) {
+        vec3 hsl = rgb2hsl(clamp(color, 0.0, 1.0));
+        hsl.z = lightness;  // keep adjusted hue/saturation, restore original lightness
+        color = hsl2rgb(hsl);
+    }
+
+    fragColor = vec4(clamp(color, 0.0, 1.0), tex.a);  // alpha passes through unchanged
+}
--- a/blueprints/.glsl/Color_Curves_8.frag
+++ b/blueprints/.glsl/Color_Curves_8.frag
@@ -0,0 +1,49 @@
+#version 300 es
+precision highp float;
+
+uniform sampler2D u_image0;
+uniform sampler2D u_curve0;  // RGB master curve (256x1 LUT)
+uniform sampler2D u_curve1;  // Red channel curve
+uniform sampler2D u_curve2;  // Green channel curve
+uniform sampler2D u_curve3;  // Blue channel curve
+
+in vec2 v_texCoord;
+layout(location = 0) out vec4 fragColor0;
+
+// GIMP-compatible curve lookup with manual linear interpolation.
+// Matches gimp_curve_map_value_inline() from gimpcurve-map.c:
+//   index = value * (n_samples - 1)
+//   f = fract(index)
+//   result = (1-f) * samples[floor] + f * samples[ceil]
+//
+// Uses texelFetch (NEAREST) to avoid GPU half-texel offset issues
+// that occur with texture() + GL_LINEAR on small 256x1 LUTs.
+float applyCurve(sampler2D curve, float value) {
+    value = clamp(value, 0.0, 1.0);
+
+    float pos = value * 255.0;  // n_samples - 1, for the 256-entry LUT
+    int lo = int(floor(pos));
+    int hi = min(lo + 1, 255);  // clamp so value == 1.0 does not read past the last texel
+    float f = pos - float(lo);  // fractional position between lo and hi
+
+    float a = texelFetch(curve, ivec2(lo, 0), 0).r;  // only the red channel is read
+    float b = texelFetch(curve, ivec2(hi, 0), 0).r;
+
+    return a + f * (b - a);  // same as (1-f)*a + f*b
+}
+
+void main() {
+    vec4 color = texture(u_image0, v_texCoord);
+
+    // GIMP order: per-channel curves first, then RGB master curve.
+    // See gimp_curve_map_pixels() default case in gimpcurve-map.c:
+    //   dest = colors_curve( channel_curve( src ) )
+    float tmp_r = applyCurve(u_curve1, color.r);
+    float tmp_g = applyCurve(u_curve2, color.g);
+    float tmp_b = applyCurve(u_curve3, color.b);
+    color.r = applyCurve(u_curve0, tmp_r);
+    color.g = applyCurve(u_curve0, tmp_g);
+    color.b = applyCurve(u_curve0, tmp_b);
+
+    fragColor0 = vec4(color.rgb, color.a);  // alpha passes through unchanged
+}
--- a/blueprints/Color
+++ b/blueprints/Color
--- a/blueprints/Color
+++ b/blueprints/Color
--- a/blueprints/Color
+++ b/blueprints/Color
--- a/comfy/cli_args.py
+++ b/comfy/cli_args.py
@@ -83,6 +83,8 @@ fpte_group.add_argument("--fp16-text-enc", action="store_true", help="Store text
 fpte_group.add_argument("--fp32-text-enc", action="store_true", help="Store text encoder weights in fp32.")
 fpte_group.add_argument("--bf16-text-enc", action="store_true", help="Store text encoder weights in bf16.")

+parser.add_argument("--fp16-intermediates", action="store_true", help="Experimental: Use fp16 for intermediate tensors between nodes instead of fp32.")
+
 parser.add_argument("--force-channels-last", action="store_true", help="Force channels last format when inferencing the models.")

 parser.add_argument("--directml", type=int, nargs="?", metavar="DIRECTML_DEVICE", const=-1, help="Use torch-directml.")
@@ -108,11 +110,13 @@ parser.add_argument("--preview-method", type=LatentPreviewMethod, default=Latent

 parser.add_argument("--preview-size", type=int, default=512, help="Sets the maximum preview size for sampler nodes.")

+CACHE_RAM_AUTO_GB = -1.0
+
 cache_group = parser.add_mutually_exclusive_group()
 cache_group.add_argument("--cache-classic", action="store_true", help="Use the old style (aggressive) caching.")
 cache_group.add_argument("--cache-lru", type=int, default=0, help="Use LRU caching with a maximum of N node results cached. May use more RAM/VRAM.")
 cache_group.add_argument("--cache-none", action="store_true", help="Reduced RAM/VRAM usage at the expense of executing every node for each run.")
-cache_group.add_argument("--cache-ram", nargs='?', const=4.0, type=float, default=0, help="Use RAM pressure caching with the specified headroom threshold. If available RAM drops below the threhold the cache remove large items to free RAM. Default 4GB")
+cache_group.add_argument("--cache-ram", nargs='?', const=CACHE_RAM_AUTO_GB, type=float, default=0, help="Use RAM pressure caching with the specified headroom threshold. If available RAM drops below the threshold the cache removes large items to free RAM. Default (when no value is provided): 25%% of system RAM (min 4GB, max 32GB).")

 attn_group = parser.add_mutually_exclusive_group()
 attn_group.add_argument("--use-split-cross-attention", action="store_true", help="Use the split cross attention optimization. Ignored when xformers is used.")
@@ -147,6 +151,7 @@ parser.add_argument("--reserve-vram", type=float, default=None, help="Set the am
 parser.add_argument("--async-offload", nargs='?', const=2, type=int, default=None, metavar="NUM_STREAMS", help="Use async weight offloading. An optional argument controls the amount of offload streams. Default is 2. Enabled by default on Nvidia.")
 parser.add_argument("--disable-async-offload", action="store_true", help="Disable async weight offloading.")
 parser.add_argument("--disable-dynamic-vram", action="store_true", help="Disable dynamic VRAM and use estimate based model loading.")
+parser.add_argument("--enable-dynamic-vram", action="store_true", help="Enable dynamic VRAM on systems where it's not enabled by default.")

 parser.add_argument("--force-non-blocking", action="store_true", help="Force ComfyUI to use non-blocking operations for all applicable tensors. This may improve performance on some non-Nvidia systems but can cause issues with some workflows.")

@@ -179,8 +184,6 @@ parser.add_argument("--disable-api-nodes", action="store_true", help="Disable lo

 parser.add_argument("--multi-user", action="store_true", help="Enables per-user storage.")

-parser.add_argument("--use-process-isolation", action="store_true", help="Enable process isolation for custom nodes with pyisolate.yaml manifests.")
-
 parser.add_argument("--verbose", default='INFO', const='DEBUG', nargs="?", choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], help='Set the logging level')
 parser.add_argument("--log-stdout", action="store_true", help="Send normal process output to stdout instead of stderr (default).")

@@ -262,4 +265,13 @@ else:
    args.fast = set(args.fast)

 def enables_dynamic_vram():
+    """Tell whether dynamic VRAM should be used, based on the parsed ``args``.
+
+    ``args.enable_dynamic_vram`` forces it on regardless of every other flag.
+    Otherwise it is on only when none of ``args.disable_dynamic_vram``,
+    ``args.highvram``, ``args.gpu_only``, ``args.novram`` and ``args.cpu``
+    is set.
+    """
+    if args.enable_dynamic_vram:
+        return True
    return not args.disable_dynamic_vram and not args.highvram and not args.gpu_only and not args.novram and not args.cpu
--- a/comfy/comfy_types/node_typing.py
+++ b/comfy/comfy_types/node_typing.py
@@ -176,8 +176,8 @@ class InputTypeOptions(TypedDict):
    """COMBO type only. Specifies the configuration for a multi-select widget.
    Available after ComfyUI frontend v1.13.4
    https://github.com/Comfy-Org/ComfyUI_frontend/pull/2987"""
-    gradient_stops: NotRequired[list[list[float]]]
-    """Gradient color stops for gradientslider display mode. Each stop is [offset, r, g, b] (``FLOAT``)."""
+    gradient_stops: NotRequired[list[dict]]
+    """Gradient color stops for gradientslider display mode. Each stop is {"offset": float, "color": [r, g, b]}."""


 class HiddenInputTypeDict(TypedDict):
--- a/comfy/context_windows.py
+++ b/comfy/context_windows.py
@@ -93,6 +93,73 @@ class IndexListCallbacks:
        return {}


+def slice_cond(cond_value, window: IndexListContextWindow, x_in: torch.Tensor, device, temporal_dim: int, temporal_scale: int=1, temporal_offset: int=0, retain_index_list: list[int]=None):
+    """Slice a conditioning object's tensor down to a single context window.
+
+    Args:
+        cond_value: Object carrying the tensor as ``.cond`` and offering
+            ``_copy_with(tensor)``, which builds the returned value.
+        window: Context window; its ``index_list`` and ``dim`` are read.
+        x_in: Windowed input; only its size along ``window.dim`` is used.
+        device: Passed to ``window.get_tensor`` on the plain path, otherwise
+            the sliced tensor is moved there with ``.to(device)``.
+        temporal_dim: Dimension of ``cond_value.cond`` that gets sliced.
+        temporal_scale: When > 1, every window index ``i`` selects positions
+            ``i * temporal_scale`` .. ``i * temporal_scale + temporal_scale - 1``.
+        temporal_offset: When > 0, the first ``temporal_offset`` entries of
+            ``window.index_list`` are skipped and the remaining indices are
+            shifted down by ``temporal_offset``.
+        retain_index_list: Forwarded to ``window.get_tensor``; only used on the
+            plain path (scale 1, offset 0). ``None`` is treated as ``[]``.
+
+    Returns:
+        ``cond_value._copy_with(sliced)``, or ``None`` when ``cond_value`` has
+        no tensor ``.cond``, ``temporal_dim`` is out of range, the scale-1 size
+        check fails, or no index is left to select.
+    """
+    # A literal ``[]`` default would be one list object shared by every call.
+    if retain_index_list is None:
+        retain_index_list = []
+
+    if not (hasattr(cond_value, "cond") and isinstance(cond_value.cond, torch.Tensor)):
+        return None
+    cond_tensor = cond_value.cond
+    if temporal_dim >= cond_tensor.ndim:
+        return None
+
+    cond_size = cond_tensor.size(temporal_dim)
+
+    # At scale 1 the tensor must have exactly one position per input position
+    # left after the offset; anything else is reported as "not handled".
+    if temporal_scale == 1:
+        expected_size = x_in.size(window.dim) - temporal_offset
+        if cond_size != expected_size:
+            return None
+
+    if temporal_offset == 0 and temporal_scale == 1:
+        sliced = window.get_tensor(cond_tensor, device, dim=temporal_dim, retain_index_list=retain_index_list)
+        return cond_value._copy_with(sliced)
+
+    # skip leading latent positions that have no corresponding conditioning (e.g. reference frames)
+    if temporal_offset > 0:
+        indices = [i - temporal_offset for i in window.index_list[temporal_offset:] if i >= temporal_offset]
+    else:
+        indices = list(window.index_list)
+
+    if not indices:
+        return None
+
+    if temporal_scale > 1:
+        # Expand every index into its run of scaled positions, dropping those past the end of the tensor.
+        indices = [si for i in indices for si in range(i * temporal_scale, (i + 1) * temporal_scale) if si < cond_size]
+        if not indices:
+            return None
+
+    idx = tuple([slice(None)] * temporal_dim + [indices])
+    sliced = cond_tensor[idx].to(device)
+    return cond_value._copy_with(sliced)
+
+
@dataclass
 class ContextSchedule:
    name: str
@@ -177,10 +221,17 @@ class IndexListContextHandler(ContextHandlerABC):
                                    new_cond_item[cond_key] = result
                                    handled = True
                                    break
+                            if not handled and self._model is not None:
+                                result = self._model.resize_cond_for_context_window(
+                                    cond_key, cond_value, window, x_in, device,
+                                    retain_index_list=self.cond_retain_index_list)
+                                if result is not None:
+                                    new_cond_item[cond_key] = result
+                                    handled = True
                            if handled:
                                continue
                            if isinstance(cond_value, torch.Tensor):
-                                if (self.dim < cond_value.ndim and cond_value(self.dim) == x_in.size(self.dim)) or \
+                                if (self.dim < cond_value.ndim and cond_value.size(self.dim) == x_in.size(self.dim)) or \
                                   (cond_value.ndim < self.dim and cond_value.size(0) == x_in.size(self.dim)):
                                    new_cond_item[cond_key] = window.get_tensor(cond_value, device)
                            # Handle audio_embed (temporal dim is 1)
@@ -224,6 +275,7 @@ class IndexListContextHandler(ContextHandlerABC):
        return context_windows

    def execute(self, calc_cond_batch: Callable, model: BaseModel, conds: list[list[dict]], x_in: torch.Tensor, timestep: torch.Tensor, model_options: dict[str]):
+        self._model = model
        self.set_step(timestep, model_options)
        context_windows = self.get_context_windows(model, x_in, model_options)
        enumerated_context_windows = list(enumerate(context_windows))
--- a/comfy/float.py
+++ b/comfy/float.py
@@ -209,3 +209,58 @@ def stochastic_round_quantize_nvfp4_by_block(x, per_tensor_scale, pad_16x, seed=
        output_block[i:i + slice_size].copy_(block)

    return output_fp4, to_blocked(output_block, flatten=False)
+
+
+def stochastic_round_quantize_mxfp8_by_block(x, pad_32x, seed=0):
+    """Quantize a 2D tensor to FP8 E4M3 with one E8M0 scale per 32 values.
+
+    Args:
+        x: 2D tensor. Unless ``pad_32x`` pads it, its column count has to be
+            divisible by 32 for the per-block reshape to succeed.
+        pad_32x: When true, zero-pad rows and columns up to multiples of 32.
+        seed: Seed handed to ``stochastic_rounding``.
+
+    Returns:
+        ``(output_fp8, block_scales)``: the ``torch.float8_e4m3fn`` data (in
+        the padded shape when padding was applied) and the per-block scale
+        bytes after ``to_blocked``, viewed as ``torch.float8_e8m0fnu``.
+        All-zero blocks get a stored scale byte of 0.
+    """
+    def roundup(x_val, multiple):
+        return ((x_val + multiple - 1) // multiple) * multiple
+
+    if pad_32x:
+        rows, cols = x.shape
+        padded_rows = roundup(rows, 32)
+        padded_cols = roundup(cols, 32)
+        if padded_rows != rows or padded_cols != cols:
+            x = torch.nn.functional.pad(x, (0, padded_cols - cols, 0, padded_rows - rows))
+
+    F8_E4M3_MAX = 448.0
+    E8M0_BIAS = 127
+    BLOCK_SIZE = 32
+
+    rows, cols = x.shape
+    x_blocked = x.reshape(rows, -1, BLOCK_SIZE)
+    max_abs = torch.amax(torch.abs(x_blocked), dim=-1)
+
+    # E8M0 block scales (power-of-2 exponents)
+    scale_needed = torch.clamp(max_abs.float() / F8_E4M3_MAX, min=2**(-127))
+    # Lower bound is 1 rather than 0: shifting a biased exponent of 0 into the
+    # float32 exponent field below yields 0.0, so a block whose maximum is
+    # tiny but non-zero would otherwise be divided by zero.
+    exp_biased = torch.clamp(torch.ceil(torch.log2(scale_needed)).to(torch.int32) + E8M0_BIAS, 1, 254)
+    block_scales_e8m0 = exp_biased.to(torch.uint8)
+
+    # Rebuild each scale as a float32 by placing the biased exponent in the exponent bits.
+    zero_mask = (max_abs == 0)
+    block_scales_f32 = (block_scales_e8m0.to(torch.int32) << 23).view(torch.float32)
+    # All-zero blocks are divided by 1 instead.
+    block_scales_f32 = torch.where(zero_mask, torch.ones_like(block_scales_f32), block_scales_f32)
+
+    # Scale per-block then stochastic round
+    data_scaled = (x_blocked.float() / block_scales_f32.unsqueeze(-1)).reshape(rows, cols)
+    output_fp8 = stochastic_rounding(data_scaled, torch.float8_e4m3fn, seed=seed)
+
+    block_scales_e8m0 = torch.where(zero_mask, torch.zeros_like(block_scales_e8m0), block_scales_e8m0)
+    return output_fp8, to_blocked(block_scales_e8m0, flatten=False).view(torch.float8_e8m0fnu)
--- a/comfy/hooks.py
+++ b/comfy/hooks.py
@@ -14,9 +14,6 @@ if TYPE_CHECKING:
 import comfy.lora
 import comfy.model_management
 import comfy.patcher_extension
-from comfy.cli_args import args
-import uuid
-import os
 from node_helpers import conditioning_set_values

 # #######################################################################################################
@@ -64,37 +61,8 @@ class EnumHookScope(enum.Enum):
    HookedOnly = "hooked_only"


-_ISOLATION_HOOKREF_MODE = args.use_process_isolation or os.environ.get("PYISOLATE_CHILD") == "1"
-
-
 class _HookRef:
-    def __init__(self):
-        if _ISOLATION_HOOKREF_MODE:
-            self._pyisolate_id = str(uuid.uuid4())
-
-    def _ensure_pyisolate_id(self):
-        pyisolate_id = getattr(self, "_pyisolate_id", None)
-        if pyisolate_id is None:
-            pyisolate_id = str(uuid.uuid4())
-            self._pyisolate_id = pyisolate_id
-        return pyisolate_id
-
-    def __eq__(self, other):
-        if not _ISOLATION_HOOKREF_MODE:
-            return self is other
-        if not isinstance(other, _HookRef):
-            return False
-        return self._ensure_pyisolate_id() == other._ensure_pyisolate_id()
-
-    def __hash__(self):
-        if not _ISOLATION_HOOKREF_MODE:
-            return id(self)
-        return hash(self._ensure_pyisolate_id())
-
-    def __str__(self):
-        if not _ISOLATION_HOOKREF_MODE:
-            return super().__str__()
-        return f"PYISOLATE_HOOKREF:{self._ensure_pyisolate_id()}"
+    pass


 def default_should_register(hook: Hook, model: ModelPatcher, model_options: dict, target_dict: dict[str], registered: HookGroup):
@@ -200,8 +168,6 @@ class WeightHook(Hook):
                key_map = comfy.lora.model_lora_keys_clip(model.model, key_map)
            else:
                key_map = comfy.lora.model_lora_keys_unet(model.model, key_map)
-            if self.weights is None:
-                self.weights = {}
            weights = comfy.lora.load_lora(self.weights, key_map, log_missing=False)
        else:
            if target == EnumWeightTarget.Clip:
--- a/comfy/isolation/init.py
+++ b/comfy/isolation/init.py
@@ -1,436 +0,0 @@
-# pylint: disable=consider-using-from-import,cyclic-import,global-statement,global-variable-not-assigned,import-outside-toplevel,logging-fstring-interpolation
-from __future__ import annotations
-import asyncio
-import inspect
-import logging
-import os
-import time
-from dataclasses import dataclass
-from pathlib import Path
-from typing import Dict, List, Optional, Set, TYPE_CHECKING
-_IMPORT_TORCH = os.environ.get("PYISOLATE_IMPORT_TORCH", "1") == "1"
-
-load_isolated_node = None
-find_manifest_directories = None
-build_stub_class = None
-get_class_types_for_extension = None
-scan_shm_forensics = None
-start_shm_forensics = None
-
-if _IMPORT_TORCH:
-    import folder_paths
-    from .extension_loader import load_isolated_node
-    from .manifest_loader import find_manifest_directories
-    from .runtime_helpers import build_stub_class, get_class_types_for_extension
-    from .shm_forensics import scan_shm_forensics, start_shm_forensics
-
-if TYPE_CHECKING:
-    from pyisolate import ExtensionManager
-    from .extension_wrapper import ComfyNodeExtension
-
-LOG_PREFIX = "]["
-isolated_node_timings: List[tuple[float, Path, int]] = []
-
-if _IMPORT_TORCH:
-    PYISOLATE_VENV_ROOT = Path(folder_paths.base_path) / ".pyisolate_venvs"
-    PYISOLATE_VENV_ROOT.mkdir(parents=True, exist_ok=True)
-
-logger = logging.getLogger(__name__)
-_WORKFLOW_BOUNDARY_MIN_FREE_VRAM_BYTES = 2 * 1024 * 1024 * 1024
-_MODEL_PATCHER_IDLE_TIMEOUT_MS = 120000
-
-
-def initialize_proxies() -> None:
-    from .child_hooks import is_child_process
-
-    is_child = is_child_process()
-
-    if is_child:
-        from .child_hooks import initialize_child_process
-
-        initialize_child_process()
-    else:
-        from .host_hooks import initialize_host_process
-
-        initialize_host_process()
-        if start_shm_forensics is not None:
-            start_shm_forensics()
-
-
-@dataclass(frozen=True)
-class IsolatedNodeSpec:
-    node_name: str
-    display_name: str
-    stub_class: type
-    module_path: Path
-
-
-_ISOLATED_NODE_SPECS: List[IsolatedNodeSpec] = []
-_CLAIMED_PATHS: Set[Path] = set()
-_ISOLATION_SCAN_ATTEMPTED = False
-_EXTENSION_MANAGERS: List["ExtensionManager"] = []
-_RUNNING_EXTENSIONS: Dict[str, "ComfyNodeExtension"] = {}
-_ISOLATION_BACKGROUND_TASK: Optional["asyncio.Task[List[IsolatedNodeSpec]]"] = None
-_EARLY_START_TIME: Optional[float] = None
-
-
-def start_isolation_loading_early(loop: "asyncio.AbstractEventLoop") -> None:
-    global _ISOLATION_BACKGROUND_TASK, _EARLY_START_TIME
-    if _ISOLATION_BACKGROUND_TASK is not None:
-        return
-    _EARLY_START_TIME = time.perf_counter()
-    _ISOLATION_BACKGROUND_TASK = loop.create_task(initialize_isolation_nodes())
-
-
-async def await_isolation_loading() -> List[IsolatedNodeSpec]:
-    global _ISOLATION_BACKGROUND_TASK, _EARLY_START_TIME
-    if _ISOLATION_BACKGROUND_TASK is not None:
-        specs = await _ISOLATION_BACKGROUND_TASK
-        return specs
-    return await initialize_isolation_nodes()
-
-
-async def initialize_isolation_nodes() -> List[IsolatedNodeSpec]:
-    global _ISOLATED_NODE_SPECS, _ISOLATION_SCAN_ATTEMPTED, _CLAIMED_PATHS
-
-    if _ISOLATED_NODE_SPECS:
-        return _ISOLATED_NODE_SPECS
-
-    if _ISOLATION_SCAN_ATTEMPTED:
-        return []
-
-    _ISOLATION_SCAN_ATTEMPTED = True
-    if find_manifest_directories is None or load_isolated_node is None or build_stub_class is None:
-        return []
-    manifest_entries = find_manifest_directories()
-    _CLAIMED_PATHS = {entry[0].resolve() for entry in manifest_entries}
-
-    if not manifest_entries:
-        return []
-
-    os.environ["PYISOLATE_ISOLATION_ACTIVE"] = "1"
-    concurrency_limit = max(1, (os.cpu_count() or 4) // 2)
-    semaphore = asyncio.Semaphore(concurrency_limit)
-
-    async def load_with_semaphore(
-        node_dir: Path, manifest: Path
-    ) -> List[IsolatedNodeSpec]:
-        async with semaphore:
-            load_start = time.perf_counter()
-            spec_list = await load_isolated_node(
-                node_dir,
-                manifest,
-                logger,
-                lambda name, info, extension: build_stub_class(
-                    name,
-                    info,
-                    extension,
-                    _RUNNING_EXTENSIONS,
-                    logger,
-                ),
-                PYISOLATE_VENV_ROOT,
-                _EXTENSION_MANAGERS,
-            )
-            spec_list = [
-                IsolatedNodeSpec(
-                    node_name=node_name,
-                    display_name=display_name,
-                    stub_class=stub_cls,
-                    module_path=node_dir,
-                )
-                for node_name, display_name, stub_cls in spec_list
-            ]
-            isolated_node_timings.append(
-                (time.perf_counter() - load_start, node_dir, len(spec_list))
-            )
-            return spec_list
-
-    tasks = [
-        load_with_semaphore(node_dir, manifest)
-        for node_dir, manifest in manifest_entries
-    ]
-    results = await asyncio.gather(*tasks, return_exceptions=True)
-
-    specs: List[IsolatedNodeSpec] = []
-    for result in results:
-        if isinstance(result, Exception):
-            logger.error(
-                "%s Isolated node failed during startup; continuing: %s",
-                LOG_PREFIX,
-                result,
-            )
-            continue
-        specs.extend(result)
-
-    _ISOLATED_NODE_SPECS = specs
-    return list(_ISOLATED_NODE_SPECS)
-
-
-def _get_class_types_for_extension(extension_name: str) -> Set[str]:
-    """Get all node class types (node names) belonging to an extension."""
-    extension = _RUNNING_EXTENSIONS.get(extension_name)
-    if not extension:
-        return set()
-
-    ext_path = Path(extension.module_path)
-    class_types = set()
-    for spec in _ISOLATED_NODE_SPECS:
-        if spec.module_path.resolve() == ext_path.resolve():
-            class_types.add(spec.node_name)
-
-    return class_types
-
-
-async def notify_execution_graph(needed_class_types: Set[str], caches: list | None = None) -> None:
-    """Evict running extensions not needed for current execution.
-
-    When *caches* is provided, cache entries for evicted extensions' node
-    class_types are invalidated to prevent stale ``RemoteObjectHandle``
-    references from surviving in the output cache.
-    """
-    await wait_for_model_patcher_quiescence(
-        timeout_ms=_MODEL_PATCHER_IDLE_TIMEOUT_MS,
-        fail_loud=True,
-        marker="ISO:notify_graph_wait_idle",
-    )
-
-    evicted_class_types: Set[str] = set()
-
-    async def _stop_extension(
-        ext_name: str, extension: "ComfyNodeExtension", reason: str
-    ) -> None:
-        # Collect class_types BEFORE stopping so we can invalidate cache entries.
-        ext_class_types = _get_class_types_for_extension(ext_name)
-        evicted_class_types.update(ext_class_types)
-        logger.info("%s ISO:eject_start ext=%s reason=%s", LOG_PREFIX, ext_name, reason)
-        logger.debug("%s ISO:stop_start ext=%s", LOG_PREFIX, ext_name)
-        stop_result = extension.stop()
-        if inspect.isawaitable(stop_result):
-            await stop_result
-        _RUNNING_EXTENSIONS.pop(ext_name, None)
-        logger.debug("%s ISO:stop_done ext=%s", LOG_PREFIX, ext_name)
-        if scan_shm_forensics is not None:
-            scan_shm_forensics("ISO:stop_extension", refresh_model_context=True)
-
-    if scan_shm_forensics is not None:
-        scan_shm_forensics("ISO:notify_graph_start", refresh_model_context=True)
-    isolated_class_types_in_graph = needed_class_types.intersection(
-        {spec.node_name for spec in _ISOLATED_NODE_SPECS}
-    )
-    graph_uses_isolation = bool(isolated_class_types_in_graph)
-    logger.debug(
-        "%s ISO:notify_graph_start running=%d needed=%d",
-        LOG_PREFIX,
-        len(_RUNNING_EXTENSIONS),
-        len(needed_class_types),
-    )
-    if graph_uses_isolation:
-        for ext_name, extension in list(_RUNNING_EXTENSIONS.items()):
-            ext_class_types = _get_class_types_for_extension(ext_name)
-
-            # If NONE of this extension's nodes are in the execution graph -> evict.
-            if not ext_class_types.intersection(needed_class_types):
-                await _stop_extension(
-                    ext_name,
-                    extension,
-                    "isolated custom_node not in execution graph, evicting",
-                )
-    else:
-        logger.debug(
-            "%s ISO:notify_graph_skip_evict running=%d reason=no isolated nodes in graph",
-            LOG_PREFIX,
-            len(_RUNNING_EXTENSIONS),
-        )
-
-    # Isolated child processes add steady VRAM pressure; reclaim host-side models
-    # at workflow boundaries so subsequent host nodes (e.g. CLIP encode) keep headroom.
-    try:
-        import comfy.model_management as model_management
-
-        device = model_management.get_torch_device()
-        if getattr(device, "type", None) == "cuda":
-            required = max(
-                model_management.minimum_inference_memory(),
-                _WORKFLOW_BOUNDARY_MIN_FREE_VRAM_BYTES,
-            )
-            free_before = model_management.get_free_memory(device)
-            if free_before < required and _RUNNING_EXTENSIONS and graph_uses_isolation:
-                for ext_name, extension in list(_RUNNING_EXTENSIONS.items()):
-                    await _stop_extension(
-                        ext_name,
-                        extension,
-                        f"boundary low-vram restart (free={int(free_before)} target={int(required)})",
-                    )
-            if model_management.get_free_memory(device) < required:
-                model_management.unload_all_models()
-            model_management.cleanup_models_gc()
-            model_management.cleanup_models()
-            if model_management.get_free_memory(device) < required:
-                model_management.free_memory(required, device, for_dynamic=False)
-                model_management.soft_empty_cache()
-    except Exception:
-        logger.debug(
-            "%s workflow-boundary host VRAM relief failed", LOG_PREFIX, exc_info=True
-        )
-    finally:
-        # Invalidate cached outputs for evicted extensions so stale
-        # RemoteObjectHandle references are not served from cache.
-        if evicted_class_types and caches:
-            total_invalidated = 0
-            for cache in caches:
-                if hasattr(cache, "invalidate_by_class_types"):
-                    total_invalidated += cache.invalidate_by_class_types(
-                        evicted_class_types
-                    )
-            if total_invalidated > 0:
-                logger.info(
-                    "%s ISO:cache_invalidated count=%d class_types=%s",
-                    LOG_PREFIX,
-                    total_invalidated,
-                    evicted_class_types,
-                )
-        scan_shm_forensics("ISO:notify_graph_done", refresh_model_context=True)
-        logger.debug(
-            "%s ISO:notify_graph_done running=%d", LOG_PREFIX, len(_RUNNING_EXTENSIONS)
-        )
-
-
-async def flush_running_extensions_transport_state() -> int:
-    await wait_for_model_patcher_quiescence(
-        timeout_ms=_MODEL_PATCHER_IDLE_TIMEOUT_MS,
-        fail_loud=True,
-        marker="ISO:flush_transport_wait_idle",
-    )
-    total_flushed = 0
-    for ext_name, extension in list(_RUNNING_EXTENSIONS.items()):
-        flush_fn = getattr(extension, "flush_transport_state", None)
-        if not callable(flush_fn):
-            continue
-        try:
-            flushed = await flush_fn()
-            if isinstance(flushed, int):
-                total_flushed += flushed
-                if flushed > 0:
-                    logger.debug(
-                        "%s %s workflow-end flush released=%d",
-                        LOG_PREFIX,
-                        ext_name,
-                        flushed,
-                    )
-        except Exception:
-            logger.debug(
-                "%s %s workflow-end flush failed", LOG_PREFIX, ext_name, exc_info=True
-            )
-    scan_shm_forensics(
-        "ISO:flush_running_extensions_transport_state", refresh_model_context=True
-    )
-    return total_flushed
-
-
-async def wait_for_model_patcher_quiescence(
-    timeout_ms: int = _MODEL_PATCHER_IDLE_TIMEOUT_MS,
-    *,
-    fail_loud: bool = False,
-    marker: str = "ISO:wait_model_patcher_idle",
-) -> bool:
-    try:
-        from comfy.isolation.model_patcher_proxy_registry import ModelPatcherRegistry
-
-        registry = ModelPatcherRegistry()
-        start = time.perf_counter()
-        idle = await registry.wait_all_idle(timeout_ms)
-        elapsed_ms = (time.perf_counter() - start) * 1000.0
-        if idle:
-            logger.debug(
-                "%s %s idle=1 timeout_ms=%d elapsed_ms=%.3f",
-                LOG_PREFIX,
-                marker,
-                timeout_ms,
-                elapsed_ms,
-            )
-            return True
-
-        states = await registry.get_all_operation_states()
-        logger.error(
-            "%s %s idle_timeout timeout_ms=%d elapsed_ms=%.3f states=%s",
-            LOG_PREFIX,
-            marker,
-            timeout_ms,
-            elapsed_ms,
-            states,
-        )
-        if fail_loud:
-            raise TimeoutError(
-                f"ModelPatcherRegistry did not quiesce within {timeout_ms} ms"
-            )
-        return False
-    except Exception:
-        if fail_loud:
-            raise
-        logger.debug("%s %s failed", LOG_PREFIX, marker, exc_info=True)
-        return False
-
-
-def get_claimed_paths() -> Set[Path]:
-    return _CLAIMED_PATHS
-
-
-def update_rpc_event_loops(loop: "asyncio.AbstractEventLoop | None" = None) -> None:
-    """Update all active RPC instances with the current event loop.
-
-    This MUST be called at the start of each workflow execution to ensure
-    RPC calls are scheduled on the correct event loop. This handles the case
-    where asyncio.run() creates a new event loop for each workflow.
-
-    Args:
-        loop: The event loop to use. If None, uses asyncio.get_running_loop().
-    """
-    if loop is None:
-        try:
-            loop = asyncio.get_running_loop()
-        except RuntimeError:
-            loop = asyncio.get_event_loop()
-
-    update_count = 0
-
-    # Update RPCs from ExtensionManagers
-    for manager in _EXTENSION_MANAGERS:
-        if not hasattr(manager, "extensions"):
-            continue
-        for name, extension in manager.extensions.items():
-            if hasattr(extension, "rpc") and extension.rpc is not None:
-                if hasattr(extension.rpc, "update_event_loop"):
-                    extension.rpc.update_event_loop(loop)
-                    update_count += 1
-                    logger.debug(f"{LOG_PREFIX}Updated loop on extension '{name}'")
-
-    # Also update RPCs from running extensions (they may have direct RPC refs)
-    for name, extension in _RUNNING_EXTENSIONS.items():
-        if hasattr(extension, "rpc") and extension.rpc is not None:
-            if hasattr(extension.rpc, "update_event_loop"):
-                extension.rpc.update_event_loop(loop)
-                update_count += 1
-                logger.debug(f"{LOG_PREFIX}Updated loop on running extension '{name}'")
-
-    if update_count > 0:
-        logger.debug(f"{LOG_PREFIX}Updated event loop on {update_count} RPC instances")
-    else:
-        logger.debug(
-            f"{LOG_PREFIX}No RPC instances found to update (managers={len(_EXTENSION_MANAGERS)}, running={len(_RUNNING_EXTENSIONS)})"
-        )
-
-
-__all__ = [
-    "LOG_PREFIX",
-    "initialize_proxies",
-    "initialize_isolation_nodes",
-    "start_isolation_loading_early",
-    "await_isolation_loading",
-    "notify_execution_graph",
-    "flush_running_extensions_transport_state",
-    "wait_for_model_patcher_quiescence",
-    "get_claimed_paths",
-    "update_rpc_event_loops",
-    "IsolatedNodeSpec",
-    "get_class_types_for_extension",
-]
--- a/comfy/isolation/adapter.py
+++ b/comfy/isolation/adapter.py
@@ -1,965 +0,0 @@
-# pylint: disable=import-outside-toplevel,logging-fstring-interpolation,protected-access,raise-missing-from,useless-return,wrong-import-position
-from __future__ import annotations
-
-import logging
-import os
-import inspect
-from pathlib import Path
-from typing import Any, Callable, Dict, List, Optional, cast
-
-from pyisolate.interfaces import IsolationAdapter, SerializerRegistryProtocol  # type: ignore[import-untyped]
-from pyisolate._internal.rpc_protocol import AsyncRPC, ProxiedSingleton  # type: ignore[import-untyped]
-
-_IMPORT_TORCH = os.environ.get("PYISOLATE_IMPORT_TORCH", "1") == "1"
-
-# Singleton proxies that do NOT transitively import torch/PIL/psutil/aiohttp.
-# Safe to import in sealed workers without host framework modules.
-from comfy.isolation.proxies.folder_paths_proxy import FolderPathsProxy
-from comfy.isolation.proxies.helper_proxies import HelperProxiesService
-from comfy.isolation.proxies.web_directory_proxy import WebDirectoryProxy
-
-# Singleton proxies that transitively import torch, PIL, or heavy host modules.
-# Only available when torch/host framework is present.
-CLIPProxy = None
-CLIPRegistry = None
-ModelPatcherProxy = None
-ModelPatcherRegistry = None
-ModelSamplingProxy = None
-ModelSamplingRegistry = None
-VAEProxy = None
-VAERegistry = None
-FirstStageModelRegistry = None
-ModelManagementProxy = None
-PromptServerService = None
-ProgressProxy = None
-UtilsProxy = None
-_HAS_TORCH_PROXIES = False
-if _IMPORT_TORCH:
-    from comfy.isolation.clip_proxy import CLIPProxy, CLIPRegistry
-    from comfy.isolation.model_patcher_proxy import (
-        ModelPatcherProxy,
-        ModelPatcherRegistry,
-    )
-    from comfy.isolation.model_sampling_proxy import (
-        ModelSamplingProxy,
-        ModelSamplingRegistry,
-    )
-    from comfy.isolation.vae_proxy import VAEProxy, VAERegistry, FirstStageModelRegistry
-    from comfy.isolation.proxies.model_management_proxy import ModelManagementProxy
-    from comfy.isolation.proxies.prompt_server_impl import PromptServerService
-    from comfy.isolation.proxies.progress_proxy import ProgressProxy
-    from comfy.isolation.proxies.utils_proxy import UtilsProxy
-    _HAS_TORCH_PROXIES = True
-
-logger = logging.getLogger(__name__)
-
-# Force /dev/shm for shared memory (bwrap makes /tmp private)
-import tempfile
-
-if os.path.exists("/dev/shm"):
-    # Only override if not already set or if default is not /dev/shm
-    current_tmp = tempfile.gettempdir()
-    if not current_tmp.startswith("/dev/shm"):
-        logger.debug(
-            f"Configuring shared memory: Changing TMPDIR from {current_tmp} to /dev/shm"
-        )
-        os.environ["TMPDIR"] = "/dev/shm"
-        tempfile.tempdir = None  # Clear cache to force re-evaluation
-
-
-class ComfyUIAdapter(IsolationAdapter):
-    # ComfyUI-specific IsolationAdapter implementation
-
-    @property
-    def identifier(self) -> str:
-        return "comfyui"
-
-    def get_path_config(self, module_path: str) -> Optional[Dict[str, Any]]:
-        if "ComfyUI" in module_path and "custom_nodes" in module_path:
-            parts = module_path.split("ComfyUI")
-            if len(parts) > 1:
-                comfy_root = parts[0] + "ComfyUI"
-                return {
-                    "preferred_root": comfy_root,
-                    "additional_paths": [
-                        os.path.join(comfy_root, "custom_nodes"),
-                        os.path.join(comfy_root, "comfy"),
-                    ],
-                    "filtered_subdirs": ["comfy", "app", "comfy_execution", "utils"],
-                }
-        return None
-
-    def get_sandbox_system_paths(self) -> Optional[List[str]]:
-        """Returns required application paths to mount in the sandbox."""
-        # By inspecting where our adapter is loaded from, we can determine the comfy root
-        adapter_file = inspect.getfile(self.__class__)
-        # adapter_file = /home/johnj/ComfyUI/comfy/isolation/adapter.py
-        comfy_root = os.path.dirname(os.path.dirname(os.path.dirname(adapter_file)))
-        if os.path.exists(comfy_root):
-            return [comfy_root]
-        return None
-
-    def setup_child_environment(self, snapshot: Dict[str, Any]) -> None:
-        comfy_root = snapshot.get("preferred_root")
-        if not comfy_root:
-            return
-
-        requirements_path = Path(comfy_root) / "requirements.txt"
-        if requirements_path.exists():
-            import re
-
-            for line in requirements_path.read_text().splitlines():
-                line = line.strip()
-                if not line or line.startswith("#"):
-                    continue
-                pkg_name = re.split(r"[<>=!~\[]", line)[0].strip()
-                if pkg_name:
-                    logging.getLogger(pkg_name).setLevel(logging.ERROR)
-
-    def register_serializers(self, registry: SerializerRegistryProtocol) -> None:
-        if not _IMPORT_TORCH:
-            # Sealed worker without torch — register torch-free TensorValue handler
-            # so IMAGE/MASK/LATENT tensors arrive as numpy arrays, not raw dicts.
-            import numpy as np
-
-            _TORCH_DTYPE_TO_NUMPY = {
-                "torch.float32": np.float32,
-                "torch.float64": np.float64,
-                "torch.float16": np.float16,
-                "torch.bfloat16": np.float32,  # numpy has no bfloat16; upcast
-                "torch.int32": np.int32,
-                "torch.int64": np.int64,
-                "torch.int16": np.int16,
-                "torch.int8": np.int8,
-                "torch.uint8": np.uint8,
-                "torch.bool": np.bool_,
-            }
-
-            def _deserialize_tensor_value(data: Dict[str, Any]) -> Any:
-                dtype_str = data["dtype"]
-                np_dtype = _TORCH_DTYPE_TO_NUMPY.get(dtype_str, np.float32)
-                shape = tuple(data["tensor_size"])
-                arr = np.array(data["data"], dtype=np_dtype).reshape(shape)
-                return arr
-
-            _NUMPY_TO_TORCH_DTYPE = {
-                np.float32: "torch.float32",
-                np.float64: "torch.float64",
-                np.float16: "torch.float16",
-                np.int32: "torch.int32",
-                np.int64: "torch.int64",
-                np.int16: "torch.int16",
-                np.int8: "torch.int8",
-                np.uint8: "torch.uint8",
-                np.bool_: "torch.bool",
-            }
-
-            def _serialize_tensor_value(obj: Any) -> Dict[str, Any]:
-                arr = np.asarray(obj, dtype=np.float32) if obj.dtype not in _NUMPY_TO_TORCH_DTYPE else np.asarray(obj)
-                dtype_str = _NUMPY_TO_TORCH_DTYPE.get(arr.dtype.type, "torch.float32")
-                return {
-                    "__type__": "TensorValue",
-                    "dtype": dtype_str,
-                    "tensor_size": list(arr.shape),
-                    "requires_grad": False,
-                    "data": arr.tolist(),
-                }
-
-            registry.register("TensorValue", _serialize_tensor_value, _deserialize_tensor_value, data_type=True)
-            # ndarray output from sealed workers serializes as TensorValue for host torch reconstruction
-            registry.register("ndarray", _serialize_tensor_value, _deserialize_tensor_value, data_type=True)
-            return
-
-        import torch
-
-        def serialize_device(obj: Any) -> Dict[str, Any]:
-            return {"__type__": "device", "device_str": str(obj)}
-
-        def deserialize_device(data: Dict[str, Any]) -> Any:
-            return torch.device(data["device_str"])
-
-        registry.register("device", serialize_device, deserialize_device)
-
-        _VALID_DTYPES = {
-            "float16", "float32", "float64", "bfloat16",
-            "int8", "int16", "int32", "int64",
-            "uint8", "bool",
-        }
-
-        def serialize_dtype(obj: Any) -> Dict[str, Any]:
-            return {"__type__": "dtype", "dtype_str": str(obj)}
-
-        def deserialize_dtype(data: Dict[str, Any]) -> Any:
-            dtype_name = data["dtype_str"].replace("torch.", "")
-            if dtype_name not in _VALID_DTYPES:
-                raise ValueError(f"Invalid dtype: {data['dtype_str']}")
-            return getattr(torch, dtype_name)
-
-        registry.register("dtype", serialize_dtype, deserialize_dtype)
-
-        from comfy_api.latest._io import FolderType
-        from comfy_api.latest._ui import SavedImages, SavedResult
-
-        def serialize_saved_result(obj: Any) -> Dict[str, Any]:
-            return {
-                "__type__": "SavedResult",
-                "filename": obj.filename,
-                "subfolder": obj.subfolder,
-                "folder_type": obj.type.value,
-            }
-
-        def deserialize_saved_result(data: Dict[str, Any]) -> Any:
-            if isinstance(data, SavedResult):
-                return data
-            folder_type = data["folder_type"] if "folder_type" in data else data["type"]
-            return SavedResult(
-                filename=data["filename"],
-                subfolder=data["subfolder"],
-                type=FolderType(folder_type),
-            )
-
-        registry.register(
-            "SavedResult",
-            serialize_saved_result,
-            deserialize_saved_result,
-            data_type=True,
-        )
-
-        def serialize_saved_images(obj: Any) -> Dict[str, Any]:
-            return {
-                "__type__": "SavedImages",
-                "results": [serialize_saved_result(result) for result in obj.results],
-                "is_animated": obj.is_animated,
-            }
-
-        def deserialize_saved_images(data: Dict[str, Any]) -> Any:
-            return SavedImages(
-                results=[deserialize_saved_result(result) for result in data["results"]],
-                is_animated=data.get("is_animated", False),
-            )
-
-        registry.register(
-            "SavedImages",
-            serialize_saved_images,
-            deserialize_saved_images,
-            data_type=True,
-        )
-
-        def serialize_model_patcher(obj: Any) -> Dict[str, Any]:
-            # Child-side: must already have _instance_id (proxy)
-            if os.environ.get("PYISOLATE_CHILD") == "1":
-                if hasattr(obj, "_instance_id"):
-                    return {"__type__": "ModelPatcherRef", "model_id": obj._instance_id}
-                raise RuntimeError(
-                    f"ModelPatcher in child lacks _instance_id: "
-                    f"{type(obj).__module__}.{type(obj).__name__}"
-                )
-            # Host-side: register with registry
-            if hasattr(obj, "_instance_id"):
-                return {"__type__": "ModelPatcherRef", "model_id": obj._instance_id}
-            model_id = ModelPatcherRegistry().register(obj)
-            return {"__type__": "ModelPatcherRef", "model_id": model_id}
-
-        def deserialize_model_patcher(data: Any) -> Any:
-            """Deserialize ModelPatcher refs; pass through already-materialized objects."""
-            if isinstance(data, dict):
-                return ModelPatcherProxy(
-                    data["model_id"], registry=None, manage_lifecycle=False
-                )
-            return data
-
-        def deserialize_model_patcher_ref(data: Dict[str, Any]) -> Any:
-            """Context-aware ModelPatcherRef deserializer for both host and child."""
-            is_child = os.environ.get("PYISOLATE_CHILD") == "1"
-            if is_child:
-                return ModelPatcherProxy(
-                    data["model_id"], registry=None, manage_lifecycle=False
-                )
-            else:
-                return ModelPatcherRegistry()._get_instance(data["model_id"])
-
-        # Register ModelPatcher type for serialization
-        registry.register(
-            "ModelPatcher", serialize_model_patcher, deserialize_model_patcher
-        )
-        # Register ModelPatcherProxy type (already a proxy, just return ref)
-        registry.register(
-            "ModelPatcherProxy", serialize_model_patcher, deserialize_model_patcher
-        )
-        # Register ModelPatcherRef for deserialization (context-aware: host or child)
-        registry.register("ModelPatcherRef", None, deserialize_model_patcher_ref)
-
-        def serialize_clip(obj: Any) -> Dict[str, Any]:
-            if hasattr(obj, "_instance_id"):
-                return {"__type__": "CLIPRef", "clip_id": obj._instance_id}
-            clip_id = CLIPRegistry().register(obj)
-            return {"__type__": "CLIPRef", "clip_id": clip_id}
-
-        def deserialize_clip(data: Any) -> Any:
-            if isinstance(data, dict):
-                return CLIPProxy(data["clip_id"], registry=None, manage_lifecycle=False)
-            return data
-
-        def deserialize_clip_ref(data: Dict[str, Any]) -> Any:
-            """Context-aware CLIPRef deserializer for both host and child."""
-            is_child = os.environ.get("PYISOLATE_CHILD") == "1"
-            if is_child:
-                return CLIPProxy(data["clip_id"], registry=None, manage_lifecycle=False)
-            else:
-                return CLIPRegistry()._get_instance(data["clip_id"])
-
-        # Register CLIP type for serialization
-        registry.register("CLIP", serialize_clip, deserialize_clip)
-        # Register CLIPProxy type (already a proxy, just return ref)
-        registry.register("CLIPProxy", serialize_clip, deserialize_clip)
-        # Register CLIPRef for deserialization (context-aware: host or child)
-        registry.register("CLIPRef", None, deserialize_clip_ref)
-
-        def serialize_vae(obj: Any) -> Dict[str, Any]:
-            if hasattr(obj, "_instance_id"):
-                return {"__type__": "VAERef", "vae_id": obj._instance_id}
-            vae_id = VAERegistry().register(obj)
-            return {"__type__": "VAERef", "vae_id": vae_id}
-
-        def deserialize_vae(data: Any) -> Any:
-            if isinstance(data, dict):
-                return VAEProxy(data["vae_id"])
-            return data
-
-        def deserialize_vae_ref(data: Dict[str, Any]) -> Any:
-            """Context-aware VAERef deserializer for both host and child."""
-            is_child = os.environ.get("PYISOLATE_CHILD") == "1"
-            if is_child:
-                # Child: create a proxy
-                return VAEProxy(data["vae_id"])
-            else:
-                # Host: lookup real VAE from registry
-                return VAERegistry()._get_instance(data["vae_id"])
-
-        # Register VAE type for serialization
-        registry.register("VAE", serialize_vae, deserialize_vae)
-        # Register VAEProxy type (already a proxy, just return ref)
-        registry.register("VAEProxy", serialize_vae, deserialize_vae)
-        # Register VAERef for deserialization (context-aware: host or child)
-        registry.register("VAERef", None, deserialize_vae_ref)
-
-        # ModelSampling serialization - handles ModelSampling* types
-        # copyreg removed - no pickle fallback allowed
-
-        def serialize_model_sampling(obj: Any) -> Dict[str, Any]:
-            # Proxy with _instance_id — return ref (works from both host and child)
-            if hasattr(obj, "_instance_id"):
-                return {"__type__": "ModelSamplingRef", "ms_id": obj._instance_id}
-            # Child-side: object created locally in child (e.g. ModelSamplingAdvanced
-            # in nodes_z_image_turbo.py). Serialize as inline data so the host can
-            # reconstruct the real torch.nn.Module.
-            if os.environ.get("PYISOLATE_CHILD") == "1":
-                import base64
-                import io as _io
-
-                # Identify base classes from comfy.model_sampling
-                bases = []
-                for base in type(obj).__mro__:
-                    if base.__module__ == "comfy.model_sampling" and base.__name__ != "object":
-                        bases.append(base.__name__)
-                # Serialize state_dict as base64 safetensors-like
-                sd = obj.state_dict()
-                sd_serialized = {}
-                for k, v in sd.items():
-                    buf = _io.BytesIO()
-                    torch.save(v, buf)
-                    sd_serialized[k] = base64.b64encode(buf.getvalue()).decode("ascii")
-                # Capture plain attrs (shift, multiplier, sigma_data, etc.)
-                plain_attrs = {}
-                for k, v in obj.__dict__.items():
-                    if k.startswith("_"):
-                        continue
-                    if isinstance(v, (bool, int, float, str)):
-                        plain_attrs[k] = v
-                return {
-                    "__type__": "ModelSamplingInline",
-                    "bases": bases,
-                    "state_dict": sd_serialized,
-                    "attrs": plain_attrs,
-                }
-            # Host-side: register with ModelSamplingRegistry and return JSON-safe dict
-            ms_id = ModelSamplingRegistry().register(obj)
-            return {"__type__": "ModelSamplingRef", "ms_id": ms_id}
-
-        def deserialize_model_sampling(data: Any) -> Any:
-            """Deserialize ModelSampling refs or inline data."""
-            if isinstance(data, dict):
-                if data.get("__type__") == "ModelSamplingInline":
-                    return _reconstruct_model_sampling_inline(data)
-                return ModelSamplingProxy(data["ms_id"])
-            return data
-
-        def _reconstruct_model_sampling_inline(data: Dict[str, Any]) -> Any:
-            """Reconstruct a ModelSampling object on the host from inline child data."""
-            import comfy.model_sampling as _ms
-            import base64
-            import io as _io
-
-            # Resolve base classes
-            base_classes = []
-            for name in data["bases"]:
-                cls = getattr(_ms, name, None)
-                if cls is not None:
-                    base_classes.append(cls)
-            if not base_classes:
-                raise RuntimeError(
-                    f"Cannot reconstruct ModelSampling: no known bases in {data['bases']}"
-                )
-            # Create dynamic class matching the child's class hierarchy
-            ReconstructedSampling = type("ReconstructedSampling", tuple(base_classes), {})
-            obj = ReconstructedSampling.__new__(ReconstructedSampling)
-            torch.nn.Module.__init__(obj)
-            # Restore plain attributes first
-            for k, v in data.get("attrs", {}).items():
-                setattr(obj, k, v)
-            # Restore state_dict (buffers like sigmas)
-            for k, v_b64 in data.get("state_dict", {}).items():
-                buf = _io.BytesIO(base64.b64decode(v_b64))
-                tensor = torch.load(buf, weights_only=True)
-                # Register as buffer so it's part of state_dict
-                parts = k.split(".")
-                if len(parts) == 1:
-                    cast(Any, obj).register_buffer(parts[0], tensor)  # pylint: disable=no-member
-                else:
-                    setattr(obj, parts[0], tensor)
-            # Register on host so future references use proxy pattern.
-            # Skip in child process — register() is async RPC and cannot be
-            # called synchronously during deserialization.
-            if os.environ.get("PYISOLATE_CHILD") != "1":
-                ModelSamplingRegistry().register(obj)
-            return obj
-
-        def deserialize_model_sampling_ref(data: Dict[str, Any]) -> Any:
-            """Context-aware ModelSamplingRef deserializer for both host and child."""
-            is_child = os.environ.get("PYISOLATE_CHILD") == "1"
-            if is_child:
-                return ModelSamplingProxy(data["ms_id"])
-            else:
-                return ModelSamplingRegistry()._get_instance(data["ms_id"])
-
-        # Register all ModelSampling* and StableCascadeSampling classes dynamically
-        import comfy.model_sampling
-
-        for ms_cls in vars(comfy.model_sampling).values():
-            if not isinstance(ms_cls, type):
-                continue
-            if not issubclass(ms_cls, torch.nn.Module):
-                continue
-            if not (ms_cls.__name__.startswith("ModelSampling") or ms_cls.__name__ == "StableCascadeSampling"):
-                continue
-            registry.register(
-                ms_cls.__name__,
-                serialize_model_sampling,
-                deserialize_model_sampling,
-            )
-        registry.register(
-            "ModelSamplingProxy", serialize_model_sampling, deserialize_model_sampling
-        )
-        # Register ModelSamplingRef for deserialization (context-aware: host or child)
-        registry.register("ModelSamplingRef", None, deserialize_model_sampling_ref)
-        # Register ModelSamplingInline for deserialization (child→host inline transfer)
-        registry.register(
-            "ModelSamplingInline", None, lambda data: _reconstruct_model_sampling_inline(data)
-        )
-
-        def serialize_cond(obj: Any) -> Dict[str, Any]:
-            type_key = f"{type(obj).__module__}.{type(obj).__name__}"
-            return {
-                "__type__": type_key,
-                "cond": obj.cond,
-            }
-
-        def deserialize_cond(data: Dict[str, Any]) -> Any:
-            import importlib
-
-            type_key = data["__type__"]
-            module_name, class_name = type_key.rsplit(".", 1)
-            module = importlib.import_module(module_name)
-            cls = getattr(module, class_name)
-            return cls(data["cond"])
-
-        def _serialize_public_state(obj: Any) -> Dict[str, Any]:
-            state: Dict[str, Any] = {}
-            for key, value in obj.__dict__.items():
-                if key.startswith("_"):
-                    continue
-                if callable(value):
-                    continue
-                state[key] = value
-            return state
-
-        def serialize_latent_format(obj: Any) -> Dict[str, Any]:
-            type_key = f"{type(obj).__module__}.{type(obj).__name__}"
-            return {
-                "__type__": type_key,
-                "state": _serialize_public_state(obj),
-            }
-
-        def deserialize_latent_format(data: Dict[str, Any]) -> Any:
-            import importlib
-
-            type_key = data["__type__"]
-            module_name, class_name = type_key.rsplit(".", 1)
-            module = importlib.import_module(module_name)
-            cls = getattr(module, class_name)
-            obj = cls()
-            for key, value in data.get("state", {}).items():
-                prop = getattr(type(obj), key, None)
-                if isinstance(prop, property) and prop.fset is None:
-                    continue
-                setattr(obj, key, value)
-            return obj
-
-        import comfy.conds
-
-        for cond_cls in vars(comfy.conds).values():
-            if not isinstance(cond_cls, type):
-                continue
-            if not issubclass(cond_cls, comfy.conds.CONDRegular):
-                continue
-            type_key = f"{cond_cls.__module__}.{cond_cls.__name__}"
-            registry.register(type_key, serialize_cond, deserialize_cond)
-            registry.register(cond_cls.__name__, serialize_cond, deserialize_cond)
-
-        import comfy.latent_formats
-
-        for latent_cls in vars(comfy.latent_formats).values():
-            if not isinstance(latent_cls, type):
-                continue
-            if not issubclass(latent_cls, comfy.latent_formats.LatentFormat):
-                continue
-            type_key = f"{latent_cls.__module__}.{latent_cls.__name__}"
-            registry.register(
-                type_key, serialize_latent_format, deserialize_latent_format
-            )
-            registry.register(
-                latent_cls.__name__, serialize_latent_format, deserialize_latent_format
-            )
-
-        # V3 API: unwrap NodeOutput.args
-        def deserialize_node_output(data: Any) -> Any:
-            return getattr(data, "args", data)
-
-        registry.register("NodeOutput", None, deserialize_node_output)
-
-        # KSAMPLER serializer: stores sampler name instead of function object
-        # sampler_function is a callable which gets filtered out by JSONSocketTransport
-        def serialize_ksampler(obj: Any) -> Dict[str, Any]:
-            func_name = obj.sampler_function.__name__
-            # Map function name back to sampler name
-            if func_name == "sample_unipc":
-                sampler_name = "uni_pc"
-            elif func_name == "sample_unipc_bh2":
-                sampler_name = "uni_pc_bh2"
-            elif func_name == "dpm_fast_function":
-                sampler_name = "dpm_fast"
-            elif func_name == "dpm_adaptive_function":
-                sampler_name = "dpm_adaptive"
-            elif func_name.startswith("sample_"):
-                sampler_name = func_name[7:]  # Remove "sample_" prefix
-            else:
-                sampler_name = func_name
-            return {
-                "__type__": "KSAMPLER",
-                "sampler_name": sampler_name,
-                "extra_options": obj.extra_options,
-                "inpaint_options": obj.inpaint_options,
-            }
-
-        def deserialize_ksampler(data: Dict[str, Any]) -> Any:
-            import comfy.samplers
-
-            return comfy.samplers.ksampler(
-                data["sampler_name"],
-                data.get("extra_options", {}),
-                data.get("inpaint_options", {}),
-            )
-
-        registry.register("KSAMPLER", serialize_ksampler, deserialize_ksampler)
-
-        from comfy.isolation.model_patcher_proxy_utils import register_hooks_serializers
-
-        register_hooks_serializers(registry)
-
-        # Generic Numpy Serializer
-        def serialize_numpy(obj: Any) -> Any:
-            import torch
-
-            try:
-                # Attempt zero-copy conversion to Tensor
-                return torch.from_numpy(obj)
-            except Exception:
-                # Fallback for non-numeric arrays (strings, objects, mixes)
-                return obj.tolist()
-
-        def deserialize_numpy_b64(data: Any) -> Any:
-            """Deserialize base64-encoded ndarray from sealed worker."""
-            import base64
-            import numpy as np
-            if isinstance(data, dict) and "data" in data and "dtype" in data:
-                raw = base64.b64decode(data["data"])
-                arr = np.frombuffer(raw, dtype=np.dtype(data["dtype"])).reshape(data["shape"])
-                return torch.from_numpy(arr.copy())
-            return data
-
-        registry.register("ndarray", serialize_numpy, deserialize_numpy_b64)
-
-        # -- File3D (comfy_api.latest._util.geometry_types) ---------------------
-        # Origin: comfy_api by ComfyOrg (Alexander Piskun), PR #12129
-
-        def serialize_file3d(obj: Any) -> Dict[str, Any]:
-            import base64
-            return {
-                "__type__": "File3D",
-                "format": obj.format,
-                "data": base64.b64encode(obj.get_bytes()).decode("ascii"),
-            }
-
-        def deserialize_file3d(data: Any) -> Any:
-            import base64
-            from io import BytesIO
-            from comfy_api.latest._util.geometry_types import File3D
-            return File3D(BytesIO(base64.b64decode(data["data"])), file_format=data["format"])
-
-        registry.register("File3D", serialize_file3d, deserialize_file3d, data_type=True)
-
-        # -- VIDEO (comfy_api.latest._input_impl.video_types) -------------------
-        # Origin: ComfyAPI Core v0.0.2 by ComfyOrg (guill), PR #8962
-
-        def serialize_video(obj: Any) -> Dict[str, Any]:
-            components = obj.get_components()
-            images = components.images.detach() if components.images.requires_grad else components.images
-            result: Dict[str, Any] = {
-                "__type__": "VIDEO",
-                "images": images,
-                "frame_rate_num": components.frame_rate.numerator,
-                "frame_rate_den": components.frame_rate.denominator,
-            }
-            if components.audio is not None:
-                waveform = components.audio["waveform"]
-                if waveform.requires_grad:
-                    waveform = waveform.detach()
-                result["audio_waveform"] = waveform
-                result["audio_sample_rate"] = components.audio["sample_rate"]
-            if components.metadata is not None:
-                result["metadata"] = components.metadata
-            return result
-
-        def deserialize_video(data: Any) -> Any:
-            from fractions import Fraction
-            from comfy_api.latest._input_impl.video_types import VideoFromComponents
-            from comfy_api.latest._util.video_types import VideoComponents
-            audio = None
-            if "audio_waveform" in data:
-                audio = {"waveform": data["audio_waveform"], "sample_rate": data["audio_sample_rate"]}
-            components = VideoComponents(
-                images=data["images"],
-                frame_rate=Fraction(data["frame_rate_num"], data["frame_rate_den"]),
-                audio=audio,
-                metadata=data.get("metadata"),
-            )
-            return VideoFromComponents(components)
-
-        registry.register("VIDEO", serialize_video, deserialize_video, data_type=True)
-        registry.register("VideoFromFile", serialize_video, deserialize_video, data_type=True)
-        registry.register("VideoFromComponents", serialize_video, deserialize_video, data_type=True)
-
-    def setup_web_directory(self, module: Any) -> None:
-        """Detect WEB_DIRECTORY on a module and populate/register it.
-
-        Called by the sealed worker after loading the node module.
-        Mirrors extension_wrapper.py:216-227 for host-coupled nodes.
-        Does NOT import extension_wrapper.py (it has `import torch` at module level).
-        """
-        import shutil
-
-        web_dir_attr = getattr(module, "WEB_DIRECTORY", None)
-        if web_dir_attr is None:
-            return
-
-        module_dir = os.path.dirname(os.path.abspath(module.__file__))
-        web_dir_path = os.path.abspath(os.path.join(module_dir, web_dir_attr))
-
-        # Read extension name from pyproject.toml
-        ext_name = os.path.basename(module_dir)
-        pyproject = os.path.join(module_dir, "pyproject.toml")
-        if os.path.exists(pyproject):
-            try:
-                import tomllib
-            except ImportError:
-                import tomli as tomllib  # type: ignore[no-redef]
-            try:
-                with open(pyproject, "rb") as f:
-                    data = tomllib.load(f)
-                name = data.get("project", {}).get("name")
-                if name:
-                    ext_name = name
-            except Exception:
-                pass
-
-        # Populate web dir if empty (mirrors _run_prestartup_web_copy)
-        if not (os.path.isdir(web_dir_path) and any(os.scandir(web_dir_path))):
-            os.makedirs(web_dir_path, exist_ok=True)
-
-            # Module-defined copy spec
-            copy_spec = getattr(module, "_PRESTARTUP_WEB_COPY", None)
-            if copy_spec is not None and callable(copy_spec):
-                try:
-                    copy_spec(web_dir_path)
-                except Exception as e:
-                    logger.warning("][ _PRESTARTUP_WEB_COPY failed: %s", e)
-
-            # Fallback: comfy_3d_viewers
-            try:
-                from comfy_3d_viewers import copy_viewer, VIEWER_FILES
-                for viewer in VIEWER_FILES:
-                    try:
-                        copy_viewer(viewer, web_dir_path)
-                    except Exception:
-                        pass
-            except ImportError:
-                pass
-
-            # Fallback: comfy_dynamic_widgets
-            try:
-                from comfy_dynamic_widgets import get_js_path
-                src = os.path.realpath(get_js_path())
-                if os.path.exists(src):
-                    dst_dir = os.path.join(web_dir_path, "js")
-                    os.makedirs(dst_dir, exist_ok=True)
-                    shutil.copy2(src, os.path.join(dst_dir, "dynamic_widgets.js"))
-            except ImportError:
-                pass
-
-        if os.path.isdir(web_dir_path) and any(os.scandir(web_dir_path)):
-            WebDirectoryProxy.register_web_dir(ext_name, web_dir_path)
-            logger.info(
-                "][ Adapter: registered web dir for %s (%d files)",
-                ext_name,
-                sum(1 for _ in Path(web_dir_path).rglob("*") if _.is_file()),
-            )
-
-    @staticmethod
-    def register_host_event_handlers(extension: Any) -> None:
-        """Register host-side event handlers for an isolated extension.
-
-        Wires ``"progress"`` events from the child to ``comfy.utils.PROGRESS_BAR_HOOK``
-        so the ComfyUI frontend receives progress bar updates.
-        """
-        register_event_handler = inspect.getattr_static(
-            extension, "register_event_handler", None
-        )
-        if not callable(register_event_handler):
-            return
-
-        def _host_progress_handler(payload: dict) -> None:
-            import comfy.utils
-
-            hook = comfy.utils.PROGRESS_BAR_HOOK
-            if hook is not None:
-                hook(
-                    payload.get("value", 0),
-                    payload.get("total", 0),
-                    payload.get("preview"),
-                    payload.get("node_id"),
-                )
-
-        extension.register_event_handler("progress", _host_progress_handler)
-
-    def setup_child_event_hooks(self, extension: Any) -> None:
-        """Wire PROGRESS_BAR_HOOK in the child to emit_event on the extension.
-
-        Host-coupled only — sealed workers do not have comfy.utils (torch).
-        """
-        is_child = os.environ.get("PYISOLATE_CHILD") == "1"
-        logger.info("][ ISO:setup_child_event_hooks called, PYISOLATE_CHILD=%s", is_child)
-        if not is_child:
-            return
-
-        if not _IMPORT_TORCH:
-            logger.info("][ ISO:setup_child_event_hooks skipped — sealed worker (no torch)")
-            return
-
-        import comfy.utils
-
-        def _event_progress_hook(value, total, preview=None, node_id=None):
-            logger.debug("][ ISO:event_progress value=%s/%s node_id=%s", value, total, node_id)
-            extension.emit_event("progress", {
-                "value": value,
-                "total": total,
-                "node_id": node_id,
-            })
-
-        comfy.utils.PROGRESS_BAR_HOOK = _event_progress_hook
-        logger.info("][ ISO:PROGRESS_BAR_HOOK wired to event channel")
-
-    def provide_rpc_services(self) -> List[type[ProxiedSingleton]]:
-        # Always available — no torch/PIL dependency
-        services: List[type[ProxiedSingleton]] = [
-            FolderPathsProxy,
-            HelperProxiesService,
-            WebDirectoryProxy,
-        ]
-        # Torch/PIL-dependent proxies
-        if _HAS_TORCH_PROXIES:
-            services.extend([
-                PromptServerService,
-                ModelManagementProxy,
-                UtilsProxy,
-                ProgressProxy,
-                VAERegistry,
-                CLIPRegistry,
-                ModelPatcherRegistry,
-                ModelSamplingRegistry,
-                FirstStageModelRegistry,
-            ])
-        return services
-
-    def handle_api_registration(self, api: ProxiedSingleton, rpc: AsyncRPC) -> None:
-        # Resolve the real name whether it's an instance or the Singleton class itself
-        api_name = api.__name__ if isinstance(api, type) else api.__class__.__name__
-
-        if api_name == "FolderPathsProxy":
-            import folder_paths
-
-            # Replace module-level functions with proxy methods
-            # This is aggressive but necessary for transparent proxying
-            # Handle both instance and class cases
-            instance = api() if isinstance(api, type) else api
-            for name in dir(instance):
-                if not name.startswith("_"):
-                    setattr(folder_paths, name, getattr(instance, name))
-
-            # Fence: isolated children get writable temp inside sandbox
-            if os.environ.get("PYISOLATE_CHILD") == "1":
-                import tempfile
-                _child_temp = os.path.join(tempfile.gettempdir(), "comfyui_temp")
-                os.makedirs(_child_temp, exist_ok=True)
-                folder_paths.temp_directory = _child_temp
-
-            return
-
-        if api_name == "ModelManagementProxy":
-            if _IMPORT_TORCH:
-                import comfy.model_management
-
-                instance = api() if isinstance(api, type) else api
-                # Replace module-level functions with proxy methods
-                for name in dir(instance):
-                    if not name.startswith("_"):
-                        setattr(comfy.model_management, name, getattr(instance, name))
-            return
-
-        if api_name == "UtilsProxy":
-            if not _IMPORT_TORCH:
-                logger.info("][ ISO:UtilsProxy handle_api_registration skipped — sealed worker (no torch)")
-                return
-
-            import comfy.utils
-
-            # Static Injection of RPC mechanism to ensure Child can access it
-            # independent of instance lifecycle.
-            api.set_rpc(rpc)
-
-            is_child = os.environ.get("PYISOLATE_CHILD") == "1"
-            logger.info("][ ISO:UtilsProxy handle_api_registration PYISOLATE_CHILD=%s", is_child)
-
-            # Progress hook wiring moved to setup_child_event_hooks via event channel
-
-            return
-
-        if api_name == "PromptServerProxy":
-            if not _IMPORT_TORCH:
-                return
-            # Defer heavy import to child context
-            import server
-
-            instance = api() if isinstance(api, type) else api
-            proxy = (
-                instance.instance
-            )  # PromptServerProxy instance has .instance property returning self
-
-            original_register_route = proxy.register_route
-
-            def register_route_wrapper(
-                method: str, path: str, handler: Callable[..., Any]
-            ) -> None:
-                callback_id = rpc.register_callback(handler)
-                loop = getattr(rpc, "loop", None)
-                if loop and loop.is_running():
-                    import asyncio
-
-                    asyncio.create_task(
-                        original_register_route(
-                            method, path, handler=callback_id, is_callback=True
-                        )
-                    )
-                else:
-                    original_register_route(
-                        method, path, handler=callback_id, is_callback=True
-                    )
-                return None
-
-            proxy.register_route = register_route_wrapper
-
-            class RouteTableDefProxy:
-                def __init__(self, proxy_instance: Any):
-                    self.proxy = proxy_instance
-
-                def get(
-                    self, path: str, **kwargs: Any
-                ) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
-                    def decorator(handler: Callable[..., Any]) -> Callable[..., Any]:
-                        self.proxy.register_route("GET", path, handler)
-                        return handler
-
-                    return decorator
-
-                def post(
-                    self, path: str, **kwargs: Any
-                ) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
-                    def decorator(handler: Callable[..., Any]) -> Callable[..., Any]:
-                        self.proxy.register_route("POST", path, handler)
-                        return handler
-
-                    return decorator
-
-                def patch(
-                    self, path: str, **kwargs: Any
-                ) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
-                    def decorator(handler: Callable[..., Any]) -> Callable[..., Any]:
-                        self.proxy.register_route("PATCH", path, handler)
-                        return handler
-
-                    return decorator
-
-                def put(
-                    self, path: str, **kwargs: Any
-                ) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
-                    def decorator(handler: Callable[..., Any]) -> Callable[..., Any]:
-                        self.proxy.register_route("PUT", path, handler)
-                        return handler
-
-                    return decorator
-
-                def delete(
-                    self, path: str, **kwargs: Any
-                ) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
-                    def decorator(handler: Callable[..., Any]) -> Callable[..., Any]:
-                        self.proxy.register_route("DELETE", path, handler)
-                        return handler
-
-                    return decorator
-
-            proxy.routes = RouteTableDefProxy(proxy)
-
-            if (
-                hasattr(server, "PromptServer")
-                and getattr(server.PromptServer, "instance", None) != proxy
-            ):
-                server.PromptServer.instance = proxy
--- a/comfy/isolation/child_hooks.py
+++ b/comfy/isolation/child_hooks.py
@@ -1,101 +0,0 @@
-# pylint: disable=import-outside-toplevel,logging-fstring-interpolation
-# Child process initialization for PyIsolate
-import logging
-import os
-
-logger = logging.getLogger(__name__)
-
-
-def is_child_process() -> bool:
-    return os.environ.get("PYISOLATE_CHILD") == "1"
-
-
-def initialize_child_process() -> None:
-    _setup_child_loop_bridge()
-
-    # Manual RPC injection
-    try:
-        from pyisolate._internal.rpc_protocol import get_child_rpc_instance
-
-        rpc = get_child_rpc_instance()
-        if rpc:
-            _setup_proxy_callers(rpc)
-        else:
-            logger.warning("Could not get child RPC instance for manual injection")
-            _setup_proxy_callers()
-    except Exception as e:
-        logger.error(f"Manual RPC Injection failed: {e}")
-        _setup_proxy_callers()
-
-    _setup_logging()
-
-
-def _setup_child_loop_bridge() -> None:
-    import asyncio
-
-    main_loop = None
-    try:
-        main_loop = asyncio.get_running_loop()
-    except RuntimeError:
-        try:
-            main_loop = asyncio.get_event_loop()
-        except RuntimeError:
-            pass
-
-    if main_loop is None:
-        return
-
-    try:
-        from .proxies.base import set_global_loop
-
-        set_global_loop(main_loop)
-    except ImportError:
-        pass
-
-
-def _setup_prompt_server_stub(rpc=None) -> None:
-    try:
-        from .proxies.prompt_server_impl import PromptServerStub
-
-        if rpc:
-            PromptServerStub.set_rpc(rpc)
-        elif hasattr(PromptServerStub, "clear_rpc"):
-            PromptServerStub.clear_rpc()
-        else:
-            PromptServerStub._rpc = None  # type: ignore[attr-defined]
-
-    except Exception as e:
-        logger.error(f"Failed to setup PromptServerStub: {e}")
-
-
-def _setup_proxy_callers(rpc=None) -> None:
-    try:
-        from .proxies.folder_paths_proxy import FolderPathsProxy
-        from .proxies.helper_proxies import HelperProxiesService
-        from .proxies.model_management_proxy import ModelManagementProxy
-        from .proxies.progress_proxy import ProgressProxy
-        from .proxies.prompt_server_impl import PromptServerStub
-        from .proxies.utils_proxy import UtilsProxy
-
-        if rpc is None:
-            FolderPathsProxy.clear_rpc()
-            HelperProxiesService.clear_rpc()
-            ModelManagementProxy.clear_rpc()
-            ProgressProxy.clear_rpc()
-            PromptServerStub.clear_rpc()
-            UtilsProxy.clear_rpc()
-            return
-
-        FolderPathsProxy.set_rpc(rpc)
-        HelperProxiesService.set_rpc(rpc)
-        ModelManagementProxy.set_rpc(rpc)
-        ProgressProxy.set_rpc(rpc)
-        PromptServerStub.set_rpc(rpc)
-        UtilsProxy.set_rpc(rpc)
-
-    except Exception as e:
-        logger.error(f"Failed to setup child singleton proxy callers: {e}")
-
-
-def _setup_logging() -> None:
-    logging.getLogger().setLevel(logging.INFO)
--- a/comfy/isolation/clip_proxy.py
+++ b/comfy/isolation/clip_proxy.py
@@ -1,327 +0,0 @@
-# pylint: disable=attribute-defined-outside-init,import-outside-toplevel,logging-fstring-interpolation
-# CLIP Proxy implementation
-from __future__ import annotations
-
-import logging
-from typing import TYPE_CHECKING, Any, Optional
-
-from comfy.isolation.proxies.base import (
-    IS_CHILD_PROCESS,
-    BaseProxy,
-    BaseRegistry,
-    detach_if_grad,
-)
-
-if TYPE_CHECKING:
-    from comfy.isolation.model_patcher_proxy import ModelPatcherProxy
-
-
-class CondStageModelRegistry(BaseRegistry[Any]):
-    _type_prefix = "cond_stage_model"
-
-    async def get_property(self, instance_id: str, name: str) -> Any:
-        obj = self._get_instance(instance_id)
-        return getattr(obj, name)
-
-
-class CondStageModelProxy(BaseProxy[CondStageModelRegistry]):
-    _registry_class = CondStageModelRegistry
-    __module__ = "comfy.sd"
-
-    def __getattr__(self, name: str) -> Any:
-        try:
-            return self._call_rpc("get_property", name)
-        except Exception as e:
-            raise AttributeError(
-                f"'{self.__class__.__name__}' object has no attribute '{name}'"
-            ) from e
-
-    def __repr__(self) -> str:
-        return f"<CondStageModelProxy {self._instance_id}>"
-
-
-class TokenizerRegistry(BaseRegistry[Any]):
-    _type_prefix = "tokenizer"
-
-    async def get_property(self, instance_id: str, name: str) -> Any:
-        obj = self._get_instance(instance_id)
-        return getattr(obj, name)
-
-
-class TokenizerProxy(BaseProxy[TokenizerRegistry]):
-    _registry_class = TokenizerRegistry
-    __module__ = "comfy.sd"
-
-    def __getattr__(self, name: str) -> Any:
-        try:
-            return self._call_rpc("get_property", name)
-        except Exception as e:
-            raise AttributeError(
-                f"'{self.__class__.__name__}' object has no attribute '{name}'"
-            ) from e
-
-    def __repr__(self) -> str:
-        return f"<TokenizerProxy {self._instance_id}>"
-
-
-logger = logging.getLogger(__name__)
-
-
-class CLIPRegistry(BaseRegistry[Any]):
-    _type_prefix = "clip"
-    _allowed_setters = {
-        "layer_idx",
-        "tokenizer_options",
-        "use_clip_schedule",
-        "apply_hooks_to_conds",
-    }
-
-    async def get_ram_usage(self, instance_id: str) -> int:
-        return self._get_instance(instance_id).get_ram_usage()
-
-    async def get_patcher_id(self, instance_id: str) -> str:
-        from comfy.isolation.model_patcher_proxy import ModelPatcherRegistry
-
-        return ModelPatcherRegistry().register(self._get_instance(instance_id).patcher)
-
-    async def get_cond_stage_model_id(self, instance_id: str) -> str:
-        return CondStageModelRegistry().register(
-            self._get_instance(instance_id).cond_stage_model
-        )
-
-    async def get_tokenizer_id(self, instance_id: str) -> str:
-        return TokenizerRegistry().register(self._get_instance(instance_id).tokenizer)
-
-    async def load_model(self, instance_id: str) -> None:
-        self._get_instance(instance_id).load_model()
-
-    async def clip_layer(self, instance_id: str, layer_idx: int) -> None:
-        self._get_instance(instance_id).clip_layer(layer_idx)
-
-    async def set_tokenizer_option(
-        self, instance_id: str, option_name: str, value: Any
-    ) -> None:
-        self._get_instance(instance_id).set_tokenizer_option(option_name, value)
-
-    async def get_property(self, instance_id: str, name: str) -> Any:
-        return getattr(self._get_instance(instance_id), name)
-
-    async def set_property(self, instance_id: str, name: str, value: Any) -> None:
-        if name not in self._allowed_setters:
-            raise PermissionError(f"Setting '{name}' is not allowed via RPC")
-        setattr(self._get_instance(instance_id), name, value)
-
-    async def tokenize(
-        self, instance_id: str, text: str, return_word_ids: bool = False, **kwargs: Any
-    ) -> Any:
-        return self._get_instance(instance_id).tokenize(
-            text, return_word_ids=return_word_ids, **kwargs
-        )
-
-    async def encode(self, instance_id: str, text: str) -> Any:
-        return detach_if_grad(self._get_instance(instance_id).encode(text))
-
-    async def encode_from_tokens(
-        self,
-        instance_id: str,
-        tokens: Any,
-        return_pooled: bool = False,
-        return_dict: bool = False,
-    ) -> Any:
-        return detach_if_grad(
-            self._get_instance(instance_id).encode_from_tokens(
-                tokens, return_pooled=return_pooled, return_dict=return_dict
-            )
-        )
-
-    async def encode_from_tokens_scheduled(
-        self,
-        instance_id: str,
-        tokens: Any,
-        unprojected: bool = False,
-        add_dict: Optional[dict] = None,
-        show_pbar: bool = True,
-    ) -> Any:
-        add_dict = add_dict or {}
-        return detach_if_grad(
-            self._get_instance(instance_id).encode_from_tokens_scheduled(
-                tokens, unprojected=unprojected, add_dict=add_dict, show_pbar=show_pbar
-            )
-        )
-
-    async def add_patches(
-        self,
-        instance_id: str,
-        patches: Any,
-        strength_patch: float = 1.0,
-        strength_model: float = 1.0,
-    ) -> Any:
-        return self._get_instance(instance_id).add_patches(
-            patches, strength_patch=strength_patch, strength_model=strength_model
-        )
-
-    async def get_key_patches(self, instance_id: str) -> Any:
-        return self._get_instance(instance_id).get_key_patches()
-
-    async def load_sd(
-        self, instance_id: str, sd: dict, full_model: bool = False
-    ) -> Any:
-        return self._get_instance(instance_id).load_sd(sd, full_model=full_model)
-
-    async def get_sd(self, instance_id: str) -> Any:
-        return self._get_instance(instance_id).get_sd()
-
-    async def clone(self, instance_id: str) -> str:
-        return self.register(self._get_instance(instance_id).clone())
-
-
-class CLIPProxy(BaseProxy[CLIPRegistry]):
-    _registry_class = CLIPRegistry
-    __module__ = "comfy.sd"
-
-    def get_ram_usage(self) -> int:
-        return self._call_rpc("get_ram_usage")
-
-    @property
-    def patcher(self) -> "ModelPatcherProxy":
-        from comfy.isolation.model_patcher_proxy import ModelPatcherProxy
-
-        if not hasattr(self, "_patcher_proxy"):
-            patcher_id = self._call_rpc("get_patcher_id")
-            self._patcher_proxy = ModelPatcherProxy(patcher_id, manage_lifecycle=False)
-        return self._patcher_proxy
-
-    @patcher.setter
-    def patcher(self, value: Any) -> None:
-        from comfy.isolation.model_patcher_proxy import ModelPatcherProxy
-
-        if isinstance(value, ModelPatcherProxy):
-            self._patcher_proxy = value
-        else:
-            logger.warning(
-                f"Attempted to set CLIPProxy.patcher to non-proxy object: {value}"
-            )
-
-    @property
-    def cond_stage_model(self) -> CondStageModelProxy:
-        if not hasattr(self, "_cond_stage_model_proxy"):
-            csm_id = self._call_rpc("get_cond_stage_model_id")
-            self._cond_stage_model_proxy = CondStageModelProxy(
-                csm_id, manage_lifecycle=False
-            )
-        return self._cond_stage_model_proxy
-
-    @property
-    def tokenizer(self) -> TokenizerProxy:
-        if not hasattr(self, "_tokenizer_proxy"):
-            tok_id = self._call_rpc("get_tokenizer_id")
-            self._tokenizer_proxy = TokenizerProxy(tok_id, manage_lifecycle=False)
-        return self._tokenizer_proxy
-
-    def load_model(self) -> ModelPatcherProxy:
-        self._call_rpc("load_model")
-        return self.patcher
-
-    @property
-    def layer_idx(self) -> Optional[int]:
-        return self._call_rpc("get_property", "layer_idx")
-
-    @layer_idx.setter
-    def layer_idx(self, value: Optional[int]) -> None:
-        self._call_rpc("set_property", "layer_idx", value)
-
-    @property
-    def tokenizer_options(self) -> dict:
-        return self._call_rpc("get_property", "tokenizer_options")
-
-    @tokenizer_options.setter
-    def tokenizer_options(self, value: dict) -> None:
-        self._call_rpc("set_property", "tokenizer_options", value)
-
-    @property
-    def use_clip_schedule(self) -> bool:
-        return self._call_rpc("get_property", "use_clip_schedule")
-
-    @use_clip_schedule.setter
-    def use_clip_schedule(self, value: bool) -> None:
-        self._call_rpc("set_property", "use_clip_schedule", value)
-
-    @property
-    def apply_hooks_to_conds(self) -> Any:
-        return self._call_rpc("get_property", "apply_hooks_to_conds")
-
-    @apply_hooks_to_conds.setter
-    def apply_hooks_to_conds(self, value: Any) -> None:
-        self._call_rpc("set_property", "apply_hooks_to_conds", value)
-
-    def clip_layer(self, layer_idx: int) -> None:
-        return self._call_rpc("clip_layer", layer_idx)
-
-    def set_tokenizer_option(self, option_name: str, value: Any) -> None:
-        return self._call_rpc("set_tokenizer_option", option_name, value)
-
-    def tokenize(self, text: str, return_word_ids: bool = False, **kwargs: Any) -> Any:
-        return self._call_rpc(
-            "tokenize", text, return_word_ids=return_word_ids, **kwargs
-        )
-
-    def encode(self, text: str) -> Any:
-        return self._call_rpc("encode", text)
-
-    def encode_from_tokens(
-        self, tokens: Any, return_pooled: bool = False, return_dict: bool = False
-    ) -> Any:
-        res = self._call_rpc(
-            "encode_from_tokens",
-            tokens,
-            return_pooled=return_pooled,
-            return_dict=return_dict,
-        )
-        if return_pooled and isinstance(res, list) and not return_dict:
-            return tuple(res)
-        return res
-
-    def encode_from_tokens_scheduled(
-        self,
-        tokens: Any,
-        unprojected: bool = False,
-        add_dict: Optional[dict] = None,
-        show_pbar: bool = True,
-    ) -> Any:
-        add_dict = add_dict or {}
-        return self._call_rpc(
-            "encode_from_tokens_scheduled",
-            tokens,
-            unprojected=unprojected,
-            add_dict=add_dict,
-            show_pbar=show_pbar,
-        )
-
-    def add_patches(
-        self, patches: Any, strength_patch: float = 1.0, strength_model: float = 1.0
-    ) -> Any:
-        return self._call_rpc(
-            "add_patches",
-            patches,
-            strength_patch=strength_patch,
-            strength_model=strength_model,
-        )
-
-    def get_key_patches(self) -> Any:
-        return self._call_rpc("get_key_patches")
-
-    def load_sd(self, sd: dict, full_model: bool = False) -> Any:
-        return self._call_rpc("load_sd", sd, full_model=full_model)
-
-    def get_sd(self) -> Any:
-        return self._call_rpc("get_sd")
-
-    def clone(self) -> CLIPProxy:
-        new_id = self._call_rpc("clone")
-        return CLIPProxy(new_id, self._registry, manage_lifecycle=not IS_CHILD_PROCESS)
-
-
-if not IS_CHILD_PROCESS:
-    _CLIP_REGISTRY_SINGLETON = CLIPRegistry()
-    _COND_STAGE_MODEL_REGISTRY_SINGLETON = CondStageModelRegistry()
-    _TOKENIZER_REGISTRY_SINGLETON = TokenizerRegistry()
--- a/comfy/isolation/custom_node_serializers.py
+++ b/comfy/isolation/custom_node_serializers.py
@@ -1,16 +0,0 @@
-"""Compatibility shim for the indexed serializer path."""
-
-from __future__ import annotations
-
-from typing import Any
-
-
-def register_custom_node_serializers(_registry: Any) -> None:
-    """Legacy no-op shim.
-
-    Serializer registration now lives directly in the active isolation adapter.
-    This module remains importable because the isolation index still references it.
-    """
-    return None
-
-__all__ = ["register_custom_node_serializers"]
--- a/comfy/isolation/extension_loader.py
+++ b/comfy/isolation/extension_loader.py
@@ -1,489 +0,0 @@
-# pylint: disable=cyclic-import,import-outside-toplevel,redefined-outer-name
-from __future__ import annotations
-
-import logging
-import os
-import inspect
-import sys
-import types
-import platform
-from pathlib import Path
-from typing import Any, Callable, Dict, List, Tuple
-
-import pyisolate
-from pyisolate import ExtensionManager, ExtensionManagerConfig
-from packaging.requirements import InvalidRequirement, Requirement
-from packaging.utils import canonicalize_name
-
-from .manifest_loader import is_cache_valid, load_from_cache, save_to_cache
-from .host_policy import load_host_policy
-
-try:
-    import tomllib
-except ImportError:
-    import tomli as tomllib  # type: ignore[no-redef]
-
-logger = logging.getLogger(__name__)
-
-
-def _register_web_directory(extension_name: str, node_dir: Path) -> None:
-    """Register an isolated extension's web directory on the host side."""
-    import nodes
-
-    # Method 1: pyproject.toml [tool.comfy] web field
-    pyproject = node_dir / "pyproject.toml"
-    if pyproject.exists():
-        try:
-            with pyproject.open("rb") as f:
-                data = tomllib.load(f)
-            web_dir_name = data.get("tool", {}).get("comfy", {}).get("web")
-            if web_dir_name:
-                web_dir_path = str(node_dir / web_dir_name)
-                if os.path.isdir(web_dir_path):
-                    nodes.EXTENSION_WEB_DIRS[extension_name] = web_dir_path
-                    logger.debug(
-                        "][ Registered web dir for isolated %s: %s",
-                        extension_name,
-                        web_dir_path,
-                    )
-                    return
-        except Exception:
-            pass
-
-    # Method 2: __init__.py WEB_DIRECTORY constant (parse without importing)
-    init_file = node_dir / "__init__.py"
-    if init_file.exists():
-        try:
-            source = init_file.read_text()
-            for line in source.splitlines():
-                stripped = line.strip()
-                if stripped.startswith("WEB_DIRECTORY"):
-                    # Parse: WEB_DIRECTORY = "./web" or WEB_DIRECTORY = "web"
-                    _, _, value = stripped.partition("=")
-                    value = value.strip().strip("\"'")
-                    if value:
-                        web_dir_path = str((node_dir / value).resolve())
-                        if os.path.isdir(web_dir_path):
-                            nodes.EXTENSION_WEB_DIRS[extension_name] = web_dir_path
-                            logger.debug(
-                                "][ Registered web dir for isolated %s: %s",
-                                extension_name,
-                                web_dir_path,
-                            )
-                            return
-        except Exception:
-            pass
-
-
-def _get_extension_type(execution_model: str) -> type[Any]:
-    if execution_model == "sealed_worker":
-        return pyisolate.SealedNodeExtension
-
-    from .extension_wrapper import ComfyNodeExtension
-
-    return ComfyNodeExtension
-
-
-async def _stop_extension_safe(extension: Any, extension_name: str) -> None:
-    try:
-        stop_result = extension.stop()
-        if inspect.isawaitable(stop_result):
-            await stop_result
-    except Exception:
-        logger.debug("][ %s stop failed", extension_name, exc_info=True)
-
-
-def _normalize_dependency_spec(dep: str, base_paths: list[Path]) -> str:
-    req, sep, marker = dep.partition(";")
-    req = req.strip()
-    marker_suffix = f";{marker}" if sep else ""
-
-    def _resolve_local_path(local_path: str) -> Path | None:
-        for base in base_paths:
-            candidate = (base / local_path).resolve()
-            if candidate.exists():
-                return candidate
-        return None
-
-    if req.startswith("./") or req.startswith("../"):
-        resolved = _resolve_local_path(req)
-        if resolved is not None:
-            return f"{resolved}{marker_suffix}"
-
-    if req.startswith("file://"):
-        raw = req[len("file://") :]
-        if raw.startswith("./") or raw.startswith("../"):
-            resolved = _resolve_local_path(raw)
-            if resolved is not None:
-                return f"file://{resolved}{marker_suffix}"
-
-    return dep
-
-
-def _dependency_name_from_spec(dep: str) -> str | None:
-    stripped = dep.strip()
-    if not stripped or stripped == "-e" or stripped.startswith("-e "):
-        return None
-    if stripped.startswith(("/", "./", "../", "file://")):
-        return None
-
-    try:
-        return canonicalize_name(Requirement(stripped).name)
-    except InvalidRequirement:
-        return None
-
-
-def _parse_cuda_wheels_config(
-    tool_config: dict[str, object], dependencies: list[str]
-) -> dict[str, object] | None:
-    raw_config = tool_config.get("cuda_wheels")
-    if raw_config is None:
-        return None
-    if not isinstance(raw_config, dict):
-        raise ExtensionLoadError("[tool.comfy.isolation.cuda_wheels] must be a table")
-
-    index_url = raw_config.get("index_url")
-    index_urls = raw_config.get("index_urls")
-    if index_urls is not None:
-        if not isinstance(index_urls, list) or not all(
-            isinstance(u, str) and u.strip() for u in index_urls
-        ):
-            raise ExtensionLoadError(
-                "[tool.comfy.isolation.cuda_wheels.index_urls] must be a list of non-empty strings"
-            )
-    elif not isinstance(index_url, str) or not index_url.strip():
-        raise ExtensionLoadError(
-            "[tool.comfy.isolation.cuda_wheels.index_url] must be a non-empty string"
-        )
-
-    packages = raw_config.get("packages")
-    if not isinstance(packages, list) or not all(
-        isinstance(package_name, str) and package_name.strip()
-        for package_name in packages
-    ):
-        raise ExtensionLoadError(
-            "[tool.comfy.isolation.cuda_wheels.packages] must be a list of non-empty strings"
-        )
-
-    declared_dependencies = {
-        dependency_name
-        for dep in dependencies
-        if (dependency_name := _dependency_name_from_spec(dep)) is not None
-    }
-    normalized_packages = [canonicalize_name(package_name) for package_name in packages]
-    missing = [
-        package_name
-        for package_name in normalized_packages
-        if package_name not in declared_dependencies
-    ]
-    if missing:
-        missing_joined = ", ".join(sorted(missing))
-        raise ExtensionLoadError(
-            "[tool.comfy.isolation.cuda_wheels.packages] references undeclared dependencies: "
-            f"{missing_joined}"
-        )
-
-    package_map = raw_config.get("package_map", {})
-    if not isinstance(package_map, dict):
-        raise ExtensionLoadError(
-            "[tool.comfy.isolation.cuda_wheels.package_map] must be a table"
-        )
-
-    normalized_package_map: dict[str, str] = {}
-    for dependency_name, index_package_name in package_map.items():
-        if not isinstance(dependency_name, str) or not dependency_name.strip():
-            raise ExtensionLoadError(
-                "[tool.comfy.isolation.cuda_wheels.package_map] keys must be non-empty strings"
-            )
-        if not isinstance(index_package_name, str) or not index_package_name.strip():
-            raise ExtensionLoadError(
-                "[tool.comfy.isolation.cuda_wheels.package_map] values must be non-empty strings"
-            )
-        canonical_dependency_name = canonicalize_name(dependency_name)
-        if canonical_dependency_name not in normalized_packages:
-            raise ExtensionLoadError(
-                "[tool.comfy.isolation.cuda_wheels.package_map] can only override packages listed in "
-                "[tool.comfy.isolation.cuda_wheels.packages]"
-            )
-        normalized_package_map[canonical_dependency_name] = index_package_name.strip()
-
-    result: dict = {
-        "packages": normalized_packages,
-        "package_map": normalized_package_map,
-    }
-    if index_urls is not None:
-        result["index_urls"] = [u.rstrip("/") + "/" for u in index_urls]
-    else:
-        result["index_url"] = index_url.rstrip("/") + "/"
-    return result
-
-
-def get_enforcement_policy() -> Dict[str, bool]:
-    return {
-        "force_isolated": os.environ.get("PYISOLATE_ENFORCE_ISOLATED") == "1",
-        "force_sandbox": os.environ.get("PYISOLATE_ENFORCE_SANDBOX") == "1",
-    }
-
-
-class ExtensionLoadError(RuntimeError):
-    pass
-
-
-def register_dummy_module(extension_name: str, node_dir: Path) -> None:
-    normalized_name = extension_name.replace("-", "_").replace(".", "_")
-    if normalized_name not in sys.modules:
-        dummy_module = types.ModuleType(normalized_name)
-        dummy_module.__file__ = str(node_dir / "__init__.py")
-        dummy_module.__path__ = [str(node_dir)]
-        dummy_module.__package__ = normalized_name
-        sys.modules[normalized_name] = dummy_module
-
-
-def _is_stale_node_cache(cached_data: Dict[str, Dict]) -> bool:
-    for details in cached_data.values():
-        if not isinstance(details, dict):
-            return True
-        if details.get("is_v3") and "schema_v1" not in details:
-            return True
-    return False
-
-
-async def load_isolated_node(
-    node_dir: Path,
-    manifest_path: Path,
-    logger: logging.Logger,
-    build_stub_class: Callable[[str, Dict[str, object], Any], type],
-    venv_root: Path,
-    extension_managers: List[ExtensionManager],
-) -> List[Tuple[str, str, type]]:
-    try:
-        with manifest_path.open("rb") as handle:
-            manifest_data = tomllib.load(handle)
-    except Exception as e:
-        logger.warning(f"][ Failed to parse {manifest_path}: {e}")
-        return []
-
-    # Parse [tool.comfy.isolation]
-    tool_config = manifest_data.get("tool", {}).get("comfy", {}).get("isolation", {})
-    can_isolate = tool_config.get("can_isolate", False)
-    share_torch = tool_config.get("share_torch", False)
-    package_manager = tool_config.get("package_manager", "uv")
-    is_conda = package_manager == "conda"
-    execution_model = tool_config.get("execution_model")
-    if execution_model is None:
-        execution_model = "sealed_worker" if is_conda else "host-coupled"
-
-    if "sealed_host_ro_paths" in tool_config:
-        raise ValueError(
-            "Manifest field 'sealed_host_ro_paths' is not allowed. "
-            "Configure [tool.comfy.host].sealed_worker_ro_import_paths in host policy."
-        )
-
-    # Conda-specific manifest fields
-    conda_channels: list[str] = (
-        tool_config.get("conda_channels", []) if is_conda else []
-    )
-    conda_dependencies: list[str] = (
-        tool_config.get("conda_dependencies", []) if is_conda else []
-    )
-    conda_platforms: list[str] = (
-        tool_config.get("conda_platforms", []) if is_conda else []
-    )
-    conda_python: str = (
-        tool_config.get("conda_python", "*") if is_conda else "*"
-    )
-
-    # Parse [project] dependencies
-    project_config = manifest_data.get("project", {})
-    dependencies = project_config.get("dependencies", [])
-    if not isinstance(dependencies, list):
-        dependencies = []
-
-    # Get extension name (default to folder name if not in project.name)
-    extension_name = project_config.get("name", node_dir.name)
-
-    # LOGIC: Isolation Decision
-    policy = get_enforcement_policy()
-    isolated = can_isolate or policy["force_isolated"]
-
-    if not isolated:
-        return []
-
-    import folder_paths
-
-    base_paths = [Path(folder_paths.base_path), node_dir]
-    dependencies = [
-        _normalize_dependency_spec(dep, base_paths) if isinstance(dep, str) else dep
-        for dep in dependencies
-    ]
-    cuda_wheels = _parse_cuda_wheels_config(tool_config, dependencies)
-
-    manager_config = ExtensionManagerConfig(venv_root_path=str(venv_root))
-    extension_type = _get_extension_type(execution_model)
-    manager: ExtensionManager = pyisolate.ExtensionManager(
-        extension_type, manager_config
-    )
-    extension_managers.append(manager)
-
-    host_policy = load_host_policy(Path(folder_paths.base_path))
-
-    sandbox_config = {}
-    is_linux = platform.system() == "Linux"
-
-    if is_conda:
-        share_torch = False
-        share_cuda_ipc = False
-    else:
-        share_cuda_ipc = share_torch and is_linux
-
-    if is_linux and isolated:
-        sandbox_config = {
-            "network": host_policy["allow_network"],
-            "writable_paths": host_policy["writable_paths"],
-            "readonly_paths": host_policy["readonly_paths"],
-        }
-
-    extension_config: dict = {
-        "name": extension_name,
-        "module_path": str(node_dir),
-        "isolated": True,
-        "dependencies": dependencies,
-        "share_torch": share_torch,
-        "share_cuda_ipc": share_cuda_ipc,
-        "sandbox_mode": host_policy["sandbox_mode"],
-        "sandbox": sandbox_config,
-    }
-
-    _is_sealed = execution_model == "sealed_worker"
-    _is_sandboxed = host_policy["sandbox_mode"] != "disabled" and is_linux
-    logger.info(
-        "][ Loading isolated node: %s (torch_share [%s], sealed [%s], sandboxed [%s])",
-        extension_name,
-        "x" if share_torch else " ",
-        "x" if _is_sealed else " ",
-        "x" if _is_sandboxed else " ",
-    )
-
-    if cuda_wheels is not None:
-        extension_config["cuda_wheels"] = cuda_wheels
-
-    # Conda-specific keys
-    if is_conda:
-        extension_config["package_manager"] = "conda"
-        extension_config["conda_channels"] = conda_channels
-        extension_config["conda_dependencies"] = conda_dependencies
-        extension_config["conda_python"] = conda_python
-        find_links = tool_config.get("find_links", [])
-        if find_links:
-            extension_config["find_links"] = find_links
-        if conda_platforms:
-            extension_config["conda_platforms"] = conda_platforms
-
-    if execution_model != "host-coupled":
-        extension_config["execution_model"] = execution_model
-    if execution_model == "sealed_worker":
-        policy_ro_paths = host_policy.get("sealed_worker_ro_import_paths", [])
-        if isinstance(policy_ro_paths, list) and policy_ro_paths:
-            extension_config["sealed_host_ro_paths"] = list(policy_ro_paths)
-        # Sealed workers keep the host RPC service inventory even when the
-        # child resolves no API classes locally.
-
-    extension = manager.load_extension(extension_config)
-    register_dummy_module(extension_name, node_dir)
-
-    # Register host-side event handlers via adapter
-    from .adapter import ComfyUIAdapter
-    ComfyUIAdapter.register_host_event_handlers(extension)
-
-    # Register web directory on the host — only when sandbox is disabled.
-    # In sandbox mode, serving untrusted JS to the browser is not safe.
-    if host_policy["sandbox_mode"] == "disabled":
-        _register_web_directory(extension_name, node_dir)
-
-    # Register for proxied web serving — the child's web dir may have
-    # content that doesn't exist on the host (e.g., pip-installed viewer
-    # bundles). The WebDirectoryCache will lazily fetch via RPC.
-    from .proxies.web_directory_proxy import WebDirectoryProxy, get_web_directory_cache
-    cache = get_web_directory_cache()
-    cache.register_proxy(extension_name, WebDirectoryProxy())
-
-    # Try cache first (lazy spawn)
-    if is_cache_valid(node_dir, manifest_path, venv_root):
-        cached_data = load_from_cache(node_dir, venv_root)
-        if cached_data:
-            if _is_stale_node_cache(cached_data):
-                logger.debug(
-                    "][ %s cache is stale/incompatible; rebuilding metadata",
-                    extension_name,
-                )
-            else:
-                logger.debug(f"][ {extension_name} loaded from cache")
-                specs: List[Tuple[str, str, type]] = []
-                for node_name, details in cached_data.items():
-                    stub_cls = build_stub_class(node_name, details, extension)
-                    specs.append(
-                        (node_name, details.get("display_name", node_name), stub_cls)
-                    )
-                return specs
-
-    # Cache miss - spawn process and get metadata
-    logger.debug(f"][ {extension_name} cache miss, spawning process for metadata")
-
-    try:
-        remote_nodes: Dict[str, str] = await extension.list_nodes()
-    except Exception as exc:
-        logger.warning(
-            "][ %s metadata discovery failed, skipping isolated load: %s",
-            extension_name,
-            exc,
-        )
-        await _stop_extension_safe(extension, extension_name)
-        return []
-
-    if not remote_nodes:
-        logger.debug("][ %s exposed no isolated nodes; skipping", extension_name)
-        await _stop_extension_safe(extension, extension_name)
-        return []
-
-    specs: List[Tuple[str, str, type]] = []
-    cache_data: Dict[str, Dict] = {}
-
-    for node_name, display_name in remote_nodes.items():
-        try:
-            details = await extension.get_node_details(node_name)
-        except Exception as exc:
-            logger.warning(
-                "][ %s failed to load metadata for %s, skipping node: %s",
-                extension_name,
-                node_name,
-                exc,
-            )
-            continue
-        details["display_name"] = display_name
-        cache_data[node_name] = details
-        stub_cls = build_stub_class(node_name, details, extension)
-        specs.append((node_name, display_name, stub_cls))
-
-    if not specs:
-        logger.warning(
-            "][ %s produced no usable nodes after metadata scan; skipping",
-            extension_name,
-        )
-        await _stop_extension_safe(extension, extension_name)
-        return []
-
-    # Save metadata to cache for future runs
-    save_to_cache(node_dir, venv_root, cache_data, manifest_path)
-    logger.debug(f"][ {extension_name} metadata cached")
-
-    # Re-check web directory AFTER child has populated it
-    if host_policy["sandbox_mode"] == "disabled":
-        _register_web_directory(extension_name, node_dir)
-
-    # EJECT: Kill process after getting metadata (will respawn on first execution)
-    await _stop_extension_safe(extension, extension_name)
-
-    return specs
-
-
-__all__ = ["ExtensionLoadError", "register_dummy_module", "load_isolated_node"]
--- a/comfy/isolation/extension_wrapper.py
+++ b/comfy/isolation/extension_wrapper.py
@@ -1,878 +0,0 @@
-# pylint: disable=consider-using-from-import,cyclic-import,import-outside-toplevel,logging-fstring-interpolation,protected-access,wrong-import-position
-from __future__ import annotations
-
-import asyncio
-import torch
-
-
-class AttrDict(dict):
-    def __getattr__(self, item):
-        try:
-            return self[item]
-        except KeyError as e:
-            raise AttributeError(item) from e
-
-    def copy(self):
-        return AttrDict(super().copy())
-
-
-import importlib
-import inspect
-import json
-import logging
-import os
-import sys
-import uuid
-from dataclasses import asdict
-from typing import Any, Dict, List, Tuple
-
-from pyisolate import ExtensionBase
-
-from comfy_api.internal import _ComfyNodeInternal
-
-LOG_PREFIX = "]["
-V3_DISCOVERY_TIMEOUT = 30
-_PRE_EXEC_MIN_FREE_VRAM_BYTES = 2 * 1024 * 1024 * 1024
-
-logger = logging.getLogger(__name__)
-
-
-def _run_prestartup_web_copy(module: Any, module_dir: str, web_dir_path: str) -> None:
-    """Run the web asset copy step that prestartup_script.py used to do.
-
-    If the module's web/ directory is empty and the module had a
-    prestartup_script.py that copied assets from pip packages, this
-    function replicates that work inside the child process.
-
-    Generic pattern: reads _PRESTARTUP_WEB_COPY from the module if
-    defined, otherwise falls back to detecting common asset packages.
-    """
-    import shutil
-
-    # Already populated — nothing to do
-    if os.path.isdir(web_dir_path) and any(os.scandir(web_dir_path)):
-        return
-
-    os.makedirs(web_dir_path, exist_ok=True)
-
-    # Try module-defined copy spec first (generic hook for any node pack)
-    copy_spec = getattr(module, "_PRESTARTUP_WEB_COPY", None)
-    if copy_spec is not None and callable(copy_spec):
-        try:
-            copy_spec(web_dir_path)
-            logger.info(
-                "%s Ran _PRESTARTUP_WEB_COPY for %s", LOG_PREFIX, module_dir
-            )
-            return
-        except Exception as e:
-            logger.warning(
-                "%s _PRESTARTUP_WEB_COPY failed for %s: %s",
-                LOG_PREFIX, module_dir, e,
-            )
-
-    # Fallback: detect comfy_3d_viewers and run copy_viewer()
-    try:
-        from comfy_3d_viewers import copy_viewer, VIEWER_FILES
-        viewers = list(VIEWER_FILES.keys())
-        for viewer in viewers:
-            try:
-                copy_viewer(viewer, web_dir_path)
-            except Exception:
-                pass
-        if any(os.scandir(web_dir_path)):
-            logger.info(
-                "%s Copied %d viewer types from comfy_3d_viewers to %s",
-                LOG_PREFIX, len(viewers), web_dir_path,
-            )
-    except ImportError:
-        pass
-
-    # Fallback: detect comfy_dynamic_widgets
-    try:
-        from comfy_dynamic_widgets import get_js_path
-        src = os.path.realpath(get_js_path())
-        if os.path.exists(src):
-            dst_dir = os.path.join(web_dir_path, "js")
-            os.makedirs(dst_dir, exist_ok=True)
-            dst = os.path.join(dst_dir, "dynamic_widgets.js")
-            shutil.copy2(src, dst)
-    except ImportError:
-        pass
-
-
-def _read_extension_name(module_dir: str) -> str:
-    """Read extension name from pyproject.toml, falling back to directory name."""
-    pyproject = os.path.join(module_dir, "pyproject.toml")
-    if os.path.exists(pyproject):
-        try:
-            import tomllib
-        except ImportError:
-            import tomli as tomllib  # type: ignore[no-redef]
-        try:
-            with open(pyproject, "rb") as f:
-                data = tomllib.load(f)
-            name = data.get("project", {}).get("name")
-            if name:
-                return name
-        except Exception:
-            pass
-    return os.path.basename(module_dir)
-
-
-def _flush_tensor_transport_state(marker: str) -> int:
-    try:
-        from pyisolate import flush_tensor_keeper  # type: ignore[attr-defined]
-    except Exception:
-        return 0
-    if not callable(flush_tensor_keeper):
-        return 0
-    flushed = flush_tensor_keeper()
-    if flushed > 0:
-        logger.debug(
-            "%s %s flush_tensor_keeper released=%d", LOG_PREFIX, marker, flushed
-        )
-    return flushed
-
-
-def _relieve_child_vram_pressure(marker: str) -> None:
-    import comfy.model_management as model_management
-
-    model_management.cleanup_models_gc()
-    model_management.cleanup_models()
-
-    device = model_management.get_torch_device()
-    if not hasattr(device, "type") or device.type == "cpu":
-        return
-
-    required = max(
-        model_management.minimum_inference_memory(),
-        _PRE_EXEC_MIN_FREE_VRAM_BYTES,
-    )
-    if model_management.get_free_memory(device) < required:
-        model_management.free_memory(required, device, for_dynamic=True)
-        if model_management.get_free_memory(device) < required:
-            model_management.free_memory(required, device, for_dynamic=False)
-        model_management.cleanup_models()
-        model_management.soft_empty_cache()
-        logger.debug("%s %s free_memory target=%d", LOG_PREFIX, marker, required)
-
-
-def _sanitize_for_transport(value):
-    primitives = (str, int, float, bool, type(None))
-    if isinstance(value, primitives):
-        return value
-
-    cls_name = value.__class__.__name__
-    if cls_name == "FlexibleOptionalInputType":
-        return {
-            "__pyisolate_flexible_optional__": True,
-            "type": _sanitize_for_transport(getattr(value, "type", "*")),
-        }
-    if cls_name == "AnyType":
-        return {"__pyisolate_any_type__": True, "value": str(value)}
-    if cls_name == "ByPassTypeTuple":
-        return {
-            "__pyisolate_bypass_tuple__": [
-                _sanitize_for_transport(v) for v in tuple(value)
-            ]
-        }
-
-    if isinstance(value, dict):
-        return {k: _sanitize_for_transport(v) for k, v in value.items()}
-    if isinstance(value, tuple):
-        return {"__pyisolate_tuple__": [_sanitize_for_transport(v) for v in value]}
-    if isinstance(value, list):
-        return [_sanitize_for_transport(v) for v in value]
-
-    return str(value)
-
-
-# Re-export RemoteObjectHandle from pyisolate for backward compatibility
-# The canonical definition is now in pyisolate._internal.remote_handle
-from pyisolate._internal.remote_handle import RemoteObjectHandle  # noqa: E402,F401
-
-
-class ComfyNodeExtension(ExtensionBase):
-    def __init__(self) -> None:
-        super().__init__()
-        self.node_classes: Dict[str, type] = {}
-        self.display_names: Dict[str, str] = {}
-        self.node_instances: Dict[str, Any] = {}
-        self.remote_objects: Dict[str, Any] = {}
-        self._route_handlers: Dict[str, Any] = {}
-        self._module: Any = None
-
-    async def on_module_loaded(self, module: Any) -> None:
-        self._module = module
-
-        # Registries are initialized in host_hooks.py initialize_host_process()
-        # They auto-register via ProxiedSingleton when instantiated
-        # NO additional setup required here - if a registry is missing from host_hooks, it WILL fail
-
-        self.node_classes = getattr(module, "NODE_CLASS_MAPPINGS", {}) or {}
-        self.display_names = getattr(module, "NODE_DISPLAY_NAME_MAPPINGS", {}) or {}
-
-        # Register web directory with WebDirectoryProxy (child-side)
-        web_dir_attr = getattr(module, "WEB_DIRECTORY", None)
-        if web_dir_attr is not None:
-            module_dir = os.path.dirname(os.path.abspath(module.__file__))
-            web_dir_path = os.path.abspath(os.path.join(module_dir, web_dir_attr))
-            ext_name = _read_extension_name(module_dir)
-
-            # If web dir is empty, run the copy step that prestartup_script.py did
-            _run_prestartup_web_copy(module, module_dir, web_dir_path)
-
-            if os.path.isdir(web_dir_path) and any(os.scandir(web_dir_path)):
-                from comfy.isolation.proxies.web_directory_proxy import WebDirectoryProxy
-                WebDirectoryProxy.register_web_dir(ext_name, web_dir_path)
-
-        try:
-            from comfy_api.latest import ComfyExtension
-
-            for name, obj in inspect.getmembers(module):
-                if not (
-                    inspect.isclass(obj)
-                    and issubclass(obj, ComfyExtension)
-                    and obj is not ComfyExtension
-                ):
-                    continue
-                if not obj.__module__.startswith(module.__name__):
-                    continue
-                try:
-                    ext_instance = obj()
-                    try:
-                        await asyncio.wait_for(
-                            ext_instance.on_load(), timeout=V3_DISCOVERY_TIMEOUT
-                        )
-                    except asyncio.TimeoutError:
-                        logger.error(
-                            "%s V3 Extension %s timed out in on_load()",
-                            LOG_PREFIX,
-                            name,
-                        )
-                        continue
-                    try:
-                        v3_nodes = await asyncio.wait_for(
-                            ext_instance.get_node_list(), timeout=V3_DISCOVERY_TIMEOUT
-                        )
-                    except asyncio.TimeoutError:
-                        logger.error(
-                            "%s V3 Extension %s timed out in get_node_list()",
-                            LOG_PREFIX,
-                            name,
-                        )
-                        continue
-                    for node_cls in v3_nodes:
-                        if hasattr(node_cls, "GET_SCHEMA"):
-                            schema = node_cls.GET_SCHEMA()
-                            self.node_classes[schema.node_id] = node_cls
-                            if schema.display_name:
-                                self.display_names[schema.node_id] = schema.display_name
-                except Exception as e:
-                    logger.error("%s V3 Extension %s failed: %s", LOG_PREFIX, name, e)
-        except ImportError:
-            pass
-
-        module_name = getattr(module, "__name__", "isolated_nodes")
-        for node_cls in self.node_classes.values():
-            if hasattr(node_cls, "__module__") and "/" in str(node_cls.__module__):
-                node_cls.__module__ = module_name
-
-        self.node_instances = {}
-
-    async def list_nodes(self) -> Dict[str, str]:
-        return {name: self.display_names.get(name, name) for name in self.node_classes}
-
-    async def get_node_info(self, node_name: str) -> Dict[str, Any]:
-        return await self.get_node_details(node_name)
-
-    async def get_node_details(self, node_name: str) -> Dict[str, Any]:
-        node_cls = self._get_node_class(node_name)
-        is_v3 = issubclass(node_cls, _ComfyNodeInternal)
-
-        input_types_raw = (
-            node_cls.INPUT_TYPES() if hasattr(node_cls, "INPUT_TYPES") else {}
-        )
-        output_is_list = getattr(node_cls, "OUTPUT_IS_LIST", None)
-        if output_is_list is not None:
-            output_is_list = tuple(bool(x) for x in output_is_list)
-
-        details: Dict[str, Any] = {
-            "input_types": _sanitize_for_transport(input_types_raw),
-            "return_types": tuple(
-                str(t) for t in getattr(node_cls, "RETURN_TYPES", ())
-            ),
-            "return_names": getattr(node_cls, "RETURN_NAMES", None),
-            "function": str(getattr(node_cls, "FUNCTION", "execute")),
-            "category": str(getattr(node_cls, "CATEGORY", "")),
-            "output_node": bool(getattr(node_cls, "OUTPUT_NODE", False)),
-            "output_is_list": output_is_list,
-            "is_v3": is_v3,
-        }
-
-        if is_v3:
-            try:
-                schema = node_cls.GET_SCHEMA()
-                schema_v1 = asdict(schema.get_v1_info(node_cls))
-                try:
-                    schema_v3 = asdict(schema.get_v3_info(node_cls))
-                except (AttributeError, TypeError):
-                    schema_v3 = self._build_schema_v3_fallback(schema)
-                details.update(
-                    {
-                        "schema_v1": schema_v1,
-                        "schema_v3": schema_v3,
-                        "hidden": [h.value for h in (schema.hidden or [])],
-                        "description": getattr(schema, "description", ""),
-                        "deprecated": bool(getattr(node_cls, "DEPRECATED", False)),
-                        "experimental": bool(getattr(node_cls, "EXPERIMENTAL", False)),
-                        "api_node": bool(getattr(node_cls, "API_NODE", False)),
-                        "input_is_list": bool(
-                            getattr(node_cls, "INPUT_IS_LIST", False)
-                        ),
-                        "not_idempotent": bool(
-                            getattr(node_cls, "NOT_IDEMPOTENT", False)
-                        ),
-                        "accept_all_inputs": bool(
-                            getattr(node_cls, "ACCEPT_ALL_INPUTS", False)
-                        ),
-                    }
-                )
-            except Exception as exc:
-                logger.warning(
-                    "%s V3 schema serialization failed for %s: %s",
-                    LOG_PREFIX,
-                    node_name,
-                    exc,
-                )
-        return details
-
-    def _build_schema_v3_fallback(self, schema) -> Dict[str, Any]:
-        input_dict: Dict[str, Any] = {}
-        output_dict: Dict[str, Any] = {}
-        hidden_list: List[str] = []
-
-        if getattr(schema, "inputs", None):
-            for inp in schema.inputs:
-                self._add_schema_io_v3(inp, input_dict)
-        if getattr(schema, "outputs", None):
-            for out in schema.outputs:
-                self._add_schema_io_v3(out, output_dict)
-        if getattr(schema, "hidden", None):
-            for h in schema.hidden:
-                hidden_list.append(getattr(h, "value", str(h)))
-
-        return {
-            "input": input_dict,
-            "output": output_dict,
-            "hidden": hidden_list,
-            "name": getattr(schema, "node_id", None),
-            "display_name": getattr(schema, "display_name", None),
-            "description": getattr(schema, "description", None),
-            "category": getattr(schema, "category", None),
-            "output_node": getattr(schema, "is_output_node", False),
-            "deprecated": getattr(schema, "is_deprecated", False),
-            "experimental": getattr(schema, "is_experimental", False),
-            "api_node": getattr(schema, "is_api_node", False),
-        }
-
-    def _add_schema_io_v3(self, io_obj: Any, target: Dict[str, Any]) -> None:
-        io_id = getattr(io_obj, "id", None)
-        if io_id is None:
-            return
-
-        io_type_fn = getattr(io_obj, "get_io_type", None)
-        io_type = (
-            io_type_fn() if callable(io_type_fn) else getattr(io_obj, "io_type", None)
-        )
-
-        as_dict_fn = getattr(io_obj, "as_dict", None)
-        payload = as_dict_fn() if callable(as_dict_fn) else {}
-
-        target[str(io_id)] = (io_type, payload)
-
-    async def get_input_types(self, node_name: str) -> Dict[str, Any]:
-        node_cls = self._get_node_class(node_name)
-        if hasattr(node_cls, "INPUT_TYPES"):
-            return node_cls.INPUT_TYPES()
-        return {}
-
-    async def execute_node(self, node_name: str, **inputs: Any) -> Tuple[Any, ...]:
-        logger.debug(
-            "%s ISO:child_execute_start ext=%s node=%s input_keys=%d",
-            LOG_PREFIX,
-            getattr(self, "name", "?"),
-            node_name,
-            len(inputs),
-        )
-        if os.environ.get("PYISOLATE_CHILD") == "1":
-            _relieve_child_vram_pressure("EXT:pre_execute")
-
-        resolved_inputs = self._resolve_remote_objects(inputs)
-
-        instance = self._get_node_instance(node_name)
-        node_cls = self._get_node_class(node_name)
-
-        # V3 API nodes expect hidden parameters in cls.hidden, not as kwargs
-        # Hidden params come through RPC as string keys like "Hidden.prompt"
-        from comfy_api.latest._io import Hidden, HiddenHolder
-
-        # Map string representations back to Hidden enum keys
-        hidden_string_map = {
-            "Hidden.unique_id": Hidden.unique_id,
-            "Hidden.prompt": Hidden.prompt,
-            "Hidden.extra_pnginfo": Hidden.extra_pnginfo,
-            "Hidden.dynprompt": Hidden.dynprompt,
-            "Hidden.auth_token_comfy_org": Hidden.auth_token_comfy_org,
-            "Hidden.api_key_comfy_org": Hidden.api_key_comfy_org,
-            # Uppercase enum VALUE forms — V3 execution engine passes these
-            "UNIQUE_ID": Hidden.unique_id,
-            "PROMPT": Hidden.prompt,
-            "EXTRA_PNGINFO": Hidden.extra_pnginfo,
-            "DYNPROMPT": Hidden.dynprompt,
-            "AUTH_TOKEN_COMFY_ORG": Hidden.auth_token_comfy_org,
-            "API_KEY_COMFY_ORG": Hidden.api_key_comfy_org,
-        }
-
-        # Find and extract hidden parameters (both enum and string form)
-        hidden_found = {}
-        keys_to_remove = []
-
-        for key in list(resolved_inputs.keys()):
-            # Check string form first (from RPC serialization)
-            if key in hidden_string_map:
-                hidden_found[hidden_string_map[key]] = resolved_inputs[key]
-                keys_to_remove.append(key)
-            # Also check enum form (direct calls)
-            elif isinstance(key, Hidden):
-                hidden_found[key] = resolved_inputs[key]
-                keys_to_remove.append(key)
-
-        # Remove hidden params from kwargs
-        for key in keys_to_remove:
-            resolved_inputs.pop(key)
-
-        # Set hidden on node class if any hidden params found
-        if hidden_found:
-            if not hasattr(node_cls, "hidden") or node_cls.hidden is None:
-                node_cls.hidden = HiddenHolder.from_dict(hidden_found)
-            else:
-                # Update existing hidden holder
-                for key, value in hidden_found.items():
-                    setattr(node_cls.hidden, key.value.lower(), value)
-
-        # INPUT_IS_LIST: ComfyUI's executor passes all inputs as lists when this
-        # flag is set.  The isolation RPC delivers unwrapped values, so we must
-        # wrap each input in a single-element list to match the contract.
-        if getattr(node_cls, "INPUT_IS_LIST", False):
-            resolved_inputs = {k: [v] for k, v in resolved_inputs.items()}
-
-        function_name = getattr(node_cls, "FUNCTION", "execute")
-        if not hasattr(instance, function_name):
-            raise AttributeError(f"Node {node_name} missing callable '{function_name}'")
-
-        handler = getattr(instance, function_name)
-
-        try:
-            import torch
-            if asyncio.iscoroutinefunction(handler):
-                with torch.inference_mode():
-                    result = await handler(**resolved_inputs)
-            else:
-                import functools
-
-                def _run_with_inference_mode(**kwargs):
-                    with torch.inference_mode():
-                        return handler(**kwargs)
-
-                loop = asyncio.get_running_loop()
-                result = await loop.run_in_executor(
-                    None, functools.partial(_run_with_inference_mode, **resolved_inputs)
-                )
-        except Exception:
-            logger.exception(
-                "%s ISO:child_execute_error ext=%s node=%s",
-                LOG_PREFIX,
-                getattr(self, "name", "?"),
-                node_name,
-            )
-            raise
-
-        if type(result).__name__ == "NodeOutput":
-            node_output_dict = {
-                "__node_output__": True,
-                "args": self._wrap_unpicklable_objects(result.args),
-            }
-            if result.ui is not None:
-                node_output_dict["ui"] = self._wrap_unpicklable_objects(result.ui)
-            if getattr(result, "expand", None) is not None:
-                node_output_dict["expand"] = result.expand
-            if getattr(result, "block_execution", None) is not None:
-                node_output_dict["block_execution"] = result.block_execution
-            return node_output_dict
-        if self._is_comfy_protocol_return(result):
-            wrapped = self._wrap_unpicklable_objects(result)
-            return wrapped
-
-        if not isinstance(result, tuple):
-            result = (result,)
-        wrapped = self._wrap_unpicklable_objects(result)
-        return wrapped
-
-    async def flush_transport_state(self) -> int:
-        if os.environ.get("PYISOLATE_CHILD") != "1":
-            return 0
-        logger.debug(
-            "%s ISO:child_flush_start ext=%s", LOG_PREFIX, getattr(self, "name", "?")
-        )
-        flushed = _flush_tensor_transport_state("EXT:workflow_end")
-        try:
-            from comfy.isolation.model_patcher_proxy_registry import (
-                ModelPatcherRegistry,
-            )
-
-            registry = ModelPatcherRegistry()
-            removed = registry.sweep_pending_cleanup()
-            if removed > 0:
-                logger.debug(
-                    "%s EXT:workflow_end registry sweep removed=%d", LOG_PREFIX, removed
-                )
-        except Exception:
-            logger.debug(
-                "%s EXT:workflow_end registry sweep failed", LOG_PREFIX, exc_info=True
-            )
-        logger.debug(
-            "%s ISO:child_flush_done ext=%s flushed=%d",
-            LOG_PREFIX,
-            getattr(self, "name", "?"),
-            flushed,
-        )
-        return flushed
-
-    async def get_remote_object(self, object_id: str) -> Any:
-        """Retrieve a remote object by ID for host-side deserialization."""
-        if object_id not in self.remote_objects:
-            raise KeyError(f"Remote object {object_id} not found")
-
-        return self.remote_objects[object_id]
-
-    def _store_remote_object_handle(self, obj: Any) -> RemoteObjectHandle:
-        object_id = str(uuid.uuid4())
-        self.remote_objects[object_id] = obj
-        return RemoteObjectHandle(object_id, type(obj).__name__)
-
-    async def call_remote_object_method(
-        self,
-        object_id: str,
-        method_name: str,
-        *args: Any,
-        **kwargs: Any,
-    ) -> Any:
-        """Invoke a method or attribute-backed accessor on a child-owned object."""
-        obj = await self.get_remote_object(object_id)
-
-        if method_name == "get_patcher_attr":
-            return getattr(obj, args[0])
-        if method_name == "get_model_options":
-            return getattr(obj, "model_options")
-        if method_name == "set_model_options":
-            setattr(obj, "model_options", args[0])
-            return None
-        if method_name == "get_object_patches":
-            return getattr(obj, "object_patches")
-        if method_name == "get_patches":
-            return getattr(obj, "patches")
-        if method_name == "get_wrappers":
-            return getattr(obj, "wrappers")
-        if method_name == "get_callbacks":
-            return getattr(obj, "callbacks")
-        if method_name == "get_load_device":
-            return getattr(obj, "load_device")
-        if method_name == "get_offload_device":
-            return getattr(obj, "offload_device")
-        if method_name == "get_hook_mode":
-            return getattr(obj, "hook_mode")
-        if method_name == "get_parent":
-            parent = getattr(obj, "parent", None)
-            if parent is None:
-                return None
-            return self._store_remote_object_handle(parent)
-        if method_name == "get_inner_model_attr":
-            attr_name = args[0]
-            if hasattr(obj.model, attr_name):
-                return getattr(obj.model, attr_name)
-            if hasattr(obj, attr_name):
-                return getattr(obj, attr_name)
-            return None
-        if method_name == "inner_model_apply_model":
-            return obj.model.apply_model(*args[0], **args[1])
-        if method_name == "inner_model_extra_conds_shapes":
-            return obj.model.extra_conds_shapes(*args[0], **args[1])
-        if method_name == "inner_model_extra_conds":
-            return obj.model.extra_conds(*args[0], **args[1])
-        if method_name == "inner_model_memory_required":
-            return obj.model.memory_required(*args[0], **args[1])
-        if method_name == "process_latent_in":
-            return obj.model.process_latent_in(*args[0], **args[1])
-        if method_name == "process_latent_out":
-            return obj.model.process_latent_out(*args[0], **args[1])
-        if method_name == "scale_latent_inpaint":
-            return obj.model.scale_latent_inpaint(*args[0], **args[1])
-        if method_name.startswith("get_"):
-            attr_name = method_name[4:]
-            if hasattr(obj, attr_name):
-                return getattr(obj, attr_name)
-
-        target = getattr(obj, method_name)
-        if callable(target):
-            result = target(*args, **kwargs)
-            if inspect.isawaitable(result):
-                result = await result
-            if type(result).__name__ == "ModelPatcher":
-                return self._store_remote_object_handle(result)
-            return result
-        if args or kwargs:
-            raise TypeError(f"{method_name} is not callable on remote object {object_id}")
-        return target
-
-    def _wrap_unpicklable_objects(self, data: Any) -> Any:
-        if isinstance(data, (str, int, float, bool, type(None))):
-            return data
-        if isinstance(data, torch.Tensor):
-            tensor = data.detach() if data.requires_grad else data
-            if os.environ.get("PYISOLATE_CHILD") == "1" and tensor.device.type != "cpu":
-                return tensor.cpu()
-            return tensor
-
-        # Special-case clip vision outputs: preserve attribute access by packing fields
-        if hasattr(data, "penultimate_hidden_states") or hasattr(
-            data, "last_hidden_state"
-        ):
-            fields = {}
-            for attr in (
-                "penultimate_hidden_states",
-                "last_hidden_state",
-                "image_embeds",
-                "text_embeds",
-            ):
-                if hasattr(data, attr):
-                    try:
-                        fields[attr] = self._wrap_unpicklable_objects(
-                            getattr(data, attr)
-                        )
-                    except Exception:
-                        pass
-            if fields:
-                return {"__pyisolate_attribute_container__": True, "data": fields}
-
-        # Avoid converting arbitrary objects with stateful methods (models, etc.)
-        # They will be handled via RemoteObjectHandle below.
-
-        type_name = type(data).__name__
-        if type_name == "ModelPatcherProxy":
-            return {"__type__": "ModelPatcherRef", "model_id": data._instance_id}
-        if type_name == "CLIPProxy":
-            return {"__type__": "CLIPRef", "clip_id": data._instance_id}
-        if type_name == "VAEProxy":
-            return {"__type__": "VAERef", "vae_id": data._instance_id}
-        if type_name == "ModelSamplingProxy":
-            return {"__type__": "ModelSamplingRef", "ms_id": data._instance_id}
-
-        if isinstance(data, (list, tuple)):
-            wrapped = [self._wrap_unpicklable_objects(item) for item in data]
-            return tuple(wrapped) if isinstance(data, tuple) else wrapped
-        if isinstance(data, dict):
-            converted_dict = {
-                k: self._wrap_unpicklable_objects(v) for k, v in data.items()
-            }
-            return {"__pyisolate_attrdict__": True, "data": converted_dict}
-
-        from pyisolate._internal.serialization_registry import SerializerRegistry
-
-        registry = SerializerRegistry.get_instance()
-        if registry.is_data_type(type_name):
-            serializer = registry.get_serializer(type_name)
-            if serializer:
-                return serializer(data)
-
-        return self._store_remote_object_handle(data)
-
-    def _resolve_remote_objects(self, data: Any) -> Any:
-        if isinstance(data, RemoteObjectHandle):
-            if data.object_id not in self.remote_objects:
-                raise KeyError(f"Remote object {data.object_id} not found")
-            return self.remote_objects[data.object_id]
-
-        if isinstance(data, dict):
-            ref_type = data.get("__type__")
-            if ref_type in ("CLIPRef", "ModelPatcherRef", "VAERef"):
-                from pyisolate._internal.model_serialization import (
-                    deserialize_proxy_result,
-                )
-
-                return deserialize_proxy_result(data)
-            if ref_type == "ModelSamplingRef":
-                from pyisolate._internal.model_serialization import (
-                    deserialize_proxy_result,
-                )
-
-                return deserialize_proxy_result(data)
-            return {k: self._resolve_remote_objects(v) for k, v in data.items()}
-
-        if isinstance(data, (list, tuple)):
-            resolved = [self._resolve_remote_objects(item) for item in data]
-            return tuple(resolved) if isinstance(data, tuple) else resolved
-        return data
-
-    def _get_node_class(self, node_name: str) -> type:
-        if node_name not in self.node_classes:
-            raise KeyError(f"Unknown node: {node_name}")
-        return self.node_classes[node_name]
-
-    def _get_node_instance(self, node_name: str) -> Any:
-        if node_name not in self.node_instances:
-            if node_name not in self.node_classes:
-                raise KeyError(f"Unknown node: {node_name}")
-            self.node_instances[node_name] = self.node_classes[node_name]()
-        return self.node_instances[node_name]
-
-    async def before_module_loaded(self) -> None:
-        # Inject initialization here if we think this is the child
-        try:
-            from comfy.isolation import initialize_proxies
-
-            initialize_proxies()
-        except Exception as e:
-            logging.getLogger(__name__).error(
-                f"Failed to call initialize_proxies in before_module_loaded: {e}"
-            )
-
-        await super().before_module_loaded()
-        try:
-            from comfy_api.latest import ComfyAPI_latest
-            from .proxies.progress_proxy import ProgressProxy
-
-            ComfyAPI_latest.Execution = ProgressProxy
-            # ComfyAPI_latest.execution = ProgressProxy()  # Eliminated to avoid Singleton collision
-            # fp_proxy = FolderPathsProxy()                 # Eliminated to avoid Singleton collision
-            # latest_ui.folder_paths = fp_proxy
-            # latest_resources.folder_paths = fp_proxy
-        except Exception:
-            pass
-
-    async def call_route_handler(
-        self,
-        handler_module: str,
-        handler_func: str,
-        request_data: Dict[str, Any],
-    ) -> Any:
-        cache_key = f"{handler_module}.{handler_func}"
-        if cache_key not in self._route_handlers:
-            if self._module is not None and hasattr(self._module, "__file__"):
-                node_dir = os.path.dirname(self._module.__file__)
-                if node_dir not in sys.path:
-                    sys.path.insert(0, node_dir)
-            try:
-                module = importlib.import_module(handler_module)
-                self._route_handlers[cache_key] = getattr(module, handler_func)
-            except (ImportError, AttributeError) as e:
-                raise ValueError(f"Route handler not found: {cache_key}") from e
-
-        handler = self._route_handlers[cache_key]
-        mock_request = MockRequest(request_data)
-
-        if asyncio.iscoroutinefunction(handler):
-            result = await handler(mock_request)
-        else:
-            result = handler(mock_request)
-        return self._serialize_response(result)
-
-    def _is_comfy_protocol_return(self, result: Any) -> bool:
-        """
-        Check if the result matches the ComfyUI 'Protocol Return' schema.
-
-        A Protocol Return is a dictionary containing specific reserved keys that
-        ComfyUI's execution engine interprets as instructions (UI updates,
-        Workflow expansion, etc.) rather than purely data outputs.
-
-        Schema:
-           - Must be a dict
-           - Must contain at least one of: 'ui', 'result', 'expand'
-        """
-        if not isinstance(result, dict):
-            return False
-        return any(key in result for key in ("ui", "result", "expand"))
-
-    def _serialize_response(self, response: Any) -> Dict[str, Any]:
-        if response is None:
-            return {"type": "text", "body": "", "status": 204}
-        if isinstance(response, dict):
-            return {"type": "json", "body": response, "status": 200}
-        if isinstance(response, str):
-            return {"type": "text", "body": response, "status": 200}
-        if hasattr(response, "text") and hasattr(response, "status"):
-            return {
-                "type": "text",
-                "body": response.text
-                if hasattr(response, "text")
-                else str(response.body),
-                "status": response.status,
-                "headers": dict(response.headers)
-                if hasattr(response, "headers")
-                else {},
-            }
-        if hasattr(response, "body") and hasattr(response, "status"):
-            body = response.body
-            if isinstance(body, bytes):
-                try:
-                    return {
-                        "type": "text",
-                        "body": body.decode("utf-8"),
-                        "status": response.status,
-                    }
-                except UnicodeDecodeError:
-                    return {
-                        "type": "binary",
-                        "body": body.hex(),
-                        "status": response.status,
-                    }
-            return {"type": "json", "body": body, "status": response.status}
-        return {"type": "text", "body": str(response), "status": 200}
-
-
-class MockRequest:
-    def __init__(self, data: Dict[str, Any]):
-        self.method = data.get("method", "GET")
-        self.path = data.get("path", "/")
-        self.query = data.get("query", {})
-        self._body = data.get("body", {})
-        self._text = data.get("text", "")
-        self.headers = data.get("headers", {})
-        self.content_type = data.get(
-            "content_type", self.headers.get("Content-Type", "application/json")
-        )
-        self.match_info = data.get("match_info", {})
-
-    async def json(self) -> Any:
-        if isinstance(self._body, dict):
-            return self._body
-        if isinstance(self._body, str):
-            return json.loads(self._body)
-        return {}
-
-    async def post(self) -> Dict[str, Any]:
-        if isinstance(self._body, dict):
-            return self._body
-        return {}
-
-    async def text(self) -> str:
-        if self._text:
-            return self._text
-        if isinstance(self._body, str):
-            return self._body
-        if isinstance(self._body, dict):
-            return json.dumps(self._body)
-        return ""
-
-    async def read(self) -> bytes:
-        return (await self.text()).encode("utf-8")
--- a/comfy/isolation/host_hooks.py
+++ b/comfy/isolation/host_hooks.py
@@ -1,30 +0,0 @@
-# pylint: disable=import-outside-toplevel
-# Host process initialization for PyIsolate
-import logging
-
-logger = logging.getLogger(__name__)
-
-
-def initialize_host_process() -> None:
-    root = logging.getLogger()
-    for handler in root.handlers[:]:
-        root.removeHandler(handler)
-    root.addHandler(logging.NullHandler())
-
-    from .proxies.folder_paths_proxy import FolderPathsProxy
-    from .proxies.helper_proxies import HelperProxiesService
-    from .proxies.model_management_proxy import ModelManagementProxy
-    from .proxies.progress_proxy import ProgressProxy
-    from .proxies.prompt_server_impl import PromptServerService
-    from .proxies.utils_proxy import UtilsProxy
-    from .proxies.web_directory_proxy import WebDirectoryProxy
-    from .vae_proxy import VAERegistry
-
-    FolderPathsProxy()
-    HelperProxiesService()
-    ModelManagementProxy()
-    ProgressProxy()
-    PromptServerService()
-    UtilsProxy()
-    WebDirectoryProxy()
-    VAERegistry()
--- a/comfy/isolation/host_policy.py
+++ b/comfy/isolation/host_policy.py
@@ -1,178 +0,0 @@
-# pylint: disable=logging-fstring-interpolation
-from __future__ import annotations
-
-import logging
-import os
-from pathlib import Path
-from pathlib import PurePosixPath
-from typing import Dict, List, TypedDict
-
-try:
-    import tomllib
-except ImportError:
-    import tomli as tomllib  # type: ignore[no-redef]
-
-logger = logging.getLogger(__name__)
-
-HOST_POLICY_PATH_ENV = "COMFY_HOST_POLICY_PATH"
-VALID_SANDBOX_MODES = frozenset({"required", "disabled"})
-FORBIDDEN_WRITABLE_PATHS = frozenset({"/tmp"})
-
-
-class HostSecurityPolicy(TypedDict):
-    sandbox_mode: str
-    allow_network: bool
-    writable_paths: List[str]
-    readonly_paths: List[str]
-    sealed_worker_ro_import_paths: List[str]
-    whitelist: Dict[str, str]
-
-
-DEFAULT_POLICY: HostSecurityPolicy = {
-    "sandbox_mode": "required",
-    "allow_network": False,
-    "writable_paths": ["/dev/shm"],
-    "readonly_paths": [],
-    "sealed_worker_ro_import_paths": [],
-    "whitelist": {},
-}
-
-
-def _default_policy() -> HostSecurityPolicy:
-    return {
-        "sandbox_mode": DEFAULT_POLICY["sandbox_mode"],
-        "allow_network": DEFAULT_POLICY["allow_network"],
-        "writable_paths": list(DEFAULT_POLICY["writable_paths"]),
-        "readonly_paths": list(DEFAULT_POLICY["readonly_paths"]),
-        "sealed_worker_ro_import_paths": list(DEFAULT_POLICY["sealed_worker_ro_import_paths"]),
-        "whitelist": dict(DEFAULT_POLICY["whitelist"]),
-    }
-
-
-def _normalize_writable_paths(paths: list[object]) -> list[str]:
-    normalized_paths: list[str] = []
-    for raw_path in paths:
-        # Host-policy paths are contract-style POSIX paths; keep representation
-        # stable across Windows/Linux so tests and config behavior stay consistent.
-        normalized_path = str(PurePosixPath(str(raw_path).replace("\\", "/")))
-        if normalized_path in FORBIDDEN_WRITABLE_PATHS:
-            continue
-        normalized_paths.append(normalized_path)
-    return normalized_paths
-
-
-def _load_whitelist_file(file_path: Path, config_path: Path) -> Dict[str, str]:
-    if not file_path.is_absolute():
-        file_path = config_path.parent / file_path
-    if not file_path.exists():
-        logger.warning("whitelist_file %s not found, skipping.", file_path)
-        return {}
-    entries: Dict[str, str] = {}
-    for line in file_path.read_text().splitlines():
-        line = line.strip()
-        if not line or line.startswith("#"):
-            continue
-        entries[line] = "*"
-    logger.debug("Loaded %d whitelist entries from %s", len(entries), file_path)
-    return entries
-
-
-def _normalize_sealed_worker_ro_import_paths(raw_paths: object) -> list[str]:
-    if not isinstance(raw_paths, list):
-        raise ValueError(
-            "tool.comfy.host.sealed_worker_ro_import_paths must be a list of absolute paths."
-        )
-
-    normalized_paths: list[str] = []
-    seen: set[str] = set()
-    for raw_path in raw_paths:
-        if not isinstance(raw_path, str) or not raw_path.strip():
-            raise ValueError(
-                "tool.comfy.host.sealed_worker_ro_import_paths entries must be non-empty strings."
-            )
-        normalized_path = str(PurePosixPath(raw_path.replace("\\", "/")))
-        # Accept both POSIX absolute paths (/home/...) and Windows drive-letter paths (D:/...)
-        is_absolute = normalized_path.startswith("/") or (
-            len(normalized_path) >= 3 and normalized_path[1] == ":" and normalized_path[2] == "/"
-        )
-        if not is_absolute:
-            raise ValueError(
-                "tool.comfy.host.sealed_worker_ro_import_paths entries must be absolute paths."
-            )
-        if normalized_path not in seen:
-            seen.add(normalized_path)
-            normalized_paths.append(normalized_path)
-
-    return normalized_paths
-
-
-def load_host_policy(comfy_root: Path) -> HostSecurityPolicy:
-    config_override = os.environ.get(HOST_POLICY_PATH_ENV)
-    config_path = Path(config_override) if config_override else comfy_root / "pyproject.toml"
-    policy = _default_policy()
-
-    if not config_path.exists():
-        logger.debug("Host policy file missing at %s, using defaults.", config_path)
-        return policy
-
-    try:
-        with config_path.open("rb") as f:
-            data = tomllib.load(f)
-    except Exception:
-        logger.warning(
-            "Failed to parse host policy from %s, using defaults.",
-            config_path,
-            exc_info=True,
-        )
-        return policy
-
-    tool_config = data.get("tool", {}).get("comfy", {}).get("host", {})
-    if not isinstance(tool_config, dict):
-        logger.debug("No [tool.comfy.host] section found, using defaults.")
-        return policy
-
-    sandbox_mode = tool_config.get("sandbox_mode")
-    if isinstance(sandbox_mode, str):
-        normalized_sandbox_mode = sandbox_mode.strip().lower()
-        if normalized_sandbox_mode in VALID_SANDBOX_MODES:
-            policy["sandbox_mode"] = normalized_sandbox_mode
-        else:
-            logger.warning(
-                "Invalid host sandbox_mode %r in %s, using default %r.",
-                sandbox_mode,
-                config_path,
-                DEFAULT_POLICY["sandbox_mode"],
-            )
-
-    if "allow_network" in tool_config:
-        policy["allow_network"] = bool(tool_config["allow_network"])
-
-    if "writable_paths" in tool_config:
-        policy["writable_paths"] = _normalize_writable_paths(tool_config["writable_paths"])
-
-    if "readonly_paths" in tool_config:
-        policy["readonly_paths"] = [str(p) for p in tool_config["readonly_paths"]]
-
-    if "sealed_worker_ro_import_paths" in tool_config:
-        policy["sealed_worker_ro_import_paths"] = _normalize_sealed_worker_ro_import_paths(
-            tool_config["sealed_worker_ro_import_paths"]
-        )
-
-    whitelist_file = tool_config.get("whitelist_file")
-    if isinstance(whitelist_file, str):
-        policy["whitelist"].update(_load_whitelist_file(Path(whitelist_file), config_path))
-
-    whitelist_raw = tool_config.get("whitelist")
-    if isinstance(whitelist_raw, dict):
-        policy["whitelist"].update({str(k): str(v) for k, v in whitelist_raw.items()})
-
-    logger.debug(
-        "Loaded Host Policy: %d whitelisted nodes, Sandbox=%s, Network=%s",
-        len(policy["whitelist"]),
-        policy["sandbox_mode"],
-        policy["allow_network"],
-    )
-    return policy
-
-
-__all__ = ["HostSecurityPolicy", "load_host_policy", "DEFAULT_POLICY"]
--- a/comfy/isolation/manifest_loader.py
+++ b/comfy/isolation/manifest_loader.py
@@ -1,221 +0,0 @@
-# pylint: disable=import-outside-toplevel
-from __future__ import annotations
-
-import hashlib
-import json
-import logging
-import os
-import sys
-import tempfile
-from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple
-
-import folder_paths
-
-try:
-    import tomllib
-except ImportError:
-    import tomli as tomllib  # type: ignore[no-redef]
-
-LOG_PREFIX = "]["
-logger = logging.getLogger(__name__)
-
-CACHE_SUBDIR = "cache"
-CACHE_KEY_FILE = "cache_key"
-CACHE_DATA_FILE = "node_info.json"
-CACHE_KEY_LENGTH = 16
-_NESTED_SCAN_ROOT = "packages"
-_IGNORED_MANIFEST_DIRS = {".git", ".venv", "__pycache__"}
-
-
-def _read_manifest(manifest_path: Path) -> dict[str, Any] | None:
-    try:
-        with manifest_path.open("rb") as f:
-            data = tomllib.load(f)
-        if isinstance(data, dict):
-            return data
-    except Exception:
-        return None
-    return None
-
-
-def _is_isolation_manifest(data: dict[str, Any]) -> bool:
-    return (
-        "tool" in data
-        and "comfy" in data["tool"]
-        and "isolation" in data["tool"]["comfy"]
-    )
-
-
-def _discover_nested_manifests(entry: Path) -> List[Tuple[Path, Path]]:
-    packages_root = entry / _NESTED_SCAN_ROOT
-    if not packages_root.exists() or not packages_root.is_dir():
-        return []
-
-    nested: List[Tuple[Path, Path]] = []
-    for manifest in sorted(packages_root.rglob("pyproject.toml")):
-        node_dir = manifest.parent
-        if any(part in _IGNORED_MANIFEST_DIRS for part in node_dir.parts):
-            continue
-
-        data = _read_manifest(manifest)
-        if not data or not _is_isolation_manifest(data):
-            continue
-
-        isolation = data["tool"]["comfy"]["isolation"]
-        if isolation.get("standalone") is True:
-            nested.append((node_dir, manifest))
-
-    return nested
-
-
-def find_manifest_directories() -> List[Tuple[Path, Path]]:
-    """Find custom node directories containing a valid pyproject.toml with [tool.comfy.isolation]."""
-    manifest_dirs: List[Tuple[Path, Path]] = []
-
-    # Standard custom_nodes paths
-    for base_path in folder_paths.get_folder_paths("custom_nodes"):
-        base = Path(base_path)
-        if not base.exists() or not base.is_dir():
-            continue
-
-        for entry in base.iterdir():
-            if not entry.is_dir():
-                continue
-
-            # Look for pyproject.toml
-            manifest = entry / "pyproject.toml"
-            if not manifest.exists():
-                continue
-
-            data = _read_manifest(manifest)
-            if not data or not _is_isolation_manifest(data):
-                continue
-
-            manifest_dirs.append((entry, manifest))
-            manifest_dirs.extend(_discover_nested_manifests(entry))
-
-    return manifest_dirs
-
-
-def compute_cache_key(node_dir: Path, manifest_path: Path) -> str:
-    """Hash manifest + .py mtimes + Python version + PyIsolate version."""
-    hasher = hashlib.sha256()
-
-    try:
-        # Hashing the manifest content ensures config changes invalidate cache
-        hasher.update(manifest_path.read_bytes())
-    except OSError:
-        hasher.update(b"__manifest_read_error__")
-
-    try:
-        py_files = sorted(node_dir.rglob("*.py"))
-        for py_file in py_files:
-            rel_path = py_file.relative_to(node_dir)
-            if "__pycache__" in str(rel_path) or ".venv" in str(rel_path):
-                continue
-            hasher.update(str(rel_path).encode("utf-8"))
-            try:
-                hasher.update(str(py_file.stat().st_mtime).encode("utf-8"))
-            except OSError:
-                hasher.update(b"__file_stat_error__")
-    except OSError:
-        hasher.update(b"__dir_scan_error__")
-
-    hasher.update(sys.version.encode("utf-8"))
-
-    try:
-        import pyisolate
-
-        hasher.update(pyisolate.__version__.encode("utf-8"))
-    except (ImportError, AttributeError):
-        hasher.update(b"__pyisolate_unknown__")
-
-    return hasher.hexdigest()[:CACHE_KEY_LENGTH]
-
-
-def get_cache_path(node_dir: Path, venv_root: Path) -> Tuple[Path, Path]:
-    """Return (cache_key_file, cache_data_file) in venv_root/{node}/cache/."""
-    cache_dir = venv_root / node_dir.name / CACHE_SUBDIR
-    return (cache_dir / CACHE_KEY_FILE, cache_dir / CACHE_DATA_FILE)
-
-
-def is_cache_valid(node_dir: Path, manifest_path: Path, venv_root: Path) -> bool:
-    """Return True only if stored cache key matches current computed key."""
-    try:
-        cache_key_file, cache_data_file = get_cache_path(node_dir, venv_root)
-        if not cache_key_file.exists() or not cache_data_file.exists():
-            return False
-        current_key = compute_cache_key(node_dir, manifest_path)
-        stored_key = cache_key_file.read_text(encoding="utf-8").strip()
-        return current_key == stored_key
-    except Exception as e:
-        logger.debug(
-            "%s Cache validation error for %s: %s", LOG_PREFIX, node_dir.name, e
-        )
-        return False
-
-
-def load_from_cache(node_dir: Path, venv_root: Path) -> Optional[Dict[str, Any]]:
-    """Load node metadata from cache, return None on any error."""
-    try:
-        _, cache_data_file = get_cache_path(node_dir, venv_root)
-        if not cache_data_file.exists():
-            return None
-        data = json.loads(cache_data_file.read_text(encoding="utf-8"))
-        if not isinstance(data, dict):
-            return None
-        return data
-    except Exception:
-        return None
-
-
-def save_to_cache(
-    node_dir: Path, venv_root: Path, node_data: Dict[str, Any], manifest_path: Path
-) -> None:
-    """Save node metadata and cache key atomically."""
-    try:
-        cache_key_file, cache_data_file = get_cache_path(node_dir, venv_root)
-        cache_dir = cache_key_file.parent
-        cache_dir.mkdir(parents=True, exist_ok=True)
-        cache_key = compute_cache_key(node_dir, manifest_path)
-
-        # Atomic write: data
-        tmp_data_fd, tmp_data_path = tempfile.mkstemp(dir=str(cache_dir), suffix=".tmp")
-        try:
-            with os.fdopen(tmp_data_fd, "w", encoding="utf-8") as f:
-                json.dump(node_data, f, indent=2)
-            os.replace(tmp_data_path, cache_data_file)
-        except Exception:
-            try:
-                os.unlink(tmp_data_path)
-            except OSError:
-                pass
-            raise
-
-        # Atomic write: key
-        tmp_key_fd, tmp_key_path = tempfile.mkstemp(dir=str(cache_dir), suffix=".tmp")
-        try:
-            with os.fdopen(tmp_key_fd, "w", encoding="utf-8") as f:
-                f.write(cache_key)
-            os.replace(tmp_key_path, cache_key_file)
-        except Exception:
-            try:
-                os.unlink(tmp_key_path)
-            except OSError:
-                pass
-            raise
-
-    except Exception as e:
-        logger.warning("%s Cache save failed for %s: %s", LOG_PREFIX, node_dir.name, e)
-
-
-__all__ = [
-    "LOG_PREFIX",
-    "find_manifest_directories",
-    "compute_cache_key",
-    "get_cache_path",
-    "is_cache_valid",
-    "load_from_cache",
-    "save_to_cache",
-]
--- a/comfy/isolation/model_patcher_proxy.py
+++ b/comfy/isolation/model_patcher_proxy.py
@@ -1,888 +0,0 @@
-# pylint: disable=bare-except,consider-using-from-import,import-outside-toplevel,protected-access
-# RPC proxy for ModelPatcher (parent process)
-from __future__ import annotations
-
-import logging
-from typing import Any, Optional, List, Set, Dict, Callable
-
-from comfy.isolation.proxies.base import (
-    IS_CHILD_PROCESS,
-    BaseProxy,
-)
-from comfy.isolation.model_patcher_proxy_registry import (
-    ModelPatcherRegistry,
-    AutoPatcherEjector,
-)
-
-logger = logging.getLogger(__name__)
-
-
-class ModelPatcherProxy(BaseProxy[ModelPatcherRegistry]):
-    _registry_class = ModelPatcherRegistry
-    __module__ = "comfy.model_patcher"
-    _APPLY_MODEL_GUARD_PADDING_BYTES = 32 * 1024 * 1024
-
-    def _spawn_related_proxy(self, instance_id: str) -> "ModelPatcherProxy":
-        proxy = ModelPatcherProxy(
-            instance_id,
-            self._registry,
-            manage_lifecycle=not IS_CHILD_PROCESS,
-        )
-        if getattr(self, "_rpc_caller", None) is not None:
-            proxy._rpc_caller = self._rpc_caller
-        return proxy
-
-    def _get_rpc(self) -> Any:
-        if self._rpc_caller is None:
-            from pyisolate._internal.rpc_protocol import get_child_rpc_instance
-
-            rpc = get_child_rpc_instance()
-            if rpc is not None:
-                self._rpc_caller = rpc.create_caller(
-                    self._registry_class, self._registry_class.get_remote_id()
-                )
-            else:
-                self._rpc_caller = self._registry
-        return self._rpc_caller
-
-    def get_all_callbacks(self, call_type: str = None) -> Any:
-        return self._call_rpc("get_all_callbacks", call_type)
-
-    def get_all_wrappers(self, wrapper_type: str = None) -> Any:
-        return self._call_rpc("get_all_wrappers", wrapper_type)
-
-    def _load_list(self, *args, **kwargs) -> Any:
-        return self._call_rpc("load_list_internal", *args, **kwargs)
-
-    def prepare_hook_patches_current_keyframe(
-        self, t: Any, hook_group: Any, model_options: Any
-    ) -> None:
-        self._call_rpc(
-            "prepare_hook_patches_current_keyframe", t, hook_group, model_options
-        )
-
-    def add_hook_patches(
-        self,
-        hook: Any,
-        patches: Any,
-        strength_patch: float = 1.0,
-        strength_model: float = 1.0,
-    ) -> None:
-        self._call_rpc(
-            "add_hook_patches", hook, patches, strength_patch, strength_model
-        )
-
-    def clear_cached_hook_weights(self) -> None:
-        self._call_rpc("clear_cached_hook_weights")
-
-    def get_combined_hook_patches(self, hooks: Any) -> Any:
-        return self._call_rpc("get_combined_hook_patches", hooks)
-
-    def get_additional_models_with_key(self, key: str) -> Any:
-        return self._call_rpc("get_additional_models_with_key", key)
-
-    @property
-    def object_patches(self) -> Any:
-        return self._call_rpc("get_object_patches")
-
-    @property
-    def patches(self) -> Any:
-        res = self._call_rpc("get_patches")
-        if isinstance(res, dict):
-            new_res = {}
-            for k, v in res.items():
-                new_list = []
-                for item in v:
-                    if isinstance(item, list):
-                        new_list.append(tuple(item))
-                    else:
-                        new_list.append(item)
-                new_res[k] = new_list
-            return new_res
-        return res
-
-    @property
-    def pinned(self) -> Set:
-        val = self._call_rpc("get_patcher_attr", "pinned")
-        return set(val) if val is not None else set()
-
-    @property
-    def hook_patches(self) -> Dict:
-        val = self._call_rpc("get_patcher_attr", "hook_patches")
-        if val is None:
-            return {}
-        try:
-            from comfy.hooks import _HookRef
-            import json
-
-            new_val = {}
-            for k, v in val.items():
-                if isinstance(k, str):
-                    if k.startswith("PYISOLATE_HOOKREF:"):
-                        ref_id = k.split(":", 1)[1]
-                        h = _HookRef()
-                        h._pyisolate_id = ref_id
-                        new_val[h] = v
-                    elif k.startswith("__pyisolate_key__"):
-                        try:
-                            json_str = k[len("__pyisolate_key__") :]
-                            data = json.loads(json_str)
-                            ref_id = None
-                            if isinstance(data, list):
-                                for item in data:
-                                    if (
-                                        isinstance(item, list)
-                                        and len(item) == 2
-                                        and item[0] == "id"
-                                    ):
-                                        ref_id = item[1]
-                                        break
-                            if ref_id:
-                                h = _HookRef()
-                                h._pyisolate_id = ref_id
-                                new_val[h] = v
-                            else:
-                                new_val[k] = v
-                        except Exception:
-                            new_val[k] = v
-                    else:
-                        new_val[k] = v
-                else:
-                    new_val[k] = v
-            return new_val
-        except ImportError:
-            return val
-
-    def set_hook_mode(self, hook_mode: Any) -> None:
-        self._call_rpc("set_hook_mode", hook_mode)
-
-    def register_all_hook_patches(
-        self,
-        hooks: Any,
-        target_dict: Any,
-        model_options: Any = None,
-        registered: Any = None,
-    ) -> None:
-        self._call_rpc(
-            "register_all_hook_patches", hooks, target_dict, model_options, registered
-        )
-
-    def is_clone(self, other: Any) -> bool:
-        if isinstance(other, ModelPatcherProxy):
-            return self._call_rpc("is_clone_by_id", other._instance_id)
-        return False
-
-    def clone(self) -> ModelPatcherProxy:
-        new_id = self._call_rpc("clone")
-        return self._spawn_related_proxy(new_id)
-
-    def clone_has_same_weights(self, clone: Any) -> bool:
-        if isinstance(clone, ModelPatcherProxy):
-            return self._call_rpc("clone_has_same_weights_by_id", clone._instance_id)
-        if not IS_CHILD_PROCESS:
-            return self._call_rpc("is_clone", clone)
-        return False
-
-    def get_model_object(self, name: str) -> Any:
-        return self._call_rpc("get_model_object", name)
-
-    @property
-    def model_options(self) -> dict:
-        data = self._call_rpc("get_model_options")
-        import json
-
-        def _decode_keys(obj):
-            if isinstance(obj, dict):
-                new_d = {}
-                for k, v in obj.items():
-                    if isinstance(k, str) and k.startswith("__pyisolate_key__"):
-                        try:
-                            json_str = k[17:]
-                            val = json.loads(json_str)
-                            if isinstance(val, list):
-                                val = tuple(val)
-                            new_d[val] = _decode_keys(v)
-                        except:
-                            new_d[k] = _decode_keys(v)
-                    else:
-                        new_d[k] = _decode_keys(v)
-                return new_d
-            if isinstance(obj, list):
-                return [_decode_keys(x) for x in obj]
-            return obj
-
-        return _decode_keys(data)
-
-    @model_options.setter
-    def model_options(self, value: dict) -> None:
-        self._call_rpc("set_model_options", value)
-
-    def apply_hooks(self, hooks: Any) -> Any:
-        return self._call_rpc("apply_hooks", hooks)
-
-    def prepare_state(self, timestep: Any) -> Any:
-        return self._call_rpc("prepare_state", timestep)
-
-    def restore_hook_patches(self) -> None:
-        self._call_rpc("restore_hook_patches")
-
-    def unpatch_hooks(self, whitelist_keys_set: Optional[Set[str]] = None) -> None:
-        self._call_rpc("unpatch_hooks", whitelist_keys_set)
-
-    def model_patches_to(self, device: Any) -> Any:
-        return self._call_rpc("model_patches_to", device)
-
-    def partially_load(
-        self, device: Any, extra_memory: Any, force_patch_weights: bool = False
-    ) -> Any:
-        return self._call_rpc(
-            "partially_load", device, extra_memory, force_patch_weights
-        )
-
-    def partially_unload(
-        self, device_to: Any, memory_to_free: int = 0, force_patch_weights: bool = False
-    ) -> int:
-        return self._call_rpc(
-            "partially_unload", device_to, memory_to_free, force_patch_weights
-        )
-
-    def load(
-        self,
-        device_to: Any = None,
-        lowvram_model_memory: int = 0,
-        force_patch_weights: bool = False,
-        full_load: bool = False,
-    ) -> None:
-        self._call_rpc(
-            "load", device_to, lowvram_model_memory, force_patch_weights, full_load
-        )
-
-    def patch_model(
-        self,
-        device_to: Any = None,
-        lowvram_model_memory: int = 0,
-        load_weights: bool = True,
-        force_patch_weights: bool = False,
-    ) -> Any:
-        self._call_rpc(
-            "patch_model",
-            device_to,
-            lowvram_model_memory,
-            load_weights,
-            force_patch_weights,
-        )
-        return self
-
-    def unpatch_model(
-        self, device_to: Any = None, unpatch_weights: bool = True
-    ) -> None:
-        self._call_rpc("unpatch_model", device_to, unpatch_weights)
-
-    def detach(self, unpatch_all: bool = True) -> Any:
-        self._call_rpc("detach", unpatch_all)
-        return self.model
-
-    def _cpu_tensor_bytes(self, obj: Any) -> int:
-        import torch
-
-        if isinstance(obj, torch.Tensor):
-            if obj.device.type == "cpu":
-                return obj.nbytes
-            return 0
-        if isinstance(obj, dict):
-            return sum(self._cpu_tensor_bytes(v) for v in obj.values())
-        if isinstance(obj, (list, tuple)):
-            return sum(self._cpu_tensor_bytes(v) for v in obj)
-        return 0
-
-    def _ensure_apply_model_headroom(self, required_bytes: int) -> bool:
-        if required_bytes <= 0:
-            return True
-
-        import torch
-        import comfy.model_management as model_management
-
-        target_raw = self.load_device
-        try:
-            if isinstance(target_raw, torch.device):
-                target = target_raw
-            elif isinstance(target_raw, str):
-                target = torch.device(target_raw)
-            elif isinstance(target_raw, int):
-                target = torch.device(f"cuda:{target_raw}")
-            else:
-                target = torch.device(target_raw)
-        except Exception:
-            return True
-
-        if target.type != "cuda":
-            return True
-
-        required = required_bytes + self._APPLY_MODEL_GUARD_PADDING_BYTES
-        if model_management.get_free_memory(target) >= required:
-            return True
-
-        model_management.cleanup_models_gc()
-        model_management.cleanup_models()
-        model_management.soft_empty_cache()
-
-        if model_management.get_free_memory(target) < required:
-            model_management.free_memory(required, target, for_dynamic=True)
-            model_management.soft_empty_cache()
-
-        if model_management.get_free_memory(target) < required:
-            # Escalate to non-dynamic unloading before dispatching CUDA transfer.
-            model_management.free_memory(required, target, for_dynamic=False)
-            model_management.soft_empty_cache()
-
-        if model_management.get_free_memory(target) < required:
-            model_management.load_models_gpu(
-                [self],
-                minimum_memory_required=required,
-            )
-
-        return model_management.get_free_memory(target) >= required
-
-    def apply_model(self, *args, **kwargs) -> Any:
-        import torch
-
-        def _preferred_device() -> Any:
-            for value in args:
-                if isinstance(value, torch.Tensor):
-                    return value.device
-            for value in kwargs.values():
-                if isinstance(value, torch.Tensor):
-                    return value.device
-            return None
-
-        def _move_result_to_device(obj: Any, device: Any) -> Any:
-            if device is None:
-                return obj
-            if isinstance(obj, torch.Tensor):
-                return obj.to(device) if obj.device != device else obj
-            if isinstance(obj, dict):
-                return {k: _move_result_to_device(v, device) for k, v in obj.items()}
-            if isinstance(obj, list):
-                return [_move_result_to_device(v, device) for v in obj]
-            if isinstance(obj, tuple):
-                return tuple(_move_result_to_device(v, device) for v in obj)
-            return obj
-
-        # DynamicVRAM models must keep load/offload decisions in host process.
-        # Child-side CUDA staging here can deadlock before first inference RPC.
-        if self.is_dynamic():
-            out = self._call_rpc("inner_model_apply_model", args, kwargs)
-            return _move_result_to_device(out, _preferred_device())
-
-        required_bytes = self._cpu_tensor_bytes(args) + self._cpu_tensor_bytes(kwargs)
-        self._ensure_apply_model_headroom(required_bytes)
-
-        def _to_cuda(obj: Any) -> Any:
-            if isinstance(obj, torch.Tensor) and obj.device.type == "cpu":
-                return obj.to("cuda")
-            if isinstance(obj, dict):
-                return {k: _to_cuda(v) for k, v in obj.items()}
-            if isinstance(obj, list):
-                return [_to_cuda(v) for v in obj]
-            if isinstance(obj, tuple):
-                return tuple(_to_cuda(v) for v in obj)
-            return obj
-
-        try:
-            args_cuda = _to_cuda(args)
-            kwargs_cuda = _to_cuda(kwargs)
-        except torch.OutOfMemoryError:
-            self._ensure_apply_model_headroom(required_bytes)
-            args_cuda = _to_cuda(args)
-            kwargs_cuda = _to_cuda(kwargs)
-
-        out = self._call_rpc("inner_model_apply_model", args_cuda, kwargs_cuda)
-        return _move_result_to_device(out, _preferred_device())
-
-    def model_state_dict(self, filter_prefix: Optional[str] = None) -> Any:
-        keys = self._call_rpc("model_state_dict", filter_prefix)
-        return dict.fromkeys(keys, None)
-
-    def add_patches(self, *args: Any, **kwargs: Any) -> Any:
-        res = self._call_rpc("add_patches", *args, **kwargs)
-        if isinstance(res, list):
-            return [tuple(x) if isinstance(x, list) else x for x in res]
-        return res
-
-    def get_key_patches(self, filter_prefix: Optional[str] = None) -> Any:
-        return self._call_rpc("get_key_patches", filter_prefix)
-
-    def patch_weight_to_device(self, key, device_to=None, inplace_update=False):
-        self._call_rpc("patch_weight_to_device", key, device_to, inplace_update)
-
-    def pin_weight_to_device(self, key):
-        self._call_rpc("pin_weight_to_device", key)
-
-    def unpin_weight(self, key):
-        self._call_rpc("unpin_weight", key)
-
-    def unpin_all_weights(self):
-        self._call_rpc("unpin_all_weights")
-
-    def calculate_weight(self, patches, weight, key, intermediate_dtype=None):
-        return self._call_rpc(
-            "calculate_weight", patches, weight, key, intermediate_dtype
-        )
-
-    def inject_model(self) -> None:
-        self._call_rpc("inject_model")
-
-    def eject_model(self) -> None:
-        self._call_rpc("eject_model")
-
-    def use_ejected(self, skip_and_inject_on_exit_only: bool = False) -> Any:
-        return AutoPatcherEjector(
-            self, skip_and_inject_on_exit_only=skip_and_inject_on_exit_only
-        )
-
-    @property
-    def is_injected(self) -> bool:
-        return self._call_rpc("get_is_injected")
-
-    @property
-    def skip_injection(self) -> bool:
-        return self._call_rpc("get_skip_injection")
-
-    @skip_injection.setter
-    def skip_injection(self, value: bool) -> None:
-        self._call_rpc("set_skip_injection", value)
-
-    def clean_hooks(self) -> None:
-        self._call_rpc("clean_hooks")
-
-    def pre_run(self) -> None:
-        self._call_rpc("pre_run")
-
-    def cleanup(self) -> None:
-        try:
-            self._call_rpc("cleanup")
-        except Exception:
-            logger.debug(
-                "ModelPatcherProxy cleanup RPC failed for %s",
-                self._instance_id,
-                exc_info=True,
-            )
-        finally:
-            super().cleanup()
-
-    @property
-    def model(self) -> _InnerModelProxy:
-        return _InnerModelProxy(self)
-
-    def __getattr__(self, name: str) -> Any:
-        _whitelisted_attrs = {
-            "hook_patches_backup",
-            "hook_backup",
-            "cached_hook_patches",
-            "current_hooks",
-            "forced_hooks",
-            "is_clip",
-            "patches_uuid",
-            "pinned",
-            "attachments",
-            "additional_models",
-            "injections",
-            "hook_patches",
-            "model_lowvram",
-            "model_loaded_weight_memory",
-            "backup",
-            "object_patches_backup",
-            "weight_wrapper_patches",
-            "weight_inplace_update",
-            "force_cast_weights",
-        }
-        if name in _whitelisted_attrs:
-            return self._call_rpc("get_patcher_attr", name)
-        raise AttributeError(
-            f"'{type(self).__name__}' object has no attribute '{name}'"
-        )
-
-    def load_lora(
-        self,
-        lora_path: str,
-        strength_model: float,
-        clip: Optional[Any] = None,
-        strength_clip: float = 1.0,
-    ) -> tuple:
-        clip_id = None
-        if clip is not None:
-            clip_id = getattr(clip, "_instance_id", getattr(clip, "_clip_id", None))
-        result = self._call_rpc(
-            "load_lora", lora_path, strength_model, clip_id, strength_clip
-        )
-        new_model = None
-        if result.get("model_id"):
-            new_model = self._spawn_related_proxy(result["model_id"])
-        new_clip = None
-        if result.get("clip_id"):
-            from comfy.isolation.clip_proxy import CLIPProxy
-
-            new_clip = CLIPProxy(result["clip_id"])
-        return (new_model, new_clip)
-
-    @property
-    def load_device(self) -> Any:
-        return self._call_rpc("get_load_device")
-
-    @property
-    def offload_device(self) -> Any:
-        return self._call_rpc("get_offload_device")
-
-    @property
-    def device(self) -> Any:
-        return self.load_device
-
-    def current_loaded_device(self) -> Any:
-        return self._call_rpc("current_loaded_device")
-
-    @property
-    def size(self) -> int:
-        return self._call_rpc("get_size")
-
-    def model_size(self) -> Any:
-        return self._call_rpc("model_size")
-
-    def loaded_size(self) -> Any:
-        return self._call_rpc("loaded_size")
-
-    def get_ram_usage(self) -> int:
-        return self._call_rpc("get_ram_usage")
-
-    def lowvram_patch_counter(self) -> int:
-        return self._call_rpc("lowvram_patch_counter")
-
-    def memory_required(self, input_shape: Any) -> Any:
-        return self._call_rpc("memory_required", input_shape)
-
-    def get_operation_state(self) -> Dict[str, Any]:
-        state = self._call_rpc("get_operation_state")
-        return state if isinstance(state, dict) else {}
-
-    def wait_for_idle(self, timeout_ms: int = 0) -> bool:
-        return bool(self._call_rpc("wait_for_idle", timeout_ms))
-
-    def is_dynamic(self) -> bool:
-        return bool(self._call_rpc("is_dynamic"))
-
-    def get_free_memory(self, device: Any) -> Any:
-        return self._call_rpc("get_free_memory", device)
-
-    def partially_unload_ram(self, ram_to_unload: int) -> Any:
-        return self._call_rpc("partially_unload_ram", ram_to_unload)
-
-    def model_dtype(self) -> Any:
-        res = self._call_rpc("model_dtype")
-        if isinstance(res, str) and res.startswith("torch."):
-            try:
-                import torch
-
-                attr = res.split(".")[-1]
-                if hasattr(torch, attr):
-                    return getattr(torch, attr)
-            except ImportError:
-                pass
-        return res
-
-    @property
-    def hook_mode(self) -> Any:
-        return self._call_rpc("get_hook_mode")
-
-    @hook_mode.setter
-    def hook_mode(self, value: Any) -> None:
-        self._call_rpc("set_hook_mode", value)
-
-    def set_model_sampler_cfg_function(
-        self, sampler_cfg_function: Any, disable_cfg1_optimization: bool = False
-    ) -> None:
-        self._call_rpc(
-            "set_model_sampler_cfg_function",
-            sampler_cfg_function,
-            disable_cfg1_optimization,
-        )
-
-    def set_model_sampler_post_cfg_function(
-        self, post_cfg_function: Any, disable_cfg1_optimization: bool = False
-    ) -> None:
-        self._call_rpc(
-            "set_model_sampler_post_cfg_function",
-            post_cfg_function,
-            disable_cfg1_optimization,
-        )
-
-    def set_model_sampler_pre_cfg_function(
-        self, pre_cfg_function: Any, disable_cfg1_optimization: bool = False
-    ) -> None:
-        self._call_rpc(
-            "set_model_sampler_pre_cfg_function",
-            pre_cfg_function,
-            disable_cfg1_optimization,
-        )
-
-    def set_model_sampler_calc_cond_batch_function(self, fn: Any) -> None:
-        self._call_rpc("set_model_sampler_calc_cond_batch_function", fn)
-
-    def set_model_unet_function_wrapper(self, unet_wrapper_function: Any) -> None:
-        self._call_rpc("set_model_unet_function_wrapper", unet_wrapper_function)
-
-    def set_model_denoise_mask_function(self, denoise_mask_function: Any) -> None:
-        self._call_rpc("set_model_denoise_mask_function", denoise_mask_function)
-
-    def set_model_patch(self, patch: Any, name: str) -> None:
-        self._call_rpc("set_model_patch", patch, name)
-
-    def set_model_patch_replace(
-        self,
-        patch: Any,
-        name: str,
-        block_name: str,
-        number: int,
-        transformer_index: Optional[int] = None,
-    ) -> None:
-        self._call_rpc(
-            "set_model_patch_replace",
-            patch,
-            name,
-            block_name,
-            number,
-            transformer_index,
-        )
-
-    def set_model_attn1_patch(self, patch: Any) -> None:
-        self.set_model_patch(patch, "attn1_patch")
-
-    def set_model_attn2_patch(self, patch: Any) -> None:
-        self.set_model_patch(patch, "attn2_patch")
-
-    def set_model_attn1_replace(
-        self,
-        patch: Any,
-        block_name: str,
-        number: int,
-        transformer_index: Optional[int] = None,
-    ) -> None:
-        self.set_model_patch_replace(
-            patch, "attn1", block_name, number, transformer_index
-        )
-
-    def set_model_attn2_replace(
-        self,
-        patch: Any,
-        block_name: str,
-        number: int,
-        transformer_index: Optional[int] = None,
-    ) -> None:
-        self.set_model_patch_replace(
-            patch, "attn2", block_name, number, transformer_index
-        )
-
-    def set_model_attn1_output_patch(self, patch: Any) -> None:
-        self.set_model_patch(patch, "attn1_output_patch")
-
-    def set_model_attn2_output_patch(self, patch: Any) -> None:
-        self.set_model_patch(patch, "attn2_output_patch")
-
-    def set_model_input_block_patch(self, patch: Any) -> None:
-        self.set_model_patch(patch, "input_block_patch")
-
-    def set_model_input_block_patch_after_skip(self, patch: Any) -> None:
-        self.set_model_patch(patch, "input_block_patch_after_skip")
-
-    def set_model_output_block_patch(self, patch: Any) -> None:
-        self.set_model_patch(patch, "output_block_patch")
-
-    def set_model_emb_patch(self, patch: Any) -> None:
-        self.set_model_patch(patch, "emb_patch")
-
-    def set_model_forward_timestep_embed_patch(self, patch: Any) -> None:
-        self.set_model_patch(patch, "forward_timestep_embed_patch")
-
-    def set_model_double_block_patch(self, patch: Any) -> None:
-        self.set_model_patch(patch, "double_block")
-
-    def set_model_post_input_patch(self, patch: Any) -> None:
-        self.set_model_patch(patch, "post_input")
-
-    def set_model_rope_options(
-        self,
-        scale_x=1.0,
-        shift_x=0.0,
-        scale_y=1.0,
-        shift_y=0.0,
-        scale_t=1.0,
-        shift_t=0.0,
-        **kwargs: Any,
-    ) -> None:
-        options = {
-            "scale_x": scale_x,
-            "shift_x": shift_x,
-            "scale_y": scale_y,
-            "shift_y": shift_y,
-            "scale_t": scale_t,
-            "shift_t": shift_t,
-        }
-        options.update(kwargs)
-        self._call_rpc("set_model_rope_options", options)
-
-    def set_model_compute_dtype(self, dtype: Any) -> None:
-        self._call_rpc("set_model_compute_dtype", dtype)
-
-    def add_object_patch(self, name: str, obj: Any) -> None:
-        self._call_rpc("add_object_patch", name, obj)
-
-    def add_weight_wrapper(self, name: str, function: Any) -> None:
-        self._call_rpc("add_weight_wrapper", name, function)
-
-    def add_wrapper_with_key(self, wrapper_type: Any, key: str, fn: Any) -> None:
-        self._call_rpc("add_wrapper_with_key", wrapper_type, key, fn)
-
-    def add_wrapper(self, wrapper_type: str, wrapper: Callable) -> None:
-        self.add_wrapper_with_key(wrapper_type, None, wrapper)
-
-    def remove_wrappers_with_key(self, wrapper_type: str, key: str) -> None:
-        self._call_rpc("remove_wrappers_with_key", wrapper_type, key)
-
-    @property
-    def wrappers(self) -> Any:
-        return self._call_rpc("get_wrappers")
-
-    def add_callback_with_key(self, call_type: str, key: str, callback: Any) -> None:
-        self._call_rpc("add_callback_with_key", call_type, key, callback)
-
-    def add_callback(self, call_type: str, callback: Any) -> None:
-        self.add_callback_with_key(call_type, None, callback)
-
-    def remove_callbacks_with_key(self, call_type: str, key: str) -> None:
-        self._call_rpc("remove_callbacks_with_key", call_type, key)
-
-    @property
-    def callbacks(self) -> Any:
-        return self._call_rpc("get_callbacks")
-
-    def set_attachments(self, key: str, attachment: Any) -> None:
-        self._call_rpc("set_attachments", key, attachment)
-
-    def get_attachment(self, key: str) -> Any:
-        return self._call_rpc("get_attachment", key)
-
-    def remove_attachments(self, key: str) -> None:
-        self._call_rpc("remove_attachments", key)
-
-    def set_injections(self, key: str, injections: Any) -> None:
-        self._call_rpc("set_injections", key, injections)
-
-    def get_injections(self, key: str) -> Any:
-        return self._call_rpc("get_injections", key)
-
-    def remove_injections(self, key: str) -> None:
-        self._call_rpc("remove_injections", key)
-
-    def set_additional_models(self, key: str, models: Any) -> None:
-        ids = [m._instance_id for m in models]
-        self._call_rpc("set_additional_models", key, ids)
-
-    def remove_additional_models(self, key: str) -> None:
-        self._call_rpc("remove_additional_models", key)
-
-    def get_nested_additional_models(self) -> Any:
-        return self._call_rpc("get_nested_additional_models")
-
-    def get_additional_models(self) -> List[ModelPatcherProxy]:
-        ids = self._call_rpc("get_additional_models")
-        return [self._spawn_related_proxy(mid) for mid in ids]
-
-    def model_patches_models(self) -> Any:
-        return self._call_rpc("model_patches_models")
-
-    @property
-    def parent(self) -> Any:
-        return self._call_rpc("get_parent")
-
-    def model_mmap_residency(self, free: bool = False) -> tuple:
-        result = self._call_rpc("model_mmap_residency", free)
-        if isinstance(result, list):
-            return tuple(result)
-        return result
-
-    def pinned_memory_size(self) -> int:
-        return self._call_rpc("pinned_memory_size")
-
-    def get_non_dynamic_delegate(self) -> ModelPatcherProxy:
-        new_id = self._call_rpc("get_non_dynamic_delegate")
-        return self._spawn_related_proxy(new_id)
-
-    def disable_model_cfg1_optimization(self) -> None:
-        self._call_rpc("disable_model_cfg1_optimization")
-
-    def set_model_noise_refiner_patch(self, patch: Any) -> None:
-        self.set_model_patch(patch, "noise_refiner")
-
-
-class _InnerModelProxy:
-    def __init__(self, parent: ModelPatcherProxy):
-        self._parent = parent
-        self._model_sampling = None
-
-    def __getattr__(self, name: str) -> Any:
-        if name.startswith("_"):
-            raise AttributeError(name)
-        if name == "model_config":
-            from types import SimpleNamespace
-
-            data = self._parent._call_rpc("get_inner_model_attr", name)
-            if isinstance(data, dict):
-                return SimpleNamespace(**data)
-            return data
-        if name in (
-            "latent_format",
-            "model_type",
-            "current_weight_patches_uuid",
-        ):
-            return self._parent._call_rpc("get_inner_model_attr", name)
-        if name == "load_device":
-            return self._parent._call_rpc("get_inner_model_attr", "load_device")
-        if name == "device":
-            return self._parent._call_rpc("get_inner_model_attr", "device")
-        if name == "current_patcher":
-            proxy = ModelPatcherProxy(
-                self._parent._instance_id,
-                self._parent._registry,
-                manage_lifecycle=False,
-            )
-            if getattr(self._parent, "_rpc_caller", None) is not None:
-                proxy._rpc_caller = self._parent._rpc_caller
-            return proxy
-        if name == "model_sampling":
-            if self._model_sampling is None:
-                self._model_sampling = self._parent._call_rpc(
-                    "get_model_object", "model_sampling"
-                )
-            return self._model_sampling
-        if name == "extra_conds_shapes":
-            return lambda *a, **k: self._parent._call_rpc(
-                "inner_model_extra_conds_shapes", a, k
-            )
-        if name == "extra_conds":
-            return lambda *a, **k: self._parent._call_rpc(
-                "inner_model_extra_conds", a, k
-            )
-        if name == "memory_required":
-            return lambda *a, **k: self._parent._call_rpc(
-                "inner_model_memory_required", a, k
-            )
-        if name == "apply_model":
-            # Delegate to parent's method to get the CPU->CUDA optimization
-            return self._parent.apply_model
-        if name == "process_latent_in":
-            return lambda *a, **k: self._parent._call_rpc("process_latent_in", a, k)
-        if name == "process_latent_out":
-            return lambda *a, **k: self._parent._call_rpc("process_latent_out", a, k)
-        if name == "scale_latent_inpaint":
-            return lambda *a, **k: self._parent._call_rpc("scale_latent_inpaint", a, k)
-        if name == "diffusion_model":
-            return self._parent._call_rpc("get_inner_model_attr", "diffusion_model")
-        raise AttributeError(f"'{name}' not supported on isolated InnerModel")
--- a/comfy/isolation/model_patcher_proxy_registry.py
+++ b/comfy/isolation/model_patcher_proxy_registry.py
--- a/comfy/isolation/model_patcher_proxy_utils.py
+++ b/comfy/isolation/model_patcher_proxy_utils.py
@@ -1,156 +0,0 @@
-# pylint: disable=import-outside-toplevel,logging-fstring-interpolation,protected-access
-# Isolation utilities and serializers for ModelPatcherProxy
-from __future__ import annotations
-
-import logging
-import os
-from typing import Any
-
-from comfy.cli_args import args
-
-logger = logging.getLogger(__name__)
-
-
-def maybe_wrap_model_for_isolation(model_patcher: Any) -> Any:
-    from comfy.isolation.model_patcher_proxy_registry import ModelPatcherRegistry
-    from comfy.isolation.model_patcher_proxy import ModelPatcherProxy
-
-    is_child = os.environ.get("PYISOLATE_CHILD") == "1"
-    isolation_active = args.use_process_isolation or is_child
-
-    if not isolation_active:
-        return model_patcher
-    if is_child:
-        return model_patcher
-    if isinstance(model_patcher, ModelPatcherProxy):
-        return model_patcher
-
-    registry = ModelPatcherRegistry()
-    model_id = registry.register(model_patcher)
-    logger.debug(f"Isolated ModelPatcher: {model_id}")
-    return ModelPatcherProxy(model_id, registry, manage_lifecycle=True)
-
-
-def register_hooks_serializers(registry=None):
-    from pyisolate._internal.serialization_registry import SerializerRegistry
-    import comfy.hooks
-
-    if registry is None:
-        registry = SerializerRegistry.get_instance()
-
-    def serialize_enum(obj):
-        return {"__enum__": f"{type(obj).__name__}.{obj.name}"}
-
-    def deserialize_enum(data):
-        cls_name, val_name = data["__enum__"].split(".")
-        cls = getattr(comfy.hooks, cls_name)
-        return cls[val_name]
-
-    registry.register("EnumHookType", serialize_enum, deserialize_enum)
-    registry.register("EnumHookScope", serialize_enum, deserialize_enum)
-    registry.register("EnumHookMode", serialize_enum, deserialize_enum)
-    registry.register("EnumWeightTarget", serialize_enum, deserialize_enum)
-
-    def serialize_hook_group(obj):
-        return {"__type__": "HookGroup", "hooks": obj.hooks}
-
-    def deserialize_hook_group(data):
-        hg = comfy.hooks.HookGroup()
-        for h in data["hooks"]:
-            hg.add(h)
-        return hg
-
-    registry.register("HookGroup", serialize_hook_group, deserialize_hook_group)
-
-    def serialize_dict_state(obj):
-        d = obj.__dict__.copy()
-        d["__type__"] = type(obj).__name__
-        if "custom_should_register" in d:
-            del d["custom_should_register"]
-        return d
-
-    def deserialize_dict_state_generic(cls):
-        def _deserialize(data):
-            h = cls()
-            h.__dict__.update(data)
-            return h
-
-        return _deserialize
-
-    def deserialize_hook_keyframe(data):
-        h = comfy.hooks.HookKeyframe(strength=data.get("strength", 1.0))
-        h.__dict__.update(data)
-        return h
-
-    registry.register("HookKeyframe", serialize_dict_state, deserialize_hook_keyframe)
-
-    def deserialize_hook_keyframe_group(data):
-        h = comfy.hooks.HookKeyframeGroup()
-        h.__dict__.update(data)
-        return h
-
-    registry.register(
-        "HookKeyframeGroup", serialize_dict_state, deserialize_hook_keyframe_group
-    )
-
-    def deserialize_hook(data):
-        h = comfy.hooks.Hook()
-        h.__dict__.update(data)
-        return h
-
-    registry.register("Hook", serialize_dict_state, deserialize_hook)
-
-    def deserialize_weight_hook(data):
-        h = comfy.hooks.WeightHook()
-        h.__dict__.update(data)
-        return h
-
-    registry.register("WeightHook", serialize_dict_state, deserialize_weight_hook)
-
-    def serialize_set(obj):
-        return {"__set__": list(obj)}
-
-    def deserialize_set(data):
-        return set(data["__set__"])
-
-    registry.register("set", serialize_set, deserialize_set)
-
-    try:
-        from comfy.weight_adapter.lora import LoRAAdapter
-
-        def serialize_lora(obj):
-            return {"weights": {}, "loaded_keys": list(obj.loaded_keys)}
-
-        def deserialize_lora(data):
-            return LoRAAdapter(set(data["loaded_keys"]), data["weights"])
-
-        registry.register("LoRAAdapter", serialize_lora, deserialize_lora)
-    except Exception:
-        pass
-
-    try:
-        from comfy.hooks import _HookRef
-        import uuid
-
-        def serialize_hook_ref(obj):
-            return {
-                "__hook_ref__": True,
-                "id": getattr(obj, "_pyisolate_id", str(uuid.uuid4())),
-            }
-
-        def deserialize_hook_ref(data):
-            h = _HookRef()
-            h._pyisolate_id = data.get("id", str(uuid.uuid4()))
-            return h
-
-        registry.register("_HookRef", serialize_hook_ref, deserialize_hook_ref)
-    except ImportError:
-        pass
-    except Exception as e:
-        logger.warning(f"Failed to register _HookRef: {e}")
-
-
-try:
-    register_hooks_serializers()
-except Exception as e:
-    logger.error(f"Failed to initialize hook serializers: {e}")
--- a/comfy/isolation/model_sampling_proxy.py
+++ b/comfy/isolation/model_sampling_proxy.py
@@ -1,360 +0,0 @@
-# pylint: disable=import-outside-toplevel
-from __future__ import annotations
-
-import asyncio
-import logging
-import os
-import threading
-import time
-from typing import Any
-
-from comfy.isolation.proxies.base import (
-    BaseProxy,
-    BaseRegistry,
-    detach_if_grad,
-    get_thread_loop,
-    run_coro_in_new_loop,
-)
-
-logger = logging.getLogger(__name__)
-
-
-def _describe_value(obj: Any) -> str:
-    try:
-        import torch
-    except Exception:
-        torch = None
-    try:
-        if torch is not None and isinstance(obj, torch.Tensor):
-            return (
-                "Tensor(shape=%s,dtype=%s,device=%s,id=%s)"
-                % (tuple(obj.shape), obj.dtype, obj.device, id(obj))
-            )
-    except Exception:
-        pass
-    return "%s(id=%s)" % (type(obj).__name__, id(obj))
-
-
-def _prefer_device(*tensors: Any) -> Any:
-    try:
-        import torch
-    except Exception:
-        return None
-    for t in tensors:
-        if isinstance(t, torch.Tensor) and t.is_cuda:
-            return t.device
-    for t in tensors:
-        if isinstance(t, torch.Tensor):
-            return t.device
-    return None
-
-
-def _to_device(obj: Any, device: Any) -> Any:
-    try:
-        import torch
-    except Exception:
-        return obj
-    if device is None:
-        return obj
-    if isinstance(obj, torch.Tensor):
-        if obj.device != device:
-            return obj.to(device)
-        return obj
-    if isinstance(obj, (list, tuple)):
-        converted = [_to_device(x, device) for x in obj]
-        return type(obj)(converted) if isinstance(obj, tuple) else converted
-    if isinstance(obj, dict):
-        return {k: _to_device(v, device) for k, v in obj.items()}
-    return obj
-
-
-def _to_cpu_for_rpc(obj: Any) -> Any:
-    try:
-        import torch
-    except Exception:
-        return obj
-    if isinstance(obj, torch.Tensor):
-        t = obj.detach() if obj.requires_grad else obj
-        if t.is_cuda:
-            return t.to("cpu")
-        return t
-    if isinstance(obj, (list, tuple)):
-        converted = [_to_cpu_for_rpc(x) for x in obj]
-        return type(obj)(converted) if isinstance(obj, tuple) else converted
-    if isinstance(obj, dict):
-        return {k: _to_cpu_for_rpc(v) for k, v in obj.items()}
-    return obj
-
-
-class ModelSamplingRegistry(BaseRegistry[Any]):
-    _type_prefix = "modelsampling"
-
-    async def calculate_input(self, instance_id: str, sigma: Any, noise: Any) -> Any:
-        sampling = self._get_instance(instance_id)
-        return detach_if_grad(sampling.calculate_input(sigma, noise))
-
-    async def calculate_denoised(
-        self, instance_id: str, sigma: Any, model_output: Any, model_input: Any
-    ) -> Any:
-        sampling = self._get_instance(instance_id)
-        return detach_if_grad(
-            sampling.calculate_denoised(sigma, model_output, model_input)
-        )
-
-    async def noise_scaling(
-        self,
-        instance_id: str,
-        sigma: Any,
-        noise: Any,
-        latent_image: Any,
-        max_denoise: bool = False,
-    ) -> Any:
-        sampling = self._get_instance(instance_id)
-        return detach_if_grad(
-            sampling.noise_scaling(sigma, noise, latent_image, max_denoise=max_denoise)
-        )
-
-    async def inverse_noise_scaling(
-        self, instance_id: str, sigma: Any, latent: Any
-    ) -> Any:
-        sampling = self._get_instance(instance_id)
-        return detach_if_grad(sampling.inverse_noise_scaling(sigma, latent))
-
-    async def timestep(self, instance_id: str, sigma: Any) -> Any:
-        sampling = self._get_instance(instance_id)
-        return sampling.timestep(sigma)
-
-    async def sigma(self, instance_id: str, timestep: Any) -> Any:
-        sampling = self._get_instance(instance_id)
-        return sampling.sigma(timestep)
-
-    async def percent_to_sigma(self, instance_id: str, percent: float) -> Any:
-        sampling = self._get_instance(instance_id)
-        return sampling.percent_to_sigma(percent)
-
-    async def get_sigma_min(self, instance_id: str) -> Any:
-        sampling = self._get_instance(instance_id)
-        return detach_if_grad(sampling.sigma_min)
-
-    async def get_sigma_max(self, instance_id: str) -> Any:
-        sampling = self._get_instance(instance_id)
-        return detach_if_grad(sampling.sigma_max)
-
-    async def get_sigma_data(self, instance_id: str) -> Any:
-        sampling = self._get_instance(instance_id)
-        return detach_if_grad(sampling.sigma_data)
-
-    async def get_sigmas(self, instance_id: str) -> Any:
-        sampling = self._get_instance(instance_id)
-        return detach_if_grad(sampling.sigmas)
-
-    async def set_sigmas(self, instance_id: str, sigmas: Any) -> None:
-        sampling = self._get_instance(instance_id)
-        sampling.set_sigmas(sigmas)
-
-
-class ModelSamplingProxy(BaseProxy[ModelSamplingRegistry]):
-    _registry_class = ModelSamplingRegistry
-    __module__ = "comfy.isolation.model_sampling_proxy"
-
-    def _get_rpc(self) -> Any:
-        if self._rpc_caller is None:
-            from pyisolate._internal.rpc_protocol import get_child_rpc_instance
-
-            rpc = get_child_rpc_instance()
-            if rpc is not None:
-                self._rpc_caller = rpc.create_caller(
-                    ModelSamplingRegistry, ModelSamplingRegistry.get_remote_id()
-                )
-            else:
-                registry = ModelSamplingRegistry()
-
-                class _LocalCaller:
-                    def calculate_input(
-                        self, instance_id: str, sigma: Any, noise: Any
-                    ) -> Any:
-                        return registry.calculate_input(instance_id, sigma, noise)
-
-                    def calculate_denoised(
-                        self,
-                        instance_id: str,
-                        sigma: Any,
-                        model_output: Any,
-                        model_input: Any,
-                    ) -> Any:
-                        return registry.calculate_denoised(
-                            instance_id, sigma, model_output, model_input
-                        )
-
-                    def noise_scaling(
-                        self,
-                        instance_id: str,
-                        sigma: Any,
-                        noise: Any,
-                        latent_image: Any,
-                        max_denoise: bool = False,
-                    ) -> Any:
-                        return registry.noise_scaling(
-                            instance_id, sigma, noise, latent_image, max_denoise
-                        )
-
-                    def inverse_noise_scaling(
-                        self, instance_id: str, sigma: Any, latent: Any
-                    ) -> Any:
-                        return registry.inverse_noise_scaling(
-                            instance_id, sigma, latent
-                        )
-
-                    def timestep(self, instance_id: str, sigma: Any) -> Any:
-                        return registry.timestep(instance_id, sigma)
-
-                    def sigma(self, instance_id: str, timestep: Any) -> Any:
-                        return registry.sigma(instance_id, timestep)
-
-                    def percent_to_sigma(self, instance_id: str, percent: float) -> Any:
-                        return registry.percent_to_sigma(instance_id, percent)
-
-                    def get_sigma_min(self, instance_id: str) -> Any:
-                        return registry.get_sigma_min(instance_id)
-
-                    def get_sigma_max(self, instance_id: str) -> Any:
-                        return registry.get_sigma_max(instance_id)
-
-                    def get_sigma_data(self, instance_id: str) -> Any:
-                        return registry.get_sigma_data(instance_id)
-
-                    def get_sigmas(self, instance_id: str) -> Any:
-                        return registry.get_sigmas(instance_id)
-
-                    def set_sigmas(self, instance_id: str, sigmas: Any) -> None:
-                        return registry.set_sigmas(instance_id, sigmas)
-
-                self._rpc_caller = _LocalCaller()
-        return self._rpc_caller
-
-    def _call(self, method_name: str, *args: Any) -> Any:
-        rpc = self._get_rpc()
-        method = getattr(rpc, method_name)
-        result = method(self._instance_id, *args)
-        timeout_ms = self._rpc_timeout_ms()
-        start_epoch = time.time()
-        start_perf = time.perf_counter()
-        thread_id = threading.get_ident()
-        call_id = "%s:%s:%s:%.6f" % (
-            self._instance_id,
-            method_name,
-            thread_id,
-            start_perf,
-        )
-        logger.debug(
-            "ISO:modelsampling_rpc_start method=%s instance_id=%s call_id=%s start_ts=%.6f thread=%s timeout_ms=%s",
-            method_name,
-            self._instance_id,
-            call_id,
-            start_epoch,
-            thread_id,
-            timeout_ms,
-        )
-        if asyncio.iscoroutine(result):
-            result = asyncio.wait_for(result, timeout=timeout_ms / 1000.0)
-            try:
-                asyncio.get_running_loop()
-                out = run_coro_in_new_loop(result)
-            except RuntimeError:
-                loop = get_thread_loop()
-                out = loop.run_until_complete(result)
-        else:
-            out = result
-        logger.debug(
-            "ISO:modelsampling_rpc_after_await method=%s instance_id=%s call_id=%s out=%s",
-            method_name,
-            self._instance_id,
-            call_id,
-            _describe_value(out),
-        )
-        elapsed_ms = (time.perf_counter() - start_perf) * 1000.0
-        logger.debug(
-            "ISO:modelsampling_rpc_end method=%s instance_id=%s call_id=%s elapsed_ms=%.3f thread=%s",
-            method_name,
-            self._instance_id,
-            call_id,
-            elapsed_ms,
-            thread_id,
-        )
-        logger.debug(
-            "ISO:modelsampling_rpc_return method=%s instance_id=%s call_id=%s",
-            method_name,
-            self._instance_id,
-            call_id,
-        )
-        return out
-
-    @staticmethod
-    def _rpc_timeout_ms() -> int:
-        raw = os.environ.get(
-            "COMFY_ISOLATION_MODEL_SAMPLING_RPC_TIMEOUT_MS",
-            os.environ.get("COMFY_ISOLATION_LOAD_RPC_TIMEOUT_MS", "30000"),
-        )
-        try:
-            timeout_ms = int(raw)
-        except ValueError:
-            timeout_ms = 30000
-        return max(1, timeout_ms)
-
-    @property
-    def sigma_min(self) -> Any:
-        return self._call("get_sigma_min")
-
-    @property
-    def sigma_max(self) -> Any:
-        return self._call("get_sigma_max")
-
-    @property
-    def sigma_data(self) -> Any:
-        return self._call("get_sigma_data")
-
-    @property
-    def sigmas(self) -> Any:
-        return self._call("get_sigmas")
-
-    def calculate_input(self, sigma: Any, noise: Any) -> Any:
-        return self._call("calculate_input", sigma, noise)
-
-    def calculate_denoised(
-        self, sigma: Any, model_output: Any, model_input: Any
-    ) -> Any:
-        return self._call("calculate_denoised", sigma, model_output, model_input)
-
-    def noise_scaling(
-        self, sigma: Any, noise: Any, latent_image: Any, max_denoise: bool = False
-    ) -> Any:
-        preferred_device = _prefer_device(noise, latent_image)
-        out = self._call(
-            "noise_scaling",
-            _to_cpu_for_rpc(sigma),
-            _to_cpu_for_rpc(noise),
-            _to_cpu_for_rpc(latent_image),
-            max_denoise,
-        )
-        return _to_device(out, preferred_device)
-
-    def inverse_noise_scaling(self, sigma: Any, latent: Any) -> Any:
-        preferred_device = _prefer_device(latent)
-        out = self._call(
-            "inverse_noise_scaling",
-            _to_cpu_for_rpc(sigma),
-            _to_cpu_for_rpc(latent),
-        )
-        return _to_device(out, preferred_device)
-
-    def timestep(self, sigma: Any) -> Any:
-        return self._call("timestep", sigma)
-
-    def sigma(self, timestep: Any) -> Any:
-        return self._call("sigma", timestep)
-
-    def percent_to_sigma(self, percent: float) -> Any:
-        return self._call("percent_to_sigma", percent)
-
-    def set_sigmas(self, sigmas: Any) -> None:
-        return self._call("set_sigmas", sigmas)
--- a/comfy/isolation/proxies/init.py
+++ b/comfy/isolation/proxies/init.py
@@ -1,17 +0,0 @@
-from .base import (
-    IS_CHILD_PROCESS,
-    BaseProxy,
-    BaseRegistry,
-    detach_if_grad,
-    get_thread_loop,
-    run_coro_in_new_loop,
-)
-
-__all__ = [
-    "IS_CHILD_PROCESS",
-    "BaseRegistry",
-    "BaseProxy",
-    "get_thread_loop",
-    "run_coro_in_new_loop",
-    "detach_if_grad",
-]
--- a/comfy/isolation/proxies/base.py
+++ b/comfy/isolation/proxies/base.py
@@ -1,301 +0,0 @@
-# pylint: disable=global-statement,import-outside-toplevel,protected-access
-from __future__ import annotations
-
-import asyncio
-import concurrent.futures
-import logging
-import os
-import threading
-import time
-import weakref
-from typing import Any, Callable, Dict, Generic, Optional, TypeVar
-
-try:
-    from pyisolate import ProxiedSingleton
-except ImportError:
-
-    class ProxiedSingleton:  # type: ignore[no-redef]
-        pass
-
-
-logger = logging.getLogger(__name__)
-
-IS_CHILD_PROCESS = os.environ.get("PYISOLATE_CHILD") == "1"
-_thread_local = threading.local()
-T = TypeVar("T")
-
-
-def get_thread_loop() -> asyncio.AbstractEventLoop:
-    loop = getattr(_thread_local, "loop", None)
-    if loop is None or loop.is_closed():
-        loop = asyncio.new_event_loop()
-        _thread_local.loop = loop
-    return loop
-
-
-def run_coro_in_new_loop(coro: Any) -> Any:
-    result_box: Dict[str, Any] = {}
-    exc_box: Dict[str, BaseException] = {}
-
-    def runner() -> None:
-        loop = asyncio.new_event_loop()
-        asyncio.set_event_loop(loop)
-        try:
-            result_box["value"] = loop.run_until_complete(coro)
-        except Exception as exc:  # noqa: BLE001
-            exc_box["exc"] = exc
-        finally:
-            loop.close()
-
-    t = threading.Thread(target=runner, daemon=True)
-    t.start()
-    t.join()
-    if "exc" in exc_box:
-        raise exc_box["exc"]
-    return result_box.get("value")
-
-
-def detach_if_grad(obj: Any) -> Any:
-    try:
-        import torch
-    except Exception:
-        return obj
-
-    if isinstance(obj, torch.Tensor):
-        return obj.detach() if obj.requires_grad else obj
-    if isinstance(obj, (list, tuple)):
-        return type(obj)(detach_if_grad(x) for x in obj)
-    if isinstance(obj, dict):
-        return {k: detach_if_grad(v) for k, v in obj.items()}
-    return obj
-
-
-class BaseRegistry(ProxiedSingleton, Generic[T]):
-    _type_prefix: str = "base"
-
-    def __init__(self) -> None:
-        if hasattr(ProxiedSingleton, "__init__") and ProxiedSingleton is not object:
-            super().__init__()
-        self._registry: Dict[str, T] = {}
-        self._id_map: Dict[int, str] = {}
-        self._counter = 0
-        self._lock = threading.Lock()
-
-    def register(self, instance: T) -> str:
-        with self._lock:
-            obj_id = id(instance)
-            if obj_id in self._id_map:
-                return self._id_map[obj_id]
-            instance_id = f"{self._type_prefix}_{self._counter}"
-            self._counter += 1
-            self._registry[instance_id] = instance
-            self._id_map[obj_id] = instance_id
-        return instance_id
-
-    def unregister_sync(self, instance_id: str) -> None:
-        with self._lock:
-            instance = self._registry.pop(instance_id, None)
-            if instance:
-                self._id_map.pop(id(instance), None)
-
-    def _get_instance(self, instance_id: str) -> T:
-        if IS_CHILD_PROCESS:
-            raise RuntimeError(
-                f"[{self.__class__.__name__}] _get_instance called in child"
-            )
-        with self._lock:
-            instance = self._registry.get(instance_id)
-        if instance is None:
-            raise ValueError(f"{instance_id} not found")
-        return instance
-
-
-_GLOBAL_LOOP: Optional[asyncio.AbstractEventLoop] = None
-
-
-def set_global_loop(loop: asyncio.AbstractEventLoop) -> None:
-    global _GLOBAL_LOOP
-    _GLOBAL_LOOP = loop
-
-
-def run_sync_rpc_coro(coro: Any, timeout_ms: Optional[int] = None) -> Any:
-    if timeout_ms is not None:
-        coro = asyncio.wait_for(coro, timeout=timeout_ms / 1000.0)
-
-    try:
-        if _GLOBAL_LOOP is not None and _GLOBAL_LOOP.is_running():
-            try:
-                curr_loop = asyncio.get_running_loop()
-                if curr_loop is _GLOBAL_LOOP:
-                    pass
-            except RuntimeError:
-                future = asyncio.run_coroutine_threadsafe(coro, _GLOBAL_LOOP)
-                return future.result(
-                    timeout=(timeout_ms / 1000.0) if timeout_ms is not None else None
-                )
-
-        try:
-            asyncio.get_running_loop()
-            return run_coro_in_new_loop(coro)
-        except RuntimeError:
-            loop = get_thread_loop()
-            return loop.run_until_complete(coro)
-    except asyncio.TimeoutError as exc:
-        raise TimeoutError(f"Isolation RPC timeout (timeout_ms={timeout_ms})") from exc
-    except concurrent.futures.TimeoutError as exc:
-        raise TimeoutError(f"Isolation RPC timeout (timeout_ms={timeout_ms})") from exc
-
-
-def call_singleton_rpc(
-    caller: Any,
-    method_name: str,
-    *args: Any,
-    timeout_ms: Optional[int] = None,
-    **kwargs: Any,
-) -> Any:
-    if caller is None:
-        raise RuntimeError(f"No RPC caller available for {method_name}")
-    method = getattr(caller, method_name)
-    return run_sync_rpc_coro(method(*args, **kwargs), timeout_ms=timeout_ms)
-
-
-class BaseProxy(Generic[T]):
-    _registry_class: type = BaseRegistry  # type: ignore[type-arg]
-    __module__: str = "comfy.isolation.proxies.base"
-    _TIMEOUT_RPC_METHODS = frozenset(
-        {
-            "partially_load",
-            "partially_unload",
-            "load",
-            "patch_model",
-            "unpatch_model",
-            "inner_model_apply_model",
-            "memory_required",
-            "model_dtype",
-            "inner_model_memory_required",
-            "inner_model_extra_conds_shapes",
-            "inner_model_extra_conds",
-            "process_latent_in",
-            "process_latent_out",
-            "scale_latent_inpaint",
-        }
-    )
-
-    def __init__(
-        self,
-        instance_id: str,
-        registry: Optional[Any] = None,
-        manage_lifecycle: bool = False,
-    ) -> None:
-        self._instance_id = instance_id
-        self._rpc_caller: Optional[Any] = None
-        self._registry = registry if registry is not None else self._registry_class()
-        self._manage_lifecycle = manage_lifecycle
-        self._cleaned_up = False
-        if manage_lifecycle and not IS_CHILD_PROCESS:
-            self._finalizer = weakref.finalize(
-                self, self._registry.unregister_sync, instance_id
-            )
-
-    def _get_rpc(self) -> Any:
-        if self._rpc_caller is None:
-            from pyisolate._internal.rpc_protocol import get_child_rpc_instance
-
-            rpc = get_child_rpc_instance()
-            if rpc is None:
-                raise RuntimeError(f"[{self.__class__.__name__}] No RPC in child")
-            self._rpc_caller = rpc.create_caller(
-                self._registry_class, self._registry_class.get_remote_id()
-            )
-        return self._rpc_caller
-
-    def _rpc_timeout_ms_for_method(self, method_name: str) -> Optional[int]:
-        if method_name not in self._TIMEOUT_RPC_METHODS:
-            return None
-        try:
-            timeout_ms = int(
-                os.environ.get("COMFY_ISOLATION_LOAD_RPC_TIMEOUT_MS", "120000")
-            )
-        except ValueError:
-            timeout_ms = 120000
-        return max(1, timeout_ms)
-
-    def _call_rpc(self, method_name: str, *args: Any, **kwargs: Any) -> Any:
-        rpc = self._get_rpc()
-        method = getattr(rpc, method_name)
-        timeout_ms = self._rpc_timeout_ms_for_method(method_name)
-        coro = method(self._instance_id, *args, **kwargs)
-        if timeout_ms is not None:
-            coro = asyncio.wait_for(coro, timeout=timeout_ms / 1000.0)
-
-        start_epoch = time.time()
-        start_perf = time.perf_counter()
-        thread_id = threading.get_ident()
-        try:
-            running_loop = asyncio.get_running_loop()
-            loop_id: Optional[int] = id(running_loop)
-        except RuntimeError:
-            loop_id = None
-        logger.debug(
-            "ISO:rpc_start proxy=%s method=%s instance_id=%s start_ts=%.6f "
-            "thread=%s loop=%s timeout_ms=%s",
-            self.__class__.__name__,
-            method_name,
-            self._instance_id,
-            start_epoch,
-            thread_id,
-            loop_id,
-            timeout_ms,
-        )
-
-        try:
-            return run_sync_rpc_coro(coro, timeout_ms=timeout_ms)
-        except TimeoutError as exc:
-            raise TimeoutError(
-                f"Isolation RPC timeout in {self.__class__.__name__}.{method_name} "
-                f"(instance_id={self._instance_id}, timeout_ms={timeout_ms})"
-            ) from exc
-        finally:
-            end_epoch = time.time()
-            elapsed_ms = (time.perf_counter() - start_perf) * 1000.0
-            logger.debug(
-                "ISO:rpc_end proxy=%s method=%s instance_id=%s end_ts=%.6f "
-                "elapsed_ms=%.3f thread=%s loop=%s",
-                self.__class__.__name__,
-                method_name,
-                self._instance_id,
-                end_epoch,
-                elapsed_ms,
-                thread_id,
-                loop_id,
-            )
-
-    def __getstate__(self) -> Dict[str, Any]:
-        return {"_instance_id": self._instance_id}
-
-    def __setstate__(self, state: Dict[str, Any]) -> None:
-        self._instance_id = state["_instance_id"]
-        self._rpc_caller = None
-        self._registry = self._registry_class()
-        self._manage_lifecycle = False
-        self._cleaned_up = False
-
-    def cleanup(self) -> None:
-        if self._cleaned_up or IS_CHILD_PROCESS:
-            return
-        self._cleaned_up = True
-        finalizer = getattr(self, "_finalizer", None)
-        if finalizer is not None:
-            finalizer.detach()
-        self._registry.unregister_sync(self._instance_id)
-
-    def __repr__(self) -> str:
-        return f"<{self.__class__.__name__} {self._instance_id}>"
-
-
-def create_rpc_method(method_name: str) -> Callable[..., Any]:
-    def method(self: BaseProxy[Any], *args: Any, **kwargs: Any) -> Any:
-        return self._call_rpc(method_name, *args, **kwargs)
-
-    method.__name__ = method_name
-    return method
--- a/comfy/isolation/proxies/folder_paths_proxy.py
+++ b/comfy/isolation/proxies/folder_paths_proxy.py
@@ -1,202 +0,0 @@
-from __future__ import annotations
-import os
-from typing import Any, Dict, Optional
-
-from pyisolate import ProxiedSingleton
-
-from .base import call_singleton_rpc
-
-
-def _folder_paths():
-    import folder_paths
-
-    return folder_paths
-
-
-def _is_child_process() -> bool:
-    return os.environ.get("PYISOLATE_CHILD") == "1"
-
-
-def _serialize_folder_names_and_paths(data: dict[str, tuple[list[str], set[str]]]) -> dict[str, dict[str, list[str]]]:
-    return {
-        key: {"paths": list(paths), "extensions": sorted(list(extensions))}
-        for key, (paths, extensions) in data.items()
-    }
-
-
-def _deserialize_folder_names_and_paths(data: dict[str, dict[str, list[str]]]) -> dict[str, tuple[list[str], set[str]]]:
-    return {
-        key: (list(value.get("paths", [])), set(value.get("extensions", [])))
-        for key, value in data.items()
-    }
-
-
-class FolderPathsProxy(ProxiedSingleton):
-    """
-    Dynamic proxy for folder_paths.
-    Uses __getattr__ for most lookups, with explicit handling for
-    mutable collections to ensure efficient by-value transfer.
-    """
-
-    _rpc: Optional[Any] = None
-
-    @classmethod
-    def set_rpc(cls, rpc: Any) -> None:
-        cls._rpc = rpc.create_caller(cls, cls.get_remote_id())
-
-    @classmethod
-    def clear_rpc(cls) -> None:
-        cls._rpc = None
-
-    @classmethod
-    def _get_caller(cls) -> Any:
-        if cls._rpc is None:
-            raise RuntimeError("FolderPathsProxy RPC caller is not configured")
-        return cls._rpc
-
-    def __getattr__(self, name):
-        if _is_child_process():
-            property_rpc = {
-                "models_dir": "rpc_get_models_dir",
-                "folder_names_and_paths": "rpc_get_folder_names_and_paths",
-                "extension_mimetypes_cache": "rpc_get_extension_mimetypes_cache",
-                "filename_list_cache": "rpc_get_filename_list_cache",
-            }
-            rpc_name = property_rpc.get(name)
-            if rpc_name is not None:
-                return call_singleton_rpc(self._get_caller(), rpc_name)
-            raise AttributeError(name)
-        return getattr(_folder_paths(), name)
-
-    @property
-    def folder_names_and_paths(self) -> Dict:
-        if _is_child_process():
-            payload = call_singleton_rpc(self._get_caller(), "rpc_get_folder_names_and_paths")
-            return _deserialize_folder_names_and_paths(payload)
-        return _folder_paths().folder_names_and_paths
-
-    @property
-    def extension_mimetypes_cache(self) -> Dict:
-        if _is_child_process():
-            return dict(call_singleton_rpc(self._get_caller(), "rpc_get_extension_mimetypes_cache"))
-        return dict(_folder_paths().extension_mimetypes_cache)
-
-    @property
-    def filename_list_cache(self) -> Dict:
-        if _is_child_process():
-            return dict(call_singleton_rpc(self._get_caller(), "rpc_get_filename_list_cache"))
-        return dict(_folder_paths().filename_list_cache)
-
-    @property
-    def models_dir(self) -> str:
-        if _is_child_process():
-            return str(call_singleton_rpc(self._get_caller(), "rpc_get_models_dir"))
-        return _folder_paths().models_dir
-
-    def get_temp_directory(self) -> str:
-        if _is_child_process():
-            return call_singleton_rpc(self._get_caller(), "rpc_get_temp_directory")
-        return _folder_paths().get_temp_directory()
-
-    def get_input_directory(self) -> str:
-        if _is_child_process():
-            return call_singleton_rpc(self._get_caller(), "rpc_get_input_directory")
-        return _folder_paths().get_input_directory()
-
-    def get_output_directory(self) -> str:
-        if _is_child_process():
-            return call_singleton_rpc(self._get_caller(), "rpc_get_output_directory")
-        return _folder_paths().get_output_directory()
-
-    def get_user_directory(self) -> str:
-        if _is_child_process():
-            return call_singleton_rpc(self._get_caller(), "rpc_get_user_directory")
-        return _folder_paths().get_user_directory()
-
-    def get_annotated_filepath(self, name: str, default_dir: str | None = None) -> str:
-        if _is_child_process():
-            return call_singleton_rpc(
-                self._get_caller(), "rpc_get_annotated_filepath", name, default_dir
-            )
-        return _folder_paths().get_annotated_filepath(name, default_dir)
-
-    def exists_annotated_filepath(self, name: str) -> bool:
-        if _is_child_process():
-            return bool(
-                call_singleton_rpc(self._get_caller(), "rpc_exists_annotated_filepath", name)
-            )
-        return bool(_folder_paths().exists_annotated_filepath(name))
-
-    def add_model_folder_path(
-        self, folder_name: str, full_folder_path: str, is_default: bool = False
-    ) -> None:
-        if _is_child_process():
-            call_singleton_rpc(
-                self._get_caller(),
-                "rpc_add_model_folder_path",
-                folder_name,
-                full_folder_path,
-                is_default,
-            )
-            return None
-        _folder_paths().add_model_folder_path(folder_name, full_folder_path, is_default)
-        return None
-
-    def get_folder_paths(self, folder_name: str) -> list[str]:
-        if _is_child_process():
-            return list(call_singleton_rpc(self._get_caller(), "rpc_get_folder_paths", folder_name))
-        return list(_folder_paths().get_folder_paths(folder_name))
-
-    def get_filename_list(self, folder_name: str) -> list[str]:
-        if _is_child_process():
-            return list(call_singleton_rpc(self._get_caller(), "rpc_get_filename_list", folder_name))
-        return list(_folder_paths().get_filename_list(folder_name))
-
-    def get_full_path(self, folder_name: str, filename: str) -> str | None:
-        if _is_child_process():
-            return call_singleton_rpc(self._get_caller(), "rpc_get_full_path", folder_name, filename)
-        return _folder_paths().get_full_path(folder_name, filename)
-
-    async def rpc_get_models_dir(self) -> str:
-        return _folder_paths().models_dir
-
-    async def rpc_get_folder_names_and_paths(self) -> dict[str, dict[str, list[str]]]:
-        return _serialize_folder_names_and_paths(_folder_paths().folder_names_and_paths)
-
-    async def rpc_get_extension_mimetypes_cache(self) -> dict[str, Any]:
-        return dict(_folder_paths().extension_mimetypes_cache)
-
-    async def rpc_get_filename_list_cache(self) -> dict[str, Any]:
-        return dict(_folder_paths().filename_list_cache)
-
-    async def rpc_get_temp_directory(self) -> str:
-        return _folder_paths().get_temp_directory()
-
-    async def rpc_get_input_directory(self) -> str:
-        return _folder_paths().get_input_directory()
-
-    async def rpc_get_output_directory(self) -> str:
-        return _folder_paths().get_output_directory()
-
-    async def rpc_get_user_directory(self) -> str:
-        return _folder_paths().get_user_directory()
-
-    async def rpc_get_annotated_filepath(self, name: str, default_dir: str | None = None) -> str:
-        return _folder_paths().get_annotated_filepath(name, default_dir)
-
-    async def rpc_exists_annotated_filepath(self, name: str) -> bool:
-        return _folder_paths().exists_annotated_filepath(name)
-
-    async def rpc_add_model_folder_path(
-        self, folder_name: str, full_folder_path: str, is_default: bool = False
-    ) -> None:
-        _folder_paths().add_model_folder_path(folder_name, full_folder_path, is_default)
-
-    async def rpc_get_folder_paths(self, folder_name: str) -> list[str]:
-        return _folder_paths().get_folder_paths(folder_name)
-
-    async def rpc_get_filename_list(self, folder_name: str) -> list[str]:
-        return _folder_paths().get_filename_list(folder_name)
-
-    async def rpc_get_full_path(self, folder_name: str, filename: str) -> str | None:
-        return _folder_paths().get_full_path(folder_name, filename)
--- a/comfy/isolation/proxies/helper_proxies.py
+++ b/comfy/isolation/proxies/helper_proxies.py
@@ -1,158 +0,0 @@
-from __future__ import annotations
-
-import os
-from typing import Any, Dict, Optional
-
-from pyisolate import ProxiedSingleton
-
-from .base import call_singleton_rpc
-
-
-class AnyTypeProxy(str):
-    """Replacement for custom AnyType objects used by some nodes."""
-
-    def __new__(cls, value: str = "*"):
-        return super().__new__(cls, value)
-
-    def __ne__(self, other):  # type: ignore[override]
-        return False
-
-
-class FlexibleOptionalInputProxy(dict):
-    """Replacement for FlexibleOptionalInputType to allow dynamic inputs."""
-
-    def __init__(self, flex_type, data: Optional[Dict[str, object]] = None):
-        super().__init__()
-        self.type = flex_type
-        if data:
-            self.update(data)
-
-    def __getitem__(self, key):  # type: ignore[override]
-        return (self.type,)
-
-    def __contains__(self, key):  # type: ignore[override]
-        return True
-
-
-class ByPassTypeTupleProxy(tuple):
-    """Replacement for ByPassTypeTuple to mirror wildcard fallback behavior."""
-
-    def __new__(cls, values):
-        return super().__new__(cls, values)
-
-    def __getitem__(self, index):  # type: ignore[override]
-        if index >= len(self):
-            return AnyTypeProxy("*")
-        return super().__getitem__(index)
-
-
-def _restore_special_value(value: Any) -> Any:
-    if isinstance(value, dict):
-        if value.get("__pyisolate_any_type__"):
-            return AnyTypeProxy(value.get("value", "*"))
-        if value.get("__pyisolate_flexible_optional__"):
-            flex_type = _restore_special_value(value.get("type"))
-            data_raw = value.get("data")
-            data = (
-                {k: _restore_special_value(v) for k, v in data_raw.items()}
-                if isinstance(data_raw, dict)
-                else {}
-            )
-            return FlexibleOptionalInputProxy(flex_type, data)
-        if value.get("__pyisolate_tuple__") is not None:
-            return tuple(
-                _restore_special_value(v) for v in value["__pyisolate_tuple__"]
-            )
-        if value.get("__pyisolate_bypass_tuple__") is not None:
-            return ByPassTypeTupleProxy(
-                tuple(
-                    _restore_special_value(v)
-                    for v in value["__pyisolate_bypass_tuple__"]
-                )
-            )
-        return {k: _restore_special_value(v) for k, v in value.items()}
-    if isinstance(value, list):
-        return [_restore_special_value(v) for v in value]
-    return value
-
-
-def _serialize_special_value(value: Any) -> Any:
-    if isinstance(value, AnyTypeProxy):
-        return {"__pyisolate_any_type__": True, "value": str(value)}
-    if isinstance(value, FlexibleOptionalInputProxy):
-        return {
-            "__pyisolate_flexible_optional__": True,
-            "type": _serialize_special_value(value.type),
-            "data": {k: _serialize_special_value(v) for k, v in value.items()},
-        }
-    if isinstance(value, ByPassTypeTupleProxy):
-        return {
-            "__pyisolate_bypass_tuple__": [_serialize_special_value(v) for v in value]
-        }
-    if isinstance(value, tuple):
-        return {"__pyisolate_tuple__": [_serialize_special_value(v) for v in value]}
-    if isinstance(value, list):
-        return [_serialize_special_value(v) for v in value]
-    if isinstance(value, dict):
-        return {k: _serialize_special_value(v) for k, v in value.items()}
-    return value
-
-
-def _restore_input_types_local(raw: Dict[str, object]) -> Dict[str, object]:
-    if not isinstance(raw, dict):
-        return raw  # type: ignore[return-value]
-
-    restored: Dict[str, object] = {}
-    for section, entries in raw.items():
-        if isinstance(entries, dict) and entries.get("__pyisolate_flexible_optional__"):
-            restored[section] = _restore_special_value(entries)
-        elif isinstance(entries, dict):
-            restored[section] = {
-                k: _restore_special_value(v) for k, v in entries.items()
-            }
-        else:
-            restored[section] = _restore_special_value(entries)
-    return restored
-
-
-class HelperProxiesService(ProxiedSingleton):
-    _rpc: Optional[Any] = None
-
-    @classmethod
-    def set_rpc(cls, rpc: Any) -> None:
-        cls._rpc = rpc.create_caller(cls, cls.get_remote_id())
-
-    @classmethod
-    def clear_rpc(cls) -> None:
-        cls._rpc = None
-
-    @classmethod
-    def _get_caller(cls) -> Any:
-        if cls._rpc is None:
-            raise RuntimeError("HelperProxiesService RPC caller is not configured")
-        return cls._rpc
-
-    async def rpc_restore_input_types(self, raw: Dict[str, object]) -> Dict[str, object]:
-        restored = _restore_input_types_local(raw)
-        return _serialize_special_value(restored)
-
-
-def restore_input_types(raw: Dict[str, object]) -> Dict[str, object]:
-    """Restore serialized INPUT_TYPES payload back into ComfyUI-compatible objects."""
-    if os.environ.get("PYISOLATE_CHILD") == "1":
-        payload = call_singleton_rpc(
-            HelperProxiesService._get_caller(),
-            "rpc_restore_input_types",
-            raw,
-        )
-        return _restore_input_types_local(payload)
-    return _restore_input_types_local(raw)
-
-
-__all__ = [
-    "AnyTypeProxy",
-    "FlexibleOptionalInputProxy",
-    "ByPassTypeTupleProxy",
-    "HelperProxiesService",
-    "restore_input_types",
-]
--- a/comfy/isolation/proxies/model_management_proxy.py
+++ b/comfy/isolation/proxies/model_management_proxy.py
@@ -1,142 +0,0 @@
-from __future__ import annotations
-
-import os
-from typing import Any, Optional
-
-from pyisolate import ProxiedSingleton
-
-from .base import call_singleton_rpc
-
-
-def _mm():
-    import comfy.model_management
-
-    return comfy.model_management
-
-
-def _is_child_process() -> bool:
-    return os.environ.get("PYISOLATE_CHILD") == "1"
-
-
-class TorchDeviceProxy:
-    def __init__(self, device_str: str):
-        self._device_str = device_str
-        if ":" in device_str:
-            device_type, index = device_str.split(":", 1)
-            self.type = device_type
-            self.index = int(index)
-        else:
-            self.type = device_str
-            self.index = None
-
-    def __str__(self) -> str:
-        return self._device_str
-
-    def __repr__(self) -> str:
-        return f"TorchDeviceProxy({self._device_str!r})"
-
-
-def _serialize_value(value: Any) -> Any:
-    value_type = type(value)
-    if value_type.__module__ == "torch" and value_type.__name__ == "device":
-        return {"__pyisolate_torch_device__": str(value)}
-    if isinstance(value, TorchDeviceProxy):
-        return {"__pyisolate_torch_device__": str(value)}
-    if isinstance(value, tuple):
-        return {"__pyisolate_tuple__": [_serialize_value(item) for item in value]}
-    if isinstance(value, list):
-        return [_serialize_value(item) for item in value]
-    if isinstance(value, dict):
-        return {key: _serialize_value(inner) for key, inner in value.items()}
-    return value
-
-
-def _deserialize_value(value: Any) -> Any:
-    if isinstance(value, dict):
-        if "__pyisolate_torch_device__" in value:
-            return TorchDeviceProxy(value["__pyisolate_torch_device__"])
-        if "__pyisolate_tuple__" in value:
-            return tuple(_deserialize_value(item) for item in value["__pyisolate_tuple__"])
-        return {key: _deserialize_value(inner) for key, inner in value.items()}
-    if isinstance(value, list):
-        return [_deserialize_value(item) for item in value]
-    return value
-
-
-def _normalize_argument(value: Any) -> Any:
-    if isinstance(value, TorchDeviceProxy):
-        import torch
-
-        return torch.device(str(value))
-    if isinstance(value, dict):
-        if "__pyisolate_torch_device__" in value:
-            import torch
-
-            return torch.device(value["__pyisolate_torch_device__"])
-        if "__pyisolate_tuple__" in value:
-            return tuple(_normalize_argument(item) for item in value["__pyisolate_tuple__"])
-        return {key: _normalize_argument(inner) for key, inner in value.items()}
-    if isinstance(value, list):
-        return [_normalize_argument(item) for item in value]
-    return value
-
-
-class ModelManagementProxy(ProxiedSingleton):
-    """
-    Exact-relay proxy for comfy.model_management.
-    Child calls never import comfy.model_management directly; they serialize
-    arguments, relay to host, and deserialize the host result back.
-    """
-
-    _rpc: Optional[Any] = None
-
-    @classmethod
-    def set_rpc(cls, rpc: Any) -> None:
-        cls._rpc = rpc.create_caller(cls, cls.get_remote_id())
-
-    @classmethod
-    def clear_rpc(cls) -> None:
-        cls._rpc = None
-
-    @classmethod
-    def _get_caller(cls) -> Any:
-        if cls._rpc is None:
-            raise RuntimeError("ModelManagementProxy RPC caller is not configured")
-        return cls._rpc
-
-    def _relay_call(self, method_name: str, *args: Any, **kwargs: Any) -> Any:
-        payload = call_singleton_rpc(
-            self._get_caller(),
-            "rpc_call",
-            method_name,
-            _serialize_value(args),
-            _serialize_value(kwargs),
-        )
-        return _deserialize_value(payload)
-
-    @property
-    def VRAMState(self):
-        return _mm().VRAMState
-
-    @property
-    def CPUState(self):
-        return _mm().CPUState
-
-    @property
-    def OOM_EXCEPTION(self):
-        return _mm().OOM_EXCEPTION
-
-    def __getattr__(self, name: str):
-        if _is_child_process():
-            def child_method(*args: Any, **kwargs: Any) -> Any:
-                return self._relay_call(name, *args, **kwargs)
-
-            return child_method
-        return getattr(_mm(), name)
-
-    async def rpc_call(self, method_name: str, args: Any, kwargs: Any) -> Any:
-        normalized_args = _normalize_argument(_deserialize_value(args))
-        normalized_kwargs = _normalize_argument(_deserialize_value(kwargs))
-        method = getattr(_mm(), method_name)
-        result = method(*normalized_args, **normalized_kwargs)
-        return _serialize_value(result)
--- a/comfy/isolation/proxies/progress_proxy.py
+++ b/comfy/isolation/proxies/progress_proxy.py
@@ -1,87 +0,0 @@
-from __future__ import annotations
-
-import logging
-import os
-from typing import Any, Optional
-
-try:
-    from pyisolate import ProxiedSingleton
-except ImportError:
-
-    class ProxiedSingleton:
-        pass
-
-from .base import call_singleton_rpc
-
-
-def _get_progress_state():
-    from comfy_execution.progress import get_progress_state
-
-    return get_progress_state()
-
-
-def _is_child_process() -> bool:
-    return os.environ.get("PYISOLATE_CHILD") == "1"
-
-logger = logging.getLogger(__name__)
-
-
-class ProgressProxy(ProxiedSingleton):
-    _rpc: Optional[Any] = None
-
-    @classmethod
-    def set_rpc(cls, rpc: Any) -> None:
-        cls._rpc = rpc.create_caller(cls, cls.get_remote_id())
-
-    @classmethod
-    def clear_rpc(cls) -> None:
-        cls._rpc = None
-
-    @classmethod
-    def _get_caller(cls) -> Any:
-        if cls._rpc is None:
-            raise RuntimeError("ProgressProxy RPC caller is not configured")
-        return cls._rpc
-
-    def set_progress(
-        self,
-        value: float,
-        max_value: float,
-        node_id: Optional[str] = None,
-        image: Any = None,
-    ) -> None:
-        if _is_child_process():
-            call_singleton_rpc(
-                self._get_caller(),
-                "rpc_set_progress",
-                value,
-                max_value,
-                node_id,
-                image,
-            )
-            return None
-
-        _get_progress_state().update_progress(
-            node_id=node_id,
-            value=value,
-            max_value=max_value,
-            image=image,
-        )
-        return None
-
-    async def rpc_set_progress(
-        self,
-        value: float,
-        max_value: float,
-        node_id: Optional[str] = None,
-        image: Any = None,
-    ) -> None:
-        _get_progress_state().update_progress(
-            node_id=node_id,
-            value=value,
-            max_value=max_value,
-            image=image,
-        )
-
-
-__all__ = ["ProgressProxy"]
--- a/comfy/isolation/proxies/prompt_server_impl.py
+++ b/comfy/isolation/proxies/prompt_server_impl.py
@@ -1,271 +0,0 @@
-# pylint: disable=import-outside-toplevel,logging-fstring-interpolation,redefined-outer-name,reimported,super-init-not-called
-"""Stateless RPC Implementation for PromptServer.
-
-Replaces the legacy PromptServerProxy (Singleton) with a clean Service/Stub architecture.
- Host: PromptServerService (RPC Handler)
- Child: PromptServerStub (Interface Implementation)
-"""
-
-from __future__ import annotations
-
-import asyncio
-import os
-from typing import Any, Dict, Optional, Callable
-
-import logging
-
-# IMPORTS
-from pyisolate import ProxiedSingleton
-from .base import call_singleton_rpc
-
-logger = logging.getLogger(__name__)
-LOG_PREFIX = "[Isolation:C<->H]"
-
-# ...
-
-# =============================================================================
-# CHILD SIDE: PromptServerStub
-# =============================================================================
-
-
-class PromptServerStub:
-    """Stateless Stub for PromptServer."""
-
-    # Masquerade as the real server module
-    __module__ = "server"
-
-    _instance: Optional["PromptServerStub"] = None
-    _rpc: Optional[Any] = None  # This will be the Caller object
-    _source_file: Optional[str] = None
-
-    def __init__(self):
-        self.routes = RouteStub(self)
-
-    @classmethod
-    def set_rpc(cls, rpc: Any) -> None:
-        """Inject RPC client (called by adapter.py or manually)."""
-        # Create caller for HOST Service
-        # Assuming Host Service is registered as "PromptServerService" (class name)
-        # We target the Host Service Class
-        target_id = "PromptServerService"
-        # We need to pass a class to create_caller? Usually yes.
-        # But we don't have the Service class imported here necessarily (if running on child).
-        # pyisolate check verify_service type?
-        # If we pass PromptServerStub as the 'class', it might mismatch if checking types.
-        # But we can try passing PromptServerStub if it mirrors the service name? No, stub is PromptServerStub.
-        # We need a dummy class with right name?
-        # Or just rely on string ID if create_caller supports it?
-        # Standard: rpc.create_caller(PromptServerStub, target_id)
-        # But wait, PromptServerStub is the *Local* class.
-        # We want to call *Remote* class.
-        # If we use PromptServerStub as the type, returning object will be typed as PromptServerStub?
-        # The first arg is 'service_cls'.
-        cls._rpc = rpc.create_caller(
-            PromptServerService, target_id
-        )  # We import Service below?
-
-    @classmethod
-    def clear_rpc(cls) -> None:
-        cls._rpc = None
-
-    # We need PromptServerService available for the create_caller call?
-    # Or just use the Stub class if ID matches?
-    # prompt_server_impl.py defines BOTH. So PromptServerService IS available!
-
-    @property
-    def instance(self) -> "PromptServerStub":
-        return self
-
-    # ... Compatibility ...
-    @classmethod
-    def _get_source_file(cls) -> str:
-        if cls._source_file is None:
-            import folder_paths
-
-            cls._source_file = os.path.join(folder_paths.base_path, "server.py")
-        return cls._source_file
-
-    @property
-    def __file__(self) -> str:
-        return self._get_source_file()
-
-    # --- Properties ---
-    @property
-    def client_id(self) -> Optional[str]:
-        return "isolated_client"
-
-    def supports(self, feature: str) -> bool:
-        return True
-
-    @property
-    def app(self):
-        raise RuntimeError(
-            "PromptServer.app is not accessible in isolated nodes. Use RPC routes instead."
-        )
-
-    @property
-    def prompt_queue(self):
-        raise RuntimeError(
-            "PromptServer.prompt_queue is not accessible in isolated nodes."
-        )
-
-    # --- UI Communication (RPC Delegates) ---
-    async def send_sync(
-        self, event: str, data: Dict[str, Any], sid: Optional[str] = None
-    ) -> None:
-        if self._rpc:
-            await self._rpc.ui_send_sync(event, data, sid)
-
-    async def send(
-        self, event: str, data: Dict[str, Any], sid: Optional[str] = None
-    ) -> None:
-        if self._rpc:
-            await self._rpc.ui_send(event, data, sid)
-
-    def send_progress_text(self, text: str, node_id: str, sid=None) -> None:
-        if self._rpc:
-            # Fire and forget likely needed. If method is async on host, caller invocation returns coroutine.
-            # We must schedule it?
-            # Or use fire_remote equivalent?
-            # Caller object usually proxies calls. If host method is async, it returns coro.
-            # If we are sync here (send_progress_text checks imply sync usage), we must background it.
-            # But UtilsProxy hook wrapper creates task.
-            # Does send_progress_text need to be sync? Yes, node code calls it sync.
-            import asyncio
-
-            try:
-                loop = asyncio.get_running_loop()
-                loop.create_task(self._rpc.ui_send_progress_text(text, node_id, sid))
-            except RuntimeError:
-                call_singleton_rpc(self._rpc, "ui_send_progress_text", text, node_id, sid)
-
-    # --- Route Registration Logic ---
-    def register_route(self, method: str, path: str, handler: Callable):
-        """Register a route handler via RPC."""
-        if not self._rpc:
-            logger.error("RPC not initialized in PromptServerStub")
-            return
-
-        # Fire registration async
-        try:
-            loop = asyncio.get_running_loop()
-            loop.create_task(self._rpc.register_route_rpc(method, path, handler))
-        except RuntimeError:
-            call_singleton_rpc(self._rpc, "register_route_rpc", method, path, handler)
-
-
-class RouteStub:
-    """Simulates aiohttp.web.RouteTableDef."""
-
-    def __init__(self, stub: PromptServerStub):
-        self._stub = stub
-
-    def get(self, path: str):
-        def decorator(handler):
-            self._stub.register_route("GET", path, handler)
-            return handler
-
-        return decorator
-
-    def post(self, path: str):
-        def decorator(handler):
-            self._stub.register_route("POST", path, handler)
-            return handler
-
-        return decorator
-
-    def patch(self, path: str):
-        def decorator(handler):
-            self._stub.register_route("PATCH", path, handler)
-            return handler
-
-        return decorator
-
-    def put(self, path: str):
-        def decorator(handler):
-            self._stub.register_route("PUT", path, handler)
-            return handler
-
-        return decorator
-
-    def delete(self, path: str):
-        def decorator(handler):
-            self._stub.register_route("DELETE", path, handler)
-            return handler
-
-        return decorator
-
-
-# =============================================================================
-# HOST SIDE: PromptServerService
-# =============================================================================
-
-
-class PromptServerService(ProxiedSingleton):
-    """Host-side RPC Service for PromptServer."""
-
-    def __init__(self):
-        # We will bind to the real server instance lazily or via global import
-        pass
-
-    @property
-    def server(self):
-        from server import PromptServer
-
-        return PromptServer.instance
-
-    async def ui_send_sync(
-        self, event: str, data: Dict[str, Any], sid: Optional[str] = None
-    ):
-        await self.server.send_sync(event, data, sid)
-
-    async def ui_send(
-        self, event: str, data: Dict[str, Any], sid: Optional[str] = None
-    ):
-        await self.server.send(event, data, sid)
-
-    async def ui_send_progress_text(self, text: str, node_id: str, sid=None):
-        # Made async to be awaitable by RPC layer
-        self.server.send_progress_text(text, node_id, sid)
-
-    async def register_route_rpc(self, method: str, path: str, child_handler_proxy):
-        """RPC Target: Register a route that forwards to the Child."""
-        from aiohttp import web
-        logger.debug(f"{LOG_PREFIX} Registering Isolated Route {method} {path}")
-
-        async def route_wrapper(request: web.Request) -> web.Response:
-            # 1. Capture request data
-            req_data = {
-                "method": request.method,
-                "path": request.path,
-                "query": dict(request.query),
-            }
-            if request.can_read_body:
-                req_data["text"] = await request.text()
-
-            try:
-                # 2. Call Child Handler via RPC (child_handler_proxy is async callable)
-                result = await child_handler_proxy(req_data)
-
-                # 3. Serialize Response
-                return self._serialize_response(result)
-            except Exception as e:
-                logger.error(f"{LOG_PREFIX} Isolated Route Error: {e}")
-                return web.Response(status=500, text=str(e))
-
-        # Register loop
-        self.server.app.router.add_route(method, path, route_wrapper)
-
-    def _serialize_response(self, result: Any) -> Any:
-        """Helper to convert Child result -> web.Response"""
-        from aiohttp import web
-        if isinstance(result, web.Response):
-            return result
-        # Handle dict (json)
-        if isinstance(result, dict):
-            return web.json_response(result)
-        # Handle string
-        if isinstance(result, str):
-            return web.Response(text=result)
-        # Fallback
-        return web.Response(text=str(result))
--- a/comfy/isolation/proxies/utils_proxy.py
+++ b/comfy/isolation/proxies/utils_proxy.py
@@ -1,64 +0,0 @@
-# pylint: disable=cyclic-import,import-outside-toplevel
-from __future__ import annotations
-
-from typing import Optional, Any
-from pyisolate import ProxiedSingleton
-
-import os
-
-
-def _comfy_utils():
-    import comfy.utils
-    return comfy.utils
-
-
-class UtilsProxy(ProxiedSingleton):
-    """
-    Proxy for comfy.utils.
-    Primarily handles the PROGRESS_BAR_HOOK to ensure progress updates
-    from isolated nodes reach the host.
-    """
-
-    # _instance and __new__ removed to rely on SingletonMetaclass
-    _rpc: Optional[Any] = None
-
-    @classmethod
-    def set_rpc(cls, rpc: Any) -> None:
-        # Create caller using class name as ID (standard for Singletons)
-        cls._rpc = rpc.create_caller(cls, "UtilsProxy")
-
-    @classmethod
-    def clear_rpc(cls) -> None:
-        cls._rpc = None
-
-    async def progress_bar_hook(
-        self,
-        value: int,
-        total: int,
-        preview: Optional[bytes] = None,
-        node_id: Optional[str] = None,
-    ) -> Any:
-        """
-        Host-side implementation: forwards the call to the real global hook.
-        Child-side: this method call is intercepted by RPC and sent to host.
-        """
-        if os.environ.get("PYISOLATE_CHILD") == "1":
-            if UtilsProxy._rpc is None:
-                raise RuntimeError("UtilsProxy RPC caller is not configured")
-            return await UtilsProxy._rpc.progress_bar_hook(
-                value, total, preview, node_id
-            )
-
-        # Host Execution
-        utils = _comfy_utils()
-        if utils.PROGRESS_BAR_HOOK is not None:
-            return utils.PROGRESS_BAR_HOOK(value, total, preview, node_id)
-        return None
-
-    def set_progress_bar_global_hook(self, hook: Any) -> None:
-        """Forward hook registration (though usually not needed from child)."""
-        if os.environ.get("PYISOLATE_CHILD") == "1":
-            raise RuntimeError(
-                "UtilsProxy.set_progress_bar_global_hook is not available in child without exact relay support"
-            )
-        _comfy_utils().set_progress_bar_global_hook(hook)
--- a/comfy/isolation/proxies/web_directory_proxy.py
+++ b/comfy/isolation/proxies/web_directory_proxy.py
@@ -1,219 +0,0 @@
-"""WebDirectoryProxy — serves isolated node web assets via RPC.
-
-Child side: enumerates and reads files from the extension's web/ directory.
-Host side: gets an RPC proxy that fetches file listings and contents on demand.
-
-Only files with allowed extensions (.js, .html, .css) are served.
-Directory traversal is rejected. File contents are base64-encoded for
-safe JSON-RPC transport.
-"""
-
-from __future__ import annotations
-
-import base64
-import logging
-import os
-from pathlib import Path, PurePosixPath
-from typing import Any, Dict, List
-
-from pyisolate import ProxiedSingleton
-
-logger = logging.getLogger(__name__)
-
-ALLOWED_EXTENSIONS = frozenset({".js", ".html", ".css"})
-
-MIME_TYPES = {
-    ".js": "application/javascript",
-    ".html": "text/html",
-    ".css": "text/css",
-}
-
-
-class WebDirectoryProxy(ProxiedSingleton):
-    """Proxy for serving isolated extension web directories.
-
-    On the child side, this class has direct filesystem access to the
-    extension's web/ directory.  On the host side, callers get an RPC
-    proxy whose method calls are forwarded to the child.
-    """
-
-    # {extension_name: absolute_path_to_web_dir}
-    _web_dirs: dict[str, str] = {}
-
-    @classmethod
-    def register_web_dir(cls, extension_name: str, web_dir_path: str) -> None:
-        """Register an extension's web directory (child-side only)."""
-        cls._web_dirs[extension_name] = web_dir_path
-        logger.info(
-            "][ WebDirectoryProxy: registered %s -> %s",
-            extension_name,
-            web_dir_path,
-        )
-
-    def list_web_files(self, extension_name: str) -> List[Dict[str, str]]:
-        """Return a list of servable files in the extension's web directory.
-
-        Each entry is {"relative_path": "js/foo.js", "content_type": "application/javascript"}.
-        Only files with allowed extensions are included.
-        """
-        web_dir = self._web_dirs.get(extension_name)
-        if not web_dir:
-            return []
-
-        root = Path(web_dir)
-        if not root.is_dir():
-            return []
-
-        result: List[Dict[str, str]] = []
-        for path in sorted(root.rglob("*")):
-            if not path.is_file():
-                continue
-            ext = path.suffix.lower()
-            if ext not in ALLOWED_EXTENSIONS:
-                continue
-            rel = path.relative_to(root)
-            result.append({
-                "relative_path": str(PurePosixPath(rel)),
-                "content_type": MIME_TYPES[ext],
-            })
-        return result
-
-    def get_web_file(
-        self, extension_name: str, relative_path: str
-    ) -> Dict[str, Any]:
-        """Return the contents of a single web file as base64.
-
-        Raises ValueError for traversal attempts or disallowed file types.
-        Returns {"content": <base64 str>, "content_type": <MIME str>}.
-        """
-        _validate_path(relative_path)
-
-        web_dir = self._web_dirs.get(extension_name)
-        if not web_dir:
-            raise FileNotFoundError(
-                f"No web directory registered for {extension_name}"
-            )
-
-        root = Path(web_dir)
-        target = (root / relative_path).resolve()
-
-        # Ensure resolved path is under the web directory
-        if not str(target).startswith(str(root.resolve())):
-            raise ValueError(f"Path escapes web directory: {relative_path}")
-
-        if not target.is_file():
-            raise FileNotFoundError(f"File not found: {relative_path}")
-
-        ext = target.suffix.lower()
-        if ext not in ALLOWED_EXTENSIONS:
-            raise ValueError(f"Disallowed file type: {ext}")
-
-        content_type = MIME_TYPES[ext]
-        raw = target.read_bytes()
-
-        return {
-            "content": base64.b64encode(raw).decode("ascii"),
-            "content_type": content_type,
-        }
-
-
-def _validate_path(relative_path: str) -> None:
-    """Reject directory traversal and absolute paths."""
-    if os.path.isabs(relative_path):
-        raise ValueError(f"Absolute paths are not allowed: {relative_path}")
-    if ".." in PurePosixPath(relative_path).parts:
-        raise ValueError(f"Directory traversal is not allowed: {relative_path}")
-
-
-# ---------------------------------------------------------------------------
-# Host-side cache and aiohttp handler
-# ---------------------------------------------------------------------------
-
-
-class WebDirectoryCache:
-    """Host-side in-memory cache for proxied web directory contents.
-
-    Populated lazily via RPC calls to the child's WebDirectoryProxy.
-    Once a file is cached, subsequent requests are served from memory.
-    """
-
-    def __init__(self) -> None:
-        # {extension_name: {relative_path: {"content": bytes, "content_type": str}}}
-        self._file_cache: dict[str, dict[str, dict[str, Any]]] = {}
-        # {extension_name: [{"relative_path": str, "content_type": str}, ...]}
-        self._listing_cache: dict[str, list[dict[str, str]]] = {}
-        # {extension_name: WebDirectoryProxy (RPC proxy instance)}
-        self._proxies: dict[str, Any] = {}
-
-    def register_proxy(self, extension_name: str, proxy: Any) -> None:
-        """Register an RPC proxy for an extension's web directory."""
-        self._proxies[extension_name] = proxy
-        logger.info(
-            "][ WebDirectoryCache: registered proxy for %s", extension_name
-        )
-
-    @property
-    def extension_names(self) -> list[str]:
-        return list(self._proxies.keys())
-
-    def list_files(self, extension_name: str) -> list[dict[str, str]]:
-        """List servable files for an extension (cached after first call)."""
-        if extension_name not in self._listing_cache:
-            proxy = self._proxies.get(extension_name)
-            if proxy is None:
-                return []
-            try:
-                self._listing_cache[extension_name] = proxy.list_web_files(
-                    extension_name
-                )
-            except Exception:
-                logger.warning(
-                    "][ WebDirectoryCache: failed to list files for %s",
-                    extension_name,
-                    exc_info=True,
-                )
-                return []
-        return self._listing_cache[extension_name]
-
-    def get_file(
-        self, extension_name: str, relative_path: str
-    ) -> dict[str, Any] | None:
-        """Get file content (cached after first fetch). Returns None on miss."""
-        ext_cache = self._file_cache.get(extension_name)
-        if ext_cache and relative_path in ext_cache:
-            return ext_cache[relative_path]
-
-        proxy = self._proxies.get(extension_name)
-        if proxy is None:
-            return None
-
-        try:
-            result = proxy.get_web_file(extension_name, relative_path)
-        except (FileNotFoundError, ValueError):
-            return None
-        except Exception:
-            logger.warning(
-                "][ WebDirectoryCache: failed to fetch %s/%s",
-                extension_name,
-                relative_path,
-                exc_info=True,
-            )
-            return None
-
-        decoded = {
-            "content": base64.b64decode(result["content"]),
-            "content_type": result["content_type"],
-        }
-
-        if extension_name not in self._file_cache:
-            self._file_cache[extension_name] = {}
-        self._file_cache[extension_name][relative_path] = decoded
-        return decoded
-
-
-# Global cache instance — populated during isolation loading
-_web_directory_cache = WebDirectoryCache()
-
-
-def get_web_directory_cache() -> WebDirectoryCache:
-    return _web_directory_cache
--- a/comfy/isolation/rpc_bridge.py
+++ b/comfy/isolation/rpc_bridge.py
@@ -1,49 +0,0 @@
-import asyncio
-import logging
-import threading
-
-logger = logging.getLogger(__name__)
-
-
-class RpcBridge:
-    """Minimal helper to run coroutines synchronously inside isolated processes.
-
-    If an event loop is already running, the coroutine is executed on a fresh
-    thread with its own loop to avoid nested run_until_complete errors.
-    """
-
-    def run_sync(self, maybe_coro):
-        if not asyncio.iscoroutine(maybe_coro):
-            return maybe_coro
-
-        try:
-            loop = asyncio.get_running_loop()
-        except RuntimeError:
-            loop = None
-
-        if loop and loop.is_running():
-            result_container = {}
-            exc_container = {}
-
-            def _runner():
-                try:
-                    new_loop = asyncio.new_event_loop()
-                    asyncio.set_event_loop(new_loop)
-                    result_container["value"] = new_loop.run_until_complete(maybe_coro)
-                except Exception as exc:  # pragma: no cover
-                    exc_container["error"] = exc
-                finally:
-                    try:
-                        new_loop.close()
-                    except Exception:
-                        pass
-
-            t = threading.Thread(target=_runner, daemon=True)
-            t.start()
-            t.join()
-
-            if "error" in exc_container:
-                raise exc_container["error"]
-            return result_container.get("value")
-
-        return asyncio.run(maybe_coro)
--- a/comfy/isolation/runtime_helpers.py
+++ b/comfy/isolation/runtime_helpers.py
@@ -1,471 +0,0 @@
-# pylint: disable=consider-using-from-import,import-outside-toplevel,no-member
-from __future__ import annotations
-
-import copy
-import logging
-import os
-from pathlib import Path
-from typing import Any, Dict, List, Set, TYPE_CHECKING
-
-from .proxies.helper_proxies import restore_input_types
-from .shm_forensics import scan_shm_forensics
-
-_IMPORT_TORCH = os.environ.get("PYISOLATE_IMPORT_TORCH", "1") == "1"
-
-_ComfyNodeInternal = object
-latest_io = None
-
-if _IMPORT_TORCH:
-    from comfy_api.internal import _ComfyNodeInternal
-    from comfy_api.latest import _io as latest_io
-
-if TYPE_CHECKING:
-    from .extension_wrapper import ComfyNodeExtension
-
-LOG_PREFIX = "]["
-_PRE_EXEC_MIN_FREE_VRAM_BYTES = 2 * 1024 * 1024 * 1024
-
-
-class _RemoteObjectRegistryCaller:
-    def __init__(self, extension: Any) -> None:
-        self._extension = extension
-
-    def __getattr__(self, method_name: str) -> Any:
-        async def _call(instance_id: str, *args: Any, **kwargs: Any) -> Any:
-            return await self._extension.call_remote_object_method(
-                instance_id,
-                method_name,
-                *args,
-                **kwargs,
-            )
-
-        return _call
-
-
-def _wrap_remote_handles_as_host_proxies(value: Any, extension: Any) -> Any:
-    from pyisolate._internal.remote_handle import RemoteObjectHandle
-
-    if isinstance(value, RemoteObjectHandle):
-        if value.type_name == "ModelPatcher":
-            from comfy.isolation.model_patcher_proxy import ModelPatcherProxy
-
-            proxy = ModelPatcherProxy(value.object_id, manage_lifecycle=False)
-            proxy._rpc_caller = _RemoteObjectRegistryCaller(extension)  # type: ignore[attr-defined]
-            proxy._pyisolate_remote_handle = value  # type: ignore[attr-defined]
-            return proxy
-        if value.type_name == "VAE":
-            from comfy.isolation.vae_proxy import VAEProxy
-
-            proxy = VAEProxy(value.object_id, manage_lifecycle=False)
-            proxy._rpc_caller = _RemoteObjectRegistryCaller(extension)  # type: ignore[attr-defined]
-            proxy._pyisolate_remote_handle = value  # type: ignore[attr-defined]
-            return proxy
-        if value.type_name == "CLIP":
-            from comfy.isolation.clip_proxy import CLIPProxy
-
-            proxy = CLIPProxy(value.object_id, manage_lifecycle=False)
-            proxy._rpc_caller = _RemoteObjectRegistryCaller(extension)  # type: ignore[attr-defined]
-            proxy._pyisolate_remote_handle = value  # type: ignore[attr-defined]
-            return proxy
-        if value.type_name == "ModelSampling":
-            from comfy.isolation.model_sampling_proxy import ModelSamplingProxy
-
-            proxy = ModelSamplingProxy(value.object_id, manage_lifecycle=False)
-            proxy._rpc_caller = _RemoteObjectRegistryCaller(extension)  # type: ignore[attr-defined]
-            proxy._pyisolate_remote_handle = value  # type: ignore[attr-defined]
-            return proxy
-        return value
-
-    if isinstance(value, dict):
-        return {
-            k: _wrap_remote_handles_as_host_proxies(v, extension) for k, v in value.items()
-        }
-
-    if isinstance(value, (list, tuple)):
-        wrapped = [_wrap_remote_handles_as_host_proxies(item, extension) for item in value]
-        return type(value)(wrapped)
-
-    return value
-
-
-def _resource_snapshot() -> Dict[str, int]:
-    fd_count = -1
-    shm_sender_files = 0
-    try:
-        fd_count = len(os.listdir("/proc/self/fd"))
-    except Exception:
-        pass
-    try:
-        shm_root = Path("/dev/shm")
-        if shm_root.exists():
-            prefix = f"torch_{os.getpid()}_"
-            shm_sender_files = sum(1 for _ in shm_root.glob(f"{prefix}*"))
-    except Exception:
-        pass
-    return {"fd_count": fd_count, "shm_sender_files": shm_sender_files}
-
-
-def _tensor_transport_summary(value: Any) -> Dict[str, int]:
-    summary: Dict[str, int] = {
-        "tensor_count": 0,
-        "cpu_tensors": 0,
-        "cuda_tensors": 0,
-        "shared_cpu_tensors": 0,
-        "tensor_bytes": 0,
-    }
-    try:
-        import torch
-    except Exception:
-        return summary
-
-    def visit(node: Any) -> None:
-        if isinstance(node, torch.Tensor):
-            summary["tensor_count"] += 1
-            summary["tensor_bytes"] += int(node.numel() * node.element_size())
-            if node.device.type == "cpu":
-                summary["cpu_tensors"] += 1
-                if node.is_shared():
-                    summary["shared_cpu_tensors"] += 1
-            elif node.device.type == "cuda":
-                summary["cuda_tensors"] += 1
-            return
-        if isinstance(node, dict):
-            for v in node.values():
-                visit(v)
-            return
-        if isinstance(node, (list, tuple)):
-            for v in node:
-                visit(v)
-
-    visit(value)
-    return summary
-
-
-def _extract_hidden_unique_id(inputs: Dict[str, Any]) -> str | None:
-    for key, value in inputs.items():
-        key_text = str(key)
-        if "unique_id" in key_text:
-            return str(value)
-    return None
-
-
-def _flush_tensor_transport_state(marker: str, logger: logging.Logger) -> None:
-    try:
-        from pyisolate import flush_tensor_keeper  # type: ignore[attr-defined]
-    except Exception:
-        return
-    if not callable(flush_tensor_keeper):
-        return
-    flushed = flush_tensor_keeper()
-    if flushed > 0:
-        logger.debug(
-            "%s %s flush_tensor_keeper released=%d", LOG_PREFIX, marker, flushed
-        )
-
-
-def _relieve_host_vram_pressure(marker: str, logger: logging.Logger) -> None:
-    import comfy.model_management as model_management
-
-    model_management.cleanup_models_gc()
-    model_management.cleanup_models()
-
-    device = model_management.get_torch_device()
-    if not hasattr(device, "type") or device.type == "cpu":
-        return
-
-    required = max(
-        model_management.minimum_inference_memory(),
-        _PRE_EXEC_MIN_FREE_VRAM_BYTES,
-    )
-    if model_management.get_free_memory(device) < required:
-        model_management.free_memory(required, device, for_dynamic=True)
-        if model_management.get_free_memory(device) < required:
-            model_management.free_memory(required, device, for_dynamic=False)
-        model_management.cleanup_models()
-        model_management.soft_empty_cache()
-        logger.debug("%s %s free_memory target=%d", LOG_PREFIX, marker, required)
-
-
-def _detach_shared_cpu_tensors(value: Any) -> Any:
-    try:
-        import torch
-    except Exception:
-        return value
-
-    if isinstance(value, torch.Tensor):
-        if value.device.type == "cpu" and value.is_shared():
-            clone = value.clone()
-            if value.requires_grad:
-                clone.requires_grad_(True)
-            return clone
-        return value
-    if isinstance(value, list):
-        return [_detach_shared_cpu_tensors(v) for v in value]
-    if isinstance(value, tuple):
-        return tuple(_detach_shared_cpu_tensors(v) for v in value)
-    if isinstance(value, dict):
-        return {k: _detach_shared_cpu_tensors(v) for k, v in value.items()}
-    return value
-
-
-def build_stub_class(
-    node_name: str,
-    info: Dict[str, object],
-    extension: "ComfyNodeExtension",
-    running_extensions: Dict[str, "ComfyNodeExtension"],
-    logger: logging.Logger,
-) -> type:
-    if latest_io is None:
-        raise RuntimeError("comfy_api.latest._io is required to build isolation stubs")
-    is_v3 = bool(info.get("is_v3", False))
-    function_name = "_pyisolate_execute"
-    restored_input_types = restore_input_types(info.get("input_types", {}))
-
-    async def _execute(self, **inputs):
-        from comfy.isolation import _RUNNING_EXTENSIONS
-
-        # Update BOTH the local dict AND the module-level dict
-        running_extensions[extension.name] = extension
-        _RUNNING_EXTENSIONS[extension.name] = extension
-        prev_child = None
-        node_unique_id = _extract_hidden_unique_id(inputs)
-        summary = _tensor_transport_summary(inputs)
-        resources = _resource_snapshot()
-        logger.debug(
-            "%s ISO:execute_start ext=%s node=%s uid=%s",
-            LOG_PREFIX,
-            extension.name,
-            node_name,
-            node_unique_id or "-",
-        )
-        logger.debug(
-            "%s ISO:execute_start ext=%s node=%s uid=%s tensors=%d cpu=%d cuda=%d shared_cpu=%d bytes=%d fds=%d sender_shm=%d",
-            LOG_PREFIX,
-            extension.name,
-            node_name,
-            node_unique_id or "-",
-            summary["tensor_count"],
-            summary["cpu_tensors"],
-            summary["cuda_tensors"],
-            summary["shared_cpu_tensors"],
-            summary["tensor_bytes"],
-            resources["fd_count"],
-            resources["shm_sender_files"],
-        )
-        scan_shm_forensics("RUNTIME:execute_start", refresh_model_context=True)
-        try:
-            if os.environ.get("PYISOLATE_CHILD") != "1":
-                _relieve_host_vram_pressure("RUNTIME:pre_execute", logger)
-                scan_shm_forensics("RUNTIME:pre_execute", refresh_model_context=True)
-            from pyisolate._internal.model_serialization import (
-                serialize_for_isolation,
-                deserialize_from_isolation,
-            )
-
-            prev_child = os.environ.pop("PYISOLATE_CHILD", None)
-            logger.debug(
-                "%s ISO:serialize_start ext=%s node=%s uid=%s",
-                LOG_PREFIX,
-                extension.name,
-                node_name,
-                node_unique_id or "-",
-            )
-            # Unwrap NodeOutput-like dicts before serialization.
-            # OUTPUT_NODE nodes return {"ui": {...}, "result": (outputs...)}
-            # and the executor may pass this dict as input to downstream nodes.
-            unwrapped_inputs = {}
-            for k, v in inputs.items():
-                if isinstance(v, dict) and "result" in v and ("ui" in v or "__node_output__" in v):
-                    result = v.get("result")
-                    if isinstance(result, (tuple, list)) and len(result) > 0:
-                        unwrapped_inputs[k] = result[0]
-                    else:
-                        unwrapped_inputs[k] = result
-                else:
-                    unwrapped_inputs[k] = v
-            serialized = serialize_for_isolation(unwrapped_inputs)
-            logger.debug(
-                "%s ISO:serialize_done ext=%s node=%s uid=%s",
-                LOG_PREFIX,
-                extension.name,
-                node_name,
-                node_unique_id or "-",
-            )
-            logger.debug(
-                "%s ISO:dispatch_start ext=%s node=%s uid=%s",
-                LOG_PREFIX,
-                extension.name,
-                node_name,
-                node_unique_id or "-",
-            )
-            result = await extension.execute_node(node_name, **serialized)
-            logger.debug(
-                "%s ISO:dispatch_done ext=%s node=%s uid=%s",
-                LOG_PREFIX,
-                extension.name,
-                node_name,
-                node_unique_id or "-",
-            )
-            # Reconstruct NodeOutput if the child serialized one
-            if isinstance(result, dict) and result.get("__node_output__"):
-                from comfy_api.latest import io as latest_io
-                args_raw = result.get("args", ())
-                deserialized_args = await deserialize_from_isolation(args_raw, extension)
-                deserialized_args = _wrap_remote_handles_as_host_proxies(
-                    deserialized_args, extension
-                )
-                deserialized_args = _detach_shared_cpu_tensors(deserialized_args)
-                ui_raw = result.get("ui")
-                deserialized_ui = None
-                if ui_raw is not None:
-                    deserialized_ui = await deserialize_from_isolation(ui_raw, extension)
-                    deserialized_ui = _wrap_remote_handles_as_host_proxies(
-                        deserialized_ui, extension
-                    )
-                    deserialized_ui = _detach_shared_cpu_tensors(deserialized_ui)
-                scan_shm_forensics("RUNTIME:post_execute", refresh_model_context=True)
-                return latest_io.NodeOutput(
-                    *deserialized_args,
-                    ui=deserialized_ui,
-                    expand=result.get("expand"),
-                    block_execution=result.get("block_execution"),
-                )
-            # OUTPUT_NODE: if sealed worker returned a tuple/list whose first
-            # element is a {"ui": ...} dict, unwrap it for the executor.
-            if (isinstance(result, (tuple, list)) and len(result) == 1
-                    and isinstance(result[0], dict) and "ui" in result[0]):
-                return result[0]
-            deserialized = await deserialize_from_isolation(result, extension)
-            deserialized = _wrap_remote_handles_as_host_proxies(deserialized, extension)
-            scan_shm_forensics("RUNTIME:post_execute", refresh_model_context=True)
-            return _detach_shared_cpu_tensors(deserialized)
-        except ImportError:
-            return await extension.execute_node(node_name, **inputs)
-        except Exception:
-            logger.exception(
-                "%s ISO:execute_error ext=%s node=%s uid=%s",
-                LOG_PREFIX,
-                extension.name,
-                node_name,
-                node_unique_id or "-",
-            )
-            raise
-        finally:
-            if prev_child is not None:
-                os.environ["PYISOLATE_CHILD"] = prev_child
-            logger.debug(
-                "%s ISO:execute_end ext=%s node=%s uid=%s",
-                LOG_PREFIX,
-                extension.name,
-                node_name,
-                node_unique_id or "-",
-            )
-            scan_shm_forensics("RUNTIME:execute_end", refresh_model_context=True)
-
-    def _input_types(
-        cls,
-        include_hidden: bool = True,
-        return_schema: bool = False,
-        live_inputs: Any = None,
-    ):
-        if not is_v3:
-            return restored_input_types
-
-        inputs_copy = copy.deepcopy(restored_input_types)
-        if not include_hidden:
-            inputs_copy.pop("hidden", None)
-
-        v3_data: Dict[str, Any] = {"hidden_inputs": {}}
-        dynamic = inputs_copy.pop("dynamic_paths", None)
-        if dynamic is not None:
-            v3_data["dynamic_paths"] = dynamic
-
-        if return_schema:
-            hidden_vals = info.get("hidden", []) or []
-            hidden_enums = []
-            for h in hidden_vals:
-                try:
-                    hidden_enums.append(latest_io.Hidden(h))
-                except Exception:
-                    hidden_enums.append(h)
-
-            class SchemaProxy:
-                hidden = hidden_enums
-
-            return inputs_copy, SchemaProxy, v3_data
-        return inputs_copy
-
-    def _validate_class(cls):
-        return True
-
-    def _get_node_info_v1(cls):
-        node_info = copy.deepcopy(info.get("schema_v1", {}))
-        relative_python_module = node_info.get("python_module")
-        if not isinstance(relative_python_module, str) or not relative_python_module:
-            relative_python_module = f"custom_nodes.{extension.name}"
-        node_info["python_module"] = relative_python_module
-        return node_info
-
-    def _get_base_class(cls):
-        return latest_io.ComfyNode
-
-    attributes: Dict[str, object] = {
-        "FUNCTION": function_name,
-        "CATEGORY": info.get("category", ""),
-        "OUTPUT_NODE": info.get("output_node", False),
-        "RETURN_TYPES": tuple(info.get("return_types", ()) or ()),
-        "RETURN_NAMES": info.get("return_names"),
-        function_name: _execute,
-        "_pyisolate_extension": extension,
-        "_pyisolate_node_name": node_name,
-        "INPUT_TYPES": classmethod(_input_types),
-    }
-
-    output_is_list = info.get("output_is_list")
-    if output_is_list is not None:
-        attributes["OUTPUT_IS_LIST"] = tuple(output_is_list)
-
-    if is_v3:
-        attributes["VALIDATE_CLASS"] = classmethod(_validate_class)
-        attributes["GET_NODE_INFO_V1"] = classmethod(_get_node_info_v1)
-        attributes["GET_BASE_CLASS"] = classmethod(_get_base_class)
-        attributes["DESCRIPTION"] = info.get("description", "")
-        attributes["EXPERIMENTAL"] = info.get("experimental", False)
-        attributes["DEPRECATED"] = info.get("deprecated", False)
-        attributes["API_NODE"] = info.get("api_node", False)
-        attributes["NOT_IDEMPOTENT"] = info.get("not_idempotent", False)
-        attributes["ACCEPT_ALL_INPUTS"] = info.get("accept_all_inputs", False)
-        attributes["_ACCEPT_ALL_INPUTS"] = info.get("accept_all_inputs", False)
-        attributes["INPUT_IS_LIST"] = info.get("input_is_list", False)
-
-    class_name = f"PyIsolate_{node_name}".replace(" ", "_")
-    bases = (_ComfyNodeInternal,) if is_v3 else ()
-    stub_cls = type(class_name, bases, attributes)
-
-    if is_v3:
-        try:
-            stub_cls.VALIDATE_CLASS()
-        except Exception as e:
-            logger.error("%s VALIDATE_CLASS failed: %s - %s", LOG_PREFIX, node_name, e)
-
-    return stub_cls
-
-
-def get_class_types_for_extension(
-    extension_name: str,
-    running_extensions: Dict[str, "ComfyNodeExtension"],
-    specs: List[Any],
-) -> Set[str]:
-    extension = running_extensions.get(extension_name)
-    if not extension:
-        return set()
-
-    ext_path = Path(extension.module_path)
-    class_types = set()
-    for spec in specs:
-        if spec.module_path.resolve() == ext_path.resolve():
-            class_types.add(spec.node_name)
-    return class_types
-
-
-__all__ = ["build_stub_class", "get_class_types_for_extension"]
--- a/comfy/isolation/shm_forensics.py
+++ b/comfy/isolation/shm_forensics.py
@@ -1,217 +0,0 @@
-# pylint: disable=consider-using-from-import,import-outside-toplevel
-from __future__ import annotations
-
-import atexit
-import hashlib
-import logging
-import os
-from pathlib import Path
-from typing import Any, Dict, List, Set
-
-LOG_PREFIX = "]["
-logger = logging.getLogger(__name__)
-
-
-def _shm_debug_enabled() -> bool:
-    return os.environ.get("COMFY_ISO_SHM_DEBUG") == "1"
-
-
-class _SHMForensicsTracker:
-    def __init__(self) -> None:
-        self._started = False
-        self._tracked_files: Set[str] = set()
-        self._current_model_context: Dict[str, str] = {
-            "id": "unknown",
-            "name": "unknown",
-            "hash": "????",
-        }
-
-    @staticmethod
-    def _snapshot_shm() -> Set[str]:
-        shm_path = Path("/dev/shm")
-        if not shm_path.exists():
-            return set()
-        return {f.name for f in shm_path.glob("torch_*")}
-
-    def start(self) -> None:
-        if self._started or not _shm_debug_enabled():
-            return
-        self._tracked_files = self._snapshot_shm()
-        self._started = True
-        logger.debug(
-            "%s SHM:forensics_enabled tracked=%d", LOG_PREFIX, len(self._tracked_files)
-        )
-
-    def stop(self) -> None:
-        if not self._started:
-            return
-        self.scan("shutdown", refresh_model_context=True)
-        self._started = False
-        logger.debug("%s SHM:forensics_disabled", LOG_PREFIX)
-
-    def _compute_model_hash(self, model_patcher: Any) -> str:
-        try:
-            model_instance_id = getattr(model_patcher, "_instance_id", None)
-            if model_instance_id is not None:
-                model_id_text = str(model_instance_id)
-                return model_id_text[-4:] if len(model_id_text) >= 4 else model_id_text
-
-            import torch
-
-            real_model = (
-                model_patcher.model
-                if hasattr(model_patcher, "model")
-                else model_patcher
-            )
-            tensor = None
-            if hasattr(real_model, "parameters"):
-                for p in real_model.parameters():
-                    if torch.is_tensor(p) and p.numel() > 0:
-                        tensor = p
-                        break
-
-            if tensor is None:
-                return "0000"
-
-            flat = tensor.flatten()
-            values = []
-            indices = [0, flat.shape[0] // 2, flat.shape[0] - 1]
-            for i in indices:
-                if i < flat.shape[0]:
-                    values.append(flat[i].item())
-
-            size = 0
-            if hasattr(model_patcher, "model_size"):
-                size = model_patcher.model_size()
-            sample_str = f"{values}_{id(model_patcher):016x}_{size}"
-            return hashlib.sha256(sample_str.encode()).hexdigest()[-4:]
-        except Exception:
-            return "err!"
-
-    def _get_models_snapshot(self) -> List[Dict[str, Any]]:
-        try:
-            import comfy.model_management as model_management
-        except Exception:
-            return []
-
-        snapshot: List[Dict[str, Any]] = []
-        try:
-            for loaded_model in model_management.current_loaded_models:
-                model = loaded_model.model
-                if model is None:
-                    continue
-                if str(getattr(loaded_model, "device", "")) != "cuda:0":
-                    continue
-
-                name = (
-                    model.model.__class__.__name__
-                    if hasattr(model, "model")
-                    else type(model).__name__
-                )
-                model_hash = self._compute_model_hash(model)
-                model_instance_id = getattr(model, "_instance_id", None)
-                if model_instance_id is None:
-                    model_instance_id = model_hash
-                snapshot.append(
-                    {
-                        "name": str(name),
-                        "id": str(model_instance_id),
-                        "hash": str(model_hash or "????"),
-                        "used": bool(getattr(loaded_model, "currently_used", False)),
-                    }
-                )
-        except Exception:
-            return []
-
-        return snapshot
-
-    def _update_model_context(self) -> None:
-        snapshot = self._get_models_snapshot()
-        selected = None
-
-        used_models = [m for m in snapshot if m.get("used") and m.get("id")]
-        if used_models:
-            selected = used_models[-1]
-        else:
-            live_models = [m for m in snapshot if m.get("id")]
-            if live_models:
-                selected = live_models[-1]
-
-        if selected is None:
-            self._current_model_context = {
-                "id": "unknown",
-                "name": "unknown",
-                "hash": "????",
-            }
-            return
-
-        self._current_model_context = {
-            "id": str(selected.get("id", "unknown")),
-            "name": str(selected.get("name", "unknown")),
-            "hash": str(selected.get("hash", "????") or "????"),
-        }
-
-    def scan(self, marker: str, refresh_model_context: bool = True) -> None:
-        if not self._started or not _shm_debug_enabled():
-            return
-
-        if refresh_model_context:
-            self._update_model_context()
-
-        current = self._snapshot_shm()
-        added = current - self._tracked_files
-        removed = self._tracked_files - current
-        self._tracked_files = current
-
-        if not added and not removed:
-            logger.debug("%s SHM:scan marker=%s changes=0", LOG_PREFIX, marker)
-            return
-
-        for filename in sorted(added):
-            logger.info("%s SHM:created | %s", LOG_PREFIX, filename)
-            model_id = self._current_model_context["id"]
-            if model_id == "unknown":
-                logger.error(
-                    "%s SHM:model_association_missing | file=%s | reason=no_active_model_context",
-                    LOG_PREFIX,
-                    filename,
-                )
-            else:
-                logger.info(
-                    "%s SHM:model_association | model=%s | file=%s | name=%s | hash=%s",
-                    LOG_PREFIX,
-                    model_id,
-                    filename,
-                    self._current_model_context["name"],
-                    self._current_model_context["hash"],
-                )
-
-        for filename in sorted(removed):
-            logger.info("%s SHM:deleted | %s", LOG_PREFIX, filename)
-
-        logger.debug(
-            "%s SHM:scan marker=%s created=%d deleted=%d active=%d",
-            LOG_PREFIX,
-            marker,
-            len(added),
-            len(removed),
-            len(self._tracked_files),
-        )
-
-
-_TRACKER = _SHMForensicsTracker()
-
-
-def start_shm_forensics() -> None:
-    _TRACKER.start()
-
-
-def scan_shm_forensics(marker: str, refresh_model_context: bool = True) -> None:
-    _TRACKER.scan(marker, refresh_model_context=refresh_model_context)
-
-
-def stop_shm_forensics() -> None:
-    _TRACKER.stop()
-
-
-atexit.register(stop_shm_forensics)
--- a/comfy/isolation/vae_proxy.py
+++ b/comfy/isolation/vae_proxy.py
@@ -1,214 +0,0 @@
-# pylint: disable=attribute-defined-outside-init
-import logging
-from typing import Any
-
-from comfy.isolation.proxies.base import (
-    IS_CHILD_PROCESS,
-    BaseProxy,
-    BaseRegistry,
-    detach_if_grad,
-)
-from comfy.isolation.model_patcher_proxy import ModelPatcherProxy, ModelPatcherRegistry
-
-logger = logging.getLogger(__name__)
-
-
-class FirstStageModelRegistry(BaseRegistry[Any]):
-    _type_prefix = "first_stage_model"
-
-    async def get_property(self, instance_id: str, name: str) -> Any:
-        obj = self._get_instance(instance_id)
-        return getattr(obj, name)
-
-    async def has_property(self, instance_id: str, name: str) -> bool:
-        obj = self._get_instance(instance_id)
-        return hasattr(obj, name)
-
-
-class FirstStageModelProxy(BaseProxy[FirstStageModelRegistry]):
-    _registry_class = FirstStageModelRegistry
-    __module__ = "comfy.ldm.models.autoencoder"
-
-    def __getattr__(self, name: str) -> Any:
-        try:
-            return self._call_rpc("get_property", name)
-        except Exception as e:
-            raise AttributeError(
-                f"'{self.__class__.__name__}' object has no attribute '{name}'"
-            ) from e
-
-    def __repr__(self) -> str:
-        return f"<FirstStageModelProxy {self._instance_id}>"
-
-
-class VAERegistry(BaseRegistry[Any]):
-    _type_prefix = "vae"
-
-    async def get_patcher_id(self, instance_id: str) -> str:
-        vae = self._get_instance(instance_id)
-        return ModelPatcherRegistry().register(vae.patcher)
-
-    async def get_first_stage_model_id(self, instance_id: str) -> str:
-        vae = self._get_instance(instance_id)
-        return FirstStageModelRegistry().register(vae.first_stage_model)
-
-    async def encode(self, instance_id: str, pixels: Any) -> Any:
-        return detach_if_grad(self._get_instance(instance_id).encode(pixels))
-
-    async def encode_tiled(
-        self,
-        instance_id: str,
-        pixels: Any,
-        tile_x: int = 512,
-        tile_y: int = 512,
-        overlap: int = 64,
-    ) -> Any:
-        return detach_if_grad(
-            self._get_instance(instance_id).encode_tiled(
-                pixels, tile_x=tile_x, tile_y=tile_y, overlap=overlap
-            )
-        )
-
-    async def decode(self, instance_id: str, samples: Any, **kwargs: Any) -> Any:
-        return detach_if_grad(self._get_instance(instance_id).decode(samples, **kwargs))
-
-    async def decode_tiled(
-        self,
-        instance_id: str,
-        samples: Any,
-        tile_x: int = 64,
-        tile_y: int = 64,
-        overlap: int = 16,
-        **kwargs: Any,
-    ) -> Any:
-        return detach_if_grad(
-            self._get_instance(instance_id).decode_tiled(
-                samples, tile_x=tile_x, tile_y=tile_y, overlap=overlap, **kwargs
-            )
-        )
-
-    async def get_property(self, instance_id: str, name: str) -> Any:
-        return getattr(self._get_instance(instance_id), name)
-
-    async def memory_used_encode(self, instance_id: str, shape: Any, dtype: Any) -> int:
-        return self._get_instance(instance_id).memory_used_encode(shape, dtype)
-
-    async def memory_used_decode(self, instance_id: str, shape: Any, dtype: Any) -> int:
-        return self._get_instance(instance_id).memory_used_decode(shape, dtype)
-
-    async def process_input(self, instance_id: str, image: Any) -> Any:
-        return detach_if_grad(self._get_instance(instance_id).process_input(image))
-
-    async def process_output(self, instance_id: str, image: Any) -> Any:
-        return detach_if_grad(self._get_instance(instance_id).process_output(image))
-
-
-class VAEProxy(BaseProxy[VAERegistry]):
-    _registry_class = VAERegistry
-    __module__ = "comfy.sd"
-
-    @property
-    def patcher(self) -> ModelPatcherProxy:
-        if not hasattr(self, "_patcher_proxy"):
-            patcher_id = self._call_rpc("get_patcher_id")
-            self._patcher_proxy = ModelPatcherProxy(patcher_id, manage_lifecycle=False)
-        return self._patcher_proxy
-
-    @property
-    def first_stage_model(self) -> FirstStageModelProxy:
-        if not hasattr(self, "_first_stage_model_proxy"):
-            fsm_id = self._call_rpc("get_first_stage_model_id")
-            self._first_stage_model_proxy = FirstStageModelProxy(
-                fsm_id, manage_lifecycle=False
-            )
-        return self._first_stage_model_proxy
-
-    @property
-    def vae_dtype(self) -> Any:
-        return self._get_property("vae_dtype")
-
-    def encode(self, pixels: Any) -> Any:
-        return self._call_rpc("encode", pixels)
-
-    def encode_tiled(
-        self, pixels: Any, tile_x: int = 512, tile_y: int = 512, overlap: int = 64
-    ) -> Any:
-        return self._call_rpc("encode_tiled", pixels, tile_x, tile_y, overlap)
-
-    def decode(self, samples: Any, **kwargs: Any) -> Any:
-        return self._call_rpc("decode", samples, **kwargs)
-
-    def decode_tiled(
-        self,
-        samples: Any,
-        tile_x: int = 64,
-        tile_y: int = 64,
-        overlap: int = 16,
-        **kwargs: Any,
-    ) -> Any:
-        return self._call_rpc(
-            "decode_tiled", samples, tile_x, tile_y, overlap, **kwargs
-        )
-
-    def get_sd(self) -> Any:
-        return self._call_rpc("get_sd")
-
-    def _get_property(self, name: str) -> Any:
-        return self._call_rpc("get_property", name)
-
-    @property
-    def latent_dim(self) -> int:
-        return self._get_property("latent_dim")
-
-    @property
-    def latent_channels(self) -> int:
-        return self._get_property("latent_channels")
-
-    @property
-    def downscale_ratio(self) -> Any:
-        return self._get_property("downscale_ratio")
-
-    @property
-    def upscale_ratio(self) -> Any:
-        return self._get_property("upscale_ratio")
-
-    @property
-    def output_channels(self) -> int:
-        return self._get_property("output_channels")
-
-    @property
-    def check_not_vide(self) -> bool:
-        return self._get_property("not_video")
-
-    @property
-    def device(self) -> Any:
-        return self._get_property("device")
-
-    @property
-    def working_dtypes(self) -> Any:
-        return self._get_property("working_dtypes")
-
-    @property
-    def disable_offload(self) -> bool:
-        return self._get_property("disable_offload")
-
-    @property
-    def size(self) -> Any:
-        return self._get_property("size")
-
-    def memory_used_encode(self, shape: Any, dtype: Any) -> int:
-        return self._call_rpc("memory_used_encode", shape, dtype)
-
-    def memory_used_decode(self, shape: Any, dtype: Any) -> int:
-        return self._call_rpc("memory_used_decode", shape, dtype)
-
-    def process_input(self, image: Any) -> Any:
-        return self._call_rpc("process_input", image)
-
-    def process_output(self, image: Any) -> Any:
-        return self._call_rpc("process_output", image)
-
-
-if not IS_CHILD_PROCESS:
-    _VAE_REGISTRY_SINGLETON = VAERegistry()
-    _FIRST_STAGE_MODEL_REGISTRY_SINGLETON = FirstStageModelRegistry()
--- a/comfy/k_diffusion/sampling.py
+++ b/comfy/k_diffusion/sampling.py
@@ -1,5 +1,4 @@
 import math
-import os
 from functools import partial

 from scipy import integrate
@@ -13,8 +12,8 @@ from . import deis
 from . import sa_solver
 import comfy.model_patcher
 import comfy.model_sampling
+
 import comfy.memory_management
-from comfy.cli_args import args
 from comfy.utils import model_trange as trange

 def append_zero(x):
@@ -192,13 +191,6 @@ def sample_euler(model, x, sigmas, extra_args=None, callback=None, disable=None,
    """Implements Algorithm 2 (Euler steps) from Karras et al. (2022)."""
    extra_args = {} if extra_args is None else extra_args
    s_in = x.new_ones([x.shape[0]])
-    isolation_active = args.use_process_isolation or os.environ.get("PYISOLATE_CHILD") == "1"
-    if isolation_active:
-        target_device = sigmas.device
-        if x.device != target_device:
-            x = x.to(target_device)
-        s_in = s_in.to(target_device)
-
    for i in trange(len(sigmas) - 1, disable=disable):
        if s_churn > 0:
            gamma = min(s_churn / (len(sigmas) - 1), 2 ** 0.5 - 1) if s_tmin <= sigmas[i] <= s_tmax else 0.
--- a/comfy/ldm/cascade/stage_a.py
+++ b/comfy/ldm/cascade/stage_a.py
@@ -136,16 +136,7 @@ class ResBlock(nn.Module):
            ops.Linear(c_hidden, c),
        )

-        self.gammas = nn.Parameter(torch.zeros(6), requires_grad=True)
-
-        # Init weights
-        def _basic_init(module):
-            if isinstance(module, nn.Linear) or isinstance(module, nn.Conv2d):
-                torch.nn.init.xavier_uniform_(module.weight)
-                if module.bias is not None:
-                    nn.init.constant_(module.bias, 0)
-
-        self.apply(_basic_init)
+        self.gammas = nn.Parameter(torch.zeros(6), requires_grad=False)

    def _norm(self, x, norm):
        return norm(x.permute(0, 2, 3, 1)).permute(0, 3, 1, 2)
--- a/comfy/ldm/flux/model.py
+++ b/comfy/ldm/flux/model.py
@@ -44,6 +44,22 @@ class FluxParams:
    txt_norm: bool = False


+def invert_slices(slices, length):
+    """List the (start, end) gaps that slices leave uncovered in [0, length)."""
+    gaps = []
+    cursor = 0
+
+    # Visit ranges in ascending order and emit the hole before each one.
+    for lo, hi in sorted(slices):
+        if lo > cursor:
+            gaps.append((cursor, lo))
+        # max() keeps the cursor from moving backwards on overlapping ranges.
+        cursor = max(cursor, hi)
+    if cursor < length:
+        gaps.append((cursor, length))
+    return gaps
+
+
 class Flux(nn.Module):
    """
    Transformer model for flow matching on sequences.
@@ -138,6 +154,7 @@ class Flux(nn.Module):
        y: Tensor,
        guidance: Tensor = None,
        control = None,
+        timestep_zero_index=None,
        transformer_options={},
        attn_mask: Tensor = None,
    ) -> Tensor:
@@ -164,10 +181,6 @@ class Flux(nn.Module):
            txt = self.txt_norm(txt)
        txt = self.txt_in(txt)

-        vec_orig = vec
-        if self.params.global_modulation:
-            vec = (self.double_stream_modulation_img(vec_orig), self.double_stream_modulation_txt(vec_orig))
-
        if "post_input" in patches:
            for p in patches["post_input"]:
                out = p({"img": img, "txt": txt, "img_ids": img_ids, "txt_ids": txt_ids, "transformer_options": transformer_options})
@@ -182,6 +195,24 @@ class Flux(nn.Module):
        else:
            pe = None

+        vec_orig = vec
+        txt_vec = vec
+        extra_kwargs = {}
+        if timestep_zero_index is not None:
+            modulation_dims = []
+            batch = vec.shape[0] // 2
+            vec_orig = vec_orig.reshape(2, batch, vec.shape[1]).movedim(0, 1)
+            invert = invert_slices(timestep_zero_index, img.shape[1])
+            for s in invert:
+                modulation_dims.append((s[0], s[1], 0))
+            for s in timestep_zero_index:
+                modulation_dims.append((s[0], s[1], 1))
+            extra_kwargs["modulation_dims_img"] = modulation_dims
+            txt_vec = vec[:batch]
+
+        if self.params.global_modulation:
+            vec = (self.double_stream_modulation_img(vec_orig), self.double_stream_modulation_txt(txt_vec))
+
        blocks_replace = patches_replace.get("dit", {})
        transformer_options["total_blocks"] = len(self.double_blocks)
        transformer_options["block_type"] = "double"
@@ -195,7 +226,8 @@ class Flux(nn.Module):
                                                   vec=args["vec"],
                                                   pe=args["pe"],
                                                   attn_mask=args.get("attn_mask"),
-                                                   transformer_options=args.get("transformer_options"))
+                                                   transformer_options=args.get("transformer_options"),
+                                                   **extra_kwargs)
                    return out

                out = blocks_replace[("double_block", i)]({"img": img,
@@ -213,7 +245,8 @@ class Flux(nn.Module):
                                 vec=vec,
                                 pe=pe,
                                 attn_mask=attn_mask,
-                                 transformer_options=transformer_options)
+                                 transformer_options=transformer_options,
+                                 **extra_kwargs)

            if control is not None: # Controlnet
                control_i = control.get("input")
@@ -230,6 +263,12 @@ class Flux(nn.Module):
        if self.params.global_modulation:
            vec, _ = self.single_stream_modulation(vec_orig)

+        extra_kwargs = {}
+        if timestep_zero_index is not None:
+            # Single blocks see txt+img concatenated: shift ranges past txt; a range starting at 0 also covers txt.
+            modulation_dims_combined = list(map(lambda x: (0 if x[0] == 0 else x[0] + txt.shape[1], x[1] + txt.shape[1], x[2]), modulation_dims))
+            extra_kwargs["modulation_dims"] = modulation_dims_combined
+
        transformer_options["total_blocks"] = len(self.single_blocks)
        transformer_options["block_type"] = "single"
        transformer_options["img_slice"] = [txt.shape[1], img.shape[1]]
@@ -242,7 +281,8 @@ class Flux(nn.Module):
                                       vec=args["vec"],
                                       pe=args["pe"],
                                       attn_mask=args.get("attn_mask"),
-                                       transformer_options=args.get("transformer_options"))
+                                       transformer_options=args.get("transformer_options"),
+                                       **extra_kwargs)
                    return out

                out = blocks_replace[("single_block", i)]({"img": img,
@@ -253,7 +293,7 @@ class Flux(nn.Module):
                                                          {"original_block": block_wrap})
                img = out["img"]
            else:
-                img = block(img, vec=vec, pe=pe, attn_mask=attn_mask, transformer_options=transformer_options)
+                img = block(img, vec=vec, pe=pe, attn_mask=attn_mask, transformer_options=transformer_options, **extra_kwargs)

            if control is not None: # Controlnet
                control_o = control.get("output")
@@ -264,7 +304,11 @@ class Flux(nn.Module):

        img = img[:, txt.shape[1] :, ...]

-        img = self.final_layer(img, vec_orig)  # (N, T, patch_size ** 2 * out_channels)
+        extra_kwargs = {}
+        if timestep_zero_index is not None:
+            extra_kwargs["modulation_dims"] = modulation_dims
+
+        img = self.final_layer(img, vec_orig, **extra_kwargs)  # (N, T, patch_size ** 2 * out_channels)
        return img

    def process_img(self, x, index=0, h_offset=0, w_offset=0, transformer_options={}):
@@ -312,13 +356,16 @@ class Flux(nn.Module):
        w_len = ((w_orig + (patch_size // 2)) // patch_size)
        img, img_ids = self.process_img(x, transformer_options=transformer_options)
        img_tokens = img.shape[1]
+        timestep_zero_index = None
        if ref_latents is not None:
+            ref_num_tokens = []
            h = 0
            w = 0
            index = 0
            ref_latents_method = kwargs.get("ref_latents_method", self.params.default_ref_method)
+            timestep_zero = ref_latents_method == "index_timestep_zero"
            for ref in ref_latents:
-                if ref_latents_method == "index":
+                if ref_latents_method in ("index", "index_timestep_zero"):
                    index += self.params.ref_index_scale
                    h_offset = 0
                    w_offset = 0
@@ -339,9 +386,16 @@ class Flux(nn.Module):
                    h = max(h, ref.shape[-2] + h_offset)
                    w = max(w, ref.shape[-1] + w_offset)

-                kontext, kontext_ids = self.process_img(ref, index=index, h_offset=h_offset, w_offset=w_offset)
+                kontext, kontext_ids = self.process_img(ref, index=index, h_offset=h_offset, w_offset=w_offset, transformer_options=transformer_options)
                img = torch.cat([img, kontext], dim=1)
                img_ids = torch.cat([img_ids, kontext_ids], dim=1)
+                ref_num_tokens.append(kontext.shape[1])
+            if timestep_zero:
+                if index > 0:
+                    timestep = torch.cat([timestep, timestep * 0], dim=0)
+                    timestep_zero_index = [[img_tokens, img_ids.shape[1]]]
+            transformer_options = transformer_options.copy()
+            transformer_options["reference_image_num_tokens"] = ref_num_tokens

        txt_ids = torch.zeros((bs, context.shape[1], len(self.params.axes_dim)), device=x.device, dtype=torch.float32)

@@ -349,6 +403,6 @@ class Flux(nn.Module):
            for i in self.params.txt_ids_dims:
                txt_ids[:, :, i] = torch.linspace(0, context.shape[1] - 1, steps=context.shape[1], device=x.device, dtype=torch.float32)

-        out = self.forward_orig(img, img_ids, context, txt_ids, timestep, y, guidance, control, transformer_options, attn_mask=kwargs.get("attention_mask", None))
+        out = self.forward_orig(img, img_ids, context, txt_ids, timestep, y, guidance, control, timestep_zero_index=timestep_zero_index, transformer_options=transformer_options, attn_mask=kwargs.get("attention_mask", None))
        out = out[:, :img_tokens]
        return rearrange(out, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=h_len, w=w_len, ph=self.patch_size, pw=self.patch_size)[:,:,:h_orig,:w_orig]
--- a/comfy/ldm/hunyuan3dv2_1/hunyuandit.py
+++ b/comfy/ldm/hunyuan3dv2_1/hunyuandit.py
@@ -343,6 +343,7 @@ class CrossAttention(nn.Module):
            k.reshape(b, s2, self.num_heads * self.head_dim),
            v,
            heads=self.num_heads,
+            low_precision_attention=False,
        )

        out = self.out_proj(x)
@@ -412,6 +413,7 @@ class Attention(nn.Module):
            key.reshape(B, N, self.num_heads * self.head_dim),
            value,
            heads=self.num_heads,
+            low_precision_attention=False,
        )

        x = self.out_proj(x)
--- a/comfy/ldm/lightricks/av_model.py
+++ b/comfy/ldm/lightricks/av_model.py
@@ -681,6 +681,33 @@ class LTXAVModel(LTXVModel):
        additional_args["has_spatial_mask"] = has_spatial_mask

        ax, a_latent_coords = self.a_patchifier.patchify(ax)
+
+        # Inject reference audio for ID-LoRA in-context conditioning
+        ref_audio = kwargs.get("ref_audio", None)
+        ref_audio_seq_len = 0
+        if ref_audio is not None:
+            ref_tokens = ref_audio["tokens"].to(dtype=ax.dtype, device=ax.device)
+            if ref_tokens.shape[0] < ax.shape[0]:
+                ref_tokens = ref_tokens.expand(ax.shape[0], -1, -1)
+            ref_audio_seq_len = ref_tokens.shape[1]
+            B = ax.shape[0]
+
+            # Compute negative temporal positions matching ID-LoRA convention:
+            # offset by -(end_of_last_token + time_per_latent) so reference ends just before t=0
+            p = self.a_patchifier
+            tpl = p.hop_length * p.audio_latent_downsample_factor / p.sample_rate
+            ref_start = p._get_audio_latent_time_in_sec(0, ref_audio_seq_len, torch.float32, ax.device)
+            ref_end = p._get_audio_latent_time_in_sec(1, ref_audio_seq_len + 1, torch.float32, ax.device)
+            time_offset = ref_end[-1].item() + tpl
+            ref_start = (ref_start - time_offset).unsqueeze(0).expand(B, -1).unsqueeze(1)
+            ref_end = (ref_end - time_offset).unsqueeze(0).expand(B, -1).unsqueeze(1)
+            ref_pos = torch.stack([ref_start, ref_end], dim=-1)
+
+            additional_args["ref_audio_seq_len"] = ref_audio_seq_len
+            additional_args["target_audio_seq_len"] = ax.shape[1]
+            ax = torch.cat([ref_tokens, ax], dim=1)
+            a_latent_coords = torch.cat([ref_pos.to(a_latent_coords), a_latent_coords], dim=2)
+
        ax = self.audio_patchify_proj(ax)

        # additional_args.update({"av_orig_shape": list(x.shape)})
@@ -721,6 +748,14 @@ class LTXAVModel(LTXVModel):

        # Prepare audio timestep
        a_timestep = kwargs.get("a_timestep")
+        ref_audio_seq_len = kwargs.get("ref_audio_seq_len", 0)
+        if ref_audio_seq_len > 0 and a_timestep is not None:
+            # Reference tokens must have timestep=0, expand scalar/1D timestep to per-token so ref=0 and target=sigma.
+            target_len = kwargs.get("target_audio_seq_len")
+            if a_timestep.dim() <= 1:
+                a_timestep = a_timestep.view(-1, 1).expand(batch_size, target_len)
+            ref_ts = torch.zeros(batch_size, ref_audio_seq_len, *a_timestep.shape[2:], device=a_timestep.device, dtype=a_timestep.dtype)
+            a_timestep = torch.cat([ref_ts, a_timestep], dim=1)
        if a_timestep is not None:
            a_timestep_scaled = a_timestep * self.timestep_scale_multiplier
            a_timestep_flat = a_timestep_scaled.flatten()
@@ -955,6 +990,13 @@ class LTXAVModel(LTXVModel):
        v_embedded_timestep = embedded_timestep[0]
        a_embedded_timestep = embedded_timestep[1]

+        # Trim reference audio tokens before unpatchification
+        ref_audio_seq_len = kwargs.get("ref_audio_seq_len", 0)
+        if ref_audio_seq_len > 0:
+            ax = ax[:, ref_audio_seq_len:]
+            if a_embedded_timestep.shape[1] > 1:
+                a_embedded_timestep = a_embedded_timestep[:, ref_audio_seq_len:]
+
        # Expand compressed video timestep if needed
        if isinstance(v_embedded_timestep, CompressedTimestep):
            v_embedded_timestep = v_embedded_timestep.expand()
--- a/comfy/ldm/lightricks/vae/causal_conv3d.py
+++ b/comfy/ldm/lightricks/vae/causal_conv3d.py
@@ -23,6 +23,11 @@ class CausalConv3d(nn.Module):
        self.in_channels = in_channels
        self.out_channels = out_channels

+        if isinstance(stride, int):
+            self.time_stride = stride
+        else:
+            self.time_stride = stride[0]
+
        kernel_size = (kernel_size, kernel_size, kernel_size)
        self.time_kernel_size = kernel_size[0]

@@ -58,16 +63,25 @@ class CausalConv3d(nn.Module):
        pieces = [ cached, x ]
        if is_end and not causal:
            pieces.append(x[:, :, -1:, :, :].repeat((1, 1, (self.time_kernel_size - 1) // 2, 1, 1)))
+        input_length = sum([piece.shape[2] for piece in pieces])
+        cache_length = (self.time_kernel_size - self.time_stride) + ((input_length - self.time_kernel_size) % self.time_stride)

        needs_caching = not is_end
-        if needs_caching and x.shape[2] >= self.time_kernel_size - 1:
+        if needs_caching and cache_length == 0:
+            self.temporal_cache_state[tid] = (x[:, :, :0, :, :], False)
            needs_caching = False
-            self.temporal_cache_state[tid] = (x[:, :, -(self.time_kernel_size - 1):, :, :], False)
+        if needs_caching and x.shape[2] >= cache_length:
+            needs_caching = False
+            self.temporal_cache_state[tid] = (x[:, :, -cache_length:, :, :], False)

        x = torch.cat(pieces, dim=2)
+        del pieces
+        del cached

        if needs_caching:
-            self.temporal_cache_state[tid] = (x[:, :, -(self.time_kernel_size - 1):, :, :], False)
+            self.temporal_cache_state[tid] = (x[:, :, -cache_length:, :, :], False)
+        elif is_end:
+            self.temporal_cache_state[tid] = (None, True)

        return self.conv(x) if x.shape[2] >= self.time_kernel_size else x[:, :, :0, :, :]

--- a/comfy/ldm/lightricks/vae/causal_video_autoencoder.py
+++ b/comfy/ldm/lightricks/vae/causal_video_autoencoder.py
@@ -11,6 +11,7 @@ from .causal_conv3d import CausalConv3d
 from .pixel_norm import PixelNorm
 from ..model import PixArtAlphaCombinedTimestepSizeEmbeddings
 import comfy.ops
+import comfy.model_management
 from comfy.ldm.modules.diffusionmodules.model import torch_cat_if_needed

 ops = comfy.ops.disable_weight_init
@@ -232,10 +233,7 @@ class Encoder(nn.Module):

        self.gradient_checkpointing = False

-    def forward_orig(self, sample: torch.FloatTensor) -> torch.FloatTensor:
-        r"""The forward method of the `Encoder` class."""
-
-        sample = patchify(sample, patch_size_hw=self.patch_size, patch_size_t=1)
+    def _forward_chunk(self, sample: torch.FloatTensor) -> Optional[torch.FloatTensor]:
        sample = self.conv_in(sample)

        checkpoint_fn = (
@@ -246,10 +244,14 @@ class Encoder(nn.Module):

        for down_block in self.down_blocks:
            sample = checkpoint_fn(down_block)(sample)
+            if sample is None or sample.shape[2] == 0:
+                return None

        sample = self.conv_norm_out(sample)
        sample = self.conv_act(sample)
        sample = self.conv_out(sample)
+        if sample is None or sample.shape[2] == 0:
+            return None

        if self.latent_log_var == "uniform":
            last_channel = sample[:, -1:, ...]
@@ -281,9 +283,35 @@ class Encoder(nn.Module):

        return sample

+    def forward_orig(self, sample: torch.FloatTensor, device=None) -> torch.FloatTensor:
+        r"""The forward method of the `Encoder` class."""
+
+        max_chunk_size = get_max_chunk_size(sample.device if device is None else device) * 2  # encoder is more memory-efficient than decoder
+        frame_size = sample[:, :, :1, :, :].numel() * sample.element_size()
+        frame_size = int(frame_size * (self.conv_in.out_channels / self.conv_in.in_channels))
+
+        outputs = []
+        samples = [sample[:, :, :1, :, :]]
+        if sample.shape[2] > 1:
+            chunk_t = max(2, max_chunk_size // frame_size)
+            if chunk_t < 4:
+                chunk_t = 2
+            elif chunk_t < 8:
+                chunk_t = 4
+            else:
+                chunk_t = (chunk_t // 8) * 8
+            samples += list(torch.split(sample[:, :, 1:, :, :], chunk_t, dim=2))
+        for chunk_idx, chunk in enumerate(samples):
+            if chunk_idx == len(samples) - 1:
+                mark_conv3d_ended(self)
+            chunk = patchify(chunk, patch_size_hw=self.patch_size, patch_size_t=1).to(device=device)
+            output = self._forward_chunk(chunk)
+            if output is not None:
+                outputs.append(output)
+
+        return torch_cat_if_needed(outputs, dim=2)
+
    def forward(self, *args, **kwargs):
-        #No encoder support so just flag the end so it doesnt use the cache.
-        mark_conv3d_ended(self)
        try:
            return self.forward_orig(*args, **kwargs)
        finally:
@@ -296,7 +324,23 @@ class Encoder(nn.Module):
                    module.temporal_cache_state.pop(tid, None)


-MAX_CHUNK_SIZE=(128 * 1024 ** 2)
+MIN_VRAM_FOR_CHUNK_SCALING = 6 * 1024 ** 3
+MAX_VRAM_FOR_CHUNK_SCALING = 24 * 1024 ** 3
+MIN_CHUNK_SIZE = 32 * 1024 ** 2
+MAX_CHUNK_SIZE = 128 * 1024 ** 2
+
+def get_max_chunk_size(device: torch.device) -> int:
+    """Pick a chunk byte budget, scaled linearly with get_total_memory(device)."""
+    vram = comfy.model_management.get_total_memory(dev=device)
+    # Clamp to the fixed bounds outside the scaling window.
+    if vram <= MIN_VRAM_FOR_CHUNK_SCALING:
+        return MIN_CHUNK_SIZE
+    if vram >= MAX_VRAM_FOR_CHUNK_SCALING:
+        return MAX_CHUNK_SIZE
+
+    vram_span = MAX_VRAM_FOR_CHUNK_SCALING - MIN_VRAM_FOR_CHUNK_SCALING
+    fraction = (vram - MIN_VRAM_FOR_CHUNK_SCALING) / vram_span
+    return int(MIN_CHUNK_SIZE + fraction * (MAX_CHUNK_SIZE - MIN_CHUNK_SIZE))

 class Decoder(nn.Module):
    r"""
@@ -456,6 +500,17 @@ class Decoder(nn.Module):

        self.gradient_checkpointing = False

+        # Precompute output scale factors: (channels, (t_scale, h_scale, w_scale), t_offset)
+        ts, hs, ws, to = 1, 1, 1, 0
+        for block in self.up_blocks:
+            if isinstance(block, DepthToSpaceUpsample):
+                ts *= block.stride[0]
+                hs *= block.stride[1]
+                ws *= block.stride[2]
+                if block.stride[0] > 1:
+                    to = to * block.stride[0] + 1
+        self._output_scale = (out_channels // (patch_size ** 2), (ts, hs * patch_size, ws * patch_size), to)
+
        self.timestep_conditioning = timestep_conditioning

        if timestep_conditioning:
@@ -477,11 +532,62 @@ class Decoder(nn.Module):
            )


-    # def forward(self, sample: torch.FloatTensor, target_shape) -> torch.FloatTensor:
+    def decode_output_shape(self, input_shape):
+        channels, (t_scale, h_scale, w_scale), t_offset = self._output_scale  # (channels, (t, h, w) scales, t offset)
+        return (input_shape[0], channels, input_shape[2] * t_scale - t_offset, input_shape[3] * h_scale, input_shape[4] * w_scale)
+
+    def run_up(self, idx, sample_ref, ended, timestep_shift_scale, scaled_timestep, checkpoint_fn, output_buffer, output_offset, max_chunk_size):
+        sample = sample_ref[0]
+        sample_ref[0] = None
+        if idx >= len(self.up_blocks):
+            sample = self.conv_norm_out(sample)
+            if timestep_shift_scale is not None:
+                shift, scale = timestep_shift_scale
+                sample = sample * (1 + scale) + shift
+            sample = self.conv_act(sample)
+            if ended:
+                mark_conv3d_ended(self.conv_out)
+            sample = self.conv_out(sample, causal=self.causal)
+            if sample is not None and sample.shape[2] > 0:
+                sample = unpatchify(sample, patch_size_hw=self.patch_size, patch_size_t=1)
+                t = sample.shape[2]
+                output_buffer[:, :, output_offset[0]:output_offset[0] + t].copy_(sample)
+                output_offset[0] += t
+            return
+
+        up_block = self.up_blocks[idx]
+        if ended:
+            mark_conv3d_ended(up_block)
+        if self.timestep_conditioning and isinstance(up_block, UNetMidBlock3D):
+            sample = checkpoint_fn(up_block)(
+                sample, causal=self.causal, timestep=scaled_timestep
+            )
+        else:
+            sample = checkpoint_fn(up_block)(sample, causal=self.causal)
+
+        if sample is None or sample.shape[2] == 0:
+            return
+
+        total_bytes = sample.numel() * sample.element_size()
+        num_chunks = (total_bytes + max_chunk_size - 1) // max_chunk_size
+
+        if num_chunks == 1:
+            # When not chunking, drop our own reference to sample so the callee can free it as soon as it is done
+            next_sample_ref = [sample]
+            del sample
+            self.run_up(idx + 1, next_sample_ref, ended, timestep_shift_scale, scaled_timestep, checkpoint_fn, output_buffer, output_offset, max_chunk_size)
+            return
+        else:
+            samples = torch.chunk(sample, chunks=num_chunks, dim=2)
+
+            for chunk_idx, sample1 in enumerate(samples):
+                self.run_up(idx + 1, [sample1], ended and chunk_idx == len(samples) - 1, timestep_shift_scale, scaled_timestep, checkpoint_fn, output_buffer, output_offset, max_chunk_size)
+
    def forward_orig(
        self,
        sample: torch.FloatTensor,
        timestep: Optional[torch.Tensor] = None,
+        output_buffer: Optional[torch.Tensor] = None,
    ) -> torch.FloatTensor:
        r"""The forward method of the `Decoder` class."""
        batch_size = sample.shape[0]
@@ -496,6 +602,7 @@ class Decoder(nn.Module):
        )

        timestep_shift_scale = None
+        scaled_timestep = None
        if self.timestep_conditioning:
            assert (
                timestep is not None
@@ -523,48 +630,18 @@ class Decoder(nn.Module):
            )
            timestep_shift_scale = ada_values.unbind(dim=1)

-        output = []
+        if output_buffer is None:
+            output_buffer = torch.empty(
+                self.decode_output_shape(sample.shape),
+                dtype=sample.dtype, device=comfy.model_management.intermediate_device(),
+            )
+        output_offset = [0]

-        def run_up(idx, sample, ended):
-            if idx >= len(self.up_blocks):
-                sample = self.conv_norm_out(sample)
-                if timestep_shift_scale is not None:
-                    shift, scale = timestep_shift_scale
-                    sample = sample * (1 + scale) + shift
-                sample = self.conv_act(sample)
-                if ended:
-                    mark_conv3d_ended(self.conv_out)
-                sample = self.conv_out(sample, causal=self.causal)
-                if sample is not None and sample.shape[2] > 0:
-                    output.append(sample)
-                return
+        max_chunk_size = get_max_chunk_size(sample.device)

-            up_block = self.up_blocks[idx]
-            if (ended):
-                mark_conv3d_ended(up_block)
-            if self.timestep_conditioning and isinstance(up_block, UNetMidBlock3D):
-                sample = checkpoint_fn(up_block)(
-                    sample, causal=self.causal, timestep=scaled_timestep
-                )
-            else:
-                sample = checkpoint_fn(up_block)(sample, causal=self.causal)
+        self.run_up(0, [sample], True, timestep_shift_scale, scaled_timestep, checkpoint_fn, output_buffer, output_offset, max_chunk_size)

-            if sample is None or sample.shape[2] == 0:
-                return
-
-            total_bytes = sample.numel() * sample.element_size()
-            num_chunks = (total_bytes + MAX_CHUNK_SIZE - 1) // MAX_CHUNK_SIZE
-            samples = torch.chunk(sample, chunks=num_chunks, dim=2)
-
-            for chunk_idx, sample1 in enumerate(samples):
-                run_up(idx + 1, sample1, ended and chunk_idx == len(samples) - 1)
-
-        run_up(0, sample, True)
-        sample = torch.cat(output, dim=2)
-
-        sample = unpatchify(sample, patch_size_hw=self.patch_size, patch_size_t=1)
-
-        return sample
+        return output_buffer

    def forward(self, *args, **kwargs):
        try:
@@ -688,12 +765,25 @@ class SpaceToDepthDownsample(nn.Module):
            causal=True,
            spatial_padding_mode=spatial_padding_mode,
        )
+        self.temporal_cache_state = {}

    def forward(self, x, causal: bool = True):
-        if self.stride[0] == 2:
+        tid = threading.get_ident()
+        cached, pad_first, cached_x, cached_input = self.temporal_cache_state.get(tid, (None, True, None, None))
+        if cached_input is not None:
+            x = torch_cat_if_needed([cached_input, x], dim=2)
+            cached_input = None
+
+        if self.stride[0] == 2 and pad_first:
            x = torch.cat(
                [x[:, :, :1, :, :], x], dim=2
            )  # duplicate first frames for padding
+            pad_first = False
+
+        if x.shape[2] < self.stride[0]:
+            cached_input = x
+            self.temporal_cache_state[tid] = (cached, pad_first, cached_x, cached_input)
+            return None

        # skip connection
        x_in = rearrange(
@@ -708,15 +798,26 @@ class SpaceToDepthDownsample(nn.Module):

        # conv
        x = self.conv(x, causal=causal)
-        x = rearrange(
-            x,
-            "b c (d p1) (h p2) (w p3) -> b (c p1 p2 p3) d h w",
-            p1=self.stride[0],
-            p2=self.stride[1],
-            p3=self.stride[2],
-        )
+        if self.stride[0] == 2 and x.shape[2] == 1:
+            if cached_x is not None:
+                x = torch_cat_if_needed([cached_x, x], dim=2)
+                cached_x = None
+            else:
+                cached_x = x
+                x = None

-        x = x + x_in
+        if x is not None:
+            x = rearrange(
+                x,
+                "b c (d p1) (h p2) (w p3) -> b (c p1 p2 p3) d h w",
+                p1=self.stride[0],
+                p2=self.stride[1],
+                p3=self.stride[2],
+            )
+
+        cached = add_exchange_cache(x, cached, x_in, dim=2)
+
+        self.temporal_cache_state[tid] = (cached, pad_first, cached_x, cached_input)

        return x

@@ -1049,6 +1150,8 @@ class processor(nn.Module):
        return (x - self.get_buffer("mean-of-means").view(1, -1, 1, 1, 1).to(x)) / self.get_buffer("std-of-means").view(1, -1, 1, 1, 1).to(x)

 class VideoVAE(nn.Module):
+    comfy_has_chunked_io = True
+
    def __init__(self, version=0, config=None):
        super().__init__()

@@ -1191,14 +1294,15 @@ class VideoVAE(nn.Module):
            }
        return config

-    def encode(self, x):
-        frames_count = x.shape[2]
-        if ((frames_count - 1) % 8) != 0:
-            raise ValueError("Invalid number of frames: Encode input must have 1 + 8 * x frames (e.g., 1, 9, 17, ...). Please check your input.")
-        means, logvar = torch.chunk(self.encoder(x), 2, dim=1)
+    def encode(self, x, device=None):
+        # Trim dim 2 to the largest 1 + 8*k length that fits (slice bound is at
+        # least 1). Extra trailing entries are silently dropped rather than rejected.
+        x = x[:, :, :max(1, 1 + ((x.shape[2] - 1) // 8) * 8), :, :]
+        # The encoder output is split in two along dim 1; only the first half
+        # (means) is used, the second half (logvar) is discarded.
+        means, logvar = torch.chunk(self.encoder(x, device=device), 2, dim=1)
         return self.per_channel_statistics.normalize(means)

-    def decode(self, x):
+    def decode_output_shape(self, input_shape):
+        # Pure delegation: the decoder computes the output shape for `input_shape`.
+        return self.decoder.decode_output_shape(input_shape)
+
+    def decode(self, x, output_buffer=None):
+        # With timestep conditioning enabled, x is first blended with fresh Gaussian
+        # noise: noise * decode_noise_scale + (1 - decode_noise_scale) * x.
+        # The un-normalized result is passed to the decoder together with
+        # decode_timestep; output_buffer is forwarded to the decoder unchanged.
         if self.timestep_conditioning: #TODO: seed
             x = torch.randn_like(x) * self.decode_noise_scale + (1.0 - self.decode_noise_scale) * x
-        return self.decoder(self.per_channel_statistics.un_normalize(x), timestep=self.decode_timestep)
+        return self.decoder(self.per_channel_statistics.un_normalize(x), timestep=self.decode_timestep, output_buffer=output_buffer)
--- a/comfy/ldm/rt_detr/rtdetr_v4.py
+++ b/comfy/ldm/rt_detr/rtdetr_v4.py
@@ -0,0 +1,725 @@
+from collections import OrderedDict
+from typing import List
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torchvision
+import comfy.model_management
+from comfy.ldm.modules.attention import optimized_attention_for_device
+
+# Human-readable class names, 80 entries. Presumably indexed by the detector's
+# class id (list position == label index) -- confirm against the consumer.
+COCO_CLASSES = [
+    'person','bicycle','car','motorcycle','airplane','bus','train','truck','boat',
+    'traffic light','fire hydrant','stop sign','parking meter','bench','bird','cat',
+    'dog','horse','sheep','cow','elephant','bear','zebra','giraffe','backpack',
+    'umbrella','handbag','tie','suitcase','frisbee','skis','snowboard','sports ball',
+    'kite','baseball bat','baseball glove','skateboard','surfboard','tennis racket',
+    'bottle','wine glass','cup','fork','knife','spoon','bowl','banana','apple',
+    'sandwich','orange','broccoli','carrot','hot dog','pizza','donut','cake','chair',
+    'couch','potted plant','bed','dining table','toilet','tv','laptop','mouse',
+    'remote','keyboard','cell phone','microwave','oven','toaster','sink',
+    'refrigerator','book','clock','vase','scissors','teddy bear','hair drier','toothbrush',
+]
+
+# ---------------------------------------------------------------------------
+# HGNetv2 backbone
+# ---------------------------------------------------------------------------
+
+class ConvBNAct(nn.Module):
+    """Bias-free Conv2d -> BatchNorm2d -> optional ReLU.
+
+    The convolution is padded symmetrically by ``(k - 1) // 2``. For an even
+    kernel size this is smaller than "same" padding, so any extra padding has
+    to be applied to the input by the caller.
+    """
+    def __init__(self, ic, oc, k=3, s=1, groups=1, use_act=True, device=None, dtype=None, operations=None):
+        super().__init__()
+
+        pad = (k - 1) // 2
+        self.conv = operations.Conv2d(ic, oc, k, s, pad, groups=groups, bias=False, device=device, dtype=dtype)
+        self.bn = nn.BatchNorm2d(oc, device=device, dtype=dtype)
+        if use_act:
+            self.act = nn.ReLU()
+        else:
+            self.act = nn.Identity()
+
+    def forward(self, x):
+        normed = self.bn(self.conv(x))
+        return self.act(normed)
+
+class LightConvBNAct(nn.Module):
+    """1x1 ConvBNAct without activation, then a depthwise k x k ConvBNAct with ReLU."""
+    def __init__(self, ic, oc, k, device=None, dtype=None, operations=None):
+        super().__init__()
+        shared = dict(device=device, dtype=dtype, operations=operations)
+        # pointwise projection (no activation)
+        self.conv1 = ConvBNAct(ic, oc, 1, use_act=False, **shared)
+        # depthwise conv: groups == channels
+        self.conv2 = ConvBNAct(oc, oc, k, groups=oc, use_act=True, **shared)
+
+    def forward(self, x):
+        pointwise = self.conv1(x)
+        return self.conv2(pointwise)
+
+class _StemBlock(nn.Module):
+    """Backbone stem: strided conv, then a max-pool branch and a two-conv branch
+    that are concatenated on the channel axis and reduced by two more convs."""
+    def __init__(self, ic, mc, oc, device=None, dtype=None, operations=None):
+        super().__init__()
+        factory = dict(device=device, dtype=dtype, operations=operations)
+        self.stem1 = ConvBNAct(ic, mc, 3, 2, **factory)
+        # stem2a/stem2b use kernel=2, stride=1, no internal padding;
+        # padding is applied manually in forward (matching PaddlePaddle original)
+        self.stem2a = ConvBNAct(mc, mc // 2, 2, 1, **factory)
+        self.stem2b = ConvBNAct(mc // 2, mc, 2, 1, **factory)
+        self.stem3 = ConvBNAct(mc * 2, mc, 3, 2, **factory)
+        self.stem4 = ConvBNAct(mc, oc, 1, **factory)
+        self.pool = nn.MaxPool2d(2, 1, ceil_mode=True)
+
+    def forward(self, x):
+        # one extra column on the right and one extra row at the bottom
+        right_bottom = (0, 1, 0, 1)
+        stem = F.pad(self.stem1(x), right_bottom)   # shared by pool and stem2a
+        conv_branch = self.stem2a(stem)
+        conv_branch = self.stem2b(F.pad(conv_branch, right_bottom))
+        pool_branch = self.pool(stem)
+        merged = torch.cat([pool_branch, conv_branch], 1)
+        return self.stem4(self.stem3(merged))
+
+
+class _HG_Block(nn.Module):
+    """Chain of `layer_num` conv layers whose input and every intermediate output
+    are concatenated on the channel axis and squeezed by two 1x1 convs."""
+    def __init__(self, ic, mc, oc, layer_num, k=3, residual=False, light=False, device=None, dtype=None, operations=None):
+        super().__init__()
+        self.residual = residual
+        # both layer types take (in_channels, out_channels, kernel) positionally
+        layer_cls = LightConvBNAct if light else ConvBNAct
+        self.layers = nn.ModuleList(
+            [layer_cls(ic if i == 0 else mc, mc, k, device=device, dtype=dtype, operations=operations) for i in range(layer_num)])
+
+        # input + one mc-channel map per layer
+        concat_channels = ic + layer_num * mc
+        self.aggregation = nn.Sequential(
+            ConvBNAct(concat_channels, oc // 2, 1, device=device, dtype=dtype, operations=operations),
+            ConvBNAct(oc // 2,         oc,      1, device=device, dtype=dtype, operations=operations))
+
+    def forward(self, x):
+        feats = [x]
+        for layer in self.layers:
+            feats.append(layer(feats[-1]))
+        out = self.aggregation(torch.cat(feats, 1))
+        if self.residual:
+            return out + x
+        return out
+
+
+class _HG_Stage(nn.Module):
+    """Optional stride-2 depthwise downsample followed by `num_blocks` _HG_Blocks."""
+    # config order: ic, mc, oc, num_blocks, downsample, light, k, layer_num
+    def __init__(self, ic, mc, oc, num_blocks, downsample=True, light=False, k=3, layer_num=6, device=None, dtype=None, operations=None):
+        super().__init__()
+        if not downsample:
+            self.downsample = nn.Identity()
+        else:
+            self.downsample = ConvBNAct(ic, ic, 3, 2, groups=ic, use_act=False, device=device, dtype=dtype, operations=operations)
+
+        blocks = []
+        for i in range(num_blocks):
+            first = i == 0
+            # only the first block changes the channel count, so it has no residual
+            blocks.append(_HG_Block(ic if first else oc, mc, oc, layer_num,
+                                    k=k, residual=not first, light=light,
+                                    device=device, dtype=dtype, operations=operations))
+        self.blocks = nn.Sequential(*blocks)
+
+    def forward(self, x):
+        reduced = self.downsample(x)
+        return self.blocks(reduced)
+
+
+class HGNetv2(nn.Module):
+    """Stem plus four stages; forward returns the outputs of the stages listed in `return_idx`."""
+    # B5 config: stem=[3,32,64], stages=[ic, mc, oc, blocks, down, light, k, layers]
+    _STAGE_CFGS = [[64,  64,  128,  1, False, False, 3, 6],
+                   [128, 128, 512,  2, True,  False, 3, 6],
+                   [512, 256, 1024, 5, True,  True,  5, 6],
+                   [1024,512, 2048, 2, True,  True,  5, 6]]
+
+    def __init__(self, return_idx=(1, 2, 3), device=None, dtype=None, operations=None):
+        super().__init__()
+        self.stem = _StemBlock(3, 32, 64, device=device, dtype=dtype, operations=operations)
+        stages = []
+        for cfg in self._STAGE_CFGS:
+            stages.append(_HG_Stage(*cfg, device=device, dtype=dtype, operations=operations))
+        self.stages = nn.ModuleList(stages)
+        self.return_idx = list(return_idx)
+        # column 2 of a stage config is its output channel count
+        self.out_channels = [self._STAGE_CFGS[i][2] for i in return_idx]
+
+    def forward(self, x: torch.Tensor) -> List[torch.Tensor]:
+        feat = self.stem(x)
+        selected = []
+        for stage_idx, stage in enumerate(self.stages):
+            feat = stage(feat)
+            if stage_idx in self.return_idx:
+                selected.append(feat)
+        return selected
+
+
+# ---------------------------------------------------------------------------
+# Encoder — HybridEncoder  (dfine version: RepNCSPELAN4 + SCDown PAN)
+# ---------------------------------------------------------------------------
+
+class ConvNormLayer(nn.Module):
+    """Biased Conv2d followed by SiLU (when act == 'silu') or identity.
+
+    No norm layer is created here; the checkpoint is expected to carry
+    pre-fused BN weights. Padding defaults to ``(k - 1) // 2``.
+    """
+    def __init__(self, ic, oc, k, s, g=1, padding=None, act=None, device=None, dtype=None, operations=None):
+        super().__init__()
+        pad = padding if padding is not None else (k - 1) // 2
+        self.conv = operations.Conv2d(ic, oc, k, s, pad, groups=g, bias=True, device=device, dtype=dtype)
+        if act == 'silu':
+            self.act = nn.SiLU()
+        else:
+            self.act = nn.Identity()
+
+    def forward(self, x):
+        conv_out = self.conv(x)
+        return self.act(conv_out)
+
+
+class VGGBlock(nn.Module):
+    """Single 3x3 biased conv (stride 1, padding 1) plus SiLU.
+
+    Stands in for a Rep-VGG block, so the checkpoint must carry pre-fused weights.
+    """
+    def __init__(self, ic, oc, device=None, dtype=None, operations=None):
+        super().__init__()
+        self.conv = operations.Conv2d(ic, oc, 3, 1, padding=1, bias=True, device=device, dtype=dtype)
+        self.act = nn.SiLU()
+
+    def forward(self, x):
+        conv_out = self.conv(x)
+        return self.act(conv_out)
+
+
+class CSPLayer(nn.Module):
+    """Two parallel 1x1 projections; one passes through a stack of VGGBlocks,
+    the two are summed, and an optional 1x1 conv restores `oc` channels."""
+    def __init__(self, ic, oc, num_blocks=3, expansion=1.0, act='silu', device=None, dtype=None, operations=None):
+        super().__init__()
+        hidden = int(oc * expansion)
+        shared = dict(device=device, dtype=dtype, operations=operations)
+        self.conv1 = ConvNormLayer(ic, hidden, 1, 1, act=act, **shared)
+        self.conv2 = ConvNormLayer(ic, hidden, 1, 1, act=act, **shared)
+        self.bottlenecks = nn.Sequential(*[VGGBlock(hidden, hidden, **shared) for _ in range(num_blocks)])
+        # the output projection is only needed when the hidden width differs from oc
+        if hidden != oc:
+            self.conv3 = ConvNormLayer(hidden, oc, 1, 1, act=act, **shared)
+        else:
+            self.conv3 = nn.Identity()
+
+    def forward(self, x):
+        deep = self.bottlenecks(self.conv1(x))
+        shortcut = self.conv2(x)
+        return self.conv3(deep + shortcut)
+
+
+class RepNCSPELAN4(nn.Module):
+    """CSP-ELAN block: the FPN/PAN block in RTv4's HybridEncoder.
+
+    cv1 output is split into two halves; the second half feeds cv2, whose
+    output feeds cv3. All four tensors are concatenated and fused by cv4.
+    """
+    def __init__(self, c1, c2, c3, c4, n=3, act='silu', device=None, dtype=None, operations=None):
+        super().__init__()
+        shared = dict(device=device, dtype=dtype, operations=operations)
+        self.c = c3 // 2
+        self.cv1 = ConvNormLayer(c1, c3, 1, 1, act=act, **shared)
+        self.cv2 = nn.Sequential(
+            CSPLayer(c3 // 2, c4, n, 1.0, act=act, **shared),
+            ConvNormLayer(c4, c4, 3, 1, act=act, **shared))
+        self.cv3 = nn.Sequential(
+            CSPLayer(c4, c4, n, 1.0, act=act, **shared),
+            ConvNormLayer(c4, c4, 3, 1, act=act, **shared))
+        self.cv4 = ConvNormLayer(c3 + 2 * c4, c2, 1, 1, act=act, **shared)
+
+    def forward(self, x):
+        first_half, second_half = self.cv1(x).split((self.c, self.c), 1)
+        stage_a = self.cv2(second_half)
+        stage_b = self.cv3(stage_a)
+        return self.cv4(torch.cat([first_half, second_half, stage_a, stage_b], 1))
+
+
+class SCDown(nn.Module):
+    """Separable downsampling for the HybridEncoder PAN bottom-up path:
+    a 1x1 conv followed by a depthwise k x k conv with stride s (no activation)."""
+    def __init__(self, ic, oc, k, s, device=None, dtype=None, operations=None):
+        super().__init__()
+        shared = dict(device=device, dtype=dtype, operations=operations)
+        self.cv1 = ConvNormLayer(ic, oc, 1, 1, **shared)
+        self.cv2 = ConvNormLayer(oc, oc, k, s, g=oc, **shared)
+
+    def forward(self, x):
+        pointwise = self.cv1(x)
+        return self.cv2(pointwise)
+
+
+class SelfAttention(nn.Module):
+    """Multi-head attention with separate q/k/v/out linear projections.
+
+    The attention kernel is chosen per call by `optimized_attention_for_device`.
+    """
+    def __init__(self, embed_dim, num_heads, device=None, dtype=None, operations=None):
+        super().__init__()
+        self.embed_dim = embed_dim
+        self.num_heads = num_heads
+        self.head_dim = embed_dim // num_heads
+
+        def projection():
+            return operations.Linear(embed_dim, embed_dim, device=device, dtype=dtype)
+
+        self.q_proj = projection()
+        self.k_proj = projection()
+        self.v_proj = projection()
+        self.out_proj = projection()
+
+    def forward(self, query, key, value, attn_mask=None):
+        attention_fn = optimized_attention_for_device(query.device, False, small_input=True)
+        q = self.q_proj(query)
+        k = self.k_proj(key)
+        v = self.v_proj(value)
+        attended = attention_fn(q, k, v, heads=self.num_heads, mask=attn_mask)
+        return self.out_proj(attended)
+
+
+class _TransformerEncoderLayer(nn.Module):
+    """Single AIFI encoder layer: post-norm self-attention and GELU feed-forward.
+
+    The positional embedding is added to queries and keys only; values use the
+    raw input.
+    """
+    def __init__(self, d_model, nhead, dim_feedforward, device=None, dtype=None, operations=None):
+        super().__init__()
+        self.self_attn = SelfAttention(d_model, nhead, device=device, dtype=dtype, operations=operations)
+        self.linear1 = operations.Linear(d_model, dim_feedforward, device=device, dtype=dtype)
+        self.linear2 = operations.Linear(dim_feedforward, d_model, device=device, dtype=dtype)
+        self.norm1 = operations.LayerNorm(d_model, device=device, dtype=dtype)
+        self.norm2 = operations.LayerNorm(d_model, device=device, dtype=dtype)
+        self.activation = nn.GELU()
+
+    def forward(self, src, src_mask=None, pos_embed=None):
+        with_pos = src if pos_embed is None else src + pos_embed
+        attn_out = self.self_attn(with_pos, with_pos, value=src, attn_mask=src_mask)
+        src = self.norm1(src + attn_out)
+        ffn_out = self.linear2(self.activation(self.linear1(src)))
+        return self.norm2(src + ffn_out)
+
+class _TransformerEncoder(nn.Module):
+    """Thin wrapper so state-dict keys are  encoder.0.layers.N.*"""
+    def __init__(self, num_layers, d_model, nhead, dim_feedforward, device=None, dtype=None, operations=None):
+        super().__init__()
+        stack = []
+        for _ in range(num_layers):
+            stack.append(_TransformerEncoderLayer(d_model, nhead, dim_feedforward, device=device, dtype=dtype, operations=operations))
+        self.layers = nn.ModuleList(stack)
+
+    def forward(self, src, src_mask=None, pos_embed=None):
+        # run the layers in order, each consuming the previous layer's output
+        hidden = src
+        for encoder_layer in self.layers:
+            hidden = encoder_layer(hidden, src_mask=src_mask, pos_embed=pos_embed)
+        return hidden
+
+
+class HybridEncoder(nn.Module):
+    """Multi-scale encoder: 1x1 input projections, transformer layers on the
+    levels listed in `use_encoder_idx`, then a top-down FPN and bottom-up PAN.
+
+    `forward` takes one feature map per entry of `in_channels` and returns the
+    same number of maps, each with `hidden_dim` channels.
+
+    Positional embeddings are cached at construction for `eval_spatial_size`
+    (height, width). When no cache exists, or the incoming feature map has a
+    different size, the embedding is rebuilt for the actual size.
+    """
+    def __init__(self, in_channels=(512, 1024, 2048), feat_strides=(8, 16, 32), hidden_dim=256, nhead=8, dim_feedforward=2048, use_encoder_idx=(2,), num_encoder_layers=1,
+                 pe_temperature=10000, expansion=1.0, depth_mult=1.0, act='silu', eval_spatial_size=(640, 640), device=None, dtype=None, operations=None):
+        super().__init__()
+        self.in_channels       = list(in_channels)
+        self.feat_strides      = list(feat_strides)
+        self.hidden_dim        = hidden_dim
+        self.use_encoder_idx   = list(use_encoder_idx)
+        self.pe_temperature    = pe_temperature
+        self.eval_spatial_size = eval_spatial_size
+        self.out_channels      = [hidden_dim] * len(in_channels)
+        self.out_strides       = list(feat_strides)
+
+        # channel projection (expects pre-fused weights)
+        self.input_proj = nn.ModuleList([
+            nn.Sequential(OrderedDict([('conv', operations.Conv2d(ch, hidden_dim, 1, bias=True, device=device, dtype=dtype))]))
+            for ch in in_channels
+        ])
+
+        # AIFI transformer — use _TransformerEncoder so keys are  encoder.0.layers.N.*
+        self.encoder = nn.ModuleList([
+            _TransformerEncoder(num_encoder_layers, hidden_dim, nhead, dim_feedforward, device=device, dtype=dtype, operations=operations)
+            for _ in range(len(use_encoder_idx))
+        ])
+
+        nb  = round(3 * depth_mult)
+        exp = expansion
+
+        # top-down FPN  (dfine: lateral conv has no act)
+        self.lateral_convs = nn.ModuleList(
+            [ConvNormLayer(hidden_dim, hidden_dim, 1, 1, device=device, dtype=dtype, operations=operations)
+             for _ in range(len(in_channels) - 1)])
+        self.fpn_blocks = nn.ModuleList(
+            [RepNCSPELAN4(hidden_dim * 2, hidden_dim, hidden_dim * 2, round(exp * hidden_dim // 2), nb, act=act, device=device, dtype=dtype, operations=operations)
+             for _ in range(len(in_channels) - 1)])
+
+        # bottom-up PAN  (dfine: nn.Sequential(SCDown) — keeps checkpoint key  .0.cv1/.0.cv2)
+        self.downsample_convs = nn.ModuleList(
+            [nn.Sequential(SCDown(hidden_dim, hidden_dim, 3, 2, device=device, dtype=dtype, operations=operations))
+             for _ in range(len(in_channels) - 1)])
+        self.pan_blocks = nn.ModuleList(
+            [RepNCSPELAN4(hidden_dim * 2, hidden_dim, hidden_dim * 2, round(exp * hidden_dim // 2), nb, act=act, device=device, dtype=dtype, operations=operations)
+             for _ in range(len(in_channels) - 1)])
+
+        # cache positional embeddings for fixed spatial size
+        if eval_spatial_size:
+            for idx in self.use_encoder_idx:
+                stride = self.feat_strides[idx]
+                pe = self._build_pe(eval_spatial_size[1] // stride,
+                                    eval_spatial_size[0] // stride,
+                                    hidden_dim, pe_temperature)
+                setattr(self, f'pos_embed{idx}', pe)
+
+    @staticmethod
+    def _build_pe(w, h, dim=256, temp=10000.):
+        """Return a [1, w*h, dim] sin/cos positional embedding for a w x h grid."""
+        assert dim % 4 == 0
+        gw = torch.arange(w, dtype=torch.float32)
+        gh = torch.arange(h, dtype=torch.float32)
+        gw, gh = torch.meshgrid(gw, gh, indexing='ij')
+        pdim  = dim // 4
+        omega = 1. / (temp ** (torch.arange(pdim, dtype=torch.float32) / pdim))
+        ow = gw.flatten()[:, None] @ omega[None]
+        oh = gh.flatten()[:, None] @ omega[None]
+        return torch.cat([ow.sin(), ow.cos(), oh.sin(), oh.cos()], 1)[None]
+
+    def _pos_embed_for(self, enc_idx, h, w):
+        """Return the positional embedding for level `enc_idx` at feature size (h, w).
+
+        The embedding cached in __init__ is used only when it was built for
+        exactly this size; otherwise a new one is built with the same recipe.
+        """
+        cached = getattr(self, f'pos_embed{enc_idx}', None)
+        if cached is not None and self.eval_spatial_size:
+            stride = self.feat_strides[enc_idx]
+            cached_hw = (self.eval_spatial_size[0] // stride, self.eval_spatial_size[1] // stride)
+            if (h, w) == cached_hw:
+                return cached
+        return self._build_pe(w, h, self.hidden_dim, self.pe_temperature)
+
+    def forward(self, feats: List[torch.Tensor]) -> List[torch.Tensor]:
+        proj = [self.input_proj[i](f) for i, f in enumerate(feats)]
+
+        for i, enc_idx in enumerate(self.use_encoder_idx):
+            h, w = proj[enc_idx].shape[2:]
+            # [B, C, H, W] -> [B, H*W, C] token sequence
+            src  = proj[enc_idx].flatten(2).permute(0, 2, 1)
+            pe = self._pos_embed_for(enc_idx, h, w).to(device=src.device, dtype=src.dtype)
+            for layer in self.encoder[i].layers:
+                src = layer(src, pos_embed=pe)
+            proj[enc_idx] = src.permute(0, 2, 1).reshape(-1, self.hidden_dim, h, w).contiguous()
+
+        # top-down FPN: start from the last level and merge into earlier ones
+        n = len(self.in_channels)
+        inner = [proj[-1]]
+        for k in range(n - 1, 0, -1):
+            j = n - 1 - k
+            top = self.lateral_convs[j](inner[0])
+            inner[0] = top
+            lower = proj[k - 1]
+            # Resize to the lateral feature's exact size: identical to a 2x nearest
+            # upsample when sizes are exact multiples, and still valid when the
+            # lower level has an odd height/width.
+            up = F.interpolate(top, size=lower.shape[2:], mode='nearest')
+            inner.insert(0, self.fpn_blocks[j](torch.cat([up, lower], 1)))
+
+        # bottom-up PAN: downsample the previous output and merge with the FPN level
+        outs = [inner[0]]
+        for k in range(n - 1):
+            outs.append(self.pan_blocks[k](
+                torch.cat([self.downsample_convs[k](outs[-1]), inner[k + 1]], 1)))
+        return outs
+
+
+# ---------------------------------------------------------------------------
+# Decoder — DFINETransformer
+# ---------------------------------------------------------------------------
+
+def _deformable_attn_v2(value: list, spatial_shapes, sampling_locations: torch.Tensor, attention_weights: torch.Tensor, num_points_list: List[int]) -> torch.Tensor:
+    """
+    Bilinearly sample every level at the given locations and reduce the samples
+    with the attention weights.
+
+    value            : list of per-level tensors  [bs*n_head, c, h_l, w_l]
+    sampling_locations: [bs, Lq, n_head, sum(pts), 2]  in [0,1]
+    attention_weights : [bs, Lq, n_head, sum(pts)]
+    returns           : [bs, Lq, n_head*c]
+    """
+    _, channels = value[0].shape[:2]
+    batch, num_queries, heads, _, _ = sampling_locations.shape
+    flat_batch = batch * heads
+
+    # grid_sample expects coordinates in [-1, 1]; fold heads into the batch axis
+    grid = (2 * sampling_locations - 1).permute(0, 2, 1, 3, 4).flatten(0, 1)  # [bs*n_head, Lq, sum_pts, 2]
+    level_grids = grid.split(num_points_list, dim=2)                          # per level: [bs*n_head, Lq, pts_l, 2]
+
+    per_level = [
+        F.grid_sample(value[lvl].reshape(flat_batch, channels, h, w), level_grids[lvl],
+                      mode='bilinear', padding_mode='zeros', align_corners=False)  # [bs*n_head, c, Lq, pts_l]
+        for lvl, (h, w) in enumerate(spatial_shapes)
+    ]
+
+    weights = attention_weights.permute(0, 2, 1, 3).flatten(0, 1).unsqueeze(1)  # [bs*n_head, 1, Lq, sum_pts]
+    pooled = (torch.cat(per_level, -1) * weights).sum(-1)                       # [bs*n_head, c, Lq]
+    return pooled.reshape(batch, heads * channels, num_queries).permute(0, 2, 1)
+
+
+class MSDeformableAttention(nn.Module):
+    """Multi-scale deformable attention with a per-level number of sampling points.
+
+    `num_points` is either one int (used for every level) or a list/tuple with
+    one count per level.
+    """
+    def __init__(self, embed_dim=256, num_heads=8, num_levels=3, num_points=4, offset_scale=0.5, device=None, dtype=None, operations=None):
+        super().__init__()
+        self.embed_dim, self.num_heads = embed_dim, num_heads
+        self.head_dim  = embed_dim // num_heads
+        # Accept tuples as well as lists; a tuple used to be treated as a scalar
+        # and repeated per level, which then failed in sum().
+        if isinstance(num_points, (list, tuple)):
+            pts = list(num_points)
+        else:
+            pts = [num_points] * num_levels
+        self.num_points_list = pts
+        self.offset_scale    = offset_scale
+        total = num_heads * sum(pts)
+        # one 1/n entry per sampling point of a level with n points
+        self.register_buffer('num_points_scale', torch.tensor([1. / n for n in pts for _ in range(n)], dtype=torch.float32))
+        self.sampling_offsets  = operations.Linear(embed_dim, total * 2, device=device, dtype=dtype)
+        self.attention_weights = operations.Linear(embed_dim, total, device=device, dtype=dtype)
+
+    def forward(self, query, ref_pts, value, spatial_shapes):
+        # NOTE(review): ref_pts is indexed as [bs, Lq, 1, 4] with (cx, cy, w, h)
+        # in the last dim -- confirm against the caller.
+        bs, Lq = query.shape[:2]
+        total_pts = sum(self.num_points_list)
+        offsets = self.sampling_offsets(query).reshape(
+            bs, Lq, self.num_heads, total_pts, 2)
+        attn_w  = F.softmax(
+            self.attention_weights(query).reshape(
+                bs, Lq, self.num_heads, total_pts), -1)
+        scale   = self.num_points_scale.to(query).unsqueeze(-1)
+        # offsets are scaled by the last two reference components and offset_scale
+        offset  = offsets * scale * ref_pts[:, :, None, :, 2:] * self.offset_scale
+        locs    = ref_pts[:, :, None, :, :2] + offset  # [bs, Lq, n_head, sum_pts, 2]
+        return _deformable_attn_v2(value, spatial_shapes, locs, attn_w, self.num_points_list)
+
+
+class Gate(nn.Module):
+    """Blend two tensors with learned sigmoid gates, then layer-normalize.
+
+    One linear layer on the concatenated inputs produces both gates.
+    """
+    def __init__(self, d_model, device=None, dtype=None, operations=None):
+        super().__init__()
+        self.gate = operations.Linear(2 * d_model, 2 * d_model, device=device, dtype=dtype)
+        self.norm = operations.LayerNorm(d_model, device=device, dtype=dtype)
+
+    def forward(self, x1, x2):
+        joined = torch.cat([x1, x2], -1)
+        gate_a, gate_b = torch.sigmoid(self.gate(joined)).chunk(2, -1)
+        blended = gate_a * x1 + gate_b * x2
+        return self.norm(blended)
+
+
+class MLP(nn.Module):
+    """Stack of `num_layers` linear layers with SiLU after every layer but the last.
+
+    Layer widths are in_dim -> hidden_dim (repeated) -> out_dim.
+    """
+    def __init__(self, in_dim, hidden_dim, out_dim, num_layers, device=None, dtype=None, operations=None):
+        super().__init__()
+        dims = [in_dim] + [hidden_dim] * (num_layers - 1) + [out_dim]
+        self.layers = nn.ModuleList(operations.Linear(dims[i], dims[i + 1], device=device, dtype=dtype) for i in range(num_layers))
+
+    def forward(self, x):
+        last = len(self.layers) - 1
+        for i, layer in enumerate(self.layers):
+            x = layer(x)
+            if i < last:
+                # functional SiLU: avoids building a throwaway nn.SiLU module
+                # for every hidden layer on every call
+                x = F.silu(x)
+        return x
+
+
+class TransformerDecoderLayer(nn.Module):
+    """Decoder layer: self-attention, deformable cross-attention merged through
+    a Gate, then a ReLU feed-forward block. Each stage is followed by a norm."""
+    def __init__(self, d_model=256, nhead=8, dim_feedforward=1024, num_levels=3, num_points=4, device=None, dtype=None, operations=None):
+        super().__init__()
+        shared = dict(device=device, dtype=dtype)
+        self.self_attn = SelfAttention(d_model, nhead, operations=operations, **shared)
+        self.norm1 = operations.LayerNorm(d_model, **shared)
+        self.cross_attn = MSDeformableAttention(d_model, nhead, num_levels, num_points, operations=operations, **shared)
+        self.gateway = Gate(d_model, operations=operations, **shared)
+        self.linear1 = operations.Linear(d_model, dim_feedforward, **shared)
+        self.activation = nn.ReLU()
+        self.linear2 = operations.Linear(dim_feedforward, d_model, **shared)
+        self.norm3 = operations.LayerNorm(d_model, **shared)
+
+    def forward(self, target, ref_pts, value, spatial_shapes, attn_mask=None, query_pos=None):
+        # self-attention: positions are added to queries/keys, not to values
+        with_pos = target if query_pos is None else target + query_pos
+        self_out = self.self_attn(with_pos, with_pos, value=target, attn_mask=attn_mask)
+        target = self.norm1(target + self_out)
+
+        # deformable cross-attention on the updated target, merged by the gate
+        cross_query = target if query_pos is None else target + query_pos
+        cross_out = self.cross_attn(cross_query, ref_pts, value, spatial_shapes)
+        target = self.gateway(target, cross_out)
+
+        # feed-forward; the residual sum is clamped to +-65504 (the largest
+        # finite float16 value) before the final norm
+        ffn_out = self.linear2(self.activation(self.linear1(target)))
+        return self.norm3((target + ffn_out).clamp(-65504, 65504))
+
+
+# ---------------------------------------------------------------------------
+# FDR utilities
+# ---------------------------------------------------------------------------
+
+def weighting_function(reg_max, up, reg_scale):
+    """Non-uniform weighting function W(n) for FDR box regression.
+
+    Returns a 1-D tensor (dtype/device of `up`) of bin values that is symmetric
+    around 0: geometric steps inside, with +-2*|up[0]|*|reg_scale| at the ends.
+    """
+    inner_bound = (abs(up[0]) * abs(reg_scale)).item()
+    outer_bound = inner_bound * 2
+    base = (inner_bound + 1) ** (2 / (reg_max - 2))
+    half = reg_max // 2
+    negative = [-(base ** i) + 1 for i in range(half - 1, 0, -1)]
+    positive = [(base ** i) - 1 for i in range(1, half)]
+    bins = [-outer_bound, *negative, 0, *positive, outer_bound]
+    return torch.tensor(bins, dtype=up.dtype, device=up.device)
+
+
+def distance2bbox(points, distance, reg_scale):
+    """Decode edge-distances → cxcywh boxes.
+
+    `points` holds (cx, cy, w, h) and `distance` holds (left, top, right, bottom)
+    in the last dim; `reg_scale` must be a tensor.
+    """
+    scale = abs(reg_scale).to(dtype=points.dtype)
+    cx, cy = points[..., 0], points[..., 1]
+    unit_w, unit_h = points[..., 2] / scale, points[..., 3] / scale
+    half = 0.5 * scale
+
+    left = cx - (half + distance[..., 0]) * unit_w
+    top = cy - (half + distance[..., 1]) * unit_h
+    right = cx + (half + distance[..., 2]) * unit_w
+    bottom = cy + (half + distance[..., 3]) * unit_h
+
+    # corners -> centre/size
+    return torch.stack([(left + right) / 2, (top + bottom) / 2, right - left, bottom - top], -1)
+
+
+class Integral(nn.Module):
+    """Sum Pr(n)·W(n) over the distribution bins.
+
+    The last dim of the input is read as 4 groups of (reg_max + 1) logits;
+    each group is softmaxed and reduced with the `project` weights.
+    """
+    def __init__(self, reg_max=32):
+        super().__init__()
+        self.reg_max = reg_max
+
+    def forward(self, x, project):
+        leading = list(x.shape[:-1])
+        probs = F.softmax(x.reshape(-1, self.reg_max + 1), 1)
+        weights = project.to(device=probs.device, dtype=probs.dtype)
+        expected = F.linear(probs, weights).reshape(-1, 4)
+        return expected.reshape(leading + [-1])
+
+
+class LQE(nn.Module):
+    """Location Quality Estimator — refines class scores using corner distribution.
+
+    Per box edge it takes the top-k bin probabilities plus their mean, and a
+    small MLP maps these statistics to a score correction.
+    """
+    def __init__(self, k=4, hidden_dim=64, num_layers=2, reg_max=32, device=None, dtype=None, operations=None):
+        super().__init__()
+        self.k, self.reg_max = k, reg_max
+        # 4 edges x (k top probabilities + their mean)
+        self.reg_conf = MLP(4 * (k + 1), hidden_dim, 1, num_layers, device=device, dtype=dtype, operations=operations)
+
+    def forward(self, scores, pred_corners):
+        batch, queries, _ = pred_corners.shape
+        bins = pred_corners.reshape(batch, queries, 4, self.reg_max + 1)
+        top_probs, _ = F.softmax(bins, -1).topk(self.k, -1)
+        stats = torch.cat([top_probs, top_probs.mean(-1, keepdim=True)], -1)
+        correction = self.reg_conf(stats.reshape(batch, queries, -1))
+        return scores + correction
+
+
+class TransformerDecoder(nn.Module):
+    """Decoder stack that refines box predictions layer by layer and scores them at `eval_idx`.
+
+    Only layers 0..eval_idx are instantiated. A negative `eval_idx` is counted
+    from the end, i.e. resolved as `num_layers + eval_idx`.
+    """
+    def __init__(self, hidden_dim, nhead, dim_feedforward, num_levels, num_points, num_layers, reg_max, reg_scale, up, eval_idx=-1, device=None, dtype=None, operations=None):
+        super().__init__()
+        self.hidden_dim = hidden_dim
+        self.num_layers = num_layers
+        self.nhead      = nhead
+        # Resolve negative indices relative to num_layers.
+        self.eval_idx   = eval_idx if eval_idx >= 0 else num_layers + eval_idx
+        self.up, self.reg_scale, self.reg_max = up, reg_scale, reg_max
+        # Layers past eval_idx are never run by forward(), so they are not created.
+        self.layers = nn.ModuleList([
+            TransformerDecoderLayer(hidden_dim, nhead, dim_feedforward, num_levels, num_points, device=device, dtype=dtype, operations=operations)
+            for _ in range(self.eval_idx + 1)
+        ])
+        self.lqe_layers = nn.ModuleList([LQE(4, 64, 2, reg_max, device=device, dtype=dtype, operations=operations) for _ in range(self.eval_idx + 1)])
+        # Bin weights consumed by `integral` in forward(); computed once from reg_max/up/reg_scale.
+        self.register_buffer('project', weighting_function(reg_max, up, reg_scale))
+
+    def _value_op(self, memory, spatial_shapes):
+        """Reshape memory to per-level value tensors for deformable attention."""
+        c = self.hidden_dim // self.nhead
+        split = [h * w for h, w in spatial_shapes]
+        val = memory.reshape(memory.shape[0], memory.shape[1], self.nhead, c) # memory: [bs, sum(h*w), hidden_dim]
+        # → [bs, n_head, c, sum_hw]
+        val = val.permute(0, 2, 3, 1).flatten(0, 1)  # [bs*n_head, c, sum_hw]
+        return val.split(split, dim=-1)  # list of [bs*n_head, c, h_l*w_l]
+
+    def forward(self, target, ref_pts_unact, memory, spatial_shapes, bbox_head, score_head, query_pos_head, pre_bbox_head, integral):
+        """Run the decoder layers and return the boxes and logits of the `eval_idx` layer.
+
+        Args:
+            target: initial query content fed to the first layer.
+            ref_pts_unact: reference points before sigmoid.
+            memory: encoder output, reshaped into per-level values for the layers.
+            spatial_shapes: iterable of (h, w), one per feature level.
+            bbox_head: per-layer heads (indexed by layer) producing corner distributions.
+            score_head: per-layer heads (indexed by layer); only the `eval_idx` entry is used.
+            query_pos_head: maps the current reference points to query positional embeddings.
+            pre_bbox_head: head applied once, after the first layer, to get the initial boxes.
+            integral: callable `(pred_corners, project)` reducing distributions to edge distances.
+
+        Returns:
+            `(boxes, logits)`, each stacked from a single-entry list, so the
+            leading dimension has size 1.
+        """
+        val_split_flat = self._value_op(memory, spatial_shapes) # pre-split value for deformable attention
+
+        # reshape to [bs*n_head, c, h_l, w_l]
+        value = []
+        for lvl, (h, w) in enumerate(spatial_shapes):
+            v = val_split_flat[lvl]   # [bs*n_head, c, h*w]
+            value.append(v.reshape(v.shape[0], v.shape[1], h, w))
+
+        ref_pts  = F.sigmoid(ref_pts_unact)
+        output   = target
+        # Both accumulators start at 0 so the first layer's head sees only its own output.
+        output_detach = pred_corners_undetach = 0
+
+        dec_bboxes, dec_logits = [], []
+
+        for i, layer in enumerate(self.layers):
+            ref_input    = ref_pts.unsqueeze(2)           # [bs, Lq, 1, 4]
+            query_pos    = query_pos_head(ref_pts).clamp(-10, 10)
+            output       = layer(output, ref_input, value, spatial_shapes, query_pos=query_pos)
+
+            if i == 0:
+                # Inverse sigmoid of the (clamped) reference points, then predict the
+                # initial boxes that every later layer's distances are decoded against.
+                ref_unact = ref_pts.clamp(1e-5, 1 - 1e-5)
+                ref_unact = torch.log(ref_unact / (1 - ref_unact))
+                pre_bboxes = F.sigmoid(pre_bbox_head(output) + ref_unact)
+                ref_pts_initial = pre_bboxes.detach()
+
+            # Corner logits are residual: this layer's prediction plus the previous layer's.
+            pred_corners = bbox_head[i](output + output_detach) + pred_corners_undetach
+            inter_ref_bbox = distance2bbox(ref_pts_initial, integral(pred_corners, self.project), self.reg_scale)
+
+            if i == self.eval_idx:
+                # Only the evaluation layer is scored; nothing after it is needed.
+                scores = score_head[i](output)
+                scores = self.lqe_layers[i](scores, pred_corners)
+                dec_bboxes.append(inter_ref_bbox)
+                dec_logits.append(scores)
+                break
+
+            # Carry state to the next layer; reference points and output are detached.
+            pred_corners_undetach = pred_corners
+            ref_pts        = inter_ref_bbox.detach()
+            output_detach  = output.detach()
+
+        return torch.stack(dec_bboxes), torch.stack(dec_logits)
+
+
+class DFINETransformer(nn.Module):
+    """Query-selection plus decoder head: turns multi-level feature maps into boxes and class logits.
+
+    forward() flattens the projected feature maps into one memory sequence,
+    picks the `num_queries` highest-scoring memory positions as initial queries,
+    and runs them through `TransformerDecoder`.
+    """
+    def __init__(self, num_classes=80, hidden_dim=256, num_queries=300, feat_channels=[256, 256, 256], feat_strides=[8, 16, 32],
+                 num_levels=3, num_points=[3, 6, 3], nhead=8, num_layers=6, dim_feedforward=1024, eval_idx=-1, eps=1e-2, reg_max=32,
+                 reg_scale=8.0, eval_spatial_size=(640, 640), device=None, dtype=None, operations=None):
+        super().__init__()
+        assert len(feat_strides) == len(feat_channels)
+        self.hidden_dim  = hidden_dim
+        self.num_queries = num_queries
+        self.num_levels  = num_levels
+        self.eps         = eps
+        self.eval_spatial_size = eval_spatial_size
+
+        # Copy the strides, then extend with doubled strides for any extra levels.
+        self.feat_strides = list(feat_strides)
+        for i in range(num_levels - len(feat_strides)):
+            self.feat_strides.append(feat_strides[-1] * 2 ** (i + 1))
+
+        # input projection (expects pre-fused weights)
+        self.input_proj = nn.ModuleList()
+        for ch in feat_channels:
+            if ch == hidden_dim:
+                self.input_proj.append(nn.Identity())
+            else:
+                self.input_proj.append(nn.Sequential(OrderedDict([
+                    ('conv', operations.Conv2d(ch, hidden_dim, 1, bias=True, device=device, dtype=dtype))])))
+        # Extra levels beyond the provided features use stride-2 3x3 convolutions.
+        in_ch = feat_channels[-1]
+        for i in range(num_levels - len(feat_channels)):
+            self.input_proj.append(nn.Sequential(OrderedDict([
+                ('conv', operations.Conv2d(in_ch if i == 0 else hidden_dim,
+                                           hidden_dim, 3, 2, 1, bias=True, device=device, dtype=dtype))])))
+            in_ch = hidden_dim
+
+        # FDR parameters (non-trainable placeholders, set from config)
+        self.up        = nn.Parameter(torch.tensor([0.5]),      requires_grad=False)
+        self.reg_scale = nn.Parameter(torch.tensor([reg_scale]), requires_grad=False)
+
+        # A scalar num_points is repeated for every level.
+        pts = num_points if isinstance(num_points, (list, tuple)) else [num_points] * num_levels
+        self.decoder = TransformerDecoder(hidden_dim, nhead, dim_feedforward, num_levels, pts,
+                                          num_layers, reg_max, self.reg_scale, self.up, eval_idx, device=device, dtype=dtype, operations=operations)
+
+        self.query_pos_head = MLP(4, 2 * hidden_dim, hidden_dim, 2, device=device, dtype=dtype, operations=operations)
+        self.enc_output     = nn.Sequential(OrderedDict([
+            ('proj', operations.Linear(hidden_dim, hidden_dim, device=device, dtype=dtype)),
+            ('norm', operations.LayerNorm(hidden_dim, device=device, dtype=dtype))]))
+        self.enc_score_head = operations.Linear(hidden_dim, num_classes, device=device, dtype=dtype)
+        self.enc_bbox_head  = MLP(hidden_dim, hidden_dim, 4, 3, device=device, dtype=dtype, operations=operations)
+
+        # Same negative-index resolution as TransformerDecoder; heads exist only for layers 0..eval_idx_.
+        self.eval_idx_ = eval_idx if eval_idx >= 0 else num_layers + eval_idx
+        self.dec_score_head = nn.ModuleList(
+            [operations.Linear(hidden_dim, num_classes, device=device, dtype=dtype) for _ in range(self.eval_idx_ + 1)])
+        self.pre_bbox_head  = MLP(hidden_dim, hidden_dim, 4, 3, device=device, dtype=dtype, operations=operations)
+        self.dec_bbox_head  = nn.ModuleList(
+            [MLP(hidden_dim, hidden_dim, 4 * (reg_max + 1), 3, device=device, dtype=dtype, operations=operations)
+             for _ in range(self.eval_idx_ + 1)])
+        self.integral = Integral(reg_max)
+
+        # NOTE(review): `anchors` / `valid_mask` only exist when eval_spatial_size is truthy,
+        # but _decoder_input() reads them unconditionally.
+        if eval_spatial_size:
+            # Register as buffers so checkpoint values override the freshly-computed defaults
+            anchors, valid_mask = self._gen_anchors()
+            self.register_buffer('anchors', anchors)
+            self.register_buffer('valid_mask', valid_mask)
+
+    def _gen_anchors(self, spatial_shapes=None, grid_size=0.05, dtype=torch.float32, device='cpu'):
+        """Build logit-space anchors (cx, cy, w, h) for every cell of every level.
+
+        When `spatial_shapes` is None the per-level shapes are derived from
+        `eval_spatial_size` and `feat_strides`.
+
+        Returns:
+            `(anchors, valid_mask)`. `valid_mask` is True where all four anchor
+            components lie strictly inside (eps, 1 - eps); anchors at invalid
+            positions are set to +inf.
+        """
+        if spatial_shapes is None:
+            h0, w0 = self.eval_spatial_size
+            spatial_shapes = [[int(h0 / s), int(w0 / s)] for s in self.feat_strides]
+        anchors = []
+        for lvl, (h, w) in enumerate(spatial_shapes):
+            gy, gx = torch.meshgrid(torch.arange(h), torch.arange(w), indexing='ij')
+            # Cell centres normalised to [0, 1].
+            gxy = (torch.stack([gx, gy], -1).float() + 0.5) / torch.tensor([w, h], dtype=dtype)
+            # Anchor size doubles with each level.
+            wh  = torch.ones_like(gxy) * grid_size * (2. ** lvl)
+            anchors.append(torch.cat([gxy, wh], -1).reshape(-1, h * w, 4))
+        anchors    = torch.cat(anchors, 1).to(device)
+        valid_mask = ((anchors > self.eps) & (anchors < 1 - self.eps)).all(-1, keepdim=True)
+        # Inverse sigmoid, so anchors can be added to unactivated box predictions.
+        anchors    = torch.log(anchors / (1 - anchors))
+        anchors    = torch.where(valid_mask, anchors, torch.full_like(anchors, float('inf')))
+        return anchors, valid_mask
+
+    def _encoder_input(self, feats: List[torch.Tensor]):
+        """Project each feature map and flatten all levels into one sequence.
+
+        Returns:
+            `(memory, shapes)` where `memory` concatenates the flattened levels
+            along dim 1 and `shapes` lists `[h, w]` per level.
+        """
+        proj = [self.input_proj[i](f) for i, f in enumerate(feats)]
+        # Extra levels: the first is computed from the last input feature, later ones from the previous projection.
+        for i in range(len(feats), self.num_levels):
+            proj.append(self.input_proj[i](feats[-1] if i == len(feats) else proj[-1]))
+        flat, shapes = [], []
+        for f in proj:
+            _, _, h, w = f.shape
+            flat.append(f.flatten(2).permute(0, 2, 1))
+            shapes.append([h, w])
+        return torch.cat(flat, 1), shapes
+
+    def _decoder_input(self, memory: torch.Tensor):
+        """Select the top `num_queries` memory positions and return their content and reference boxes.
+
+        NOTE(review): uses the anchors precomputed for `eval_spatial_size`; they are
+        not regenerated here, so `memory` must have the matching sequence length.
+
+        Returns:
+            `(content, ref)`, both detached; `ref` is in unactivated (pre-sigmoid) form.
+        """
+        anchors, valid_mask = self.anchors.to(memory), self.valid_mask
+        if memory.shape[0] > 1:
+            anchors = anchors.repeat(memory.shape[0], 1, 1)
+
+        # Zero out memory at positions whose anchor is invalid.
+        mem      = valid_mask.to(memory) * memory
+        out_mem  = self.enc_output(mem)
+        logits   = self.enc_score_head(out_mem)
+        # Rank positions by their best class logit.
+        _, idx   = torch.topk(logits.max(-1).values, self.num_queries, dim=-1)
+        idx_e    = idx.unsqueeze(-1)
+        topk_mem = out_mem.gather(1, idx_e.expand(-1, -1, out_mem.shape[-1]))
+        topk_anc = anchors.gather(1, idx_e.expand(-1, -1, anchors.shape[-1]))
+        topk_ref = self.enc_bbox_head(topk_mem) + topk_anc
+        return topk_mem.detach(), topk_ref.detach()
+
+    def forward(self, feats: List[torch.Tensor]):
+        """Return `{'pred_logits', 'pred_boxes'}` taken from the last entry of the decoder outputs."""
+        memory, shapes = self._encoder_input(feats)
+        content, ref   = self._decoder_input(memory)
+        out_bboxes, out_logits = self.decoder(
+            content, ref, memory, shapes,
+            self.dec_bbox_head, self.dec_score_head,
+            self.query_pos_head, self.pre_bbox_head, self.integral)
+        return {'pred_logits': out_logits[-1], 'pred_boxes': out_bboxes[-1]}
+
+
+# ---------------------------------------------------------------------------
+# Main model
+# ---------------------------------------------------------------------------
+
+class RTv4(nn.Module):
+    """Detector built from an HGNetv2 backbone, a HybridEncoder and a DFINETransformer decoder."""
+    def __init__(self, num_classes=80, num_queries=300, enc_h=256, dec_h=256, enc_ff=2048, dec_ff=1024, feat_strides=[8, 16, 32], device=None, dtype=None, operations=None, **kwargs):
+        super().__init__()
+        self.device, self.dtype, self.operations = device, dtype, operations
+
+        factory = dict(device=device, dtype=dtype, operations=operations)
+        self.backbone = HGNetv2(**factory)
+        self.encoder  = HybridEncoder(hidden_dim=enc_h, dim_feedforward=enc_ff, **factory)
+        # One decoder input level per stride, each carrying the encoder's channel count.
+        self.decoder  = DFINETransformer(
+            num_classes=num_classes,
+            hidden_dim=dec_h,
+            num_queries=num_queries,
+            feat_channels=[enc_h] * len(feat_strides),
+            feat_strides=feat_strides,
+            dim_feedforward=dec_ff,
+            **factory,
+        )
+
+        self.num_classes = num_classes
+        self.num_queries = num_queries
+        self.load_device = comfy.model_management.get_torch_device()
+
+    def _forward(self, x: torch.Tensor):
+        """Backbone -> encoder -> decoder; returns the decoder's raw output dict."""
+        features = self.backbone(x)
+        encoded = self.encoder(features)
+        return self.decoder(encoded)
+
+    def postprocess(self, outputs, orig_size: tuple = (640, 640)) -> List[dict]:
+        """Convert raw outputs into per-image dicts with 'labels', 'boxes' and 'scores'.
+
+        Boxes are converted from cxcywh to xyxy and multiplied by `orig_size`
+        repeated twice. The `num_queries` best (query, class) pairs per image are
+        kept, ranked by sigmoid score.
+        """
+        logits = outputs['pred_logits']
+        xyxy = torchvision.ops.box_convert(outputs['pred_boxes'], 'cxcywh', 'xyxy')
+        scale = torch.tensor(orig_size, device=xyxy.device, dtype=xyxy.dtype).repeat(1, 2).unsqueeze(1)
+        xyxy = xyxy * scale
+        # Rank over the flattened (query, class) grid.
+        flat_scores = F.sigmoid(logits).flatten(1)
+        top_scores, flat_idx = torch.topk(flat_scores, self.num_queries, dim=-1)
+        labels = flat_idx % self.num_classes
+        query_idx = flat_idx // self.num_classes
+        picked = xyxy.gather(1, query_idx.unsqueeze(-1).expand(-1, -1, 4))
+        return [{'labels': lbl, 'boxes': box, 'scores': sc} for lbl, box, sc in zip(labels, picked, top_scores)]
+
+    def forward(self, x: torch.Tensor, orig_size: tuple = (640, 640), **kwargs):
+        """Move `x` to the load device and model dtype, run the network, and post-process."""
+        prepared = x.to(device=self.load_device, dtype=self.dtype)
+        return self.postprocess(self._forward(prepared), orig_size)
--- a/comfy/ldm/wan/vae.py
+++ b/comfy/ldm/wan/vae.py
@@ -99,7 +99,7 @@ class Resample(nn.Module):
        else:
            self.resample = nn.Identity()

-    def forward(self, x, feat_cache=None, feat_idx=[0]):
+    def forward(self, x, feat_cache=None, feat_idx=[0], final=False):
        b, c, t, h, w = x.size()
        if self.mode == 'upsample3d':
            if feat_cache is not None:
@@ -109,22 +109,7 @@ class Resample(nn.Module):
                    feat_idx[0] += 1
                else:

-                    cache_x = x[:, :, -CACHE_T:, :, :].clone()
-                    if cache_x.shape[2] < 2 and feat_cache[
-                            idx] is not None and feat_cache[idx] != 'Rep':
-                        # cache last frame of last two chunk
-                        cache_x = torch.cat([
-                            feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to(
-                                cache_x.device), cache_x
-                        ],
-                                            dim=2)
-                    if cache_x.shape[2] < 2 and feat_cache[
-                            idx] is not None and feat_cache[idx] == 'Rep':
-                        cache_x = torch.cat([
-                            torch.zeros_like(cache_x).to(cache_x.device),
-                            cache_x
-                        ],
-                                            dim=2)
+                    cache_x = x[:, :, -CACHE_T:, :, :]
                    if feat_cache[idx] == 'Rep':
                        x = self.time_conv(x)
                    else:
@@ -145,19 +130,24 @@ class Resample(nn.Module):
            if feat_cache is not None:
                idx = feat_idx[0]
                if feat_cache[idx] is None:
-                    feat_cache[idx] = x.clone()
-                    feat_idx[0] += 1
+                    feat_cache[idx] = x
                else:

-                    cache_x = x[:, :, -1:, :, :].clone()
-                    # if cache_x.shape[2] < 2 and feat_cache[idx] is not None and feat_cache[idx]!='Rep':
-                    #     # cache last frame of last two chunk
-                    #     cache_x = torch.cat([feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to(cache_x.device), cache_x], dim=2)
-
+                    cache_x = x[:, :, -1:, :, :]
                    x = self.time_conv(
                        torch.cat([feat_cache[idx][:, :, -1:, :, :], x], 2))
                    feat_cache[idx] = cache_x
-                    feat_idx[0] += 1
+
+                    deferred_x = feat_cache[idx + 1]
+                    if deferred_x is not None:
+                        x = torch.cat([deferred_x, x], 2)
+                        feat_cache[idx + 1] = None
+
+                    if x.shape[2] == 1 and not final:
+                        feat_cache[idx + 1] = x
+                        x = None
+
+                feat_idx[0] += 2
        return x


@@ -177,19 +167,12 @@ class ResidualBlock(nn.Module):
        self.shortcut = CausalConv3d(in_dim, out_dim, 1) \
            if in_dim != out_dim else nn.Identity()

-    def forward(self, x, feat_cache=None, feat_idx=[0]):
+    def forward(self, x, feat_cache=None, feat_idx=[0], final=False):
        old_x = x
        for layer in self.residual:
            if isinstance(layer, CausalConv3d) and feat_cache is not None:
                idx = feat_idx[0]
-                cache_x = x[:, :, -CACHE_T:, :, :].clone()
-                if cache_x.shape[2] < 2 and feat_cache[idx] is not None:
-                    # cache last frame of last two chunk
-                    cache_x = torch.cat([
-                        feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to(
-                            cache_x.device), cache_x
-                    ],
-                                        dim=2)
+                cache_x = x[:, :, -CACHE_T:, :, :]
                x = layer(x, cache_list=feat_cache, cache_idx=idx)
                feat_cache[idx] = cache_x
                feat_idx[0] += 1
@@ -213,7 +196,7 @@ class AttentionBlock(nn.Module):
        self.proj = ops.Conv2d(dim, dim, 1)
        self.optimized_attention = vae_attention()

-    def forward(self, x):
+    def forward(self, x, feat_cache=None, feat_idx=[0], final=False):
        identity = x
        b, c, t, h, w = x.size()
        x = rearrange(x, 'b c t h w -> (b t) c h w')
@@ -283,17 +266,10 @@ class Encoder3d(nn.Module):
            RMS_norm(out_dim, images=False), nn.SiLU(),
            CausalConv3d(out_dim, z_dim, 3, padding=1))

-    def forward(self, x, feat_cache=None, feat_idx=[0]):
+    def forward(self, x, feat_cache=None, feat_idx=[0], final=False):
        if feat_cache is not None:
            idx = feat_idx[0]
-            cache_x = x[:, :, -CACHE_T:, :, :].clone()
-            if cache_x.shape[2] < 2 and feat_cache[idx] is not None:
-                # cache last frame of last two chunk
-                cache_x = torch.cat([
-                    feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to(
-                        cache_x.device), cache_x
-                ],
-                                    dim=2)
+            cache_x = x[:, :, -CACHE_T:, :, :]
            x = self.conv1(x, feat_cache[idx])
            feat_cache[idx] = cache_x
            feat_idx[0] += 1
@@ -303,14 +279,16 @@ class Encoder3d(nn.Module):
        ## downsamples
        for layer in self.downsamples:
            if feat_cache is not None:
-                x = layer(x, feat_cache, feat_idx)
+                x = layer(x, feat_cache, feat_idx, final=final)
+                if x is None:
+                    return None
            else:
                x = layer(x)

        ## middle
        for layer in self.middle:
-            if isinstance(layer, ResidualBlock) and feat_cache is not None:
-                x = layer(x, feat_cache, feat_idx)
+            if feat_cache is not None:
+                x = layer(x, feat_cache, feat_idx, final=final)
            else:
                x = layer(x)

@@ -318,14 +296,7 @@ class Encoder3d(nn.Module):
        for layer in self.head:
            if isinstance(layer, CausalConv3d) and feat_cache is not None:
                idx = feat_idx[0]
-                cache_x = x[:, :, -CACHE_T:, :, :].clone()
-                if cache_x.shape[2] < 2 and feat_cache[idx] is not None:
-                    # cache last frame of last two chunk
-                    cache_x = torch.cat([
-                        feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to(
-                            cache_x.device), cache_x
-                    ],
-                                        dim=2)
+                cache_x = x[:, :, -CACHE_T:, :, :]
                x = layer(x, feat_cache[idx])
                feat_cache[idx] = cache_x
                feat_idx[0] += 1
@@ -389,18 +360,48 @@ class Decoder3d(nn.Module):
            RMS_norm(out_dim, images=False), nn.SiLU(),
            CausalConv3d(out_dim, output_channels, 3, padding=1))

+    def run_up(self, layer_idx, x_ref, feat_cache, feat_idx, out_chunks):
+        """Recursively apply `self.upsamples[layer_idx:]` and then `self.head`, appending results to `out_chunks`.
+
+        Args:
+            layer_idx: index of the next upsample layer to run; once it reaches
+                `len(self.upsamples)` the head is applied instead.
+            x_ref: one-element list holding the input tensor; the slot is cleared on entry.
+            feat_cache: per-layer cache list, or None to run without caching.
+            feat_idx: one-element list holding the current cache index.
+            out_chunks: list that receives one output tensor per finished chunk.
+        """
+        x = x_ref[0]
+        # Clear the caller's slot so the list no longer references the tensor
+        # (presumably to let it be freed as early as possible).
+        x_ref[0] = None
+        if layer_idx >= len(self.upsamples):
+            # All upsample layers done: run the head and emit this chunk.
+            for layer in self.head:
+                if isinstance(layer, CausalConv3d) and feat_cache is not None:
+                    cache_x = x[:, :, -CACHE_T:, :, :]
+                    x = layer(x, feat_cache[feat_idx[0]])
+                    feat_cache[feat_idx[0]] = cache_x
+                    feat_idx[0] += 1
+                else:
+                    x = layer(x)
+            out_chunks.append(x)
+            return
+
+        layer = self.upsamples[layer_idx]
+        if feat_cache is not None:
+            x = layer(x, feat_cache, feat_idx)
+        else:
+            x = layer(x)
+
+        # After a temporal upsample that yields more than 2 frames, continue with
+        # 2-frame slices, one at a time.
+        if isinstance(layer, Resample) and layer.mode == 'upsample3d' and x.shape[2] > 2:
+            for frame_idx in range(0, x.shape[2], 2):
+                self.run_up(
+                    layer_idx + 1,
+                    [x[:, :, frame_idx:frame_idx + 2, :, :]],
+                    feat_cache,
+                    # Each slice gets its own copy, so it starts from the same cache index.
+                    feat_idx.copy(),
+                    out_chunks,
+                )
+            del x
+            return
+
+        # No split: hand the tensor to the next layer through a fresh holder and
+        # drop the local name first.
+        next_x_ref = [x]
+        del x
+        self.run_up(layer_idx + 1, next_x_ref, feat_cache, feat_idx, out_chunks)
+
    def forward(self, x, feat_cache=None, feat_idx=[0]):
        ## conv1
        if feat_cache is not None:
            idx = feat_idx[0]
-            cache_x = x[:, :, -CACHE_T:, :, :].clone()
-            if cache_x.shape[2] < 2 and feat_cache[idx] is not None:
-                # cache last frame of last two chunk
-                cache_x = torch.cat([
-                    feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to(
-                        cache_x.device), cache_x
-                ],
-                                    dim=2)
+            cache_x = x[:, :, -CACHE_T:, :, :]
            x = self.conv1(x, feat_cache[idx])
            feat_cache[idx] = cache_x
            feat_idx[0] += 1
@@ -409,42 +410,21 @@ class Decoder3d(nn.Module):

        ## middle
        for layer in self.middle:
-            if isinstance(layer, ResidualBlock) and feat_cache is not None:
-                x = layer(x, feat_cache, feat_idx)
-            else:
-                x = layer(x)
-
-        ## upsamples
-        for layer in self.upsamples:
            if feat_cache is not None:
                x = layer(x, feat_cache, feat_idx)
            else:
                x = layer(x)

-        ## head
-        for layer in self.head:
-            if isinstance(layer, CausalConv3d) and feat_cache is not None:
-                idx = feat_idx[0]
-                cache_x = x[:, :, -CACHE_T:, :, :].clone()
-                if cache_x.shape[2] < 2 and feat_cache[idx] is not None:
-                    # cache last frame of last two chunk
-                    cache_x = torch.cat([
-                        feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to(
-                            cache_x.device), cache_x
-                    ],
-                                        dim=2)
-                x = layer(x, feat_cache[idx])
-                feat_cache[idx] = cache_x
-                feat_idx[0] += 1
-            else:
-                x = layer(x)
-        return x
+        out_chunks = []
+
+        self.run_up(0, [x], feat_cache, feat_idx, out_chunks)
+        return out_chunks


-def count_conv3d(model):
+def count_cache_layers(model):
    count = 0
    for m in model.modules():
-        if isinstance(m, CausalConv3d):
+        if isinstance(m, CausalConv3d) or (isinstance(m, Resample) and m.mode == 'downsample3d'):
            count += 1
    return count

@@ -482,11 +462,12 @@ class WanVAE(nn.Module):
        conv_idx = [0]
        ## cache
        t = x.shape[2]
-        iter_ = 1 + (t - 1) // 4
+        t = 1 + ((t - 1) // 4) * 4
+        iter_ = 1 + (t - 1) // 2
        feat_map = None
        if iter_ > 1:
-            feat_map = [None] * count_conv3d(self.encoder)
-        ## 对encode输入的x，按时间拆分为1、4、4、4....
+            feat_map = [None] * count_cache_layers(self.encoder)
+        ## Split the encode input x along time into chunks of 1, 2, 2, 2, ... (the total frame count is first rounded down to 4N+1)
        for i in range(iter_):
            conv_idx = [0]
            if i == 0:
@@ -496,20 +477,23 @@ class WanVAE(nn.Module):
                    feat_idx=conv_idx)
            else:
                out_ = self.encoder(
-                    x[:, :, 1 + 4 * (i - 1):1 + 4 * i, :, :],
+                    x[:, :, 1 + 2 * (i - 1):1 + 2 * i, :, :],
                    feat_cache=feat_map,
-                    feat_idx=conv_idx)
+                    feat_idx=conv_idx,
+                    final=(i == (iter_ - 1)))
+                if out_ is None:
+                    continue
                out = torch.cat([out, out_], 2)
+
        mu, log_var = self.conv1(out).chunk(2, dim=1)
        return mu

    def decode(self, z):
-        conv_idx = [0]
        # z: [b,c,t,h,w]
-        iter_ = z.shape[2]
+        iter_ = 1 + z.shape[2] // 2
        feat_map = None
        if iter_ > 1:
-            feat_map = [None] * count_conv3d(self.decoder)
+            feat_map = [None] * count_cache_layers(self.decoder)
        x = self.conv2(z)
        for i in range(iter_):
            conv_idx = [0]
@@ -520,8 +504,8 @@ class WanVAE(nn.Module):
                    feat_idx=conv_idx)
            else:
                out_ = self.decoder(
-                    x[:, :, i:i + 1, :, :],
+                    x[:, :, 1 + 2 * (i - 1):1 + 2 * i, :, :],
                    feat_cache=feat_map,
                    feat_idx=conv_idx)
-                out = torch.cat([out, out_], 2)
-        return out
+                out += out_
+        return torch.cat(out, 2)
--- a/comfy/memory_management.py
+++ b/comfy/memory_management.py
@@ -1,9 +1,71 @@
 import math
+import ctypes
+import threading
+import dataclasses
 import torch
 from typing import NamedTuple

 from comfy.quant_ops import QuantizedTensor

+
+class TensorFileSlice(NamedTuple):
+    """Location of a tensor's raw bytes inside an open file."""
+    file_ref: object  # file object; read_tensor_file_slice_into() calls seek()/readinto() on it
+    thread_id: int    # compared with threading.get_ident() before the file is touched
+    offset: int       # position passed to seek() before reading
+    size: int         # number of bytes to read
+
+
+def read_tensor_file_slice_into(tensor, destination):
+    """Read a file-backed tensor's bytes from its file directly into `destination`.
+
+    The source location is taken from the `_comfy_tensor_file_slice` attribute
+    on `tensor`'s untyped storage. For a `QuantizedTensor` the quantized data is
+    read the same way and the params are copied over, keeping the destination's
+    original `orig_dtype`.
+
+    Returns:
+        True when `destination` now holds the data. False when this path does
+        not apply or the read failed, presumably so the caller can use another
+        copy path. After a failed read `destination` may be partially written.
+    """
+
+    if isinstance(tensor, QuantizedTensor):
+        if not isinstance(destination, QuantizedTensor):
+            return False
+        if tensor._layout_cls != destination._layout_cls:
+            return False
+
+        if not read_tensor_file_slice_into(tensor._qdata, destination._qdata):
+            return False
+
+        # Copy the params but keep the destination's own orig_dtype.
+        dst_orig_dtype = destination._params.orig_dtype
+        destination._params.copy_from(tensor._params, non_blocking=False)
+        destination._params = dataclasses.replace(destination._params, orig_dtype=dst_orig_dtype)
+        return True
+
+    info = getattr(tensor.untyped_storage(), "_comfy_tensor_file_slice", None)
+    if info is None:
+        return False
+
+    file_obj = info.file_ref
+    # The bytes are written linearly starting at destination.data_ptr(), so the
+    # destination must be a CPU tensor that is contiguous and large enough, and
+    # the source tensor must cover exactly the recorded file slice.
+    if (destination.device.type != "cpu"
+            or file_obj is None
+            or threading.get_ident() != info.thread_id
+            or destination.numel() * destination.element_size() < info.size
+            or not destination.is_contiguous()
+            or tensor.numel() * tensor.element_size() != info.size
+            or tensor.storage_offset() != 0
+            or not tensor.is_contiguous()):
+        return False
+
+    if info.size == 0:
+        return True
+
+    # Writable byte view over the destination's memory.
+    buf_type = ctypes.c_ubyte * info.size
+    view = memoryview(buf_type.from_address(destination.data_ptr()))
+
+    try:
+        file_obj.seek(info.offset)
+        done = 0
+        while done < info.size:
+            try:
+                n = file_obj.readinto(view[done:])
+            except OSError:
+                return False
+            # readinto() may return None (no data available on a non-blocking
+            # stream) or 0 (EOF); both mean the slice cannot be completed.
+            if n is None or n <= 0:
+                return False
+            done += n
+        return True
+    finally:
+        view.release()
+
 class TensorGeometry(NamedTuple):
    shape: any
    dtype: torch.dtype
@@ -79,3 +141,17 @@ def interpret_gathered_like(tensors, gathered):
    return dest_views

 aimdo_enabled = False
+
+# Callable invoked by extra_ram_release(target); None means no callback is installed.
+extra_ram_release_callback = None
+# Non-negative integer headroom; updated by set_ram_cache_release_state().
+RAM_CACHE_HEADROOM = 0
+
+def set_ram_cache_release_state(callback, headroom):
+    """Install the RAM release callback and store the headroom as a non-negative int."""
+    global extra_ram_release_callback, RAM_CACHE_HEADROOM
+    extra_ram_release_callback = callback
+    requested = int(headroom)
+    RAM_CACHE_HEADROOM = requested if requested > 0 else 0
+
+def extra_ram_release(target):
+    """Forward `target` to the installed release callback; return 0 when none is installed."""
+    callback = extra_ram_release_callback
+    return 0 if callback is None else callback(target)
--- a/comfy/model_base.py
+++ b/comfy/model_base.py
@@ -20,8 +20,8 @@ import comfy.ldm.hunyuan3dv2_1
 import comfy.ldm.hunyuan3dv2_1.hunyuandit
 import torch
 import logging
-import os
 import comfy.ldm.lightricks.av_model
+import comfy.context_windows
 from comfy.ldm.modules.diffusionmodules.openaimodel import UNetModel, Timestep
 from comfy.ldm.cascade.stage_c import StageC
 from comfy.ldm.cascade.stage_b import StageB
@@ -52,6 +52,7 @@ import comfy.ldm.qwen_image.model
 import comfy.ldm.kandinsky5.model
 import comfy.ldm.anima.model
 import comfy.ldm.ace.ace_step15
+import comfy.ldm.rt_detr.rtdetr_v4

 import comfy.model_management
 import comfy.patcher_extension
@@ -113,20 +114,8 @@ def model_sampling(model_config, model_type):
    elif model_type == ModelType.IMG_TO_IMG_FLOW:
        c = comfy.model_sampling.IMG_TO_IMG_FLOW

-    from comfy.cli_args import args
-    isolation_runtime_enabled = args.use_process_isolation or os.environ.get("PYISOLATE_CHILD") == "1"
-
    class ModelSampling(s, c):
-        if isolation_runtime_enabled:
-            def __reduce__(self):
-                """Ensure pickling yields a proxy instead of failing on local class."""
-                try:
-                    from comfy.isolation.model_sampling_proxy import ModelSamplingRegistry, ModelSamplingProxy
-                    registry = ModelSamplingRegistry()
-                    ms_id = registry.register(self)
-                    return (ModelSamplingProxy, (ms_id,))
-                except Exception as exc:
-                    raise RuntimeError("Failed to serialize ModelSampling for isolation.") from exc
+        pass

    return ModelSampling(model_config)

@@ -298,6 +287,12 @@ class BaseModel(torch.nn.Module):
            return data
        return None

+    def resize_cond_for_context_window(self, cond_key, cond_value, window, x_in, device, retain_index_list=[]):
+        """Hook for model-specific cond slicing when running with context windows.
+
+        The base implementation handles no keys and always returns None.
+        Subclasses override it, inspect `cond_key`, and return a sliced cond
+        object for the keys they know about; returning None falls through to
+        the default handling. comfy.context_windows.slice_cond() covers the
+        common cases.
+        """
+        return None
+
    def extra_conds(self, **kwargs):
        out = {}
        concat_cond = self.concat_cond(**kwargs)
@@ -896,7 +891,7 @@ class Flux(BaseModel):
        return torch.cat((image, mask), dim=1)

    def encode_adm(self, **kwargs):
-        return kwargs["pooled_output"]
+        return kwargs.get("pooled_output", None)

    def extra_conds(self, **kwargs):
        out = super().extra_conds(**kwargs)
@@ -943,9 +938,10 @@ class LongCatImage(Flux):
        transformer_options = transformer_options.copy()
        rope_opts = transformer_options.get("rope_options", {})
        rope_opts = dict(rope_opts)
+        pe_len = float(c_crossattn.shape[1]) if c_crossattn is not None else 512.0
        rope_opts.setdefault("shift_t", 1.0)
-        rope_opts.setdefault("shift_y", 512.0)
-        rope_opts.setdefault("shift_x", 512.0)
+        rope_opts.setdefault("shift_y", pe_len)
+        rope_opts.setdefault("shift_x", pe_len)
        transformer_options["rope_options"] = rope_opts
        return super()._apply_model(x, t, c_concat, c_crossattn, control, transformer_options, **kwargs)

@@ -1066,6 +1062,10 @@ class LTXAV(BaseModel):
        if guide_attention_entries is not None:
            out['guide_attention_entries'] = comfy.conds.CONDConstant(guide_attention_entries)

+        ref_audio = kwargs.get("ref_audio", None)
+        if ref_audio is not None:
+            out['ref_audio'] = comfy.conds.CONDConstant(ref_audio)
+
        return out

    def process_timestep(self, timestep, x, denoise_mask=None, audio_denoise_mask=None, **kwargs):
@@ -1388,6 +1388,11 @@ class WAN21_Vace(WAN21):
        out['vace_strength'] = comfy.conds.CONDConstant(vace_strength)
        return out

+    def resize_cond_for_context_window(self, cond_key, cond_value, window, x_in, device, retain_index_list=[]):
+        """Slice `vace_context` with temporal_dim=3; every other key is deferred to the parent class."""
+        if cond_key != "vace_context":
+            return super().resize_cond_for_context_window(cond_key, cond_value, window, x_in, device, retain_index_list=retain_index_list)
+        return comfy.context_windows.slice_cond(cond_value, window, x_in, device, temporal_dim=3, retain_index_list=retain_index_list)
+
 class WAN21_Camera(WAN21):
    def __init__(self, model_config, model_type=ModelType.FLOW, image_to_video=False, device=None):
        super(WAN21, self).__init__(model_config, model_type, device=device, unet_model=comfy.ldm.wan.model.CameraWanModel)
@@ -1440,6 +1445,11 @@ class WAN21_HuMo(WAN21):

        return out

+    def resize_cond_for_context_window(self, cond_key, cond_value, window, x_in, device, retain_index_list=[]):
+        if cond_key == "audio_embed":
+            return comfy.context_windows.slice_cond(cond_value, window, x_in, device, temporal_dim=1)
+        return super().resize_cond_for_context_window(cond_key, cond_value, window, x_in, device, retain_index_list=retain_index_list)
+
 class WAN22_Animate(WAN21):
    def __init__(self, model_config, model_type=ModelType.FLOW, image_to_video=False, device=None):
        super(WAN21, self).__init__(model_config, model_type, device=device, unet_model=comfy.ldm.wan.model_animate.AnimateWanModel)
@@ -1457,6 +1467,13 @@ class WAN22_Animate(WAN21):
            out['pose_latents'] = comfy.conds.CONDRegular(self.process_latent_in(pose_latents))
        return out

+    def resize_cond_for_context_window(self, cond_key, cond_value, window, x_in, device, retain_index_list=[]):
+        if cond_key == "face_pixel_values":
+            return comfy.context_windows.slice_cond(cond_value, window, x_in, device, temporal_dim=2, temporal_scale=4, temporal_offset=1)
+        if cond_key == "pose_latents":
+            return comfy.context_windows.slice_cond(cond_value, window, x_in, device, temporal_dim=2, temporal_offset=1)
+        return super().resize_cond_for_context_window(cond_key, cond_value, window, x_in, device, retain_index_list=retain_index_list)
+
 class WAN22_S2V(WAN21):
    def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
        super(WAN21, self).__init__(model_config, model_type, device=device, unet_model=comfy.ldm.wan.model.WanModel_S2V)
@@ -1493,6 +1510,11 @@ class WAN22_S2V(WAN21):
            out['reference_motion'] = reference_motion.shape
        return out

+    def resize_cond_for_context_window(self, cond_key, cond_value, window, x_in, device, retain_index_list=[]):
+        if cond_key == "audio_embed":
+            return comfy.context_windows.slice_cond(cond_value, window, x_in, device, temporal_dim=1)
+        return super().resize_cond_for_context_window(cond_key, cond_value, window, x_in, device, retain_index_list=retain_index_list)
+
 class WAN22(WAN21):
    def __init__(self, model_config, model_type=ModelType.FLOW, image_to_video=False, device=None):
        super(WAN21, self).__init__(model_config, model_type, device=device, unet_model=comfy.ldm.wan.model.WanModel)
@@ -1936,3 +1958,7 @@ class Kandinsky5Image(Kandinsky5):

    def concat_cond(self, **kwargs):
        return None
+
+class RT_DETR_v4(BaseModel):
+    def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
+        super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.rt_detr.rtdetr_v4.RTv4)
--- a/comfy/model_detection.py
+++ b/comfy/model_detection.py
@@ -698,6 +698,12 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
        dit_config["audio_model"] = "ace1.5"
        return dit_config

+    if '{}encoder.pan_blocks.1.cv4.conv.weight'.format(key_prefix) in state_dict_keys: # RT-DETR_v4
+        dit_config = {}
+        dit_config["image_model"] = "RT_DETR_v4"
+        dit_config["enc_h"] = state_dict['{}encoder.pan_blocks.1.cv4.conv.weight'.format(key_prefix)].shape[0]
+        return dit_config
+
    if '{}input_blocks.0.0.weight'.format(key_prefix) not in state_dict_keys:
        return None

--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -55,6 +55,7 @@ total_vram = 0

 # Training Related State
 in_training = False
+training_fp8_bwd = False


 def get_supported_float8_types():
@@ -372,7 +373,7 @@ AMD_ENABLE_MIOPEN_ENV = 'COMFYUI_ENABLE_MIOPEN'

 try:
    if is_amd():
-        arch = torch.cuda.get_device_properties(get_torch_device()).gcnArchName
+        arch = torch.cuda.get_device_properties(get_torch_device()).gcnArchName.split(':')[0]
        if not (any((a in arch) for a in AMD_RDNA2_AND_OLDER_ARCH)):
            if os.getenv(AMD_ENABLE_MIOPEN_ENV) != '1':
                torch.backends.cudnn.enabled = False  # Seems to improve things a lot on AMD
@@ -400,7 +401,7 @@ try:
        if args.use_split_cross_attention == False and args.use_quad_cross_attention == False:
            if aotriton_supported(arch):  # AMD efficient attention implementation depends on aotriton.
                if torch_version_numeric >= (2, 7):  # works on 2.6 but doesn't actually seem to improve much
-                    if any((a in arch) for a in ["gfx90a", "gfx942", "gfx1100", "gfx1101", "gfx1151"]):  # TODO: more arches, TODO: gfx950
+                    if any((a in arch) for a in ["gfx90a", "gfx942", "gfx950", "gfx1100", "gfx1101", "gfx1150", "gfx1151"]):  # TODO: more arches, TODO: gfx950
                        ENABLE_PYTORCH_ATTENTION = True
                if rocm_version >= (7, 0):
                   if any((a in arch) for a in ["gfx1200", "gfx1201"]):
@@ -497,9 +498,6 @@ except:

 current_loaded_models = []

-def _isolation_mode_enabled():
-    return args.use_process_isolation or os.environ.get("PYISOLATE_CHILD") == "1"
-
 def module_size(module):
    module_mem = 0
    sd = module.state_dict()
@@ -508,6 +506,28 @@ def module_size(module):
        module_mem += t.nbytes
    return module_mem

+def module_mmap_residency(module, free=False):
+    mmap_touched_mem = 0
+    module_mem = 0
+    bounced_mmaps = set()
+    sd = module.state_dict()
+    for k in sd:
+        t = sd[k]
+        module_mem += t.nbytes
+        storage = t._qdata.untyped_storage() if isinstance(t, comfy.quant_ops.QuantizedTensor) else t.untyped_storage()
+        if not getattr(storage, "_comfy_tensor_mmap_touched", False):
+            continue
+        mmap_touched_mem += t.nbytes
+        if not free:
+            continue
+        storage._comfy_tensor_mmap_touched = False
+        mmap_obj = storage._comfy_tensor_mmap_refs[0]
+        if mmap_obj in bounced_mmaps:
+            continue
+        mmap_obj.bounce()
+        bounced_mmaps.add(mmap_obj)
+    return mmap_touched_mem, module_mem
+
 class LoadedModel:
    def __init__(self, model):
        self._set_model(model)
@@ -522,6 +542,7 @@ class LoadedModel:
        if model.parent is not None:
            self._parent_model = weakref.ref(model.parent)
            self._patcher_finalizer = weakref.finalize(model, self._switch_parent)
+            self._patcher_finalizer.atexit = False

    def _switch_parent(self):
        model = self._parent_model()
@@ -535,6 +556,9 @@ class LoadedModel:
    def model_memory(self):
        return self.model.model_size()

+    def model_mmap_residency(self, free=False):
+        return self.model.model_mmap_residency(free=free)
+
    def model_loaded_memory(self):
        return self.model.loaded_size()

@@ -565,6 +589,7 @@ class LoadedModel:

        self.real_model = weakref.ref(real_model)
        self.model_finalizer = weakref.finalize(real_model, cleanup_models)
+        self.model_finalizer.atexit = False
        return real_model

    def should_reload_model(self, force_patch_weights=False):
@@ -579,9 +604,8 @@ class LoadedModel:
                if freed >= memory_to_free:
                    return False
        self.model.detach(unpatch_weights)
-        if self.model_finalizer is not None:
-            self.model_finalizer.detach()
-            self.model_finalizer = None
+        self.model_finalizer.detach()
+        self.model_finalizer = None
        self.real_model = None
        return True

@@ -595,15 +619,8 @@ class LoadedModel:
        if self._patcher_finalizer is not None:
            self._patcher_finalizer.detach()

-    def dead_state(self):
-        model_ref_gone = self.model is None
-        real_model_ref = self.real_model
-        real_model_ref_gone = callable(real_model_ref) and real_model_ref() is None
-        return model_ref_gone, real_model_ref_gone
-
    def is_dead(self):
-        model_ref_gone, real_model_ref_gone = self.dead_state()
-        return model_ref_gone or real_model_ref_gone
+        return self.real_model() is not None and self.model is None


 def use_more_memory(extra_memory, loaded_models, device):
@@ -644,38 +661,27 @@ def extra_reserved_memory():
 def minimum_inference_memory():
    return (1024 * 1024 * 1024) * 0.8 + extra_reserved_memory()

-def free_memory(memory_required, device, keep_loaded=[], for_dynamic=False, ram_required=0):
+def free_memory(memory_required, device, keep_loaded=[], for_dynamic=False, pins_required=0, ram_required=0):
    cleanup_models_gc()
    unloaded_model = []
    can_unload = []
    unloaded_models = []
-    isolation_active = _isolation_mode_enabled()

    for i in range(len(current_loaded_models) -1, -1, -1):
        shift_model = current_loaded_models[i]
-        if shift_model.device == device:
+        if device is None or shift_model.device == device:
            if shift_model not in keep_loaded and not shift_model.is_dead():
                can_unload.append((-shift_model.model_offloaded_memory(), sys.getrefcount(shift_model.model), shift_model.model_memory(), i))
                shift_model.currently_used = False

-    if can_unload and isolation_active:
-        try:
-            from pyisolate import flush_tensor_keeper  # type: ignore[attr-defined]
-        except Exception:
-            flush_tensor_keeper = None
-        if callable(flush_tensor_keeper):
-            flushed = flush_tensor_keeper()
-            if flushed > 0:
-                logging.debug("][ MM:tensor_keeper_flush | released=%d", flushed)
-                gc.collect()
-
-    for x in sorted(can_unload):
+    can_unload_sorted = sorted(can_unload)
+    for x in can_unload_sorted:
        i = x[-1]
        memory_to_free = 1e32
-        ram_to_free = 1e32
-        if not DISABLE_SMART_MEMORY:
-            memory_to_free = memory_required - get_free_memory(device)
-            ram_to_free = ram_required - get_free_ram()
+        pins_to_free = 1e32
+        if not DISABLE_SMART_MEMORY or device is None:
+            memory_to_free = 0 if device is None else memory_required - get_free_memory(device)
+            pins_to_free = pins_required - get_free_ram()
            if current_loaded_models[i].model.is_dynamic() and for_dynamic:
                #don't actually unload dynamic models for the sake of other dynamic models
                #as that works on-demand.
@@ -684,22 +690,25 @@ def free_memory(memory_required, device, keep_loaded=[], for_dynamic=False, ram_
        if memory_to_free > 0 and current_loaded_models[i].model_unload(memory_to_free):
            logging.debug(f"Unloading {current_loaded_models[i].model.model.__class__.__name__}")
            unloaded_model.append(i)
-        if ram_to_free > 0:
+        if pins_to_free > 0:
+            logging.debug(f"PIN Unloading {current_loaded_models[i].model.model.__class__.__name__}")
+            current_loaded_models[i].model.partially_unload_ram(pins_to_free)
+
+    for x in can_unload_sorted:
+        i = x[-1]
+        ram_to_free = ram_required - psutil.virtual_memory().available
+        if ram_to_free <= 0 and i not in unloaded_model:
+            continue
+        resident_memory, _ = current_loaded_models[i].model_mmap_residency(free=True)
+        if resident_memory > 0:
            logging.debug(f"RAM Unloading {current_loaded_models[i].model.model.__class__.__name__}")
-            current_loaded_models[i].model.partially_unload_ram(ram_to_free)

    for i in sorted(unloaded_model, reverse=True):
-        unloaded = current_loaded_models.pop(i)
-        model_obj = unloaded.model
-        if model_obj is not None:
-            cleanup = getattr(model_obj, "cleanup", None)
-            if callable(cleanup):
-                cleanup()
-        unloaded_models.append(unloaded)
+        unloaded_models.append(current_loaded_models.pop(i))

    if len(unloaded_model) > 0:
        soft_empty_cache()
-    else:
+    elif device is not None:
        if vram_state != VRAMState.HIGH_VRAM:
            mem_free_total, mem_free_torch = get_free_memory(device, torch_free_too=True)
            if mem_free_torch > mem_free_total * 0.25:
@@ -754,23 +763,31 @@ def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimu
        for i in to_unload:
            model_to_unload = current_loaded_models.pop(i)
            model_to_unload.model.detach(unpatch_all=False)
-            if model_to_unload.model_finalizer is not None:
-                model_to_unload.model_finalizer.detach()
-                model_to_unload.model_finalizer = None
+            model_to_unload.model_finalizer.detach()


    total_memory_required = {}
+    total_pins_required = {}
    total_ram_required = {}
    for loaded_model in models_to_load:
-        total_memory_required[loaded_model.device] = total_memory_required.get(loaded_model.device, 0) + loaded_model.model_memory_required(loaded_model.device)
-        #x2, one to make sure the OS can fit the model for loading in disk cache, and for us to do any pinning we
-        #want to do.
-        #FIXME: This should subtract off the to_load current pin consumption.
-        total_ram_required[loaded_model.device] = total_ram_required.get(loaded_model.device, 0) + loaded_model.model_memory() * 2
+        device = loaded_model.device
+        total_memory_required[device] = total_memory_required.get(device, 0) + loaded_model.model_memory_required(device)
+        resident_memory, model_memory = loaded_model.model_mmap_residency()
+        pinned_memory = loaded_model.model.pinned_memory_size()
+        #FIXME: This can over-free the pins as it budgets to pin the entire model. We should
+        #make this JIT to keep as much pinned as possible.
+        pins_required = model_memory - pinned_memory
+        ram_required = model_memory - resident_memory
+        total_pins_required[device] = total_pins_required.get(device, 0) + pins_required
+        total_ram_required[device] = total_ram_required.get(device, 0) + ram_required

    for device in total_memory_required:
        if device != torch.device("cpu"):
-            free_memory(total_memory_required[device] * 1.1 + extra_mem, device, for_dynamic=free_for_dynamic, ram_required=total_ram_required[device])
+            free_memory(total_memory_required[device] * 1.1 + extra_mem,
+                        device,
+                        for_dynamic=free_for_dynamic,
+                        pins_required=total_pins_required[device],
+                        ram_required=total_ram_required[device])

    for device in total_memory_required:
        if device != torch.device("cpu"):
@@ -819,62 +836,25 @@ def loaded_models(only_currently_used=False):


 def cleanup_models_gc():
+    do_gc = False
+
    reset_cast_buffers()
-    if not _isolation_mode_enabled():
-        dead_found = False
-        for i in range(len(current_loaded_models)):
-            if current_loaded_models[i].is_dead():
-                dead_found = True
-                break

-        if dead_found:
-            logging.info("Potential memory leak detected with model NoneType, doing a full garbage collect, for maximum performance avoid circular references in the model code.")
-            gc.collect()
-            soft_empty_cache()
-
-            for i in range(len(current_loaded_models) - 1, -1, -1):
-                cur = current_loaded_models[i]
-                if cur.is_dead():
-                    logging.warning("WARNING, memory leak with model NoneType. Please make sure it is not being referenced from somewhere.")
-                    leaked = current_loaded_models.pop(i)
-                    model_obj = getattr(leaked, "model", None)
-                    if model_obj is not None:
-                        cleanup = getattr(model_obj, "cleanup", None)
-                        if callable(cleanup):
-                            cleanup()
-        return
-
-    dead_found = False
-    has_real_model_leak = False
    for i in range(len(current_loaded_models)):
-        model_ref_gone, real_model_ref_gone = current_loaded_models[i].dead_state()
-        if model_ref_gone or real_model_ref_gone:
-            dead_found = True
-            if real_model_ref_gone and not model_ref_gone:
-                has_real_model_leak = True
+        cur = current_loaded_models[i]
+        if cur.is_dead():
+            logging.info("Potential memory leak detected with model {}, doing a full garbage collect, for maximum performance avoid circular references in the model code.".format(cur.real_model().__class__.__name__))
+            do_gc = True
+            break

-    if dead_found:
-        if has_real_model_leak:
-            logging.info("Potential memory leak detected with model NoneType, doing a full garbage collect, for maximum performance avoid circular references in the model code.")
-        else:
-            logging.debug("Cleaning stale loaded-model entries with released patcher references.")
+    if do_gc:
        gc.collect()
        soft_empty_cache()

-        for i in range(len(current_loaded_models) - 1, -1, -1):
+        for i in range(len(current_loaded_models)):
            cur = current_loaded_models[i]
-            model_ref_gone, real_model_ref_gone = cur.dead_state()
-            if model_ref_gone or real_model_ref_gone:
-                if real_model_ref_gone and not model_ref_gone:
-                    logging.warning("WARNING, memory leak with model NoneType. Please make sure it is not being referenced from somewhere.")
-                else:
-                    logging.debug("Cleaning stale loaded-model entry with released patcher reference.")
-                leaked = current_loaded_models.pop(i)
-                model_obj = getattr(leaked, "model", None)
-                if model_obj is not None:
-                    cleanup = getattr(model_obj, "cleanup", None)
-                    if callable(cleanup):
-                        cleanup()
+            if cur.is_dead():
+                logging.warning("WARNING, memory leak with model {}. Please make sure it is not being referenced from somewhere.".format(cur.real_model().__class__.__name__))


 def archive_model_dtypes(model):
@@ -888,20 +868,11 @@ def archive_model_dtypes(model):
 def cleanup_models():
    to_delete = []
    for i in range(len(current_loaded_models)):
-        real_model_ref = current_loaded_models[i].real_model
-        if real_model_ref is None:
-            to_delete = [i] + to_delete
-            continue
-        if callable(real_model_ref) and real_model_ref() is None:
+        if current_loaded_models[i].real_model() is None:
            to_delete = [i] + to_delete

    for i in to_delete:
        x = current_loaded_models.pop(i)
-        model_obj = getattr(x, "model", None)
-        if model_obj is not None:
-            cleanup = getattr(model_obj, "cleanup", None)
-            if callable(cleanup):
-                cleanup()
        del x

 def dtype_size(dtype):
@@ -1082,6 +1053,12 @@ def intermediate_device():
    else:
        return torch.device("cpu")

+def intermediate_dtype():
+    if args.fp16_intermediates:
+        return torch.float16
+    else:
+        return torch.float32
+
 def vae_device():
    if args.cpu_vae:
        return torch.device("cpu")
@@ -1302,6 +1279,11 @@ def cast_to_gathered(tensors, r, non_blocking=False, stream=None):
            dest_view = dest_views.pop(0)
            if tensor is None:
                continue
+            if comfy.memory_management.read_tensor_file_slice_into(tensor, dest_view):
+                continue
+            storage = tensor._qdata.untyped_storage() if isinstance(tensor, comfy.quant_ops.QuantizedTensor) else tensor.untyped_storage()
+            if hasattr(storage, "_comfy_tensor_mmap_touched"):
+                storage._comfy_tensor_mmap_touched = True
            dest_view.copy_(tensor, non_blocking=non_blocking)


@@ -1344,9 +1326,9 @@ MAX_PINNED_MEMORY = -1
 if not args.disable_pinned_memory:
    if is_nvidia() or is_amd():
        if WINDOWS:
-            MAX_PINNED_MEMORY = get_total_memory(torch.device("cpu")) * 0.45  # Windows limit is apparently 50%
+            MAX_PINNED_MEMORY = get_total_memory(torch.device("cpu")) * 0.40  # Windows limit is apparently 50%
        else:
-            MAX_PINNED_MEMORY = get_total_memory(torch.device("cpu")) * 0.95
+            MAX_PINNED_MEMORY = get_total_memory(torch.device("cpu")) * 0.90
        logging.info("Enabled pinned memory {}".format(MAX_PINNED_MEMORY // (1024 * 1024)))

 PINNING_ALLOWED_TYPES = set(["Tensor", "Parameter", "QuantizedTensor"])
@@ -1421,8 +1403,6 @@ def unpin_memory(tensor):

    if torch.cuda.cudart().cudaHostUnregister(ptr) == 0:
        TOTAL_PINNED_MEMORY -= PINNED_MEMORY.pop(ptr)
-        if len(PINNED_MEMORY) == 0:
-            TOTAL_PINNED_MEMORY = 0
        return True
    else:
        logging.warning("Unpin error.")
@@ -1739,6 +1719,19 @@ def supports_nvfp4_compute(device=None):

    return True

+def supports_mxfp8_compute(device=None):
+    if not is_nvidia():
+        return False
+
+    if torch_version_numeric < (2, 10):
+        return False
+
+    props = torch.cuda.get_device_properties(device)
+    if props.major < 10:
+        return False
+
+    return True
+
 def extended_fp16_support():
    # TODO: check why some models work with fp16 on newer torch versions but not on older
    if torch_version_numeric < (2, 7):
--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@@ -297,8 +297,8 @@ class ModelPatcher:
        self.size = comfy.model_management.module_size(self.model)
        return self.size

-    def get_ram_usage(self):
-        return self.model_size()
+    def model_mmap_residency(self, free=False):
+        return comfy.model_management.module_mmap_residency(self.model, free=free)

    def loaded_size(self):
        return self.model.model_loaded_weight_memory
@@ -1063,6 +1063,10 @@ class ModelPatcher:

            return self.model.model_loaded_weight_memory - current_used

+    def pinned_memory_size(self):
+        # Pinned memory pressure tracking is only implemented for DynamicVram loading
+        return 0
+
    def partially_unload_ram(self, ram_to_unload):
        pass

@@ -1653,6 +1657,16 @@ class ModelPatcherDynamic(ModelPatcher):

        return freed

+    def pinned_memory_size(self):
+        total = 0
+        loading = self._load_list(for_dynamic=True)
+        for x in loading:
+            _, _, _, _, m, _ = x
+            pin = comfy.pinned_memory.get_pin(m)
+            if pin is not None:
+                total += pin.numel() * pin.element_size()
+        return total
+
    def partially_unload_ram(self, ram_to_unload):
        loading = self._load_list(for_dynamic=True, default_device=self.offload_device)
        for x in loading:
--- a/comfy/ops.py
+++ b/comfy/ops.py
@@ -306,10 +306,40 @@ class CastWeightBiasOp:
    bias_function = []

 class disable_weight_init:
+    @staticmethod
+    def _lazy_load_from_state_dict(module, state_dict, prefix, local_metadata,
+                                   missing_keys, unexpected_keys, weight_shape,
+                                   bias_shape=None):
+        assign_to_params_buffers = local_metadata.get("assign_to_params_buffers", False)
+        prefix_len = len(prefix)
+        for k, v in state_dict.items():
+            key = k[prefix_len:]
+            if key == "weight":
+                if not assign_to_params_buffers:
+                    v = v.clone()
+                module.weight = torch.nn.Parameter(v, requires_grad=False)
+            elif bias_shape is not None and key == "bias" and v is not None:
+                if not assign_to_params_buffers:
+                    v = v.clone()
+                module.bias = torch.nn.Parameter(v, requires_grad=False)
+            else:
+                unexpected_keys.append(k)
+
+        if module.weight is None:
+            module.weight = torch.nn.Parameter(torch.zeros(weight_shape), requires_grad=False)
+            missing_keys.append(prefix + "weight")
+
+        if bias_shape is not None and module.bias is None and getattr(module, "comfy_need_lazy_init_bias", False):
+            module.bias = torch.nn.Parameter(torch.zeros(bias_shape), requires_grad=False)
+            missing_keys.append(prefix + "bias")
+
    class Linear(torch.nn.Linear, CastWeightBiasOp):

        def __init__(self, in_features, out_features, bias=True, device=None, dtype=None):
-            if not comfy.model_management.WINDOWS or not comfy.memory_management.aimdo_enabled:
+            # don't trust subclasses that BYO state dict loader to call us.
+            if (not comfy.model_management.WINDOWS
+                or not comfy.memory_management.aimdo_enabled
+                or type(self)._load_from_state_dict is not disable_weight_init.Linear._load_from_state_dict):
                super().__init__(in_features, out_features, bias, device, dtype)
                return

@@ -330,32 +360,21 @@ class disable_weight_init:
        def _load_from_state_dict(self, state_dict, prefix, local_metadata,
                                strict, missing_keys, unexpected_keys, error_msgs):

-            if not comfy.model_management.WINDOWS or not comfy.memory_management.aimdo_enabled:
+            if (not comfy.model_management.WINDOWS
+                or not comfy.memory_management.aimdo_enabled
+                or type(self)._load_from_state_dict is not disable_weight_init.Linear._load_from_state_dict):
                return super()._load_from_state_dict(state_dict, prefix, local_metadata, strict,
                                                     missing_keys, unexpected_keys, error_msgs)
-            assign_to_params_buffers = local_metadata.get("assign_to_params_buffers", False)
-            prefix_len = len(prefix)
-            for k,v in state_dict.items():
-                if k[prefix_len:] == "weight":
-                    if not assign_to_params_buffers:
-                        v = v.clone()
-                    self.weight = torch.nn.Parameter(v, requires_grad=False)
-                elif k[prefix_len:] == "bias" and v is not None:
-                    if not assign_to_params_buffers:
-                        v = v.clone()
-                    self.bias = torch.nn.Parameter(v, requires_grad=False)
-                else:
-                    unexpected_keys.append(k)
-
-            #Reconcile default construction of the weight if its missing.
-            if self.weight is None:
-                v = torch.zeros(self.in_features, self.out_features)
-                self.weight = torch.nn.Parameter(v, requires_grad=False)
-                missing_keys.append(prefix+"weight")
-            if self.bias is None and self.comfy_need_lazy_init_bias:
-                v = torch.zeros(self.out_features,)
-                self.bias = torch.nn.Parameter(v, requires_grad=False)
-                missing_keys.append(prefix+"bias")
+            disable_weight_init._lazy_load_from_state_dict(
+                self,
+                state_dict,
+                prefix,
+                local_metadata,
+                missing_keys,
+                unexpected_keys,
+                weight_shape=(self.in_features, self.out_features),
+                bias_shape=(self.out_features,),
+            )


        def reset_parameters(self):
@@ -547,6 +566,53 @@ class disable_weight_init:
                return super().forward(*args, **kwargs)

    class Embedding(torch.nn.Embedding, CastWeightBiasOp):
+        def __init__(self, num_embeddings, embedding_dim, padding_idx=None, max_norm=None,
+                     norm_type=2.0, scale_grad_by_freq=False, sparse=False, _weight=None,
+                     _freeze=False, device=None, dtype=None):
+            # don't trust subclasses that BYO state dict loader to call us.
+            if (not comfy.model_management.WINDOWS
+                or not comfy.memory_management.aimdo_enabled
+                or type(self)._load_from_state_dict is not disable_weight_init.Embedding._load_from_state_dict):
+                super().__init__(num_embeddings, embedding_dim, padding_idx, max_norm,
+                                 norm_type, scale_grad_by_freq, sparse, _weight,
+                                 _freeze, device, dtype)
+                return
+
+            torch.nn.Module.__init__(self)
+            self.num_embeddings = num_embeddings
+            self.embedding_dim = embedding_dim
+            self.padding_idx = padding_idx
+            self.max_norm = max_norm
+            self.norm_type = norm_type
+            self.scale_grad_by_freq = scale_grad_by_freq
+            self.sparse = sparse
+            # Keep shape/dtype visible for module introspection without reserving storage.
+            embedding_dtype = dtype if dtype is not None else torch.get_default_dtype()
+            self.weight = torch.nn.Parameter(
+                torch.empty((num_embeddings, embedding_dim), device="meta", dtype=embedding_dtype),
+                requires_grad=False,
+            )
+            self.bias = None
+            self.weight_comfy_model_dtype = dtype
+
+        def _load_from_state_dict(self, state_dict, prefix, local_metadata,
+                                strict, missing_keys, unexpected_keys, error_msgs):
+
+            if (not comfy.model_management.WINDOWS
+                or not comfy.memory_management.aimdo_enabled
+                or type(self)._load_from_state_dict is not disable_weight_init.Embedding._load_from_state_dict):
+                return super()._load_from_state_dict(state_dict, prefix, local_metadata, strict,
+                                                     missing_keys, unexpected_keys, error_msgs)
+            disable_weight_init._lazy_load_from_state_dict(
+                self,
+                state_dict,
+                prefix,
+                local_metadata,
+                missing_keys,
+                unexpected_keys,
+                weight_shape=(self.num_embeddings, self.embedding_dim),
+            )
+
        def reset_parameters(self):
            self.bias = None
            return None
@@ -710,6 +776,104 @@ from .quant_ops import (
 )


+class QuantLinearFunc(torch.autograd.Function):
+    """Custom autograd function for quantized linear: quantized forward, optionally FP8 backward.
+
+    Forward always quantizes the input per layout_type (when set) and runs the linear op.
+    Backward with training_fp8_bwd enabled wraps the operands as FP8 QuantizedTensors for
+    torch.mm; with it disabled the weight is dequantized to compute_dtype first.
+    The input is kept for backward only when the weight requires grad, because only
+    grad_weight reads it; in FP8 mode the kept copy is FP8-quantized.
+    """
+
+    @staticmethod
+    def forward(ctx, input_float, weight, bias, layout_type, input_scale, compute_dtype):
+        """Compute linear(input_float, weight, bias) and stash what backward needs on ctx."""
+        input_shape = input_float.shape
+        inp = input_float.detach().flatten(0, -2)  # zero-cost view to 2D
+
+        # Quantize input for forward (same layout as weight)
+        if layout_type is not None:
+            q_input = QuantizedTensor.from_float(inp, layout_type, scale=input_scale)
+        else:
+            q_input = inp
+
+        w = weight.detach() if weight.requires_grad else weight
+        b = bias.detach() if bias is not None and bias.requires_grad else bias
+
+        output = torch.nn.functional.linear(q_input, w, b)
+
+        # Unflatten output to match original input shape
+        if len(input_shape) > 2:
+            output = output.unflatten(0, input_shape[:-1])
+
+        # Save for backward
+        ctx.input_shape = input_shape
+        ctx.has_bias = bias is not None
+        ctx.compute_dtype = compute_dtype
+        ctx.weight_requires_grad = weight.requires_grad
+        ctx.fp8_bwd = comfy.model_management.training_fp8_bwd
+
+        # Only grad_weight reads the input, so a frozen weight needs no cached copy
+        # (neither the float activation nor an extra FP8 quantization of it).
+        ctx.q_input = None
+        if not ctx.weight_requires_grad:
+            ctx.save_for_backward(weight)
+        elif not ctx.fp8_bwd:
+            ctx.save_for_backward(input_float, weight)
+        elif isinstance(q_input, QuantizedTensor) and layout_type.startswith('TensorCoreFP8'):
+            ctx.q_input = q_input  # already FP8, reuse
+            ctx.save_for_backward(weight)
+        else:
+            # NVFP4 or other layout — quantize input to FP8 for backward
+            ctx.q_input = QuantizedTensor.from_float(inp, "TensorCoreFP8E4M3Layout")
+            ctx.save_for_backward(weight)
+
+        return output
+
+    @staticmethod
+    @torch.autograd.function.once_differentiable
+    def backward(ctx, grad_output):
+        """Return (grad_input, grad_weight, grad_bias), then None for layout_type, input_scale, compute_dtype."""
+        compute_dtype = ctx.compute_dtype
+        grad_2d = grad_output.flatten(0, -2).to(compute_dtype)
+
+        # forward saved (input, weight) only for the non-FP8 path with a trainable weight
+        if ctx.weight_requires_grad and not ctx.fp8_bwd:
+            input_float, weight = ctx.saved_tensors
+        else:
+            weight, = ctx.saved_tensors
+
+        # Value casting — only difference between fp8 and non-fp8 paths
+        if ctx.fp8_bwd:
+            # Wrap as FP8 QuantizedTensors → torch.mm dispatches to _scaled_mm
+            grad_mm = QuantizedTensor.from_float(grad_2d, "TensorCoreFP8E5M2Layout")
+            if isinstance(weight, QuantizedTensor) and weight._layout_cls.startswith("TensorCoreFP8"):
+                weight_mm = weight
+            elif isinstance(weight, QuantizedTensor):
+                weight_mm = QuantizedTensor.from_float(weight.dequantize().to(compute_dtype), "TensorCoreFP8E4M3Layout")
+            else:
+                weight_mm = QuantizedTensor.from_float(weight.to(compute_dtype), "TensorCoreFP8E4M3Layout")
+            input_mm = ctx.q_input
+        else:
+            # Standard tensors → torch.mm does regular matmul
+            grad_mm = grad_2d
+            if isinstance(weight, QuantizedTensor):
+                weight_mm = weight.dequantize().to(compute_dtype)
+            else:
+                weight_mm = weight.to(compute_dtype)
+            input_mm = input_float.flatten(0, -2).to(compute_dtype) if ctx.weight_requires_grad else None
+
+        # Computation — same for both paths, dispatch handles the rest
+        grad_input = torch.mm(grad_mm, weight_mm)
+        if len(ctx.input_shape) > 2:
+            grad_input = grad_input.unflatten(0, ctx.input_shape[:-1])
+
+        grad_weight = torch.mm(grad_mm.t(), input_mm) if ctx.weight_requires_grad else None
+        grad_bias = grad_2d.sum(dim=0) if ctx.has_bias else None
+        return grad_input, grad_weight, grad_bias, None, None, None
+
+
 def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_precision_mm=False, disabled=[]):
    class MixedPrecisionOps(manual_cast):
        _quant_config = quant_config
@@ -764,6 +928,7 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec
                weight = state_dict.pop(weight_key, None)
                if weight is None:
                    logging.warning(f"Missing weight for layer {layer_name}")
+                    self.weight = None
                    return

                manually_loaded_keys = [weight_key]
@@ -801,6 +966,22 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec
                            orig_shape=(self.out_features, self.in_features),
                        )

+                    elif self.quant_format == "mxfp8":
+                        # MXFP8: E8M0 block scales stored as uint8 in safetensors
+                        block_scale = self._load_scale_param(state_dict, prefix, "weight_scale", device, manually_loaded_keys,
+                                                             dtype=torch.uint8)
+
+                        if block_scale is None:
+                            raise ValueError(f"Missing MXFP8 block scales for layer {layer_name}")
+
+                        block_scale = block_scale.view(torch.float8_e8m0fnu)
+
+                        params = layout_cls.Params(
+                            scale=block_scale,
+                            orig_dtype=MixedPrecisionOps._compute_dtype,
+                            orig_shape=(self.out_features, self.in_features),
+                        )
+
                    elif self.quant_format == "nvfp4":
                        # NVFP4: tensor_scale (weight_scale_2) + block_scale (weight_scale)
                        tensor_scale = self._load_scale_param(state_dict, prefix, "weight_scale_2", device, manually_loaded_keys)
@@ -854,6 +1035,9 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec
                if self.bias is not None:
                    sd["{}bias".format(prefix)] = self.bias

+                if self.weight is None:
+                    return sd
+
                if isinstance(self.weight, QuantizedTensor):
                    sd_out = self.weight.state_dict("{}weight".format(prefix))
                    for k in sd_out:
@@ -888,10 +1072,37 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec
                #If cast needs to apply lora, it should be done in the compute dtype
                compute_dtype = input.dtype

-                if (getattr(self, 'layout_type', None) is not None and
+                _use_quantized = (
+                    getattr(self, 'layout_type', None) is not None and
                    not isinstance(input, QuantizedTensor) and not self._full_precision_mm and
                    not getattr(self, 'comfy_force_cast_weights', False) and
-                    len(self.weight_function) == 0 and len(self.bias_function) == 0):
+                    len(self.weight_function) == 0 and len(self.bias_function) == 0
+                )
+
+                # Training path: quantized forward with compute_dtype backward via autograd function
+                if (input.requires_grad and _use_quantized):
+
+                    weight, bias, offload_stream = cast_bias_weight(
+                        self,
+                        input,
+                        offloadable=True,
+                        compute_dtype=compute_dtype,
+                        want_requant=True
+                    )
+
+                    scale = getattr(self, 'input_scale', None)
+                    if scale is not None:
+                        scale = comfy.model_management.cast_to_device(scale, input.device, None)
+
+                    output = QuantLinearFunc.apply(
+                        input, weight, bias, self.layout_type, scale, compute_dtype
+                    )
+
+                    uncast_bias_weight(self, weight, bias, offload_stream)
+                    return output
+
+                # Inference path (unchanged)
+                if _use_quantized:

                    # Reshape 3D tensors to 2D for quantization (needed for NVFP4 and others)
                    input_reshaped = input.reshape(-1, input_shape[2]) if input.ndim == 3 else input
@@ -939,7 +1150,10 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec
                for key, param in self._parameters.items():
                    if param is None:
                        continue
-                    self.register_parameter(key, torch.nn.Parameter(fn(param), requires_grad=False))
+                    p = fn(param)
+                    if p.is_inference():
+                        p = p.clone()
+                    self.register_parameter(key, torch.nn.Parameter(p, requires_grad=False))
                for key, buf in self._buffers.items():
                    if buf is not None:
                        self._buffers[key] = fn(buf)
@@ -950,12 +1164,15 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec
 def pick_operations(weight_dtype, compute_dtype, load_device=None, disable_fast_fp8=False, fp8_optimizations=False, model_config=None):
    fp8_compute = comfy.model_management.supports_fp8_compute(load_device) # TODO: if we support more ops this needs to be more granular
    nvfp4_compute = comfy.model_management.supports_nvfp4_compute(load_device)
+    mxfp8_compute = comfy.model_management.supports_mxfp8_compute(load_device)

    if model_config and hasattr(model_config, 'quant_config') and model_config.quant_config:
        logging.info("Using mixed precision operations")
        disabled = set()
        if not nvfp4_compute:
            disabled.add("nvfp4")
+        if not mxfp8_compute:
+            disabled.add("mxfp8")
        if not fp8_compute:
            disabled.add("float8_e4m3fn")
            disabled.add("float8_e5m2")
--- a/comfy/pinned_memory.py
+++ b/comfy/pinned_memory.py
@@ -1,6 +1,8 @@
-import torch
 import comfy.model_management
 import comfy.memory_management
+import comfy_aimdo.host_buffer
+import comfy_aimdo.torch
+import psutil

 from comfy.cli_args import args

@@ -11,19 +13,37 @@ def pin_memory(module):
    if module.pin_failed or args.disable_pinned_memory or get_pin(module) is not None:
        return
    #FIXME: This is a RAM cache trigger event
+    ram_headroom = comfy.memory_management.RAM_CACHE_HEADROOM
+    #we split the difference and assume half the RAM cache headroom is for us
+    if ram_headroom > 0 and psutil.virtual_memory().available < (ram_headroom * 0.5):
+        comfy.memory_management.extra_ram_release(ram_headroom)
+
    size = comfy.memory_management.vram_aligned_size([ module.weight, module.bias ])
-    pin = torch.empty((size,), dtype=torch.uint8)
-    if comfy.model_management.pin_memory(pin):
-        module._pin = pin
-    else:
+
+    if comfy.model_management.MAX_PINNED_MEMORY <= 0 or (comfy.model_management.TOTAL_PINNED_MEMORY + size) > comfy.model_management.MAX_PINNED_MEMORY:
        module.pin_failed = True
        return False
+
+    try:
+        hostbuf = comfy_aimdo.host_buffer.HostBuffer(size)
+    except RuntimeError:
+        module.pin_failed = True
+        return False
+
+    module._pin = comfy_aimdo.torch.hostbuf_to_tensor(hostbuf)
+    module._pin_hostbuf = hostbuf
+    comfy.model_management.TOTAL_PINNED_MEMORY += size
    return True

 def unpin_memory(module):
     if get_pin(module) is None:
         return 0
     size = module._pin.numel() * module._pin.element_size()
-    comfy.model_management.unpin_memory(module._pin)
+    # Return this pin's bytes to the global pinned-memory tally, clamping the tally at zero.
+    comfy.model_management.TOTAL_PINNED_MEMORY -= size
+    if comfy.model_management.TOTAL_PINNED_MEMORY < 0:
+        comfy.model_management.TOTAL_PINNED_MEMORY = 0
+
     del module._pin
+    del module._pin_hostbuf
     return size
--- a/comfy/quant_ops.py
+++ b/comfy/quant_ops.py
@@ -43,6 +43,18 @@ except ImportError as e:
    def get_layout_class(name):
        return None

+_CK_MXFP8_AVAILABLE = False
+if _CK_AVAILABLE:
+    try:
+        from comfy_kitchen.tensor import TensorCoreMXFP8Layout as _CKMxfp8Layout
+        _CK_MXFP8_AVAILABLE = True
+    except ImportError:
+        logging.warning("comfy_kitchen does not support MXFP8, please update comfy_kitchen.")
+# Stand-in base so TensorCoreMXFP8Layout can still be defined when the import above is unavailable.
+if not _CK_MXFP8_AVAILABLE:
+    class _CKMxfp8Layout:
+        pass
+
 import comfy.float

 # ==============================================================================
@@ -84,6 +96,31 @@ class _TensorCoreFP8LayoutBase(_CKFp8Layout):
        return qdata, params


+class TensorCoreMXFP8Layout(_CKMxfp8Layout):
+    """MXFP8 tensor-core layout with a quantize() that supports stochastic rounding."""
+
+    @classmethod
+    def quantize(cls, tensor, scale=None, stochastic_rounding=0, inplace_ops=False):
+        """Quantize a 2D ``tensor`` and return ``(qdata, params)``.
+
+        ``scale`` and ``inplace_ops`` are accepted but not read. A positive
+        ``stochastic_rounding`` selects the stochastic path and is used as its seed.
+        Raises ValueError when ``tensor`` is not 2D.
+        """
+        if tensor.dim() != 2:
+            raise ValueError(f"MXFP8 requires 2D tensor, got {tensor.dim()}D")
+
+        source_dtype, source_shape = tensor.dtype, tuple(tensor.shape)
+        # Padding is requested only when the padded shape differs from the actual one.
+        pad = cls.get_padded_shape(source_shape) != source_shape
+        if stochastic_rounding > 0:
+            qdata, block_scale = comfy.float.stochastic_round_quantize_mxfp8_by_block(tensor, pad_32x=pad, seed=stochastic_rounding)
+        else:
+            qdata, block_scale = ck.quantize_mxfp8(tensor, pad_32x=pad)
+
+        return qdata, cls.Params(scale=block_scale, orig_dtype=source_dtype, orig_shape=source_shape)
+
+
 class TensorCoreNVFP4Layout(_CKNvfp4Layout):
    @classmethod
    def quantize(cls, tensor, scale=None, stochastic_rounding=0, inplace_ops=False):
@@ -137,6 +174,8 @@ register_layout_class("TensorCoreFP8Layout", TensorCoreFP8Layout)
 register_layout_class("TensorCoreFP8E4M3Layout", TensorCoreFP8E4M3Layout)
 register_layout_class("TensorCoreFP8E5M2Layout", TensorCoreFP8E5M2Layout)
 register_layout_class("TensorCoreNVFP4Layout", TensorCoreNVFP4Layout)
+if _CK_MXFP8_AVAILABLE:
+    register_layout_class("TensorCoreMXFP8Layout", TensorCoreMXFP8Layout)

 QUANT_ALGOS = {
    "float8_e4m3fn": {
@@ -157,6 +196,14 @@ QUANT_ALGOS = {
    },
 }

+if _CK_MXFP8_AVAILABLE:
+    QUANT_ALGOS["mxfp8"] = {
+        "storage_t": torch.float8_e4m3fn,
+        "parameters": {"weight_scale", "input_scale"},
+        "comfy_tensor_layout": "TensorCoreMXFP8Layout",
+        "group_size": 32,
+    }
+

 # ==============================================================================
 # Re-exports for backward compatibility
--- a/comfy/sample.py
+++ b/comfy/sample.py
@@ -8,12 +8,12 @@ import comfy.nested_tensor

 def prepare_noise_inner(latent_image, generator, noise_inds=None):
    if noise_inds is None:
-        return torch.randn(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, generator=generator, device="cpu")
+        return torch.randn(latent_image.size(), dtype=torch.float32, layout=latent_image.layout, generator=generator, device="cpu").to(dtype=latent_image.dtype)

    unique_inds, inverse = np.unique(noise_inds, return_inverse=True)
    noises = []
    for i in range(unique_inds[-1]+1):
-        noise = torch.randn([1] + list(latent_image.size())[1:], dtype=latent_image.dtype, layout=latent_image.layout, generator=generator, device="cpu")
+        noise = torch.randn([1] + list(latent_image.size())[1:], dtype=torch.float32, layout=latent_image.layout, generator=generator, device="cpu").to(dtype=latent_image.dtype)
        if i in unique_inds:
            noises.append(noise)
    noises = [noises[i] for i in inverse]
@@ -64,10 +64,10 @@ def sample(model, noise, steps, cfg, sampler_name, scheduler, positive, negative
    sampler = comfy.samplers.KSampler(model, steps=steps, device=model.load_device, sampler=sampler_name, scheduler=scheduler, denoise=denoise, model_options=model.model_options)

    samples = sampler.sample(noise, positive, negative, cfg=cfg, latent_image=latent_image, start_step=start_step, last_step=last_step, force_full_denoise=force_full_denoise, denoise_mask=noise_mask, sigmas=sigmas, callback=callback, disable_pbar=disable_pbar, seed=seed)
-    samples = samples.to(comfy.model_management.intermediate_device())
+    samples = samples.to(device=comfy.model_management.intermediate_device(), dtype=comfy.model_management.intermediate_dtype())
    return samples

 def sample_custom(model, noise, cfg, sampler, sigmas, positive, negative, latent_image, noise_mask=None, callback=None, disable_pbar=False, seed=None):
    samples = comfy.samplers.sample(model, noise, positive, negative, cfg, model.load_device, sampler, sigmas, model_options=model.model_options, latent_image=latent_image, denoise_mask=noise_mask, callback=callback, disable_pbar=disable_pbar, seed=seed)
-    samples = samples.to(comfy.model_management.intermediate_device())
+    samples = samples.to(device=comfy.model_management.intermediate_device(), dtype=comfy.model_management.intermediate_dtype())
    return samples
--- a/comfy/samplers.py
+++ b/comfy/samplers.py
@@ -11,14 +11,12 @@ from functools import partial
 import collections
 import math
 import logging
-import os
 import comfy.sampler_helpers
 import comfy.model_patcher
 import comfy.patcher_extension
 import comfy.hooks
 import comfy.context_windows
 import comfy.utils
-from comfy.cli_args import args
 import scipy.stats
 import numpy

@@ -212,11 +210,9 @@ def _calc_cond_batch_outer(model: BaseModel, conds: list[list[dict]], x_in: torc
        _calc_cond_batch,
        comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.CALC_COND_BATCH, model_options, is_model_options=True)
    )
-    result = executor.execute(model, conds, x_in, timestep, model_options)
-    return result
+    return executor.execute(model, conds, x_in, timestep, model_options)

 def _calc_cond_batch(model: BaseModel, conds: list[list[dict]], x_in: torch.Tensor, timestep, model_options):
-    isolation_active = args.use_process_isolation or os.environ.get("PYISOLATE_CHILD") == "1"
    out_conds = []
    out_counts = []
    # separate conds by matching hooks
@@ -273,8 +269,7 @@ def _calc_cond_batch(model: BaseModel, conds: list[list[dict]], x_in: torch.Tens
                    for k, v in to_run[tt][0].conditioning.items():
                        cond_shapes[k].append(v.size())

-                memory_required = model.memory_required(input_shape, cond_shapes=cond_shapes)
-                if memory_required * 1.5 < free_memory:
+                if model.memory_required(input_shape, cond_shapes=cond_shapes) * 1.5 < free_memory:
                    to_batch = batch_amount
                    break

@@ -299,17 +294,9 @@ def _calc_cond_batch(model: BaseModel, conds: list[list[dict]], x_in: torch.Tens
                patches = p.patches

            batch_chunks = len(cond_or_uncond)
-            if isolation_active:
-                target_device = model.load_device if hasattr(model, "load_device") else input_x[0].device
-                input_x = torch.cat(input_x).to(target_device)
-            else:
-                input_x = torch.cat(input_x)
+            input_x = torch.cat(input_x)
            c = cond_cat(c)
-            if isolation_active:
-                timestep_ = torch.cat([timestep] * batch_chunks).to(target_device)
-                mult = [m.to(target_device) if hasattr(m, "to") else m for m in mult]
-            else:
-                timestep_ = torch.cat([timestep] * batch_chunks)
+            timestep_ = torch.cat([timestep] * batch_chunks)

            transformer_options = model.current_patcher.apply_hooks(hooks=hooks)
            if 'transformer_options' in model_options:
@@ -340,17 +327,9 @@ def _calc_cond_batch(model: BaseModel, conds: list[list[dict]], x_in: torch.Tens
            for o in range(batch_chunks):
                cond_index = cond_or_uncond[o]
                a = area[o]
-                out_t = output[o]
-                mult_t = mult[o]
-                if isolation_active:
-                    target_dev = out_conds[cond_index].device
-                    if hasattr(out_t, "device") and out_t.device != target_dev:
-                        out_t = out_t.to(target_dev)
-                    if hasattr(mult_t, "device") and mult_t.device != target_dev:
-                        mult_t = mult_t.to(target_dev)
                if a is None:
-                    out_conds[cond_index] += out_t * mult_t
-                    out_counts[cond_index] += mult_t
+                    out_conds[cond_index] += output[o] * mult[o]
+                    out_counts[cond_index] += mult[o]
                else:
                    out_c = out_conds[cond_index]
                    out_cts = out_counts[cond_index]
@@ -358,8 +337,8 @@ def _calc_cond_batch(model: BaseModel, conds: list[list[dict]], x_in: torch.Tens
                    for i in range(dims):
                        out_c = out_c.narrow(i + 2, a[i + dims], a[i])
                        out_cts = out_cts.narrow(i + 2, a[i + dims], a[i])
-                    out_c += out_t * mult_t
-                    out_cts += mult_t
+                    out_c += output[o] * mult[o]
+                    out_cts += mult[o]

    for i in range(len(out_conds)):
        out_conds[i] /= out_counts[i]
@@ -413,31 +392,14 @@ class KSamplerX0Inpaint:
        self.inner_model = model
        self.sigmas = sigmas
    def __call__(self, x, sigma, denoise_mask, model_options={}, seed=None):
-        isolation_active = args.use_process_isolation or os.environ.get("PYISOLATE_CHILD") == "1"
        if denoise_mask is not None:
-            if isolation_active and denoise_mask.device != x.device:
-                denoise_mask = denoise_mask.to(x.device)
            if "denoise_mask_function" in model_options:
                denoise_mask = model_options["denoise_mask_function"](sigma, denoise_mask, extra_options={"model": self.inner_model, "sigmas": self.sigmas})
            latent_mask = 1. - denoise_mask
-            if isolation_active:
-                latent_image = self.latent_image
-                if hasattr(latent_image, "device") and latent_image.device != x.device:
-                    latent_image = latent_image.to(x.device)
-                scaled = self.inner_model.inner_model.scale_latent_inpaint(x=x, sigma=sigma, noise=self.noise, latent_image=latent_image)
-                if hasattr(scaled, "device") and scaled.device != x.device:
-                    scaled = scaled.to(x.device)
-            else:
-                scaled = self.inner_model.inner_model.scale_latent_inpaint(
-                    x=x, sigma=sigma, noise=self.noise, latent_image=self.latent_image
-                )
-            x = x * denoise_mask + scaled * latent_mask
+            x = x * denoise_mask + self.inner_model.inner_model.scale_latent_inpaint(x=x, sigma=sigma, noise=self.noise, latent_image=self.latent_image) * latent_mask
        out = self.inner_model(x, sigma, model_options=model_options, seed=seed)
        if denoise_mask is not None:
-            latent_image = self.latent_image
-            if isolation_active and hasattr(latent_image, "device") and latent_image.device != out.device:
-                latent_image = latent_image.to(out.device)
-            out = out * denoise_mask + latent_image * latent_mask
+            out = out * denoise_mask + self.latent_image * latent_mask
        return out

 def simple_scheduler(model_sampling, steps):
@@ -779,11 +741,7 @@ class KSAMPLER(Sampler):
        else:
            model_k.noise = noise

-        max_denoise = self.max_denoise(model_wrap, sigmas)
-        model_sampling = model_wrap.inner_model.model_sampling
-        noise = model_sampling.noise_scaling(
-            sigmas[0], noise, latent_image, max_denoise
-        )
+        noise = model_wrap.inner_model.model_sampling.noise_scaling(sigmas[0], noise, latent_image, self.max_denoise(model_wrap, sigmas))

        k_callback = None
        total_steps = len(sigmas) - 1
@@ -1027,8 +985,8 @@ class CFGGuider:
        self.inner_model, self.conds, self.loaded_models = comfy.sampler_helpers.prepare_sampling(self.model_patcher, noise.shape, self.conds, self.model_options)
        device = self.model_patcher.load_device

-        noise = noise.to(device)
-        latent_image = latent_image.to(device)
+        noise = noise.to(device=device, dtype=torch.float32)
+        latent_image = latent_image.to(device=device, dtype=torch.float32)
        sigmas = sigmas.to(device)
        cast_to_load_options(self.model_options, device=device, dtype=self.model_patcher.model_dtype())

@@ -1070,6 +1028,7 @@ class CFGGuider:
                denoise_mask, _ = comfy.utils.pack_latents(denoise_masks)
            else:
                denoise_mask = denoise_masks[0]
+            denoise_mask = denoise_mask.float()

        self.conds = {}
        for k in self.original_conds:
--- a/comfy/sd.py
+++ b/comfy/sd.py
@@ -61,6 +61,7 @@ import comfy.text_encoders.newbie
 import comfy.text_encoders.anima
 import comfy.text_encoders.ace15
 import comfy.text_encoders.longcat_image
+import comfy.text_encoders.qwen35

 import comfy.model_patcher
 import comfy.lora
@@ -279,9 +280,6 @@ class CLIP:
        n.apply_hooks_to_conds = self.apply_hooks_to_conds
        return n

-    def get_ram_usage(self):
-        return self.patcher.get_ram_usage()
-
    def add_patches(self, patches, strength_patch=1.0, strength_model=1.0):
        return self.patcher.add_patches(patches, strength_patch, strength_model)

@@ -425,13 +423,13 @@ class CLIP:
    def get_key_patches(self):
        return self.patcher.get_key_patches()

-    def generate(self, tokens, do_sample=True, max_length=256, temperature=1.0, top_k=50, top_p=0.95, min_p=0.0, repetition_penalty=1.0, seed=None):
+    def generate(self, tokens, do_sample=True, max_length=256, temperature=1.0, top_k=50, top_p=0.95, min_p=0.0, repetition_penalty=1.0, seed=None, presence_penalty=0.0):
        self.cond_stage_model.reset_clip_options()

        self.load_model(tokens)
        self.cond_stage_model.set_clip_options({"layer": None})
        self.cond_stage_model.set_clip_options({"execution_device": self.patcher.load_device})
-        return self.cond_stage_model.generate(tokens, do_sample=do_sample, max_length=max_length, temperature=temperature, top_k=top_k, top_p=top_p, min_p=min_p, repetition_penalty=repetition_penalty, seed=seed)
+        return self.cond_stage_model.generate(tokens, do_sample=do_sample, max_length=max_length, temperature=temperature, top_k=top_k, top_p=top_p, min_p=min_p, repetition_penalty=repetition_penalty, seed=seed, presence_penalty=presence_penalty)

    def decode(self, token_ids, skip_special_tokens=True):
        return self.tokenizer.decode(token_ids, skip_special_tokens=skip_special_tokens)
@@ -455,7 +453,7 @@ class VAE:
        self.output_channels = 3
        self.pad_channel_value = None
        self.process_input = lambda image: image * 2.0 - 1.0
-        self.process_output = lambda image: torch.clamp((image + 1.0) / 2.0, min=0.0, max=1.0)
+        self.process_output = lambda image: image.add_(1.0).div_(2.0).clamp_(0.0, 1.0)
        self.working_dtypes = [torch.bfloat16, torch.float32]
        self.disable_offload = False
        self.not_video = False
@@ -839,9 +837,6 @@ class VAE:
        self.size = comfy.model_management.module_size(self.first_stage_model)
        return self.size

-    def get_ram_usage(self):
-        return self.model_size()
-
    def throw_exception_if_invalid(self):
        if self.first_stage_model is None:
            raise RuntimeError("ERROR: VAE is invalid: None\n\nIf the VAE is from a checkpoint loader node your checkpoint does not contain a valid VAE.")
@@ -871,13 +866,16 @@ class VAE:
                pixels = torch.nn.functional.pad(pixels, (0, self.output_channels - pixels.shape[-1]), mode=mode, value=value)
        return pixels

+    def vae_output_dtype(self):
+        return model_management.intermediate_dtype()
+
    def decode_tiled_(self, samples, tile_x=64, tile_y=64, overlap = 16):
        steps = samples.shape[0] * comfy.utils.get_tiled_scale_steps(samples.shape[3], samples.shape[2], tile_x, tile_y, overlap)
        steps += samples.shape[0] * comfy.utils.get_tiled_scale_steps(samples.shape[3], samples.shape[2], tile_x // 2, tile_y * 2, overlap)
        steps += samples.shape[0] * comfy.utils.get_tiled_scale_steps(samples.shape[3], samples.shape[2], tile_x * 2, tile_y // 2, overlap)
        pbar = comfy.utils.ProgressBar(steps)

-        decode_fn = lambda a: self.first_stage_model.decode(a.to(self.vae_dtype).to(self.device)).float()
+        decode_fn = lambda a: self.first_stage_model.decode(a.to(self.vae_dtype).to(self.device)).to(dtype=self.vae_output_dtype())
        output = self.process_output(
            (comfy.utils.tiled_scale(samples, decode_fn, tile_x // 2, tile_y * 2, overlap, upscale_amount = self.upscale_ratio, output_device=self.output_device, pbar = pbar) +
            comfy.utils.tiled_scale(samples, decode_fn, tile_x * 2, tile_y // 2, overlap, upscale_amount = self.upscale_ratio, output_device=self.output_device, pbar = pbar) +
@@ -887,16 +885,16 @@ class VAE:

    def decode_tiled_1d(self, samples, tile_x=256, overlap=32):
        if samples.ndim == 3:
-            decode_fn = lambda a: self.first_stage_model.decode(a.to(self.vae_dtype).to(self.device)).float()
+            decode_fn = lambda a: self.first_stage_model.decode(a.to(self.vae_dtype).to(self.device)).to(dtype=self.vae_output_dtype())
        else:
            og_shape = samples.shape
            samples = samples.reshape((og_shape[0], og_shape[1] * og_shape[2], -1))
-            decode_fn = lambda a: self.first_stage_model.decode(a.reshape((-1, og_shape[1], og_shape[2], a.shape[-1])).to(self.vae_dtype).to(self.device)).float()
+            decode_fn = lambda a: self.first_stage_model.decode(a.reshape((-1, og_shape[1], og_shape[2], a.shape[-1])).to(self.vae_dtype).to(self.device)).to(dtype=self.vae_output_dtype())

        return self.process_output(comfy.utils.tiled_scale_multidim(samples, decode_fn, tile=(tile_x,), overlap=overlap, upscale_amount=self.upscale_ratio, out_channels=self.output_channels, output_device=self.output_device))

    def decode_tiled_3d(self, samples, tile_t=999, tile_x=32, tile_y=32, overlap=(1, 8, 8)):
-        decode_fn = lambda a: self.first_stage_model.decode(a.to(self.vae_dtype).to(self.device)).float()
+        decode_fn = lambda a: self.first_stage_model.decode(a.to(self.vae_dtype).to(self.device)).to(dtype=self.vae_output_dtype())
        return self.process_output(comfy.utils.tiled_scale_multidim(samples, decode_fn, tile=(tile_t, tile_x, tile_y), overlap=overlap, upscale_amount=self.upscale_ratio, out_channels=self.output_channels, index_formulas=self.upscale_index_formula, output_device=self.output_device))

    def encode_tiled_(self, pixel_samples, tile_x=512, tile_y=512, overlap = 64):
@@ -905,7 +903,7 @@ class VAE:
        steps += pixel_samples.shape[0] * comfy.utils.get_tiled_scale_steps(pixel_samples.shape[3], pixel_samples.shape[2], tile_x * 2, tile_y // 2, overlap)
        pbar = comfy.utils.ProgressBar(steps)

-        encode_fn = lambda a: self.first_stage_model.encode((self.process_input(a)).to(self.vae_dtype).to(self.device)).float()
+        encode_fn = lambda a: self.first_stage_model.encode((self.process_input(a)).to(self.vae_dtype).to(self.device)).to(dtype=self.vae_output_dtype())
        samples = comfy.utils.tiled_scale(pixel_samples, encode_fn, tile_x, tile_y, overlap, upscale_amount = (1/self.downscale_ratio), out_channels=self.latent_channels, output_device=self.output_device, pbar=pbar)
        samples += comfy.utils.tiled_scale(pixel_samples, encode_fn, tile_x * 2, tile_y // 2, overlap, upscale_amount = (1/self.downscale_ratio), out_channels=self.latent_channels, output_device=self.output_device, pbar=pbar)
        samples += comfy.utils.tiled_scale(pixel_samples, encode_fn, tile_x // 2, tile_y * 2, overlap, upscale_amount = (1/self.downscale_ratio), out_channels=self.latent_channels, output_device=self.output_device, pbar=pbar)
@@ -914,7 +912,7 @@ class VAE:

    def encode_tiled_1d(self, samples, tile_x=256 * 2048, overlap=64 * 2048):
        if self.latent_dim == 1:
-            encode_fn = lambda a: self.first_stage_model.encode((self.process_input(a)).to(self.vae_dtype).to(self.device)).float()
+            encode_fn = lambda a: self.first_stage_model.encode((self.process_input(a)).to(self.vae_dtype).to(self.device)).to(dtype=self.vae_output_dtype())
            out_channels = self.latent_channels
            upscale_amount = 1 / self.downscale_ratio
        else:
@@ -923,7 +921,7 @@ class VAE:
            tile_x = tile_x // extra_channel_size
            overlap = overlap // extra_channel_size
            upscale_amount = 1 / self.downscale_ratio
-            encode_fn = lambda a: self.first_stage_model.encode((self.process_input(a)).to(self.vae_dtype).to(self.device)).reshape(1, out_channels, -1).float()
+            encode_fn = lambda a: self.first_stage_model.encode((self.process_input(a)).to(self.vae_dtype).to(self.device)).reshape(1, out_channels, -1).to(dtype=self.vae_output_dtype())

        out = comfy.utils.tiled_scale_multidim(samples, encode_fn, tile=(tile_x,), overlap=overlap, upscale_amount=upscale_amount, out_channels=out_channels, output_device=self.output_device)
        if self.latent_dim == 1:
@@ -932,7 +930,7 @@ class VAE:
            return out.reshape(samples.shape[0], self.latent_channels, extra_channel_size, -1)

    def encode_tiled_3d(self, samples, tile_t=9999, tile_x=512, tile_y=512, overlap=(1, 64, 64)):
-        encode_fn = lambda a: self.first_stage_model.encode((self.process_input(a)).to(self.vae_dtype).to(self.device)).float()
+        encode_fn = lambda a: self.first_stage_model.encode((self.process_input(a)).to(self.vae_dtype).to(self.device)).to(dtype=self.vae_output_dtype())
        return comfy.utils.tiled_scale_multidim(samples, encode_fn, tile=(tile_t, tile_x, tile_y), overlap=overlap, upscale_amount=self.downscale_ratio, out_channels=self.latent_channels, downscale=True, index_formulas=self.downscale_index_formula, output_device=self.output_device)

    def decode(self, samples_in, vae_options={}):
@@ -948,12 +946,23 @@ class VAE:
            batch_number = int(free_memory / memory_used)
            batch_number = max(1, batch_number)

+            # Pre-allocate output for VAEs that support direct buffer writes
+            preallocated = False
+            if getattr(self.first_stage_model, 'comfy_has_chunked_io', False):
+                pixel_samples = torch.empty(self.first_stage_model.decode_output_shape(samples_in.shape), device=self.output_device, dtype=self.vae_output_dtype())
+                preallocated = True
+
            for x in range(0, samples_in.shape[0], batch_number):
-                samples = samples_in[x:x+batch_number].to(self.vae_dtype).to(self.device)
-                out = self.process_output(self.first_stage_model.decode(samples, **vae_options).to(self.output_device).float())
-                if pixel_samples is None:
-                    pixel_samples = torch.empty((samples_in.shape[0],) + tuple(out.shape[1:]), device=self.output_device)
-                pixel_samples[x:x+batch_number] = out
+                samples = samples_in[x:x + batch_number].to(device=self.device, dtype=self.vae_dtype)
+                if preallocated:
+                    self.first_stage_model.decode(samples, output_buffer=pixel_samples[x:x+batch_number], **vae_options)
+                else:
+                    out = self.first_stage_model.decode(samples, **vae_options).to(device=self.output_device, dtype=self.vae_output_dtype(), copy=True)
+                    if pixel_samples is None:
+                        pixel_samples = torch.empty((samples_in.shape[0],) + tuple(out.shape[1:]), device=self.output_device, dtype=self.vae_output_dtype())
+                    pixel_samples[x:x+batch_number].copy_(out)
+                    del out
+                self.process_output(pixel_samples[x:x+batch_number])
        except Exception as e:
            model_management.raise_non_oom(e)
            logging.warning("Warning: Ran out of memory when regular VAE decoding, retrying with tiled VAE decoding.")
@@ -964,6 +973,7 @@ class VAE:
            do_tile = True

        if do_tile:
+            comfy.model_management.soft_empty_cache()
            dims = samples_in.ndim - 2
            if dims == 1 or self.extra_1d_channel is not None:
                pixel_samples = self.decode_tiled_1d(samples_in)
@@ -1024,10 +1034,15 @@ class VAE:
            batch_number = max(1, batch_number)
            samples = None
            for x in range(0, pixel_samples.shape[0], batch_number):
-                pixels_in = self.process_input(pixel_samples[x:x + batch_number]).to(self.vae_dtype).to(self.device)
-                out = self.first_stage_model.encode(pixels_in).to(self.output_device).float()
+                pixels_in = self.process_input(pixel_samples[x:x + batch_number]).to(self.vae_dtype)
+                if getattr(self.first_stage_model, 'comfy_has_chunked_io', False):
+                    out = self.first_stage_model.encode(pixels_in, device=self.device)
+                else:
+                    pixels_in = pixels_in.to(self.device)
+                    out = self.first_stage_model.encode(pixels_in)
+                out = out.to(self.output_device).to(dtype=self.vae_output_dtype())
                if samples is None:
-                    samples = torch.empty((pixel_samples.shape[0],) + tuple(out.shape[1:]), device=self.output_device)
+                    samples = torch.empty((pixel_samples.shape[0],) + tuple(out.shape[1:]), device=self.output_device, dtype=self.vae_output_dtype())
                samples[x:x + batch_number] = out

        except Exception as e:
@@ -1040,6 +1055,7 @@ class VAE:
            do_tile = True

        if do_tile:
+            comfy.model_management.soft_empty_cache()
            if self.latent_dim == 3:
                tile = 256
                overlap = tile // 4
@@ -1207,6 +1223,11 @@ class TEModel(Enum):
    QWEN3_8B = 20
    QWEN3_06B = 21
    GEMMA_3_4B_VISION = 22
+    QWEN35_08B = 23
+    QWEN35_2B = 24
+    QWEN35_4B = 25
+    QWEN35_9B = 26
+    QWEN35_27B = 27


 def detect_te_model(sd):
@@ -1246,6 +1267,17 @@ def detect_te_model(sd):
            return TEModel.QWEN25_3B
        if weight.shape[0] == 512:
            return TEModel.QWEN25_7B
+    if "model.language_model.layers.0.linear_attn.A_log" in sd and "model.language_model.layers.0.input_layernorm.weight" in sd:
+        weight = sd['model.language_model.layers.0.input_layernorm.weight']
+        if weight.shape[0] == 1024:
+            return TEModel.QWEN35_08B
+        if weight.shape[0] == 2560:
+            return TEModel.QWEN35_4B
+        if weight.shape[0] == 4096:
+            return TEModel.QWEN35_9B
+        if weight.shape[0] == 5120:
+            return TEModel.QWEN35_27B
+        return TEModel.QWEN35_2B
    if "model.layers.0.post_attention_layernorm.weight" in sd:
        weight = sd['model.layers.0.post_attention_layernorm.weight']
        if 'model.layers.0.self_attn.q_norm.weight' in sd:
@@ -1278,11 +1310,12 @@ def t5xxl_detect(clip_data):
    return {}

 def llama_detect(clip_data):
-    weight_name = "model.layers.0.self_attn.k_proj.weight"
+    marker_keys = ("model.layers.0.self_attn.k_proj.weight", "model.layers.0.linear_attn.in_proj_a.weight")

    for sd in clip_data:
-        if weight_name in sd:
-            return comfy.text_encoders.hunyuan_video.llama_detect(sd)
+        # The first state dict holding either marker key is handed to the hunyuan_video detector.
+        if any(key in sd for key in marker_keys):
+            return comfy.text_encoders.hunyuan_video.llama_detect(sd)

    return {}

@@ -1410,6 +1443,11 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip
        elif te_model == TEModel.JINA_CLIP_2:
            clip_target.clip = comfy.text_encoders.jina_clip_2.JinaClip2TextModelWrapper
            clip_target.tokenizer = comfy.text_encoders.jina_clip_2.JinaClip2TokenizerWrapper
+        elif te_model in (TEModel.QWEN35_08B, TEModel.QWEN35_2B, TEModel.QWEN35_4B, TEModel.QWEN35_9B, TEModel.QWEN35_27B):
+            clip_data[0] = comfy.utils.state_dict_prefix_replace(clip_data[0], {"model.language_model.": "model.", "model.visual.": "visual.", "lm_head.": "model.lm_head."})
+            qwen35_type = {TEModel.QWEN35_08B: "qwen35_08b", TEModel.QWEN35_2B: "qwen35_2b", TEModel.QWEN35_4B: "qwen35_4b", TEModel.QWEN35_9B: "qwen35_9b", TEModel.QWEN35_27B: "qwen35_27b"}[te_model]
+            clip_target.clip = comfy.text_encoders.qwen35.te(**llama_detect(clip_data), model_type=qwen35_type)
+            clip_target.tokenizer = comfy.text_encoders.qwen35.tokenizer(model_type=qwen35_type)
        elif te_model == TEModel.QWEN3_06B:
            clip_target.clip = comfy.text_encoders.anima.te(**llama_detect(clip_data))
            clip_target.tokenizer = comfy.text_encoders.anima.AnimaTokenizer
@@ -1698,15 +1736,18 @@ def load_diffusion_model_state_dict(sd, model_options={}, metadata=None, disable
    """
    dtype = model_options.get("dtype", None)

+    custom_operations = model_options.get("custom_operations", None)
+    if custom_operations is None:
+        sd, metadata = comfy.utils.convert_old_quants(sd, "", metadata=metadata)
+
    #Allow loading unets from checkpoint files
    diffusion_model_prefix = model_detection.unet_prefix_from_state_dict(sd)
    temp_sd = comfy.utils.state_dict_prefix_replace(sd, {diffusion_model_prefix: ""}, filter_keys=True)
    if len(temp_sd) > 0:
        sd = temp_sd
+        if custom_operations is None:
+            sd, metadata = comfy.utils.convert_old_quants(sd, "", metadata=metadata)

-    custom_operations = model_options.get("custom_operations", None)
-    if custom_operations is None:
-        sd, metadata = comfy.utils.convert_old_quants(sd, "", metadata=metadata)
    parameters = comfy.utils.calculate_parameters(sd)
    weight_dtype = comfy.utils.weight_dtype(sd)

--- a/comfy/sd1_clip.py
+++ b/comfy/sd1_clip.py
@@ -46,7 +46,7 @@ class ClipTokenWeightEncoder:
        out, pooled = o[:2]

        if pooled is not None:
-            first_pooled = pooled[0:1].to(model_management.intermediate_device())
+            first_pooled = pooled[0:1].to(device=model_management.intermediate_device())
        else:
            first_pooled = pooled

@@ -63,16 +63,16 @@ class ClipTokenWeightEncoder:
            output.append(z)

        if (len(output) == 0):
-            r = (out[-1:].to(model_management.intermediate_device()), first_pooled)
+            r = (out[-1:].to(device=model_management.intermediate_device()), first_pooled)
        else:
-            r = (torch.cat(output, dim=-2).to(model_management.intermediate_device()), first_pooled)
+            r = (torch.cat(output, dim=-2).to(device=model_management.intermediate_device()), first_pooled)

        if len(o) > 2:
            extra = {}
            for k in o[2]:
                v = o[2][k]
                if k == "attention_mask":
-                    v = v[:sections].flatten().unsqueeze(dim=0).to(model_management.intermediate_device())
+                    v = v[:sections].flatten().unsqueeze(dim=0).to(device=model_management.intermediate_device())
                extra[k] = v

            r = r + (extra,)
@@ -308,14 +308,14 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder):
    def load_sd(self, sd):
        return self.transformer.load_state_dict(sd, strict=False, assign=getattr(self, "can_assign_sd", False))

-    def generate(self, tokens, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed):
+    def generate(self, tokens, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed, presence_penalty=0.0):
        if isinstance(tokens, dict):
            tokens_only = next(iter(tokens.values())) # todo: get this better?
        else:
            tokens_only = tokens
        tokens_only = [[t[0] for t in b] for b in tokens_only]
        embeds = self.process_tokens(tokens_only, device=self.execution_device)[0]
-        return self.transformer.generate(embeds, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed)
+        return self.transformer.generate(embeds, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed, presence_penalty=presence_penalty)

 def parse_parentheses(string):
    result = []
@@ -740,5 +740,5 @@ class SD1ClipModel(torch.nn.Module):
    def load_sd(self, sd):
        return getattr(self, self.clip).load_sd(sd)

-    def generate(self, tokens, do_sample=True, max_length=256, temperature=1.0, top_k=50, top_p=0.95, min_p=0.0, repetition_penalty=1.0, seed=None):
-        return getattr(self, self.clip).generate(tokens, do_sample=do_sample, max_length=max_length, temperature=temperature, top_k=top_k, top_p=top_p, min_p=min_p, repetition_penalty=repetition_penalty, seed=seed)
+    def generate(self, tokens, do_sample=True, max_length=256, temperature=1.0, top_k=50, top_p=0.95, min_p=0.0, repetition_penalty=1.0, seed=None, presence_penalty=0.0):
+        return getattr(self, self.clip).generate(tokens, do_sample=do_sample, max_length=max_length, temperature=temperature, top_k=top_k, top_p=top_p, min_p=min_p, repetition_penalty=repetition_penalty, seed=seed, presence_penalty=presence_penalty)
--- a/comfy/supported_models.py
+++ b/comfy/supported_models.py
@@ -1734,6 +1734,21 @@ class LongCatImage(supported_models_base.BASE):
        hunyuan_detect = comfy.text_encoders.hunyuan_video.llama_detect(state_dict, "{}qwen25_7b.transformer.".format(pref))
        return supported_models_base.ClipTarget(comfy.text_encoders.longcat_image.LongCatImageTokenizer, comfy.text_encoders.longcat_image.te(**hunyuan_detect))

-models = [LotusD, Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, LongCatImage, FluxSchnell, GenmoMochi, LTXV, LTXAV, HunyuanVideo15_SR_Distilled, HunyuanVideo15, HunyuanImage21Refiner, HunyuanImage21, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, CosmosT2IPredict2, CosmosI2VPredict2, ZImagePixelSpace, ZImage, Lumina2, WAN22_T2V, WAN21_T2V, WAN21_I2V, WAN21_FunControl2V, WAN21_Vace, WAN21_Camera, WAN22_Camera, WAN22_S2V, WAN21_HuMo, WAN22_Animate, WAN21_FlowRVS, WAN21_SCAIL, Hunyuan3Dv2mini, Hunyuan3Dv2, Hunyuan3Dv2_1, HiDream, Chroma, ChromaRadiance, ACEStep, ACEStep15, Omnigen2, QwenImage, Flux2, Kandinsky5Image, Kandinsky5, Anima]
+
+class RT_DETR_v4(supported_models_base.BASE):
+    """Supported-model definition for the "RT_DETR_v4" image model."""
+    unet_config = {"image_model": "RT_DETR_v4"}
+
+    supported_inference_dtypes = [torch.float16, torch.float32]
+
+    def get_model(self, state_dict, prefix="", device=None):
+        # state_dict and prefix are accepted but not used when building the model.
+        return model_base.RT_DETR_v4(self, device=device)
+
+    def clip_target(self, state_dict={}):
+        # Always None: this entry defines no clip target.
+        return None
+
+models = [LotusD, Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, LongCatImage, FluxSchnell, GenmoMochi, LTXV, LTXAV, HunyuanVideo15_SR_Distilled, HunyuanVideo15, HunyuanImage21Refiner, HunyuanImage21, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, CosmosT2IPredict2, CosmosI2VPredict2, ZImagePixelSpace, ZImage, Lumina2, WAN22_T2V, WAN21_T2V, WAN21_I2V, WAN21_FunControl2V, WAN21_Vace, WAN21_Camera, WAN22_Camera, WAN22_S2V, WAN21_HuMo, WAN22_Animate, WAN21_FlowRVS, WAN21_SCAIL, Hunyuan3Dv2mini, Hunyuan3Dv2, Hunyuan3Dv2_1, HiDream, Chroma, ChromaRadiance, ACEStep, ACEStep15, Omnigen2, QwenImage, Flux2, Kandinsky5Image, Kandinsky5, Anima, RT_DETR_v4]

 models += [SVD_img2vid]
--- a/comfy/text_encoders/llama.py
+++ b/comfy/text_encoders/llama.py
@@ -224,7 +224,7 @@ class Qwen3_8BConfig:
    k_norm = "gemma3"
    rope_scale = None
    final_norm: bool = True
-    lm_head: bool = False
+    lm_head: bool = True
    stop_tokens = [151643, 151645]

@dataclass
@@ -655,6 +655,17 @@ class Llama2_(nn.Module):
        if config.lm_head:
            self.lm_head = ops.Linear(config.hidden_size, config.vocab_size, bias=False, device=device, dtype=dtype)

+    def get_past_len(self, past_key_values):
+        return past_key_values[0][2]  # third field of the first cache entry; forward() uses it as the starting offset for position_ids
+
+    def compute_freqs_cis(self, position_ids, device):
+        """Return the result of precompute_freqs_cis for position_ids on the given device.
+
+        head_dim, rope_theta, rope_scale and rope_dims are all read from self.config.
+        """
+        cfg = self.config
+        return precompute_freqs_cis(cfg.head_dim, position_ids, cfg.rope_theta, cfg.rope_scale, cfg.rope_dims, device=device)
+
    def forward(self, x, attention_mask=None, embeds=None, num_tokens=None, intermediate_output=None, final_layer_norm_intermediate=True, dtype=None, position_ids=None, embeds_info=[], past_key_values=None):
        if embeds is not None:
            x = embeds
@@ -667,17 +678,12 @@ class Llama2_(nn.Module):
        seq_len = x.shape[1]
        past_len = 0
        if past_key_values is not None and len(past_key_values) > 0:
-            past_len = past_key_values[0][2]
+            past_len = self.get_past_len(past_key_values)

        if position_ids is None:
            position_ids = torch.arange(past_len, past_len + seq_len, device=x.device).unsqueeze(0)

-        freqs_cis = precompute_freqs_cis(self.config.head_dim,
-                                         position_ids,
-                                         self.config.rope_theta,
-                                         self.config.rope_scale,
-                                         self.config.rope_dims,
-                                         device=x.device)
+        freqs_cis = self.compute_freqs_cis(position_ids, x.device)

        mask = None
        if attention_mask is not None:
@@ -812,9 +818,16 @@ class BaseGenerate:
        comfy.ops.uncast_bias_weight(module, weight, None, offload_stream)
        return x

-    def generate(self, embeds=None, do_sample=True, max_length=256, temperature=1.0, top_k=50, top_p=0.9, min_p=0.0, repetition_penalty=1.0, seed=42, stop_tokens=None, initial_tokens=[], execution_dtype=None, min_tokens=0):
-        device = embeds.device
+    def init_kv_cache(self, batch, max_cache_len, device, execution_dtype):
        model_config = self.model.config
+        # One (key buffer, value buffer, 0) tuple per hidden layer; both buffers are uninitialised torch.empty tensors.
+        cache_shape = [batch, model_config.num_key_value_heads, max_cache_len, model_config.head_dim]
+        buffer_kwargs = {"device": device, "dtype": execution_dtype}
+        return [(torch.empty(cache_shape, **buffer_kwargs), torch.empty(cache_shape, **buffer_kwargs), 0)
+                for _ in range(model_config.num_hidden_layers)]
+
+    def generate(self, embeds=None, do_sample=True, max_length=256, temperature=1.0, top_k=50, top_p=0.9, min_p=0.0, repetition_penalty=1.0, seed=42, stop_tokens=None, initial_tokens=[], execution_dtype=None, min_tokens=0, presence_penalty=0.0):
+        device = embeds.device

        if stop_tokens is None:
            stop_tokens = self.model.config.stop_tokens
@@ -829,11 +842,8 @@ class BaseGenerate:
        if embeds.ndim == 2:
            embeds = embeds.unsqueeze(0)

-        past_key_values = [] #kv_cache init
        max_cache_len = embeds.shape[1] + max_length
-        for x in range(model_config.num_hidden_layers):
-            past_key_values.append((torch.empty([embeds.shape[0], model_config.num_key_value_heads, max_cache_len, model_config.head_dim], device=device, dtype=execution_dtype),
-                                    torch.empty([embeds.shape[0], model_config.num_key_value_heads, max_cache_len, model_config.head_dim], device=device, dtype=execution_dtype), 0))
+        past_key_values = self.init_kv_cache(embeds.shape[0], max_cache_len, device, execution_dtype)

        generator = torch.Generator(device=device).manual_seed(seed) if do_sample else None

@@ -844,7 +854,7 @@ class BaseGenerate:
        for step in tqdm(range(max_length), desc="Generating tokens"):
            x, _, past_key_values = self.model.forward(None, embeds=embeds, attention_mask=None, past_key_values=past_key_values)
            logits = self.logits(x)[:, -1]
-            next_token = self.sample_token(logits, temperature, top_k, top_p, min_p, repetition_penalty, initial_tokens + generated_token_ids, generator, do_sample=do_sample)
+            next_token = self.sample_token(logits, temperature, top_k, top_p, min_p, repetition_penalty, initial_tokens + generated_token_ids, generator, do_sample=do_sample, presence_penalty=presence_penalty)
            token_id = next_token[0].item()
            generated_token_ids.append(token_id)

@@ -856,7 +866,7 @@ class BaseGenerate:

        return generated_token_ids

-    def sample_token(self, logits, temperature, top_k, top_p, min_p, repetition_penalty, token_history, generator, do_sample=True):
+    def sample_token(self, logits, temperature, top_k, top_p, min_p, repetition_penalty, token_history, generator, do_sample=True, presence_penalty=0.0):

        if not do_sample or temperature == 0.0:
            return torch.argmax(logits, dim=-1, keepdim=True)
@@ -867,6 +877,11 @@ class BaseGenerate:
                for token_id in set(token_history):
                    logits[i, token_id] *= repetition_penalty if logits[i, token_id] < 0 else 1/repetition_penalty

+        if presence_penalty is not None and presence_penalty != 0.0:
+            for i in range(logits.shape[0]):
+                for token_id in set(token_history):
+                    logits[i, token_id] -= presence_penalty
+
        if temperature != 1.0:
            logits = logits / temperature

@@ -897,6 +912,9 @@ class BaseGenerate:
 class BaseQwen3:
    def logits(self, x):
        input = x[:, -1:]
+        if self.model.config.lm_head:
+            return self.model.lm_head(input)
+
        module = self.model.embed_tokens

        offload_stream = None
@@ -1028,12 +1046,19 @@ class Qwen25_7BVLI(BaseLlama, BaseGenerate, torch.nn.Module):
                grid = e.get("extra", None)
                start = e.get("index")
                if position_ids is None:
-                    position_ids = torch.zeros((3, embeds.shape[1]), device=embeds.device)
+                    position_ids = torch.ones((3, embeds.shape[1]), device=embeds.device, dtype=torch.long)
                    position_ids[:, :start] = torch.arange(0, start, device=embeds.device)
                end = e.get("size") + start
                len_max = int(grid.max()) // 2
                start_next = len_max + start
-                position_ids[:, end:] = torch.arange(start_next + offset, start_next + (embeds.shape[1] - end) + offset, device=embeds.device)
+                if attention_mask is not None:
+                    # Assign compact sequential positions to attended tokens only,
+                    # skipping over padding so post-padding tokens aren't inflated.
+                    after_mask = attention_mask[0, end:]
+                    text_positions = after_mask.cumsum(0) - 1 + start_next + offset
+                    position_ids[:, end:] = torch.where(after_mask.bool(), text_positions, position_ids[0, end:])
+                else:
+                    position_ids[:, end:] = torch.arange(start_next + offset, start_next + (embeds.shape[1] - end) + offset, device=embeds.device)
                position_ids[0, start:end] = start + offset
                max_d = int(grid[0][1]) // 2
                position_ids[1, start:end] = torch.arange(start + offset, start + max_d + offset, device=embeds.device).unsqueeze(1).repeat(1, math.ceil((end - start) / max_d)).flatten(0)[:end - start]
--- a/comfy/text_encoders/longcat_image.py
+++ b/comfy/text_encoders/longcat_image.py
@@ -64,7 +64,13 @@ class LongCatImageBaseTokenizer(Qwen25_7BVLITokenizer):
        return [output]


+IMAGE_PAD_TOKEN_ID = 151655
+
 class LongCatImageTokenizer(sd1_clip.SD1Tokenizer):
+    T2I_PREFIX = "<|im_start|>system\nAs an image captioning expert, generate a descriptive text prompt based on an image content, suitable for input to a text-to-image model.<|im_end|>\n<|im_start|>user\n"
+    EDIT_PREFIX = "<|im_start|>system\nAs an image editing expert, first analyze the content and attributes of the input image(s). Then, based on the user's editing instructions, clearly and precisely determine how to modify the given image(s), ensuring that only the specified parts are altered and all other aspects remain consistent with the original(s).<|im_end|>\n<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>"
+    SUFFIX = "<|im_end|>\n<|im_start|>assistant\n"
+
    def __init__(self, embedding_directory=None, tokenizer_data={}):
        super().__init__(
            embedding_directory=embedding_directory,
@@ -72,10 +78,8 @@ class LongCatImageTokenizer(sd1_clip.SD1Tokenizer):
            name="qwen25_7b",
            tokenizer=LongCatImageBaseTokenizer,
        )
-        self.longcat_template_prefix = "<|im_start|>system\nAs an image captioning expert, generate a descriptive text prompt based on an image content, suitable for input to a text-to-image model.<|im_end|>\n<|im_start|>user\n"
-        self.longcat_template_suffix = "<|im_end|>\n<|im_start|>assistant\n"

-    def tokenize_with_weights(self, text, return_word_ids=False, **kwargs):
+    def tokenize_with_weights(self, text, return_word_ids=False, images=None, **kwargs):
        skip_template = False
        if text.startswith("<|im_start|>"):
            skip_template = True
@@ -90,11 +94,14 @@ class LongCatImageTokenizer(sd1_clip.SD1Tokenizer):
                text, return_word_ids=return_word_ids, disable_weights=True, **kwargs
            )
        else:
+            has_images = images is not None and len(images) > 0
+            template_prefix = self.EDIT_PREFIX if has_images else self.T2I_PREFIX
+
            prefix_ids = base_tok.tokenizer(
-                self.longcat_template_prefix, add_special_tokens=False
+                template_prefix, add_special_tokens=False
            )["input_ids"]
            suffix_ids = base_tok.tokenizer(
-                self.longcat_template_suffix, add_special_tokens=False
+                self.SUFFIX, add_special_tokens=False
            )["input_ids"]

            prompt_tokens = base_tok.tokenize_with_weights(
@@ -106,6 +113,14 @@ class LongCatImageTokenizer(sd1_clip.SD1Tokenizer):
            suffix_pairs = [(t, 1.0) for t in suffix_ids]

            combined = prefix_pairs + prompt_pairs + suffix_pairs
+
+            if has_images:
+                embed_count = 0
+                for i in range(len(combined)):
+                    if combined[i][0] == IMAGE_PAD_TOKEN_ID and embed_count < len(images):
+                        combined[i] = ({"type": "image", "data": images[embed_count], "original_type": "image"}, combined[i][1])
+                        embed_count += 1
+
            tokens = {"qwen25_7b": [combined]}

        return tokens
--- a/comfy/text_encoders/lt.py
+++ b/comfy/text_encoders/lt.py
@@ -91,11 +91,11 @@ class Gemma3_12BModel(sd1_clip.SDClipModel):
        self.dtypes.add(dtype)
        super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config={}, dtype=dtype, special_tokens={"start": 2, "pad": 0}, layer_norm_hidden_state=False, model_class=comfy.text_encoders.llama.Gemma3_12B, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options)

-    def generate(self, tokens, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed):
+    def generate(self, tokens, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed, presence_penalty=0.0):  # 0.0 default mirrors SDClipModel.generate, so callers that omit presence_penalty keep working
        tokens_only = [[t[0] for t in b] for b in tokens]
        embeds, _, _, embeds_info = self.process_tokens(tokens_only, self.execution_device)
        comfy.utils.normalize_image_embeddings(embeds, embeds_info, self.transformer.model.config.hidden_size ** 0.5)
-        return self.transformer.generate(embeds, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed, stop_tokens=[106])  # 106 is <end_of_turn>
+        return self.transformer.generate(embeds, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed, stop_tokens=[106], presence_penalty=presence_penalty)  # 106 is <end_of_turn>

 class DualLinearProjection(torch.nn.Module):
    def __init__(self, in_dim, out_dim_video, out_dim_audio, dtype=None, device=None, operations=None):
@@ -189,8 +189,8 @@ class LTXAVTEModel(torch.nn.Module):

        return out.to(device=out_device, dtype=torch.float), pooled, extra

-    def generate(self, tokens, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed):
-        return self.gemma3_12b.generate(tokens["gemma3_12b"], do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed)
+    def generate(self, tokens, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed, presence_penalty=0.0):  # 0.0 default keeps the previous nine-argument call form valid
+        return self.gemma3_12b.generate(tokens["gemma3_12b"], do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed, presence_penalty=presence_penalty)  # only the "gemma3_12b" token list is used

    def load_sd(self, sd):
        if "model.layers.47.self_attn.q_norm.weight" in sd:
--- a/comfy/text_encoders/qwen35.py
+++ b/comfy/text_encoders/qwen35.py
@@ -0,0 +1,833 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from dataclasses import dataclass, field
+import os
+import math
+
+import comfy.model_management
+from comfy.ldm.modules.attention import optimized_attention_for_device
+from comfy import sd1_clip
+import comfy.text_encoders.qwen_vl
+
+from .llama import BaseLlama, BaseGenerate, Llama2_, MLP, RMSNorm, apply_rope
+
+
+def _qwen35_layer_types(n):  # per-layer attention kind for an n-layer stack
+    return [("full_attention" if (i + 1) % 4 == 0 else "linear_attention") for i in range(n)]  # every 4th layer (1-based) is full attention; all others are linear attention
+
+@dataclass
+class Qwen35Config:
+    vocab_size: int = 248320
+    hidden_size: int = 2048
+    intermediate_size: int = 6144
+    num_hidden_layers: int = 24
+    # Full attention params
+    num_attention_heads: int = 8
+    num_key_value_heads: int = 2
+    head_dim: int = 256
+    partial_rotary_factor: float = 0.25
+    # Linear attention (DeltaNet) params
+    linear_num_key_heads: int = 16
+    linear_num_value_heads: int = 16
+    linear_key_head_dim: int = 128
+    linear_value_head_dim: int = 128
+    conv_kernel_size: int = 4
+    # Shared params
+    max_position_embeddings: int = 32768
+    rms_norm_eps: float = 1e-6
+    rope_theta: float = 10000000.0
+    mrope_section: list = field(default_factory=lambda: [11, 11, 10])
+    layer_types: list = field(default_factory=lambda: _qwen35_layer_types(24))
+    rms_norm_add: bool = True
+    mlp_activation: str = "silu"
+    qkv_bias: bool = False
+    final_norm: bool = True
+    lm_head: bool = False
+    stop_tokens: list = field(default_factory=lambda: [248044, 248046])
+    # These are needed for BaseLlama/BaseGenerate compatibility but unused directly
+    transformer_type: str = "qwen35_2b"
+    rope_dims: list = None
+    rope_scale: float = None
+
+QWEN35_VISION_DEFAULTS = dict(hidden_size=1024, num_heads=16, intermediate_size=4096, depth=24, patch_size=16, temporal_patch_size=2, in_channels=3, spatial_merge_size=2, num_position_embeddings=2304)
+
+QWEN35_MODELS = {
+    "qwen35_08b": dict(hidden_size=1024, intermediate_size=3584, vision=dict(hidden_size=768, num_heads=12, intermediate_size=3072, depth=12)),
+    "qwen35_2b": dict(hidden_size=2048, intermediate_size=6144, num_hidden_layers=24, num_attention_heads=8, num_key_value_heads=2, linear_num_value_heads=16),
+    "qwen35_4b": dict(hidden_size=2560, intermediate_size=9216, num_hidden_layers=32, num_attention_heads=16, num_key_value_heads=4, linear_num_value_heads=32),
+    "qwen35_9b": dict(hidden_size=4096, intermediate_size=12288, num_hidden_layers=32, num_attention_heads=16, num_key_value_heads=4, linear_num_value_heads=32, lm_head=True, vision=dict(hidden_size=1152, intermediate_size=4304, depth=27)),
+    "qwen35_27b": dict(hidden_size=5120, intermediate_size=17408, num_hidden_layers=64, num_attention_heads=24, num_key_value_heads=4, linear_num_value_heads=48, lm_head=True, vision=dict(hidden_size=1152, intermediate_size=4304, depth=27)),
+}
+
+
+def _make_config(model_type, config_dict=None):  # build a Qwen35Config from the model preset overlaid with caller overrides
+    overrides = QWEN35_MODELS.get(model_type, {}).copy()  # shallow copy so the shared preset table is never mutated
+    overrides.pop("vision", None)  # "vision" is not a Qwen35Config field
+    overrides.update(config_dict or {})  # caller values win over the preset; None means no overrides
+    if "num_hidden_layers" in overrides and "layer_types" not in overrides:  # derive the pattern from the final depth unless one was given explicitly
+        overrides["layer_types"] = _qwen35_layer_types(overrides["num_hidden_layers"])
+    return Qwen35Config(**overrides)
+
+
+class RMSNormGated(RMSNorm):  # RMSNorm whose output is additionally multiplied by an activated gate
+    def forward(self, x, gate):
+        return super().forward(x) * F.silu(gate.to(x.dtype))  # parent norm of x, scaled elementwise by SiLU(gate) after casting gate to x's dtype
+
+def torch_chunk_gated_delta_rule(query, key, value, g, beta, chunk_size=64, initial_state=None, output_final_state=False):
+    initial_dtype = query.dtype
+    query = F.normalize(query, dim=-1)
+    key = F.normalize(key, dim=-1)
+    query, key, value, beta, g = [x.transpose(1, 2).contiguous().to(torch.float32) for x in (query, key, value, beta, g)]
+
+    batch_size, num_heads, sequence_length, k_head_dim = key.shape
+    v_head_dim = value.shape[-1]
+    pad_size = (chunk_size - sequence_length % chunk_size) % chunk_size
+    query = F.pad(query, (0, 0, 0, pad_size))
+    key = F.pad(key, (0, 0, 0, pad_size))
+    value = F.pad(value, (0, 0, 0, pad_size))
+    beta = F.pad(beta, (0, pad_size))
+    g = F.pad(g, (0, pad_size))
+    total_sequence_length = sequence_length + pad_size
+    scale = 1 / (query.shape[-1] ** 0.5)
+    query = query * scale
+
+    v_beta = value * beta.unsqueeze(-1)
+    k_beta = key * beta.unsqueeze(-1)
+    query, key, value, k_beta, v_beta = [x.reshape(x.shape[0], x.shape[1], -1, chunk_size, x.shape[-1]) for x in (query, key, value, k_beta, v_beta)]
+    g = g.reshape(g.shape[0], g.shape[1], -1, chunk_size)
+    mask = torch.triu(torch.ones(chunk_size, chunk_size, dtype=torch.bool, device=query.device), diagonal=0)
+
+    g = g.cumsum(dim=-1)
+    decay_mask = ((g.unsqueeze(-1) - g.unsqueeze(-2)).tril().exp().float()).tril()
+    attn = -((k_beta @ key.transpose(-1, -2)) * decay_mask).masked_fill(mask, 0)
+    for i in range(1, chunk_size):
+        row = attn[..., i, :i].clone()
+        sub = attn[..., :i, :i].clone()
+        attn[..., i, :i] = row + (row.unsqueeze(-1) * sub).sum(-2)
+    attn = attn + torch.eye(chunk_size, dtype=attn.dtype, device=attn.device)
+    value = attn @ v_beta
+    k_cumdecay = attn @ (k_beta * g.exp().unsqueeze(-1))
+    last_recurrent_state = (
+        torch.zeros(batch_size, num_heads, k_head_dim, v_head_dim).to(value)
+        if initial_state is None
+        else initial_state.to(value)
+    )
+    core_attn_out = torch.zeros_like(value)
+    mask = torch.triu(torch.ones(chunk_size, chunk_size, dtype=torch.bool, device=query.device), diagonal=1)
+
+    for i in range(0, total_sequence_length // chunk_size):
+        q_i, k_i, v_i = query[:, :, i], key[:, :, i], value[:, :, i]
+        attn = (q_i @ k_i.transpose(-1, -2) * decay_mask[:, :, i]).masked_fill_(mask, 0)
+        v_prime = (k_cumdecay[:, :, i]) @ last_recurrent_state
+        v_new = v_i - v_prime
+        attn_inter = (q_i * g[:, :, i, :, None].exp()) @ last_recurrent_state
+        core_attn_out[:, :, i] = attn_inter + attn @ v_new
+        last_recurrent_state = (
+            last_recurrent_state * g[:, :, i, -1, None, None].exp()
+            + (k_i * (g[:, :, i, -1, None] - g[:, :, i]).exp()[..., None]).transpose(-1, -2) @ v_new
+        )
+
+    if not output_final_state:
+        last_recurrent_state = None
+    core_attn_out = core_attn_out.reshape(core_attn_out.shape[0], core_attn_out.shape[1], -1, core_attn_out.shape[-1])
+    core_attn_out = core_attn_out[:, :, :sequence_length]
+    core_attn_out = core_attn_out.transpose(1, 2).contiguous().to(initial_dtype)
+    return core_attn_out, last_recurrent_state
+
+
+def torch_causal_conv1d_update(x, conv_state, weight, bias=None):  # single-step per-channel conv over the cached window; mutates conv_state in place
+    # conv_state: [B, channels, kernel_size-1], x: [B, channels, 1]
+    # weight: [channels, kernel_size]
+    state_len = conv_state.shape[-1]
+    combined = torch.cat([conv_state, x], dim=-1).to(weight.dtype)  # [B, channels, kernel_size]
+    conv_state.copy_(combined[:, :, -state_len:])  # slide the window: keep only the newest state_len columns for the next call
+    out = (combined * weight).sum(dim=-1, keepdim=True)  # [B, channels, 1]
+    if bias is not None:
+        out = out + bias.unsqueeze(0).unsqueeze(-1)  # broadcast the per-channel bias over batch and the length-1 time axis
+    return F.silu(out).to(x.dtype)  # SiLU activation, result cast back to the input dtype
+
+
+# GatedDeltaNet - Linear Attention Layer
+
+class GatedDeltaNet(nn.Module):
+    def __init__(self, config, device=None, dtype=None, ops=None):
+        super().__init__()
+
+        hidden = config.hidden_size
+        self.num_key_heads = config.linear_num_key_heads
+        self.num_value_heads = config.linear_num_value_heads
+        self.key_head_dim = config.linear_key_head_dim
+        self.value_head_dim = config.linear_value_head_dim
+        self.conv_kernel_size = config.conv_kernel_size
+
+        key_dim = self.num_key_heads * self.key_head_dim
+        value_dim = self.num_value_heads * self.value_head_dim
+        self.key_dim = key_dim
+        self.value_dim = value_dim
+        conv_dim = key_dim * 2 + value_dim
+
+        self.in_proj_qkv = ops.Linear(hidden, conv_dim, bias=False, device=device, dtype=dtype)
+        self.in_proj_z = ops.Linear(hidden, value_dim, bias=False, device=device, dtype=dtype)
+        self.in_proj_b = ops.Linear(hidden, self.num_value_heads, bias=False, device=device, dtype=dtype)
+        self.in_proj_a = ops.Linear(hidden, self.num_value_heads, bias=False, device=device, dtype=dtype)
+        self.out_proj = ops.Linear(value_dim, hidden, bias=False, device=device, dtype=dtype)
+
+        self.dt_bias = nn.Parameter(torch.empty(self.num_value_heads, device=device, dtype=dtype))
+        self.A_log = nn.Parameter(torch.empty(self.num_value_heads, device=device, dtype=dtype))
+
+        self.conv1d = ops.Conv1d(in_channels=conv_dim, out_channels=conv_dim, bias=False, kernel_size=self.conv_kernel_size,
+            groups=conv_dim, padding=self.conv_kernel_size - 1, device=device, dtype=dtype)
+
+        self.norm = RMSNormGated(self.value_head_dim, eps=config.rms_norm_eps, device=device, dtype=dtype)
+
+    def forward(self, x, past_key_value=None, **kwargs):
+        batch_size, seq_len, _ = x.shape
+
+        use_recurrent = (
+            past_key_value is not None
+            and past_key_value[2] > 0
+            and seq_len == 1
+        )
+
+        # Projections (shared)
+        mixed_qkv = self.in_proj_qkv(x).transpose(1, 2)  # [B, conv_dim, seq_len]
+        z = self.in_proj_z(x)
+        b = self.in_proj_b(x)
+        a = self.in_proj_a(x)
+
+        # Conv1d
+        if use_recurrent:
+            recurrent_state, conv_state, step_index = past_key_value
+            conv_weight = comfy.model_management.cast_to_device(self.conv1d.weight, mixed_qkv.device, mixed_qkv.dtype).squeeze(1)
+            conv_bias = comfy.model_management.cast_to_device(self.conv1d.bias, mixed_qkv.device, mixed_qkv.dtype) if self.conv1d.bias is not None else None
+            mixed_qkv = torch_causal_conv1d_update(mixed_qkv, conv_state, conv_weight, conv_bias)
+        else:
+            if past_key_value is not None:
+                recurrent_state, conv_state, step_index = past_key_value
+                conv_state_init = F.pad(mixed_qkv, (self.conv_kernel_size - mixed_qkv.shape[-1], 0))
+                conv_state.copy_(conv_state_init[:, :, -conv_state.shape[-1]:])
+            mixed_qkv = F.silu(self.conv1d(mixed_qkv)[:, :, :seq_len])
+
+        # Split QKV and compute beta/g
+        mixed_qkv = mixed_qkv.transpose(1, 2)  # [B, seq_len, conv_dim]
+        query, key, value = mixed_qkv.split([self.key_dim, self.key_dim, self.value_dim], dim=-1)
+        beta = b.sigmoid()
+        g = -self.A_log.float().exp() * F.softplus(a.float() + self.dt_bias.float())
+
+        # Delta rule
+        if use_recurrent:
+            # single-token path: work in [B, heads, dim] without seq dim
+            query = query.reshape(batch_size, self.num_key_heads, self.key_head_dim)
+            key = key.reshape(batch_size, self.num_key_heads, self.key_head_dim)
+            value = value.reshape(batch_size, self.num_value_heads, self.value_head_dim)
+
+            if self.num_value_heads != self.num_key_heads:
+                rep = self.num_value_heads // self.num_key_heads
+                query = query.repeat_interleave(rep, dim=1)
+                key = key.repeat_interleave(rep, dim=1)
+
+            scale = self.key_head_dim ** -0.5
+            q = F.normalize(query.float(), dim=-1) * scale
+            k = F.normalize(key.float(), dim=-1)
+            v = value.float()
+            beta_t = beta.reshape(batch_size, -1)
+            g_t = g.reshape(batch_size, -1).exp()
+
+            # In-place state update: [B, heads, k_dim, v_dim]
+            recurrent_state.mul_(g_t[:, :, None, None])
+            kv_mem = torch.einsum('bhk,bhkv->bhv', k, recurrent_state)
+            delta = (v - kv_mem) * beta_t[:, :, None]
+            recurrent_state.add_(k.unsqueeze(-1) * delta.unsqueeze(-2))
+            core_attn_out = torch.einsum('bhk,bhkv->bhv', q, recurrent_state)
+
+            core_attn_out = core_attn_out.to(x.dtype).unsqueeze(1)
+            present_key_value = (recurrent_state, conv_state, step_index + 1)
+        else:
+            query = query.reshape(batch_size, seq_len, -1, self.key_head_dim)
+            key = key.reshape(batch_size, seq_len, -1, self.key_head_dim)
+            value = value.reshape(batch_size, seq_len, -1, self.value_head_dim)
+
+            if self.num_value_heads != self.num_key_heads:
+                rep = self.num_value_heads // self.num_key_heads
+                query = query.repeat_interleave(rep, dim=2)
+                key = key.repeat_interleave(rep, dim=2)
+
+            core_attn_out, last_recurrent_state = torch_chunk_gated_delta_rule(
+                query, key, value, g=g, beta=beta,
+                initial_state=None,
+                output_final_state=past_key_value is not None,
+            )
+
+            present_key_value = None
+            if past_key_value is not None:
+                if last_recurrent_state is not None:
+                    recurrent_state.copy_(last_recurrent_state.to(recurrent_state.dtype))
+                present_key_value = (recurrent_state, conv_state, step_index + seq_len)
+
+        # Gated norm + output projection (shared)
+        core_attn_out = self.norm(core_attn_out.reshape(-1, self.value_head_dim), z.reshape(-1, self.value_head_dim))
+        output = self.out_proj(core_attn_out.reshape(batch_size, seq_len, -1))
+        return output, present_key_value
+
+
+# GatedAttention - Full Attention with output gating
+def precompute_partial_rope(head_dim, rotary_dim, position_ids, theta, device=None, mrope_section=None):
+    """Build RoPE tables over rotary_dim dims; returns (cos, first half of sin, negated second half of sin). head_dim is accepted but unused here."""
+    theta_numerator = torch.arange(0, rotary_dim, 2, device=device).float()
+    inv_freq = 1.0 / (theta ** (theta_numerator / rotary_dim))  # one inverse frequency per pair of rotary dims
+
+    inv_freq_expanded = inv_freq[None, :, None].float().expand(position_ids.shape[0], -1, 1)
+    position_ids_expanded = position_ids[:, None, :].float()
+    freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2)  # [rows, seq, n_freq], assuming 2-D position_ids
+    emb = torch.cat((freqs, freqs), dim=-1)  # angles duplicated so the last dim has 2 * n_freq entries
+    cos = emb.cos()
+    sin = emb.sin()
+
+    if mrope_section is not None and position_ids.shape[0] == 3:  # three position rows: mix them section by section
+        mrope_section_2 = [s * 2 for s in mrope_section]  # NOTE(review): sections are cut from the duplicated table, so both halves of one frequency may use different position rows - confirm against the reference mRoPE layout
+        cos = torch.cat([m[i % 3] for i, m in enumerate(cos.split(mrope_section_2, dim=-1))], dim=-1).unsqueeze(0)  # section i takes its values from position row i % 3
+        sin = torch.cat([m[i % 3] for i, m in enumerate(sin.split(mrope_section_2, dim=-1))], dim=-1).unsqueeze(0)
+
+    cos = cos.unsqueeze(1)  # add a singleton axis at dim 1 - presumably the head axis; confirm against apply_rope
+    sin = sin.unsqueeze(1)
+    sin_split = sin.shape[-1] // 2
+    return (cos, sin[..., :sin_split], -sin[..., sin_split:])  # sin is handed back as two halves, the second one negated
+
+
+def apply_partial_rope(xq, xk, freqs_cis, rotary_dim):
+    """Apply RoPE to only the first rotary_dim dimensions."""
+    xq_rot = xq[..., :rotary_dim]
+    xq_pass = xq[..., rotary_dim:]
+    xk_rot = xk[..., :rotary_dim]
+    xk_pass = xk[..., rotary_dim:]
+
+    xq_rot, xk_rot = apply_rope(xq_rot, xk_rot, freqs_cis)
+
+    xq = torch.cat([xq_rot, xq_pass], dim=-1)
+    xk = torch.cat([xk_rot, xk_pass], dim=-1)
+    return xq, xk
+
+
+class GatedAttention(nn.Module):
+    def __init__(self, config, device=None, dtype=None, ops=None):
+        super().__init__()
+
+        self.num_heads = config.num_attention_heads
+        self.num_kv_heads = config.num_key_value_heads
+        self.head_dim = config.head_dim
+        self.hidden_size = config.hidden_size
+        self.inner_size = self.num_heads * self.head_dim
+        self.rotary_dim = int(self.head_dim * config.partial_rotary_factor)
+
+        # q_proj outputs 2x: query + gate
+        self.q_proj = ops.Linear(config.hidden_size, self.inner_size * 2, bias=config.qkv_bias, device=device, dtype=dtype)
+        self.k_proj = ops.Linear(config.hidden_size, self.num_kv_heads * self.head_dim, bias=config.qkv_bias, device=device, dtype=dtype)
+        self.v_proj = ops.Linear(config.hidden_size, self.num_kv_heads * self.head_dim, bias=config.qkv_bias, device=device, dtype=dtype)
+        self.o_proj = ops.Linear(self.inner_size, config.hidden_size, bias=False, device=device, dtype=dtype)
+
+        # QK norms with (1+weight) scaling
+        self.q_norm = RMSNorm(self.head_dim, eps=config.rms_norm_eps, add=config.rms_norm_add, device=device, dtype=dtype)
+        self.k_norm = RMSNorm(self.head_dim, eps=config.rms_norm_eps, add=config.rms_norm_add, device=device, dtype=dtype)
+
+    def forward(self, x, attention_mask=None, freqs_cis=None, optimized_attention=None, past_key_value=None):
+        batch_size, seq_length, _ = x.shape
+
+        # Project Q (with gate), K, V
+        qg = self.q_proj(x)
+        # Split per head into query and gate: each is [B, seq, heads, head_dim]
+        qg = qg.view(batch_size, seq_length, self.num_heads, self.head_dim * 2)
+        xq, gate = qg[..., :self.head_dim], qg[..., self.head_dim:]
+        gate = gate.reshape(batch_size, seq_length, -1)  # [B, seq, inner_size]
+
+        xk = self.k_proj(x)
+        xv = self.v_proj(x)
+
+        xq = self.q_norm(xq).transpose(1, 2)  # [B, heads, seq, head_dim]
+        xk = self.k_norm(xk.view(batch_size, seq_length, self.num_kv_heads, self.head_dim)).transpose(1, 2)
+        xv = xv.view(batch_size, seq_length, self.num_kv_heads, self.head_dim).transpose(1, 2)
+
+        # Apply partial RoPE
+        xq, xk = apply_partial_rope(xq, xk, freqs_cis, self.rotary_dim)
+
+        # KV cache
+        present_key_value = None
+        if past_key_value is not None:
+            past_key, past_value, index = past_key_value
+            num_tokens = xk.shape[2]
+            if past_key.shape[2] >= (index + num_tokens):
+                past_key[:, :, index:index + num_tokens] = xk
+                past_value[:, :, index:index + num_tokens] = xv
+                xk = past_key[:, :, :index + num_tokens]
+                xv = past_value[:, :, :index + num_tokens]
+                present_key_value = (past_key, past_value, index + num_tokens)
+            else:
+                if index > 0:
+                    xk = torch.cat((past_key[:, :, :index], xk), dim=2)
+                    xv = torch.cat((past_value[:, :, :index], xv), dim=2)
+                present_key_value = (xk, xv, index + num_tokens)
+
+        # Expand KV heads for GQA
+        if self.num_heads != self.num_kv_heads:
+            xk = xk.repeat_interleave(self.num_heads // self.num_kv_heads, dim=1)
+            xv = xv.repeat_interleave(self.num_heads // self.num_kv_heads, dim=1)
+
+        output = optimized_attention(xq, xk, xv, self.num_heads, mask=attention_mask, skip_reshape=True)
+        output = output * gate.sigmoid()
+
+        return self.o_proj(output), present_key_value
+
+
+# Hybrid Transformer Block
+class Qwen35TransformerBlock(nn.Module):  # one layer: a token mixer (linear or full attention) followed by an MLP, each with a residual
+    def __init__(self, config, index, device=None, dtype=None, ops=None):
+        super().__init__()
+        self.layer_type = config.layer_types[index]  # mixer kind for this layer index
+        if self.layer_type == "linear_attention":
+            self.linear_attn = GatedDeltaNet(config, device=device, dtype=dtype, ops=ops)
+        else:  # any other value is treated as full attention
+            self.self_attn = GatedAttention(config, device=device, dtype=dtype, ops=ops)
+        self.mlp = MLP(config, device=device, dtype=dtype, ops=ops)
+        self.input_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps, add=config.rms_norm_add, device=device, dtype=dtype)
+        self.post_attention_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps, add=config.rms_norm_add, device=device, dtype=dtype)
+
+    def forward(self, x, attention_mask=None, freqs_cis=None, optimized_attention=None, past_key_value=None):  # returns (hidden states, this layer's updated cache entry or None)
+        if self.layer_type == "linear_attention":
+            h, present_key_value = self.linear_attn(self.input_layernorm(x), attention_mask=attention_mask, past_key_value=past_key_value)  # GatedDeltaNet.forward absorbs attention_mask via **kwargs and does not read it
+        else:
+            h, present_key_value = self.self_attn(self.input_layernorm(x), attention_mask=attention_mask, freqs_cis=freqs_cis, optimized_attention=optimized_attention, past_key_value=past_key_value)
+
+        x = x + h  # residual around the mixer
+        x = x + self.mlp(self.post_attention_layernorm(x))  # residual around the normed MLP
+        return x, present_key_value
+
+
+# Qwen35 Transformer Backbone
+class Qwen35Transformer(Llama2_):  # backbone: embeddings, the hybrid block stack, optional final norm and lm_head
+    def __init__(self, config, device=None, dtype=None, ops=None):
+        nn.Module.__init__(self)  # calls nn.Module directly, so Llama2_.__init__ is not run; all submodules are built below
+        self.config = config
+        self.vocab_size = config.vocab_size
+        self.normalize_in = False
+
+        self.embed_tokens = ops.Embedding(config.vocab_size, config.hidden_size, device=device, dtype=dtype)
+        self.layers = nn.ModuleList([
+            Qwen35TransformerBlock(config, index=i, device=device, dtype=dtype, ops=ops)
+            for i in range(config.num_hidden_layers)
+        ])
+
+        if config.final_norm:
+            self.norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps, add=config.rms_norm_add, device=device, dtype=dtype)
+        else:
+            self.norm = None
+
+        if config.lm_head:  # the lm_head attribute only exists when the config enables it
+            self.lm_head = ops.Linear(config.hidden_size, config.vocab_size, bias=False, device=device, dtype=dtype)
+
+    def get_past_len(self, past_key_values):  # cached token count, taken from the first full-attention layer's cache entry
+        for i, layer in enumerate(self.layers):
+            if layer.layer_type == "full_attention":
+                if len(past_key_values) > i:
+                    return past_key_values[i][2]  # element 2 of a full-attention entry is the running token index
+                break  # only the first full-attention layer is consulted
+        return 0  # no usable cache entry
+
+    def compute_freqs_cis(self, position_ids, device):  # RoPE tables for the rotated slice of each head
+        rotary_dim = int(self.config.head_dim * self.config.partial_rotary_factor)  # same formula GatedAttention uses for its rotary_dim
+        return precompute_partial_rope(
+            self.config.head_dim, rotary_dim, position_ids,
+            self.config.rope_theta, device=device,
+            mrope_section=self.config.mrope_section,
+        )
+
+
+# Vision Encoder
+class Qwen35VisionPatchEmbed(nn.Module):  # projects each (temporal, height, width) patch to one embed_dim vector with a Conv3d
+    def __init__(self, config, device=None, dtype=None, ops=None):
+        super().__init__()
+        self.patch_size = config["patch_size"]
+        self.temporal_patch_size = config["temporal_patch_size"]
+        self.in_channels = config["in_channels"]
+        self.embed_dim = config["hidden_size"]
+        kernel_size = [self.temporal_patch_size, self.patch_size, self.patch_size]
+        self.proj = ops.Conv3d(self.in_channels, self.embed_dim, kernel_size=kernel_size, stride=kernel_size, bias=True, device=device, dtype=dtype)  # stride equals the kernel, so a patch-sized input yields exactly one output position
+
+    def forward(self, x):
+        target_dtype = self.proj.weight.dtype
+        x = x.view(-1, self.in_channels, self.temporal_patch_size, self.patch_size, self.patch_size)  # one row per patch
+        return self.proj(x.to(target_dtype)).view(-1, self.embed_dim)  # [num_patches, embed_dim]
+
+
+class Qwen35VisionMLP(nn.Module):
+    def __init__(self, hidden_size, intermediate_size, device=None, dtype=None, ops=None):
+        super().__init__()
+
+        self.linear_fc1 = ops.Linear(hidden_size, intermediate_size, bias=True, device=device, dtype=dtype)
+        self.linear_fc2 = ops.Linear(intermediate_size, hidden_size, bias=True, device=device, dtype=dtype)
+
+    def forward(self, hidden_state):
+        return self.linear_fc2(F.gelu(self.linear_fc1(hidden_state), approximate="tanh"))
+
+
+class Qwen35VisionRotaryEmbedding(nn.Module):
+    def __init__(self, dim, theta=10000.0):
+        super().__init__()
+        self.dim = dim
+        inv_freq = 1.0 / (theta ** (torch.arange(0, dim, 2, dtype=torch.float) / dim))
+        self.register_buffer("inv_freq", inv_freq, persistent=False)
+
+    def forward(self, seqlen):
+        seq = torch.arange(seqlen, device=self.inv_freq.device, dtype=self.inv_freq.dtype)
+        freqs = torch.outer(seq, self.inv_freq)
+        return freqs
+
+
+class Qwen35VisionAttention(nn.Module):
+    def __init__(self, hidden_size, num_heads, device=None, dtype=None, ops=None):
+        super().__init__()
+
+        self.dim = hidden_size
+        self.num_heads = num_heads
+        self.head_dim = self.dim // self.num_heads
+        self.qkv = ops.Linear(self.dim, self.dim * 3, bias=True, device=device, dtype=dtype)
+        self.proj = ops.Linear(self.dim, self.dim, device=device, dtype=dtype)
+
+    def forward(self, x, cu_seqlens, position_embeddings, optimized_attention=None):
+        seq_length = x.shape[0]
+        query_states, key_states, value_states = (
+            self.qkv(x).reshape(seq_length, 3, self.num_heads, -1).permute(1, 0, 2, 3).unbind(0)
+        )
+        query_states, key_states = apply_rope(query_states, key_states, position_embeddings)
+
+        # Process per-sequence attention
+        lengths = (cu_seqlens[1:] - cu_seqlens[:-1]).tolist()
+        q_splits = torch.split(query_states, lengths, dim=0)
+        k_splits = torch.split(key_states, lengths, dim=0)
+        v_splits = torch.split(value_states, lengths, dim=0)
+
+        attn_outputs = []
+        for q, k, v in zip(q_splits, k_splits, v_splits):
+            q = q.transpose(0, 1).unsqueeze(0)
+            k = k.transpose(0, 1).unsqueeze(0)
+            v = v.transpose(0, 1).unsqueeze(0)
+            attn_outputs.append(optimized_attention(q, k, v, self.num_heads, skip_reshape=True))
+
+        attn_output = torch.cat(attn_outputs, dim=1)
+        attn_output = attn_output.reshape(seq_length, -1)
+        return self.proj(attn_output)
+
+
+class Qwen35VisionBlock(nn.Module):
+    def __init__(self, hidden_size, num_heads, intermediate_size, device=None, dtype=None, ops=None):
+        super().__init__()
+
+        self.norm1 = ops.LayerNorm(hidden_size, eps=1e-6, device=device, dtype=dtype)
+        self.norm2 = ops.LayerNorm(hidden_size, eps=1e-6, device=device, dtype=dtype)
+        self.attn = Qwen35VisionAttention(hidden_size, num_heads, device=device, dtype=dtype, ops=ops)
+        self.mlp = Qwen35VisionMLP(hidden_size, intermediate_size, device=device, dtype=dtype, ops=ops)
+
+    def forward(self, x, cu_seqlens, position_embeddings, optimized_attention=None):
+        x = x + self.attn(self.norm1(x), cu_seqlens=cu_seqlens, position_embeddings=position_embeddings, optimized_attention=optimized_attention)
+        return x + self.mlp(self.norm2(x))
+
+
+class Qwen35VisionPatchMerger(nn.Module):
+    def __init__(self, hidden_size, spatial_merge_size, out_hidden_size, device=None, dtype=None, ops=None):
+        super().__init__()
+
+        merge_dim = hidden_size * (spatial_merge_size ** 2)
+        self.norm = ops.LayerNorm(hidden_size, eps=1e-6, device=device, dtype=dtype)
+        self.linear_fc1 = ops.Linear(merge_dim, merge_dim, device=device, dtype=dtype)
+        self.linear_fc2 = ops.Linear(merge_dim, out_hidden_size, device=device, dtype=dtype)
+        self.merge_dim = merge_dim
+
+    def forward(self, x):
+        x = self.norm(x).view(-1, self.merge_dim)
+        return self.linear_fc2(F.gelu(self.linear_fc1(x)))
+
+
+class Qwen35VisionModel(nn.Module):
+    def __init__(self, config, device=None, dtype=None, ops=None):
+        super().__init__()
+        self.spatial_merge_size = config["spatial_merge_size"]
+        self.patch_size = config["patch_size"]
+        self.spatial_merge_unit = self.spatial_merge_size * self.spatial_merge_size
+
+        self.hidden_size = config["hidden_size"]
+        self.num_heads = config["num_heads"]
+        self.num_position_embeddings = config["num_position_embeddings"]
+
+        self.patch_embed = Qwen35VisionPatchEmbed(config, device=device, dtype=dtype, ops=ops)
+        self.pos_embed = ops.Embedding(self.num_position_embeddings, self.hidden_size, device=device, dtype=dtype)
+        self.num_grid_per_side = int(self.num_position_embeddings ** 0.5)
+        self.rotary_pos_emb = Qwen35VisionRotaryEmbedding(self.hidden_size // self.num_heads // 2)
+        self.blocks = nn.ModuleList([
+            Qwen35VisionBlock(self.hidden_size, self.num_heads, config["intermediate_size"], device=device, dtype=dtype, ops=ops)
+            for _ in range(config["depth"])
+        ])
+        self.merger = Qwen35VisionPatchMerger(self.hidden_size, self.spatial_merge_size, config["out_hidden_size"], device=device, dtype=dtype, ops=ops)
+
+    def rot_pos_emb(self, grid_thw):
+        merge_size = self.spatial_merge_size
+        grid_thw_list = grid_thw.tolist()
+        max_hw = max(max(h, w) for _, h, w in grid_thw_list)
+        freq_table = self.rotary_pos_emb(max_hw)
+        device = freq_table.device
+        total_tokens = sum(int(t * h * w) for t, h, w in grid_thw_list)
+        pos_ids = torch.empty((total_tokens, 2), dtype=torch.long, device=device)
+        offset = 0
+        for num_frames, height, width in grid_thw_list:
+            num_frames, height, width = int(num_frames), int(height), int(width)
+            merged_h, merged_w = height // merge_size, width // merge_size
+            block_rows = torch.arange(merged_h, device=device)
+            block_cols = torch.arange(merged_w, device=device)
+            intra_row = torch.arange(merge_size, device=device)
+            intra_col = torch.arange(merge_size, device=device)
+            row_idx = block_rows[:, None, None, None] * merge_size + intra_row[None, None, :, None]
+            col_idx = block_cols[None, :, None, None] * merge_size + intra_col[None, None, None, :]
+            row_idx = row_idx.expand(merged_h, merged_w, merge_size, merge_size).reshape(-1)
+            col_idx = col_idx.expand(merged_h, merged_w, merge_size, merge_size).reshape(-1)
+            coords = torch.stack((row_idx, col_idx), dim=-1)
+            if num_frames > 1:
+                coords = coords.repeat(num_frames, 1)
+            num_tokens = coords.shape[0]
+            pos_ids[offset:offset + num_tokens] = coords
+            offset += num_tokens
+        embeddings = freq_table[pos_ids]
+        embeddings = embeddings.flatten(1)
+        return embeddings
+
+    def fast_pos_embed_interpolate(self, grid_thw):
+        grid_thw_list = grid_thw.tolist()
+        grid_ts = [int(row[0]) for row in grid_thw_list]
+        grid_hs = [int(row[1]) for row in grid_thw_list]
+        grid_ws = [int(row[2]) for row in grid_thw_list]
+        device = self.pos_embed.weight.device
+        idx_list = [[] for _ in range(4)]
+        weight_list = [[] for _ in range(4)]
+        for t, h, w in grid_thw_list:
+            h, w = int(h), int(w)
+            h_idxs = torch.linspace(0, self.num_grid_per_side - 1, h, device=device)
+            w_idxs = torch.linspace(0, self.num_grid_per_side - 1, w, device=device)
+            h_idxs_floor = h_idxs.int()
+            w_idxs_floor = w_idxs.int()
+            h_idxs_ceil = (h_idxs.int() + 1).clip(max=self.num_grid_per_side - 1)
+            w_idxs_ceil = (w_idxs.int() + 1).clip(max=self.num_grid_per_side - 1)
+            dh = h_idxs - h_idxs_floor
+            dw = w_idxs - w_idxs_floor
+            base_h = h_idxs_floor * self.num_grid_per_side
+            base_h_ceil = h_idxs_ceil * self.num_grid_per_side
+            indices = [
+                (base_h[None].T + w_idxs_floor[None]).flatten(),
+                (base_h[None].T + w_idxs_ceil[None]).flatten(),
+                (base_h_ceil[None].T + w_idxs_floor[None]).flatten(),
+                (base_h_ceil[None].T + w_idxs_ceil[None]).flatten(),
+            ]
+            weights = [
+                ((1 - dh)[None].T * (1 - dw)[None]).flatten(),
+                ((1 - dh)[None].T * dw[None]).flatten(),
+                (dh[None].T * (1 - dw)[None]).flatten(),
+                (dh[None].T * dw[None]).flatten(),
+            ]
+            for j in range(4):
+                idx_list[j].extend(indices[j].tolist())
+                weight_list[j].extend(weights[j].tolist())
+        idx_tensor = torch.tensor(idx_list, dtype=torch.long, device=device)
+        weight_tensor = torch.tensor(weight_list, dtype=self.pos_embed.weight.dtype, device=device)
+        pos_embeds = self.pos_embed(idx_tensor).to(device) * weight_tensor[:, :, None]
+        patch_pos_embeds = pos_embeds[0] + pos_embeds[1] + pos_embeds[2] + pos_embeds[3]
+        patch_pos_embeds = patch_pos_embeds.split([h * w for h, w in zip(grid_hs, grid_ws)])
+        patch_pos_embeds_permute = []
+        merge_size = self.spatial_merge_size
+        for pos_embed, t, h, w in zip(patch_pos_embeds, grid_ts, grid_hs, grid_ws):
+            pos_embed = pos_embed.repeat(t, 1)
+            pos_embed = (
+                pos_embed.view(t, h // merge_size, merge_size, w // merge_size, merge_size, -1)
+                .permute(0, 1, 3, 2, 4, 5)
+                .flatten(0, 4)
+            )
+            patch_pos_embeds_permute.append(pos_embed)
+        return torch.cat(patch_pos_embeds_permute)
+
+    def forward(self, x, grid_thw):
+        """Run the vision tower and return the output of ``self.merger``.
+
+        ``x`` is fed to ``self.patch_embed``. ``grid_thw`` is indexed as
+        ``[:, 0]``, ``[:, 1]`` and ``[:, 2]`` (t, h, w per entry) and drives both
+        position-embedding helpers and the cumulative sequence lengths that are
+        handed to every block.
+        """
+        hidden = self.patch_embed(x)
+        hidden = hidden + self.fast_pos_embed_interpolate(grid_thw).to(hidden.device)
+
+        n_tokens = hidden.shape[0]
+        hidden = hidden.reshape(n_tokens, -1)
+
+        # Rotary table: the per-token values are duplicated along the last dim
+        # before cos/sin are taken; sin is then split in halves, the second negated.
+        rope = self.rot_pos_emb(grid_thw).reshape(n_tokens, -1)
+        rope = torch.cat((rope, rope), dim=-1)
+        cos = rope.cos().unsqueeze(-2)
+        sin = rope.sin().unsqueeze(-2)
+        half = sin.shape[-1] // 2
+        position_embeddings = (cos, sin[..., :half], -sin[..., half:])
+
+        # Each grid entry contributes t segments of h * w tokens; the running
+        # total gets a leading 0 so it can serve as segment boundaries.
+        tokens_per_frame = grid_thw[:, 1] * grid_thw[:, 2]
+        cu_seqlens = torch.repeat_interleave(tokens_per_frame, grid_thw[:, 0]).cumsum(dim=0, dtype=torch.int32)
+        cu_seqlens = F.pad(cu_seqlens, (1, 0), value=0)
+
+        optimized_attention = optimized_attention_for_device(hidden.device, mask=False, small_input=True)
+        for block in self.blocks:
+            hidden = block(hidden, cu_seqlens=cu_seqlens, position_embeddings=position_embeddings, optimized_attention=optimized_attention)
+        return self.merger(hidden)
+
+# Model Wrapper
+class Qwen35(BaseLlama, BaseGenerate, torch.nn.Module):
+    """Qwen3.5 language model bundled with its vision model.
+
+    ``model_type`` is passed to ``_make_config`` and is also the key used to
+    look up vision overrides in ``QWEN35_MODELS``.
+    """
+    model_type = "qwen35_2b"
+
+    def __init__(self, config_dict, dtype, device, operations):
+        """Build the text transformer (``self.model``) and vision model (``self.visual``)."""
+        super().__init__()
+        config = _make_config(self.model_type, config_dict)
+        self.num_layers = config.num_hidden_layers
+        self.model = Qwen35Transformer(config, device=device, dtype=dtype, ops=operations)
+        # Vision config precedence: defaults < per-model overrides < out_hidden_size,
+        # which is always forced to the text model's hidden size.
+        vision_overrides = QWEN35_MODELS.get(self.model_type, {}).get("vision", {})
+        vision_config = {**QWEN35_VISION_DEFAULTS, **vision_overrides, "out_hidden_size": config.hidden_size}
+        self.visual = Qwen35VisionModel(vision_config, device=device, dtype=dtype, ops=operations)
+        self.dtype = dtype
+
+    def preprocess_embed(self, embed, device):
+        """Turn an ``{"type": "image", "data": ...}`` embed into vision features.
+
+        Returns ``(features, grid)`` for image embeds and ``(None, None)`` for
+        any other embed type.
+        """
+        if embed["type"] == "image":
+            image, grid = comfy.text_encoders.qwen_vl.process_qwen2vl_images(embed["data"], patch_size=16)
+            # The vision model is always fed float32 input, regardless of self.dtype.
+            return self.visual(image.to(device, dtype=torch.float32), grid), grid
+        return None, None
+
+    def forward(self, x, attention_mask=None, embeds=None, num_tokens=None, intermediate_output=None, final_layer_norm_intermediate=True, dtype=None, embeds_info=[], past_key_values=None):
+        """Compute 3-row position ids when image embeds are present, then defer to the parent ``forward``.
+
+        ``embeds_info`` entries of type ``"image"`` are read for ``"extra"``
+        (the grid), ``"index"`` (start column) and ``"size"`` (token count).
+        Without any image entry, ``position_ids`` is passed as ``None``.
+        """
+        grid = None
+        position_ids = None
+        # Running correction added to every position written after an image;
+        # updated at the end of each image iteration.
+        offset = 0
+        for e in embeds_info:
+            if e.get("type") == "image":
+                grid = e.get("extra", None)
+                start = e.get("index")
+                if position_ids is None:
+                    # First image: allocate (3, seq_len) and number the tokens
+                    # before it 0..start-1 on all three rows.
+                    # presumably the rows are the temporal/height/width axes — TODO confirm
+                    position_ids = torch.zeros((3, embeds.shape[1]), device=embeds.device)
+                    position_ids[:, :start] = torch.arange(0, start, device=embeds.device)
+                end = e.get("size") + start
+                # NOTE(review): the // 2 presumably reflects a spatial merge size of 2 — confirm.
+                len_max = int(grid.max()) // 2
+                start_next = len_max + start
+                # Tokens after the image continue counting from start + len_max (+ offset).
+                position_ids[:, end:] = torch.arange(start_next + offset, start_next + (embeds.shape[1] - end) + offset, device=embeds.device)
+                # Row 0 is constant across the image span.
+                position_ids[0, start:end] = start + offset
+                # Row 1: each value of the range is repeated consecutively, then truncated to the span length.
+                # NOTE(review): only grid[0] is read here, even if the grid has more rows — confirm intended.
+                max_d = int(grid[0][1]) // 2
+                position_ids[1, start:end] = torch.arange(start + offset, start + max_d + offset, device=embeds.device).unsqueeze(1).repeat(1, math.ceil((end - start) / max_d)).flatten(0)[:end - start]
+                # Row 2: the whole range is tiled back to back, then truncated to the span length.
+                max_d = int(grid[0][2]) // 2
+                position_ids[2, start:end] = torch.arange(start + offset, start + max_d + offset, device=embeds.device).unsqueeze(0).repeat(math.ceil((end - start) / max_d), 1).flatten(0)[:end - start]
+                # The image advanced positions by len_max while occupying (end - start) tokens.
+                offset += len_max - (end - start)
+
+        # No image entry was seen: let the parent use its default positions.
+        if grid is None:
+            position_ids = None
+
+        return super().forward(x, attention_mask=attention_mask, embeds=embeds, num_tokens=num_tokens, intermediate_output=intermediate_output, final_layer_norm_intermediate=final_layer_norm_intermediate, dtype=dtype, position_ids=position_ids, past_key_values=past_key_values)
+
+    def init_kv_cache(self, batch, max_cache_len, device, execution_dtype):
+        """Create one cache entry per hidden layer and return them as a list.
+
+        ``"linear_attention"`` layers get ``(recurrent_state, conv_state, 0)``
+        with both tensors zero-initialised; every other layer gets two
+        ``torch.empty`` tensors of shape
+        ``[batch, num_key_value_heads, max_cache_len, head_dim]`` followed by ``0``.
+        """
+        model_config = self.model.config
+        past_key_values = []
+        for i in range(model_config.num_hidden_layers):
+            if model_config.layer_types[i] == "linear_attention":
+                # The recurrent state is always float32, independent of execution_dtype.
+                recurrent_state = torch.zeros(
+                    [batch, model_config.linear_num_value_heads, model_config.linear_key_head_dim, model_config.linear_value_head_dim],
+                    device=device, dtype=torch.float32
+                )
+                # Channel count: two key-sized groups plus one value-sized group.
+                conv_dim = model_config.linear_num_key_heads * model_config.linear_key_head_dim * 2 + model_config.linear_num_value_heads * model_config.linear_value_head_dim
+                conv_state = torch.zeros(
+                    [batch, conv_dim, model_config.conv_kernel_size - 1],
+                    device=device, dtype=execution_dtype
+                )
+                past_key_values.append((recurrent_state, conv_state, 0))
+            else:
+                # Uninitialised buffers; the trailing 0 is presumably the filled length — TODO confirm.
+                past_key_values.append((
+                    torch.empty([batch, model_config.num_key_value_heads, max_cache_len, model_config.head_dim], device=device, dtype=execution_dtype),
+                    torch.empty([batch, model_config.num_key_value_heads, max_cache_len, model_config.head_dim], device=device, dtype=execution_dtype),
+                    0
+                ))
+        return past_key_values
+
+# Tokenizer and Text Encoder Wrappers
+
+class Qwen35Tokenizer(sd1_clip.SDTokenizer):
+    """``SDTokenizer`` set up for the ``qwen35_tokenizer`` files stored next to this module."""
+
+    def __init__(self, embedding_directory=None, tokenizer_data={}, embedding_size=2048, embedding_key="qwen35_2b"):
+        from transformers import Qwen2Tokenizer
+
+        module_dir = os.path.dirname(os.path.realpath(__file__))
+        tokenizer_path = os.path.join(module_dir, "qwen35_tokenizer")
+        # No start/end tokens and no padding to a fixed length; the max length is effectively unbounded.
+        super().__init__(
+            tokenizer_path,
+            pad_with_end=False,
+            embedding_directory=embedding_directory,
+            embedding_size=embedding_size,
+            embedding_key=embedding_key,
+            tokenizer_class=Qwen2Tokenizer,
+            has_start_token=False,
+            has_end_token=False,
+            pad_to_max_length=False,
+            max_length=99999999,
+            min_length=1,
+            pad_token=248044,
+            tokenizer_data=tokenizer_data,
+        )
+
+
+class Qwen35ImageTokenizer(sd1_clip.SD1Tokenizer):
+    """Chat-template tokenizer that swaps ``<|image_pad|>`` tokens for image embed entries."""
+
+    def __init__(self, embedding_directory=None, tokenizer_data={}, model_type="qwen35_2b"):
+        embedding_size = QWEN35_MODELS.get(model_type, {}).get("hidden_size", 2048)
+
+        def tokenizer(*a, **kw):
+            # Factory handed to the parent; binds the per-model embedding size and key.
+            return Qwen35Tokenizer(*a, **kw, embedding_size=embedding_size, embedding_key=model_type)
+
+        super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, name=model_type, tokenizer=tokenizer)
+        self.llama_template = "<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n"
+        self.llama_template_images = "<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>{}<|im_end|>\n<|im_start|>assistant\n"
+
+    def tokenize_with_weights(self, text, return_word_ids=False, llama_template=None, images=[], prevent_empty_text=False, thinking=False, **kwargs):
+        """Wrap ``text`` in the chat template, tokenize it, and attach images to image-pad tokens.
+
+        Text that already starts with ``<|im_start|>`` is tokenized as given
+        (no template, no think block). Otherwise ``llama_template`` is used
+        when supplied, else one of the built-in templates depending on whether
+        images are present; an empty think block is appended unless
+        ``thinking`` is true. A single ``image`` keyword is accepted as a
+        fallback when ``images`` is empty.
+        """
+        single_image = kwargs.get("image", None)
+        if single_image is not None and len(images) == 0:
+            images = [single_image]
+
+        # Decided on the text as passed in, before any empty-text substitution.
+        already_templated = text.startswith('<|im_start|>')
+        if prevent_empty_text and text == '':
+            text = ' '
+
+        if already_templated:
+            llama_text = text
+        else:
+            if llama_template is not None:
+                template = llama_template
+            elif len(images) > 0:
+                template = self.llama_template_images
+            else:
+                template = self.llama_template
+            llama_text = template.format(text)
+            if not thinking:
+                llama_text += "<think>\n</think>\n"
+
+        tokens = super().tokenize_with_weights(llama_text, return_word_ids=return_word_ids, disable_weights=True, **kwargs)
+        key_name = next(iter(tokens))
+        used_images = 0
+        for row in tokens[key_name]:
+            for pos, entry in enumerate(row):
+                # 248056 is <|image_pad|>; pads beyond the number of supplied images are left untouched.
+                if entry[0] == 248056 and len(images) > used_images:
+                    row[pos] = ({"type": "image", "data": images[used_images], "original_type": "image"},) + entry[1:]
+                    used_images += 1
+        return tokens
+
+
+class Qwen35ClipModel(sd1_clip.SDClipModel):
+    """``SDClipModel`` that instantiates ``Qwen35`` for the requested ``model_type``."""
+
+    def __init__(self, device="cpu", layer="hidden", layer_idx=-2, dtype=None, attention_mask=True, model_options={}, model_type="qwen35_2b"):
+        # A throwaway subclass carries model_type so the shared Qwen35 class attribute is not modified.
+        class Qwen35_(Qwen35):
+            pass
+
+        Qwen35_.model_type = model_type
+
+        super().__init__(
+            device=device,
+            layer=layer,
+            layer_idx=layer_idx,
+            textmodel_json_config={},
+            dtype=dtype,
+            special_tokens={"pad": 248044},
+            layer_norm_hidden_state=False,
+            model_class=Qwen35_,
+            enable_attention_masks=attention_mask,
+            return_attention_masks=attention_mask,
+            model_options=model_options,
+        )
+
+
+class Qwen35TEModel(sd1_clip.SD1ClipModel):
+    """``SD1ClipModel`` whose inner clip model is a ``Qwen35ClipModel`` of the given ``model_type``."""
+
+    def __init__(self, device="cpu", dtype=None, model_options={}, model_type="qwen35_2b"):
+        def clip_model(**kw):
+            # Factory handed to the parent; forwards its keywords and pins model_type.
+            return Qwen35ClipModel(**kw, model_type=model_type)
+
+        super().__init__(device=device, dtype=dtype, name=model_type, clip_model=clip_model, model_options=model_options)
+
+
+def tokenizer(model_type="qwen35_2b"):
+    """Return a ``Qwen35ImageTokenizer`` subclass whose constructor has ``model_type`` pre-bound."""
+    bound_model_type = model_type
+
+    class Qwen35ImageTokenizer_(Qwen35ImageTokenizer):
+        def __init__(self, embedding_directory=None, tokenizer_data={}):
+            super().__init__(
+                embedding_directory=embedding_directory,
+                tokenizer_data=tokenizer_data,
+                model_type=bound_model_type,
+            )
+
+    return Qwen35ImageTokenizer_
+
+
+def te(dtype_llama=None, llama_quantization_metadata=None, model_type="qwen35_2b"):
+    """Return a ``Qwen35TEModel`` subclass with dtype, quantization metadata and model type pre-bound.
+
+    ``dtype_llama``, when not None, replaces the ``dtype`` given to the
+    generated class. ``llama_quantization_metadata``, when not None, is stored
+    under ``model_options["quantization_metadata"]`` on a copy of the dict.
+    """
+    class Qwen35TEModel_(Qwen35TEModel):
+        def __init__(self, device="cpu", dtype=None, model_options={}):
+            effective_dtype = dtype if dtype_llama is None else dtype_llama
+            options = model_options
+            if llama_quantization_metadata is not None:
+                # Work on a copy so the dict that was passed in is left unmodified.
+                options = model_options.copy()
+                options["quantization_metadata"] = llama_quantization_metadata
+            super().__init__(device=device, dtype=effective_dtype, model_options=options, model_type=model_type)
+
+    return Qwen35TEModel_
--- a/comfy/text_encoders/qwen35_tokenizer/merges.txt
+++ b/comfy/text_encoders/qwen35_tokenizer/merges.txt
--- a/comfy/text_encoders/qwen35_tokenizer/tokenizer_config.json
+++ b/comfy/text_encoders/qwen35_tokenizer/tokenizer_config.json
--- a/comfy/text_encoders/qwen35_tokenizer/vocab.json
+++ b/comfy/text_encoders/qwen35_tokenizer/vocab.json
--- a/comfy/text_encoders/qwen_vl.py
+++ b/comfy/text_encoders/qwen_vl.py
@@ -425,4 +425,7 @@ class Qwen2VLVisionTransformer(nn.Module):
            hidden_states = block(hidden_states, position_embeddings, cu_seqlens_now, optimized_attention=optimized_attention)

        hidden_states = self.merger(hidden_states)
+        # Potentially important for spatially precise edits. This is present in the HF implementation.
+        reverse_indices = torch.argsort(window_index)
+        hidden_states = hidden_states[reverse_indices, :]
        return hidden_states
--- a/comfy/utils.py
+++ b/comfy/utils.py
@@ -20,6 +20,8 @@
 import torch
 import math
 import struct
+import ctypes
+import os
 import comfy.memory_management
 import safetensors.torch
 import numpy as np
@@ -32,7 +34,7 @@ from einops import rearrange
 from comfy.cli_args import args
 import json
 import time
-import mmap
+import threading
 import warnings

 MMAP_TORCH_FILES = args.mmap_torch_files
@@ -81,14 +83,17 @@ _TYPES = {
 }

 def load_safetensors(ckpt):
-    f = open(ckpt, "rb")
-    mapping = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
-    mv = memoryview(mapping)
+    import comfy_aimdo.model_mmap

-    header_size = struct.unpack("<Q", mapping[:8])[0]
-    header = json.loads(mapping[8:8+header_size].decode("utf-8"))
+    f = open(ckpt, "rb", buffering=0)
+    model_mmap = comfy_aimdo.model_mmap.ModelMMAP(ckpt)
+    file_size = os.path.getsize(ckpt)
+    mv = memoryview((ctypes.c_uint8 * file_size).from_address(model_mmap.get()))

-    mv = mv[8 + header_size:]
+    header_size = struct.unpack("<Q", mv[:8])[0]
+    header = json.loads(mv[8:8 + header_size].tobytes().decode("utf-8"))
+
+    mv = mv[(data_base_offset := 8 + header_size):]

    sd = {}
    for name, info in header.items():
@@ -102,7 +107,14 @@ def load_safetensors(ckpt):
            with warnings.catch_warnings():
                #We are working with read-only RAM by design
                warnings.filterwarnings("ignore", message="The given buffer is not writable")
-                sd[name] = torch.frombuffer(mv[start:end], dtype=_TYPES[info["dtype"]]).view(info["shape"])
+                tensor = torch.frombuffer(mv[start:end], dtype=_TYPES[info["dtype"]]).view(info["shape"])
+                storage = tensor.untyped_storage()
+                setattr(storage,
+                        "_comfy_tensor_file_slice",
+                        comfy.memory_management.TensorFileSlice(f, threading.get_ident(), data_base_offset + start, end - start))
+                setattr(storage, "_comfy_tensor_mmap_refs", (model_mmap, mv))
+                setattr(storage, "_comfy_tensor_mmap_touched", False)
+                sd[name] = tensor

    return sd, header.get("__metadata__", {}),

@@ -885,6 +897,10 @@ def set_attr(obj, attr, value):
    return prev

 def set_attr_param(obj, attr, value):
+    """Wrap ``value`` in a ``torch.nn.Parameter`` (``requires_grad=False``), assign it via ``set_attr`` and return that call's result."""
+    # Clone inference tensors (created under torch.inference_mode) since
+    # their version counter is frozen and nn.Parameter() cannot wrap them.
+    if (not torch.is_inference_mode_enabled()) and value.is_inference():
+        value = value.clone()
     return set_attr(obj, attr, torch.nn.Parameter(value, requires_grad=False))

 def set_attr_buffer(obj, attr, value):
@@ -1119,8 +1135,8 @@ def tiled_scale_multidim(samples, function, tile=(64, 64), overlap=8, upscale_am
                pbar.update(1)
            continue

-        out = torch.zeros([s.shape[0], out_channels] + mult_list_upscale(s.shape[2:]), device=output_device)
-        out_div = torch.zeros([s.shape[0], out_channels] + mult_list_upscale(s.shape[2:]), device=output_device)
+        out = output[b:b+1].zero_()
+        out_div = torch.zeros([s.shape[0], 1] + mult_list_upscale(s.shape[2:]), device=output_device)

        positions = [range(0, s.shape[d+2] - overlap[d], tile[d] - overlap[d]) if s.shape[d+2] > tile[d] else [0] for d in range(dims)]

@@ -1135,7 +1151,7 @@ def tiled_scale_multidim(samples, function, tile=(64, 64), overlap=8, upscale_am
                upscaled.append(round(get_pos(d, pos)))

            ps = function(s_in).to(output_device)
-            mask = torch.ones_like(ps)
+            mask = torch.ones([1, 1] + list(ps.shape[2:]), device=output_device)

            for d in range(2, dims + 2):
                feather = round(get_scale(d - 2, overlap[d - 2]))
@@ -1158,7 +1174,7 @@ def tiled_scale_multidim(samples, function, tile=(64, 64), overlap=8, upscale_am
            if pbar is not None:
                pbar.update(1)

-        output[b:b+1] = out/out_div
+        out.div_(out_div)
    return output

 def tiled_scale(samples, function, tile_x=64, tile_y=64, overlap = 8, upscale_amount = 4, out_channels = 3, output_device="cpu", pbar = None):
--- a/comfy_api/input/init.py
+++ b/comfy_api/input/init.py
@@ -5,6 +5,10 @@ from comfy_api.latest._input import (
    MaskInput,
    LatentInput,
    VideoInput,
+    CurvePoint,
+    CurveInput,
+    MonotoneCubicCurve,
+    LinearCurve,
 )

 __all__ = [
@@ -13,4 +17,8 @@ __all__ = [
    "MaskInput",
    "LatentInput",
    "VideoInput",
+    "CurvePoint",
+    "CurveInput",
+    "MonotoneCubicCurve",
+    "LinearCurve",
 ]
--- a/comfy_api/latest/init.py
+++ b/comfy_api/latest/init.py
@@ -25,6 +25,7 @@ class ComfyAPI_latest(ComfyAPIBase):
        super().__init__()
        self.node_replacement = self.NodeReplacement()
        self.execution = self.Execution()
+        self.caching = self.Caching()

    class NodeReplacement(ProxiedSingleton):
        async def register(self, node_replace: io.NodeReplace) -> None:
@@ -84,6 +85,36 @@ class ComfyAPI_latest(ComfyAPIBase):
                image=to_display,
            )

+    class Caching(ProxiedSingleton):
+        """
+        External cache provider API for sharing cached node outputs
+        across ComfyUI instances.
+
+        Example::
+
+            from comfy_api.latest import Caching
+
+            class MyCacheProvider(Caching.CacheProvider):
+                async def on_lookup(self, context):
+                    ...  # check external storage
+
+                async def on_store(self, context, value):
+                    ...  # store to external storage
+
+            Caching.register_provider(MyCacheProvider())
+        """
+        # Imported in the class body so the types are reachable as class
+        # attributes, e.g. ``Caching.CacheProvider`` as used in the example above.
+        from ._caching import CacheProvider, CacheContext, CacheValue
+
+        async def register_provider(self, provider: "ComfyAPI_latest.Caching.CacheProvider") -> None:
+            """Register an external cache provider. Providers are called in registration order."""
+            # Imported at call time rather than module import time
+            # (presumably to avoid an import cycle — TODO confirm).
+            from comfy_execution.cache_provider import register_cache_provider
+            register_cache_provider(provider)
+
+        async def unregister_provider(self, provider: "ComfyAPI_latest.Caching.CacheProvider") -> None:
+            """Unregister a previously registered cache provider."""
+            # Same call-time import as in register_provider.
+            from comfy_execution.cache_provider import unregister_cache_provider
+            unregister_cache_provider(provider)
+
 class ComfyExtension(ABC):
    async def on_load(self) -> None:
        """
@@ -116,6 +147,9 @@ class Types:
    VOXEL = VOXEL
    File3D = File3D

+
+Caching = ComfyAPI_latest.Caching
+
 ComfyAPI = ComfyAPI_latest

 # Create a synchronous version of the API
@@ -135,6 +169,7 @@ __all__ = [
    "Input",
    "InputImpl",
    "Types",
+    "Caching",
    "ComfyExtension",
    "io",
    "IO",
--- a/comfy_api/latest/_caching.py
+++ b/comfy_api/latest/_caching.py
@@ -0,0 +1,42 @@
+from abc import ABC, abstractmethod
+from typing import Optional
+from dataclasses import dataclass
+
+
+@dataclass
+class CacheContext:
+    """Identifies the node and cache entry that a ``CacheProvider`` call refers to."""
+    node_id: str
+    class_type: str
+    cache_key_hash: str  # SHA256 hex digest
+
+
+@dataclass
+class CacheValue:
+    """Cached node result exchanged with a ``CacheProvider`` (returned by ``on_lookup``, passed to ``on_store``)."""
+    outputs: list
+    ui: Optional[dict] = None  # None when no UI payload is supplied
+
+
+class CacheProvider(ABC):
+    """Interface that external cache providers implement.
+
+    ``on_lookup`` and ``on_store`` are mandatory; the remaining hooks have
+    default implementations. Exceptions from provider methods are caught by
+    the caller and never break execution.
+    """
+
+    @abstractmethod
+    async def on_lookup(self, context: CacheContext) -> Optional[CacheValue]:
+        """Called on local cache miss. Return CacheValue if found, None otherwise."""
+
+    @abstractmethod
+    async def on_store(self, context: CacheContext, value: CacheValue) -> None:
+        """Called after local store. Dispatched via asyncio.create_task."""
+
+    def should_cache(self, context: CacheContext, value: Optional[CacheValue] = None) -> bool:
+        """Return False to skip external caching for this node. Default: True."""
+        return True
+
+    def on_prompt_start(self, prompt_id: str) -> None:
+        """Optional hook taking a prompt id; does nothing by default."""
+
+    def on_prompt_end(self, prompt_id: str) -> None:
+        """Optional hook taking a prompt id; does nothing by default."""
--- a/comfy_api/latest/_input/init.py
+++ b/comfy_api/latest/_input/init.py
@@ -1,4 +1,5 @@
 from .basic_types import ImageInput, AudioInput, MaskInput, LatentInput
+from .curve_types import CurvePoint, CurveInput, MonotoneCubicCurve, LinearCurve
 from .video_types import VideoInput

 __all__ = [
@@ -7,4 +8,8 @@ __all__ = [
    "VideoInput",
    "MaskInput",
    "LatentInput",
+    "CurvePoint",
+    "CurveInput",
+    "MonotoneCubicCurve",
+    "LinearCurve",
 ]
--- a/comfy_api/latest/_input/curve_types.py
+++ b/comfy_api/latest/_input/curve_types.py
@@ -0,0 +1,219 @@
+from __future__ import annotations
+
+import logging
+import math
+from abc import ABC, abstractmethod
+import numpy as np
+
+logger = logging.getLogger(__name__)
+
+
+CurvePoint = tuple[float, float]
+
+
+class CurveInput(ABC):
+    """Common interface for curves consumed by downstream nodes.
+
+    Concrete subclasses decide how the curve is represented; callers only
+    rely on ``points``, ``interp``, ``interp_array`` and ``to_lut``.
+    """
+
+    @property
+    @abstractmethod
+    def points(self) -> list[CurvePoint]:
+        """Control points that define the curve."""
+
+    @abstractmethod
+    def interp(self, x: float) -> float:
+        """Return the curve value at one *x* in [0, 1]."""
+
+    def interp_array(self, xs: np.ndarray) -> np.ndarray:
+        """Evaluate the curve at every element of *xs* and return a float64 array.
+
+        This generic version calls ``interp`` once per element; subclasses
+        are expected to provide a faster vectorised override.
+        """
+        samples = (self.interp(float(value)) for value in xs)
+        return np.fromiter(samples, dtype=np.float64, count=len(xs))
+
+    def to_lut(self, size: int = 256) -> np.ndarray:
+        """Sample the curve at *size* evenly spaced x values covering [0, 1]."""
+        grid = np.linspace(0.0, 1.0, size)
+        return self.interp_array(grid)
+
+    @staticmethod
+    def from_raw(data) -> CurveInput:
+        """Build a ``CurveInput`` from loosely typed curve data.
+
+        *data* may be:
+        - an existing ``CurveInput`` (returned unchanged);
+        - a dict holding ``"points"`` and, optionally, ``"interpolation"``;
+        - a plain sequence of ``(x, y)`` pairs, treated as monotone cubic.
+
+        An unrecognised interpolation name is logged and replaced by
+        monotone cubic.
+        """
+        if isinstance(data, CurveInput):
+            return data
+
+        interpolation = "monotone_cubic"
+        raw_points = data
+        if isinstance(data, dict):
+            raw_points = data["points"]
+            interpolation = data.get("interpolation", "monotone_cubic")
+
+        points = [(float(px), float(py)) for px, py in raw_points]
+        if interpolation == "linear":
+            return LinearCurve(points)
+        if interpolation != "monotone_cubic":
+            logger.warning("Unknown curve interpolation %r, falling back to monotone_cubic", interpolation)
+        return MonotoneCubicCurve(points)
+
+
+class MonotoneCubicCurve(CurveInput):
+    """Curve evaluated with monotone cubic Hermite interpolation.
+
+    Intended to mirror the frontend ``createMonotoneInterpolator`` in
+    ``ComfyUI_frontend/src/components/curve/curveUtils.ts`` so that
+    backend evaluation matches the editor preview.
+
+    Control points are sorted by x and their tangents are computed once in
+    the constructor. Queries at or beyond the first/last control point
+    return that point's y value.
+    """
+
+    def __init__(self, control_points: list[CurvePoint]):
+        ordered = sorted(control_points, key=lambda p: p[0])
+        self._points = [(float(px), float(py)) for px, py in ordered]
+        self._xs = np.array([px for px, _ in self._points], dtype=np.float64)
+        self._ys = np.array([py for _, py in self._points], dtype=np.float64)
+        self._slopes = self._compute_slopes()
+
+    @property
+    def points(self) -> list[CurvePoint]:
+        """A copy of the sorted control points."""
+        return list(self._points)
+
+    @staticmethod
+    def _hermite_basis(t):
+        """Return the cubic Hermite basis values ``(h00, h10, h01, h11)`` at *t* (scalar or array)."""
+        t2 = t * t
+        t3 = t2 * t
+        return (
+            2 * t3 - 3 * t2 + 1,
+            t3 - 2 * t2 + t,
+            -2 * t3 + 3 * t2,
+            t3 - t2,
+        )
+
+    def _compute_slopes(self) -> np.ndarray:
+        """Compute one tangent per control point.
+
+        Interior tangents start as the mean of the neighbouring secant slopes
+        (zero where those differ in sign or one is zero). A second pass
+        flattens tangents around flat segments and rescales any pair whose
+        ratios to the secant satisfy ``alpha**2 + beta**2 > 9``.
+        """
+        xs, ys = self._xs, self._ys
+        n = len(xs)
+        if n < 2:
+            return np.zeros(n, dtype=np.float64)
+
+        # Secant slope of every segment; zero-width segments count as flat.
+        dx = np.diff(xs)
+        dy = np.diff(ys)
+        zero_width = dx == 0
+        deltas = np.where(zero_width, 0.0, dy / np.where(zero_width, 1.0, dx))
+
+        slopes = np.empty(n, dtype=np.float64)
+        slopes[0] = deltas[0]
+        slopes[-1] = deltas[-1]
+        for i in range(1, n - 1):
+            left, right = deltas[i - 1], deltas[i]
+            slopes[i] = 0.0 if left * right <= 0 else (left + right) / 2
+
+        for i in range(n - 1):
+            secant = deltas[i]
+            if secant == 0:
+                slopes[i] = 0.0
+                slopes[i + 1] = 0.0
+                continue
+            alpha = slopes[i] / secant
+            beta = slopes[i + 1] / secant
+            s = alpha * alpha + beta * beta
+            if s > 9:
+                t = 3 / math.sqrt(s)
+                slopes[i] = t * alpha * secant
+                slopes[i + 1] = t * beta * secant
+        return slopes
+
+    def interp(self, x: float) -> float:
+        """Evaluate the curve at a single *x*; returns 0.0 when there are no control points."""
+        xs, ys, slopes = self._xs, self._ys, self._slopes
+        n = len(xs)
+        if n == 0:
+            return 0.0
+        if n == 1 or x <= xs[0]:
+            return float(ys[0])
+        if x >= xs[-1]:
+            return float(ys[-1])
+
+        # Segment [lo, hi] containing x.
+        hi = min(int(np.searchsorted(xs, x, side='right')), n - 1)
+        lo = hi - 1
+
+        span = xs[hi] - xs[lo]
+        if span == 0:
+            return float(ys[lo])
+
+        h00, h10, h01, h11 = self._hermite_basis((x - xs[lo]) / span)
+        return float(h00 * ys[lo] + h10 * span * slopes[lo] + h01 * ys[hi] + h11 * span * slopes[hi])
+
+    def interp_array(self, xs_in: np.ndarray) -> np.ndarray:
+        """Evaluate the curve at every element of *xs_in* using numpy array operations."""
+        xs, ys, slopes = self._xs, self._ys, self._slopes
+        n = len(xs)
+        if n == 0:
+            return np.zeros_like(xs_in, dtype=np.float64)
+        if n == 1:
+            return np.full_like(xs_in, ys[0], dtype=np.float64)
+
+        hi = np.searchsorted(xs, xs_in, side='right').clip(1, n - 1)
+        lo = hi - 1
+
+        span = xs[hi] - xs[lo]
+        degenerate = span == 0
+        # Zero-width segments use t = 0, which evaluates to the left point's y.
+        t = np.where(degenerate, 0.0, (xs_in - xs[lo]) / np.where(degenerate, 1.0, span))
+        h00, h10, h01, h11 = self._hermite_basis(t)
+
+        values = h00 * ys[lo] + h10 * span * slopes[lo] + h01 * ys[hi] + h11 * span * slopes[hi]
+        # Clamp queries at or outside the control-point range to the end values.
+        values = np.where(xs_in <= xs[0], ys[0], values)
+        return np.where(xs_in >= xs[-1], ys[-1], values)
+
+    def __repr__(self) -> str:
+        return f"MonotoneCubicCurve(points={self._points})"
+
+
+class LinearCurve(CurveInput):
+    """Curve evaluated by straight-line interpolation between sorted control points.
+
+    Intended to mirror the frontend ``createLinearInterpolator`` in
+    ``ComfyUI_frontend/src/components/curve/curveUtils.ts``.
+    """
+
+    def __init__(self, control_points: list[CurvePoint]):
+        ordered = sorted(control_points, key=lambda p: p[0])
+        self._points = [(float(px), float(py)) for px, py in ordered]
+        self._xs = np.array([px for px, _ in self._points], dtype=np.float64)
+        self._ys = np.array([py for _, py in self._points], dtype=np.float64)
+
+    @property
+    def points(self) -> list[CurvePoint]:
+        """A copy of the sorted control points."""
+        return list(self._points)
+
+    def interp(self, x: float) -> float:
+        """Evaluate at a single *x*: 0.0 with no points, the lone y with one point, ``np.interp`` otherwise."""
+        count = len(self._xs)
+        if count == 0:
+            return 0.0
+        if count == 1:
+            return float(self._ys[0])
+        return float(np.interp(x, self._xs, self._ys))
+
+    def interp_array(self, xs_in: np.ndarray) -> np.ndarray:
+        """Evaluate at every element of *xs_in*, with the same empty/single-point handling as ``interp``."""
+        count = len(self._xs)
+        if count == 0:
+            return np.zeros_like(xs_in, dtype=np.float64)
+        if count == 1:
+            return np.full_like(xs_in, self._ys[0], dtype=np.float64)
+        return np.interp(xs_in, self._xs, self._ys)
+
+    def __repr__(self) -> str:
+        return f"LinearCurve(points={self._points})"
--- a/Show More
+++ b/Show More