Compare commits


25 Commits

Author SHA1 Message Date
Jedrzej Kosinski
386e854aab Merge branch 'master' into flipflop-stream 2025-10-28 15:08:27 -07:00
Jedrzej Kosinski
61133af772 Add '--flipflop-offload' startup argument 2025-10-13 21:10:44 -07:00
Jedrzej Kosinski
586a8de8da Merge branch 'master' into flipflop-stream 2025-10-13 21:04:37 -07:00
Jedrzej Kosinski
5329180fce Made flipflop consider partial_unload, partial_offload, and add flip+flop to mem counters 2025-10-03 16:21:01 -07:00
Jedrzej Kosinski
0fdd327c2f Merge branch 'master' into flipflop-stream 2025-10-03 14:32:56 -07:00
Jedrzej Kosinski
ee01002e63 Add flipflop support to (base) WAN, fix issue with applying loras to flipflop weights being done on CPU instead of GPU, left some timing functions as the lora application time could use some reduction 2025-10-02 22:02:50 -07:00
Jedrzej Kosinski
831c3cf05e Add a temporary workaround for odd amount of blocks not producing expected results 2025-10-02 20:29:11 -07:00
Jedrzej Kosinski
0d8e8abd90 Default to smaller blocks getting flipflopped first 2025-10-02 18:00:21 -07:00
Jedrzej Kosinski
d5001ed90e Make flux support flipflop 2025-10-02 17:53:22 -07:00
Jedrzej Kosinski
8d7b22b720 Fixed FlipFlipModule.execute_blocks having hardcoded strings from Qwen 2025-10-02 17:49:43 -07:00
Jedrzej Kosinski
6d3ec9fcf3 Simplified flipflop setup by adding FlipFlopModule.execute_blocks helper 2025-10-02 16:46:37 -07:00
Jedrzej Kosinski
c4420b6a41 Change log string slightly 2025-10-02 15:34:35 -07:00
Jedrzej Kosinski
a282586995 Merge branch 'master' into flipflop-stream 2025-10-02 15:03:26 -07:00
Jedrzej Kosinski
0df61b5032 Fix improper index slicing for flipflop get blocks, add extra log message 2025-10-01 21:21:36 -07:00
Jedrzej Kosinski
7c896c5567 Initial automatic support for flipflop within ModelPatcher - only Qwen Image diffusion_model uses FlipFlopModule currently 2025-10-01 20:13:50 -07:00
Jedrzej Kosinski
ec156e72eb Merge branch 'master' into flipflop-stream 2025-09-30 23:08:37 -07:00
Jedrzej Kosinski
01f4512bf8 In-progress commit on making flipflop async weight streaming native, made loaded partially/loaded completely log messages have labels because having to memorize their meaning for dev work is annoying 2025-09-30 23:08:08 -07:00
Jedrzej Kosinski
d0bd221495 Merge branch 'master' into flipflop-stream 2025-09-29 22:49:38 -07:00
Jedrzej Kosinski
8a8162e8da Fix percentage logic, begin adding elements to ModelPatcher to track flip flop compatibility 2025-09-29 22:49:12 -07:00
Jedrzej Kosinski
ff789c8beb Merge branch 'master' into flipflop-stream 2025-09-29 16:09:51 -07:00
Jedrzej Kosinski
0e966dcf85 Merge branch 'master' into flipflop-stream 2025-09-27 21:13:26 -07:00
Jedrzej Kosinski
6b240b0bce Refactored old flip flop into a new implementation that allows for controlling the percentage of blocks getting flip flopped, converted nodes to v3 schema 2025-09-25 22:41:41 -07:00
Jedrzej Kosinski
f9fbf902d5 Added missing Qwen block params, further subdivided blocks function 2025-09-25 17:49:39 -07:00
Jedrzej Kosinski
f083720eb4 Refactored FlipFlopTransformer.__call__ to fully separate out actions between flip and flop 2025-09-25 16:16:51 -07:00
Jedrzej Kosinski
84e73f2aa5 Brought over flip flop prototype from contentis' fork, limiting it to only Qwen to ease the process of adapting it to be a native feature 2025-09-25 16:15:46 -07:00
363 changed files with 11986 additions and 49935 deletions

View File

@@ -53,16 +53,6 @@ try:
repo.stash(ident)
except KeyError:
print("nothing to stash") # noqa: T201
except:
print("Could not stash, cleaning index and trying again.") # noqa: T201
repo.state_cleanup()
repo.index.read_tree(repo.head.peel().tree)
repo.index.write()
try:
repo.stash(ident)
except KeyError:
print("nothing to stash.") # noqa: T201
backup_branch_name = 'backup_branch_{}'.format(datetime.today().strftime('%Y-%m-%d_%H_%M_%S'))
print("creating backup branch: {}".format(backup_branch_name)) # noqa: T201
try:
@@ -76,10 +66,8 @@ if branch is None:
try:
ref = repo.lookup_reference('refs/remotes/origin/master')
except:
print("fetching.") # noqa: T201
for remote in repo.remotes:
if remote.name == "origin":
remote.fetch()
print("pulling.") # noqa: T201
pull(repo)
ref = repo.lookup_reference('refs/remotes/origin/master')
repo.checkout(ref)
branch = repo.lookup_branch('master')
@@ -161,4 +149,3 @@ try:
shutil.copy(stable_update_script, stable_update_script_to)
except:
pass

View File

@@ -1,5 +1,5 @@
As of the time of writing this you need this driver for best results:
https://www.amd.com/en/resources/support-articles/release-notes/RN-AMDGPU-WINDOWS-PYTORCH-7-1-1.html
As of the time of writing this you need this preview driver for best results:
https://www.amd.com/en/resources/support-articles/release-notes/RN-AMDGPU-WINDOWS-PYTORCH-PREVIEW.html
HOW TO RUN:
@@ -25,4 +25,3 @@ In the ComfyUI directory you will find a file: extra_model_paths.yaml.example
Rename this file to: extra_model_paths.yaml and edit it with your favorite text editor.

View File

@@ -1,3 +1,3 @@
..\python_embeded\python.exe -s ..\ComfyUI\main.py --windows-standalone-build --disable-api-nodes
echo If you see this and ComfyUI did not start try updating your Nvidia Drivers to the latest. If you get a c10.dll error you need to install vc redist that you can find: https://aka.ms/vc14/vc_redist.x64.exe
echo If you see this and ComfyUI did not start try updating your Nvidia Drivers to the latest.
pause

View File

@@ -1,3 +1,3 @@
.\python_embeded\python.exe -s ComfyUI\main.py --windows-standalone-build
echo If you see this and ComfyUI did not start try updating your Nvidia Drivers to the latest. If you get a c10.dll error you need to install vc redist that you can find: https://aka.ms/vc14/vc_redist.x64.exe
echo If you see this and ComfyUI did not start try updating your Nvidia Drivers to the latest.
pause

View File

@@ -1,3 +1,3 @@
.\python_embeded\python.exe -s ComfyUI\main.py --windows-standalone-build --fast fp16_accumulation
echo If you see this and ComfyUI did not start try updating your Nvidia Drivers to the latest. If you get a c10.dll error you need to install vc redist that you can find: https://aka.ms/vc14/vc_redist.x64.exe
echo If you see this and ComfyUI did not start try updating your Nvidia Drivers to the latest.
pause

View File

@@ -8,15 +8,13 @@ body:
Before submitting a **Bug Report**, please ensure the following:
- **1:** You are running the latest version of ComfyUI.
- **2:** You have your ComfyUI logs and relevant workflow on hand and will post them in this bug report.
- **2:** You have looked at the existing bug reports and made sure this isn't already reported.
- **3:** You confirmed that the bug is not caused by a custom node. You can disable all custom nodes by passing
`--disable-all-custom-nodes` command line argument. If you have custom nodes, try updating them to the latest version.
`--disable-all-custom-nodes` command line argument.
- **4:** This is an actual bug in ComfyUI, not just a support question. A bug is when you can specify exact
steps to replicate what went wrong and others will be able to repeat your steps and see the same issue happen.
## Very Important
Please make sure that you post ALL your ComfyUI logs in the bug report. A bug report without logs will likely be ignored.
If unsure, ask on the [ComfyUI Matrix Space](https://app.element.io/#/room/%23comfyui_space%3Amatrix.org) or the [Comfy Org Discord](https://discord.gg/comfyorg) first.
- type: checkboxes
id: custom-nodes-test
attributes:

View File

@@ -1,21 +0,0 @@
<!-- API_NODE_PR_CHECKLIST: do not remove -->
## API Node PR Checklist
### Scope
- [ ] **Is API Node Change**
### Pricing & Billing
- [ ] **Need pricing update**
- [ ] **No pricing update**
If **Need pricing update**:
- [ ] Metronome rate cards updated
- [ ] Autobilling tests updated and passing
### QA
- [ ] **QA done**
- [ ] **QA not required**
### Comms
- [ ] Informed **Kosinkadink**

View File

@@ -1,58 +0,0 @@
name: Append API Node PR template
on:
pull_request_target:
types: [opened, reopened, synchronize, ready_for_review]
paths:
- 'comfy_api_nodes/**' # only run if these files changed
permissions:
contents: read
pull-requests: write
jobs:
inject:
runs-on: ubuntu-latest
steps:
- name: Ensure template exists and append to PR body
uses: actions/github-script@v7
with:
script: |
const { owner, repo } = context.repo;
const number = context.payload.pull_request.number;
const templatePath = '.github/PULL_REQUEST_TEMPLATE/api-node.md';
const marker = '<!-- API_NODE_PR_CHECKLIST: do not remove -->';
const { data: pr } = await github.rest.pulls.get({ owner, repo, pull_number: number });
let templateText;
try {
const res = await github.rest.repos.getContent({
owner,
repo,
path: templatePath,
ref: pr.base.ref
});
const buf = Buffer.from(res.data.content, res.data.encoding || 'base64');
templateText = buf.toString('utf8');
} catch (e) {
core.setFailed(`Required PR template not found at "${templatePath}" on ${pr.base.ref}. Please add it to the repo.`);
return;
}
// Enforce the presence of the marker inside the template (for idempotence)
if (!templateText.includes(marker)) {
core.setFailed(`Template at "${templatePath}" does not contain the required marker:\n${marker}\nAdd it so we can detect duplicates safely.`);
return;
}
// If the PR already contains the marker, do not append again.
const body = pr.body || '';
if (body.includes(marker)) {
core.info('Template already present in PR body; nothing to inject.');
return;
}
const newBody = (body ? body + '\n\n' : '') + templateText + '\n';
await github.rest.pulls.update({ owner, repo, pull_number: number, body: newBody });
core.notice('API Node template appended to PR description.');

View File

@@ -14,13 +14,13 @@ jobs:
contents: "write"
packages: "write"
pull-requests: "read"
name: "Release NVIDIA Default (cu130)"
name: "Release NVIDIA Default (cu129)"
uses: ./.github/workflows/stable-release.yml
with:
git_tag: ${{ inputs.git_tag }}
cache_tag: "cu130"
python_minor: "13"
python_patch: "11"
python_patch: "9"
rel_name: "nvidia"
rel_extra_name: ""
test_release: true
@@ -43,33 +43,16 @@ jobs:
test_release: true
secrets: inherit
release_nvidia_cu126:
permissions:
contents: "write"
packages: "write"
pull-requests: "read"
name: "Release NVIDIA cu126"
uses: ./.github/workflows/stable-release.yml
with:
git_tag: ${{ inputs.git_tag }}
cache_tag: "cu126"
python_minor: "12"
python_patch: "10"
rel_name: "nvidia"
rel_extra_name: "_cu126"
test_release: true
secrets: inherit
release_amd_rocm:
permissions:
contents: "write"
packages: "write"
pull-requests: "read"
name: "Release AMD ROCm 7.2"
name: "Release AMD ROCm 6.4.4"
uses: ./.github/workflows/stable-release.yml
with:
git_tag: ${{ inputs.git_tag }}
cache_tag: "rocm72"
cache_tag: "rocm644"
python_minor: "12"
python_patch: "10"
rel_name: "amd"

View File

@@ -7,8 +7,6 @@ on:
jobs:
send-webhook:
runs-on: ubuntu-latest
env:
DESKTOP_REPO_DISPATCH_TOKEN: ${{ secrets.DESKTOP_REPO_DISPATCH_TOKEN }}
steps:
- name: Send release webhook
env:
@@ -108,37 +106,3 @@ jobs:
--fail --silent --show-error
echo "✅ Release webhook sent successfully"
- name: Send repository dispatch to desktop
env:
DISPATCH_TOKEN: ${{ env.DESKTOP_REPO_DISPATCH_TOKEN }}
RELEASE_TAG: ${{ github.event.release.tag_name }}
RELEASE_URL: ${{ github.event.release.html_url }}
run: |
set -euo pipefail
if [ -z "${DISPATCH_TOKEN:-}" ]; then
echo "::error::DESKTOP_REPO_DISPATCH_TOKEN is required but not set."
exit 1
fi
PAYLOAD="$(jq -n \
--arg release_tag "$RELEASE_TAG" \
--arg release_url "$RELEASE_URL" \
'{
event_type: "comfyui_release_published",
client_payload: {
release_tag: $release_tag,
release_url: $release_url
}
}')"
curl -fsSL \
-X POST \
-H "Accept: application/vnd.github+json" \
-H "Content-Type: application/json" \
-H "Authorization: Bearer ${DISPATCH_TOKEN}" \
https://api.github.com/repos/Comfy-Org/desktop/dispatches \
-d "$PAYLOAD"
echo "✅ Dispatched ComfyUI release ${RELEASE_TAG} to Comfy-Org/desktop"

View File

@@ -117,7 +117,7 @@ jobs:
./python.exe get-pip.py
./python.exe -s -m pip install ../${{ inputs.cache_tag }}_python_deps/*
grep comfy ../ComfyUI/requirements.txt > ./requirements_comfyui.txt
grep comfyui ../ComfyUI/requirements.txt > ./requirements_comfyui.txt
./python.exe -s -m pip install -r requirements_comfyui.txt
rm requirements_comfyui.txt

View File

@@ -18,7 +18,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}

View File

@@ -5,7 +5,6 @@ on:
push:
branches:
- master
- release/**
paths-ignore:
- 'app/**'
- 'input/**'
@@ -22,15 +21,14 @@ jobs:
fail-fast: false
matrix:
# os: [macos, linux, windows]
# os: [macos, linux]
os: [linux]
python_version: ["3.10", "3.11", "3.12"]
os: [macos, linux]
python_version: ["3.9", "3.10", "3.11", "3.12"]
cuda_version: ["12.1"]
torch_version: ["stable"]
include:
# - os: macos
# runner_label: [self-hosted, macOS]
# flags: "--use-pytorch-cross-attention"
- os: macos
runner_label: [self-hosted, macOS]
flags: "--use-pytorch-cross-attention"
- os: linux
runner_label: [self-hosted, Linux]
flags: ""
@@ -75,15 +73,14 @@ jobs:
strategy:
fail-fast: false
matrix:
# os: [macos, linux]
os: [linux]
os: [macos, linux]
python_version: ["3.11"]
cuda_version: ["12.1"]
torch_version: ["nightly"]
include:
# - os: macos
# runner_label: [self-hosted, macOS]
# flags: "--use-pytorch-cross-attention"
- os: macos
runner_label: [self-hosted, macOS]
flags: "--use-pytorch-cross-attention"
- os: linux
runner_label: [self-hosted, Linux]
flags: ""

View File

@@ -2,9 +2,9 @@ name: Execution Tests
on:
push:
branches: [ main, master, release/** ]
branches: [ main, master ]
pull_request:
branches: [ main, master, release/** ]
branches: [ main, master ]
jobs:
test:

View File

@@ -2,9 +2,9 @@ name: Test server launches without errors
on:
push:
branches: [ main, master, release/** ]
branches: [ main, master ]
pull_request:
branches: [ main, master, release/** ]
branches: [ main, master ]
jobs:
test:
@@ -13,7 +13,7 @@ jobs:
- name: Checkout ComfyUI
uses: actions/checkout@v4
with:
repository: "Comfy-Org/ComfyUI"
repository: "comfyanonymous/ComfyUI"
path: "ComfyUI"
- uses: actions/setup-python@v4
with:
@@ -32,9 +32,7 @@ jobs:
working-directory: ComfyUI
- name: Check for unhandled exceptions in server log
run: |
grep -v "Found comfy_kitchen backend triton: {'available': False, 'disabled': True, 'unavailable_reason': \"ImportError: No module named 'triton'\", 'capabilities': \[\]}" console_output.log | grep -v "Found comfy_kitchen backend triton: {'available': False, 'disabled': False, 'unavailable_reason': \"ImportError: No module named 'triton'\", 'capabilities': \[\]}" > console_output_filtered.log
cat console_output_filtered.log
if grep -qE "Exception|Error" console_output_filtered.log; then
if grep -qE "Exception|Error" console_output.log; then
echo "Unhandled exception/error found in server log."
exit 1
fi

View File

@@ -2,9 +2,9 @@ name: Unit Tests
on:
push:
branches: [ main, master, release/** ]
branches: [ main, master ]
pull_request:
branches: [ main, master, release/** ]
branches: [ main, master ]
jobs:
test:

View File

@@ -1,59 +0,0 @@
name: "CI: Update CI Container"
on:
release:
types: [published]
workflow_dispatch:
inputs:
version:
description: 'ComfyUI version (e.g., v0.7.0)'
required: true
type: string
jobs:
update-ci-container:
runs-on: ubuntu-latest
# Skip pre-releases unless manually triggered
if: github.event_name == 'workflow_dispatch' || !github.event.release.prerelease
steps:
- name: Get version
id: version
run: |
if [ "${{ github.event_name }}" = "release" ]; then
VERSION="${{ github.event.release.tag_name }}"
else
VERSION="${{ inputs.version }}"
fi
echo "version=$VERSION" >> $GITHUB_OUTPUT
- name: Checkout comfyui-ci-container
uses: actions/checkout@v4
with:
repository: comfy-org/comfyui-ci-container
token: ${{ secrets.CI_CONTAINER_PAT }}
- name: Check current version
id: current
run: |
CURRENT=$(grep -oP 'ARG COMFYUI_VERSION=\K.*' Dockerfile || echo "unknown")
echo "current_version=$CURRENT" >> $GITHUB_OUTPUT
- name: Update Dockerfile
run: |
VERSION="${{ steps.version.outputs.version }}"
sed -i "s/^ARG COMFYUI_VERSION=.*/ARG COMFYUI_VERSION=${VERSION}/" Dockerfile
- name: Create Pull Request
id: create-pr
uses: peter-evans/create-pull-request@v7
with:
token: ${{ secrets.CI_CONTAINER_PAT }}
branch: automation/comfyui-${{ steps.version.outputs.version }}
title: "chore: bump ComfyUI to ${{ steps.version.outputs.version }}"
body: |
Updates ComfyUI version from `${{ steps.current.outputs.current_version }}` to `${{ steps.version.outputs.version }}`
**Triggered by:** ${{ github.event_name == 'release' && format('[Release {0}]({1})', github.event.release.tag_name, github.event.release.html_url) || 'Manual workflow dispatch' }}
labels: automation
commit-message: "chore: bump ComfyUI to ${{ steps.version.outputs.version }}"

View File

@@ -6,7 +6,6 @@ on:
- "pyproject.toml"
branches:
- master
- release/**
jobs:
update-version:

View File

@@ -29,7 +29,7 @@ on:
description: 'python patch version'
required: true
type: string
default: "11"
default: "9"
# push:
# branches:
# - master

View File

@@ -1,2 +1,3 @@
# Admins
* @comfyanonymous @kosinkadink @guill
* @comfyanonymous
* @kosinkadink

View File

@@ -1,168 +0,0 @@
# The Comfy guide to Quantization
## How does quantization work?
Quantization aims to map a high-precision value x_f to a lower-precision format with minimal loss in accuracy. These smaller formats reduce the model's memory footprint and increase throughput by using specialized hardware.
When simply converting a value from FP16 to FP8 using round-to-nearest, we can hit two issues:
- The dynamic range of FP16 (-65,504, 65,504) far exceeds that of FP8 formats like E4M3 (-448, 448) or E5M2 (-57,344, 57,344), potentially resulting in clipped values
- The original values are concentrated in a small range (e.g. -1 to 1), leaving many FP8 bits "unused"
By using a scaling factor, we aim to map these values into the quantized dtype's range, making use of the full spectrum. One of the easiest and most common approaches is per-tensor absolute-maximum scaling.
```
absmax = max(abs(tensor))
scale = absmax / max_dynamic_range_low_precision
# Quantization
tensor_q = (tensor / scale).to(low_precision_dtype)
# De-Quantization
tensor_dq = tensor_q.to(fp16) * scale
tensor_dq ~ tensor
```
Given that additional information (the scaling factor) is needed to "interpret" the quantized values, we describe these as derived datatypes.
## Quantization in Comfy
```
QuantizedTensor (torch.Tensor subclass)
↓ __torch_dispatch__
Two-Level Registry (generic + layout handlers)
MixedPrecisionOps + Metadata Detection
```
### Representation
To represent these derived datatypes, ComfyUI uses a torch.Tensor subclass, implemented by the `QuantizedTensor` class found in `comfy/quant_ops.py`.
A `Layout` class defines how a specific quantization format behaves:
- Required parameters
- Quantize method
- De-Quantize method
```python
from comfy.quant_ops import QuantizedLayout
class MyLayout(QuantizedLayout):
@classmethod
def quantize(cls, tensor, **kwargs):
# Convert to quantized format
qdata = ...
params = {'scale': ..., 'orig_dtype': tensor.dtype}
return qdata, params
@staticmethod
def dequantize(qdata, scale, orig_dtype, **kwargs):
return qdata.to(orig_dtype) * scale
```
To run operations on these QuantizedTensors, we use two registry systems to define the supported operations.
The first is a **generic registry** that handles operations common to all quantized formats (e.g., `.to()`, `.clone()`, `.reshape()`).
The second registry is layout-specific and allows implementing fast paths such as `nn.Linear`.
```python
from comfy.quant_ops import register_layout_op
@register_layout_op(torch.ops.aten.linear.default, MyLayout)
def my_linear(func, args, kwargs):
# Extract tensors, call optimized kernel
...
```
When `torch.nn.functional.linear()` is called with QuantizedTensor arguments, `__torch_dispatch__` automatically routes to the registered implementation.
For any unsupported operation, QuantizedTensor falls back to `dequantize` and dispatches the operation using the high-precision implementation.
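As an illustration of this dispatch-and-fallback pattern, here is a minimal, hypothetical sketch (not the actual `QuantizedTensor` implementation): a wrapper tensor subclass whose `__torch_dispatch__` first consults a fast-path registry and otherwise dequantizes its arguments and re-dispatches in high precision. The names `SimpleQuantTensor` and `OP_REGISTRY` are assumptions of this sketch.
```python
import torch
from torch.utils._pytree import tree_map

# Hypothetical fast-path registry: maps an aten op to an implementation.
OP_REGISTRY = {}

class SimpleQuantTensor(torch.Tensor):
    """Toy stand-in for a quantized tensor: low-precision payload + per-tensor scale."""

    @staticmethod
    def __new__(cls, qdata, scale, orig_dtype):
        # Wrapper subclass: advertises the original dtype/shape, carries the quantized payload.
        t = torch.Tensor._make_wrapper_subclass(cls, qdata.shape, dtype=orig_dtype, device=qdata.device)
        t.qdata, t.scale, t.orig_dtype = qdata, scale, orig_dtype
        return t

    # Route everything through __torch_dispatch__.
    __torch_function__ = torch._C._disabled_torch_function_impl

    def dequantize(self):
        return self.qdata.to(self.orig_dtype) * self.scale

    @classmethod
    def __torch_dispatch__(cls, func, types, args=(), kwargs=None):
        kwargs = kwargs or {}
        if func in OP_REGISTRY:
            # Registered fast path (e.g. keyed by torch.ops.aten.linear.default).
            return OP_REGISTRY[func](func, args, kwargs)
        # Fallback: dequantize every SimpleQuantTensor argument, re-dispatch in high precision.
        def unwrap(x):
            return x.dequantize() if isinstance(x, SimpleQuantTensor) else x
        return func(*tree_map(unwrap, args), **tree_map(unwrap, kwargs))
```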
### Mixed Precision
The `MixedPrecisionOps` class (lines 542-648 in `comfy/ops.py`) enables per-layer quantization decisions, allowing different layers in a model to use different precisions. This is activated when a model config contains a `layer_quant_config` dictionary that specifies which layers should be quantized and how.
**Architecture:**
```python
class MixedPrecisionOps(disable_weight_init):
_layer_quant_config = {} # Maps layer names to quantization configs
_compute_dtype = torch.bfloat16 # Default compute / dequantize precision
```
**Key mechanism:**
The custom `Linear._load_from_state_dict()` method inspects each layer during model loading:
- If the layer name is **not** in `_layer_quant_config`: load weight as regular tensor in `_compute_dtype`
- If the layer name **is** in `_layer_quant_config`:
- Load weight as `QuantizedTensor` with the specified layout (e.g., `TensorCoreFP8Layout`)
- Load associated quantization parameters (scales, block_size, etc.)
**Why it's needed:**
Not all layers tolerate quantization equally. Sensitive operations like final projections can be kept in higher precision, while compute-heavy matmuls are quantized. This provides most of the performance benefits while maintaining quality.
The system is selected in `pick_operations()` when `model_config.layer_quant_config` is present, making it the highest-priority operation mode.
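To make the per-layer mechanism concrete, here is a small hypothetical sketch (not the real `MixedPrecisionOps` code): a Linear subclass whose `_load_from_state_dict` checks the layer name against a config and either casts the weight to the compute dtype or keeps the stored low-precision weight together with its scale, dequantizing on the fly in `forward`. `LAYER_QUANT_CONFIG` and the `weight_scale` key name are assumptions of this sketch.
```python
import torch

# Assumed example config: which layer names carry quantized weights.
LAYER_QUANT_CONFIG = {"blocks.0.mlp.up_proj": "float8_e4m3fn"}
COMPUTE_DTYPE = torch.bfloat16

class ConfigurableLinear(torch.nn.Linear):
    """Toy Linear that decides per layer name how to load its weight."""

    def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
                              missing_keys, unexpected_keys, error_msgs):
        layer_name = prefix.rstrip(".")
        if layer_name not in LAYER_QUANT_CONFIG:
            # Unquantized layer: cast the incoming weight to the compute dtype before loading.
            key = prefix + "weight"
            if key in state_dict:
                state_dict[key] = state_dict[key].to(COMPUTE_DTYPE)
        else:
            # Quantized layer: stash the scale so it is not reported as an unexpected key
            # ("weight_scale" is an assumed key name for this sketch).
            scale = state_dict.pop(prefix + "weight_scale", torch.ones(()))
            self.register_buffer("weight_scale", scale)
        super()._load_from_state_dict(state_dict, prefix, local_metadata, strict,
                                      missing_keys, unexpected_keys, error_msgs)

    def forward(self, x):
        w = self.weight
        if hasattr(self, "weight_scale"):
            # Dequantize on the fly for layers flagged in the config.
            w = w.to(x.dtype) * self.weight_scale.to(x.dtype)
        return torch.nn.functional.linear(x, w, self.bias)
```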
## Checkpoint Format
Quantized checkpoints are stored as standard safetensors files with quantized weight tensors and associated scaling parameters, plus a `_quantization_metadata` JSON entry describing the quantization scheme.
The quantized checkpoint will contain the same layers as the original checkpoint but:
- The weights are stored as quantized values, sometimes using a different storage datatype, e.g. a uint8 container for fp8.
- For each quantized weight, a number of additional scaling parameters are stored alongside it, depending on the recipe.
- The metadata of the final safetensors file contains a `_quantization_metadata` entry describing which layers are quantized and which layout was used.
### Scaling Parameters details
We define 4 possible scaling parameters that should cover most recipes in the near future:
- **weight_scale**: quantization scalers for the weights
- **weight_scale_2**: global scalers in the context of double scaling
- **pre_quant_scale**: scalers used for smoothing salient weights
- **input_scale**: quantization scalers for the activations
| Format | Storage dtype | weight_scale | weight_scale_2 | pre_quant_scale | input_scale |
|--------|---------------|--------------|----------------|-----------------|-------------|
| float8_e4m3fn | float32 | float32 (scalar) | - | - | float32 (scalar) |
You can find the defined formats in `comfy/quant_ops.py` (QUANT_ALGOS).
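As a quick way to inspect this layout on disk, the sketch below uses the `safetensors` library to list stored tensors and read the metadata entry. The file name, and the assumption that `_quantization_metadata` is stored as a JSON string in the safetensors header metadata, are illustrative.
```python
import json
from safetensors import safe_open

path = "model_fp8.safetensors"  # hypothetical quantized checkpoint

with safe_open(path, framework="pt") as f:
    meta = f.metadata() or {}
    quant_meta = json.loads(meta.get("_quantization_metadata", "{}"))
    print("format_version:", quant_meta.get("format_version"))
    print("quantized layers:", len(quant_meta.get("layers", {})))
    for key in f.keys():
        # Expect pairs like "<layer>.weight" (quantized storage dtype)
        # and "<layer>.weight_scale" / "<layer>.input_scale" (float32 scalars).
        t = f.get_tensor(key)
        print(f"{key}: shape={tuple(t.shape)} dtype={t.dtype}")
```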
### Quantization Metadata
The metadata stored alongside the checkpoint contains:
- **format_version**: String to define a version of the standard
- **layers**: A dictionary mapping layer names to their quantization format. The format string maps to the definitions found in `QUANT_ALGOS`.
Example:
```json
{
"_quantization_metadata": {
"format_version": "1.0",
"layers": {
"model.layers.0.mlp.up_proj": "float8_e4m3fn",
"model.layers.0.mlp.down_proj": "float8_e4m3fn",
"model.layers.1.mlp.up_proj": "float8_e4m3fn"
}
}
}
```
## Creating Quantized Checkpoints
To create compatible checkpoints, use any quantization tool, provided the output follows the checkpoint format described above and uses a layout defined in `QUANT_ALGOS`.
### Weight Quantization
Weight quantization is straightforward: compute the scaling factor directly from the weight tensor using the absolute-maximum method described earlier. Each layer's weights are quantized independently and stored with their corresponding `weight_scale` parameter.
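For illustration, a per-tensor absmax weight quantization pass to `float8_e4m3fn` might look like the sketch below (requires a PyTorch build with fp8 dtypes; the `<layer>.weight_scale` key naming is an assumption of this sketch):
```python
import torch

E4M3_MAX = 448.0  # dynamic range bound of float8_e4m3fn

def quantize_weight_e4m3(weight: torch.Tensor):
    """Per-tensor absmax quantization of a single weight tensor."""
    absmax = weight.abs().max().clamp(min=1e-12)        # avoid division by zero
    scale = (absmax / E4M3_MAX).to(torch.float32)
    qweight = (weight / scale).clamp(-E4M3_MAX, E4M3_MAX).to(torch.float8_e4m3fn)
    return qweight, scale

def quantize_linear_weights(model: torch.nn.Module):
    """Quantize every Linear weight independently; return a flat state-dict-style mapping."""
    out = {}
    for name, module in model.named_modules():
        if isinstance(module, torch.nn.Linear):
            qweight, scale = quantize_weight_e4m3(module.weight.data)
            out[f"{name}.weight"] = qweight
            out[f"{name}.weight_scale"] = scale
    return out
```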
### Calibration (for Activation Quantization)
Activation quantization (e.g., for FP8 Tensor Core operations) requires `input_scale` parameters that cannot be determined from static weights alone. Since activation values depend on actual inputs, we use **post-training calibration (PTQ)**:
1. **Collect statistics**: Run inference on N representative samples
2. **Track activations**: Record the absolute maximum (`amax`) of inputs to each quantized layer
3. **Compute scales**: Derive `input_scale` from collected statistics
4. **Store in checkpoint**: Save `input_scale` parameters alongside weights
The calibration dataset should be representative of your target use case. For diffusion models, this typically means a diverse set of prompts and generation parameters.
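A minimal calibration pass could be sketched with forward hooks that track the per-layer input absmax and convert it to an `input_scale`. The E4M3 bound of 448 and the assumption that the model accepts a single tensor input are simplifications of this sketch, not part of the actual tooling.
```python
import torch

def calibrate_input_scales(model: torch.nn.Module, calibration_batches, max_range: float = 448.0):
    """Collect per-layer input absmax over representative samples and derive input_scale."""
    amax, handles = {}, []

    def make_hook(name):
        def hook(module, inputs, output):
            cur = inputs[0].detach().abs().max()
            amax[name] = torch.maximum(amax[name], cur) if name in amax else cur
        return hook

    for name, module in model.named_modules():
        if isinstance(module, torch.nn.Linear):
            handles.append(module.register_forward_hook(make_hook(name)))

    with torch.no_grad():
        for batch in calibration_batches:  # assumed: model(batch) runs one forward pass
            model(batch)

    for h in handles:
        h.remove()

    # input_scale per quantized layer, stored alongside the weights in the checkpoint.
    return {name: (a / max_range).to(torch.float32) for name, a in amax.items()}
```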

View File

@@ -67,8 +67,6 @@ See what ComfyUI can do with the [example workflows](https://comfyanonymous.gith
- [HiDream](https://comfyanonymous.github.io/ComfyUI_examples/hidream/)
- [Qwen Image](https://comfyanonymous.github.io/ComfyUI_examples/qwen_image/)
- [Hunyuan Image 2.1](https://comfyanonymous.github.io/ComfyUI_examples/hunyuan_image/)
- [Flux 2](https://comfyanonymous.github.io/ComfyUI_examples/flux2/)
- [Z Image](https://comfyanonymous.github.io/ComfyUI_examples/z_image/)
- Image Editing Models
- [Omnigen 2](https://comfyanonymous.github.io/ComfyUI_examples/omnigen/)
- [Flux Kontext](https://comfyanonymous.github.io/ComfyUI_examples/flux/#flux-kontext-image-editing-model)
@@ -81,7 +79,6 @@ See what ComfyUI can do with the [example workflows](https://comfyanonymous.gith
- [Hunyuan Video](https://comfyanonymous.github.io/ComfyUI_examples/hunyuan_video/)
- [Wan 2.1](https://comfyanonymous.github.io/ComfyUI_examples/wan/)
- [Wan 2.2](https://comfyanonymous.github.io/ComfyUI_examples/wan22/)
- [Hunyuan Video 1.5](https://docs.comfy.org/tutorials/video/hunyuan/hunyuan-video-1-5)
- Audio Models
- [Stable Audio](https://comfyanonymous.github.io/ComfyUI_examples/audio/)
- [ACE Step](https://comfyanonymous.github.io/ComfyUI_examples/audio/)
@@ -108,21 +105,17 @@ See what ComfyUI can do with the [example workflows](https://comfyanonymous.gith
- [LCM models and Loras](https://comfyanonymous.github.io/ComfyUI_examples/lcm/)
- Latent previews with [TAESD](#how-to-show-high-quality-previews)
- Works fully offline: core will never download anything unless you want to.
- Optional API nodes to use paid models from external providers through the online [Comfy API](https://docs.comfy.org/tutorials/api-nodes/overview) disable with: `--disable-api-nodes`
- Optional API nodes to use paid models from external providers through the online [Comfy API](https://docs.comfy.org/tutorials/api-nodes/overview).
- [Config file](extra_model_paths.yaml.example) to set the search paths for models.
Workflow examples can be found on the [Examples page](https://comfyanonymous.github.io/ComfyUI_examples/)
## Release Process
ComfyUI follows a weekly release cycle targeting Monday but this regularly changes because of model releases or large changes to the codebase. There are three interconnected repositories:
ComfyUI follows a weekly release cycle targeting Friday but this regularly changes because of model releases or large changes to the codebase. There are three interconnected repositories:
1. **[ComfyUI Core](https://github.com/comfyanonymous/ComfyUI)**
- Releases a new stable version (e.g., v0.7.0) roughly every week.
- Starting from v0.4.0 patch versions will be used for fixes backported onto the current stable release.
- Minor versions will be used for releases off the master branch.
- Patch versions may still be used for releases on the master branch in cases where a backport would not make sense.
- Commits outside of the stable release tags may be very unstable and break many custom nodes.
- Releases a new stable version (e.g., v0.7.0)
- Serves as the foundation for the desktop release
2. **[ComfyUI Desktop](https://github.com/Comfy-Org/desktop)**
@@ -179,19 +172,17 @@ There is a portable standalone build for Windows that should work for running on
### [Direct link to download](https://github.com/comfyanonymous/ComfyUI/releases/latest/download/ComfyUI_windows_portable_nvidia.7z)
Simply download, extract with [7-Zip](https://7-zip.org) or with the windows explorer on recent windows versions and run. For smaller models you normally only need to put the checkpoints (the huge ckpt/safetensors files) in: ComfyUI\models\checkpoints but many of the larger models have multiple files. Make sure to follow the instructions to know which subfolder to put them in ComfyUI\models\
Simply download, extract with [7-Zip](https://7-zip.org) and run. Make sure you put your Stable Diffusion checkpoints/models (the huge ckpt/safetensors files) in: ComfyUI\models\checkpoints
If you have trouble extracting it, right click the file -> properties -> unblock
The portable above currently comes with python 3.13 and pytorch cuda 13.0. Update your Nvidia drivers if it doesn't start.
Update your Nvidia drivers if it doesn't start.
#### Alternative Downloads:
[Experimental portable for AMD GPUs](https://github.com/comfyanonymous/ComfyUI/releases/latest/download/ComfyUI_windows_portable_amd.7z)
[Portable with pytorch cuda 12.8 and python 3.12](https://github.com/comfyanonymous/ComfyUI/releases/latest/download/ComfyUI_windows_portable_nvidia_cu128.7z).
[Portable with pytorch cuda 12.6 and python 3.12](https://github.com/comfyanonymous/ComfyUI/releases/latest/download/ComfyUI_windows_portable_nvidia_cu126.7z) (Supports Nvidia 10 series and older GPUs).
[Portable with pytorch cuda 12.8 and python 3.12](https://github.com/comfyanonymous/ComfyUI/releases/latest/download/ComfyUI_windows_portable_nvidia_cu128.7z) (Supports Nvidia 10 series and older GPUs).
#### How do I share models between another UI and ComfyUI?
@@ -208,12 +199,10 @@ comfy install
## Manual Install (Windows, Linux)
Python 3.14 works but some custom nodes may have issues. The free threaded variant works but some dependencies will enable the GIL so it's not fully supported.
Python 3.14 will work if you comment out the `kornia` dependency in the requirements.txt file (breaks the canny node) but it is not recommended.
Python 3.13 is very well supported. If you have trouble with some custom node dependencies on 3.13 you can try 3.12
torch 2.4 and above is supported but some features and optimizations might only work on newer versions. We generally recommend using the latest major version of pytorch with the latest cuda version unless it is less than 2 weeks old.
### Instructions:
Git clone this repo.
@@ -227,11 +216,11 @@ Put your VAE in: models/vae
AMD users can install rocm and pytorch with pip if you don't have it already installed, this is the command to install the stable version:
```pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm7.1```
```pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.4```
This is the command to install the nightly with ROCm 7.1 which might have some performance improvements:
This is the command to install the nightly with ROCm 7.0 which might have some performance improvements:
```pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/rocm7.1```
```pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/rocm7.0```
### AMD GPUs (Experimental: Windows and Linux), RDNA 3, 3.5 and 4 only.
@@ -240,7 +229,7 @@ These have less hardware support than the builds above but they work on windows.
RDNA 3 (RX 7000 series):
```pip install --pre torch torchvision torchaudio --index-url https://rocm.nightlies.amd.com/v2/gfx110X-all/```
```pip install --pre torch torchvision torchaudio --index-url https://rocm.nightlies.amd.com/v2/gfx110X-dgpu/```
RDNA 3.5 (Strix halo/Ryzen AI Max+ 365):
@@ -252,7 +241,7 @@ RDNA 4 (RX 9000 series):
### Intel GPUs (Windows and Linux)
Intel Arc GPU users can install native PyTorch with torch.xpu support using pip. More information can be found [here](https://pytorch.org/docs/main/notes/get_start_xpu.html)
(Option 1) Intel Arc GPU users can install native PyTorch with torch.xpu support using pip. More information can be found [here](https://pytorch.org/docs/main/notes/get_start_xpu.html)
1. To install PyTorch xpu, use the following command:
@@ -262,6 +251,10 @@ This is the command to install the Pytorch xpu nightly which might have some per
```pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/xpu```
(Option 2) Alternatively, Intel GPUs supported by Intel Extension for PyTorch (IPEX) can leverage IPEX for improved performance.
1. visit [Installation](https://intel.github.io/intel-extension-for-pytorch/index.html#installation?platform=gpu) for more information.
### NVIDIA
Nvidia users should install stable pytorch using this command:
@@ -325,32 +318,6 @@ For models compatible with Iluvatar Extension for PyTorch. Here's a step-by-step
1. Install the Iluvatar Corex Toolkit by adhering to the platform-specific instructions on the [Installation](https://support.iluvatar.com/#/DocumentCentre?id=1&nameCenter=2&productId=520117912052801536)
2. Launch ComfyUI by running `python main.py`
## [ComfyUI-Manager](https://github.com/Comfy-Org/ComfyUI-Manager/tree/manager-v4)
**ComfyUI-Manager** is an extension that allows you to easily install, update, and manage custom nodes for ComfyUI.
### Setup
1. Install the manager dependencies:
```bash
pip install -r manager_requirements.txt
```
2. Enable the manager with the `--enable-manager` flag when running ComfyUI:
```bash
python main.py --enable-manager
```
### Command Line Options
| Flag | Description |
|------|-------------|
| `--enable-manager` | Enable ComfyUI-Manager |
| `--enable-manager-legacy-ui` | Use the legacy manager UI instead of the new UI (requires `--enable-manager`) |
| `--disable-manager-ui` | Disable the manager UI and endpoints while keeping background features like security checks and scheduled installation completion (requires `--enable-manager`) |
# Running
```python main.py```

View File

@@ -1,174 +0,0 @@
"""
Initial assets schema
Revision ID: 0001_assets
Revises: None
Create Date: 2025-12-10 00:00:00
"""
from alembic import op
import sqlalchemy as sa
revision = "0001_assets"
down_revision = None
branch_labels = None
depends_on = None
def upgrade() -> None:
# ASSETS: content identity
op.create_table(
"assets",
sa.Column("id", sa.String(length=36), primary_key=True),
sa.Column("hash", sa.String(length=256), nullable=True),
sa.Column("size_bytes", sa.BigInteger(), nullable=False, server_default="0"),
sa.Column("mime_type", sa.String(length=255), nullable=True),
sa.Column("created_at", sa.DateTime(timezone=False), nullable=False),
sa.CheckConstraint("size_bytes >= 0", name="ck_assets_size_nonneg"),
)
op.create_index("uq_assets_hash", "assets", ["hash"], unique=True)
op.create_index("ix_assets_mime_type", "assets", ["mime_type"])
# ASSETS_INFO: user-visible references
op.create_table(
"assets_info",
sa.Column("id", sa.String(length=36), primary_key=True),
sa.Column("owner_id", sa.String(length=128), nullable=False, server_default=""),
sa.Column("name", sa.String(length=512), nullable=False),
sa.Column("asset_id", sa.String(length=36), sa.ForeignKey("assets.id", ondelete="RESTRICT"), nullable=False),
sa.Column("preview_id", sa.String(length=36), sa.ForeignKey("assets.id", ondelete="SET NULL"), nullable=True),
sa.Column("user_metadata", sa.JSON(), nullable=True),
sa.Column("created_at", sa.DateTime(timezone=False), nullable=False),
sa.Column("updated_at", sa.DateTime(timezone=False), nullable=False),
sa.Column("last_access_time", sa.DateTime(timezone=False), nullable=False),
sa.UniqueConstraint("asset_id", "owner_id", "name", name="uq_assets_info_asset_owner_name"),
)
op.create_index("ix_assets_info_owner_id", "assets_info", ["owner_id"])
op.create_index("ix_assets_info_asset_id", "assets_info", ["asset_id"])
op.create_index("ix_assets_info_name", "assets_info", ["name"])
op.create_index("ix_assets_info_created_at", "assets_info", ["created_at"])
op.create_index("ix_assets_info_last_access_time", "assets_info", ["last_access_time"])
op.create_index("ix_assets_info_owner_name", "assets_info", ["owner_id", "name"])
# TAGS: normalized tag vocabulary
op.create_table(
"tags",
sa.Column("name", sa.String(length=512), primary_key=True),
sa.Column("tag_type", sa.String(length=32), nullable=False, server_default="user"),
sa.CheckConstraint("name = lower(name)", name="ck_tags_lowercase"),
)
op.create_index("ix_tags_tag_type", "tags", ["tag_type"])
# ASSET_INFO_TAGS: many-to-many for tags on AssetInfo
op.create_table(
"asset_info_tags",
sa.Column("asset_info_id", sa.String(length=36), sa.ForeignKey("assets_info.id", ondelete="CASCADE"), nullable=False),
sa.Column("tag_name", sa.String(length=512), sa.ForeignKey("tags.name", ondelete="RESTRICT"), nullable=False),
sa.Column("origin", sa.String(length=32), nullable=False, server_default="manual"),
sa.Column("added_at", sa.DateTime(timezone=False), nullable=False),
sa.PrimaryKeyConstraint("asset_info_id", "tag_name", name="pk_asset_info_tags"),
)
op.create_index("ix_asset_info_tags_tag_name", "asset_info_tags", ["tag_name"])
op.create_index("ix_asset_info_tags_asset_info_id", "asset_info_tags", ["asset_info_id"])
# ASSET_CACHE_STATE: N:1 local cache rows per Asset
op.create_table(
"asset_cache_state",
sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
sa.Column("asset_id", sa.String(length=36), sa.ForeignKey("assets.id", ondelete="CASCADE"), nullable=False),
sa.Column("file_path", sa.Text(), nullable=False), # absolute local path to cached file
sa.Column("mtime_ns", sa.BigInteger(), nullable=True),
sa.Column("needs_verify", sa.Boolean(), nullable=False, server_default=sa.text("false")),
sa.CheckConstraint("(mtime_ns IS NULL) OR (mtime_ns >= 0)", name="ck_acs_mtime_nonneg"),
sa.UniqueConstraint("file_path", name="uq_asset_cache_state_file_path"),
)
op.create_index("ix_asset_cache_state_file_path", "asset_cache_state", ["file_path"])
op.create_index("ix_asset_cache_state_asset_id", "asset_cache_state", ["asset_id"])
# ASSET_INFO_META: typed KV projection of user_metadata for filtering/sorting
op.create_table(
"asset_info_meta",
sa.Column("asset_info_id", sa.String(length=36), sa.ForeignKey("assets_info.id", ondelete="CASCADE"), nullable=False),
sa.Column("key", sa.String(length=256), nullable=False),
sa.Column("ordinal", sa.Integer(), nullable=False, server_default="0"),
sa.Column("val_str", sa.String(length=2048), nullable=True),
sa.Column("val_num", sa.Numeric(38, 10), nullable=True),
sa.Column("val_bool", sa.Boolean(), nullable=True),
sa.Column("val_json", sa.JSON(), nullable=True),
sa.PrimaryKeyConstraint("asset_info_id", "key", "ordinal", name="pk_asset_info_meta"),
)
op.create_index("ix_asset_info_meta_key", "asset_info_meta", ["key"])
op.create_index("ix_asset_info_meta_key_val_str", "asset_info_meta", ["key", "val_str"])
op.create_index("ix_asset_info_meta_key_val_num", "asset_info_meta", ["key", "val_num"])
op.create_index("ix_asset_info_meta_key_val_bool", "asset_info_meta", ["key", "val_bool"])
# Tags vocabulary
tags_table = sa.table(
"tags",
sa.column("name", sa.String(length=512)),
sa.column("tag_type", sa.String()),
)
op.bulk_insert(
tags_table,
[
{"name": "models", "tag_type": "system"},
{"name": "input", "tag_type": "system"},
{"name": "output", "tag_type": "system"},
{"name": "configs", "tag_type": "system"},
{"name": "checkpoints", "tag_type": "system"},
{"name": "loras", "tag_type": "system"},
{"name": "vae", "tag_type": "system"},
{"name": "text_encoders", "tag_type": "system"},
{"name": "diffusion_models", "tag_type": "system"},
{"name": "clip_vision", "tag_type": "system"},
{"name": "style_models", "tag_type": "system"},
{"name": "embeddings", "tag_type": "system"},
{"name": "diffusers", "tag_type": "system"},
{"name": "vae_approx", "tag_type": "system"},
{"name": "controlnet", "tag_type": "system"},
{"name": "gligen", "tag_type": "system"},
{"name": "upscale_models", "tag_type": "system"},
{"name": "hypernetworks", "tag_type": "system"},
{"name": "photomaker", "tag_type": "system"},
{"name": "classifiers", "tag_type": "system"},
{"name": "encoder", "tag_type": "system"},
{"name": "decoder", "tag_type": "system"},
{"name": "missing", "tag_type": "system"},
{"name": "rescan", "tag_type": "system"},
],
)
def downgrade() -> None:
op.drop_index("ix_asset_info_meta_key_val_bool", table_name="asset_info_meta")
op.drop_index("ix_asset_info_meta_key_val_num", table_name="asset_info_meta")
op.drop_index("ix_asset_info_meta_key_val_str", table_name="asset_info_meta")
op.drop_index("ix_asset_info_meta_key", table_name="asset_info_meta")
op.drop_table("asset_info_meta")
op.drop_index("ix_asset_cache_state_asset_id", table_name="asset_cache_state")
op.drop_index("ix_asset_cache_state_file_path", table_name="asset_cache_state")
op.drop_constraint("uq_asset_cache_state_file_path", table_name="asset_cache_state")
op.drop_table("asset_cache_state")
op.drop_index("ix_asset_info_tags_asset_info_id", table_name="asset_info_tags")
op.drop_index("ix_asset_info_tags_tag_name", table_name="asset_info_tags")
op.drop_table("asset_info_tags")
op.drop_index("ix_tags_tag_type", table_name="tags")
op.drop_table("tags")
op.drop_constraint("uq_assets_info_asset_owner_name", table_name="assets_info")
op.drop_index("ix_assets_info_owner_name", table_name="assets_info")
op.drop_index("ix_assets_info_last_access_time", table_name="assets_info")
op.drop_index("ix_assets_info_created_at", table_name="assets_info")
op.drop_index("ix_assets_info_name", table_name="assets_info")
op.drop_index("ix_assets_info_asset_id", table_name="assets_info")
op.drop_index("ix_assets_info_owner_id", table_name="assets_info")
op.drop_table("assets_info")
op.drop_index("uq_assets_hash", table_name="assets")
op.drop_index("ix_assets_mime_type", table_name="assets")
op.drop_table("assets")

View File

@@ -58,13 +58,8 @@ class InternalRoutes:
return web.json_response({"error": "Invalid directory type"}, status=400)
directory = get_directory_by_type(directory_type)
def is_visible_file(entry: os.DirEntry) -> bool:
"""Filter out hidden files (e.g., .DS_Store on macOS)."""
return entry.is_file() and not entry.name.startswith('.')
sorted_files = sorted(
(entry for entry in os.scandir(directory) if is_visible_file(entry)),
(entry for entry in os.scandir(directory) if entry.is_file()),
key=lambda entry: -entry.stat().st_mtime
)
return web.json_response([entry.name for entry in sorted_files], status=200)

View File

@@ -1,514 +0,0 @@
import logging
import uuid
import urllib.parse
import os
import contextlib
from aiohttp import web
from pydantic import ValidationError
import app.assets.manager as manager
from app import user_manager
from app.assets.api import schemas_in
from app.assets.helpers import get_query_dict
from app.assets.scanner import seed_assets
import folder_paths
ROUTES = web.RouteTableDef()
USER_MANAGER: user_manager.UserManager | None = None
# UUID regex (canonical hyphenated form, case-insensitive)
UUID_RE = r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}"
# Note to any custom node developers reading this code:
# The assets system is not yet fully implemented, do not rely on the code in /app/assets remaining the same.
def register_assets_system(app: web.Application, user_manager_instance: user_manager.UserManager) -> None:
global USER_MANAGER
USER_MANAGER = user_manager_instance
app.add_routes(ROUTES)
def _error_response(status: int, code: str, message: str, details: dict | None = None) -> web.Response:
return web.json_response({"error": {"code": code, "message": message, "details": details or {}}}, status=status)
def _validation_error_response(code: str, ve: ValidationError) -> web.Response:
return _error_response(400, code, "Validation failed.", {"errors": ve.json()})
@ROUTES.head("/api/assets/hash/{hash}")
async def head_asset_by_hash(request: web.Request) -> web.Response:
hash_str = request.match_info.get("hash", "").strip().lower()
if not hash_str or ":" not in hash_str:
return _error_response(400, "INVALID_HASH", "hash must be like 'blake3:<hex>'")
algo, digest = hash_str.split(":", 1)
if algo != "blake3" or not digest or any(c for c in digest if c not in "0123456789abcdef"):
return _error_response(400, "INVALID_HASH", "hash must be like 'blake3:<hex>'")
exists = manager.asset_exists(asset_hash=hash_str)
return web.Response(status=200 if exists else 404)
@ROUTES.get("/api/assets")
async def list_assets(request: web.Request) -> web.Response:
"""
GET request to list assets.
"""
query_dict = get_query_dict(request)
try:
q = schemas_in.ListAssetsQuery.model_validate(query_dict)
except ValidationError as ve:
return _validation_error_response("INVALID_QUERY", ve)
payload = manager.list_assets(
include_tags=q.include_tags,
exclude_tags=q.exclude_tags,
name_contains=q.name_contains,
metadata_filter=q.metadata_filter,
limit=q.limit,
offset=q.offset,
sort=q.sort,
order=q.order,
owner_id=USER_MANAGER.get_request_user_id(request),
)
return web.json_response(payload.model_dump(mode="json", exclude_none=True))
@ROUTES.get(f"/api/assets/{{id:{UUID_RE}}}")
async def get_asset(request: web.Request) -> web.Response:
"""
GET request to get an asset's info as JSON.
"""
asset_info_id = str(uuid.UUID(request.match_info["id"]))
try:
result = manager.get_asset(
asset_info_id=asset_info_id,
owner_id=USER_MANAGER.get_request_user_id(request),
)
except ValueError as e:
return _error_response(404, "ASSET_NOT_FOUND", str(e), {"id": asset_info_id})
except Exception:
logging.exception(
"get_asset failed for asset_info_id=%s, owner_id=%s",
asset_info_id,
USER_MANAGER.get_request_user_id(request),
)
return _error_response(500, "INTERNAL", "Unexpected server error.")
return web.json_response(result.model_dump(mode="json"), status=200)
@ROUTES.get(f"/api/assets/{{id:{UUID_RE}}}/content")
async def download_asset_content(request: web.Request) -> web.Response:
# question: do we need disposition? could we just stick with one of these?
disposition = request.query.get("disposition", "attachment").lower().strip()
if disposition not in {"inline", "attachment"}:
disposition = "attachment"
try:
abs_path, content_type, filename = manager.resolve_asset_content_for_download(
asset_info_id=str(uuid.UUID(request.match_info["id"])),
owner_id=USER_MANAGER.get_request_user_id(request),
)
except ValueError as ve:
return _error_response(404, "ASSET_NOT_FOUND", str(ve))
except NotImplementedError as nie:
return _error_response(501, "BACKEND_UNSUPPORTED", str(nie))
except FileNotFoundError:
return _error_response(404, "FILE_NOT_FOUND", "Underlying file not found on disk.")
quoted = (filename or "").replace("\r", "").replace("\n", "").replace('"', "'")
cd = f'{disposition}; filename="{quoted}"; filename*=UTF-8\'\'{urllib.parse.quote(filename)}'
file_size = os.path.getsize(abs_path)
logging.info(
"download_asset_content: path=%s, size=%d bytes (%.2f MB), content_type=%s, filename=%s",
abs_path,
file_size,
file_size / (1024 * 1024),
content_type,
filename,
)
async def file_sender():
chunk_size = 64 * 1024
with open(abs_path, "rb") as f:
while True:
chunk = f.read(chunk_size)
if not chunk:
break
yield chunk
return web.Response(
body=file_sender(),
content_type=content_type,
headers={
"Content-Disposition": cd,
"Content-Length": str(file_size),
},
)
@ROUTES.post("/api/assets/from-hash")
async def create_asset_from_hash(request: web.Request) -> web.Response:
try:
payload = await request.json()
body = schemas_in.CreateFromHashBody.model_validate(payload)
except ValidationError as ve:
return _validation_error_response("INVALID_BODY", ve)
except Exception:
return _error_response(400, "INVALID_JSON", "Request body must be valid JSON.")
result = manager.create_asset_from_hash(
hash_str=body.hash,
name=body.name,
tags=body.tags,
user_metadata=body.user_metadata,
owner_id=USER_MANAGER.get_request_user_id(request),
)
if result is None:
return _error_response(404, "ASSET_NOT_FOUND", f"Asset content {body.hash} does not exist")
return web.json_response(result.model_dump(mode="json"), status=201)
@ROUTES.post("/api/assets")
async def upload_asset(request: web.Request) -> web.Response:
"""Multipart/form-data endpoint for Asset uploads."""
if not (request.content_type or "").lower().startswith("multipart/"):
return _error_response(415, "UNSUPPORTED_MEDIA_TYPE", "Use multipart/form-data for uploads.")
reader = await request.multipart()
file_present = False
file_client_name: str | None = None
tags_raw: list[str] = []
provided_name: str | None = None
user_metadata_raw: str | None = None
provided_hash: str | None = None
provided_hash_exists: bool | None = None
file_written = 0
tmp_path: str | None = None
while True:
field = await reader.next()
if field is None:
break
fname = getattr(field, "name", "") or ""
if fname == "hash":
try:
s = ((await field.text()) or "").strip().lower()
except Exception:
return _error_response(400, "INVALID_HASH", "hash must be like 'blake3:<hex>'")
if s:
if ":" not in s:
return _error_response(400, "INVALID_HASH", "hash must be like 'blake3:<hex>'")
algo, digest = s.split(":", 1)
if algo != "blake3" or not digest or any(c for c in digest if c not in "0123456789abcdef"):
return _error_response(400, "INVALID_HASH", "hash must be like 'blake3:<hex>'")
provided_hash = f"{algo}:{digest}"
try:
provided_hash_exists = manager.asset_exists(asset_hash=provided_hash)
except Exception:
provided_hash_exists = None # do not fail the whole request here
elif fname == "file":
file_present = True
file_client_name = (field.filename or "").strip()
if provided_hash and provided_hash_exists is True:
# If client supplied a hash that we know exists, drain but do not write to disk
try:
while True:
chunk = await field.read_chunk(8 * 1024 * 1024)
if not chunk:
break
file_written += len(chunk)
except Exception:
return _error_response(500, "UPLOAD_IO_ERROR", "Failed to receive uploaded file.")
continue # Do not create temp file; we will create AssetInfo from the existing content
# Otherwise, store to temp for hashing/ingest
uploads_root = os.path.join(folder_paths.get_temp_directory(), "uploads")
unique_dir = os.path.join(uploads_root, uuid.uuid4().hex)
os.makedirs(unique_dir, exist_ok=True)
tmp_path = os.path.join(unique_dir, ".upload.part")
try:
with open(tmp_path, "wb") as f:
while True:
chunk = await field.read_chunk(8 * 1024 * 1024)
if not chunk:
break
f.write(chunk)
file_written += len(chunk)
except Exception:
try:
if os.path.exists(tmp_path or ""):
os.remove(tmp_path)
finally:
return _error_response(500, "UPLOAD_IO_ERROR", "Failed to receive and store uploaded file.")
elif fname == "tags":
tags_raw.append((await field.text()) or "")
elif fname == "name":
provided_name = (await field.text()) or None
elif fname == "user_metadata":
user_metadata_raw = (await field.text()) or None
# If client did not send file, and we are not doing a from-hash fast path -> error
if not file_present and not (provided_hash and provided_hash_exists):
return _error_response(400, "MISSING_FILE", "Form must include a 'file' part or a known 'hash'.")
if file_present and file_written == 0 and not (provided_hash and provided_hash_exists):
# Empty upload is only acceptable if we are fast-pathing from existing hash
try:
if tmp_path and os.path.exists(tmp_path):
os.remove(tmp_path)
finally:
return _error_response(400, "EMPTY_UPLOAD", "Uploaded file is empty.")
try:
spec = schemas_in.UploadAssetSpec.model_validate({
"tags": tags_raw,
"name": provided_name,
"user_metadata": user_metadata_raw,
"hash": provided_hash,
})
except ValidationError as ve:
try:
if tmp_path and os.path.exists(tmp_path):
os.remove(tmp_path)
finally:
return _validation_error_response("INVALID_BODY", ve)
# Validate models category against configured folders (consistent with previous behavior)
if spec.tags and spec.tags[0] == "models":
if len(spec.tags) < 2 or spec.tags[1] not in folder_paths.folder_names_and_paths:
if tmp_path and os.path.exists(tmp_path):
os.remove(tmp_path)
return _error_response(
400, "INVALID_BODY", f"unknown models category '{spec.tags[1] if len(spec.tags) >= 2 else ''}'"
)
owner_id = USER_MANAGER.get_request_user_id(request)
# Fast path: if a valid provided hash exists, create AssetInfo without writing anything
if spec.hash and provided_hash_exists is True:
try:
result = manager.create_asset_from_hash(
hash_str=spec.hash,
name=spec.name or (spec.hash.split(":", 1)[1]),
tags=spec.tags,
user_metadata=spec.user_metadata or {},
owner_id=owner_id,
)
except Exception:
logging.exception("create_asset_from_hash failed for hash=%s, owner_id=%s", spec.hash, owner_id)
return _error_response(500, "INTERNAL", "Unexpected server error.")
if result is None:
return _error_response(404, "ASSET_NOT_FOUND", f"Asset content {spec.hash} does not exist")
# Drain temp if we accidentally saved (e.g., hash field came after file)
if tmp_path and os.path.exists(tmp_path):
with contextlib.suppress(Exception):
os.remove(tmp_path)
status = 200 if (not result.created_new) else 201
return web.json_response(result.model_dump(mode="json"), status=status)
# Otherwise, we must have a temp file path to ingest
if not tmp_path or not os.path.exists(tmp_path):
# The only case we reach here without a temp file is: client sent a hash that does not exist and no file
return _error_response(404, "ASSET_NOT_FOUND", "Provided hash not found and no file uploaded.")
try:
created = manager.upload_asset_from_temp_path(
spec,
temp_path=tmp_path,
client_filename=file_client_name,
owner_id=owner_id,
expected_asset_hash=spec.hash,
)
status = 201 if created.created_new else 200
return web.json_response(created.model_dump(mode="json"), status=status)
except ValueError as e:
if tmp_path and os.path.exists(tmp_path):
os.remove(tmp_path)
msg = str(e)
if "HASH_MISMATCH" in msg or msg.strip().upper() == "HASH_MISMATCH":
return _error_response(
400,
"HASH_MISMATCH",
"Uploaded file hash does not match provided hash.",
)
return _error_response(400, "BAD_REQUEST", "Invalid inputs.")
except Exception:
if tmp_path and os.path.exists(tmp_path):
os.remove(tmp_path)
logging.exception("upload_asset_from_temp_path failed for tmp_path=%s, owner_id=%s", tmp_path, owner_id)
return _error_response(500, "INTERNAL", "Unexpected server error.")
@ROUTES.put(f"/api/assets/{{id:{UUID_RE}}}")
async def update_asset(request: web.Request) -> web.Response:
asset_info_id = str(uuid.UUID(request.match_info["id"]))
try:
body = schemas_in.UpdateAssetBody.model_validate(await request.json())
except ValidationError as ve:
return _validation_error_response("INVALID_BODY", ve)
except Exception:
return _error_response(400, "INVALID_JSON", "Request body must be valid JSON.")
try:
result = manager.update_asset(
asset_info_id=asset_info_id,
name=body.name,
user_metadata=body.user_metadata,
owner_id=USER_MANAGER.get_request_user_id(request),
)
except (ValueError, PermissionError) as ve:
return _error_response(404, "ASSET_NOT_FOUND", str(ve), {"id": asset_info_id})
except Exception:
logging.exception(
"update_asset failed for asset_info_id=%s, owner_id=%s",
asset_info_id,
USER_MANAGER.get_request_user_id(request),
)
return _error_response(500, "INTERNAL", "Unexpected server error.")
return web.json_response(result.model_dump(mode="json"), status=200)
@ROUTES.delete(f"/api/assets/{{id:{UUID_RE}}}")
async def delete_asset(request: web.Request) -> web.Response:
asset_info_id = str(uuid.UUID(request.match_info["id"]))
delete_content = request.query.get("delete_content")
delete_content = True if delete_content is None else delete_content.lower() not in {"0", "false", "no"}
try:
deleted = manager.delete_asset_reference(
asset_info_id=asset_info_id,
owner_id=USER_MANAGER.get_request_user_id(request),
delete_content_if_orphan=delete_content,
)
except Exception:
logging.exception(
"delete_asset_reference failed for asset_info_id=%s, owner_id=%s",
asset_info_id,
USER_MANAGER.get_request_user_id(request),
)
return _error_response(500, "INTERNAL", "Unexpected server error.")
if not deleted:
return _error_response(404, "ASSET_NOT_FOUND", f"AssetInfo {asset_info_id} not found.")
return web.Response(status=204)
@ROUTES.get("/api/tags")
async def get_tags(request: web.Request) -> web.Response:
"""
GET request to list all tags based on query parameters.
"""
query_map = dict(request.rel_url.query)
try:
query = schemas_in.TagsListQuery.model_validate(query_map)
except ValidationError as e:
return web.json_response(
{"error": {"code": "INVALID_QUERY", "message": "Invalid query parameters", "details": e.errors()}},
status=400,
)
result = manager.list_tags(
prefix=query.prefix,
limit=query.limit,
offset=query.offset,
order=query.order,
include_zero=query.include_zero,
owner_id=USER_MANAGER.get_request_user_id(request),
)
return web.json_response(result.model_dump(mode="json"))
@ROUTES.post(f"/api/assets/{{id:{UUID_RE}}}/tags")
async def add_asset_tags(request: web.Request) -> web.Response:
asset_info_id = str(uuid.UUID(request.match_info["id"]))
try:
payload = await request.json()
data = schemas_in.TagsAdd.model_validate(payload)
except ValidationError as ve:
return _error_response(400, "INVALID_BODY", "Invalid JSON body for tags add.", {"errors": ve.errors()})
except Exception:
return _error_response(400, "INVALID_JSON", "Request body must be valid JSON.")
try:
result = manager.add_tags_to_asset(
asset_info_id=asset_info_id,
tags=data.tags,
origin="manual",
owner_id=USER_MANAGER.get_request_user_id(request),
)
except (ValueError, PermissionError) as ve:
return _error_response(404, "ASSET_NOT_FOUND", str(ve), {"id": asset_info_id})
except Exception:
logging.exception(
"add_tags_to_asset failed for asset_info_id=%s, owner_id=%s",
asset_info_id,
USER_MANAGER.get_request_user_id(request),
)
return _error_response(500, "INTERNAL", "Unexpected server error.")
return web.json_response(result.model_dump(mode="json"), status=200)
@ROUTES.delete(f"/api/assets/{{id:{UUID_RE}}}/tags")
async def delete_asset_tags(request: web.Request) -> web.Response:
asset_info_id = str(uuid.UUID(request.match_info["id"]))
try:
payload = await request.json()
data = schemas_in.TagsRemove.model_validate(payload)
except ValidationError as ve:
return _error_response(400, "INVALID_BODY", "Invalid JSON body for tags remove.", {"errors": ve.errors()})
except Exception:
return _error_response(400, "INVALID_JSON", "Request body must be valid JSON.")
try:
result = manager.remove_tags_from_asset(
asset_info_id=asset_info_id,
tags=data.tags,
owner_id=USER_MANAGER.get_request_user_id(request),
)
except ValueError as ve:
return _error_response(404, "ASSET_NOT_FOUND", str(ve), {"id": asset_info_id})
except Exception:
logging.exception(
"remove_tags_from_asset failed for asset_info_id=%s, owner_id=%s",
asset_info_id,
USER_MANAGER.get_request_user_id(request),
)
return _error_response(500, "INTERNAL", "Unexpected server error.")
return web.json_response(result.model_dump(mode="json"), status=200)
@ROUTES.post("/api/assets/seed")
async def seed_assets_endpoint(request: web.Request) -> web.Response:
"""Trigger asset seeding for specified roots (models, input, output)."""
try:
payload = await request.json()
roots = payload.get("roots", ["models", "input", "output"])
except Exception:
roots = ["models", "input", "output"]
valid_roots = [r for r in roots if r in ("models", "input", "output")]
if not valid_roots:
return _error_response(400, "INVALID_BODY", "No valid roots specified")
try:
seed_assets(tuple(valid_roots))
except Exception:
logging.exception("seed_assets failed for roots=%s", valid_roots)
return _error_response(500, "INTERNAL", "Seed operation failed")
return web.json_response({"seeded": valid_roots}, status=200)

View File

@@ -1,264 +0,0 @@
import json
from typing import Any, Literal
from pydantic import (
BaseModel,
ConfigDict,
Field,
conint,
field_validator,
model_validator,
)
class ListAssetsQuery(BaseModel):
include_tags: list[str] = Field(default_factory=list)
exclude_tags: list[str] = Field(default_factory=list)
name_contains: str | None = None
# Accept either a JSON string (query param) or a dict
metadata_filter: dict[str, Any] | None = None
limit: conint(ge=1, le=500) = 20
offset: conint(ge=0) = 0
sort: Literal["name", "created_at", "updated_at", "size", "last_access_time"] = "created_at"
order: Literal["asc", "desc"] = "desc"
@field_validator("include_tags", "exclude_tags", mode="before")
@classmethod
def _split_csv_tags(cls, v):
# Accept "a,b,c" or ["a","b"] (we are liberal in what we accept)
if v is None:
return []
if isinstance(v, str):
return [t.strip() for t in v.split(",") if t.strip()]
if isinstance(v, list):
out: list[str] = []
for item in v:
if isinstance(item, str):
out.extend([t.strip() for t in item.split(",") if t.strip()])
return out
return v
@field_validator("metadata_filter", mode="before")
@classmethod
def _parse_metadata_json(cls, v):
if v is None or isinstance(v, dict):
return v
if isinstance(v, str) and v.strip():
try:
parsed = json.loads(v)
except Exception as e:
raise ValueError(f"metadata_filter must be JSON: {e}") from e
if not isinstance(parsed, dict):
raise ValueError("metadata_filter must be a JSON object")
return parsed
return None
class UpdateAssetBody(BaseModel):
name: str | None = None
user_metadata: dict[str, Any] | None = None
@model_validator(mode="after")
def _at_least_one(self):
if self.name is None and self.user_metadata is None:
raise ValueError("Provide at least one of: name, user_metadata.")
return self
class CreateFromHashBody(BaseModel):
model_config = ConfigDict(extra="ignore", str_strip_whitespace=True)
hash: str
name: str
tags: list[str] = Field(default_factory=list)
user_metadata: dict[str, Any] = Field(default_factory=dict)
@field_validator("hash")
@classmethod
def _require_blake3(cls, v):
s = (v or "").strip().lower()
if ":" not in s:
raise ValueError("hash must be 'blake3:<hex>'")
algo, digest = s.split(":", 1)
if algo != "blake3":
raise ValueError("only canonical 'blake3:<hex>' is accepted here")
if not digest or any(c for c in digest if c not in "0123456789abcdef"):
raise ValueError("hash digest must be lowercase hex")
return s
@field_validator("tags", mode="before")
@classmethod
def _tags_norm(cls, v):
if v is None:
return []
if isinstance(v, list):
out = [str(t).strip().lower() for t in v if str(t).strip()]
seen = set()
dedup = []
for t in out:
if t not in seen:
seen.add(t)
dedup.append(t)
return dedup
if isinstance(v, str):
return [t.strip().lower() for t in v.split(",") if t.strip()]
return []
class TagsListQuery(BaseModel):
model_config = ConfigDict(extra="ignore", str_strip_whitespace=True)
prefix: str | None = Field(None, min_length=1, max_length=256)
limit: int = Field(100, ge=1, le=1000)
offset: int = Field(0, ge=0, le=10_000_000)
order: Literal["count_desc", "name_asc"] = "count_desc"
include_zero: bool = True
@field_validator("prefix")
@classmethod
def normalize_prefix(cls, v: str | None) -> str | None:
if v is None:
return v
v = v.strip()
return v.lower() or None
class TagsAdd(BaseModel):
model_config = ConfigDict(extra="ignore")
tags: list[str] = Field(..., min_length=1)
@field_validator("tags")
@classmethod
def normalize_tags(cls, v: list[str]) -> list[str]:
out = []
for t in v:
if not isinstance(t, str):
raise TypeError("tags must be strings")
tnorm = t.strip().lower()
if tnorm:
out.append(tnorm)
seen = set()
deduplicated = []
for x in out:
if x not in seen:
seen.add(x)
deduplicated.append(x)
return deduplicated
class TagsRemove(TagsAdd):
pass
class UploadAssetSpec(BaseModel):
"""Upload Asset operation.
- tags: ordered; first is root ('models'|'input'|'output');
if root == 'models', second must be a valid category from folder_paths.folder_names_and_paths
- name: display name
- user_metadata: arbitrary JSON object (optional)
- hash: optional canonical 'blake3:<hex>' provided by the client for validation / fast-path
    Files created via this endpoint are stored on disk using the **content hash** as the filename stem;
    the original extension is preserved when available.
"""
model_config = ConfigDict(extra="ignore", str_strip_whitespace=True)
tags: list[str] = Field(..., min_length=1)
name: str | None = Field(default=None, max_length=512, description="Display Name")
user_metadata: dict[str, Any] = Field(default_factory=dict)
hash: str | None = Field(default=None)
@field_validator("hash", mode="before")
@classmethod
def _parse_hash(cls, v):
if v is None:
return None
s = str(v).strip().lower()
if not s:
return None
if ":" not in s:
raise ValueError("hash must be 'blake3:<hex>'")
algo, digest = s.split(":", 1)
if algo != "blake3":
raise ValueError("only canonical 'blake3:<hex>' is accepted here")
if not digest or any(c for c in digest if c not in "0123456789abcdef"):
raise ValueError("hash digest must be lowercase hex")
return f"{algo}:{digest}"
@field_validator("tags", mode="before")
@classmethod
def _parse_tags(cls, v):
"""
Accepts a list of strings (possibly multiple form fields),
where each string can be:
- JSON array (e.g., '["models","loras","foo"]')
- comma-separated ('models, loras, foo')
- single token ('models')
Returns a normalized, deduplicated, ordered list.
"""
items: list[str] = []
if v is None:
return []
if isinstance(v, str):
v = [v]
if isinstance(v, list):
for item in v:
if item is None:
continue
s = str(item).strip()
if not s:
continue
if s.startswith("["):
try:
arr = json.loads(s)
if isinstance(arr, list):
items.extend(str(x) for x in arr)
continue
except Exception:
pass # fallback to CSV parse below
items.extend([p for p in s.split(",") if p.strip()])
else:
return []
# normalize + dedupe
norm = []
seen = set()
for t in items:
tnorm = str(t).strip().lower()
if tnorm and tnorm not in seen:
seen.add(tnorm)
norm.append(tnorm)
return norm
@field_validator("user_metadata", mode="before")
@classmethod
def _parse_metadata_json(cls, v):
if v is None or isinstance(v, dict):
return v or {}
if isinstance(v, str):
s = v.strip()
if not s:
return {}
try:
parsed = json.loads(s)
except Exception as e:
raise ValueError(f"user_metadata must be JSON: {e}") from e
if not isinstance(parsed, dict):
raise ValueError("user_metadata must be a JSON object")
return parsed
return {}
@model_validator(mode="after")
def _validate_order(self):
if not self.tags:
raise ValueError("tags must be provided and non-empty")
root = self.tags[0]
if root not in {"models", "input", "output"}:
raise ValueError("first tag must be one of: models, input, output")
if root == "models":
if len(self.tags) < 2:
raise ValueError("models uploads require a category tag as the second tag")
return self
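
A small sketch of how the validators above normalize multipart form fields, assuming UploadAssetSpec is imported from this module; every value is illustrative and the hash is a placeholder, not a real digest.

spec = UploadAssetSpec.model_validate({
    "tags": ['["models","loras","my-set"]'],  # JSON-array form field -> ["models", "loras", "my-set"]
    "name": "  My LoRA  ",                    # str_strip_whitespace trims this to "My LoRA"
    "user_metadata": '{"epoch": 3}',          # JSON string -> {"epoch": 3}
    "hash": "BLAKE3:ABCDEF",                  # normalized to "blake3:abcdef"
})
print(spec.tags, spec.name, spec.hash)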

View File

@@ -1,93 +0,0 @@
from datetime import datetime
from typing import Any
from pydantic import BaseModel, ConfigDict, Field, field_serializer
class AssetSummary(BaseModel):
id: str
name: str
asset_hash: str | None = None
size: int | None = None
mime_type: str | None = None
tags: list[str] = Field(default_factory=list)
preview_url: str | None = None
created_at: datetime | None = None
updated_at: datetime | None = None
last_access_time: datetime | None = None
model_config = ConfigDict(from_attributes=True)
@field_serializer("created_at", "updated_at", "last_access_time")
def _ser_dt(self, v: datetime | None, _info):
return v.isoformat() if v else None
class AssetsList(BaseModel):
assets: list[AssetSummary]
total: int
has_more: bool
class AssetUpdated(BaseModel):
id: str
name: str
asset_hash: str | None = None
tags: list[str] = Field(default_factory=list)
user_metadata: dict[str, Any] = Field(default_factory=dict)
updated_at: datetime | None = None
model_config = ConfigDict(from_attributes=True)
@field_serializer("updated_at")
def _ser_updated(self, v: datetime | None, _info):
return v.isoformat() if v else None
class AssetDetail(BaseModel):
id: str
name: str
asset_hash: str | None = None
size: int | None = None
mime_type: str | None = None
tags: list[str] = Field(default_factory=list)
user_metadata: dict[str, Any] = Field(default_factory=dict)
preview_id: str | None = None
created_at: datetime | None = None
last_access_time: datetime | None = None
model_config = ConfigDict(from_attributes=True)
@field_serializer("created_at", "last_access_time")
def _ser_dt(self, v: datetime | None, _info):
return v.isoformat() if v else None
class AssetCreated(AssetDetail):
created_new: bool
class TagUsage(BaseModel):
name: str
count: int
type: str
class TagsList(BaseModel):
tags: list[TagUsage] = Field(default_factory=list)
total: int
has_more: bool
class TagsAdd(BaseModel):
model_config = ConfigDict(str_strip_whitespace=True)
added: list[str] = Field(default_factory=list)
already_present: list[str] = Field(default_factory=list)
total_tags: list[str] = Field(default_factory=list)
class TagsRemove(BaseModel):
model_config = ConfigDict(str_strip_whitespace=True)
removed: list[str] = Field(default_factory=list)
not_present: list[str] = Field(default_factory=list)
total_tags: list[str] = Field(default_factory=list)
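
A short sketch of the datetime serialization these output models perform; the field values are illustrative.

from datetime import datetime

summary = AssetSummary(
    id="00000000-0000-0000-0000-000000000000",  # hypothetical id
    name="ae.safetensors",
    tags=["models", "vae"],
    created_at=datetime(2025, 10, 1, 12, 0, 0),
)
# field_serializer renders datetimes as ISO-8601 strings; unset datetimes stay None.
print(summary.model_dump(mode="json")["created_at"])   # "2025-10-01T12:00:00"
print(summary.model_dump(mode="json")["updated_at"])   # None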

View File

@@ -1,204 +0,0 @@
import os
import uuid
import sqlalchemy
from typing import Iterable
from sqlalchemy.orm import Session
from sqlalchemy.dialects import sqlite
from app.assets.helpers import utcnow
from app.assets.database.models import Asset, AssetCacheState, AssetInfo, AssetInfoTag, AssetInfoMeta
MAX_BIND_PARAMS = 800
def _chunk_rows(rows: list[dict], cols_per_row: int, max_bind_params: int) -> Iterable[list[dict]]:
if not rows:
return []
rows_per_stmt = max(1, max_bind_params // max(1, cols_per_row))
for i in range(0, len(rows), rows_per_stmt):
yield rows[i:i + rows_per_stmt]
def _iter_chunks(seq, n: int):
for i in range(0, len(seq), n):
yield seq[i:i + n]
def _rows_per_stmt(cols: int) -> int:
return max(1, MAX_BIND_PARAMS // max(1, cols))
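
A quick worked example of the bind-parameter budgeting above (MAX_BIND_PARAMS = 800 presumably keeps each statement under SQLite's bind-variable limit); the row counts below are illustrative.

# _rows_per_stmt(5) == 160  -> 5-column Asset rows per INSERT
# _rows_per_stmt(9) == 88   -> 9-column AssetInfo rows per INSERT
rows = [{"id": str(i)} for i in range(1000)]
batches = list(_chunk_rows(rows, cols_per_row=9, max_bind_params=MAX_BIND_PARAMS))
assert [len(b) for b in batches] == [88] * 11 + [32]   # 11 full batches plus a 32-row tail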
def seed_from_paths_batch(
session: Session,
*,
specs: list[dict],
owner_id: str = "",
) -> dict:
"""Each spec is a dict with keys:
- abs_path: str
- size_bytes: int
- mtime_ns: int
- info_name: str
- tags: list[str]
- fname: Optional[str]
"""
if not specs:
return {"inserted_infos": 0, "won_states": 0, "lost_states": 0}
now = utcnow()
asset_rows: list[dict] = []
state_rows: list[dict] = []
path_to_asset: dict[str, str] = {}
asset_to_info: dict[str, dict] = {} # asset_id -> prepared info row
path_list: list[str] = []
for sp in specs:
ap = os.path.abspath(sp["abs_path"])
aid = str(uuid.uuid4())
iid = str(uuid.uuid4())
path_list.append(ap)
path_to_asset[ap] = aid
asset_rows.append(
{
"id": aid,
"hash": None,
"size_bytes": sp["size_bytes"],
"mime_type": None,
"created_at": now,
}
)
state_rows.append(
{
"asset_id": aid,
"file_path": ap,
"mtime_ns": sp["mtime_ns"],
}
)
asset_to_info[aid] = {
"id": iid,
"owner_id": owner_id,
"name": sp["info_name"],
"asset_id": aid,
"preview_id": None,
"user_metadata": {"filename": sp["fname"]} if sp["fname"] else None,
"created_at": now,
"updated_at": now,
"last_access_time": now,
"_tags": sp["tags"],
"_filename": sp["fname"],
}
# insert all seed Assets (hash=NULL)
ins_asset = sqlite.insert(Asset)
for chunk in _iter_chunks(asset_rows, _rows_per_stmt(5)):
session.execute(ins_asset, chunk)
# try to claim AssetCacheState (file_path)
# Insert with ON CONFLICT DO NOTHING, then query to find which paths were actually inserted
ins_state = (
sqlite.insert(AssetCacheState)
.on_conflict_do_nothing(index_elements=[AssetCacheState.file_path])
)
for chunk in _iter_chunks(state_rows, _rows_per_stmt(3)):
session.execute(ins_state, chunk)
# Query to find which of our paths won (were actually inserted)
winners_by_path: set[str] = set()
for chunk in _iter_chunks(path_list, MAX_BIND_PARAMS):
result = session.execute(
sqlalchemy.select(AssetCacheState.file_path)
.where(AssetCacheState.file_path.in_(chunk))
.where(AssetCacheState.asset_id.in_([path_to_asset[p] for p in chunk]))
)
winners_by_path.update(result.scalars().all())
all_paths_set = set(path_list)
losers_by_path = all_paths_set - winners_by_path
lost_assets = [path_to_asset[p] for p in losers_by_path]
if lost_assets: # losers get their Asset removed
for id_chunk in _iter_chunks(lost_assets, MAX_BIND_PARAMS):
session.execute(sqlalchemy.delete(Asset).where(Asset.id.in_(id_chunk)))
if not winners_by_path:
return {"inserted_infos": 0, "won_states": 0, "lost_states": len(losers_by_path)}
# insert AssetInfo only for winners
# Insert with ON CONFLICT DO NOTHING, then query to find which were actually inserted
winner_info_rows = [asset_to_info[path_to_asset[p]] for p in winners_by_path]
ins_info = (
sqlite.insert(AssetInfo)
.on_conflict_do_nothing(index_elements=[AssetInfo.asset_id, AssetInfo.owner_id, AssetInfo.name])
)
for chunk in _iter_chunks(winner_info_rows, _rows_per_stmt(9)):
session.execute(ins_info, chunk)
# Query to find which info rows were actually inserted (by matching our generated IDs)
all_info_ids = [row["id"] for row in winner_info_rows]
inserted_info_ids: set[str] = set()
for chunk in _iter_chunks(all_info_ids, MAX_BIND_PARAMS):
result = session.execute(
sqlalchemy.select(AssetInfo.id).where(AssetInfo.id.in_(chunk))
)
inserted_info_ids.update(result.scalars().all())
# build and insert tag + meta rows for the AssetInfo
tag_rows: list[dict] = []
meta_rows: list[dict] = []
if inserted_info_ids:
for row in winner_info_rows:
iid = row["id"]
if iid not in inserted_info_ids:
continue
for t in row["_tags"]:
tag_rows.append({
"asset_info_id": iid,
"tag_name": t,
"origin": "automatic",
"added_at": now,
})
if row["_filename"]:
meta_rows.append(
{
"asset_info_id": iid,
"key": "filename",
"ordinal": 0,
"val_str": row["_filename"],
"val_num": None,
"val_bool": None,
"val_json": None,
}
)
bulk_insert_tags_and_meta(session, tag_rows=tag_rows, meta_rows=meta_rows, max_bind_params=MAX_BIND_PARAMS)
return {
"inserted_infos": len(inserted_info_ids),
"won_states": len(winners_by_path),
"lost_states": len(losers_by_path),
}
def bulk_insert_tags_and_meta(
session: Session,
*,
tag_rows: list[dict],
meta_rows: list[dict],
max_bind_params: int,
) -> None:
"""Batch insert into asset_info_tags and asset_info_meta with ON CONFLICT DO NOTHING.
- tag_rows keys: asset_info_id, tag_name, origin, added_at
- meta_rows keys: asset_info_id, key, ordinal, val_str, val_num, val_bool, val_json
"""
if tag_rows:
ins_links = (
sqlite.insert(AssetInfoTag)
.on_conflict_do_nothing(index_elements=[AssetInfoTag.asset_info_id, AssetInfoTag.tag_name])
)
for chunk in _chunk_rows(tag_rows, cols_per_row=4, max_bind_params=max_bind_params):
session.execute(ins_links, chunk)
if meta_rows:
ins_meta = (
sqlite.insert(AssetInfoMeta)
.on_conflict_do_nothing(
index_elements=[AssetInfoMeta.asset_info_id, AssetInfoMeta.key, AssetInfoMeta.ordinal]
)
)
for chunk in _chunk_rows(meta_rows, cols_per_row=7, max_bind_params=max_bind_params):
session.execute(ins_meta, chunk)
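
A sketch of how a caller might drive seed_from_paths_batch, assuming an open SQLAlchemy Session bound to the application database; paths, sizes, and mtimes are illustrative.

from sqlalchemy.orm import Session

def seed_two_files(session: Session) -> None:
    specs = [
        {
            "abs_path": "/data/models/vae/ae.safetensors",  # illustrative path
            "size_bytes": 334_643_238,
            "mtime_ns": 1_700_000_000_000_000_000,
            "info_name": "ae.safetensors",
            "tags": ["models", "vae"],
            "fname": "ae.safetensors",
        },
        {
            "abs_path": "/data/input/example.png",
            "size_bytes": 12_345,
            "mtime_ns": 1_700_000_000_000_000_000,
            "info_name": "example.png",
            "tags": ["input"],
            "fname": None,
        },
    ]
    stats = seed_from_paths_batch(session, specs=specs, owner_id="")
    session.commit()
    print(stats)  # e.g. {"inserted_infos": 2, "won_states": 2, "lost_states": 0}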

View File

@@ -1,233 +0,0 @@
from __future__ import annotations
import uuid
from datetime import datetime
from typing import Any
from sqlalchemy import (
JSON,
BigInteger,
Boolean,
CheckConstraint,
DateTime,
ForeignKey,
Index,
Integer,
Numeric,
String,
Text,
UniqueConstraint,
)
from sqlalchemy.orm import Mapped, foreign, mapped_column, relationship
from app.assets.helpers import utcnow
from app.database.models import to_dict, Base
class Asset(Base):
__tablename__ = "assets"
id: Mapped[str] = mapped_column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
hash: Mapped[str | None] = mapped_column(String(256), nullable=True)
size_bytes: Mapped[int] = mapped_column(BigInteger, nullable=False, default=0)
mime_type: Mapped[str | None] = mapped_column(String(255))
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=False), nullable=False, default=utcnow
)
infos: Mapped[list[AssetInfo]] = relationship(
"AssetInfo",
back_populates="asset",
primaryjoin=lambda: Asset.id == foreign(AssetInfo.asset_id),
foreign_keys=lambda: [AssetInfo.asset_id],
cascade="all,delete-orphan",
passive_deletes=True,
)
preview_of: Mapped[list[AssetInfo]] = relationship(
"AssetInfo",
back_populates="preview_asset",
primaryjoin=lambda: Asset.id == foreign(AssetInfo.preview_id),
foreign_keys=lambda: [AssetInfo.preview_id],
viewonly=True,
)
cache_states: Mapped[list[AssetCacheState]] = relationship(
back_populates="asset",
cascade="all, delete-orphan",
passive_deletes=True,
)
__table_args__ = (
Index("uq_assets_hash", "hash", unique=True),
Index("ix_assets_mime_type", "mime_type"),
CheckConstraint("size_bytes >= 0", name="ck_assets_size_nonneg"),
)
def to_dict(self, include_none: bool = False) -> dict[str, Any]:
return to_dict(self, include_none=include_none)
def __repr__(self) -> str:
return f"<Asset id={self.id} hash={(self.hash or '')[:12]}>"
class AssetCacheState(Base):
__tablename__ = "asset_cache_state"
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
asset_id: Mapped[str] = mapped_column(String(36), ForeignKey("assets.id", ondelete="CASCADE"), nullable=False)
file_path: Mapped[str] = mapped_column(Text, nullable=False)
mtime_ns: Mapped[int | None] = mapped_column(BigInteger, nullable=True)
needs_verify: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
asset: Mapped[Asset] = relationship(back_populates="cache_states")
__table_args__ = (
Index("ix_asset_cache_state_file_path", "file_path"),
Index("ix_asset_cache_state_asset_id", "asset_id"),
CheckConstraint("(mtime_ns IS NULL) OR (mtime_ns >= 0)", name="ck_acs_mtime_nonneg"),
UniqueConstraint("file_path", name="uq_asset_cache_state_file_path"),
)
def to_dict(self, include_none: bool = False) -> dict[str, Any]:
return to_dict(self, include_none=include_none)
def __repr__(self) -> str:
return f"<AssetCacheState id={self.id} asset_id={self.asset_id} path={self.file_path!r}>"
class AssetInfo(Base):
__tablename__ = "assets_info"
id: Mapped[str] = mapped_column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
owner_id: Mapped[str] = mapped_column(String(128), nullable=False, default="")
name: Mapped[str] = mapped_column(String(512), nullable=False)
asset_id: Mapped[str] = mapped_column(String(36), ForeignKey("assets.id", ondelete="RESTRICT"), nullable=False)
preview_id: Mapped[str | None] = mapped_column(String(36), ForeignKey("assets.id", ondelete="SET NULL"))
user_metadata: Mapped[dict[str, Any] | None] = mapped_column(JSON(none_as_null=True))
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=False), nullable=False, default=utcnow)
updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=False), nullable=False, default=utcnow)
last_access_time: Mapped[datetime] = mapped_column(DateTime(timezone=False), nullable=False, default=utcnow)
asset: Mapped[Asset] = relationship(
"Asset",
back_populates="infos",
foreign_keys=[asset_id],
lazy="selectin",
)
preview_asset: Mapped[Asset | None] = relationship(
"Asset",
back_populates="preview_of",
foreign_keys=[preview_id],
)
metadata_entries: Mapped[list[AssetInfoMeta]] = relationship(
back_populates="asset_info",
cascade="all,delete-orphan",
passive_deletes=True,
)
tag_links: Mapped[list[AssetInfoTag]] = relationship(
back_populates="asset_info",
cascade="all,delete-orphan",
passive_deletes=True,
overlaps="tags,asset_infos",
)
tags: Mapped[list[Tag]] = relationship(
secondary="asset_info_tags",
back_populates="asset_infos",
lazy="selectin",
viewonly=True,
overlaps="tag_links,asset_info_links,asset_infos,tag",
)
__table_args__ = (
UniqueConstraint("asset_id", "owner_id", "name", name="uq_assets_info_asset_owner_name"),
Index("ix_assets_info_owner_name", "owner_id", "name"),
Index("ix_assets_info_owner_id", "owner_id"),
Index("ix_assets_info_asset_id", "asset_id"),
Index("ix_assets_info_name", "name"),
Index("ix_assets_info_created_at", "created_at"),
Index("ix_assets_info_last_access_time", "last_access_time"),
)
def to_dict(self, include_none: bool = False) -> dict[str, Any]:
data = to_dict(self, include_none=include_none)
data["tags"] = [t.name for t in self.tags]
return data
def __repr__(self) -> str:
return f"<AssetInfo id={self.id} name={self.name!r} asset_id={self.asset_id}>"
class AssetInfoMeta(Base):
__tablename__ = "asset_info_meta"
asset_info_id: Mapped[str] = mapped_column(
String(36), ForeignKey("assets_info.id", ondelete="CASCADE"), primary_key=True
)
key: Mapped[str] = mapped_column(String(256), primary_key=True)
ordinal: Mapped[int] = mapped_column(Integer, primary_key=True, default=0)
val_str: Mapped[str | None] = mapped_column(String(2048), nullable=True)
val_num: Mapped[float | None] = mapped_column(Numeric(38, 10), nullable=True)
val_bool: Mapped[bool | None] = mapped_column(Boolean, nullable=True)
val_json: Mapped[Any | None] = mapped_column(JSON(none_as_null=True), nullable=True)
asset_info: Mapped[AssetInfo] = relationship(back_populates="metadata_entries")
__table_args__ = (
Index("ix_asset_info_meta_key", "key"),
Index("ix_asset_info_meta_key_val_str", "key", "val_str"),
Index("ix_asset_info_meta_key_val_num", "key", "val_num"),
Index("ix_asset_info_meta_key_val_bool", "key", "val_bool"),
)
class AssetInfoTag(Base):
__tablename__ = "asset_info_tags"
asset_info_id: Mapped[str] = mapped_column(
String(36), ForeignKey("assets_info.id", ondelete="CASCADE"), primary_key=True
)
tag_name: Mapped[str] = mapped_column(
String(512), ForeignKey("tags.name", ondelete="RESTRICT"), primary_key=True
)
origin: Mapped[str] = mapped_column(String(32), nullable=False, default="manual")
added_at: Mapped[datetime] = mapped_column(
DateTime(timezone=False), nullable=False, default=utcnow
)
asset_info: Mapped[AssetInfo] = relationship(back_populates="tag_links")
tag: Mapped[Tag] = relationship(back_populates="asset_info_links")
__table_args__ = (
Index("ix_asset_info_tags_tag_name", "tag_name"),
Index("ix_asset_info_tags_asset_info_id", "asset_info_id"),
)
class Tag(Base):
__tablename__ = "tags"
name: Mapped[str] = mapped_column(String(512), primary_key=True)
tag_type: Mapped[str] = mapped_column(String(32), nullable=False, default="user")
asset_info_links: Mapped[list[AssetInfoTag]] = relationship(
back_populates="tag",
overlaps="asset_infos,tags",
)
asset_infos: Mapped[list[AssetInfo]] = relationship(
secondary="asset_info_tags",
back_populates="tags",
viewonly=True,
overlaps="asset_info_links,tag_links,tags,asset_info",
)
__table_args__ = (
Index("ix_tags_tag_type", "tag_type"),
)
def __repr__(self) -> str:
return f"<Tag {self.name}>"

View File

@@ -1,976 +0,0 @@
import os
import logging
import sqlalchemy as sa
from collections import defaultdict
from datetime import datetime
from typing import Iterable, Any
from sqlalchemy import select, delete, exists, func
from sqlalchemy.dialects import sqlite
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session, contains_eager, noload
from app.assets.database.models import Asset, AssetInfo, AssetCacheState, AssetInfoMeta, AssetInfoTag, Tag
from app.assets.helpers import (
compute_relative_filename, escape_like_prefix, normalize_tags, project_kv, utcnow
)
from typing import Sequence
def visible_owner_clause(owner_id: str) -> sa.sql.ClauseElement:
"""Build owner visibility predicate for reads. Owner-less rows are visible to everyone."""
owner_id = (owner_id or "").strip()
if owner_id == "":
return AssetInfo.owner_id == ""
return AssetInfo.owner_id.in_(["", owner_id])
def pick_best_live_path(states: Sequence[AssetCacheState]) -> str:
"""
Return the best on-disk path among cache states:
1) Prefer a path that exists with needs_verify == False (already verified).
2) Otherwise, pick the first path that exists.
3) Otherwise return empty string.
"""
alive = [s for s in states if getattr(s, "file_path", None) and os.path.isfile(s.file_path)]
if not alive:
return ""
for s in alive:
if not getattr(s, "needs_verify", False):
return s.file_path
return alive[0].file_path
def apply_tag_filters(
stmt: sa.sql.Select,
include_tags: Sequence[str] | None = None,
exclude_tags: Sequence[str] | None = None,
) -> sa.sql.Select:
"""include_tags: every tag must be present; exclude_tags: none may be present."""
include_tags = normalize_tags(include_tags)
exclude_tags = normalize_tags(exclude_tags)
if include_tags:
for tag_name in include_tags:
stmt = stmt.where(
exists().where(
(AssetInfoTag.asset_info_id == AssetInfo.id)
& (AssetInfoTag.tag_name == tag_name)
)
)
if exclude_tags:
stmt = stmt.where(
~exists().where(
(AssetInfoTag.asset_info_id == AssetInfo.id)
& (AssetInfoTag.tag_name.in_(exclude_tags))
)
)
return stmt
def apply_metadata_filter(
stmt: sa.sql.Select,
metadata_filter: dict | None = None,
) -> sa.sql.Select:
"""Apply filters using asset_info_meta projection table."""
if not metadata_filter:
return stmt
def _exists_for_pred(key: str, *preds) -> sa.sql.ClauseElement:
return sa.exists().where(
AssetInfoMeta.asset_info_id == AssetInfo.id,
AssetInfoMeta.key == key,
*preds,
)
def _exists_clause_for_value(key: str, value) -> sa.sql.ClauseElement:
if value is None:
no_row_for_key = sa.not_(
sa.exists().where(
AssetInfoMeta.asset_info_id == AssetInfo.id,
AssetInfoMeta.key == key,
)
)
null_row = _exists_for_pred(
key,
AssetInfoMeta.val_json.is_(None),
AssetInfoMeta.val_str.is_(None),
AssetInfoMeta.val_num.is_(None),
AssetInfoMeta.val_bool.is_(None),
)
return sa.or_(no_row_for_key, null_row)
if isinstance(value, bool):
return _exists_for_pred(key, AssetInfoMeta.val_bool == bool(value))
if isinstance(value, (int, float)):
from decimal import Decimal
num = value if isinstance(value, Decimal) else Decimal(str(value))
return _exists_for_pred(key, AssetInfoMeta.val_num == num)
if isinstance(value, str):
return _exists_for_pred(key, AssetInfoMeta.val_str == value)
return _exists_for_pred(key, AssetInfoMeta.val_json == value)
for k, v in metadata_filter.items():
if isinstance(v, list):
ors = [_exists_clause_for_value(k, elem) for elem in v]
if ors:
stmt = stmt.where(sa.or_(*ors))
else:
stmt = stmt.where(_exists_clause_for_value(k, v))
return stmt
def asset_exists_by_hash(
session: Session,
*,
asset_hash: str,
) -> bool:
"""
    Check whether an asset with the given hash exists in the database.
"""
row = (
session.execute(
select(sa.literal(True)).select_from(Asset).where(Asset.hash == asset_hash).limit(1)
)
).first()
return row is not None
def asset_info_exists_for_asset_id(
session: Session,
*,
asset_id: str,
) -> bool:
q = (
select(sa.literal(True))
.select_from(AssetInfo)
.where(AssetInfo.asset_id == asset_id)
.limit(1)
)
return (session.execute(q)).first() is not None
def get_asset_by_hash(
session: Session,
*,
asset_hash: str,
) -> Asset | None:
return (
session.execute(select(Asset).where(Asset.hash == asset_hash).limit(1))
).scalars().first()
def get_asset_info_by_id(
session: Session,
*,
asset_info_id: str,
) -> AssetInfo | None:
return session.get(AssetInfo, asset_info_id)
def list_asset_infos_page(
session: Session,
owner_id: str = "",
include_tags: Sequence[str] | None = None,
exclude_tags: Sequence[str] | None = None,
name_contains: str | None = None,
metadata_filter: dict | None = None,
limit: int = 20,
offset: int = 0,
sort: str = "created_at",
order: str = "desc",
) -> tuple[list[AssetInfo], dict[str, list[str]], int]:
base = (
select(AssetInfo)
.join(Asset, Asset.id == AssetInfo.asset_id)
.options(contains_eager(AssetInfo.asset), noload(AssetInfo.tags))
.where(visible_owner_clause(owner_id))
)
if name_contains:
escaped, esc = escape_like_prefix(name_contains)
base = base.where(AssetInfo.name.ilike(f"%{escaped}%", escape=esc))
base = apply_tag_filters(base, include_tags, exclude_tags)
base = apply_metadata_filter(base, metadata_filter)
sort = (sort or "created_at").lower()
order = (order or "desc").lower()
sort_map = {
"name": AssetInfo.name,
"created_at": AssetInfo.created_at,
"updated_at": AssetInfo.updated_at,
"last_access_time": AssetInfo.last_access_time,
"size": Asset.size_bytes,
}
sort_col = sort_map.get(sort, AssetInfo.created_at)
sort_exp = sort_col.desc() if order == "desc" else sort_col.asc()
base = base.order_by(sort_exp).limit(limit).offset(offset)
count_stmt = (
select(sa.func.count())
.select_from(AssetInfo)
.join(Asset, Asset.id == AssetInfo.asset_id)
.where(visible_owner_clause(owner_id))
)
if name_contains:
escaped, esc = escape_like_prefix(name_contains)
count_stmt = count_stmt.where(AssetInfo.name.ilike(f"%{escaped}%", escape=esc))
count_stmt = apply_tag_filters(count_stmt, include_tags, exclude_tags)
count_stmt = apply_metadata_filter(count_stmt, metadata_filter)
total = int((session.execute(count_stmt)).scalar_one() or 0)
infos = (session.execute(base)).unique().scalars().all()
id_list: list[str] = [i.id for i in infos]
tag_map: dict[str, list[str]] = defaultdict(list)
if id_list:
rows = session.execute(
select(AssetInfoTag.asset_info_id, Tag.name)
.join(Tag, Tag.name == AssetInfoTag.tag_name)
.where(AssetInfoTag.asset_info_id.in_(id_list))
.order_by(AssetInfoTag.added_at)
)
for aid, tag_name in rows.all():
tag_map[aid].append(tag_name)
return infos, tag_map, total
def fetch_asset_info_asset_and_tags(
session: Session,
asset_info_id: str,
owner_id: str = "",
) -> tuple[AssetInfo, Asset, list[str]] | None:
stmt = (
select(AssetInfo, Asset, Tag.name)
.join(Asset, Asset.id == AssetInfo.asset_id)
.join(AssetInfoTag, AssetInfoTag.asset_info_id == AssetInfo.id, isouter=True)
.join(Tag, Tag.name == AssetInfoTag.tag_name, isouter=True)
.where(
AssetInfo.id == asset_info_id,
visible_owner_clause(owner_id),
)
.options(noload(AssetInfo.tags))
.order_by(Tag.name.asc())
)
rows = (session.execute(stmt)).all()
if not rows:
return None
first_info, first_asset, _ = rows[0]
tags: list[str] = []
seen: set[str] = set()
for _info, _asset, tag_name in rows:
if tag_name and tag_name not in seen:
seen.add(tag_name)
tags.append(tag_name)
return first_info, first_asset, tags
def fetch_asset_info_and_asset(
session: Session,
*,
asset_info_id: str,
owner_id: str = "",
) -> tuple[AssetInfo, Asset] | None:
stmt = (
select(AssetInfo, Asset)
.join(Asset, Asset.id == AssetInfo.asset_id)
.where(
AssetInfo.id == asset_info_id,
visible_owner_clause(owner_id),
)
.limit(1)
.options(noload(AssetInfo.tags))
)
row = session.execute(stmt)
pair = row.first()
if not pair:
return None
return pair[0], pair[1]
def list_cache_states_by_asset_id(
session: Session, *, asset_id: str
) -> Sequence[AssetCacheState]:
return (
session.execute(
select(AssetCacheState)
.where(AssetCacheState.asset_id == asset_id)
.order_by(AssetCacheState.id.asc())
)
).scalars().all()
def touch_asset_info_by_id(
session: Session,
*,
asset_info_id: str,
ts: datetime | None = None,
only_if_newer: bool = True,
) -> None:
ts = ts or utcnow()
stmt = sa.update(AssetInfo).where(AssetInfo.id == asset_info_id)
if only_if_newer:
stmt = stmt.where(
sa.or_(AssetInfo.last_access_time.is_(None), AssetInfo.last_access_time < ts)
)
session.execute(stmt.values(last_access_time=ts))
def create_asset_info_for_existing_asset(
session: Session,
*,
asset_hash: str,
name: str,
user_metadata: dict | None = None,
tags: Sequence[str] | None = None,
tag_origin: str = "manual",
owner_id: str = "",
) -> AssetInfo:
"""Create or return an existing AssetInfo for an Asset identified by asset_hash."""
now = utcnow()
asset = get_asset_by_hash(session, asset_hash=asset_hash)
if not asset:
raise ValueError(f"Unknown asset hash {asset_hash}")
info = AssetInfo(
owner_id=owner_id,
name=name,
asset_id=asset.id,
preview_id=None,
created_at=now,
updated_at=now,
last_access_time=now,
)
try:
with session.begin_nested():
session.add(info)
session.flush()
except IntegrityError:
existing = (
session.execute(
select(AssetInfo)
.options(noload(AssetInfo.tags))
.where(
AssetInfo.asset_id == asset.id,
AssetInfo.name == name,
AssetInfo.owner_id == owner_id,
)
.limit(1)
)
).unique().scalars().first()
if not existing:
raise RuntimeError("AssetInfo upsert failed to find existing row after conflict.")
return existing
# metadata["filename"] hack
new_meta = dict(user_metadata or {})
computed_filename = None
try:
p = pick_best_live_path(list_cache_states_by_asset_id(session, asset_id=asset.id))
if p:
computed_filename = compute_relative_filename(p)
except Exception:
computed_filename = None
if computed_filename:
new_meta["filename"] = computed_filename
if new_meta:
replace_asset_info_metadata_projection(
session,
asset_info_id=info.id,
user_metadata=new_meta,
)
if tags is not None:
set_asset_info_tags(
session,
asset_info_id=info.id,
tags=tags,
origin=tag_origin,
)
return info
def set_asset_info_tags(
session: Session,
*,
asset_info_id: str,
tags: Sequence[str],
origin: str = "manual",
) -> dict:
desired = normalize_tags(tags)
current = set(
tag_name for (tag_name,) in (
session.execute(select(AssetInfoTag.tag_name).where(AssetInfoTag.asset_info_id == asset_info_id))
).all()
)
to_add = [t for t in desired if t not in current]
to_remove = [t for t in current if t not in desired]
if to_add:
ensure_tags_exist(session, to_add, tag_type="user")
session.add_all([
AssetInfoTag(asset_info_id=asset_info_id, tag_name=t, origin=origin, added_at=utcnow())
for t in to_add
])
session.flush()
if to_remove:
session.execute(
delete(AssetInfoTag)
.where(AssetInfoTag.asset_info_id == asset_info_id, AssetInfoTag.tag_name.in_(to_remove))
)
session.flush()
return {"added": to_add, "removed": to_remove, "total": desired}
def replace_asset_info_metadata_projection(
session: Session,
*,
asset_info_id: str,
user_metadata: dict | None = None,
) -> None:
info = session.get(AssetInfo, asset_info_id)
if not info:
raise ValueError(f"AssetInfo {asset_info_id} not found")
info.user_metadata = user_metadata or {}
info.updated_at = utcnow()
session.flush()
session.execute(delete(AssetInfoMeta).where(AssetInfoMeta.asset_info_id == asset_info_id))
session.flush()
if not user_metadata:
return
rows: list[AssetInfoMeta] = []
for k, v in user_metadata.items():
for r in project_kv(k, v):
rows.append(
AssetInfoMeta(
asset_info_id=asset_info_id,
key=r["key"],
ordinal=int(r["ordinal"]),
val_str=r.get("val_str"),
val_num=r.get("val_num"),
val_bool=r.get("val_bool"),
val_json=r.get("val_json"),
)
)
if rows:
session.add_all(rows)
session.flush()
def ingest_fs_asset(
session: Session,
*,
asset_hash: str,
abs_path: str,
size_bytes: int,
mtime_ns: int,
mime_type: str | None = None,
info_name: str | None = None,
owner_id: str = "",
preview_id: str | None = None,
user_metadata: dict | None = None,
tags: Sequence[str] = (),
tag_origin: str = "manual",
require_existing_tags: bool = False,
) -> dict:
"""
Idempotently upsert:
- Asset by content hash (create if missing)
- AssetCacheState(file_path) pointing to asset_id
- Optionally AssetInfo + tag links and metadata projection
Returns flags and ids.
"""
locator = os.path.abspath(abs_path)
now = utcnow()
if preview_id:
if not session.get(Asset, preview_id):
preview_id = None
out: dict[str, Any] = {
"asset_created": False,
"asset_updated": False,
"state_created": False,
"state_updated": False,
"asset_info_id": None,
}
# 1) Asset by hash
asset = (
session.execute(select(Asset).where(Asset.hash == asset_hash).limit(1))
).scalars().first()
if not asset:
vals = {
"hash": asset_hash,
"size_bytes": int(size_bytes),
"mime_type": mime_type,
"created_at": now,
}
res = session.execute(
sqlite.insert(Asset)
.values(**vals)
.on_conflict_do_nothing(index_elements=[Asset.hash])
)
if int(res.rowcount or 0) > 0:
out["asset_created"] = True
asset = (
session.execute(
select(Asset).where(Asset.hash == asset_hash).limit(1)
)
).scalars().first()
if not asset:
raise RuntimeError("Asset row not found after upsert.")
else:
changed = False
if asset.size_bytes != int(size_bytes) and int(size_bytes) > 0:
asset.size_bytes = int(size_bytes)
changed = True
if mime_type and asset.mime_type != mime_type:
asset.mime_type = mime_type
changed = True
if changed:
out["asset_updated"] = True
# 2) AssetCacheState upsert by file_path (unique)
vals = {
"asset_id": asset.id,
"file_path": locator,
"mtime_ns": int(mtime_ns),
}
ins = (
sqlite.insert(AssetCacheState)
.values(**vals)
.on_conflict_do_nothing(index_elements=[AssetCacheState.file_path])
)
res = session.execute(ins)
if int(res.rowcount or 0) > 0:
out["state_created"] = True
else:
upd = (
sa.update(AssetCacheState)
.where(AssetCacheState.file_path == locator)
.where(
sa.or_(
AssetCacheState.asset_id != asset.id,
AssetCacheState.mtime_ns.is_(None),
AssetCacheState.mtime_ns != int(mtime_ns),
)
)
.values(asset_id=asset.id, mtime_ns=int(mtime_ns))
)
res2 = session.execute(upd)
if int(res2.rowcount or 0) > 0:
out["state_updated"] = True
# 3) Optional AssetInfo + tags + metadata
if info_name:
try:
with session.begin_nested():
info = AssetInfo(
owner_id=owner_id,
name=info_name,
asset_id=asset.id,
preview_id=preview_id,
created_at=now,
updated_at=now,
last_access_time=now,
)
session.add(info)
session.flush()
out["asset_info_id"] = info.id
except IntegrityError:
pass
existing_info = (
session.execute(
select(AssetInfo)
.where(
AssetInfo.asset_id == asset.id,
AssetInfo.name == info_name,
(AssetInfo.owner_id == owner_id),
)
.limit(1)
)
).unique().scalar_one_or_none()
if not existing_info:
raise RuntimeError("Failed to update or insert AssetInfo.")
if preview_id and existing_info.preview_id != preview_id:
existing_info.preview_id = preview_id
existing_info.updated_at = now
if existing_info.last_access_time < now:
existing_info.last_access_time = now
session.flush()
out["asset_info_id"] = existing_info.id
norm = [t.strip().lower() for t in (tags or []) if (t or "").strip()]
if norm and out["asset_info_id"] is not None:
if not require_existing_tags:
ensure_tags_exist(session, norm, tag_type="user")
existing_tag_names = set(
name for (name,) in (session.execute(select(Tag.name).where(Tag.name.in_(norm)))).all()
)
missing = [t for t in norm if t not in existing_tag_names]
if missing and require_existing_tags:
raise ValueError(f"Unknown tags: {missing}")
existing_links = set(
tag_name
for (tag_name,) in (
session.execute(
select(AssetInfoTag.tag_name).where(AssetInfoTag.asset_info_id == out["asset_info_id"])
)
).all()
)
to_add = [t for t in norm if t in existing_tag_names and t not in existing_links]
if to_add:
session.add_all(
[
AssetInfoTag(
asset_info_id=out["asset_info_id"],
tag_name=t,
origin=tag_origin,
added_at=now,
)
for t in to_add
]
)
session.flush()
# metadata["filename"] hack
if out["asset_info_id"] is not None:
primary_path = pick_best_live_path(list_cache_states_by_asset_id(session, asset_id=asset.id))
computed_filename = compute_relative_filename(primary_path) if primary_path else None
current_meta = existing_info.user_metadata or {}
new_meta = dict(current_meta)
if user_metadata is not None:
for k, v in user_metadata.items():
new_meta[k] = v
if computed_filename:
new_meta["filename"] = computed_filename
if new_meta != current_meta:
replace_asset_info_metadata_projection(
session,
asset_info_id=out["asset_info_id"],
user_metadata=new_meta,
)
try:
remove_missing_tag_for_asset_id(session, asset_id=asset.id)
except Exception:
logging.exception("Failed to clear 'missing' tag for asset %s", asset.id)
return out
def update_asset_info_full(
session: Session,
*,
asset_info_id: str,
name: str | None = None,
tags: Sequence[str] | None = None,
user_metadata: dict | None = None,
tag_origin: str = "manual",
asset_info_row: Any = None,
) -> AssetInfo:
if not asset_info_row:
info = session.get(AssetInfo, asset_info_id)
if not info:
raise ValueError(f"AssetInfo {asset_info_id} not found")
else:
info = asset_info_row
touched = False
if name is not None and name != info.name:
info.name = name
touched = True
computed_filename = None
try:
p = pick_best_live_path(list_cache_states_by_asset_id(session, asset_id=info.asset_id))
if p:
computed_filename = compute_relative_filename(p)
except Exception:
computed_filename = None
if user_metadata is not None:
new_meta = dict(user_metadata)
if computed_filename:
new_meta["filename"] = computed_filename
replace_asset_info_metadata_projection(
session, asset_info_id=asset_info_id, user_metadata=new_meta
)
touched = True
else:
if computed_filename:
current_meta = info.user_metadata or {}
if current_meta.get("filename") != computed_filename:
new_meta = dict(current_meta)
new_meta["filename"] = computed_filename
replace_asset_info_metadata_projection(
session, asset_info_id=asset_info_id, user_metadata=new_meta
)
touched = True
if tags is not None:
set_asset_info_tags(
session,
asset_info_id=asset_info_id,
tags=tags,
origin=tag_origin,
)
touched = True
if touched and user_metadata is None:
info.updated_at = utcnow()
session.flush()
return info
def delete_asset_info_by_id(
session: Session,
*,
asset_info_id: str,
owner_id: str,
) -> bool:
stmt = sa.delete(AssetInfo).where(
AssetInfo.id == asset_info_id,
visible_owner_clause(owner_id),
)
return int((session.execute(stmt)).rowcount or 0) > 0
def list_tags_with_usage(
session: Session,
prefix: str | None = None,
limit: int = 100,
offset: int = 0,
include_zero: bool = True,
order: str = "count_desc",
owner_id: str = "",
) -> tuple[list[tuple[str, str, int]], int]:
counts_sq = (
select(
AssetInfoTag.tag_name.label("tag_name"),
func.count(AssetInfoTag.asset_info_id).label("cnt"),
)
.select_from(AssetInfoTag)
.join(AssetInfo, AssetInfo.id == AssetInfoTag.asset_info_id)
.where(visible_owner_clause(owner_id))
.group_by(AssetInfoTag.tag_name)
.subquery()
)
q = (
select(
Tag.name,
Tag.tag_type,
func.coalesce(counts_sq.c.cnt, 0).label("count"),
)
.select_from(Tag)
.join(counts_sq, counts_sq.c.tag_name == Tag.name, isouter=True)
)
if prefix:
escaped, esc = escape_like_prefix(prefix.strip().lower())
q = q.where(Tag.name.like(escaped + "%", escape=esc))
if not include_zero:
q = q.where(func.coalesce(counts_sq.c.cnt, 0) > 0)
if order == "name_asc":
q = q.order_by(Tag.name.asc())
else:
q = q.order_by(func.coalesce(counts_sq.c.cnt, 0).desc(), Tag.name.asc())
total_q = select(func.count()).select_from(Tag)
if prefix:
escaped, esc = escape_like_prefix(prefix.strip().lower())
total_q = total_q.where(Tag.name.like(escaped + "%", escape=esc))
if not include_zero:
total_q = total_q.where(
Tag.name.in_(select(AssetInfoTag.tag_name).group_by(AssetInfoTag.tag_name))
)
rows = (session.execute(q.limit(limit).offset(offset))).all()
total = (session.execute(total_q)).scalar_one()
rows_norm = [(name, ttype, int(count or 0)) for (name, ttype, count) in rows]
return rows_norm, int(total or 0)
def ensure_tags_exist(session: Session, names: Iterable[str], tag_type: str = "user") -> None:
wanted = normalize_tags(list(names))
if not wanted:
return
rows = [{"name": n, "tag_type": tag_type} for n in list(dict.fromkeys(wanted))]
ins = (
sqlite.insert(Tag)
.values(rows)
.on_conflict_do_nothing(index_elements=[Tag.name])
)
session.execute(ins)
def get_asset_tags(session: Session, *, asset_info_id: str) -> list[str]:
return [
tag_name for (tag_name,) in (
session.execute(
select(AssetInfoTag.tag_name).where(AssetInfoTag.asset_info_id == asset_info_id)
)
).all()
]
def add_tags_to_asset_info(
session: Session,
*,
asset_info_id: str,
tags: Sequence[str],
origin: str = "manual",
create_if_missing: bool = True,
asset_info_row: Any = None,
) -> dict:
if not asset_info_row:
info = session.get(AssetInfo, asset_info_id)
if not info:
raise ValueError(f"AssetInfo {asset_info_id} not found")
norm = normalize_tags(tags)
if not norm:
total = get_asset_tags(session, asset_info_id=asset_info_id)
return {"added": [], "already_present": [], "total_tags": total}
if create_if_missing:
ensure_tags_exist(session, norm, tag_type="user")
current = {
tag_name
for (tag_name,) in (
session.execute(
sa.select(AssetInfoTag.tag_name).where(AssetInfoTag.asset_info_id == asset_info_id)
)
).all()
}
want = set(norm)
to_add = sorted(want - current)
if to_add:
with session.begin_nested() as nested:
try:
session.add_all(
[
AssetInfoTag(
asset_info_id=asset_info_id,
tag_name=t,
origin=origin,
added_at=utcnow(),
)
for t in to_add
]
)
session.flush()
except IntegrityError:
nested.rollback()
after = set(get_asset_tags(session, asset_info_id=asset_info_id))
return {
"added": sorted(((after - current) & want)),
"already_present": sorted(want & current),
"total_tags": sorted(after),
}
def remove_tags_from_asset_info(
session: Session,
*,
asset_info_id: str,
tags: Sequence[str],
) -> dict:
info = session.get(AssetInfo, asset_info_id)
if not info:
raise ValueError(f"AssetInfo {asset_info_id} not found")
norm = normalize_tags(tags)
if not norm:
total = get_asset_tags(session, asset_info_id=asset_info_id)
return {"removed": [], "not_present": [], "total_tags": total}
existing = {
tag_name
for (tag_name,) in (
session.execute(
sa.select(AssetInfoTag.tag_name).where(AssetInfoTag.asset_info_id == asset_info_id)
)
).all()
}
to_remove = sorted(set(t for t in norm if t in existing))
not_present = sorted(set(t for t in norm if t not in existing))
if to_remove:
session.execute(
delete(AssetInfoTag)
.where(
AssetInfoTag.asset_info_id == asset_info_id,
AssetInfoTag.tag_name.in_(to_remove),
)
)
session.flush()
total = get_asset_tags(session, asset_info_id=asset_info_id)
return {"removed": to_remove, "not_present": not_present, "total_tags": total}
def remove_missing_tag_for_asset_id(
session: Session,
*,
asset_id: str,
) -> None:
session.execute(
sa.delete(AssetInfoTag).where(
AssetInfoTag.asset_info_id.in_(sa.select(AssetInfo.id).where(AssetInfo.asset_id == asset_id)),
AssetInfoTag.tag_name == "missing",
)
)
def set_asset_info_preview(
session: Session,
*,
asset_info_id: str,
preview_asset_id: str | None = None,
) -> None:
"""Set or clear preview_id and bump updated_at. Raises on unknown IDs."""
info = session.get(AssetInfo, asset_info_id)
if not info:
raise ValueError(f"AssetInfo {asset_info_id} not found")
if preview_asset_id is None:
info.preview_id = None
else:
# validate preview asset exists
if not session.get(Asset, preview_asset_id):
raise ValueError(f"Preview Asset {preview_asset_id} not found")
info.preview_id = preview_asset_id
info.updated_at = utcnow()
session.flush()
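
Tying a few of the read helpers together, a hedged sketch of a listing call; it assumes an open Session and that some assets are already tagged.

from sqlalchemy.orm import Session

def show_recent_vae_assets(session: Session) -> None:
    infos, tag_map, total = list_asset_infos_page(
        session,
        owner_id="",
        include_tags=["models", "vae"],  # every included tag must be present
        exclude_tags=["missing"],        # skip entries flagged as missing on disk
        sort="created_at",
        order="desc",
        limit=10,
    )
    print(f"{total} match; showing {len(infos)}")
    for info in infos:
        print(info.name, tag_map.get(info.id, []), info.asset.size_bytes)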

View File

@@ -1,62 +0,0 @@
from typing import Iterable
import sqlalchemy
from sqlalchemy.orm import Session
from sqlalchemy.dialects import sqlite
from app.assets.helpers import normalize_tags, utcnow
from app.assets.database.models import Tag, AssetInfoTag, AssetInfo
def ensure_tags_exist(session: Session, names: Iterable[str], tag_type: str = "user") -> None:
wanted = normalize_tags(list(names))
if not wanted:
return
rows = [{"name": n, "tag_type": tag_type} for n in list(dict.fromkeys(wanted))]
ins = (
sqlite.insert(Tag)
.values(rows)
.on_conflict_do_nothing(index_elements=[Tag.name])
)
    session.execute(ins)
def add_missing_tag_for_asset_id(
session: Session,
*,
asset_id: str,
origin: str = "automatic",
) -> None:
select_rows = (
sqlalchemy.select(
AssetInfo.id.label("asset_info_id"),
sqlalchemy.literal("missing").label("tag_name"),
sqlalchemy.literal(origin).label("origin"),
sqlalchemy.literal(utcnow()).label("added_at"),
)
.where(AssetInfo.asset_id == asset_id)
.where(
sqlalchemy.not_(
sqlalchemy.exists().where((AssetInfoTag.asset_info_id == AssetInfo.id) & (AssetInfoTag.tag_name == "missing"))
)
)
)
session.execute(
sqlite.insert(AssetInfoTag)
.from_select(
["asset_info_id", "tag_name", "origin", "added_at"],
select_rows,
)
.on_conflict_do_nothing(index_elements=[AssetInfoTag.asset_info_id, AssetInfoTag.tag_name])
)
def remove_missing_tag_for_asset_id(
session: Session,
*,
asset_id: str,
) -> None:
session.execute(
sqlalchemy.delete(AssetInfoTag).where(
AssetInfoTag.asset_info_id.in_(sqlalchemy.select(AssetInfo.id).where(AssetInfo.asset_id == asset_id)),
AssetInfoTag.tag_name == "missing",
)
)
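
A small sketch of how a rescan step might use the helpers above to flag and unflag vanished files; reconcile_asset_presence is a hypothetical caller, not part of this module.

from sqlalchemy.orm import Session

def reconcile_asset_presence(session: Session, asset_id: str, on_disk: bool) -> None:
    if on_disk:
        remove_missing_tag_for_asset_id(session, asset_id=asset_id)
    else:
        ensure_tags_exist(session, ["missing"])  # the Tag row must exist before linking
        add_missing_tag_for_asset_id(session, asset_id=asset_id)
    session.commit()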

View File

@@ -1,75 +0,0 @@
from blake3 import blake3
from typing import IO
import os
import asyncio
DEFAULT_CHUNK = 8 * 1024 * 1024  # 8 MB
# NOTE: accepts either a filesystem path or an already-open binary file-like object
def blake3_hash(
fp: str | IO[bytes],
chunk_size: int = DEFAULT_CHUNK,
) -> str:
"""
Returns a BLAKE3 hex digest for ``fp``, which may be:
- a filename (str/bytes) or PathLike
- an open binary file object
If ``fp`` is a file object, it must be opened in **binary** mode and support
``read``, ``seek``, and ``tell``. The function will seek to the start before
reading and will attempt to restore the original position afterward.
"""
# duck typing to check if input is a file-like object
if hasattr(fp, "read"):
return _hash_file_obj(fp, chunk_size)
with open(os.fspath(fp), "rb") as f:
return _hash_file_obj(f, chunk_size)
async def blake3_hash_async(
fp: str | IO[bytes],
chunk_size: int = DEFAULT_CHUNK,
) -> str:
"""Async wrapper for ``blake3_hash_sync``.
Uses a worker thread so the event loop remains responsive.
"""
    # File-like objects are handed straight to the worker thread; paths are opened inside the worker to keep I/O off the event loop.
if hasattr(fp, "read"):
return await asyncio.to_thread(blake3_hash, fp, chunk_size)
def _worker() -> str:
with open(os.fspath(fp), "rb") as f:
return _hash_file_obj(f, chunk_size)
return await asyncio.to_thread(_worker)
def _hash_file_obj(file_obj: IO, chunk_size: int = DEFAULT_CHUNK) -> str:
"""
Hash an already-open binary file object by streaming in chunks.
- Seeks to the beginning before reading (if supported).
- Restores the original position afterward (if tell/seek are supported).
"""
if chunk_size <= 0:
chunk_size = DEFAULT_CHUNK
    # if the file object is not positioned at the beginning, remember where it was so the position can be restored after hashing
orig_pos = file_obj.tell()
try:
# seek to the beginning before reading
if orig_pos != 0:
file_obj.seek(0)
h = blake3()
while True:
chunk = file_obj.read(chunk_size)
if not chunk:
break
h.update(chunk)
return h.hexdigest()
finally:
# restore original position in file object, if needed
if orig_pos != 0:
file_obj.seek(orig_pos)
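
Usage sketch for the hashing helpers above; the sample path is illustrative and must exist for the path-based calls to succeed.

import asyncio
import io

digest = blake3_hash("/tmp/example.bin")      # hash a file by path
same = blake3_hash(io.BytesIO(b"hello"))      # or any seekable binary file object
print(same == blake3(b"hello").hexdigest())   # True: the streaming digest matches a one-shot digest

assert asyncio.run(blake3_hash_async("/tmp/example.bin")) == digest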

View File

@@ -1,312 +0,0 @@
import contextlib
import os
from decimal import Decimal
from aiohttp import web
from datetime import datetime, timezone
from pathlib import Path
from typing import Literal, Any
import folder_paths
RootType = Literal["models", "input", "output"]
ALLOWED_ROOTS: tuple[RootType, ...] = ("models", "input", "output")
def get_query_dict(request: web.Request) -> dict[str, Any]:
"""
    Build a dictionary of query parameters from the request.
    'request.query' is a MultiMapping[str]; it must be converted to a plain dictionary before Pydantic can validate it.
"""
query_dict = {
key: request.query.getall(key) if len(request.query.getall(key)) > 1 else request.query.get(key)
for key in request.query.keys()
}
return query_dict
def list_tree(base_dir: str) -> list[str]:
out: list[str] = []
base_abs = os.path.abspath(base_dir)
if not os.path.isdir(base_abs):
return out
for dirpath, _subdirs, filenames in os.walk(base_abs, topdown=True, followlinks=False):
for name in filenames:
out.append(os.path.abspath(os.path.join(dirpath, name)))
return out
def prefixes_for_root(root: RootType) -> list[str]:
if root == "models":
bases: list[str] = []
for _bucket, paths in get_comfy_models_folders():
bases.extend(paths)
return [os.path.abspath(p) for p in bases]
if root == "input":
return [os.path.abspath(folder_paths.get_input_directory())]
if root == "output":
return [os.path.abspath(folder_paths.get_output_directory())]
return []
def escape_like_prefix(s: str, escape: str = "!") -> tuple[str, str]:
"""Escapes %, _ and the escape char itself in a LIKE prefix.
Returns (escaped_prefix, escape_char). Caller should append '%' and pass escape=escape_char to .like().
"""
s = s.replace(escape, escape + escape) # escape the escape char first
s = s.replace("%", escape + "%").replace("_", escape + "_") # escape LIKE wildcards
return s, escape
def fast_asset_file_check(
*,
mtime_db: int | None,
size_db: int | None,
stat_result: os.stat_result,
) -> bool:
if mtime_db is None:
return False
actual_mtime_ns = getattr(stat_result, "st_mtime_ns", int(stat_result.st_mtime * 1_000_000_000))
if int(mtime_db) != int(actual_mtime_ns):
return False
sz = int(size_db or 0)
if sz > 0:
return int(stat_result.st_size) == sz
return True
def utcnow() -> datetime:
"""Naive UTC timestamp (no tzinfo). We always treat DB datetimes as UTC."""
return datetime.now(timezone.utc).replace(tzinfo=None)
def get_comfy_models_folders() -> list[tuple[str, list[str]]]:
"""Build a list of (folder_name, base_paths[]) categories that are configured for model locations.
We trust `folder_paths.folder_names_and_paths` and include a category if
*any* of its base paths lies under the Comfy `models_dir`.
"""
targets: list[tuple[str, list[str]]] = []
models_root = os.path.abspath(folder_paths.models_dir)
for name, values in folder_paths.folder_names_and_paths.items():
paths, _exts = values[0], values[1] # NOTE: this prevents nodepacks that hackily edit folder_... from breaking ComfyUI
if any(os.path.abspath(p).startswith(models_root + os.sep) for p in paths):
targets.append((name, paths))
return targets
def resolve_destination_from_tags(tags: list[str]) -> tuple[str, list[str]]:
"""Validates and maps tags -> (base_dir, subdirs_for_fs)"""
root = tags[0]
if root == "models":
if len(tags) < 2:
raise ValueError("at least two tags required for model asset")
try:
bases = folder_paths.folder_names_and_paths[tags[1]][0]
except KeyError:
raise ValueError(f"unknown model category '{tags[1]}'")
if not bases:
raise ValueError(f"no base path configured for category '{tags[1]}'")
base_dir = os.path.abspath(bases[0])
raw_subdirs = tags[2:]
else:
base_dir = os.path.abspath(
folder_paths.get_input_directory() if root == "input" else folder_paths.get_output_directory()
)
raw_subdirs = tags[1:]
for i in raw_subdirs:
if i in (".", ".."):
raise ValueError("invalid path component in tags")
return base_dir, raw_subdirs if raw_subdirs else []
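
A hedged illustration of the tag-to-path mapping above, assuming a stock folder_paths layout where 'loras' resolves under the models directory; the exact base paths depend on local configuration.

base_dir, subdirs = resolve_destination_from_tags(["models", "loras", "character", "v2"])
# base_dir -> first configured base for the 'loras' category, e.g. /.../models/loras
# subdirs  -> ["character", "v2"], presumably joined under base_dir by the caller

base_dir, subdirs = resolve_destination_from_tags(["output", "renders"])
# base_dir -> folder_paths.get_output_directory(); subdirs -> ["renders"]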
def ensure_within_base(candidate: str, base: str) -> None:
cand_abs = os.path.abspath(candidate)
base_abs = os.path.abspath(base)
try:
if os.path.commonpath([cand_abs, base_abs]) != base_abs:
raise ValueError("destination escapes base directory")
except Exception:
raise ValueError("invalid destination path")
def compute_relative_filename(file_path: str) -> str | None:
"""
Return the model's path relative to the last well-known folder (the model category),
using forward slashes, eg:
/.../models/checkpoints/flux/123/flux.safetensors -> "flux/123/flux.safetensors"
/.../models/text_encoders/clip_g.safetensors -> "clip_g.safetensors"
For non-model paths, returns None.
NOTE: this is a temporary helper, used only for initializing metadata["filename"] field.
"""
try:
root_category, rel_path = get_relative_to_root_category_path_of_asset(file_path)
except ValueError:
return None
p = Path(rel_path)
parts = [seg for seg in p.parts if seg not in (".", "..", p.anchor)]
if not parts:
return None
if root_category == "models":
        # parts[0] is the category ("checkpoints", "vae", etc.), so drop it
inside = parts[1:] if len(parts) > 1 else [parts[0]]
return "/".join(inside)
return "/".join(parts) # input/output: keep all parts
def get_relative_to_root_category_path_of_asset(file_path: str) -> tuple[Literal["input", "output", "models"], str]:
"""Given an absolute or relative file path, determine which root category the path belongs to:
- 'input' if the file resides under `folder_paths.get_input_directory()`
- 'output' if the file resides under `folder_paths.get_output_directory()`
- 'models' if the file resides under any base path of categories returned by `get_comfy_models_folders()`
Returns:
(root_category, relative_path_inside_that_root)
For 'models', the relative path is prefixed with the category name:
e.g. ('models', 'vae/test/sub/ae.safetensors')
Raises:
ValueError: if the path does not belong to input, output, or configured model bases.
"""
fp_abs = os.path.abspath(file_path)
def _is_within(child: str, parent: str) -> bool:
try:
return os.path.commonpath([child, parent]) == parent
except Exception:
return False
def _rel(child: str, parent: str) -> str:
return os.path.relpath(os.path.join(os.sep, os.path.relpath(child, parent)), os.sep)
# 1) input
input_base = os.path.abspath(folder_paths.get_input_directory())
if _is_within(fp_abs, input_base):
return "input", _rel(fp_abs, input_base)
# 2) output
output_base = os.path.abspath(folder_paths.get_output_directory())
if _is_within(fp_abs, output_base):
return "output", _rel(fp_abs, output_base)
# 3) models (check deepest matching base to avoid ambiguity)
best: tuple[int, str, str] | None = None # (base_len, bucket, rel_inside_bucket)
for bucket, bases in get_comfy_models_folders():
for b in bases:
base_abs = os.path.abspath(b)
if not _is_within(fp_abs, base_abs):
continue
cand = (len(base_abs), bucket, _rel(fp_abs, base_abs))
if best is None or cand[0] > best[0]:
best = cand
if best is not None:
_, bucket, rel_inside = best
combined = os.path.join(bucket, rel_inside)
return "models", os.path.relpath(os.path.join(os.sep, combined), os.sep)
raise ValueError(f"Path is not within input, output, or configured model bases: {file_path}")
def get_name_and_tags_from_asset_path(file_path: str) -> tuple[str, list[str]]:
"""Return a tuple (name, tags) derived from a filesystem path.
Semantics:
- Root category is determined by `get_relative_to_root_category_path_of_asset`.
- The returned `name` is the base filename with extension from the relative path.
- The returned `tags` are:
[root_category] + parent folders of the relative path (in order)
For 'models', this means:
file '/.../ModelsDir/vae/test_tag/ae.safetensors'
-> root_category='models', some_path='vae/test_tag/ae.safetensors'
-> name='ae.safetensors', tags=['models', 'vae', 'test_tag']
Raises:
ValueError: if the path does not belong to input, output, or configured model bases.
"""
root_category, some_path = get_relative_to_root_category_path_of_asset(file_path)
p = Path(some_path)
parent_parts = [part for part in p.parent.parts if part not in (".", "..", p.anchor)]
return p.name, list(dict.fromkeys(normalize_tags([root_category, *parent_parts])))
def normalize_tags(tags: list[str] | None) -> list[str]:
"""
Normalize a list of tags by:
- Stripping whitespace and converting to lowercase.
- Removing duplicates.
"""
return [t.strip().lower() for t in (tags or []) if (t or "").strip()]
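# Hypothetical usage sketch: normalization lowercases, strips, and drops empties but keeps
# duplicates, so callers that need uniqueness dedupe afterwards (as get_name_and_tags_from_asset_path does).
def _example_normalize_and_dedupe(tags: list[str]) -> list[str]:
    # normalize_tags([" Models", "VAE", "", "models"]) -> ["models", "vae", "models"]
    return list(dict.fromkeys(normalize_tags(tags)))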
def collect_models_files() -> list[str]:
out: list[str] = []
for folder_name, bases in get_comfy_models_folders():
rel_files = folder_paths.get_filename_list(folder_name) or []
for rel_path in rel_files:
abs_path = folder_paths.get_full_path(folder_name, rel_path)
if not abs_path:
continue
abs_path = os.path.abspath(abs_path)
allowed = False
for b in bases:
base_abs = os.path.abspath(b)
with contextlib.suppress(Exception):
if os.path.commonpath([abs_path, base_abs]) == base_abs:
allowed = True
break
if allowed:
out.append(abs_path)
return out
def is_scalar(v):
if v is None:
return True
if isinstance(v, bool):
return True
if isinstance(v, (int, float, Decimal, str)):
return True
return False
def project_kv(key: str, value):
"""
Turn a metadata key/value into typed projection rows.
    Returns list[dict] with keys:
    key, ordinal, and at most one of val_str / val_num / val_bool / val_json set
    (the remaining val_* keys are omitted or None).
"""
rows: list[dict] = []
def _null_row(ordinal: int) -> dict:
return {
"key": key, "ordinal": ordinal,
"val_str": None, "val_num": None, "val_bool": None, "val_json": None
}
if value is None:
rows.append(_null_row(0))
return rows
if is_scalar(value):
if isinstance(value, bool):
rows.append({"key": key, "ordinal": 0, "val_bool": bool(value)})
elif isinstance(value, (int, float, Decimal)):
num = value if isinstance(value, Decimal) else Decimal(str(value))
rows.append({"key": key, "ordinal": 0, "val_num": num})
elif isinstance(value, str):
rows.append({"key": key, "ordinal": 0, "val_str": value})
else:
rows.append({"key": key, "ordinal": 0, "val_json": value})
return rows
if isinstance(value, list):
if all(is_scalar(x) for x in value):
for i, x in enumerate(value):
if x is None:
rows.append(_null_row(i))
elif isinstance(x, bool):
rows.append({"key": key, "ordinal": i, "val_bool": bool(x)})
elif isinstance(x, (int, float, Decimal)):
num = x if isinstance(x, Decimal) else Decimal(str(x))
rows.append({"key": key, "ordinal": i, "val_num": num})
elif isinstance(x, str):
rows.append({"key": key, "ordinal": i, "val_str": x})
else:
rows.append({"key": key, "ordinal": i, "val_json": x})
return rows
for i, x in enumerate(value):
rows.append({"key": key, "ordinal": i, "val_json": x})
return rows
rows.append({"key": key, "ordinal": 0, "val_json": value})
return rows
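# Hypothetical usage sketch (key names are illustrative): how scalar, list, and dict
# metadata values project into typed rows.
def _example_projection_rows() -> list[dict]:
    rows: list[dict] = []
    rows += project_kv("steps", 30)                       # one row, val_num=Decimal("30")
    rows += project_kv("trigger_words", ["dog", "cat"])   # two rows, val_str with ordinals 0 and 1
    rows += project_kv("settings", {"cfg": 7.5})          # one row, val_json holding the dict
    return rows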

View File

@@ -1,516 +0,0 @@
import os
import mimetypes
import contextlib
from typing import Sequence
from app.database.db import create_session
from app.assets.api import schemas_out, schemas_in
from app.assets.database.queries import (
asset_exists_by_hash,
asset_info_exists_for_asset_id,
get_asset_by_hash,
get_asset_info_by_id,
fetch_asset_info_asset_and_tags,
fetch_asset_info_and_asset,
create_asset_info_for_existing_asset,
touch_asset_info_by_id,
update_asset_info_full,
delete_asset_info_by_id,
list_cache_states_by_asset_id,
list_asset_infos_page,
list_tags_with_usage,
get_asset_tags,
add_tags_to_asset_info,
remove_tags_from_asset_info,
pick_best_live_path,
ingest_fs_asset,
set_asset_info_preview,
)
from app.assets.helpers import resolve_destination_from_tags, ensure_within_base
from app.assets.database.models import Asset
def _safe_sort_field(requested: str | None) -> str:
if not requested:
return "created_at"
v = requested.lower()
if v in {"name", "created_at", "updated_at", "size", "last_access_time"}:
return v
return "created_at"
def _get_size_mtime_ns(path: str) -> tuple[int, int]:
st = os.stat(path, follow_symlinks=True)
return st.st_size, getattr(st, "st_mtime_ns", int(st.st_mtime * 1_000_000_000))
def _safe_filename(name: str | None, fallback: str) -> str:
n = os.path.basename((name or "").strip() or fallback)
if n:
return n
return fallback
def asset_exists(*, asset_hash: str) -> bool:
"""
    Check if an asset with the given hash exists in the database.
"""
with create_session() as session:
return asset_exists_by_hash(session, asset_hash=asset_hash)
def list_assets(
*,
include_tags: Sequence[str] | None = None,
exclude_tags: Sequence[str] | None = None,
name_contains: str | None = None,
metadata_filter: dict | None = None,
limit: int = 20,
offset: int = 0,
sort: str = "created_at",
order: str = "desc",
owner_id: str = "",
) -> schemas_out.AssetsList:
sort = _safe_sort_field(sort)
order = "desc" if (order or "desc").lower() not in {"asc", "desc"} else order.lower()
with create_session() as session:
infos, tag_map, total = list_asset_infos_page(
session,
owner_id=owner_id,
include_tags=include_tags,
exclude_tags=exclude_tags,
name_contains=name_contains,
metadata_filter=metadata_filter,
limit=limit,
offset=offset,
sort=sort,
order=order,
)
summaries: list[schemas_out.AssetSummary] = []
for info in infos:
asset = info.asset
tags = tag_map.get(info.id, [])
summaries.append(
schemas_out.AssetSummary(
id=info.id,
name=info.name,
asset_hash=asset.hash if asset else None,
size=int(asset.size_bytes) if asset else None,
mime_type=asset.mime_type if asset else None,
tags=tags,
created_at=info.created_at,
updated_at=info.updated_at,
last_access_time=info.last_access_time,
)
)
return schemas_out.AssetsList(
assets=summaries,
total=total,
has_more=(offset + len(summaries)) < total,
)
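# Hypothetical usage sketch (tag and filter values are illustrative): paging through
# checkpoint assets whose name contains "flux", newest first.
def _example_list_flux_checkpoints() -> schemas_out.AssetsList:
    return list_assets(
        include_tags=["models", "checkpoints"],
        name_contains="flux",
        limit=20,
        offset=0,
        sort="created_at",
        order="desc",
        owner_id="",
    )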
def get_asset(
*,
asset_info_id: str,
owner_id: str = "",
) -> schemas_out.AssetDetail:
with create_session() as session:
res = fetch_asset_info_asset_and_tags(session, asset_info_id=asset_info_id, owner_id=owner_id)
if not res:
raise ValueError(f"AssetInfo {asset_info_id} not found")
info, asset, tag_names = res
preview_id = info.preview_id
return schemas_out.AssetDetail(
id=info.id,
name=info.name,
asset_hash=asset.hash if asset else None,
size=int(asset.size_bytes) if asset and asset.size_bytes is not None else None,
mime_type=asset.mime_type if asset else None,
tags=tag_names,
user_metadata=info.user_metadata or {},
preview_id=preview_id,
created_at=info.created_at,
last_access_time=info.last_access_time,
)
def resolve_asset_content_for_download(
*,
asset_info_id: str,
owner_id: str = "",
) -> tuple[str, str, str]:
with create_session() as session:
pair = fetch_asset_info_and_asset(session, asset_info_id=asset_info_id, owner_id=owner_id)
if not pair:
raise ValueError(f"AssetInfo {asset_info_id} not found")
info, asset = pair
states = list_cache_states_by_asset_id(session, asset_id=asset.id)
abs_path = pick_best_live_path(states)
if not abs_path:
raise FileNotFoundError
touch_asset_info_by_id(session, asset_info_id=asset_info_id)
session.commit()
ctype = asset.mime_type or mimetypes.guess_type(info.name or abs_path)[0] or "application/octet-stream"
download_name = info.name or os.path.basename(abs_path)
return abs_path, ctype, download_name
def upload_asset_from_temp_path(
spec: schemas_in.UploadAssetSpec,
*,
temp_path: str,
client_filename: str | None = None,
owner_id: str = "",
expected_asset_hash: str | None = None,
) -> schemas_out.AssetCreated:
"""
    Create a new asset, or register a new reference to an existing one, from a temporary file path.
"""
try:
        # NOTE: blake3 is not a required dependency yet, so hashing will fail if it is not installed in the local environment
import app.assets.hashing as hashing
digest = hashing.blake3_hash(temp_path)
except Exception as e:
raise RuntimeError(f"failed to hash uploaded file: {e}")
asset_hash = "blake3:" + digest
if expected_asset_hash and asset_hash != expected_asset_hash.strip().lower():
raise ValueError("HASH_MISMATCH")
with create_session() as session:
existing = get_asset_by_hash(session, asset_hash=asset_hash)
if existing is not None:
with contextlib.suppress(Exception):
if temp_path and os.path.exists(temp_path):
os.remove(temp_path)
display_name = _safe_filename(spec.name or (client_filename or ""), fallback=digest)
info = create_asset_info_for_existing_asset(
session,
asset_hash=asset_hash,
name=display_name,
user_metadata=spec.user_metadata or {},
tags=spec.tags or [],
tag_origin="manual",
owner_id=owner_id,
)
tag_names = get_asset_tags(session, asset_info_id=info.id)
session.commit()
return schemas_out.AssetCreated(
id=info.id,
name=info.name,
asset_hash=existing.hash,
size=int(existing.size_bytes) if existing.size_bytes is not None else None,
mime_type=existing.mime_type,
tags=tag_names,
user_metadata=info.user_metadata or {},
preview_id=info.preview_id,
created_at=info.created_at,
last_access_time=info.last_access_time,
created_new=False,
)
base_dir, subdirs = resolve_destination_from_tags(spec.tags)
dest_dir = os.path.join(base_dir, *subdirs) if subdirs else base_dir
os.makedirs(dest_dir, exist_ok=True)
src_for_ext = (client_filename or spec.name or "").strip()
_ext = os.path.splitext(os.path.basename(src_for_ext))[1] if src_for_ext else ""
ext = _ext if 0 < len(_ext) <= 16 else ""
hashed_basename = f"{digest}{ext}"
dest_abs = os.path.abspath(os.path.join(dest_dir, hashed_basename))
ensure_within_base(dest_abs, base_dir)
content_type = (
mimetypes.guess_type(os.path.basename(src_for_ext), strict=False)[0]
or mimetypes.guess_type(hashed_basename, strict=False)[0]
or "application/octet-stream"
)
try:
os.replace(temp_path, dest_abs)
except Exception as e:
raise RuntimeError(f"failed to move uploaded file into place: {e}")
try:
size_bytes, mtime_ns = _get_size_mtime_ns(dest_abs)
except OSError as e:
raise RuntimeError(f"failed to stat destination file: {e}")
with create_session() as session:
result = ingest_fs_asset(
session,
asset_hash=asset_hash,
abs_path=dest_abs,
size_bytes=size_bytes,
mtime_ns=mtime_ns,
mime_type=content_type,
info_name=_safe_filename(spec.name or (client_filename or ""), fallback=digest),
owner_id=owner_id,
preview_id=None,
user_metadata=spec.user_metadata or {},
tags=spec.tags,
tag_origin="manual",
require_existing_tags=False,
)
info_id = result["asset_info_id"]
if not info_id:
raise RuntimeError("failed to create asset metadata")
pair = fetch_asset_info_and_asset(session, asset_info_id=info_id, owner_id=owner_id)
if not pair:
raise RuntimeError("inconsistent DB state after ingest")
info, asset = pair
tag_names = get_asset_tags(session, asset_info_id=info.id)
created_result = schemas_out.AssetCreated(
id=info.id,
name=info.name,
asset_hash=asset.hash,
size=int(asset.size_bytes),
mime_type=asset.mime_type,
tags=tag_names,
user_metadata=info.user_metadata or {},
preview_id=info.preview_id,
created_at=info.created_at,
last_access_time=info.last_access_time,
created_new=result["asset_created"],
)
session.commit()
return created_result
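# Hypothetical usage sketch (file names and tags are illustrative; the UploadAssetSpec
# constructor call assumes the fields used above): registering a file that has already
# been written to a temporary location.
def _example_upload(temp_path: str) -> schemas_out.AssetCreated:
    spec = schemas_in.UploadAssetSpec(
        name="ae.safetensors",
        tags=["models", "vae"],
        user_metadata={},
    )
    return upload_asset_from_temp_path(spec, temp_path=temp_path, client_filename="ae.safetensors")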
def update_asset(
*,
asset_info_id: str,
name: str | None = None,
tags: list[str] | None = None,
user_metadata: dict | None = None,
owner_id: str = "",
) -> schemas_out.AssetUpdated:
with create_session() as session:
info_row = get_asset_info_by_id(session, asset_info_id=asset_info_id)
if not info_row:
raise ValueError(f"AssetInfo {asset_info_id} not found")
if info_row.owner_id and info_row.owner_id != owner_id:
raise PermissionError("not owner")
info = update_asset_info_full(
session,
asset_info_id=asset_info_id,
name=name,
tags=tags,
user_metadata=user_metadata,
tag_origin="manual",
asset_info_row=info_row,
)
tag_names = get_asset_tags(session, asset_info_id=asset_info_id)
result = schemas_out.AssetUpdated(
id=info.id,
name=info.name,
asset_hash=info.asset.hash if info.asset else None,
tags=tag_names,
user_metadata=info.user_metadata or {},
updated_at=info.updated_at,
)
session.commit()
return result
def set_asset_preview(
*,
asset_info_id: str,
preview_asset_id: str | None = None,
owner_id: str = "",
) -> schemas_out.AssetDetail:
with create_session() as session:
info_row = get_asset_info_by_id(session, asset_info_id=asset_info_id)
if not info_row:
raise ValueError(f"AssetInfo {asset_info_id} not found")
if info_row.owner_id and info_row.owner_id != owner_id:
raise PermissionError("not owner")
set_asset_info_preview(
session,
asset_info_id=asset_info_id,
preview_asset_id=preview_asset_id,
)
res = fetch_asset_info_asset_and_tags(session, asset_info_id=asset_info_id, owner_id=owner_id)
if not res:
raise RuntimeError("State changed during preview update")
info, asset, tags = res
result = schemas_out.AssetDetail(
id=info.id,
name=info.name,
asset_hash=asset.hash if asset else None,
size=int(asset.size_bytes) if asset and asset.size_bytes is not None else None,
mime_type=asset.mime_type if asset else None,
tags=tags,
user_metadata=info.user_metadata or {},
preview_id=info.preview_id,
created_at=info.created_at,
last_access_time=info.last_access_time,
)
session.commit()
return result
def delete_asset_reference(*, asset_info_id: str, owner_id: str, delete_content_if_orphan: bool = True) -> bool:
with create_session() as session:
info_row = get_asset_info_by_id(session, asset_info_id=asset_info_id)
asset_id = info_row.asset_id if info_row else None
deleted = delete_asset_info_by_id(session, asset_info_id=asset_info_id, owner_id=owner_id)
if not deleted:
session.commit()
return False
if not delete_content_if_orphan or not asset_id:
session.commit()
return True
still_exists = asset_info_exists_for_asset_id(session, asset_id=asset_id)
if still_exists:
session.commit()
return True
states = list_cache_states_by_asset_id(session, asset_id=asset_id)
file_paths = [s.file_path for s in (states or []) if getattr(s, "file_path", None)]
asset_row = session.get(Asset, asset_id)
if asset_row is not None:
session.delete(asset_row)
session.commit()
for p in file_paths:
with contextlib.suppress(Exception):
if p and os.path.isfile(p):
os.remove(p)
return True
def create_asset_from_hash(
*,
hash_str: str,
name: str,
tags: list[str] | None = None,
user_metadata: dict | None = None,
owner_id: str = "",
) -> schemas_out.AssetCreated | None:
canonical = hash_str.strip().lower()
with create_session() as session:
asset = get_asset_by_hash(session, asset_hash=canonical)
if not asset:
return None
info = create_asset_info_for_existing_asset(
session,
asset_hash=canonical,
name=_safe_filename(name, fallback=canonical.split(":", 1)[1]),
user_metadata=user_metadata or {},
tags=tags or [],
tag_origin="manual",
owner_id=owner_id,
)
tag_names = get_asset_tags(session, asset_info_id=info.id)
result = schemas_out.AssetCreated(
id=info.id,
name=info.name,
asset_hash=asset.hash,
size=int(asset.size_bytes),
mime_type=asset.mime_type,
tags=tag_names,
user_metadata=info.user_metadata or {},
preview_id=info.preview_id,
created_at=info.created_at,
last_access_time=info.last_access_time,
created_new=False,
)
session.commit()
return result
def add_tags_to_asset(
*,
asset_info_id: str,
tags: list[str],
origin: str = "manual",
owner_id: str = "",
) -> schemas_out.TagsAdd:
with create_session() as session:
info_row = get_asset_info_by_id(session, asset_info_id=asset_info_id)
if not info_row:
raise ValueError(f"AssetInfo {asset_info_id} not found")
if info_row.owner_id and info_row.owner_id != owner_id:
raise PermissionError("not owner")
data = add_tags_to_asset_info(
session,
asset_info_id=asset_info_id,
tags=tags,
origin=origin,
create_if_missing=True,
asset_info_row=info_row,
)
session.commit()
return schemas_out.TagsAdd(**data)
def remove_tags_from_asset(
*,
asset_info_id: str,
tags: list[str],
owner_id: str = "",
) -> schemas_out.TagsRemove:
with create_session() as session:
info_row = get_asset_info_by_id(session, asset_info_id=asset_info_id)
if not info_row:
raise ValueError(f"AssetInfo {asset_info_id} not found")
if info_row.owner_id and info_row.owner_id != owner_id:
raise PermissionError("not owner")
data = remove_tags_from_asset_info(
session,
asset_info_id=asset_info_id,
tags=tags,
)
session.commit()
return schemas_out.TagsRemove(**data)
def list_tags(
prefix: str | None = None,
limit: int = 100,
offset: int = 0,
order: str = "count_desc",
include_zero: bool = True,
owner_id: str = "",
) -> schemas_out.TagsList:
limit = max(1, min(1000, limit))
offset = max(0, offset)
with create_session() as session:
rows, total = list_tags_with_usage(
session,
prefix=prefix,
limit=limit,
offset=offset,
include_zero=include_zero,
order=order,
owner_id=owner_id,
)
tags = [schemas_out.TagUsage(name=name, count=count, type=tag_type) for (name, tag_type, count) in rows]
return schemas_out.TagsList(tags=tags, total=total, has_more=(offset + len(tags)) < total)

View File

@@ -1,263 +0,0 @@
import contextlib
import time
import logging
import os
import sqlalchemy
import folder_paths
from app.database.db import create_session, dependencies_available
from app.assets.helpers import (
collect_models_files, compute_relative_filename, fast_asset_file_check, get_name_and_tags_from_asset_path,
    list_tree, prefixes_for_root, escape_like_prefix,
RootType
)
from app.assets.database.tags import add_missing_tag_for_asset_id, ensure_tags_exist, remove_missing_tag_for_asset_id
from app.assets.database.bulk_ops import seed_from_paths_batch
from app.assets.database.models import Asset, AssetCacheState, AssetInfo
def seed_assets(roots: tuple[RootType, ...], enable_logging: bool = False) -> None:
"""
Scan the given roots and seed the assets into the database.
"""
if not dependencies_available():
if enable_logging:
logging.warning("Database dependencies not available, skipping assets scan")
return
t_start = time.perf_counter()
created = 0
skipped_existing = 0
orphans_pruned = 0
paths: list[str] = []
try:
existing_paths: set[str] = set()
for r in roots:
try:
survivors: set[str] = _fast_db_consistency_pass(r, collect_existing_paths=True, update_missing_tags=True)
if survivors:
existing_paths.update(survivors)
except Exception as e:
logging.exception("fast DB scan failed for %s: %s", r, e)
try:
orphans_pruned = _prune_orphaned_assets(roots)
except Exception as e:
logging.exception("orphan pruning failed: %s", e)
if "models" in roots:
paths.extend(collect_models_files())
if "input" in roots:
paths.extend(list_tree(folder_paths.get_input_directory()))
if "output" in roots:
paths.extend(list_tree(folder_paths.get_output_directory()))
specs: list[dict] = []
tag_pool: set[str] = set()
for p in paths:
abs_p = os.path.abspath(p)
if abs_p in existing_paths:
skipped_existing += 1
continue
try:
stat_p = os.stat(abs_p, follow_symlinks=False)
except OSError:
continue
# skip empty files
if not stat_p.st_size:
continue
name, tags = get_name_and_tags_from_asset_path(abs_p)
specs.append(
{
"abs_path": abs_p,
"size_bytes": stat_p.st_size,
"mtime_ns": getattr(stat_p, "st_mtime_ns", int(stat_p.st_mtime * 1_000_000_000)),
"info_name": name,
"tags": tags,
"fname": compute_relative_filename(abs_p),
}
)
for t in tags:
tag_pool.add(t)
# if no file specs, nothing to do
if not specs:
return
with create_session() as sess:
if tag_pool:
ensure_tags_exist(sess, tag_pool, tag_type="user")
result = seed_from_paths_batch(sess, specs=specs, owner_id="")
created += result["inserted_infos"]
sess.commit()
finally:
if enable_logging:
logging.info(
"Assets scan(roots=%s) completed in %.3fs (created=%d, skipped_existing=%d, orphans_pruned=%d, total_seen=%d)",
roots,
time.perf_counter() - t_start,
created,
skipped_existing,
orphans_pruned,
len(paths),
)
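# Hypothetical usage sketch: a full scan over all three roots with logging enabled.
def _example_full_scan() -> None:
    seed_assets(("models", "input", "output"), enable_logging=True)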
def _prune_orphaned_assets(roots: tuple[RootType, ...]) -> int:
"""Prune cache states outside configured prefixes, then delete orphaned seed assets."""
all_prefixes = [os.path.abspath(p) for r in roots for p in prefixes_for_root(r)]
if not all_prefixes:
return 0
def make_prefix_condition(prefix: str):
base = prefix if prefix.endswith(os.sep) else prefix + os.sep
escaped, esc = escape_like_prefix(base)
return AssetCacheState.file_path.like(escaped + "%", escape=esc)
matches_valid_prefix = sqlalchemy.or_(*[make_prefix_condition(p) for p in all_prefixes])
orphan_subq = (
sqlalchemy.select(Asset.id)
.outerjoin(AssetCacheState, AssetCacheState.asset_id == Asset.id)
.where(Asset.hash.is_(None), AssetCacheState.id.is_(None))
).scalar_subquery()
with create_session() as sess:
sess.execute(sqlalchemy.delete(AssetCacheState).where(~matches_valid_prefix))
sess.execute(sqlalchemy.delete(AssetInfo).where(AssetInfo.asset_id.in_(orphan_subq)))
result = sess.execute(sqlalchemy.delete(Asset).where(Asset.id.in_(orphan_subq)))
sess.commit()
return result.rowcount
def _fast_db_consistency_pass(
root: RootType,
*,
collect_existing_paths: bool = False,
update_missing_tags: bool = False,
) -> set[str] | None:
"""Fast DB+FS pass for a root:
- Toggle needs_verify per state using fast check
- For hashed assets with at least one fast-ok state in this root: delete stale missing states
- For seed assets with all states missing: delete Asset and its AssetInfos
- Optionally add/remove 'missing' tags based on fast-ok in this root
- Optionally return surviving absolute paths
"""
prefixes = prefixes_for_root(root)
if not prefixes:
return set() if collect_existing_paths else None
conds = []
for p in prefixes:
base = os.path.abspath(p)
if not base.endswith(os.sep):
base += os.sep
escaped, esc = escape_like_prefix(base)
conds.append(AssetCacheState.file_path.like(escaped + "%", escape=esc))
with create_session() as sess:
rows = (
sess.execute(
sqlalchemy.select(
AssetCacheState.id,
AssetCacheState.file_path,
AssetCacheState.mtime_ns,
AssetCacheState.needs_verify,
AssetCacheState.asset_id,
Asset.hash,
Asset.size_bytes,
)
.join(Asset, Asset.id == AssetCacheState.asset_id)
.where(sqlalchemy.or_(*conds))
.order_by(AssetCacheState.asset_id.asc(), AssetCacheState.id.asc())
)
).all()
by_asset: dict[str, dict] = {}
for sid, fp, mtime_db, needs_verify, aid, a_hash, a_size in rows:
acc = by_asset.get(aid)
if acc is None:
acc = {"hash": a_hash, "size_db": int(a_size or 0), "states": []}
by_asset[aid] = acc
fast_ok = False
try:
exists = True
fast_ok = fast_asset_file_check(
mtime_db=mtime_db,
size_db=acc["size_db"],
stat_result=os.stat(fp, follow_symlinks=True),
)
except FileNotFoundError:
exists = False
except OSError:
exists = False
acc["states"].append({
"sid": sid,
"fp": fp,
"exists": exists,
"fast_ok": fast_ok,
"needs_verify": bool(needs_verify),
})
to_set_verify: list[int] = []
to_clear_verify: list[int] = []
stale_state_ids: list[int] = []
survivors: set[str] = set()
for aid, acc in by_asset.items():
a_hash = acc["hash"]
states = acc["states"]
any_fast_ok = any(s["fast_ok"] for s in states)
all_missing = all(not s["exists"] for s in states)
for s in states:
if not s["exists"]:
continue
if s["fast_ok"] and s["needs_verify"]:
to_clear_verify.append(s["sid"])
if not s["fast_ok"] and not s["needs_verify"]:
to_set_verify.append(s["sid"])
if a_hash is None:
if states and all_missing: # remove seed Asset completely, if no valid AssetCache exists
sess.execute(sqlalchemy.delete(AssetInfo).where(AssetInfo.asset_id == aid))
asset = sess.get(Asset, aid)
if asset:
sess.delete(asset)
else:
for s in states:
if s["exists"]:
survivors.add(os.path.abspath(s["fp"]))
continue
if any_fast_ok: # if Asset has at least one valid AssetCache record, remove any invalid AssetCache records
for s in states:
if not s["exists"]:
stale_state_ids.append(s["sid"])
if update_missing_tags:
with contextlib.suppress(Exception):
remove_missing_tag_for_asset_id(sess, asset_id=aid)
elif update_missing_tags:
with contextlib.suppress(Exception):
add_missing_tag_for_asset_id(sess, asset_id=aid, origin="automatic")
for s in states:
if s["exists"]:
survivors.add(os.path.abspath(s["fp"]))
if stale_state_ids:
sess.execute(sqlalchemy.delete(AssetCacheState).where(AssetCacheState.id.in_(stale_state_ids)))
if to_set_verify:
sess.execute(
sqlalchemy.update(AssetCacheState)
.where(AssetCacheState.id.in_(to_set_verify))
.values(needs_verify=True)
)
if to_clear_verify:
sess.execute(
sqlalchemy.update(AssetCacheState)
.where(AssetCacheState.id.in_(to_clear_verify))
.values(needs_verify=False)
)
sess.commit()
return survivors if collect_existing_paths else None

View File

@@ -1,21 +1,14 @@
from typing import Any
from datetime import datetime
from sqlalchemy.orm import DeclarativeBase
from sqlalchemy.orm import declarative_base
class Base(DeclarativeBase):
pass
Base = declarative_base()
def to_dict(obj: Any, include_none: bool = False) -> dict[str, Any]:
def to_dict(obj):
fields = obj.__table__.columns.keys()
out: dict[str, Any] = {}
for field in fields:
val = getattr(obj, field)
if val is None and not include_none:
continue
if isinstance(val, datetime):
out[field] = val.isoformat()
else:
out[field] = val
return out
return {
field: (val.to_dict() if hasattr(val, "to_dict") else val)
for field in fields
if (val := getattr(obj, field))
}
# TODO: Define models here

View File

@@ -10,8 +10,7 @@ import importlib
from dataclasses import dataclass
from functools import cached_property
from pathlib import Path
from typing import Dict, TypedDict, Optional
from aiohttp import web
from typing import TypedDict, Optional
from importlib.metadata import version
import requests
@@ -258,54 +257,7 @@ comfyui-frontend-package is not installed.
sys.exit(-1)
@classmethod
def template_asset_map(cls) -> Optional[Dict[str, str]]:
"""Return a mapping of template asset names to their absolute paths."""
try:
from comfyui_workflow_templates import (
get_asset_path,
iter_templates,
)
except ImportError:
logging.error(
f"""
********** ERROR ***********
comfyui-workflow-templates is not installed.
{frontend_install_warning_message()}
********** ERROR ***********
""".strip()
)
return None
try:
template_entries = list(iter_templates())
except Exception as exc:
logging.error(f"Failed to enumerate workflow templates: {exc}")
return None
asset_map: Dict[str, str] = {}
try:
for entry in template_entries:
for asset in entry.assets:
asset_map[asset.filename] = get_asset_path(
entry.template_id, asset.filename
)
except Exception as exc:
logging.error(f"Failed to resolve template asset paths: {exc}")
return None
if not asset_map:
logging.error("No workflow template assets found. Did the packages install correctly?")
return None
return asset_map
@classmethod
def legacy_templates_path(cls) -> Optional[str]:
"""Return the legacy templates directory shipped inside the meta package."""
def templates_path(cls) -> str:
try:
import comfyui_workflow_templates
@@ -324,7 +276,6 @@ comfyui-workflow-templates is not installed.
********** ERROR ***********
""".strip()
)
return None
@classmethod
def embedded_docs_path(cls) -> str:
@@ -441,17 +392,3 @@ comfyui-workflow-templates is not installed.
logging.info("Falling back to the default frontend.")
check_frontend_version()
return cls.default_frontend_path()
@classmethod
def template_asset_handler(cls):
assets = cls.template_asset_map()
if not assets:
return None
async def serve_template(request: web.Request) -> web.StreamResponse:
rel_path = request.match_info.get("path", "")
target = assets.get(rel_path)
if target is None:
raise web.HTTPNotFound()
return web.FileResponse(target)
return serve_template

View File

@@ -44,7 +44,7 @@ class ModelFileManager:
@routes.get("/experiment/models/{folder}")
async def get_all_models(request):
folder = request.match_info.get("folder", None)
if folder not in folder_paths.folder_names_and_paths:
if not folder in folder_paths.folder_names_and_paths:
return web.Response(status=404)
files = self.get_model_file_list(folder)
return web.json_response(files)
@@ -55,7 +55,7 @@ class ModelFileManager:
path_index = int(request.match_info.get("path_index", None))
filename = request.match_info.get("filename", None)
if folder_name not in folder_paths.folder_names_and_paths:
if not folder_name in folder_paths.folder_names_and_paths:
return web.Response(status=404)
folders = folder_paths.folder_names_and_paths[folder_name]

View File

@@ -10,7 +10,6 @@ import hashlib
class Source:
custom_node = "custom_node"
templates = "templates"
class SubgraphEntry(TypedDict):
source: str
@@ -39,18 +38,6 @@ class CustomNodeSubgraphEntryInfo(TypedDict):
class SubgraphManager:
def __init__(self):
self.cached_custom_node_subgraphs: dict[SubgraphEntry] | None = None
self.cached_blueprint_subgraphs: dict[SubgraphEntry] | None = None
def _create_entry(self, file: str, source: str, node_pack: str) -> tuple[str, SubgraphEntry]:
"""Create a subgraph entry from a file path. Expects normalized path (forward slashes)."""
entry_id = hashlib.sha256(f"{source}{file}".encode()).hexdigest()
entry: SubgraphEntry = {
"source": source,
"name": os.path.splitext(os.path.basename(file))[0],
"path": file,
"info": {"node_pack": node_pack},
}
return entry_id, entry
async def load_entry_data(self, entry: SubgraphEntry):
with open(entry['path'], 'r') as f:
@@ -73,60 +60,53 @@ class SubgraphManager:
return entries
async def get_custom_node_subgraphs(self, loadedModules, force_reload=False):
"""Load subgraphs from custom nodes."""
# if not forced to reload and cached, return cache
if not force_reload and self.cached_custom_node_subgraphs is not None:
return self.cached_custom_node_subgraphs
# Load subgraphs from custom nodes
subfolder = "subgraphs"
subgraphs_dict: dict[SubgraphEntry] = {}
for folder in folder_paths.get_folder_paths("custom_nodes"):
pattern = os.path.join(folder, "*/subgraphs/*.json")
for file in glob.glob(pattern):
file = file.replace('\\', '/')
node_pack = "custom_nodes." + file.split('/')[-3]
entry_id, entry = self._create_entry(file, Source.custom_node, node_pack)
subgraphs_dict[entry_id] = entry
for folder in folder_paths.get_folder_paths("custom_nodes"):
pattern = os.path.join(folder, f"*/{subfolder}/*.json")
matched_files = glob.glob(pattern)
for file in matched_files:
# replace backslashes with forward slashes
file = file.replace('\\', '/')
info: CustomNodeSubgraphEntryInfo = {
"node_pack": "custom_nodes." + file.split('/')[-3]
}
source = Source.custom_node
# hash source + path to make sure id will be as unique as possible, but
# reproducible across backend reloads
id = hashlib.sha256(f"{source}{file}".encode()).hexdigest()
entry: SubgraphEntry = {
"source": Source.custom_node,
"name": os.path.splitext(os.path.basename(file))[0],
"path": file,
"info": info,
}
subgraphs_dict[id] = entry
self.cached_custom_node_subgraphs = subgraphs_dict
return subgraphs_dict
async def get_blueprint_subgraphs(self, force_reload=False):
"""Load subgraphs from the blueprints directory."""
if not force_reload and self.cached_blueprint_subgraphs is not None:
return self.cached_blueprint_subgraphs
subgraphs_dict: dict[SubgraphEntry] = {}
blueprints_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'blueprints')
if os.path.exists(blueprints_dir):
for file in glob.glob(os.path.join(blueprints_dir, "*.json")):
file = file.replace('\\', '/')
entry_id, entry = self._create_entry(file, Source.templates, "comfyui")
subgraphs_dict[entry_id] = entry
self.cached_blueprint_subgraphs = subgraphs_dict
return subgraphs_dict
async def get_all_subgraphs(self, loadedModules, force_reload=False):
"""Get all subgraphs from all sources (custom nodes and blueprints)."""
custom_node_subgraphs = await self.get_custom_node_subgraphs(loadedModules, force_reload)
blueprint_subgraphs = await self.get_blueprint_subgraphs(force_reload)
return {**custom_node_subgraphs, **blueprint_subgraphs}
async def get_subgraph(self, id: str, loadedModules):
"""Get a specific subgraph by ID from any source."""
entry = (await self.get_all_subgraphs(loadedModules)).get(id)
if entry is not None and entry.get('data') is None:
async def get_custom_node_subgraph(self, id: str, loadedModules):
subgraphs = await self.get_custom_node_subgraphs(loadedModules)
entry: SubgraphEntry = subgraphs.get(id, None)
if entry is not None and entry.get('data', None) is None:
await self.load_entry_data(entry)
return entry
def add_routes(self, routes, loadedModules):
@routes.get("/global_subgraphs")
async def get_global_subgraphs(request):
subgraphs_dict = await self.get_all_subgraphs(loadedModules)
subgraphs_dict = await self.get_custom_node_subgraphs(loadedModules)
# NOTE: we may want to include other sources of global subgraphs such as templates in the future;
# that's the reasoning for the current implementation
return web.json_response(await self.sanitize_entries(subgraphs_dict, remove_data=True))
@routes.get("/global_subgraphs/{id}")
async def get_global_subgraph(request):
id = request.match_info.get("id", None)
subgraph = await self.get_subgraph(id, loadedModules)
subgraph = await self.get_custom_node_subgraph(id, loadedModules)
return web.json_response(await self.sanitize_entry(subgraph))

View File

@@ -59,9 +59,6 @@ class UserManager():
user = "default"
if args.multi_user and "comfy-user" in request.headers:
user = request.headers["comfy-user"]
# Block System Users (use same error message to prevent probing)
if user.startswith(folder_paths.SYSTEM_USER_PREFIX):
raise KeyError("Unknown user: " + user)
if user not in self.users:
raise KeyError("Unknown user: " + user)
@@ -69,16 +66,15 @@ class UserManager():
return user
def get_request_user_filepath(self, request, file, type="userdata", create_dir=True):
user_directory = folder_paths.get_user_directory()
if type == "userdata":
root_dir = folder_paths.get_user_directory()
root_dir = user_directory
else:
raise KeyError("Unknown filepath type:" + type)
user = self.get_request_user_id(request)
user_root = folder_paths.get_public_user_directory(user)
if user_root is None:
return None
path = user_root
path = user_root = os.path.abspath(os.path.join(root_dir, user))
# prevent leaving /{type}
if os.path.commonpath((root_dir, user_root)) != root_dir:
@@ -105,11 +101,7 @@ class UserManager():
name = name.strip()
if not name:
raise ValueError("username not provided")
if name.startswith(folder_paths.SYSTEM_USER_PREFIX):
raise ValueError("System User prefix not allowed")
user_id = re.sub("[^a-zA-Z0-9-_]+", '-', name)
if user_id.startswith(folder_paths.SYSTEM_USER_PREFIX):
raise ValueError("System User prefix not allowed")
user_id = user_id + "_" + str(uuid.uuid4())
self.users[user_id] = name
@@ -140,10 +132,7 @@ class UserManager():
if username in self.users.values():
return web.json_response({"error": "Duplicate username."}, status=400)
try:
user_id = self.add_user(username)
except ValueError as e:
return web.json_response({"error": str(e)}, status=400)
user_id = self.add_user(username)
return web.json_response(user_id)
@routes.get("/userdata")
@@ -435,7 +424,7 @@ class UserManager():
return source
dest = get_user_data_path(request, check_exists=False, param="dest")
if not isinstance(dest, str):
if not isinstance(source, str):
return dest
overwrite = request.query.get("overwrite", 'true') != "false"

View File

@@ -1,44 +0,0 @@
#version 300 es
precision highp float;
uniform sampler2D u_image0;
uniform float u_float0; // Brightness slider -100..100
uniform float u_float1; // Contrast slider -100..100
in vec2 v_texCoord;
out vec4 fragColor;
const float MID_GRAY = 0.18; // 18% reflectance
// sRGB gamma 2.2 approximation
vec3 srgbToLinear(vec3 c) {
return pow(max(c, 0.0), vec3(2.2));
}
vec3 linearToSrgb(vec3 c) {
return pow(max(c, 0.0), vec3(1.0/2.2));
}
float mapBrightness(float b) {
return clamp(b / 100.0, -1.0, 1.0);
}
float mapContrast(float c) {
return clamp(c / 100.0 + 1.0, 0.0, 2.0);
}
void main() {
vec4 orig = texture(u_image0, v_texCoord);
float brightness = mapBrightness(u_float0);
float contrast = mapContrast(u_float1);
vec3 lin = srgbToLinear(orig.rgb);
lin = (lin - MID_GRAY) * contrast + brightness + MID_GRAY;
// Convert back to sRGB
vec3 result = linearToSrgb(clamp(lin, 0.0, 1.0));
fragColor = vec4(result, orig.a);
}
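# A rough NumPy restatement of the shader math above (illustrative only; assumes an sRGB
# image array in [0, 1] and ignores the alpha channel):
import numpy as np

def brightness_contrast(img: np.ndarray, brightness: float, contrast: float) -> np.ndarray:
    b = np.clip(brightness / 100.0, -1.0, 1.0)             # mapBrightness
    c = np.clip(contrast / 100.0 + 1.0, 0.0, 2.0)          # mapContrast
    lin = np.power(np.clip(img, 0.0, None), 2.2)           # sRGB -> linear (gamma 2.2 approx)
    lin = (lin - 0.18) * c + b + 0.18                      # pivot contrast around 18% gray
    return np.power(np.clip(lin, 0.0, 1.0), 1.0 / 2.2)     # linear -> sRGB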

View File

@@ -1,72 +0,0 @@
#version 300 es
precision highp float;
uniform sampler2D u_image0;
uniform vec2 u_resolution;
uniform int u_int0; // Mode
uniform float u_float0; // Amount (0 to 100)
in vec2 v_texCoord;
out vec4 fragColor;
const int MODE_LINEAR = 0;
const int MODE_RADIAL = 1;
const int MODE_BARREL = 2;
const int MODE_SWIRL = 3;
const int MODE_DIAGONAL = 4;
const float AMOUNT_SCALE = 0.0005;
const float RADIAL_MULT = 4.0;
const float BARREL_MULT = 8.0;
const float INV_SQRT2 = 0.70710678118;
void main() {
vec2 uv = v_texCoord;
vec4 original = texture(u_image0, uv);
float amount = u_float0 * AMOUNT_SCALE;
if (amount < 0.000001) {
fragColor = original;
return;
}
// Aspect-corrected coordinates for circular effects
float aspect = u_resolution.x / u_resolution.y;
vec2 centered = uv - 0.5;
vec2 corrected = vec2(centered.x * aspect, centered.y);
float r = length(corrected);
vec2 dir = r > 0.0001 ? corrected / r : vec2(0.0);
vec2 offset = vec2(0.0);
if (u_int0 == MODE_LINEAR) {
// Horizontal shift (no aspect correction needed)
offset = vec2(amount, 0.0);
}
else if (u_int0 == MODE_RADIAL) {
// Outward from center, stronger at edges
offset = dir * r * amount * RADIAL_MULT;
offset.x /= aspect; // Convert back to UV space
}
else if (u_int0 == MODE_BARREL) {
// Lens distortion simulation (r² falloff)
offset = dir * r * r * amount * BARREL_MULT;
offset.x /= aspect; // Convert back to UV space
}
else if (u_int0 == MODE_SWIRL) {
// Perpendicular to radial (rotational aberration)
vec2 perp = vec2(-dir.y, dir.x);
offset = perp * r * amount * RADIAL_MULT;
offset.x /= aspect; // Convert back to UV space
}
else if (u_int0 == MODE_DIAGONAL) {
// 45° offset (no aspect correction needed)
offset = vec2(amount, amount) * INV_SQRT2;
}
float red = texture(u_image0, uv + offset).r;
float green = original.g;
float blue = texture(u_image0, uv - offset).b;
fragColor = vec4(red, green, blue, original.a);
}

View File

@@ -1,78 +0,0 @@
#version 300 es
precision highp float;
uniform sampler2D u_image0;
uniform float u_float0; // temperature (-100 to 100)
uniform float u_float1; // tint (-100 to 100)
uniform float u_float2; // vibrance (-100 to 100)
uniform float u_float3; // saturation (-100 to 100)
in vec2 v_texCoord;
out vec4 fragColor;
const float INPUT_SCALE = 0.01;
const float TEMP_TINT_PRIMARY = 0.3;
const float TEMP_TINT_SECONDARY = 0.15;
const float VIBRANCE_BOOST = 2.0;
const float SATURATION_BOOST = 2.0;
const float SKIN_PROTECTION = 0.5;
const float EPSILON = 0.001;
const vec3 LUMA_WEIGHTS = vec3(0.299, 0.587, 0.114);
void main() {
vec4 tex = texture(u_image0, v_texCoord);
vec3 color = tex.rgb;
// Scale inputs: -100/100 → -1/1
float temperature = u_float0 * INPUT_SCALE;
float tint = u_float1 * INPUT_SCALE;
float vibrance = u_float2 * INPUT_SCALE;
float saturation = u_float3 * INPUT_SCALE;
// Temperature (warm/cool): positive = warm, negative = cool
color.r += temperature * TEMP_TINT_PRIMARY;
color.b -= temperature * TEMP_TINT_PRIMARY;
// Tint (green/magenta): positive = green, negative = magenta
color.g += tint * TEMP_TINT_PRIMARY;
color.r -= tint * TEMP_TINT_SECONDARY;
color.b -= tint * TEMP_TINT_SECONDARY;
// Single clamp after temperature/tint
color = clamp(color, 0.0, 1.0);
// Vibrance with skin protection
if (vibrance != 0.0) {
float maxC = max(color.r, max(color.g, color.b));
float minC = min(color.r, min(color.g, color.b));
float sat = maxC - minC;
float gray = dot(color, LUMA_WEIGHTS);
if (vibrance < 0.0) {
// Desaturate: -100 → gray
color = mix(vec3(gray), color, 1.0 + vibrance);
} else {
// Boost less saturated colors more
float vibranceAmt = vibrance * (1.0 - sat);
// Branchless skin tone protection
float isWarmTone = step(color.b, color.g) * step(color.g, color.r);
float warmth = (color.r - color.b) / max(maxC, EPSILON);
float skinTone = isWarmTone * warmth * sat * (1.0 - sat);
vibranceAmt *= (1.0 - skinTone * SKIN_PROTECTION);
color = mix(vec3(gray), color, 1.0 + vibranceAmt * VIBRANCE_BOOST);
}
}
// Saturation
if (saturation != 0.0) {
float gray = dot(color, LUMA_WEIGHTS);
float satMix = saturation < 0.0
? 1.0 + saturation // -100 → gray
: 1.0 + saturation * SATURATION_BOOST; // +100 → 3x boost
color = mix(vec3(gray), color, satMix);
}
fragColor = vec4(clamp(color, 0.0, 1.0), tex.a);
}

View File

@@ -1,94 +0,0 @@
#version 300 es
precision highp float;
uniform sampler2D u_image0;
uniform float u_float0; // Blur radius (0-20, default ~5)
uniform float u_float1; // Edge threshold (0-100, default ~30)
uniform int u_int0; // Step size (0/1 = every pixel, 2+ = skip pixels)
in vec2 v_texCoord;
out vec4 fragColor;
const int MAX_RADIUS = 20;
const float EPSILON = 0.0001;
// Perceptual luminance
float getLuminance(vec3 rgb) {
return dot(rgb, vec3(0.299, 0.587, 0.114));
}
vec4 bilateralFilter(vec2 uv, vec2 texelSize, int radius,
float sigmaSpatial, float sigmaColor)
{
vec4 center = texture(u_image0, uv);
vec3 centerRGB = center.rgb;
float invSpatial2 = -0.5 / (sigmaSpatial * sigmaSpatial);
float invColor2 = -0.5 / (sigmaColor * sigmaColor + EPSILON);
vec3 sumRGB = vec3(0.0);
float sumWeight = 0.0;
int step = max(u_int0, 1);
float radius2 = float(radius * radius);
for (int dy = -MAX_RADIUS; dy <= MAX_RADIUS; dy++) {
if (dy < -radius || dy > radius) continue;
if (abs(dy) % step != 0) continue;
for (int dx = -MAX_RADIUS; dx <= MAX_RADIUS; dx++) {
if (dx < -radius || dx > radius) continue;
if (abs(dx) % step != 0) continue;
vec2 offset = vec2(float(dx), float(dy));
float dist2 = dot(offset, offset);
if (dist2 > radius2) continue;
vec3 sampleRGB = texture(u_image0, uv + offset * texelSize).rgb;
// Spatial Gaussian
float spatialWeight = exp(dist2 * invSpatial2);
// Perceptual color distance (weighted RGB)
vec3 diff = sampleRGB - centerRGB;
float colorDist = dot(diff * diff, vec3(0.299, 0.587, 0.114));
float colorWeight = exp(colorDist * invColor2);
float w = spatialWeight * colorWeight;
sumRGB += sampleRGB * w;
sumWeight += w;
}
}
vec3 resultRGB = sumRGB / max(sumWeight, EPSILON);
return vec4(resultRGB, center.a); // preserve center alpha
}
void main() {
vec2 texelSize = 1.0 / vec2(textureSize(u_image0, 0));
float radiusF = clamp(u_float0, 0.0, float(MAX_RADIUS));
int radius = int(radiusF + 0.5);
if (radius == 0) {
fragColor = texture(u_image0, v_texCoord);
return;
}
// Edge threshold → color sigma
// Squared curve for better low-end control
float t = clamp(u_float1, 0.0, 100.0) / 100.0;
t *= t;
float sigmaColor = mix(0.01, 0.5, t);
// Spatial sigma tied to radius
float sigmaSpatial = max(radiusF * 0.75, 0.5);
fragColor = bilateralFilter(
v_texCoord,
texelSize,
radius,
sigmaSpatial,
sigmaColor
);
}

View File

@@ -1,124 +0,0 @@
#version 300 es
precision highp float;
uniform sampler2D u_image0;
uniform vec2 u_resolution;
uniform float u_float0; // grain amount [0.0-1.0], typical: 0.2-0.8
uniform float u_float1; // grain size [0.3-3.0], lower = finer grain
uniform float u_float2; // color amount [0.0-1.0], 0 = monochrome, 1 = RGB grain
uniform float u_float3; // luminance bias [0.0-1.0], 0 = uniform, 1 = shadows only
uniform int u_int0; // noise mode [0 or 1], 0 = smooth, 1 = grainy
in vec2 v_texCoord;
layout(location = 0) out vec4 fragColor0;
// High-quality integer hash (pcg-like)
uint pcg(uint v) {
uint state = v * 747796405u + 2891336453u;
uint word = ((state >> ((state >> 28u) + 4u)) ^ state) * 277803737u;
return (word >> 22u) ^ word;
}
// 2D -> 1D hash input
uint hash2d(uvec2 p) {
return pcg(p.x + pcg(p.y));
}
// Hash to float [0, 1]
float hashf(uvec2 p) {
return float(hash2d(p)) / float(0xffffffffu);
}
// Hash to float with offset (for RGB channels)
float hashf(uvec2 p, uint offset) {
return float(pcg(hash2d(p) + offset)) / float(0xffffffffu);
}
// Convert uniform [0,1] to roughly Gaussian distribution
// Using simple approximation: average of multiple samples
float toGaussian(uvec2 p) {
float sum = hashf(p, 0u) + hashf(p, 1u) + hashf(p, 2u) + hashf(p, 3u);
return (sum - 2.0) * 0.7; // Centered, scaled
}
float toGaussian(uvec2 p, uint offset) {
float sum = hashf(p, offset) + hashf(p, offset + 1u)
+ hashf(p, offset + 2u) + hashf(p, offset + 3u);
return (sum - 2.0) * 0.7;
}
// Smooth noise with better interpolation
float smoothNoise(vec2 p) {
vec2 i = floor(p);
vec2 f = fract(p);
// Quintic interpolation (less banding than cubic)
f = f * f * f * (f * (f * 6.0 - 15.0) + 10.0);
uvec2 ui = uvec2(i);
float a = toGaussian(ui);
float b = toGaussian(ui + uvec2(1u, 0u));
float c = toGaussian(ui + uvec2(0u, 1u));
float d = toGaussian(ui + uvec2(1u, 1u));
return mix(mix(a, b, f.x), mix(c, d, f.x), f.y);
}
float smoothNoise(vec2 p, uint offset) {
vec2 i = floor(p);
vec2 f = fract(p);
f = f * f * f * (f * (f * 6.0 - 15.0) + 10.0);
uvec2 ui = uvec2(i);
float a = toGaussian(ui, offset);
float b = toGaussian(ui + uvec2(1u, 0u), offset);
float c = toGaussian(ui + uvec2(0u, 1u), offset);
float d = toGaussian(ui + uvec2(1u, 1u), offset);
return mix(mix(a, b, f.x), mix(c, d, f.x), f.y);
}
void main() {
vec4 color = texture(u_image0, v_texCoord);
// Luminance (Rec.709)
float luma = dot(color.rgb, vec3(0.2126, 0.7152, 0.0722));
// Grain UV (resolution-independent)
vec2 grainUV = v_texCoord * u_resolution / max(u_float1, 0.01);
uvec2 grainPixel = uvec2(grainUV);
float g;
vec3 grainRGB;
if (u_int0 == 1) {
// Grainy mode: pure hash noise (no interpolation = no banding)
g = toGaussian(grainPixel);
grainRGB = vec3(
toGaussian(grainPixel, 100u),
toGaussian(grainPixel, 200u),
toGaussian(grainPixel, 300u)
);
} else {
// Smooth mode: interpolated with quintic curve
g = smoothNoise(grainUV);
grainRGB = vec3(
smoothNoise(grainUV, 100u),
smoothNoise(grainUV, 200u),
smoothNoise(grainUV, 300u)
);
}
// Luminance weighting (less grain in highlights)
float lumWeight = mix(1.0, 1.0 - luma, clamp(u_float3, 0.0, 1.0));
// Strength
float strength = u_float0 * 0.15;
// Color vs monochrome grain
vec3 grainColor = mix(vec3(g), grainRGB, clamp(u_float2, 0.0, 1.0));
color.rgb += grainColor * strength * lumWeight;
fragColor0 = vec4(clamp(color.rgb, 0.0, 1.0), color.a);
}

View File

@@ -1,133 +0,0 @@
#version 300 es
precision mediump float;
uniform sampler2D u_image0;
uniform vec2 u_resolution;
uniform int u_int0; // Blend mode
uniform int u_int1; // Color tint
uniform float u_float0; // Intensity
uniform float u_float1; // Radius
uniform float u_float2; // Threshold
in vec2 v_texCoord;
out vec4 fragColor;
const int BLEND_ADD = 0;
const int BLEND_SCREEN = 1;
const int BLEND_SOFT = 2;
const int BLEND_OVERLAY = 3;
const int BLEND_LIGHTEN = 4;
const float GOLDEN_ANGLE = 2.39996323;
const int MAX_SAMPLES = 48;
const vec3 LUMA = vec3(0.299, 0.587, 0.114);
float hash(vec2 p) {
p = fract(p * vec2(123.34, 456.21));
p += dot(p, p + 45.32);
return fract(p.x * p.y);
}
vec3 hexToRgb(int h) {
return vec3(
float((h >> 16) & 255),
float((h >> 8) & 255),
float(h & 255)
) * (1.0 / 255.0);
}
vec3 blend(vec3 base, vec3 glow, int mode) {
if (mode == BLEND_SCREEN) {
return 1.0 - (1.0 - base) * (1.0 - glow);
}
if (mode == BLEND_SOFT) {
return mix(
base - (1.0 - 2.0 * glow) * base * (1.0 - base),
base + (2.0 * glow - 1.0) * (sqrt(base) - base),
step(0.5, glow)
);
}
if (mode == BLEND_OVERLAY) {
return mix(
2.0 * base * glow,
1.0 - 2.0 * (1.0 - base) * (1.0 - glow),
step(0.5, base)
);
}
if (mode == BLEND_LIGHTEN) {
return max(base, glow);
}
return base + glow;
}
void main() {
vec4 original = texture(u_image0, v_texCoord);
float intensity = u_float0 * 0.05;
float radius = u_float1 * u_float1 * 0.012;
if (intensity < 0.001 || radius < 0.1) {
fragColor = original;
return;
}
float threshold = 1.0 - u_float2 * 0.01;
float t0 = threshold - 0.15;
float t1 = threshold + 0.15;
vec2 texelSize = 1.0 / u_resolution;
float radius2 = radius * radius;
float sampleScale = clamp(radius * 0.75, 0.35, 1.0);
int samples = int(float(MAX_SAMPLES) * sampleScale);
float noise = hash(gl_FragCoord.xy);
float angleOffset = noise * GOLDEN_ANGLE;
float radiusJitter = 0.85 + noise * 0.3;
float ca = cos(GOLDEN_ANGLE);
float sa = sin(GOLDEN_ANGLE);
vec2 dir = vec2(cos(angleOffset), sin(angleOffset));
vec3 glow = vec3(0.0);
float totalWeight = 0.0;
// Center tap
float centerMask = smoothstep(t0, t1, dot(original.rgb, LUMA));
glow += original.rgb * centerMask * 2.0;
totalWeight += 2.0;
for (int i = 1; i < MAX_SAMPLES; i++) {
if (i >= samples) break;
float fi = float(i);
float dist = sqrt(fi / float(samples)) * radius * radiusJitter;
vec2 offset = dir * dist * texelSize;
vec3 c = texture(u_image0, v_texCoord + offset).rgb;
float mask = smoothstep(t0, t1, dot(c, LUMA));
float w = 1.0 - (dist * dist) / (radius2 * 1.5);
w = max(w, 0.0);
w *= w;
glow += c * mask * w;
totalWeight += w;
dir = vec2(
dir.x * ca - dir.y * sa,
dir.x * sa + dir.y * ca
);
}
glow *= intensity / max(totalWeight, 0.001);
if (u_int1 > 0) {
glow *= hexToRgb(u_int1);
}
vec3 result = blend(original.rgb, glow, u_int0);
result += (noise - 0.5) * (1.0 / 255.0);
fragColor = vec4(clamp(result, 0.0, 1.0), original.a);
}

View File

@@ -1,222 +0,0 @@
#version 300 es
precision highp float;
uniform sampler2D u_image0;
uniform int u_int0; // Mode: 0=Master, 1=Reds, 2=Yellows, 3=Greens, 4=Cyans, 5=Blues, 6=Magentas, 7=Colorize
uniform int u_int1; // Color Space: 0=HSL, 1=HSB/HSV
uniform float u_float0; // Hue (-180 to 180)
uniform float u_float1; // Saturation (-100 to 100)
uniform float u_float2; // Lightness/Brightness (-100 to 100)
uniform float u_float3; // Overlap (0 to 100) - feathering between adjacent color ranges
in vec2 v_texCoord;
out vec4 fragColor;
// Color range modes
const int MODE_MASTER = 0;
const int MODE_RED = 1;
const int MODE_YELLOW = 2;
const int MODE_GREEN = 3;
const int MODE_CYAN = 4;
const int MODE_BLUE = 5;
const int MODE_MAGENTA = 6;
const int MODE_COLORIZE = 7;
// Color space modes
const int COLORSPACE_HSL = 0;
const int COLORSPACE_HSB = 1;
const float EPSILON = 0.0001;
//=============================================================================
// RGB <-> HSL Conversions
//=============================================================================
vec3 rgb2hsl(vec3 c) {
float maxC = max(max(c.r, c.g), c.b);
float minC = min(min(c.r, c.g), c.b);
float delta = maxC - minC;
float h = 0.0;
float s = 0.0;
float l = (maxC + minC) * 0.5;
if (delta > EPSILON) {
s = l < 0.5
? delta / (maxC + minC)
: delta / (2.0 - maxC - minC);
if (maxC == c.r) {
h = (c.g - c.b) / delta + (c.g < c.b ? 6.0 : 0.0);
} else if (maxC == c.g) {
h = (c.b - c.r) / delta + 2.0;
} else {
h = (c.r - c.g) / delta + 4.0;
}
h /= 6.0;
}
return vec3(h, s, l);
}
float hue2rgb(float p, float q, float t) {
t = fract(t);
if (t < 1.0/6.0) return p + (q - p) * 6.0 * t;
if (t < 0.5) return q;
if (t < 2.0/3.0) return p + (q - p) * (2.0/3.0 - t) * 6.0;
return p;
}
vec3 hsl2rgb(vec3 hsl) {
if (hsl.y < EPSILON) return vec3(hsl.z);
float q = hsl.z < 0.5
? hsl.z * (1.0 + hsl.y)
: hsl.z + hsl.y - hsl.z * hsl.y;
float p = 2.0 * hsl.z - q;
return vec3(
hue2rgb(p, q, hsl.x + 1.0/3.0),
hue2rgb(p, q, hsl.x),
hue2rgb(p, q, hsl.x - 1.0/3.0)
);
}
vec3 rgb2hsb(vec3 c) {
float maxC = max(max(c.r, c.g), c.b);
float minC = min(min(c.r, c.g), c.b);
float delta = maxC - minC;
float h = 0.0;
float s = (maxC > EPSILON) ? delta / maxC : 0.0;
float b = maxC;
if (delta > EPSILON) {
if (maxC == c.r) {
h = (c.g - c.b) / delta + (c.g < c.b ? 6.0 : 0.0);
} else if (maxC == c.g) {
h = (c.b - c.r) / delta + 2.0;
} else {
h = (c.r - c.g) / delta + 4.0;
}
h /= 6.0;
}
return vec3(h, s, b);
}
vec3 hsb2rgb(vec3 hsb) {
vec3 rgb = clamp(abs(mod(hsb.x * 6.0 + vec3(0.0, 4.0, 2.0), 6.0) - 3.0) - 1.0, 0.0, 1.0);
return hsb.z * mix(vec3(1.0), rgb, hsb.y);
}
//=============================================================================
// Color Range Weight Calculation
//=============================================================================
float hueDistance(float a, float b) {
float d = abs(a - b);
return min(d, 1.0 - d);
}
float getHueWeight(float hue, float center, float overlap) {
float baseWidth = 1.0 / 6.0;
float feather = baseWidth * overlap;
float d = hueDistance(hue, center);
float inner = baseWidth * 0.5;
float outer = inner + feather;
return 1.0 - smoothstep(inner, outer, d);
}
float getModeWeight(float hue, int mode, float overlap) {
if (mode == MODE_MASTER || mode == MODE_COLORIZE) return 1.0;
if (mode == MODE_RED) {
return max(
getHueWeight(hue, 0.0, overlap),
getHueWeight(hue, 1.0, overlap)
);
}
float center = float(mode - 1) / 6.0;
return getHueWeight(hue, center, overlap);
}
//=============================================================================
// Adjustment Functions
//=============================================================================
float adjustLightness(float l, float amount) {
return amount > 0.0
? l + (1.0 - l) * amount
: l + l * amount;
}
float adjustBrightness(float b, float amount) {
return clamp(b + amount, 0.0, 1.0);
}
float adjustSaturation(float s, float amount) {
return amount > 0.0
? s + (1.0 - s) * amount
: s + s * amount;
}
vec3 colorize(vec3 rgb, float hue, float sat, float light) {
float lum = dot(rgb, vec3(0.299, 0.587, 0.114));
float l = adjustLightness(lum, light);
vec3 hsl = vec3(fract(hue), clamp(abs(sat), 0.0, 1.0), clamp(l, 0.0, 1.0));
return hsl2rgb(hsl);
}
//=============================================================================
// Main
//=============================================================================
void main() {
vec4 original = texture(u_image0, v_texCoord);
float hueShift = u_float0 / 360.0; // -180..180 -> -0.5..0.5
float satAmount = u_float1 / 100.0; // -100..100 -> -1..1
float lightAmount= u_float2 / 100.0; // -100..100 -> -1..1
float overlap = u_float3 / 100.0; // 0..100 -> 0..1
vec3 result;
if (u_int0 == MODE_COLORIZE) {
result = colorize(original.rgb, hueShift, satAmount, lightAmount);
fragColor = vec4(result, original.a);
return;
}
vec3 hsx = (u_int1 == COLORSPACE_HSL)
? rgb2hsl(original.rgb)
: rgb2hsb(original.rgb);
float weight = getModeWeight(hsx.x, u_int0, overlap);
if (u_int0 != MODE_MASTER && hsx.y < EPSILON) {
weight = 0.0;
}
if (weight > EPSILON) {
float h = fract(hsx.x + hueShift * weight);
float s = clamp(adjustSaturation(hsx.y, satAmount * weight), 0.0, 1.0);
float v = (u_int1 == COLORSPACE_HSL)
? clamp(adjustLightness(hsx.z, lightAmount * weight), 0.0, 1.0)
: clamp(adjustBrightness(hsx.z, lightAmount * weight), 0.0, 1.0);
vec3 adjusted = vec3(h, s, v);
result = (u_int1 == COLORSPACE_HSL)
? hsl2rgb(adjusted)
: hsb2rgb(adjusted);
} else {
result = original.rgb;
}
fragColor = vec4(result, original.a);
}

View File

@@ -1,111 +0,0 @@
#version 300 es
#pragma passes 2
precision highp float;
// Blur type constants
const int BLUR_GAUSSIAN = 0;
const int BLUR_BOX = 1;
const int BLUR_RADIAL = 2;
// Radial blur config
const int RADIAL_SAMPLES = 12;
const float RADIAL_STRENGTH = 0.0003;
uniform sampler2D u_image0;
uniform vec2 u_resolution;
uniform int u_int0; // Blur type (BLUR_GAUSSIAN, BLUR_BOX, BLUR_RADIAL)
uniform float u_float0; // Blur radius/amount
uniform int u_pass; // Pass index (0 = horizontal, 1 = vertical)
in vec2 v_texCoord;
layout(location = 0) out vec4 fragColor0;
float gaussian(float x, float sigma) {
return exp(-(x * x) / (2.0 * sigma * sigma));
}
void main() {
vec2 texelSize = 1.0 / u_resolution;
float radius = max(u_float0, 0.0);
    // Radial (angular) blur - single pass, doesn't use the separable passes
if (u_int0 == BLUR_RADIAL) {
// Only execute on first pass
if (u_pass > 0) {
fragColor0 = texture(u_image0, v_texCoord);
return;
}
vec2 center = vec2(0.5);
vec2 dir = v_texCoord - center;
float dist = length(dir);
if (dist < 1e-4) {
fragColor0 = texture(u_image0, v_texCoord);
return;
}
vec4 sum = vec4(0.0);
float totalWeight = 0.0;
float angleStep = radius * RADIAL_STRENGTH;
dir /= dist;
float cosStep = cos(angleStep);
float sinStep = sin(angleStep);
float negAngle = -float(RADIAL_SAMPLES) * angleStep;
vec2 rotDir = vec2(
dir.x * cos(negAngle) - dir.y * sin(negAngle),
dir.x * sin(negAngle) + dir.y * cos(negAngle)
);
for (int i = -RADIAL_SAMPLES; i <= RADIAL_SAMPLES; i++) {
vec2 uv = center + rotDir * dist;
float w = 1.0 - abs(float(i)) / float(RADIAL_SAMPLES);
sum += texture(u_image0, uv) * w;
totalWeight += w;
rotDir = vec2(
rotDir.x * cosStep - rotDir.y * sinStep,
rotDir.x * sinStep + rotDir.y * cosStep
);
}
fragColor0 = sum / max(totalWeight, 0.001);
return;
}
// Separable Gaussian / Box blur
int samples = int(ceil(radius));
if (samples == 0) {
fragColor0 = texture(u_image0, v_texCoord);
return;
}
// Direction: pass 0 = horizontal, pass 1 = vertical
vec2 dir = (u_pass == 0) ? vec2(1.0, 0.0) : vec2(0.0, 1.0);
vec4 color = vec4(0.0);
float totalWeight = 0.0;
float sigma = radius / 2.0;
for (int i = -samples; i <= samples; i++) {
vec2 offset = dir * float(i) * texelSize;
vec4 sample_color = texture(u_image0, v_texCoord + offset);
float weight;
if (u_int0 == BLUR_GAUSSIAN) {
weight = gaussian(float(i), sigma);
} else {
// BLUR_BOX
weight = 1.0;
}
color += sample_color * weight;
totalWeight += weight;
}
fragColor0 = color / totalWeight;
}

View File

@@ -1,19 +0,0 @@
#version 300 es
precision highp float;
uniform sampler2D u_image0;
in vec2 v_texCoord;
layout(location = 0) out vec4 fragColor0;
layout(location = 1) out vec4 fragColor1;
layout(location = 2) out vec4 fragColor2;
layout(location = 3) out vec4 fragColor3;
void main() {
vec4 color = texture(u_image0, v_texCoord);
// Output each channel as grayscale to separate render targets
fragColor0 = vec4(vec3(color.r), 1.0); // Red channel
fragColor1 = vec4(vec3(color.g), 1.0); // Green channel
fragColor2 = vec4(vec3(color.b), 1.0); // Blue channel
fragColor3 = vec4(vec3(color.a), 1.0); // Alpha channel
}

View File

@@ -1,71 +0,0 @@
#version 300 es
precision highp float;
// Levels Adjustment
// u_int0: channel (0=RGB, 1=R, 2=G, 3=B) default: 0
// u_float0: input black (0-255) default: 0
// u_float1: input white (0-255) default: 255
// u_float2: gamma (0.01-9.99) default: 1.0
// u_float3: output black (0-255) default: 0
// u_float4: output white (0-255) default: 255
uniform sampler2D u_image0;
uniform int u_int0;
uniform float u_float0;
uniform float u_float1;
uniform float u_float2;
uniform float u_float3;
uniform float u_float4;
in vec2 v_texCoord;
out vec4 fragColor;
vec3 applyLevels(vec3 color, float inBlack, float inWhite, float gamma, float outBlack, float outWhite) {
float inRange = max(inWhite - inBlack, 0.0001);
vec3 result = clamp((color - inBlack) / inRange, 0.0, 1.0);
result = pow(result, vec3(1.0 / gamma));
result = mix(vec3(outBlack), vec3(outWhite), result);
return result;
}
float applySingleChannel(float value, float inBlack, float inWhite, float gamma, float outBlack, float outWhite) {
float inRange = max(inWhite - inBlack, 0.0001);
float result = clamp((value - inBlack) / inRange, 0.0, 1.0);
result = pow(result, 1.0 / gamma);
result = mix(outBlack, outWhite, result);
return result;
}
void main() {
vec4 texColor = texture(u_image0, v_texCoord);
vec3 color = texColor.rgb;
float inBlack = u_float0 / 255.0;
float inWhite = u_float1 / 255.0;
float gamma = u_float2;
float outBlack = u_float3 / 255.0;
float outWhite = u_float4 / 255.0;
vec3 result;
if (u_int0 == 0) {
result = applyLevels(color, inBlack, inWhite, gamma, outBlack, outWhite);
}
else if (u_int0 == 1) {
result = color;
result.r = applySingleChannel(color.r, inBlack, inWhite, gamma, outBlack, outWhite);
}
else if (u_int0 == 2) {
result = color;
result.g = applySingleChannel(color.g, inBlack, inWhite, gamma, outBlack, outWhite);
}
else if (u_int0 == 3) {
result = color;
result.b = applySingleChannel(color.b, inBlack, inWhite, gamma, outBlack, outWhite);
}
else {
result = color;
}
fragColor = vec4(result, texColor.a);
}

View File

@@ -1,28 +0,0 @@
# GLSL Shader Sources
This folder contains the GLSL fragment shaders extracted from blueprint JSON files for easier editing and version control.
## File Naming Convention
`{Blueprint_Name}_{node_id}.frag`
- **Blueprint_Name**: The JSON filename with spaces/special chars replaced by underscores
- **node_id**: The GLSLShader node ID within the subgraph
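As a concrete illustration (a minimal sketch; the `shader_filename` helper is hypothetical, but its regex mirrors `sanitize_filename` in `update_blueprints.py`), a blueprint named `Sharpen` whose subgraph contains a `GLSLShader` node with id `23` maps to `Sharpen_23.frag`:
```python
import re

def shader_filename(blueprint_name: str, node_id: int) -> str:
    # Replace anything that is not a word character or '-' with '_',
    # matching the naming convention described above.
    safe_name = re.sub(r'[^\w\-]', '_', blueprint_name)
    return f"{safe_name}_{node_id}.frag"

print(shader_filename("Sharpen", 23))  # -> Sharpen_23.frag
```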
## Usage
```bash
# Extract shaders from blueprint JSONs to this folder
python update_blueprints.py extract
# Patch edited shaders back into blueprint JSONs
python update_blueprints.py patch
```
## Workflow
1. Run `extract` to pull current shaders from JSONs
2. Edit `.frag` files
3. Run `patch` to update the blueprint JSONs
4. Test
5. Commit both `.frag` files and updated JSONs
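For reference when inspecting or patching blueprints by hand: the fragment shader source lives on the `GLSLShader` node under `definitions.subgraphs[*].nodes[*].widgets_values` (the first widget value in these blueprints). Below is a condensed read-only sketch, assuming the same JSON layout that `update_blueprints.py` walks; the `../Sharpen.json` path is only an example.
```python
import json

# Example blueprint; any JSON in the parent blueprints folder has the same shape.
with open("../Sharpen.json") as f:
    data = json.load(f)

for subgraph in data.get("definitions", {}).get("subgraphs", []):
    for node in subgraph.get("nodes", []):
        if node.get("type") == "GLSLShader":
            shader_code = node["widgets_values"][0]  # fragment shader source string
            print(node["id"], shader_code.splitlines()[0])  # e.g. 23 #version 300 es
```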

View File

@@ -1,28 +0,0 @@
#version 300 es
precision highp float;
uniform sampler2D u_image0;
uniform vec2 u_resolution;
uniform float u_float0; // strength [0.0 - 2.0] typical: 0.3-1.0
in vec2 v_texCoord;
layout(location = 0) out vec4 fragColor0;
void main() {
vec2 texel = 1.0 / u_resolution;
// Sample center and neighbors
vec4 center = texture(u_image0, v_texCoord);
vec4 top = texture(u_image0, v_texCoord + vec2( 0.0, -texel.y));
vec4 bottom = texture(u_image0, v_texCoord + vec2( 0.0, texel.y));
vec4 left = texture(u_image0, v_texCoord + vec2(-texel.x, 0.0));
vec4 right = texture(u_image0, v_texCoord + vec2( texel.x, 0.0));
// Edge enhancement (Laplacian)
vec4 edges = center * 4.0 - top - bottom - left - right;
// Add edges back scaled by strength
vec4 sharpened = center + edges * u_float0;
fragColor0 = vec4(clamp(sharpened.rgb, 0.0, 1.0), center.a);
}

View File

@@ -1,61 +0,0 @@
#version 300 es
precision highp float;
uniform sampler2D u_image0;
uniform vec2 u_resolution;
uniform float u_float0; // amount [0.0 - 3.0] typical: 0.5-1.5
uniform float u_float1; // radius [0.5 - 10.0] blur radius in pixels
uniform float u_float2; // threshold [0.0 - 0.1] min difference to sharpen
in vec2 v_texCoord;
layout(location = 0) out vec4 fragColor0;
float gaussian(float x, float sigma) {
return exp(-(x * x) / (2.0 * sigma * sigma));
}
float getLuminance(vec3 color) {
return dot(color, vec3(0.2126, 0.7152, 0.0722));
}
void main() {
vec2 texel = 1.0 / u_resolution;
float radius = max(u_float1, 0.5);
float amount = u_float0;
float threshold = u_float2;
vec4 original = texture(u_image0, v_texCoord);
// Gaussian blur for the "unsharp" mask
int samples = int(ceil(radius));
float sigma = radius / 2.0;
vec4 blurred = vec4(0.0);
float totalWeight = 0.0;
for (int x = -samples; x <= samples; x++) {
for (int y = -samples; y <= samples; y++) {
vec2 offset = vec2(float(x), float(y)) * texel;
vec4 sample_color = texture(u_image0, v_texCoord + offset);
float dist = length(vec2(float(x), float(y)));
float weight = gaussian(dist, sigma);
blurred += sample_color * weight;
totalWeight += weight;
}
}
blurred /= totalWeight;
// Unsharp mask = original - blurred
vec3 mask = original.rgb - blurred.rgb;
// Luminance-based threshold with smooth falloff
float lumaDelta = abs(getLuminance(original.rgb) - getLuminance(blurred.rgb));
float thresholdScale = smoothstep(0.0, threshold, lumaDelta);
mask *= thresholdScale;
// Sharpen: original + mask * amount
vec3 sharpened = original.rgb + mask * amount;
fragColor0 = vec4(clamp(sharpened, 0.0, 1.0), original.a);
}

View File

@@ -1,159 +0,0 @@
#!/usr/bin/env python3
"""
Shader Blueprint Updater
Syncs GLSL shader files between this folder and blueprint JSON files.
File naming convention:
{Blueprint Name}_{node_id}.frag
Usage:
python update_blueprints.py extract # Extract shaders from JSONs to here
python update_blueprints.py patch # Patch shaders back into JSONs
python update_blueprints.py # Same as patch (default)
"""
import json
import logging
import sys
import re
from pathlib import Path
logging.basicConfig(level=logging.INFO, format='%(message)s')
logger = logging.getLogger(__name__)
GLSL_DIR = Path(__file__).parent
BLUEPRINTS_DIR = GLSL_DIR.parent
def get_blueprint_files():
"""Get all blueprint JSON files."""
return sorted(BLUEPRINTS_DIR.glob("*.json"))
def sanitize_filename(name):
"""Convert blueprint name to safe filename."""
return re.sub(r'[^\w\-]', '_', name)
def extract_shaders():
"""Extract all shaders from blueprint JSONs to this folder."""
extracted = 0
for json_path in get_blueprint_files():
blueprint_name = json_path.stem
try:
with open(json_path, 'r') as f:
data = json.load(f)
except (json.JSONDecodeError, IOError) as e:
logger.warning("Skipping %s: %s", json_path.name, e)
continue
# Find GLSLShader nodes in subgraphs
for subgraph in data.get('definitions', {}).get('subgraphs', []):
for node in subgraph.get('nodes', []):
if node.get('type') == 'GLSLShader':
node_id = node.get('id')
widgets = node.get('widgets_values', [])
# Find shader code (first string that looks like GLSL)
for widget in widgets:
if isinstance(widget, str) and widget.startswith('#version'):
safe_name = sanitize_filename(blueprint_name)
frag_name = f"{safe_name}_{node_id}.frag"
frag_path = GLSL_DIR / frag_name
with open(frag_path, 'w') as f:
f.write(widget)
logger.info(" Extracted: %s", frag_name)
extracted += 1
break
logger.info("\nExtracted %d shader(s)", extracted)
def patch_shaders():
"""Patch shaders from this folder back into blueprint JSONs."""
# Build lookup: blueprint_name -> [(node_id, shader_code), ...]
shader_updates = {}
for frag_path in sorted(GLSL_DIR.glob("*.frag")):
# Parse filename: {blueprint_name}_{node_id}.frag
parts = frag_path.stem.rsplit('_', 1)
if len(parts) != 2:
logger.warning("Skipping %s: invalid filename format", frag_path.name)
continue
blueprint_name, node_id_str = parts
try:
node_id = int(node_id_str)
except ValueError:
logger.warning("Skipping %s: invalid node_id", frag_path.name)
continue
with open(frag_path, 'r') as f:
shader_code = f.read()
if blueprint_name not in shader_updates:
shader_updates[blueprint_name] = []
shader_updates[blueprint_name].append((node_id, shader_code))
# Apply updates to JSON files
patched = 0
for json_path in get_blueprint_files():
blueprint_name = sanitize_filename(json_path.stem)
if blueprint_name not in shader_updates:
continue
try:
with open(json_path, 'r') as f:
data = json.load(f)
except (json.JSONDecodeError, IOError) as e:
logger.error("Error reading %s: %s", json_path.name, e)
continue
modified = False
for node_id, shader_code in shader_updates[blueprint_name]:
# Find the node and update
for subgraph in data.get('definitions', {}).get('subgraphs', []):
for node in subgraph.get('nodes', []):
if node.get('id') == node_id and node.get('type') == 'GLSLShader':
widgets = node.get('widgets_values', [])
if len(widgets) > 0 and widgets[0] != shader_code:
widgets[0] = shader_code
modified = True
logger.info(" Patched: %s (node %d)", json_path.name, node_id)
patched += 1
if modified:
with open(json_path, 'w') as f:
json.dump(data, f)
if patched == 0:
logger.info("No changes to apply.")
else:
logger.info("\nPatched %d shader(s)", patched)
def main():
if len(sys.argv) < 2:
command = "patch"
else:
command = sys.argv[1].lower()
if command == "extract":
logger.info("Extracting shaders from blueprints...")
extract_shaders()
elif command in ("patch", "update", "apply"):
logger.info("Patching shaders into blueprints...")
patch_shaders()
else:
logger.info(__doc__)
sys.exit(1)
if __name__ == "__main__":
main()

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -1 +0,0 @@
{"revision": 0, "last_node_id": 29, "last_link_id": 0, "nodes": [{"id": 29, "type": "4c9d6ea4-b912-40e5-8766-6793a9758c53", "pos": [1970, -230], "size": [180, 86], "flags": {}, "order": 5, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": null}], "outputs": [{"label": "R", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": []}, {"label": "G", "localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": []}, {"label": "B", "localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": []}, {"label": "A", "localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": []}], "title": "Image Channels", "properties": {"proxyWidgets": []}, "widgets_values": []}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "4c9d6ea4-b912-40e5-8766-6793a9758c53", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 28, "lastLinkId": 39, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Image Channels", "inputNode": {"id": -10, "bounding": [1820, -185, 120, 60]}, "outputNode": {"id": -20, "bounding": [2460, -215, 120, 120]}, "inputs": [{"id": "3522932b-2d86-4a1f-a02a-cb29f3a9d7fe", "name": "images.image0", "type": "IMAGE", "linkIds": [39], "localized_name": "images.image0", "label": "image", "pos": [1920, -165]}], "outputs": [{"id": "605cb9c3-b065-4d9b-81d2-3ec331889b2b", "name": "IMAGE0", "type": "IMAGE", "linkIds": [26], "localized_name": "IMAGE0", "label": "R", "pos": [2480, -195]}, {"id": "fb44a77e-0522-43e9-9527-82e7465b3596", "name": "IMAGE1", "type": "IMAGE", "linkIds": [27], "localized_name": "IMAGE1", "label": "G", "pos": [2480, -175]}, {"id": "81460ee6-0131-402a-874f-6bf3001fc4ff", "name": "IMAGE2", "type": "IMAGE", "linkIds": [28], "localized_name": "IMAGE2", "label": "B", "pos": [2480, -155]}, {"id": "ae690246-80d4-4951-b1d9-9306d8a77417", "name": "IMAGE3", "type": "IMAGE", "linkIds": [29], "localized_name": "IMAGE3", "label": "A", "pos": [2480, -135]}], "widgets": [], "nodes": [{"id": 23, "type": "GLSLShader", "pos": [2000, -330], "size": [400, 172], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": 39}, {"localized_name": "fragment_shader", "name": "fragment_shader", "type": "STRING", "widget": {"name": "fragment_shader"}, "link": null}, {"localized_name": "size_mode", "name": "size_mode", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "size_mode"}, "link": null}, {"label": "image1", "localized_name": "images.image1", "name": "images.image1", "shape": 7, "type": "IMAGE", "link": null}], "outputs": [{"label": "R", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": [26]}, {"label": "G", "localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": [27]}, {"label": "B", "localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": [28]}, {"label": "A", "localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": [29]}], "properties": {"Node name for S&R": "GLSLShader"}, "widgets_values": ["#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\nlayout(location = 1) out vec4 fragColor1;\nlayout(location = 2) out vec4 fragColor2;\nlayout(location = 3) out vec4 fragColor3;\n\nvoid main() {\n vec4 color = texture(u_image0, v_texCoord);\n // Output each channel as grayscale to separate render targets\n fragColor0 = 
vec4(vec3(color.r), 1.0); // Red channel\n fragColor1 = vec4(vec3(color.g), 1.0); // Green channel\n fragColor2 = vec4(vec3(color.b), 1.0); // Blue channel\n fragColor3 = vec4(vec3(color.a), 1.0); // Alpha channel\n}\n", "from_input"]}], "groups": [], "links": [{"id": 39, "origin_id": -10, "origin_slot": 0, "target_id": 23, "target_slot": 0, "type": "IMAGE"}, {"id": 26, "origin_id": 23, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}, {"id": 27, "origin_id": 23, "origin_slot": 1, "target_id": -20, "target_slot": 1, "type": "IMAGE"}, {"id": 28, "origin_id": 23, "origin_slot": 2, "target_id": -20, "target_slot": 2, "type": "IMAGE"}, {"id": 29, "origin_id": 23, "origin_slot": 3, "target_id": -20, "target_slot": 3, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}}]}}

File diff suppressed because one or more lines are too long

View File

@@ -1 +0,0 @@
{"revision":0,"last_node_id":25,"last_link_id":0,"nodes":[{"id":25,"type":"621ba4e2-22a8-482d-a369-023753198b7b","pos":[4610,-790],"size":[230,58],"flags":{},"order":4,"mode":0,"inputs":[{"label":"image","localized_name":"images.image0","name":"images.image0","type":"IMAGE","link":null}],"outputs":[{"label":"IMAGE","localized_name":"IMAGE0","name":"IMAGE0","type":"IMAGE","links":[]}],"title":"Sharpen","properties":{"proxyWidgets":[["24","value"]]},"widgets_values":[]}],"links":[],"version":0.4,"definitions":{"subgraphs":[{"id":"621ba4e2-22a8-482d-a369-023753198b7b","version":1,"state":{"lastGroupId":0,"lastNodeId":24,"lastLinkId":36,"lastRerouteId":0},"revision":0,"config":{},"name":"Sharpen","inputNode":{"id":-10,"bounding":[4090,-825,120,60]},"outputNode":{"id":-20,"bounding":[5150,-825,120,60]},"inputs":[{"id":"37011fb7-14b7-4e0e-b1a0-6a02e8da1fd7","name":"images.image0","type":"IMAGE","linkIds":[34],"localized_name":"images.image0","label":"image","pos":[4190,-805]}],"outputs":[{"id":"e9182b3f-635c-4cd4-a152-4b4be17ae4b9","name":"IMAGE0","type":"IMAGE","linkIds":[35],"localized_name":"IMAGE0","label":"IMAGE","pos":[5170,-805]}],"widgets":[],"nodes":[{"id":24,"type":"PrimitiveFloat","pos":[4280,-1240],"size":[270,58],"flags":{},"order":0,"mode":0,"inputs":[{"label":"strength","localized_name":"value","name":"value","type":"FLOAT","widget":{"name":"value"},"link":null}],"outputs":[{"localized_name":"FLOAT","name":"FLOAT","type":"FLOAT","links":[36]}],"properties":{"Node name for S&R":"PrimitiveFloat","min":0,"max":3,"precision":2,"step":0.05},"widgets_values":[0.5]},{"id":23,"type":"GLSLShader","pos":[4570,-1240],"size":[370,192],"flags":{},"order":1,"mode":0,"inputs":[{"label":"image0","localized_name":"images.image0","name":"images.image0","type":"IMAGE","link":34},{"label":"image1","localized_name":"images.image1","name":"images.image1","shape":7,"type":"IMAGE","link":null},{"label":"u_float0","localized_name":"floats.u_float0","name":"floats.u_float0","shape":7,"type":"FLOAT","link":36},{"label":"u_float1","localized_name":"floats.u_float1","name":"floats.u_float1","shape":7,"type":"FLOAT","link":null},{"label":"u_int0","localized_name":"ints.u_int0","name":"ints.u_int0","shape":7,"type":"INT","link":null},{"localized_name":"fragment_shader","name":"fragment_shader","type":"STRING","widget":{"name":"fragment_shader"},"link":null},{"localized_name":"size_mode","name":"size_mode","type":"COMFY_DYNAMICCOMBO_V3","widget":{"name":"size_mode"},"link":null}],"outputs":[{"localized_name":"IMAGE0","name":"IMAGE0","type":"IMAGE","links":[35]},{"localized_name":"IMAGE1","name":"IMAGE1","type":"IMAGE","links":null},{"localized_name":"IMAGE2","name":"IMAGE2","type":"IMAGE","links":null},{"localized_name":"IMAGE3","name":"IMAGE3","type":"IMAGE","links":null}],"properties":{"Node name for S&R":"GLSLShader"},"widgets_values":["#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform vec2 u_resolution;\nuniform float u_float0; // strength [0.0 2.0] typical: 0.31.0\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\n\nvoid main() {\n vec2 texel = 1.0 / u_resolution;\n \n // Sample center and neighbors\n vec4 center = texture(u_image0, v_texCoord);\n vec4 top = texture(u_image0, v_texCoord + vec2( 0.0, -texel.y));\n vec4 bottom = texture(u_image0, v_texCoord + vec2( 0.0, texel.y));\n vec4 left = texture(u_image0, v_texCoord + vec2(-texel.x, 0.0));\n vec4 right = texture(u_image0, v_texCoord + vec2( texel.x, 0.0));\n \n // Edge enhancement (Laplacian)\n vec4 edges = 
center * 4.0 - top - bottom - left - right;\n \n // Add edges back scaled by strength\n vec4 sharpened = center + edges * u_float0;\n \n fragColor0 = vec4(clamp(sharpened.rgb, 0.0, 1.0), center.a);\n}","from_input"]}],"groups":[],"links":[{"id":36,"origin_id":24,"origin_slot":0,"target_id":23,"target_slot":2,"type":"FLOAT"},{"id":34,"origin_id":-10,"origin_slot":0,"target_id":23,"target_slot":0,"type":"IMAGE"},{"id":35,"origin_id":23,"origin_slot":0,"target_id":-20,"target_slot":0,"type":"IMAGE"}],"extra":{"workflowRendererVersion":"LG"}}]}}

File diff suppressed because one or more lines are too long

View File

@@ -25,11 +25,11 @@ class AudioEncoderModel():
elif model_type == "whisper3":
self.model = WhisperLargeV3(**model_config)
self.model.eval()
self.patcher = comfy.model_patcher.CoreModelPatcher(self.model, load_device=self.load_device, offload_device=offload_device)
self.patcher = comfy.model_patcher.ModelPatcher(self.model, load_device=self.load_device, offload_device=offload_device)
self.model_sample_rate = 16000
def load_sd(self, sd):
return self.model.load_state_dict(sd, strict=False, assign=self.patcher.is_dynamic())
return self.model.load_state_dict(sd, strict=False)
def get_sd(self):
return self.model.state_dict()

View File

@@ -0,0 +1,13 @@
import pickle
load = pickle.load
class Empty:
pass
class Unpickler(pickle.Unpickler):
def find_class(self, module, name):
#TODO: safe unpickle
if module.startswith("pytorch_lightning"):
return Empty
return super().find_class(module, name)

View File

@@ -413,8 +413,7 @@ class ControlNet(nn.Module):
out_middle = []
if self.num_classes is not None:
if y is None:
raise ValueError("y is None, did you try using a controlnet for SDXL on SD1?")
assert y.shape[0] == x.shape[0]
emb = emb + self.label_emb(y)
h = x

View File

@@ -97,13 +97,6 @@ class LatentPreviewMethod(enum.Enum):
Latent2RGB = "latent2rgb"
TAESD = "taesd"
@classmethod
def from_string(cls, value: str):
for member in cls:
if member.value == value:
return member
return None
parser.add_argument("--preview-method", type=LatentPreviewMethod, default=LatentPreviewMethod.NoPreviews, help="Default preview method for sampler nodes.", action=EnumAction)
parser.add_argument("--preview-size", type=int, default=512, help="Sets the maximum preview size for sampler nodes.")
@@ -112,7 +105,6 @@ cache_group = parser.add_mutually_exclusive_group()
cache_group.add_argument("--cache-classic", action="store_true", help="Use the old style (aggressive) caching.")
cache_group.add_argument("--cache-lru", type=int, default=0, help="Use LRU caching with a maximum of N node results cached. May use more RAM/VRAM.")
cache_group.add_argument("--cache-none", action="store_true", help="Reduced RAM/VRAM usage at the expense of executing every node for each run.")
cache_group.add_argument("--cache-ram", nargs='?', const=4.0, type=float, default=0, help="Use RAM pressure caching with the specified headroom threshold. If available RAM drops below the threhold the cache remove large items to free RAM. Default 4GB")
attn_group = parser.add_mutually_exclusive_group()
attn_group.add_argument("--use-split-cross-attention", action="store_true", help="Use the split cross attention optimization. Ignored when xformers is used.")
@@ -128,12 +120,6 @@ upcast.add_argument("--force-upcast-attention", action="store_true", help="Force
upcast.add_argument("--dont-upcast-attention", action="store_true", help="Disable all upcasting of attention. Should be unnecessary except for debugging.")
parser.add_argument("--enable-manager", action="store_true", help="Enable the ComfyUI-Manager feature.")
manager_group = parser.add_mutually_exclusive_group()
manager_group.add_argument("--disable-manager-ui", action="store_true", help="Disables only the ComfyUI-Manager UI and endpoints. Scheduled installations and similar background tasks will still operate.")
manager_group.add_argument("--enable-manager-legacy-ui", action="store_true", help="Enables the legacy UI of ComfyUI-Manager")
vram_group = parser.add_mutually_exclusive_group()
vram_group.add_argument("--gpu-only", action="store_true", help="Store and run everything (text encoders/CLIP models, etc... on the GPU).")
vram_group.add_argument("--highvram", action="store_true", help="By default models will be unloaded to CPU memory after being used. This option keeps them in GPU memory.")
@@ -144,8 +130,9 @@ vram_group.add_argument("--cpu", action="store_true", help="To use the CPU for e
parser.add_argument("--reserve-vram", type=float, default=None, help="Set the amount of vram in GB you want to reserve for use by your OS/other software. By default some amount is reserved depending on your OS.")
parser.add_argument("--async-offload", nargs='?', const=2, type=int, default=None, metavar="NUM_STREAMS", help="Use async weight offloading. An optional argument controls the amount of offload streams. Default is 2. Enabled by default on Nvidia.")
parser.add_argument("--disable-async-offload", action="store_true", help="Disable async weight offloading.")
parser.add_argument("--async-offload", action="store_true", help="Use async weight offloading.")
parser.add_argument("--flipflop-offload", action="store_true", help="Use async flipflop weight offloading for supported DiT models.")
parser.add_argument("--force-non-blocking", action="store_true", help="Force ComfyUI to use non-blocking operations for all applicable tensors. This may improve performance on some non-Nvidia systems but can cause issues with some workflows.")
@@ -159,11 +146,8 @@ class PerformanceFeature(enum.Enum):
Fp8MatrixMultiplication = "fp8_matrix_mult"
CublasOps = "cublas_ops"
AutoTune = "autotune"
DynamicVRAM = "dynamic_vram"
parser.add_argument("--fast", nargs="*", type=PerformanceFeature, help="Enable some untested and potentially quality deteriorating optimizations. This is used to test new features so using it might crash your comfyui. --fast with no arguments enables everything. You can pass a list specific optimizations if you only want to enable specific ones. Current valid optimizations: {}".format(" ".join(map(lambda c: c.value, PerformanceFeature))))
parser.add_argument("--disable-pinned-memory", action="store_true", help="Disable pinned memory use.")
parser.add_argument("--fast", nargs="*", type=PerformanceFeature, help="Enable some untested and potentially quality deteriorating optimizations. --fast with no arguments enables everything. You can pass a list specific optimizations if you only want to enable specific ones. Current valid optimizations: {}".format(" ".join(map(lambda c: c.value, PerformanceFeature))))
parser.add_argument("--mmap-torch-files", action="store_true", help="Use mmap when loading ckpt/pt files.")
parser.add_argument("--disable-mmap", action="store_true", help="Don't use mmap when loading safetensors.")
@@ -175,14 +159,13 @@ parser.add_argument("--windows-standalone-build", action="store_true", help="Win
parser.add_argument("--disable-metadata", action="store_true", help="Disable saving prompt metadata in files.")
parser.add_argument("--disable-all-custom-nodes", action="store_true", help="Disable loading all custom nodes.")
parser.add_argument("--whitelist-custom-nodes", type=str, nargs='+', default=[], help="Specify custom node folders to load even when --disable-all-custom-nodes is enabled.")
parser.add_argument("--disable-api-nodes", action="store_true", help="Disable loading all api nodes. Also prevents the frontend from communicating with the internet.")
parser.add_argument("--disable-api-nodes", action="store_true", help="Disable loading all api nodes.")
parser.add_argument("--multi-user", action="store_true", help="Enables per-user storage.")
parser.add_argument("--verbose", default='INFO', const='DEBUG', nargs="?", choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], help='Set the logging level')
parser.add_argument("--log-stdout", action="store_true", help="Send normal process output to stdout instead of stderr (default).")
# The default built-in provider hosted under web/
DEFAULT_VERSION_STRING = "comfyanonymous/ComfyUI@latest"
@@ -232,7 +215,6 @@ database_default_path = os.path.abspath(
os.path.join(os.path.dirname(__file__), "..", "user", "comfyui.db")
)
parser.add_argument("--database-url", type=str, default=f"sqlite:///{database_default_path}", help="Specify the database URL, e.g. for an in-memory database you can use 'sqlite:///:memory:'.")
parser.add_argument("--disable-assets-autoscan", action="store_true", help="Disable asset scanning on startup for database synchronization.")
if comfy.options.args_parsing:
args = parser.parse_args()
@@ -258,6 +240,3 @@ elif args.fast == []:
# '--fast' is provided with a list of performance features, use that list
else:
args.fast = set(args.fast)
def enables_dynamic_vram():
return PerformanceFeature.DynamicVRAM in args.fast and not args.highvram and not args.gpu_only

View File

@@ -1,59 +1,6 @@
import torch
from comfy.ldm.modules.attention import optimized_attention_for_device
import comfy.ops
import math
def clip_preprocess(image, size=224, mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711], crop=True):
image = image[:, :, :, :3] if image.shape[3] > 3 else image
mean = torch.tensor(mean, device=image.device, dtype=image.dtype)
std = torch.tensor(std, device=image.device, dtype=image.dtype)
image = image.movedim(-1, 1)
if not (image.shape[2] == size and image.shape[3] == size):
if crop:
scale = (size / min(image.shape[2], image.shape[3]))
scale_size = (round(scale * image.shape[2]), round(scale * image.shape[3]))
else:
scale_size = (size, size)
image = torch.nn.functional.interpolate(image, size=scale_size, mode="bicubic", antialias=True)
h = (image.shape[2] - size)//2
w = (image.shape[3] - size)//2
image = image[:,:,h:h+size,w:w+size]
image = torch.clip((255. * image), 0, 255).round() / 255.0
return (image - mean.view([3,1,1])) / std.view([3,1,1])
def siglip2_flex_calc_resolution(oh, ow, patch_size, max_num_patches, eps=1e-5):
def scale_dim(size, scale):
scaled = math.ceil(size * scale / patch_size) * patch_size
return max(patch_size, int(scaled))
# Binary search for optimal scale
lo, hi = eps / 10, 100.0
while hi - lo >= eps:
mid = (lo + hi) / 2
h, w = scale_dim(oh, mid), scale_dim(ow, mid)
if (h // patch_size) * (w // patch_size) <= max_num_patches:
lo = mid
else:
hi = mid
return scale_dim(oh, lo), scale_dim(ow, lo)
def siglip2_preprocess(image, size, patch_size, num_patches, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5], crop=True):
if size > 0:
return clip_preprocess(image, size=size, mean=mean, std=std, crop=crop)
image = image[:, :, :, :3] if image.shape[3] > 3 else image
mean = torch.tensor(mean, device=image.device, dtype=image.dtype)
std = torch.tensor(std, device=image.device, dtype=image.dtype)
image = image.movedim(-1, 1)
b, c, h, w = image.shape
h, w = siglip2_flex_calc_resolution(h, w, patch_size, num_patches)
image = torch.nn.functional.interpolate(image, size=(h, w), mode="bilinear", antialias=True)
image = torch.clip((255. * image), 0, 255).round() / 255.0
return (image - mean.view([3, 1, 1])) / std.view([3, 1, 1])
class CLIPAttention(torch.nn.Module):
def __init__(self, embed_dim, heads, dtype, device, operations):
@@ -209,27 +156,6 @@ class CLIPTextModel(torch.nn.Module):
out = self.text_projection(x[2])
return (x[0], x[1], out, x[2])
def siglip2_pos_embed(embed_weight, embeds, orig_shape):
embed_weight_len = round(embed_weight.shape[0] ** 0.5)
embed_weight = comfy.ops.cast_to_input(embed_weight, embeds).movedim(1, 0).reshape(1, -1, embed_weight_len, embed_weight_len)
embed_weight = torch.nn.functional.interpolate(embed_weight, size=orig_shape, mode="bilinear", align_corners=False, antialias=True)
embed_weight = embed_weight.reshape(-1, embed_weight.shape[-2] * embed_weight.shape[-1]).movedim(0, 1)
return embeds + embed_weight
class Siglip2Embeddings(torch.nn.Module):
def __init__(self, embed_dim, num_channels=3, patch_size=14, image_size=224, model_type="", num_patches=None, dtype=None, device=None, operations=None):
super().__init__()
self.patch_embedding = operations.Linear(num_channels * patch_size * patch_size, embed_dim, dtype=dtype, device=device)
self.position_embedding = operations.Embedding(num_patches, embed_dim, dtype=dtype, device=device)
self.patch_size = patch_size
def forward(self, pixel_values):
b, c, h, w = pixel_values.shape
img = pixel_values.movedim(1, -1).reshape(b, h // self.patch_size, self.patch_size, w // self.patch_size, self.patch_size, c)
img = img.permute(0, 1, 3, 2, 4, 5)
img = img.reshape(b, img.shape[1] * img.shape[2], -1)
img = self.patch_embedding(img)
return siglip2_pos_embed(self.position_embedding.weight, img, (h // self.patch_size, w // self.patch_size))
class CLIPVisionEmbeddings(torch.nn.Module):
def __init__(self, embed_dim, num_channels=3, patch_size=14, image_size=224, model_type="", dtype=None, device=None, operations=None):
@@ -273,11 +199,8 @@ class CLIPVision(torch.nn.Module):
intermediate_activation = config_dict["hidden_act"]
model_type = config_dict["model_type"]
if model_type in ["siglip2_vision_model"]:
self.embeddings = Siglip2Embeddings(embed_dim, config_dict["num_channels"], config_dict["patch_size"], config_dict["image_size"], model_type=model_type, num_patches=config_dict.get("num_patches", None), dtype=dtype, device=device, operations=operations)
else:
self.embeddings = CLIPVisionEmbeddings(embed_dim, config_dict["num_channels"], config_dict["patch_size"], config_dict["image_size"], model_type=model_type, dtype=dtype, device=device, operations=operations)
if model_type in ["siglip_vision_model", "siglip2_vision_model"]:
self.embeddings = CLIPVisionEmbeddings(embed_dim, config_dict["num_channels"], config_dict["patch_size"], config_dict["image_size"], model_type=model_type, dtype=dtype, device=device, operations=operations)
if model_type == "siglip_vision_model":
self.pre_layrnorm = lambda a: a
self.output_layernorm = True
else:

View File

@@ -1,5 +1,6 @@
from .utils import load_torch_file, transformers_convert, state_dict_prefix_replace
import os
import torch
import json
import logging
@@ -16,12 +17,28 @@ class Output:
def __setitem__(self, key, item):
setattr(self, key, item)
clip_preprocess = comfy.clip_model.clip_preprocess # Prevent some stuff from breaking, TODO: remove eventually
def clip_preprocess(image, size=224, mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711], crop=True):
image = image[:, :, :, :3] if image.shape[3] > 3 else image
mean = torch.tensor(mean, device=image.device, dtype=image.dtype)
std = torch.tensor(std, device=image.device, dtype=image.dtype)
image = image.movedim(-1, 1)
if not (image.shape[2] == size and image.shape[3] == size):
if crop:
scale = (size / min(image.shape[2], image.shape[3]))
scale_size = (round(scale * image.shape[2]), round(scale * image.shape[3]))
else:
scale_size = (size, size)
image = torch.nn.functional.interpolate(image, size=scale_size, mode="bicubic", antialias=True)
h = (image.shape[2] - size)//2
w = (image.shape[3] - size)//2
image = image[:,:,h:h+size,w:w+size]
image = torch.clip((255. * image), 0, 255).round() / 255.0
return (image - mean.view([3,1,1])) / std.view([3,1,1])
IMAGE_ENCODERS = {
"clip_vision_model": comfy.clip_model.CLIPVisionModelProjection,
"siglip_vision_model": comfy.clip_model.CLIPVisionModelProjection,
"siglip2_vision_model": comfy.clip_model.CLIPVisionModelProjection,
"dinov2": comfy.image_encoders.dino2.Dinov2Model,
}
@@ -33,10 +50,9 @@ class ClipVisionModel():
self.image_size = config.get("image_size", 224)
self.image_mean = config.get("image_mean", [0.48145466, 0.4578275, 0.40821073])
self.image_std = config.get("image_std", [0.26862954, 0.26130258, 0.27577711])
self.model_type = config.get("model_type", "clip_vision_model")
self.config = config.copy()
model_class = IMAGE_ENCODERS.get(self.model_type)
if self.model_type == "siglip_vision_model":
model_type = config.get("model_type", "clip_vision_model")
model_class = IMAGE_ENCODERS.get(model_type)
if model_type == "siglip_vision_model":
self.return_all_hidden_states = True
else:
self.return_all_hidden_states = False
@@ -47,26 +63,22 @@ class ClipVisionModel():
self.model = model_class(config, self.dtype, offload_device, comfy.ops.manual_cast)
self.model.eval()
self.patcher = comfy.model_patcher.CoreModelPatcher(self.model, load_device=self.load_device, offload_device=offload_device)
self.patcher = comfy.model_patcher.ModelPatcher(self.model, load_device=self.load_device, offload_device=offload_device)
def load_sd(self, sd):
return self.model.load_state_dict(sd, strict=False, assign=self.patcher.is_dynamic())
return self.model.load_state_dict(sd, strict=False)
def get_sd(self):
return self.model.state_dict()
def encode_image(self, image, crop=True):
comfy.model_management.load_model_gpu(self.patcher)
if self.model_type == "siglip2_vision_model":
pixel_values = comfy.clip_model.siglip2_preprocess(image.to(self.load_device), size=self.image_size, patch_size=self.config.get("patch_size", 16), num_patches=self.config.get("num_patches", 256), mean=self.image_mean, std=self.image_std, crop=crop).float()
else:
pixel_values = comfy.clip_model.clip_preprocess(image.to(self.load_device), size=self.image_size, mean=self.image_mean, std=self.image_std, crop=crop).float()
pixel_values = clip_preprocess(image.to(self.load_device), size=self.image_size, mean=self.image_mean, std=self.image_std, crop=crop).float()
out = self.model(pixel_values=pixel_values, intermediate_output='all' if self.return_all_hidden_states else -2)
outputs = Output()
outputs["last_hidden_state"] = out[0].to(comfy.model_management.intermediate_device())
outputs["image_embeds"] = out[2].to(comfy.model_management.intermediate_device())
outputs["image_sizes"] = [pixel_values.shape[1:]] * pixel_values.shape[0]
if self.return_all_hidden_states:
all_hs = out[1].to(comfy.model_management.intermediate_device())
outputs["penultimate_hidden_states"] = all_hs[:, -2]
@@ -113,14 +125,10 @@ def load_clipvision_from_sd(sd, prefix="", convert_keys=False):
elif "vision_model.encoder.layers.22.layer_norm1.weight" in sd:
embed_shape = sd["vision_model.embeddings.position_embedding.weight"].shape[0]
if sd["vision_model.encoder.layers.0.layer_norm1.weight"].shape[0] == 1152:
patch_embedding_shape = sd["vision_model.embeddings.patch_embedding.weight"].shape
if len(patch_embedding_shape) == 2:
json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_siglip2_base_naflex.json")
else:
if embed_shape == 729:
json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_siglip_384.json")
elif embed_shape == 1024:
json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_siglip_512.json")
if embed_shape == 729:
json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_siglip_384.json")
elif embed_shape == 1024:
json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_siglip_512.json")
elif embed_shape == 577:
if "multi_modal_projector.linear_1.bias" in sd:
json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_vitl_336_llava.json")

View File

@@ -1,14 +0,0 @@
{
"num_channels": 3,
"hidden_act": "gelu_pytorch_tanh",
"hidden_size": 1152,
"image_size": -1,
"intermediate_size": 4304,
"model_type": "siglip2_vision_model",
"num_attention_heads": 16,
"num_hidden_layers": 27,
"patch_size": 16,
"num_patches": 256,
"image_mean": [0.5, 0.5, 0.5],
"image_std": [0.5, 0.5, 0.5]
}

View File

@@ -236,8 +236,6 @@ class ComfyNodeABC(ABC):
"""Flags a node as experimental, informing users that it may change or not work as expected."""
DEPRECATED: bool
"""Flags a node as deprecated, indicating to users that they should find alternatives to this node."""
DEV_ONLY: bool
"""Flags a node as dev-only, hiding it from search/menus unless dev mode is enabled."""
API_NODE: Optional[bool]
"""Flags a node as an API node. See: https://docs.comfy.org/tutorials/api-nodes/overview."""

View File

@@ -51,43 +51,32 @@ class ContextHandlerABC(ABC):
class IndexListContextWindow(ContextWindowABC):
def __init__(self, index_list: list[int], dim: int=0, total_frames: int=0):
def __init__(self, index_list: list[int], dim: int=0):
self.index_list = index_list
self.context_length = len(index_list)
self.dim = dim
self.total_frames = total_frames
self.center_ratio = (min(index_list) + max(index_list)) / (2 * total_frames)
def get_tensor(self, full: torch.Tensor, device=None, dim=None, retain_index_list=[]) -> torch.Tensor:
def get_tensor(self, full: torch.Tensor, device=None, dim=None) -> torch.Tensor:
if dim is None:
dim = self.dim
if dim == 0 and full.shape[dim] == 1:
return full
idx = tuple([slice(None)] * dim + [self.index_list])
window = full[idx]
if retain_index_list:
idx = tuple([slice(None)] * dim + [retain_index_list])
window[idx] = full[idx]
return window.to(device)
idx = [slice(None)] * dim + [self.index_list]
return full[idx].to(device)
def add_window(self, full: torch.Tensor, to_add: torch.Tensor, dim=None) -> torch.Tensor:
if dim is None:
dim = self.dim
idx = tuple([slice(None)] * dim + [self.index_list])
idx = [slice(None)] * dim + [self.index_list]
full[idx] += to_add
return full
def get_region_index(self, num_regions: int) -> int:
region_idx = int(self.center_ratio * num_regions)
return min(max(region_idx, 0), num_regions - 1)
class IndexListCallbacks:
EVALUATE_CONTEXT_WINDOWS = "evaluate_context_windows"
COMBINE_CONTEXT_WINDOW_RESULTS = "combine_context_window_results"
EXECUTE_START = "execute_start"
EXECUTE_CLEANUP = "execute_cleanup"
RESIZE_COND_ITEM = "resize_cond_item"
def init_callbacks(self):
return {}
@@ -105,8 +94,7 @@ class ContextFuseMethod:
ContextResults = collections.namedtuple("ContextResults", ['window_idx', 'sub_conds_out', 'sub_conds', 'window'])
class IndexListContextHandler(ContextHandlerABC):
def __init__(self, context_schedule: ContextSchedule, fuse_method: ContextFuseMethod, context_length: int=1, context_overlap: int=0, context_stride: int=1,
closed_loop: bool=False, dim:int=0, freenoise: bool=False, cond_retain_index_list: list[int]=[], split_conds_to_windows: bool=False):
def __init__(self, context_schedule: ContextSchedule, fuse_method: ContextFuseMethod, context_length: int=1, context_overlap: int=0, context_stride: int=1, closed_loop=False, dim=0):
self.context_schedule = context_schedule
self.fuse_method = fuse_method
self.context_length = context_length
@@ -115,18 +103,13 @@ class IndexListContextHandler(ContextHandlerABC):
self.closed_loop = closed_loop
self.dim = dim
self._step = 0
self.freenoise = freenoise
self.cond_retain_index_list = [int(x.strip()) for x in cond_retain_index_list.split(",")] if cond_retain_index_list else []
self.split_conds_to_windows = split_conds_to_windows
self.callbacks = {}
def should_use_context(self, model: BaseModel, conds: list[list[dict]], x_in: torch.Tensor, timestep: torch.Tensor, model_options: dict[str]) -> bool:
# for now, assume first dim is batch - should have stored on BaseModel in actual implementation
if x_in.size(self.dim) > self.context_length:
logging.info(f"Using context windows {self.context_length} with overlap {self.context_overlap} for {x_in.size(self.dim)} frames.")
if self.cond_retain_index_list:
logging.info(f"Retaining original cond for indexes: {self.cond_retain_index_list}")
logging.info(f"Using context windows {self.context_length} for {x_in.size(self.dim)} frames.")
return True
return False
@@ -140,11 +123,6 @@ class IndexListContextHandler(ContextHandlerABC):
return None
# reuse or resize cond items to match context requirements
resized_cond = []
# if multiple conds, split based on primary region
if self.split_conds_to_windows and len(cond_in) > 1:
region = window.get_region_index(len(cond_in))
logging.info(f"Splitting conds to windows; using region {region} for window {window.index_list[0]}-{window.index_list[-1]} with center ratio {window.center_ratio:.3f}")
cond_in = [cond_in[region]]
# cond object is a list containing a dict - outer list is irrelevant, so just loop through it
for actual_cond in cond_in:
resized_actual_cond = actual_cond.copy()
@@ -167,38 +145,13 @@ class IndexListContextHandler(ContextHandlerABC):
new_cond_item = cond_item.copy()
# when in dictionary, look for tensors and CONDCrossAttn [comfy/conds.py] (has cond attr that is a tensor)
for cond_key, cond_value in new_cond_item.items():
# Allow callbacks to handle custom conditioning items
handled = False
for callback in comfy.patcher_extension.get_all_callbacks(
IndexListCallbacks.RESIZE_COND_ITEM, self.callbacks
):
result = callback(cond_key, cond_value, window, x_in, device, new_cond_item)
if result is not None:
new_cond_item[cond_key] = result
handled = True
break
if handled:
continue
if isinstance(cond_value, torch.Tensor):
                        if (self.dim < cond_value.ndim and cond_value.size(self.dim) == x_in.size(self.dim)) or \
(cond_value.ndim < self.dim and cond_value.size(0) == x_in.size(self.dim)):
if cond_value.ndim < self.dim and cond_value.size(0) == x_in.size(self.dim):
new_cond_item[cond_key] = window.get_tensor(cond_value, device)
# Handle audio_embed (temporal dim is 1)
elif cond_key == "audio_embed" and hasattr(cond_value, "cond") and isinstance(cond_value.cond, torch.Tensor):
audio_cond = cond_value.cond
if audio_cond.ndim > 1 and audio_cond.size(1) == x_in.size(self.dim):
new_cond_item[cond_key] = cond_value._copy_with(window.get_tensor(audio_cond, device, dim=1))
# Handle vace_context (temporal dim is 3)
elif cond_key == "vace_context" and hasattr(cond_value, "cond") and isinstance(cond_value.cond, torch.Tensor):
vace_cond = cond_value.cond
if vace_cond.ndim >= 4 and vace_cond.size(3) == x_in.size(self.dim):
sliced_vace = window.get_tensor(vace_cond, device, dim=3, retain_index_list=self.cond_retain_index_list)
new_cond_item[cond_key] = cond_value._copy_with(sliced_vace)
# if has cond that is a Tensor, check if needs to be subset
elif hasattr(cond_value, "cond") and isinstance(cond_value.cond, torch.Tensor):
if (self.dim < cond_value.cond.ndim and cond_value.cond.size(self.dim) == x_in.size(self.dim)) or \
(cond_value.cond.ndim < self.dim and cond_value.cond.size(0) == x_in.size(self.dim)):
new_cond_item[cond_key] = cond_value._copy_with(window.get_tensor(cond_value.cond, device, retain_index_list=self.cond_retain_index_list))
if cond_value.cond.ndim < self.dim and cond_value.cond.size(0) == x_in.size(self.dim):
new_cond_item[cond_key] = cond_value._copy_with(window.get_tensor(cond_value.cond, device))
elif cond_key == "num_video_frames": # for SVD
new_cond_item[cond_key] = cond_value._copy_with(cond_value.cond)
new_cond_item[cond_key].cond = window.context_length
@@ -211,7 +164,7 @@ class IndexListContextHandler(ContextHandlerABC):
return resized_cond
def set_step(self, timestep: torch.Tensor, model_options: dict[str]):
mask = torch.isclose(model_options["transformer_options"]["sample_sigmas"], timestep[0], rtol=0.0001)
mask = torch.isclose(model_options["transformer_options"]["sample_sigmas"], timestep, rtol=0.0001)
matches = torch.nonzero(mask)
if torch.numel(matches) == 0:
raise Exception("No sample_sigmas matched current timestep; something went wrong.")
@@ -220,7 +173,7 @@ class IndexListContextHandler(ContextHandlerABC):
def get_context_windows(self, model: BaseModel, x_in: torch.Tensor, model_options: dict[str]) -> list[IndexListContextWindow]:
full_length = x_in.size(self.dim) # TODO: choose dim based on model
context_windows = self.context_schedule.func(full_length, self, model_options)
context_windows = [IndexListContextWindow(window, dim=self.dim, total_frames=full_length) for window in context_windows]
context_windows = [IndexListContextWindow(window, dim=self.dim) for window in context_windows]
return context_windows
def execute(self, calc_cond_batch: Callable, model: BaseModel, conds: list[list[dict]], x_in: torch.Tensor, timestep: torch.Tensor, model_options: dict[str]):
@@ -297,8 +250,8 @@ class IndexListContextHandler(ContextHandlerABC):
prev_weight = (bias_total / (bias_total + bias))
new_weight = (bias / (bias_total + bias))
# account for dims of tensors
idx_window = tuple([slice(None)] * self.dim + [idx])
pos_window = tuple([slice(None)] * self.dim + [pos])
idx_window = [slice(None)] * self.dim + [idx]
pos_window = [slice(None)] * self.dim + [pos]
# apply new values
conds_final[i][idx_window] = conds_final[i][idx_window] * prev_weight + sub_conds_out[i][pos_window] * new_weight
biases_final[i][idx] = bias_total + bias
@@ -334,28 +287,6 @@ def create_prepare_sampling_wrapper(model: ModelPatcher):
)
def _sampler_sample_wrapper(executor, guider, sigmas, extra_args, callback, noise, *args, **kwargs):
model_options = extra_args.get("model_options", None)
if model_options is None:
raise Exception("model_options not found in sampler_sample_wrapper; this should never happen, something went wrong.")
handler: IndexListContextHandler = model_options.get("context_handler", None)
if handler is None:
raise Exception("context_handler not found in sampler_sample_wrapper; this should never happen, something went wrong.")
if not handler.freenoise:
return executor(guider, sigmas, extra_args, callback, noise, *args, **kwargs)
noise = apply_freenoise(noise, handler.dim, handler.context_length, handler.context_overlap, extra_args["seed"])
return executor(guider, sigmas, extra_args, callback, noise, *args, **kwargs)
def create_sampler_sample_wrapper(model: ModelPatcher):
model.add_wrapper_with_key(
comfy.patcher_extension.WrappersMP.SAMPLER_SAMPLE,
"ContextWindows_sampler_sample",
_sampler_sample_wrapper
)
def match_weights_to_dim(weights: list[float], x_in: torch.Tensor, dim: int, device=None) -> torch.Tensor:
total_dims = len(x_in.shape)
weights_tensor = torch.Tensor(weights).to(device=device)
@@ -607,29 +538,3 @@ def shift_window_to_end(window: list[int], num_frames: int):
for i in range(len(window)):
# 2) add end_delta to each val to slide windows to end
window[i] = window[i] + end_delta
# https://github.com/Kosinkadink/ComfyUI-AnimateDiff-Evolved/blob/90fb1331201a4b29488089e4fbffc0d82cc6d0a9/animatediff/sample_settings.py#L465
def apply_freenoise(noise: torch.Tensor, dim: int, context_length: int, context_overlap: int, seed: int):
logging.info("Context windows: Applying FreeNoise")
generator = torch.Generator(device='cpu').manual_seed(seed)
latent_video_length = noise.shape[dim]
delta = context_length - context_overlap
for start_idx in range(0, latent_video_length - context_length, delta):
place_idx = start_idx + context_length
actual_delta = min(delta, latent_video_length - place_idx)
if actual_delta <= 0:
break
list_idx = torch.randperm(actual_delta, generator=generator, device='cpu') + start_idx
source_slice = [slice(None)] * noise.ndim
source_slice[dim] = list_idx
target_slice = [slice(None)] * noise.ndim
target_slice[dim] = slice(place_idx, place_idx + actual_delta)
noise[tuple(target_slice)] = noise[tuple(source_slice)]
return noise

View File

@@ -203,7 +203,7 @@ class ControlNet(ControlBase):
self.control_model = control_model
self.load_device = load_device
if control_model is not None:
self.control_model_wrapped = comfy.model_patcher.CoreModelPatcher(self.control_model, load_device=load_device, offload_device=comfy.model_management.unet_offload_device())
self.control_model_wrapped = comfy.model_patcher.ModelPatcher(self.control_model, load_device=load_device, offload_device=comfy.model_management.unet_offload_device())
self.compression_ratio = compression_ratio
self.global_average_pooling = global_average_pooling
@@ -297,30 +297,6 @@ class ControlNet(ControlBase):
self.model_sampling_current = None
super().cleanup()
class QwenFunControlNet(ControlNet):
def get_control(self, x_noisy, t, cond, batched_number, transformer_options):
# Fun checkpoints are more sensitive to high strengths in the generic
# ControlNet merge path. Use a soft response curve so strength=1.0 stays
# unchanged while >1 grows more gently.
original_strength = self.strength
self.strength = math.sqrt(max(self.strength, 0.0))
try:
return super().get_control(x_noisy, t, cond, batched_number, transformer_options)
finally:
self.strength = original_strength
def pre_run(self, model, percent_to_timestep_function):
super().pre_run(model, percent_to_timestep_function)
self.set_extra_arg("base_model", model.diffusion_model)
def copy(self):
c = QwenFunControlNet(None, global_average_pooling=self.global_average_pooling, load_device=self.load_device, manual_cast_dtype=self.manual_cast_dtype)
c.control_model = self.control_model
c.control_model_wrapped = self.control_model_wrapped
self.copy_to(c)
return c
class ControlLoraOps:
class Linear(torch.nn.Module, comfy.ops.CastWeightBiasOp):
def __init__(self, in_features: int, out_features: int, bias: bool = True,
@@ -334,13 +310,11 @@ class ControlLoraOps:
self.bias = None
def forward(self, input):
weight, bias, offload_stream = comfy.ops.cast_bias_weight(self, input, offloadable=True)
weight, bias = comfy.ops.cast_bias_weight(self, input)
if self.up is not None:
x = torch.nn.functional.linear(input, weight + (torch.mm(self.up.flatten(start_dim=1), self.down.flatten(start_dim=1))).reshape(self.weight.shape).type(input.dtype), bias)
return torch.nn.functional.linear(input, weight + (torch.mm(self.up.flatten(start_dim=1), self.down.flatten(start_dim=1))).reshape(self.weight.shape).type(input.dtype), bias)
else:
x = torch.nn.functional.linear(input, weight, bias)
comfy.ops.uncast_bias_weight(self, weight, bias, offload_stream)
return x
return torch.nn.functional.linear(input, weight, bias)
class Conv2d(torch.nn.Module, comfy.ops.CastWeightBiasOp):
def __init__(
@@ -376,13 +350,12 @@ class ControlLoraOps:
def forward(self, input):
weight, bias, offload_stream = comfy.ops.cast_bias_weight(self, input, offloadable=True)
weight, bias = comfy.ops.cast_bias_weight(self, input)
if self.up is not None:
x = torch.nn.functional.conv2d(input, weight + (torch.mm(self.up.flatten(start_dim=1), self.down.flatten(start_dim=1))).reshape(self.weight.shape).type(input.dtype), bias, self.stride, self.padding, self.dilation, self.groups)
return torch.nn.functional.conv2d(input, weight + (torch.mm(self.up.flatten(start_dim=1), self.down.flatten(start_dim=1))).reshape(self.weight.shape).type(input.dtype), bias, self.stride, self.padding, self.dilation, self.groups)
else:
x = torch.nn.functional.conv2d(input, weight, bias, self.stride, self.padding, self.dilation, self.groups)
comfy.ops.uncast_bias_weight(self, weight, bias, offload_stream)
return x
return torch.nn.functional.conv2d(input, weight, bias, self.stride, self.padding, self.dilation, self.groups)
class ControlLora(ControlNet):
def __init__(self, control_weights, global_average_pooling=False, model_options={}): #TODO? model_options
@@ -584,7 +557,6 @@ def load_controlnet_hunyuandit(controlnet_data, model_options={}):
def load_controlnet_flux_xlabs_mistoline(sd, mistoline=False, model_options={}):
model_config, operations, load_device, unet_dtype, manual_cast_dtype, offload_device = controlnet_config(sd, model_options=model_options)
control_model = comfy.ldm.flux.controlnet.ControlNetFlux(mistoline=mistoline, operations=operations, device=offload_device, dtype=unet_dtype, **model_config.unet_config)
sd = model_config.process_unet_state_dict(sd)
control_model = controlnet_load_state_dict(control_model, sd)
extra_conds = ['y', 'guidance']
control = ControlNet(control_model, load_device=load_device, manual_cast_dtype=manual_cast_dtype, extra_conds=extra_conds)
@@ -630,53 +602,6 @@ def load_controlnet_qwen_instantx(sd, model_options={}):
control = ControlNet(control_model, compression_ratio=1, latent_format=latent_format, concat_mask=concat_mask, load_device=load_device, manual_cast_dtype=manual_cast_dtype, extra_conds=extra_conds)
return control
def load_controlnet_qwen_fun(sd, model_options={}):
load_device = comfy.model_management.get_torch_device()
weight_dtype = comfy.utils.weight_dtype(sd)
unet_dtype = model_options.get("dtype", weight_dtype)
manual_cast_dtype = comfy.model_management.unet_manual_cast(unet_dtype, load_device)
operations = model_options.get("custom_operations", None)
if operations is None:
operations = comfy.ops.pick_operations(unet_dtype, manual_cast_dtype, disable_fast_fp8=True)
in_features = sd["control_img_in.weight"].shape[1]
inner_dim = sd["control_img_in.weight"].shape[0]
block_weight = sd["control_blocks.0.attn.to_q.weight"]
attention_head_dim = sd["control_blocks.0.attn.norm_q.weight"].shape[0]
num_attention_heads = max(1, block_weight.shape[0] // max(1, attention_head_dim))
model = comfy.ldm.qwen_image.controlnet.QwenImageFunControlNetModel(
control_in_features=in_features,
inner_dim=inner_dim,
num_attention_heads=num_attention_heads,
attention_head_dim=attention_head_dim,
num_control_blocks=5,
main_model_double=60,
injection_layers=(0, 12, 24, 36, 48),
operations=operations,
device=comfy.model_management.unet_offload_device(),
dtype=unet_dtype,
)
model = controlnet_load_state_dict(model, sd)
latent_format = comfy.latent_formats.Wan21()
control = QwenFunControlNet(
model,
compression_ratio=1,
latent_format=latent_format,
# Fun checkpoints already expect their own 33-channel context handling.
# Enabling generic concat_mask injects an extra mask channel at apply-time
# and breaks the intended fallback packing path.
concat_mask=False,
load_device=load_device,
manual_cast_dtype=manual_cast_dtype,
extra_conds=[],
)
return control
def convert_mistoline(sd):
return comfy.utils.state_dict_prefix_replace(sd, {"single_controlnet_blocks.": "controlnet_single_blocks."})
@@ -754,8 +679,6 @@ def load_controlnet_state_dict(state_dict, model=None, model_options={}):
return load_controlnet_qwen_instantx(controlnet_data, model_options=model_options)
elif "controlnet_x_embedder.weight" in controlnet_data:
return load_controlnet_flux_instantx(controlnet_data, model_options=model_options)
elif "control_blocks.0.after_proj.weight" in controlnet_data and "control_img_in.weight" in controlnet_data:
return load_controlnet_qwen_fun(controlnet_data, model_options=model_options)
elif "controlnet_blocks.0.linear.weight" in controlnet_data: #mistoline flux
return load_controlnet_flux_xlabs_mistoline(convert_mistoline(controlnet_data), mistoline=True, model_options=model_options)

View File

@@ -65,147 +65,3 @@ def stochastic_rounding(value, dtype, seed=0):
return output
return value.to(dtype=dtype)
# TODO: improve this?
def stochastic_float_to_fp4_e2m1(x, generator):
orig_shape = x.shape
sign = torch.signbit(x).to(torch.uint8)
exp = torch.floor(torch.log2(x.abs()) + 1.0).clamp(0, 3)
x += (torch.rand(x.size(), dtype=x.dtype, layout=x.layout, device=x.device, generator=generator) - 0.5) * (2 ** (exp - 2.0)) * 1.25
x = x.abs()
exp = torch.floor(torch.log2(x) + 1.1925).clamp(0, 3)
mantissa = torch.where(
exp > 0,
(x / (2.0 ** (exp - 1)) - 1.0) * 2.0,
(x * 2.0),
out=x
).round().to(torch.uint8)
del x
exp = exp.to(torch.uint8)
fp4 = (sign << 3) | (exp << 1) | mantissa
del sign, exp, mantissa
fp4_flat = fp4.view(-1)
packed = (fp4_flat[0::2] << 4) | fp4_flat[1::2]
return packed.reshape(list(orig_shape)[:-1] + [-1])
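# Illustrative decoder sketch (not used by the code above): reverses the
# sign(1)|exp(2)|mantissa(1) E2M1 packing produced by stochastic_float_to_fp4_e2m1,
# mapping each nibble onto the signed value set {0, ±0.5, ±1, ±1.5, ±2, ±3, ±4, ±6}.
# It recovers the quantized values, not the pre-quantization inputs, and returns them flattened.
def _fp4_e2m1_decode_example(packed: torch.Tensor) -> torch.Tensor:
    flat = packed.reshape(-1)
    # high nibble was packed first, low nibble second
    nibbles = torch.stack(((flat >> 4) & 0xF, flat & 0xF), dim=-1).reshape(-1)
    sign = 1.0 - 2.0 * ((nibbles >> 3) & 0x1).to(torch.float32)
    exp = ((nibbles >> 1) & 0x3).to(torch.float32)
    mantissa = (nibbles & 0x1).to(torch.float32)
    magnitude = torch.where(exp > 0, torch.exp2(exp - 1.0) * (1.0 + mantissa / 2.0), mantissa / 2.0)
    return sign * magnitude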
def to_blocked(input_matrix, flatten: bool = True) -> torch.Tensor:
"""
Rearrange a large matrix by breaking it into blocks and applying the rearrangement pattern.
See:
https://docs.nvidia.com/cuda/cublas/index.html#d-block-scaling-factors-layout
Args:
input_matrix: Input tensor of shape (H, W)
Returns:
Rearranged tensor of shape (32*ceil_div(H,128), 16*ceil_div(W,4))
"""
def ceil_div(a, b):
return (a + b - 1) // b
rows, cols = input_matrix.shape
n_row_blocks = ceil_div(rows, 128)
n_col_blocks = ceil_div(cols, 4)
# Calculate the padded shape
padded_rows = n_row_blocks * 128
padded_cols = n_col_blocks * 4
padded = input_matrix
if (rows, cols) != (padded_rows, padded_cols):
padded = torch.zeros(
(padded_rows, padded_cols),
device=input_matrix.device,
dtype=input_matrix.dtype,
)
padded[:rows, :cols] = input_matrix
# Rearrange the blocks
blocks = padded.view(n_row_blocks, 128, n_col_blocks, 4).permute(0, 2, 1, 3)
rearranged = blocks.reshape(-1, 4, 32, 4).transpose(1, 2).reshape(-1, 32, 16)
if flatten:
return rearranged.flatten()
return rearranged.reshape(padded_rows, padded_cols)
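# Illustrative usage sketch: a scale matrix that is not tile-aligned is padded up
# to multiples of (128, 4) before the cuBLAS block layout is applied.
def _to_blocked_shape_example():
    scales = torch.randn(200, 6)                 # arbitrary, not tile-aligned
    blocked = to_blocked(scales, flatten=False)
    assert blocked.shape == (256, 8)             # padded to 2*128 rows, 2*4 cols
    flat = to_blocked(scales, flatten=True)
    assert flat.numel() == 256 * 8               # same elements, flattened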
def stochastic_round_quantize_nvfp4_block(x, per_tensor_scale, generator):
F4_E2M1_MAX = 6.0
F8_E4M3_MAX = 448.0
orig_shape = x.shape
block_size = 16
x = x.reshape(orig_shape[0], -1, block_size)
scaled_block_scales_fp8 = torch.clamp(((torch.amax(torch.abs(x), dim=-1)) / F4_E2M1_MAX) / per_tensor_scale.to(x.dtype), max=F8_E4M3_MAX).to(torch.float8_e4m3fn)
x = x / (per_tensor_scale.to(x.dtype) * scaled_block_scales_fp8.to(x.dtype)).unsqueeze(-1)
x = x.view(orig_shape).nan_to_num()
data_lp = stochastic_float_to_fp4_e2m1(x, generator=generator)
return data_lp, scaled_block_scales_fp8
def stochastic_round_quantize_nvfp4(x, per_tensor_scale, pad_16x, seed=0):
def roundup(x: int, multiple: int) -> int:
"""Round up x to the nearest multiple."""
return ((x + multiple - 1) // multiple) * multiple
generator = torch.Generator(device=x.device)
generator.manual_seed(seed)
# Handle padding
if pad_16x:
rows, cols = x.shape
padded_rows = roundup(rows, 16)
padded_cols = roundup(cols, 16)
if padded_rows != rows or padded_cols != cols:
x = torch.nn.functional.pad(x, (0, padded_cols - cols, 0, padded_rows - rows))
x, blocked_scaled = stochastic_round_quantize_nvfp4_block(x, per_tensor_scale, generator)
return x, to_blocked(blocked_scaled, flatten=False)
def stochastic_round_quantize_nvfp4_by_block(x, per_tensor_scale, pad_16x, seed=0, block_size=4096 * 4096):
def roundup(x: int, multiple: int) -> int:
"""Round up x to the nearest multiple."""
return ((x + multiple - 1) // multiple) * multiple
orig_shape = x.shape
# Handle padding
if pad_16x:
rows, cols = x.shape
padded_rows = roundup(rows, 16)
padded_cols = roundup(cols, 16)
if padded_rows != rows or padded_cols != cols:
x = torch.nn.functional.pad(x, (0, padded_cols - cols, 0, padded_rows - rows))
# Note: We update orig_shape because the output tensor logic below assumes x.shape matches
# what we want to produce. If we pad here, we want the padded output.
orig_shape = x.shape
orig_shape = list(orig_shape)
output_fp4 = torch.empty(orig_shape[:-1] + [orig_shape[-1] // 2], dtype=torch.uint8, device=x.device)
output_block = torch.empty(orig_shape[:-1] + [orig_shape[-1] // 16], dtype=torch.float8_e4m3fn, device=x.device)
generator = torch.Generator(device=x.device)
generator.manual_seed(seed)
num_slices = max(1, (x.numel() / block_size))
slice_size = max(1, (round(x.shape[0] / num_slices)))
for i in range(0, x.shape[0], slice_size):
fp4, block = stochastic_round_quantize_nvfp4_block(x[i: i + slice_size], per_tensor_scale, generator=generator)
output_fp4[i:i + slice_size].copy_(fp4)
output_block[i:i + slice_size].copy_(block)
return output_fp4, to_blocked(output_block, flatten=False)
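# Hypothetical usage sketch: quantize a 2-D weight matrix to NVFP4. The per-tensor
# scale below is one common choice (global amax spread across the fp8 block-scale
# and fp4 ranges); it is an assumption for illustration, not something this module prescribes.
def _nvfp4_quantize_example(weight: torch.Tensor, seed: int = 0):
    per_tensor_scale = weight.abs().amax().float() / (448.0 * 6.0)
    fp4_packed, block_scales = stochastic_round_quantize_nvfp4_by_block(
        weight, per_tensor_scale, pad_16x=True, seed=seed
    )
    # fp4_packed holds two E2M1 values per uint8; block_scales is the blocked
    # float8_e4m3 per-16-element scale layout expected by cuBLAS.
    return fp4_packed, block_scales, per_tensor_scale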

View File

@@ -527,8 +527,7 @@ class HookKeyframeGroup:
if self._current_keyframe.get_effective_guarantee_steps(max_sigma) > 0:
break
# if eval_c is outside the percent range, stop looking further
else:
break
else: break
# update steps current context is used
self._current_used_steps += 1
# update current timestep this was performed on

View File

@@ -5,7 +5,7 @@ from scipy import integrate
import torch
from torch import nn
import torchsde
from tqdm.auto import tqdm
from tqdm.auto import trange, tqdm
from . import utils
from . import deis
@@ -13,9 +13,6 @@ from . import sa_solver
import comfy.model_patcher
import comfy.model_sampling
import comfy.memory_management
from comfy.utils import model_trange as trange
def append_zero(x):
return torch.cat([x, x.new_zeros([1])])
@@ -77,9 +74,6 @@ def get_ancestral_step(sigma_from, sigma_to, eta=1.):
def default_noise_sampler(x, seed=None):
if seed is not None:
if x.device == torch.device("cpu"):
seed += 1
generator = torch.Generator(device=x.device)
generator.manual_seed(seed)
else:
@@ -1563,13 +1557,10 @@ def sample_er_sde(model, x, sigmas, extra_args=None, callback=None, disable=None
@torch.no_grad()
def sample_seeds_2(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, r=0.5, solver_type="phi_1"):
def sample_seeds_2(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, r=0.5):
"""SEEDS-2 - Stochastic Explicit Exponential Derivative-free Solvers (VP Data Prediction) stage 2.
arXiv: https://arxiv.org/abs/2305.14267 (NeurIPS 2023)
"""
if solver_type not in {"phi_1", "phi_2"}:
raise ValueError("solver_type must be 'phi_1' or 'phi_2'")
extra_args = {} if extra_args is None else extra_args
seed = extra_args.get("seed", None)
noise_sampler = default_noise_sampler(x, seed=seed) if noise_sampler is None else noise_sampler
@@ -1609,14 +1600,8 @@ def sample_seeds_2(model, x, sigmas, extra_args=None, callback=None, disable=Non
denoised_2 = model(x_2, sigma_s_1 * s_in, **extra_args)
# Step 2
if solver_type == "phi_1":
denoised_d = torch.lerp(denoised, denoised_2, fac)
x = sigmas[i + 1] / sigmas[i] * (-h * eta).exp() * x - alpha_t * ei_h_phi_1(-h_eta) * denoised_d
elif solver_type == "phi_2":
b2 = ei_h_phi_2(-h_eta) / r
b1 = ei_h_phi_1(-h_eta) - b2
x = sigmas[i + 1] / sigmas[i] * (-h * eta).exp() * x - alpha_t * (b1 * denoised + b2 * denoised_2)
denoised_d = torch.lerp(denoised, denoised_2, fac)
x = sigmas[i + 1] / sigmas[i] * (-h * eta).exp() * x - alpha_t * ei_h_phi_1(-h_eta) * denoised_d
if inject_noise:
segment_factor = (r - 1) * h * eta
sde_noise = sde_noise * segment_factor.exp()
@@ -1624,17 +1609,6 @@ def sample_seeds_2(model, x, sigmas, extra_args=None, callback=None, disable=Non
x = x + sde_noise * sigmas[i + 1] * s_noise
return x
@torch.no_grad()
def sample_exp_heun_2_x0(model, x, sigmas, extra_args=None, callback=None, disable=None, solver_type="phi_2"):
"""Deterministic exponential Heun second order method in data prediction (x0) and logSNR time."""
return sample_seeds_2(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, eta=0.0, s_noise=0.0, noise_sampler=None, r=1.0, solver_type=solver_type)
@torch.no_grad()
def sample_exp_heun_2_x0_sde(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, solver_type="phi_2"):
"""Stochastic exponential Heun second order method in data prediction (x0) and logSNR time."""
return sample_seeds_2(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, eta=eta, s_noise=s_noise, noise_sampler=noise_sampler, r=1.0, solver_type=solver_type)
@torch.no_grad()
def sample_seeds_3(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, r_1=1./3, r_2=2./3):
@@ -1782,7 +1756,7 @@ def sample_sa_solver(model, x, sigmas, extra_args=None, callback=None, disable=F
# Predictor
if sigmas[i + 1] == 0:
# Denoising step
x_pred = denoised
x = denoised
else:
tau_t = tau_func(sigmas[i + 1])
curr_lambdas = lambdas[i - predictor_order_used + 1:i + 1]
@@ -1803,7 +1777,7 @@ def sample_sa_solver(model, x, sigmas, extra_args=None, callback=None, disable=F
if tau_t > 0 and s_noise > 0:
noise = noise_sampler(sigmas[i], sigmas[i + 1]) * sigmas[i + 1] * (-2 * tau_t ** 2 * h).expm1().neg().sqrt() * s_noise
x_pred = x_pred + noise
return x_pred
return x
@torch.no_grad()

View File

@@ -6,9 +6,7 @@ class LatentFormat:
latent_dimensions = 2
latent_rgb_factors = None
latent_rgb_factors_bias = None
latent_rgb_factors_reshape = None
taesd_decoder_name = None
spacial_downscale_ratio = 8
def process_in(self, latent):
return latent * self.scale_factor
@@ -81,7 +79,6 @@ class SD_X4(LatentFormat):
class SC_Prior(LatentFormat):
latent_channels = 16
spacial_downscale_ratio = 42
def __init__(self):
self.scale_factor = 1.0
self.latent_rgb_factors = [
@@ -104,7 +101,6 @@ class SC_Prior(LatentFormat):
]
class SC_B(LatentFormat):
spacial_downscale_ratio = 4
def __init__(self):
self.scale_factor = 1.0 / 0.43
self.latent_rgb_factors = [
@@ -182,55 +178,6 @@ class Flux(SD3):
def process_out(self, latent):
return (latent / self.scale_factor) + self.shift_factor
class Flux2(LatentFormat):
latent_channels = 128
spacial_downscale_ratio = 16
def __init__(self):
self.latent_rgb_factors =[
[0.0058, 0.0113, 0.0073],
[0.0495, 0.0443, 0.0836],
[-0.0099, 0.0096, 0.0644],
[0.2144, 0.3009, 0.3652],
[0.0166, -0.0039, -0.0054],
[0.0157, 0.0103, -0.0160],
[-0.0398, 0.0902, -0.0235],
[-0.0052, 0.0095, 0.0109],
[-0.3527, -0.2712, -0.1666],
[-0.0301, -0.0356, -0.0180],
[-0.0107, 0.0078, 0.0013],
[0.0746, 0.0090, -0.0941],
[0.0156, 0.0169, 0.0070],
[-0.0034, -0.0040, -0.0114],
[0.0032, 0.0181, 0.0080],
[-0.0939, -0.0008, 0.0186],
[0.0018, 0.0043, 0.0104],
[0.0284, 0.0056, -0.0127],
[-0.0024, -0.0022, -0.0030],
[0.1207, -0.0026, 0.0065],
[0.0128, 0.0101, 0.0142],
[0.0137, -0.0072, -0.0007],
[0.0095, 0.0092, -0.0059],
[0.0000, -0.0077, -0.0049],
[-0.0465, -0.0204, -0.0312],
[0.0095, 0.0012, -0.0066],
[0.0290, -0.0034, 0.0025],
[0.0220, 0.0169, -0.0048],
[-0.0332, -0.0457, -0.0468],
[-0.0085, 0.0389, 0.0609],
[-0.0076, 0.0003, -0.0043],
[-0.0111, -0.0460, -0.0614],
]
self.latent_rgb_factors_bias = [-0.0329, -0.0718, -0.0851]
self.latent_rgb_factors_reshape = lambda t: t.reshape(t.shape[0], 32, 2, 2, t.shape[-2], t.shape[-1]).permute(0, 1, 4, 2, 5, 3).reshape(t.shape[0], 32, t.shape[-2] * 2, t.shape[-1] * 2)
def process_in(self, latent):
return latent
def process_out(self, latent):
return latent
class Mochi(LatentFormat):
latent_channels = 12
latent_dimensions = 3
@@ -276,7 +223,6 @@ class Mochi(LatentFormat):
class LTXV(LatentFormat):
latent_channels = 128
latent_dimensions = 3
spacial_downscale_ratio = 32
def __init__(self):
self.latent_rgb_factors = [
@@ -412,11 +358,6 @@ class LTXV(LatentFormat):
self.latent_rgb_factors_bias = [-0.0571, -0.1657, -0.2512]
class LTXAV(LTXV):
def __init__(self):
self.latent_rgb_factors = None
self.latent_rgb_factors_bias = None
class HunyuanVideo(LatentFormat):
latent_channels = 16
latent_dimensions = 3
@@ -441,7 +382,6 @@ class HunyuanVideo(LatentFormat):
]
latent_rgb_factors_bias = [ 0.0259, -0.0192, -0.0761]
taesd_decoder_name = "taehv"
class Cosmos1CV8x8x8(LatentFormat):
latent_channels = 16
@@ -505,7 +445,7 @@ class Wan21(LatentFormat):
]).view(1, self.latent_channels, 1, 1, 1)
self.taesd_decoder_name = "lighttaew2_1"
self.taesd_decoder_name = None #TODO
def process_in(self, latent):
latents_mean = self.latents_mean.to(latent.device, latent.dtype)
@@ -520,7 +460,6 @@ class Wan21(LatentFormat):
class Wan22(Wan21):
latent_channels = 48
latent_dimensions = 3
spacial_downscale_ratio = 16
latent_rgb_factors = [
[ 0.0119, 0.0103, 0.0046],
@@ -577,7 +516,6 @@ class Wan22(Wan21):
def __init__(self):
self.scale_factor = 1.0
self.taesd_decoder_name = "lighttaew2_2"
self.latents_mean = torch.tensor([
-0.2289, -0.0052, -0.1323, -0.2339, -0.2799, 0.0174, 0.1838, 0.1557,
-0.1382, 0.0542, 0.2813, 0.0891, 0.1570, -0.0098, 0.0375, -0.1825,
@@ -598,7 +536,6 @@ class Wan22(Wan21):
class HunyuanImage21(LatentFormat):
latent_channels = 64
latent_dimensions = 2
spacial_downscale_ratio = 32
scale_factor = 0.75289
latent_rgb_factors = [
@@ -674,68 +611,6 @@ class HunyuanImage21Refiner(LatentFormat):
latent_dimensions = 3
scale_factor = 1.03682
def process_in(self, latent):
out = latent * self.scale_factor
out = torch.cat((out[:, :, :1], out), dim=2)
out = out.permute(0, 2, 1, 3, 4)
b, f_times_2, c, h, w = out.shape
out = out.reshape(b, f_times_2 // 2, 2 * c, h, w)
out = out.permute(0, 2, 1, 3, 4).contiguous()
return out
def process_out(self, latent):
z = latent / self.scale_factor
z = z.permute(0, 2, 1, 3, 4)
b, f, c, h, w = z.shape
z = z.reshape(b, f, 2, c // 2, h, w)
z = z.permute(0, 1, 2, 3, 4, 5).reshape(b, f * 2, c // 2, h, w)
z = z.permute(0, 2, 1, 3, 4)
z = z[:, :, 1:]
return z
class HunyuanVideo15(LatentFormat):
latent_rgb_factors = [
[ 0.0568, -0.0521, -0.0131],
[ 0.0014, 0.0735, 0.0326],
[ 0.0186, 0.0531, -0.0138],
[-0.0031, 0.0051, 0.0288],
[ 0.0110, 0.0556, 0.0432],
[-0.0041, -0.0023, -0.0485],
[ 0.0530, 0.0413, 0.0253],
[ 0.0283, 0.0251, 0.0339],
[ 0.0277, -0.0372, -0.0093],
[ 0.0393, 0.0944, 0.1131],
[ 0.0020, 0.0251, 0.0037],
[-0.0017, 0.0012, 0.0234],
[ 0.0468, 0.0436, 0.0203],
[ 0.0354, 0.0439, -0.0233],
[ 0.0090, 0.0123, 0.0346],
[ 0.0382, 0.0029, 0.0217],
[ 0.0261, -0.0300, 0.0030],
[-0.0088, -0.0220, -0.0283],
[-0.0272, -0.0121, -0.0363],
[-0.0664, -0.0622, 0.0144],
[ 0.0414, 0.0479, 0.0529],
[ 0.0355, 0.0612, -0.0247],
[ 0.0147, 0.0264, 0.0174],
[ 0.0438, 0.0038, 0.0542],
[ 0.0431, -0.0573, -0.0033],
[-0.0162, -0.0211, -0.0406],
[-0.0487, -0.0295, -0.0393],
[ 0.0005, -0.0109, 0.0253],
[ 0.0296, 0.0591, 0.0353],
[ 0.0119, 0.0181, -0.0306],
[-0.0085, -0.0362, 0.0229],
[ 0.0005, -0.0106, 0.0242]
]
latent_rgb_factors_bias = [ 0.0456, -0.0202, -0.0644]
latent_channels = 32
latent_dimensions = 3
spacial_downscale_ratio = 16
scale_factor = 1.03682
taesd_decoder_name = "lighttaehy1_5"
class Hunyuan3Dv2(LatentFormat):
latent_channels = 64
latent_dimensions = 1
@@ -755,13 +630,8 @@ class ACEAudio(LatentFormat):
latent_channels = 8
latent_dimensions = 2
class ACEAudio15(LatentFormat):
latent_channels = 64
latent_dimensions = 1
class ChromaRadiance(LatentFormat):
latent_channels = 3
spacial_downscale_ratio = 1
def __init__(self):
self.latent_rgb_factors = [

File diff suppressed because it is too large

View File

@@ -1,214 +0,0 @@
from comfy.ldm.cosmos.predict2 import MiniTrainDIT
import torch
from torch import nn
import torch.nn.functional as F
def rotate_half(x):
x1 = x[..., : x.shape[-1] // 2]
x2 = x[..., x.shape[-1] // 2 :]
return torch.cat((-x2, x1), dim=-1)
def apply_rotary_pos_emb(x, cos, sin, unsqueeze_dim=1):
cos = cos.unsqueeze(unsqueeze_dim)
sin = sin.unsqueeze(unsqueeze_dim)
x_embed = (x * cos) + (rotate_half(x) * sin)
return x_embed
class RotaryEmbedding(nn.Module):
def __init__(self, head_dim):
super().__init__()
self.rope_theta = 10000
inv_freq = 1.0 / (self.rope_theta ** (torch.arange(0, head_dim, 2, dtype=torch.int64).to(dtype=torch.float) / head_dim))
self.register_buffer("inv_freq", inv_freq, persistent=False)
@torch.no_grad()
def forward(self, x, position_ids):
inv_freq_expanded = self.inv_freq[None, :, None].float().expand(position_ids.shape[0], -1, 1).to(x.device)
position_ids_expanded = position_ids[:, None, :].float()
device_type = x.device.type if isinstance(x.device.type, str) and x.device.type != "mps" else "cpu"
with torch.autocast(device_type=device_type, enabled=False): # Force float32
freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2)
emb = torch.cat((freqs, freqs), dim=-1)
cos = emb.cos()
sin = emb.sin()
return cos.to(dtype=x.dtype), sin.to(dtype=x.dtype)
class Attention(nn.Module):
def __init__(self, query_dim, context_dim, n_heads, head_dim, device=None, dtype=None, operations=None):
super().__init__()
inner_dim = head_dim * n_heads
self.n_heads = n_heads
self.head_dim = head_dim
self.query_dim = query_dim
self.context_dim = context_dim
self.q_proj = operations.Linear(query_dim, inner_dim, bias=False, device=device, dtype=dtype)
self.q_norm = operations.RMSNorm(self.head_dim, eps=1e-6, device=device, dtype=dtype)
self.k_proj = operations.Linear(context_dim, inner_dim, bias=False, device=device, dtype=dtype)
self.k_norm = operations.RMSNorm(self.head_dim, eps=1e-6, device=device, dtype=dtype)
self.v_proj = operations.Linear(context_dim, inner_dim, bias=False, device=device, dtype=dtype)
self.o_proj = operations.Linear(inner_dim, query_dim, bias=False, device=device, dtype=dtype)
def forward(self, x, mask=None, context=None, position_embeddings=None, position_embeddings_context=None):
context = x if context is None else context
input_shape = x.shape[:-1]
q_shape = (*input_shape, self.n_heads, self.head_dim)
context_shape = context.shape[:-1]
kv_shape = (*context_shape, self.n_heads, self.head_dim)
query_states = self.q_norm(self.q_proj(x).view(q_shape)).transpose(1, 2)
key_states = self.k_norm(self.k_proj(context).view(kv_shape)).transpose(1, 2)
value_states = self.v_proj(context).view(kv_shape).transpose(1, 2)
if position_embeddings is not None:
assert position_embeddings_context is not None
cos, sin = position_embeddings
query_states = apply_rotary_pos_emb(query_states, cos, sin)
cos, sin = position_embeddings_context
key_states = apply_rotary_pos_emb(key_states, cos, sin)
attn_output = F.scaled_dot_product_attention(query_states, key_states, value_states, attn_mask=mask)
attn_output = attn_output.transpose(1, 2).reshape(*input_shape, -1).contiguous()
attn_output = self.o_proj(attn_output)
return attn_output
def init_weights(self):
torch.nn.init.zeros_(self.o_proj.weight)
class TransformerBlock(nn.Module):
def __init__(self, source_dim, model_dim, num_heads=16, mlp_ratio=4.0, use_self_attn=False, layer_norm=False, device=None, dtype=None, operations=None):
super().__init__()
self.use_self_attn = use_self_attn
if self.use_self_attn:
self.norm_self_attn = operations.LayerNorm(model_dim, device=device, dtype=dtype) if layer_norm else operations.RMSNorm(model_dim, eps=1e-6, device=device, dtype=dtype)
self.self_attn = Attention(
query_dim=model_dim,
context_dim=model_dim,
n_heads=num_heads,
head_dim=model_dim//num_heads,
device=device,
dtype=dtype,
operations=operations,
)
self.norm_cross_attn = operations.LayerNorm(model_dim, device=device, dtype=dtype) if layer_norm else operations.RMSNorm(model_dim, eps=1e-6, device=device, dtype=dtype)
self.cross_attn = Attention(
query_dim=model_dim,
context_dim=source_dim,
n_heads=num_heads,
head_dim=model_dim//num_heads,
device=device,
dtype=dtype,
operations=operations,
)
self.norm_mlp = operations.LayerNorm(model_dim, device=device, dtype=dtype) if layer_norm else operations.RMSNorm(model_dim, eps=1e-6, device=device, dtype=dtype)
self.mlp = nn.Sequential(
operations.Linear(model_dim, int(model_dim * mlp_ratio), device=device, dtype=dtype),
nn.GELU(),
operations.Linear(int(model_dim * mlp_ratio), model_dim, device=device, dtype=dtype)
)
def forward(self, x, context, target_attention_mask=None, source_attention_mask=None, position_embeddings=None, position_embeddings_context=None):
if self.use_self_attn:
normed = self.norm_self_attn(x)
attn_out = self.self_attn(normed, mask=target_attention_mask, position_embeddings=position_embeddings, position_embeddings_context=position_embeddings)
x = x + attn_out
normed = self.norm_cross_attn(x)
attn_out = self.cross_attn(normed, mask=source_attention_mask, context=context, position_embeddings=position_embeddings, position_embeddings_context=position_embeddings_context)
x = x + attn_out
x = x + self.mlp(self.norm_mlp(x))
return x
def init_weights(self):
torch.nn.init.zeros_(self.mlp[2].weight)
self.cross_attn.init_weights()
class LLMAdapter(nn.Module):
def __init__(
self,
source_dim=1024,
target_dim=1024,
model_dim=1024,
num_layers=6,
num_heads=16,
use_self_attn=True,
layer_norm=False,
device=None,
dtype=None,
operations=None,
):
super().__init__()
self.embed = operations.Embedding(32128, target_dim, device=device, dtype=dtype)
if model_dim != target_dim:
self.in_proj = operations.Linear(target_dim, model_dim, device=device, dtype=dtype)
else:
self.in_proj = nn.Identity()
self.rotary_emb = RotaryEmbedding(model_dim//num_heads)
self.blocks = nn.ModuleList([
TransformerBlock(source_dim, model_dim, num_heads=num_heads, use_self_attn=use_self_attn, layer_norm=layer_norm, device=device, dtype=dtype, operations=operations) for _ in range(num_layers)
])
self.out_proj = operations.Linear(model_dim, target_dim, device=device, dtype=dtype)
self.norm = operations.RMSNorm(target_dim, eps=1e-6, device=device, dtype=dtype)
def forward(self, source_hidden_states, target_input_ids, target_attention_mask=None, source_attention_mask=None):
if target_attention_mask is not None:
target_attention_mask = target_attention_mask.to(torch.bool)
if target_attention_mask.ndim == 2:
target_attention_mask = target_attention_mask.unsqueeze(1).unsqueeze(1)
if source_attention_mask is not None:
source_attention_mask = source_attention_mask.to(torch.bool)
if source_attention_mask.ndim == 2:
source_attention_mask = source_attention_mask.unsqueeze(1).unsqueeze(1)
x = self.in_proj(self.embed(target_input_ids))
context = source_hidden_states
position_ids = torch.arange(x.shape[1], device=x.device).unsqueeze(0)
position_ids_context = torch.arange(context.shape[1], device=x.device).unsqueeze(0)
position_embeddings = self.rotary_emb(x, position_ids)
position_embeddings_context = self.rotary_emb(x, position_ids_context)
for block in self.blocks:
x = block(x, context, target_attention_mask=target_attention_mask, source_attention_mask=source_attention_mask, position_embeddings=position_embeddings, position_embeddings_context=position_embeddings_context)
return self.norm(self.out_proj(x))
class Anima(MiniTrainDIT):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.llm_adapter = LLMAdapter(device=kwargs.get("device"), dtype=kwargs.get("dtype"), operations=kwargs.get("operations"))
def preprocess_text_embeds(self, text_embeds, text_ids, t5xxl_weights=None):
if text_ids is not None:
out = self.llm_adapter(text_embeds, text_ids)
if t5xxl_weights is not None:
out = out * t5xxl_weights
if out.shape[1] < 512:
out = torch.nn.functional.pad(out, (0, 0, 0, 512 - out.shape[1]))
return out
else:
return text_embeds
def forward(self, x, timesteps, context, **kwargs):
t5xxl_ids = kwargs.pop("t5xxl_ids", None)
if t5xxl_ids is not None:
context = self.preprocess_text_embeds(context, t5xxl_ids, t5xxl_weights=kwargs.pop("t5xxl_weights", None))
return super().forward(x, timesteps, context, **kwargs)

View File

@@ -1,14 +1,15 @@
import torch
from torch import Tensor, nn
from comfy.ldm.flux.math import attention
from comfy.ldm.flux.layers import (
MLPEmbedder,
RMSNorm,
QKNorm,
SelfAttention,
ModulationOut,
)
# TODO: remove this in a few months
SingleStreamBlock = None
DoubleStreamBlock = None
class ChromaModulationOut(ModulationOut):
@@ -28,7 +29,7 @@ class Approximator(nn.Module):
super().__init__()
self.in_proj = operations.Linear(in_dim, hidden_dim, bias=True, dtype=dtype, device=device)
self.layers = nn.ModuleList([MLPEmbedder(hidden_dim, hidden_dim, dtype=dtype, device=device, operations=operations) for x in range( n_layers)])
self.norms = nn.ModuleList([operations.RMSNorm(hidden_dim, dtype=dtype, device=device) for x in range( n_layers)])
self.norms = nn.ModuleList([RMSNorm(hidden_dim, dtype=dtype, device=device, operations=operations) for x in range( n_layers)])
self.out_proj = operations.Linear(hidden_dim, out_dim, dtype=dtype, device=device)
@property
@@ -47,6 +48,124 @@ class Approximator(nn.Module):
return x
class DoubleStreamBlock(nn.Module):
def __init__(self, hidden_size: int, num_heads: int, mlp_ratio: float, qkv_bias: bool = False, flipped_img_txt=False, dtype=None, device=None, operations=None):
super().__init__()
mlp_hidden_dim = int(hidden_size * mlp_ratio)
self.num_heads = num_heads
self.hidden_size = hidden_size
self.img_norm1 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
self.img_attn = SelfAttention(dim=hidden_size, num_heads=num_heads, qkv_bias=qkv_bias, dtype=dtype, device=device, operations=operations)
self.img_norm2 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
self.img_mlp = nn.Sequential(
operations.Linear(hidden_size, mlp_hidden_dim, bias=True, dtype=dtype, device=device),
nn.GELU(approximate="tanh"),
operations.Linear(mlp_hidden_dim, hidden_size, bias=True, dtype=dtype, device=device),
)
self.txt_norm1 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
self.txt_attn = SelfAttention(dim=hidden_size, num_heads=num_heads, qkv_bias=qkv_bias, dtype=dtype, device=device, operations=operations)
self.txt_norm2 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
self.txt_mlp = nn.Sequential(
operations.Linear(hidden_size, mlp_hidden_dim, bias=True, dtype=dtype, device=device),
nn.GELU(approximate="tanh"),
operations.Linear(mlp_hidden_dim, hidden_size, bias=True, dtype=dtype, device=device),
)
self.flipped_img_txt = flipped_img_txt
def forward(self, img: Tensor, txt: Tensor, pe: Tensor, vec: Tensor, attn_mask=None, transformer_options={}):
(img_mod1, img_mod2), (txt_mod1, txt_mod2) = vec
# prepare image for attention
img_modulated = torch.addcmul(img_mod1.shift, 1 + img_mod1.scale, self.img_norm1(img))
img_qkv = self.img_attn.qkv(img_modulated)
img_q, img_k, img_v = img_qkv.view(img_qkv.shape[0], img_qkv.shape[1], 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
img_q, img_k = self.img_attn.norm(img_q, img_k, img_v)
# prepare txt for attention
txt_modulated = torch.addcmul(txt_mod1.shift, 1 + txt_mod1.scale, self.txt_norm1(txt))
txt_qkv = self.txt_attn.qkv(txt_modulated)
txt_q, txt_k, txt_v = txt_qkv.view(txt_qkv.shape[0], txt_qkv.shape[1], 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
txt_q, txt_k = self.txt_attn.norm(txt_q, txt_k, txt_v)
# run actual attention
attn = attention(torch.cat((txt_q, img_q), dim=2),
torch.cat((txt_k, img_k), dim=2),
torch.cat((txt_v, img_v), dim=2),
pe=pe, mask=attn_mask, transformer_options=transformer_options)
txt_attn, img_attn = attn[:, : txt.shape[1]], attn[:, txt.shape[1] :]
# calculate the img blocks
img.addcmul_(img_mod1.gate, self.img_attn.proj(img_attn))
img.addcmul_(img_mod2.gate, self.img_mlp(torch.addcmul(img_mod2.shift, 1 + img_mod2.scale, self.img_norm2(img))))
# calculate the txt blocks
txt.addcmul_(txt_mod1.gate, self.txt_attn.proj(txt_attn))
txt.addcmul_(txt_mod2.gate, self.txt_mlp(torch.addcmul(txt_mod2.shift, 1 + txt_mod2.scale, self.txt_norm2(txt))))
if txt.dtype == torch.float16:
txt = torch.nan_to_num(txt, nan=0.0, posinf=65504, neginf=-65504)
return img, txt
class SingleStreamBlock(nn.Module):
"""
A DiT block with parallel linear layers as described in
https://arxiv.org/abs/2302.05442, with an adapted modulation interface.
"""
def __init__(
self,
hidden_size: int,
num_heads: int,
mlp_ratio: float = 4.0,
qk_scale: float = None,
dtype=None,
device=None,
operations=None
):
super().__init__()
self.hidden_dim = hidden_size
self.num_heads = num_heads
head_dim = hidden_size // num_heads
self.scale = qk_scale or head_dim**-0.5
self.mlp_hidden_dim = int(hidden_size * mlp_ratio)
# qkv and mlp_in
self.linear1 = operations.Linear(hidden_size, hidden_size * 3 + self.mlp_hidden_dim, dtype=dtype, device=device)
# proj and mlp_out
self.linear2 = operations.Linear(hidden_size + self.mlp_hidden_dim, hidden_size, dtype=dtype, device=device)
self.norm = QKNorm(head_dim, dtype=dtype, device=device, operations=operations)
self.hidden_size = hidden_size
self.pre_norm = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
self.mlp_act = nn.GELU(approximate="tanh")
def forward(self, x: Tensor, pe: Tensor, vec: Tensor, attn_mask=None, transformer_options={}) -> Tensor:
mod = vec
x_mod = torch.addcmul(mod.shift, 1 + mod.scale, self.pre_norm(x))
qkv, mlp = torch.split(self.linear1(x_mod), [3 * self.hidden_size, self.mlp_hidden_dim], dim=-1)
q, k, v = qkv.view(qkv.shape[0], qkv.shape[1], 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
q, k = self.norm(q, k, v)
# compute attention
attn = attention(q, k, v, pe=pe, mask=attn_mask, transformer_options=transformer_options)
# compute activation in mlp stream, cat again and run second linear layer
output = self.linear2(torch.cat((attn, self.mlp_act(mlp)), 2))
x.addcmul_(mod.gate, output)
if x.dtype == torch.float16:
x = torch.nan_to_num(x, nan=0.0, posinf=65504, neginf=-65504)
return x
class LastLayer(nn.Module):
def __init__(self, hidden_size: int, patch_size: int, out_channels: int, dtype=None, device=None, operations=None):
super().__init__()

View File

@@ -11,12 +11,12 @@ import comfy.ldm.common_dit
from comfy.ldm.flux.layers import (
EmbedND,
timestep_embedding,
DoubleStreamBlock,
SingleStreamBlock,
)
from .layers import (
DoubleStreamBlock,
LastLayer,
SingleStreamBlock,
Approximator,
ChromaModulationOut,
)
@@ -40,8 +40,7 @@ class ChromaParams:
out_dim: int
hidden_dim: int
n_layers: int
txt_ids_dims: list
vec_in_dim: int
@@ -91,7 +90,6 @@ class Chroma(nn.Module):
self.num_heads,
mlp_ratio=params.mlp_ratio,
qkv_bias=params.qkv_bias,
modulation=False,
dtype=dtype, device=device, operations=operations
)
for _ in range(params.depth)
@@ -100,7 +98,7 @@ class Chroma(nn.Module):
self.single_blocks = nn.ModuleList(
[
SingleStreamBlock(self.hidden_size, self.num_heads, mlp_ratio=params.mlp_ratio, modulation=False, dtype=dtype, device=device, operations=operations)
SingleStreamBlock(self.hidden_size, self.num_heads, mlp_ratio=params.mlp_ratio, dtype=dtype, device=device, operations=operations)
for _ in range(params.depth_single_blocks)
]
)
@@ -180,10 +178,7 @@ class Chroma(nn.Module):
pe = self.pe_embedder(ids)
blocks_replace = patches_replace.get("dit", {})
transformer_options["total_blocks"] = len(self.double_blocks)
transformer_options["block_type"] = "double"
for i, block in enumerate(self.double_blocks):
transformer_options["block_index"] = i
if i not in self.skip_mmdit:
double_mod = (
self.get_modulations(mod_vectors, "double_img", idx=i),
@@ -226,10 +221,7 @@ class Chroma(nn.Module):
img = torch.cat((txt, img), 1)
transformer_options["total_blocks"] = len(self.single_blocks)
transformer_options["block_type"] = "single"
for i, block in enumerate(self.single_blocks):
transformer_options["block_index"] = i
if i not in self.skip_dit:
single_mod = self.get_modulations(mod_vectors, "single", idx=i)
if ("single_block", i) in blocks_replace:

View File

@@ -4,6 +4,8 @@ from functools import lru_cache
import torch
from torch import nn
from comfy.ldm.flux.layers import RMSNorm
class NerfEmbedder(nn.Module):
"""
@@ -143,7 +145,7 @@ class NerfGLUBlock(nn.Module):
# We now need to generate parameters for 3 matrices.
total_params = 3 * hidden_size_x**2 * mlp_ratio
self.param_generator = operations.Linear(hidden_size_s, total_params, dtype=dtype, device=device)
self.norm = operations.RMSNorm(hidden_size_x, dtype=dtype, device=device)
self.norm = RMSNorm(hidden_size_x, dtype=dtype, device=device, operations=operations)
self.mlp_ratio = mlp_ratio
@@ -176,7 +178,7 @@ class NerfGLUBlock(nn.Module):
class NerfFinalLayer(nn.Module):
def __init__(self, hidden_size, out_channels, dtype=None, device=None, operations=None):
super().__init__()
self.norm = operations.RMSNorm(hidden_size, dtype=dtype, device=device)
self.norm = RMSNorm(hidden_size, dtype=dtype, device=device, operations=operations)
self.linear = operations.Linear(hidden_size, out_channels, dtype=dtype, device=device)
def forward(self, x: torch.Tensor) -> torch.Tensor:
@@ -188,7 +190,7 @@ class NerfFinalLayer(nn.Module):
class NerfFinalLayerConv(nn.Module):
def __init__(self, hidden_size: int, out_channels: int, dtype=None, device=None, operations=None):
super().__init__()
self.norm = operations.RMSNorm(hidden_size, dtype=dtype, device=device)
self.norm = RMSNorm(hidden_size, dtype=dtype, device=device, operations=operations)
self.conv = operations.Conv2d(
in_channels=hidden_size,
out_channels=out_channels,

View File

@@ -10,10 +10,12 @@ from torch import Tensor, nn
from einops import repeat
import comfy.ldm.common_dit
from comfy.ldm.flux.layers import EmbedND, DoubleStreamBlock, SingleStreamBlock
from comfy.ldm.flux.layers import EmbedND
from comfy.ldm.chroma.model import Chroma, ChromaParams
from comfy.ldm.chroma.layers import (
DoubleStreamBlock,
SingleStreamBlock,
Approximator,
)
from .layers import (
@@ -37,7 +39,7 @@ class ChromaRadianceParams(ChromaParams):
nerf_final_head_type: str
# None means use the same dtype as the model.
nerf_embedder_dtype: Optional[torch.dtype]
use_x0: bool
class ChromaRadiance(Chroma):
"""
@@ -87,6 +89,7 @@ class ChromaRadiance(Chroma):
dtype=dtype, device=device, operations=operations
)
self.double_blocks = nn.ModuleList(
[
DoubleStreamBlock(
@@ -94,7 +97,6 @@ class ChromaRadiance(Chroma):
self.num_heads,
mlp_ratio=params.mlp_ratio,
qkv_bias=params.qkv_bias,
modulation=False,
dtype=dtype, device=device, operations=operations
)
for _ in range(params.depth)
@@ -107,7 +109,6 @@ class ChromaRadiance(Chroma):
self.hidden_size,
self.num_heads,
mlp_ratio=params.mlp_ratio,
modulation=False,
dtype=dtype, device=device, operations=operations,
)
for _ in range(params.depth_single_blocks)
@@ -159,9 +160,6 @@ class ChromaRadiance(Chroma):
self.skip_dit = []
self.lite = False
if params.use_x0:
self.register_buffer("__x0__", torch.tensor([]))
@property
def _nerf_final_layer(self) -> nn.Module:
if self.params.nerf_final_head_type == "linear":
@@ -270,7 +268,7 @@ class ChromaRadiance(Chroma):
bad_keys = tuple(
k
for k, v in overrides.items()
if not isinstance(v, type(getattr(params, k))) and (v is not None or k not in nullable_keys)
if type(v) != type(getattr(params, k)) and (v is not None or k not in nullable_keys)
)
if bad_keys:
e = f"Invalid value(s) in transformer_options chroma_radiance_options: {', '.join(bad_keys)}"
@@ -279,12 +277,6 @@ class ChromaRadiance(Chroma):
params_dict |= overrides
return params.__class__(**params_dict)
def _apply_x0_residual(self, predicted, noisy, timesteps):
# non zero during training to prevent 0 div
eps = 0.0
return (noisy - predicted) / (timesteps.view(-1,1,1,1) + eps)
def _forward(
self,
x: Tensor,
@@ -325,11 +317,4 @@ class ChromaRadiance(Chroma):
transformer_options,
attn_mask=kwargs.get("attention_mask", None),
)
out = self.forward_nerf(img, img_out, params)[:, :, :h, :w]
# If x0 variant → v-pred, just return this instead
if hasattr(self, "__x0__"):
out = self._apply_x0_residual(out, img, timestep)
return out
return self.forward_nerf(img, img_out, params)[:, :, :h, :w]

View File

@@ -13,7 +13,6 @@ from torchvision import transforms
import comfy.patcher_extension
from comfy.ldm.modules.attention import optimized_attention
import comfy.ldm.common_dit
def apply_rotary_pos_emb(
t: torch.Tensor,
@@ -335,7 +334,7 @@ class FinalLayer(nn.Module):
device=None, dtype=None, operations=None
):
super().__init__()
self.layer_norm = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6)
self.layer_norm = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6)
self.linear = operations.Linear(
hidden_size, spatial_patch_size * spatial_patch_size * temporal_patch_size * out_channels, bias=False, device=device, dtype=dtype
)
@@ -463,8 +462,6 @@ class Block(nn.Module):
extra_per_block_pos_emb: Optional[torch.Tensor] = None,
transformer_options: Optional[dict] = {},
) -> torch.Tensor:
residual_dtype = x_B_T_H_W_D.dtype
compute_dtype = emb_B_T_D.dtype
if extra_per_block_pos_emb is not None:
x_B_T_H_W_D = x_B_T_H_W_D + extra_per_block_pos_emb
@@ -514,7 +511,7 @@ class Block(nn.Module):
result_B_T_H_W_D = rearrange(
self.self_attn(
# normalized_x_B_T_HW_D,
rearrange(normalized_x_B_T_H_W_D.to(compute_dtype), "b t h w d -> b (t h w) d"),
rearrange(normalized_x_B_T_H_W_D, "b t h w d -> b (t h w) d"),
None,
rope_emb=rope_emb_L_1_1_D,
transformer_options=transformer_options,
@@ -524,7 +521,7 @@ class Block(nn.Module):
h=H,
w=W,
)
x_B_T_H_W_D = x_B_T_H_W_D + gate_self_attn_B_T_1_1_D.to(residual_dtype) * result_B_T_H_W_D.to(residual_dtype)
x_B_T_H_W_D = x_B_T_H_W_D + gate_self_attn_B_T_1_1_D * result_B_T_H_W_D
def _x_fn(
_x_B_T_H_W_D: torch.Tensor,
@@ -538,7 +535,7 @@ class Block(nn.Module):
)
_result_B_T_H_W_D = rearrange(
self.cross_attn(
rearrange(_normalized_x_B_T_H_W_D.to(compute_dtype), "b t h w d -> b (t h w) d"),
rearrange(_normalized_x_B_T_H_W_D, "b t h w d -> b (t h w) d"),
crossattn_emb,
rope_emb=rope_emb_L_1_1_D,
transformer_options=transformer_options,
@@ -557,7 +554,7 @@ class Block(nn.Module):
shift_cross_attn_B_T_1_1_D,
transformer_options=transformer_options,
)
x_B_T_H_W_D = result_B_T_H_W_D.to(residual_dtype) * gate_cross_attn_B_T_1_1_D.to(residual_dtype) + x_B_T_H_W_D
x_B_T_H_W_D = result_B_T_H_W_D * gate_cross_attn_B_T_1_1_D + x_B_T_H_W_D
normalized_x_B_T_H_W_D = _fn(
x_B_T_H_W_D,
@@ -565,8 +562,8 @@ class Block(nn.Module):
scale_mlp_B_T_1_1_D,
shift_mlp_B_T_1_1_D,
)
result_B_T_H_W_D = self.mlp(normalized_x_B_T_H_W_D.to(compute_dtype))
x_B_T_H_W_D = x_B_T_H_W_D + gate_mlp_B_T_1_1_D.to(residual_dtype) * result_B_T_H_W_D.to(residual_dtype)
result_B_T_H_W_D = self.mlp(normalized_x_B_T_H_W_D)
x_B_T_H_W_D = x_B_T_H_W_D + gate_mlp_B_T_1_1_D * result_B_T_H_W_D
return x_B_T_H_W_D
@@ -838,8 +835,6 @@ class MiniTrainDIT(nn.Module):
padding_mask: Optional[torch.Tensor] = None,
**kwargs,
):
orig_shape = list(x.shape)
x = comfy.ldm.common_dit.pad_to_patch_size(x, (self.patch_temporal, self.patch_spatial, self.patch_spatial))
x_B_C_T_H_W = x
timesteps_B_T = timesteps
crossattn_emb = context
@@ -878,14 +873,6 @@ class MiniTrainDIT(nn.Module):
"extra_per_block_pos_emb": extra_pos_emb_B_T_H_W_D_or_T_H_W_B_D,
"transformer_options": kwargs.get("transformer_options", {}),
}
# The residual stream for this model has large values. To make fp16 compute_dtype work, we keep the residual stream
# in fp32, but run attention and MLP modules in fp16.
# An alternate method that clamps fp16 values "works" in the sense that it makes coherent images, but there is noticeable
# quality degradation and visual artifacts.
if x_B_T_H_W_D.dtype == torch.float16:
x_B_T_H_W_D = x_B_T_H_W_D.float()
for block in self.blocks:
x_B_T_H_W_D = block(
x_B_T_H_W_D,
@@ -894,6 +881,6 @@ class MiniTrainDIT(nn.Module):
**block_kwargs,
)
x_B_T_H_W_O = self.final_layer(x_B_T_H_W_D.to(crossattn_emb.dtype), t_embedding_B_T_D, adaln_lora_B_T_3D=adaln_lora_B_T_3D)
x_B_C_Tt_Hp_Wp = self.unpatchify(x_B_T_H_W_O)[:, :, :orig_shape[-3], :orig_shape[-2], :orig_shape[-1]]
x_B_T_H_W_O = self.final_layer(x_B_T_H_W_D, t_embedding_B_T_D, adaln_lora_B_T_3D=adaln_lora_B_T_3D)
x_B_C_Tt_Hp_Wp = self.unpatchify(x_B_T_H_W_O)
return x_B_C_Tt_Hp_Wp

View File

@@ -0,0 +1,200 @@
from __future__ import annotations
import torch
import copy
import comfy.model_management
class FlipFlopModule(torch.nn.Module):
def __init__(self, block_types: tuple[str, ...], enable_flipflop: bool = True):
super().__init__()
self.block_types = block_types
self.enable_flipflop = enable_flipflop
self.flipflop: dict[str, FlipFlopHolder] = {}
self.block_info: dict[str, tuple[int, int]] = {}
self.flipflop_prefixes: list[str] = []
def setup_flipflop_holders(self, block_info: dict[str, tuple[int, int]], flipflop_prefixes: list[str], load_device: torch.device, offload_device: torch.device):
for block_type, (flipflop_blocks, total_blocks) in block_info.items():
if block_type in self.flipflop:
continue
self.flipflop[block_type] = FlipFlopHolder(getattr(self, block_type)[total_blocks-flipflop_blocks:], flipflop_blocks, total_blocks, load_device, offload_device)
self.block_info[block_type] = (flipflop_blocks, total_blocks)
self.flipflop_prefixes = flipflop_prefixes.copy()
def init_flipflop_block_copies(self, device: torch.device) -> int:
memory_freed = 0
for holder in self.flipflop.values():
memory_freed += holder.init_flipflop_block_copies(device)
return memory_freed
def clean_flipflop_holders(self):
memory_freed = 0
for block_type in list(self.flipflop.keys()):
memory_freed += self.flipflop[block_type].clean_flipflop_blocks()
del self.flipflop[block_type]
self.block_info = {}
self.flipflop_prefixes = []
return memory_freed
def get_all_blocks(self, block_type: str) -> list[torch.nn.Module]:
return getattr(self, block_type)
def get_blocks(self, block_type: str) -> torch.nn.ModuleList:
if block_type not in self.block_types:
raise ValueError(f"Block type {block_type} not found in {self.block_types}")
if block_type in self.flipflop:
return getattr(self, block_type)[:self.flipflop[block_type].i_offset]
return getattr(self, block_type)
def get_all_block_module_sizes(self, reverse_sort_by_size: bool = False) -> list[tuple[str, int]]:
'''
Returns a list of (block_type, size) tuples sorted by size in ascending order.
If reverse_sort_by_size is True, the list is sorted in descending order instead.
'''
sizes = [(block_type, self.get_block_module_size(block_type)) for block_type in self.block_types]
sizes.sort(key=lambda x: x[1], reverse=reverse_sort_by_size)
return sizes
def get_block_module_size(self, block_type: str) -> int:
return comfy.model_management.module_size(getattr(self, block_type)[0])
def execute_blocks(self, block_type: str, func, out: torch.Tensor | tuple[torch.Tensor,...], *args, **kwargs):
# execute blocks for both single-stream and double- (or higher-) stream block types;
# the running state may be a single tensor or a tuple of tensors
if isinstance(out, torch.Tensor):
out = (out,)
for i, block in enumerate(self.get_blocks(block_type)):
out = func(i, block, *out, *args, **kwargs)
if isinstance(out, torch.Tensor):
out = (out,)
if block_type in self.flipflop:
holder = self.flipflop[block_type]
with holder.context() as ctx:
for i, block in enumerate(holder.blocks):
out = ctx(func, i, block, *out, *args, **kwargs)
if isinstance(out, torch.Tensor):
out = (out,)
if len(out) == 1:
out = out[0]
return out
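# Hypothetical usage sketch (model-agnostic; the (img, txt) state and the block call
# signature are illustrative, not tied to a specific model): a forward pass hands its
# per-block step to execute_blocks, which runs the resident blocks normally and the
# tail blocks through the flip/flop holder. The callback receives (index, block, *state)
# plus any extra args/kwargs and returns the updated state.
def _example_run_double_blocks(module: "FlipFlopModule", img, txt, vec, pe):
    def step(i, block, img, txt):
        return block(img=img, txt=txt, vec=vec, pe=pe)
    return module.execute_blocks("double_blocks", step, (img, txt))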
class FlipFlopContext:
def __init__(self, holder: FlipFlopHolder):
# NOTE: there is a bug when there is an odd number of blocks to flipflop.
# Currently worked around by always making sure the count is even, but it still needs a proper fix.
self.holder = holder
self.reset()
def reset(self):
self.num_blocks = len(self.holder.blocks)
self.first_flip = True
self.first_flop = True
self.last_flip = False
self.last_flop = False
def __enter__(self):
self.reset()
return self
def __exit__(self, exc_type, exc_value, traceback):
self.holder.compute_stream.record_event(self.holder.cpy_end_event)
def do_flip(self, func, i: int, _, *args, **kwargs):
# flip
self.holder.compute_stream.wait_event(self.holder.cpy_end_event)
with torch.cuda.stream(self.holder.compute_stream):
out = func(i+self.holder.i_offset, self.holder.flip, *args, **kwargs)
self.holder.event_flip.record(self.holder.compute_stream)
# while flip executes, queue flop to copy to its next block
next_flop_i = i + 1
if next_flop_i >= self.num_blocks:
next_flop_i = next_flop_i - self.num_blocks
self.last_flip = True
if not self.first_flip:
self.holder._copy_state_dict(self.holder.flop.state_dict(), self.holder.blocks[next_flop_i].state_dict(), self.holder.event_flop, self.holder.cpy_end_event)
if self.last_flip:
self.holder._copy_state_dict(self.holder.flip.state_dict(), self.holder.blocks[0].state_dict(), cpy_start_event=self.holder.event_flip)
self.first_flip = False
return out
def do_flop(self, func, i: int, _, *args, **kwargs):
# flop
if not self.first_flop:
self.holder.compute_stream.wait_event(self.holder.cpy_end_event)
with torch.cuda.stream(self.holder.compute_stream):
out = func(i+self.holder.i_offset, self.holder.flop, *args, **kwargs)
self.holder.event_flop.record(self.holder.compute_stream)
# while flop executes, queue flip to copy to its next block
next_flip_i = i + 1
if next_flip_i >= self.num_blocks:
next_flip_i = next_flip_i - self.num_blocks
self.last_flop = True
self.holder._copy_state_dict(self.holder.flip.state_dict(), self.holder.blocks[next_flip_i].state_dict(), self.holder.event_flip, self.holder.cpy_end_event)
if self.last_flop:
self.holder._copy_state_dict(self.holder.flop.state_dict(), self.holder.blocks[1].state_dict(), cpy_start_event=self.holder.event_flop)
self.first_flop = False
return out
@torch.no_grad()
def __call__(self, func, i: int, block: torch.nn.Module, *args, **kwargs):
# flips are even indexes, flops are odd indexes
if i % 2 == 0:
return self.do_flip(func, i, block, *args, **kwargs)
else:
return self.do_flop(func, i, block, *args, **kwargs)
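# Illustrative timeline of one pass over the streamed tail blocks (even indexes run
# on `flip`, odd indexes on `flop`), showing how compute and weight copies overlap
# on the two CUDA streams:
#
#   compute stream: blk0 on flip | blk1 on flop | blk2 on flip | blk3 on flop | ...
#   copy stream:    (idle)       | blk2 -> flip | blk3 -> flop | blk4 -> flip | ...
#
# event_flip / event_flop ensure a buffer's previous compute has finished before the
# copy stream overwrites it, and cpy_end_event makes the compute stream wait until
# the prefetched weights have fully landed.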
class FlipFlopHolder:
def __init__(self, blocks: list[torch.nn.Module], flip_amount: int, total_amount: int, load_device: torch.device, offload_device: torch.device):
self.load_device = load_device
self.offload_device = offload_device
self.blocks = blocks
self.flip_amount = flip_amount
self.total_amount = total_amount
# NOTE: used to make sure block indexes passed into block functions match expected patch indexes
self.i_offset = total_amount - flip_amount
self.block_module_size = 0
if len(self.blocks) > 0:
self.block_module_size = comfy.model_management.module_size(self.blocks[0])
self.flip: torch.nn.Module = None
self.flop: torch.nn.Module = None
self.compute_stream = torch.cuda.default_stream(self.load_device)
self.cpy_stream = torch.cuda.Stream(self.load_device)
self.event_flip = torch.cuda.Event(enable_timing=False)
self.event_flop = torch.cuda.Event(enable_timing=False)
self.cpy_end_event = torch.cuda.Event(enable_timing=False)
# INIT - is this actually needed?
self.compute_stream.record_event(self.cpy_end_event)
def _copy_state_dict(self, dst, src, cpy_start_event: torch.cuda.Event=None, cpy_end_event: torch.cuda.Event=None):
if cpy_start_event:
self.cpy_stream.wait_event(cpy_start_event)
with torch.cuda.stream(self.cpy_stream):
for k, v in src.items():
dst[k].copy_(v, non_blocking=True)
if cpy_end_event:
cpy_end_event.record(self.cpy_stream)
def context(self):
return FlipFlopContext(self)
def init_flipflop_block_copies(self, load_device: torch.device) -> int:
self.flip = copy.deepcopy(self.blocks[0]).to(device=load_device)
self.flop = copy.deepcopy(self.blocks[1]).to(device=load_device)
return comfy.model_management.module_size(self.flip) + comfy.model_management.module_size(self.flop)
def clean_flipflop_blocks(self) -> int:
memory_freed = 0
memory_freed += comfy.model_management.module_size(self.flip)
memory_freed += comfy.model_management.module_size(self.flop)
del self.flip
del self.flop
self.flip = None
self.flop = None
return memory_freed
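# Hypothetical end-to-end sketch (class and attribute names are illustrative, and a
# CUDA device is assumed to match the streams used above): stream the last 20 of 60
# double blocks through the flip/flop pair while the first 40 stay resident on the GPU.
def _example_flipflop_setup(model: FlipFlopModule) -> int:
    model.setup_flipflop_holders(
        block_info={"double_blocks": (20, 60)},   # (flipflop_blocks, total_blocks)
        flipflop_prefixes=["double_blocks."],
        load_device=torch.device("cuda"),
        offload_device=torch.device("cpu"),
    )
    # Allocates the two on-GPU working copies and returns their combined size in
    # bytes; clean_flipflop_holders() releases them again after sampling.
    return model.init_flipflop_block_copies(torch.device("cuda"))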

View File

@@ -5,9 +5,9 @@ import torch
from torch import Tensor, nn
from .math import attention, rope
import comfy.ops
import comfy.ldm.common_dit
# Fix import for some custom nodes, TODO: delete eventually.
RMSNorm = None
class EmbedND(nn.Module):
def __init__(self, dim: int, theta: int, axes_dim: list):
@@ -48,51 +48,30 @@ def timestep_embedding(t: Tensor, dim, max_period=10000, time_factor: float = 10
return embedding
class MLPEmbedder(nn.Module):
def __init__(self, in_dim: int, hidden_dim: int, bias=True, dtype=None, device=None, operations=None):
def __init__(self, in_dim: int, hidden_dim: int, dtype=None, device=None, operations=None):
super().__init__()
self.in_layer = operations.Linear(in_dim, hidden_dim, bias=bias, dtype=dtype, device=device)
self.in_layer = operations.Linear(in_dim, hidden_dim, bias=True, dtype=dtype, device=device)
self.silu = nn.SiLU()
self.out_layer = operations.Linear(hidden_dim, hidden_dim, bias=bias, dtype=dtype, device=device)
self.out_layer = operations.Linear(hidden_dim, hidden_dim, bias=True, dtype=dtype, device=device)
def forward(self, x: Tensor) -> Tensor:
return self.out_layer(self.silu(self.in_layer(x)))
class YakMLP(nn.Module):
def __init__(self, hidden_size: int, intermediate_size: int, dtype=None, device=None, operations=None):
class RMSNorm(torch.nn.Module):
def __init__(self, dim: int, dtype=None, device=None, operations=None):
super().__init__()
self.hidden_size = hidden_size
self.intermediate_size = intermediate_size
self.gate_proj = operations.Linear(self.hidden_size, self.intermediate_size, bias=True, dtype=dtype, device=device)
self.up_proj = operations.Linear(self.hidden_size, self.intermediate_size, bias=True, dtype=dtype, device=device)
self.down_proj = operations.Linear(self.intermediate_size, self.hidden_size, bias=True, dtype=dtype, device=device)
self.act_fn = nn.SiLU()
self.scale = nn.Parameter(torch.empty((dim), dtype=dtype, device=device))
def forward(self, x: Tensor) -> Tensor:
down_proj = self.down_proj(self.act_fn(self.gate_proj(x)) * self.up_proj(x))
return down_proj
def build_mlp(hidden_size, mlp_hidden_dim, mlp_silu_act=False, yak_mlp=False, dtype=None, device=None, operations=None):
if yak_mlp:
return YakMLP(hidden_size, mlp_hidden_dim, dtype=dtype, device=device, operations=operations)
if mlp_silu_act:
return nn.Sequential(
operations.Linear(hidden_size, mlp_hidden_dim * 2, bias=False, dtype=dtype, device=device),
SiLUActivation(),
operations.Linear(mlp_hidden_dim, hidden_size, bias=False, dtype=dtype, device=device),
)
else:
return nn.Sequential(
operations.Linear(hidden_size, mlp_hidden_dim, bias=True, dtype=dtype, device=device),
nn.GELU(approximate="tanh"),
operations.Linear(mlp_hidden_dim, hidden_size, bias=True, dtype=dtype, device=device),
)
def forward(self, x: Tensor):
return comfy.ldm.common_dit.rms_norm(x, self.scale, 1e-6)
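# For reference, rms_norm here is expected to compute the usual RMS normalization,
# roughly x / sqrt(mean(x**2, dim=-1, keepdim=True) + eps) * scale with eps = 1e-6:
# only the last dimension is normalized and no mean is subtracted.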
class QKNorm(torch.nn.Module):
def __init__(self, dim: int, dtype=None, device=None, operations=None):
super().__init__()
self.query_norm = operations.RMSNorm(dim, dtype=dtype, device=device)
self.key_norm = operations.RMSNorm(dim, dtype=dtype, device=device)
self.query_norm = RMSNorm(dim, dtype=dtype, device=device, operations=operations)
self.key_norm = RMSNorm(dim, dtype=dtype, device=device, operations=operations)
def forward(self, q: Tensor, k: Tensor, v: Tensor) -> tuple:
q = self.query_norm(q)
@@ -101,14 +80,14 @@ class QKNorm(torch.nn.Module):
class SelfAttention(nn.Module):
def __init__(self, dim: int, num_heads: int = 8, qkv_bias: bool = False, proj_bias: bool = True, dtype=None, device=None, operations=None):
def __init__(self, dim: int, num_heads: int = 8, qkv_bias: bool = False, dtype=None, device=None, operations=None):
super().__init__()
self.num_heads = num_heads
head_dim = dim // num_heads
self.qkv = operations.Linear(dim, dim * 3, bias=qkv_bias, dtype=dtype, device=device)
self.norm = QKNorm(head_dim, dtype=dtype, device=device, operations=operations)
self.proj = operations.Linear(dim, dim, bias=proj_bias, dtype=dtype, device=device)
self.proj = operations.Linear(dim, dim, dtype=dtype, device=device)
@dataclass
@@ -119,11 +98,11 @@ class ModulationOut:
class Modulation(nn.Module):
def __init__(self, dim: int, double: bool, bias=True, dtype=None, device=None, operations=None):
def __init__(self, dim: int, double: bool, dtype=None, device=None, operations=None):
super().__init__()
self.is_double = double
self.multiplier = 6 if double else 3
self.lin = operations.Linear(dim, self.multiplier * dim, bias=bias, dtype=dtype, device=device)
self.lin = operations.Linear(dim, self.multiplier * dim, bias=True, dtype=dtype, device=device)
def forward(self, vec: Tensor) -> tuple:
if vec.ndim == 2:
@@ -150,90 +129,77 @@ def apply_mod(tensor, m_mult, m_add=None, modulation_dims=None):
return tensor
class SiLUActivation(nn.Module):
def __init__(self):
super().__init__()
self.gate_fn = nn.SiLU()
def forward(self, x: Tensor) -> Tensor:
x1, x2 = x.chunk(2, dim=-1)
return self.gate_fn(x1) * x2
class DoubleStreamBlock(nn.Module):
def __init__(self, hidden_size: int, num_heads: int, mlp_ratio: float, qkv_bias: bool = False, modulation=True, mlp_silu_act=False, proj_bias=True, yak_mlp=False, dtype=None, device=None, operations=None):
def __init__(self, hidden_size: int, num_heads: int, mlp_ratio: float, qkv_bias: bool = False, flipped_img_txt=False, dtype=None, device=None, operations=None):
super().__init__()
mlp_hidden_dim = int(hidden_size * mlp_ratio)
self.num_heads = num_heads
self.hidden_size = hidden_size
self.modulation = modulation
if self.modulation:
self.img_mod = Modulation(hidden_size, double=True, dtype=dtype, device=device, operations=operations)
self.img_mod = Modulation(hidden_size, double=True, dtype=dtype, device=device, operations=operations)
self.img_norm1 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
self.img_attn = SelfAttention(dim=hidden_size, num_heads=num_heads, qkv_bias=qkv_bias, proj_bias=proj_bias, dtype=dtype, device=device, operations=operations)
self.img_attn = SelfAttention(dim=hidden_size, num_heads=num_heads, qkv_bias=qkv_bias, dtype=dtype, device=device, operations=operations)
self.img_norm2 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
self.img_mlp = nn.Sequential(
operations.Linear(hidden_size, mlp_hidden_dim, bias=True, dtype=dtype, device=device),
nn.GELU(approximate="tanh"),
operations.Linear(mlp_hidden_dim, hidden_size, bias=True, dtype=dtype, device=device),
)
self.img_mlp = build_mlp(hidden_size, mlp_hidden_dim, mlp_silu_act=mlp_silu_act, yak_mlp=yak_mlp, dtype=dtype, device=device, operations=operations)
if self.modulation:
self.txt_mod = Modulation(hidden_size, double=True, dtype=dtype, device=device, operations=operations)
self.txt_mod = Modulation(hidden_size, double=True, dtype=dtype, device=device, operations=operations)
self.txt_norm1 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
self.txt_attn = SelfAttention(dim=hidden_size, num_heads=num_heads, qkv_bias=qkv_bias, proj_bias=proj_bias, dtype=dtype, device=device, operations=operations)
self.txt_attn = SelfAttention(dim=hidden_size, num_heads=num_heads, qkv_bias=qkv_bias, dtype=dtype, device=device, operations=operations)
self.txt_norm2 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
self.txt_mlp = build_mlp(hidden_size, mlp_hidden_dim, mlp_silu_act=mlp_silu_act, yak_mlp=yak_mlp, dtype=dtype, device=device, operations=operations)
self.txt_mlp = nn.Sequential(
operations.Linear(hidden_size, mlp_hidden_dim, bias=True, dtype=dtype, device=device),
nn.GELU(approximate="tanh"),
operations.Linear(mlp_hidden_dim, hidden_size, bias=True, dtype=dtype, device=device),
)
self.flipped_img_txt = flipped_img_txt
def forward(self, img: Tensor, txt: Tensor, vec: Tensor, pe: Tensor, attn_mask=None, modulation_dims_img=None, modulation_dims_txt=None, transformer_options={}):
if self.modulation:
img_mod1, img_mod2 = self.img_mod(vec)
txt_mod1, txt_mod2 = self.txt_mod(vec)
else:
(img_mod1, img_mod2), (txt_mod1, txt_mod2) = vec
img_mod1, img_mod2 = self.img_mod(vec)
txt_mod1, txt_mod2 = self.txt_mod(vec)
# prepare image for attention
img_modulated = self.img_norm1(img)
img_modulated = apply_mod(img_modulated, (1 + img_mod1.scale), img_mod1.shift, modulation_dims_img)
img_qkv = self.img_attn.qkv(img_modulated)
del img_modulated
img_q, img_k, img_v = img_qkv.view(img_qkv.shape[0], img_qkv.shape[1], 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
del img_qkv
img_q, img_k = self.img_attn.norm(img_q, img_k, img_v)
# prepare txt for attention
txt_modulated = self.txt_norm1(txt)
txt_modulated = apply_mod(txt_modulated, (1 + txt_mod1.scale), txt_mod1.shift, modulation_dims_txt)
txt_qkv = self.txt_attn.qkv(txt_modulated)
del txt_modulated
txt_q, txt_k, txt_v = txt_qkv.view(txt_qkv.shape[0], txt_qkv.shape[1], 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
del txt_qkv
txt_q, txt_k = self.txt_attn.norm(txt_q, txt_k, txt_v)
q = torch.cat((txt_q, img_q), dim=2)
del txt_q, img_q
k = torch.cat((txt_k, img_k), dim=2)
del txt_k, img_k
v = torch.cat((txt_v, img_v), dim=2)
del txt_v, img_v
# run actual attention
attn = attention(q, k, v, pe=pe, mask=attn_mask, transformer_options=transformer_options)
del q, k, v
if self.flipped_img_txt:
# run actual attention
attn = attention(torch.cat((img_q, txt_q), dim=2),
torch.cat((img_k, txt_k), dim=2),
torch.cat((img_v, txt_v), dim=2),
pe=pe, mask=attn_mask, transformer_options=transformer_options)
txt_attn, img_attn = attn[:, : txt.shape[1]], attn[:, txt.shape[1]:]
img_attn, txt_attn = attn[:, : img.shape[1]], attn[:, img.shape[1]:]
else:
# run actual attention
attn = attention(torch.cat((txt_q, img_q), dim=2),
torch.cat((txt_k, img_k), dim=2),
torch.cat((txt_v, img_v), dim=2),
pe=pe, mask=attn_mask, transformer_options=transformer_options)
txt_attn, img_attn = attn[:, : txt.shape[1]], attn[:, txt.shape[1]:]
# calculate the img blocks
img += apply_mod(self.img_attn.proj(img_attn), img_mod1.gate, None, modulation_dims_img)
del img_attn
img += apply_mod(self.img_mlp(apply_mod(self.img_norm2(img), (1 + img_mod2.scale), img_mod2.shift, modulation_dims_img)), img_mod2.gate, None, modulation_dims_img)
img = img + apply_mod(self.img_attn.proj(img_attn), img_mod1.gate, None, modulation_dims_img)
img = img + apply_mod(self.img_mlp(apply_mod(self.img_norm2(img), (1 + img_mod2.scale), img_mod2.shift, modulation_dims_img)), img_mod2.gate, None, modulation_dims_img)
# calculate the txt blocks
txt += apply_mod(self.txt_attn.proj(txt_attn), txt_mod1.gate, None, modulation_dims_txt)
del txt_attn
txt += apply_mod(self.txt_mlp(apply_mod(self.txt_norm2(txt), (1 + txt_mod2.scale), txt_mod2.shift, modulation_dims_txt)), txt_mod2.gate, None, modulation_dims_txt)
if txt.dtype == torch.float16:
@@ -254,10 +220,6 @@ class SingleStreamBlock(nn.Module):
num_heads: int,
mlp_ratio: float = 4.0,
qk_scale: float = None,
modulation=True,
mlp_silu_act=False,
bias=True,
yak_mlp=False,
dtype=None,
device=None,
operations=None
@@ -269,55 +231,30 @@ class SingleStreamBlock(nn.Module):
self.scale = qk_scale or head_dim**-0.5
self.mlp_hidden_dim = int(hidden_size * mlp_ratio)
self.mlp_hidden_dim_first = self.mlp_hidden_dim
self.yak_mlp = yak_mlp
if mlp_silu_act:
self.mlp_hidden_dim_first = int(hidden_size * mlp_ratio * 2)
self.mlp_act = SiLUActivation()
else:
self.mlp_act = nn.GELU(approximate="tanh")
if self.yak_mlp:
self.mlp_hidden_dim_first *= 2
self.mlp_act = nn.SiLU()
# qkv and mlp_in
self.linear1 = operations.Linear(hidden_size, hidden_size * 3 + self.mlp_hidden_dim_first, bias=bias, dtype=dtype, device=device)
self.linear1 = operations.Linear(hidden_size, hidden_size * 3 + self.mlp_hidden_dim, dtype=dtype, device=device)
# proj and mlp_out
self.linear2 = operations.Linear(hidden_size + self.mlp_hidden_dim, hidden_size, bias=bias, dtype=dtype, device=device)
self.linear2 = operations.Linear(hidden_size + self.mlp_hidden_dim, hidden_size, dtype=dtype, device=device)
self.norm = QKNorm(head_dim, dtype=dtype, device=device, operations=operations)
self.hidden_size = hidden_size
self.pre_norm = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
if modulation:
self.modulation = Modulation(hidden_size, double=False, dtype=dtype, device=device, operations=operations)
else:
self.modulation = None
self.mlp_act = nn.GELU(approximate="tanh")
self.modulation = Modulation(hidden_size, double=False, dtype=dtype, device=device, operations=operations)
def forward(self, x: Tensor, vec: Tensor, pe: Tensor, attn_mask=None, modulation_dims=None, transformer_options={}) -> Tensor:
if self.modulation:
mod, _ = self.modulation(vec)
else:
mod = vec
qkv, mlp = torch.split(self.linear1(apply_mod(self.pre_norm(x), (1 + mod.scale), mod.shift, modulation_dims)), [3 * self.hidden_size, self.mlp_hidden_dim_first], dim=-1)
mod, _ = self.modulation(vec)
qkv, mlp = torch.split(self.linear1(apply_mod(self.pre_norm(x), (1 + mod.scale), mod.shift, modulation_dims)), [3 * self.hidden_size, self.mlp_hidden_dim], dim=-1)
q, k, v = qkv.view(qkv.shape[0], qkv.shape[1], 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
del qkv
q, k = self.norm(q, k, v)
# compute attention
attn = attention(q, k, v, pe=pe, mask=attn_mask, transformer_options=transformer_options)
del q, k, v
# compute activation in mlp stream, cat again and run second linear layer
if self.yak_mlp:
mlp = self.mlp_act(mlp[..., self.mlp_hidden_dim_first // 2:]) * mlp[..., :self.mlp_hidden_dim_first // 2]
else:
mlp = self.mlp_act(mlp)
output = self.linear2(torch.cat((attn, mlp), 2))
output = self.linear2(torch.cat((attn, self.mlp_act(mlp)), 2))
x += apply_mod(output, mod.gate, None, modulation_dims)
if x.dtype == torch.float16:
x = torch.nan_to_num(x, nan=0.0, posinf=65504, neginf=-65504)
@@ -325,11 +262,11 @@ class SingleStreamBlock(nn.Module):
class LastLayer(nn.Module):
def __init__(self, hidden_size: int, patch_size: int, out_channels: int, bias=True, dtype=None, device=None, operations=None):
def __init__(self, hidden_size: int, patch_size: int, out_channels: int, dtype=None, device=None, operations=None):
super().__init__()
self.norm_final = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
self.linear = operations.Linear(hidden_size, patch_size * patch_size * out_channels, bias=bias, dtype=dtype, device=device)
self.adaLN_modulation = nn.Sequential(nn.SiLU(), operations.Linear(hidden_size, 2 * hidden_size, bias=bias, dtype=dtype, device=device))
self.linear = operations.Linear(hidden_size, patch_size * patch_size * out_channels, bias=True, dtype=dtype, device=device)
self.adaLN_modulation = nn.Sequential(nn.SiLU(), operations.Linear(hidden_size, 2 * hidden_size, bias=True, dtype=dtype, device=device))
def forward(self, x: Tensor, vec: Tensor, modulation_dims=None) -> Tensor:
if vec.ndim == 2:

View File

@@ -4,16 +4,23 @@ from torch import Tensor
from comfy.ldm.modules.attention import optimized_attention
import comfy.model_management
import logging
def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor, mask=None, transformer_options={}) -> Tensor:
q_shape = q.shape
k_shape = k.shape
if pe is not None:
q, k = apply_rope(q, k, pe)
q = q.to(dtype=pe.dtype).reshape(*q.shape[:-1], -1, 1, 2)
k = k.to(dtype=pe.dtype).reshape(*k.shape[:-1], -1, 1, 2)
q = (pe[..., 0] * q[..., 0] + pe[..., 1] * q[..., 1]).reshape(*q_shape).type_as(v)
k = (pe[..., 0] * k[..., 0] + pe[..., 1] * k[..., 1]).reshape(*k_shape).type_as(v)
heads = q.shape[1]
x = optimized_attention(q, k, v, heads, skip_reshape=True, mask=mask, transformer_options=transformer_options)
return x
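# Note on the inline path above (a reading of this hunk, not new behaviour):
# pe carries the precomputed cos/sin rotation pairs produced by rope() below
# (laid out with a trailing [..., 2, 2]), so the pe[..., 0] / pe[..., 1]
# multiplies against the paired q/k components apply the same rotary embedding
# as apply_rope1, just fused into the attention helper and cast via type_as(v).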
def rope(pos: Tensor, dim: int, theta: int) -> Tensor:
assert dim % 2 == 0
if comfy.model_management.is_device_mps(pos.device) or comfy.model_management.is_intel_xpu() or comfy.model_management.is_directml_enabled():
@@ -28,8 +35,7 @@ def rope(pos: Tensor, dim: int, theta: int) -> Tensor:
out = rearrange(out, "b n d (i j) -> b n d i j", i=2, j=2)
return out.to(dtype=torch.float32, device=pos.device)
def _apply_rope1(x: Tensor, freqs_cis: Tensor):
def apply_rope1(x: Tensor, freqs_cis: Tensor):
x_ = x.to(dtype=freqs_cis.dtype).reshape(*x.shape[:-1], -1, 1, 2)
x_out = freqs_cis[..., 0] * x_[..., 0]
@@ -37,26 +43,5 @@ def _apply_rope1(x: Tensor, freqs_cis: Tensor):
return x_out.reshape(*x.shape).type_as(x)
def _apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor):
def apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor):
return apply_rope1(xq, freqs_cis), apply_rope1(xk, freqs_cis)
try:
import comfy.quant_ops
q_apply_rope = comfy.quant_ops.ck.apply_rope
q_apply_rope1 = comfy.quant_ops.ck.apply_rope1
def apply_rope(xq, xk, freqs_cis):
if comfy.model_management.in_training:
return _apply_rope(xq, xk, freqs_cis)
else:
return apply_rope1(xq, freqs_cis), apply_rope1(xk, freqs_cis)
def apply_rope1(x, freqs_cis):
if comfy.model_management.in_training:
return _apply_rope1(x, freqs_cis)
else:
return q_apply_rope1(x, freqs_cis)
except:
logging.warning("No comfy kitchen, using old apply_rope functions.")
apply_rope = _apply_rope
apply_rope1 = _apply_rope1
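# Dispatch summary (as far as this hunk shows): when comfy.quant_ops ("comfy
# kitchen") imports successfully, apply_rope/apply_rope1 use the fused
# q_apply_rope1 kernel at inference time and fall back to the reference
# _apply_rope/_apply_rope1 while comfy.model_management.in_training is set;
# if the import fails, the reference functions are used everywhere.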

View File

@@ -7,6 +7,7 @@ from torch import Tensor, nn
from einops import rearrange, repeat
import comfy.ldm.common_dit
import comfy.patcher_extension
from comfy.ldm.flipflop_transformer import FlipFlopModule
from .layers import (
DoubleStreamBlock,
@@ -15,7 +16,6 @@ from .layers import (
MLPEmbedder,
SingleStreamBlock,
timestep_embedding,
Modulation,
)
@dataclass
@@ -34,23 +34,15 @@ class FluxParams:
patch_size: int
qkv_bias: bool
guidance_embed: bool
txt_ids_dims: list
global_modulation: bool = False
mlp_silu_act: bool = False
ops_bias: bool = True
default_ref_method: str = "offset"
ref_index_scale: float = 1.0
yak_mlp: bool = False
txt_norm: bool = False
class Flux(nn.Module):
class Flux(FlipFlopModule):
"""
Transformer model for flow matching on sequences.
"""
def __init__(self, image_model=None, final_layer=True, dtype=None, device=None, operations=None, **kwargs):
super().__init__()
super().__init__(("double_blocks", "single_blocks"))
self.dtype = dtype
params = FluxParams(**kwargs)
self.params = params
@@ -67,22 +59,13 @@ class Flux(nn.Module):
self.hidden_size = params.hidden_size
self.num_heads = params.num_heads
self.pe_embedder = EmbedND(dim=pe_dim, theta=params.theta, axes_dim=params.axes_dim)
self.img_in = operations.Linear(self.in_channels, self.hidden_size, bias=params.ops_bias, dtype=dtype, device=device)
self.time_in = MLPEmbedder(in_dim=256, hidden_dim=self.hidden_size, bias=params.ops_bias, dtype=dtype, device=device, operations=operations)
if params.vec_in_dim is not None:
self.vector_in = MLPEmbedder(params.vec_in_dim, self.hidden_size, dtype=dtype, device=device, operations=operations)
else:
self.vector_in = None
self.img_in = operations.Linear(self.in_channels, self.hidden_size, bias=True, dtype=dtype, device=device)
self.time_in = MLPEmbedder(in_dim=256, hidden_dim=self.hidden_size, dtype=dtype, device=device, operations=operations)
self.vector_in = MLPEmbedder(params.vec_in_dim, self.hidden_size, dtype=dtype, device=device, operations=operations)
self.guidance_in = (
MLPEmbedder(in_dim=256, hidden_dim=self.hidden_size, bias=params.ops_bias, dtype=dtype, device=device, operations=operations) if params.guidance_embed else nn.Identity()
MLPEmbedder(in_dim=256, hidden_dim=self.hidden_size, dtype=dtype, device=device, operations=operations) if params.guidance_embed else nn.Identity()
)
self.txt_in = operations.Linear(params.context_in_dim, self.hidden_size, bias=params.ops_bias, dtype=dtype, device=device)
if params.txt_norm:
self.txt_norm = operations.RMSNorm(params.context_in_dim, dtype=dtype, device=device)
else:
self.txt_norm = None
self.txt_in = operations.Linear(params.context_in_dim, self.hidden_size, dtype=dtype, device=device)
self.double_blocks = nn.ModuleList(
[
@@ -91,10 +74,6 @@ class Flux(nn.Module):
self.num_heads,
mlp_ratio=params.mlp_ratio,
qkv_bias=params.qkv_bias,
modulation=params.global_modulation is False,
mlp_silu_act=params.mlp_silu_act,
proj_bias=params.ops_bias,
yak_mlp=params.yak_mlp,
dtype=dtype, device=device, operations=operations
)
for _ in range(params.depth)
@@ -103,30 +82,79 @@ class Flux(nn.Module):
self.single_blocks = nn.ModuleList(
[
SingleStreamBlock(self.hidden_size, self.num_heads, mlp_ratio=params.mlp_ratio, modulation=params.global_modulation is False, mlp_silu_act=params.mlp_silu_act, bias=params.ops_bias, yak_mlp=params.yak_mlp, dtype=dtype, device=device, operations=operations)
SingleStreamBlock(self.hidden_size, self.num_heads, mlp_ratio=params.mlp_ratio, dtype=dtype, device=device, operations=operations)
for _ in range(params.depth_single_blocks)
]
)
if final_layer:
self.final_layer = LastLayer(self.hidden_size, 1, self.out_channels, bias=params.ops_bias, dtype=dtype, device=device, operations=operations)
self.final_layer = LastLayer(self.hidden_size, 1, self.out_channels, dtype=dtype, device=device, operations=operations)
if params.global_modulation:
self.double_stream_modulation_img = Modulation(
self.hidden_size,
double=True,
bias=False,
dtype=dtype, device=device, operations=operations
)
self.double_stream_modulation_txt = Modulation(
self.hidden_size,
double=True,
bias=False,
dtype=dtype, device=device, operations=operations
)
self.single_stream_modulation = Modulation(
self.hidden_size, double=False, bias=False, dtype=dtype, device=device, operations=operations
)
def indiv_double_block_fwd(self, i, block, img, txt, vec, pe, attn_mask, control, blocks_replace, transformer_options):
if ("double_block", i) in blocks_replace:
def block_wrap(args):
out = {}
out["img"], out["txt"] = block(img=args["img"],
txt=args["txt"],
vec=args["vec"],
pe=args["pe"],
attn_mask=args.get("attn_mask"),
transformer_options=args.get("transformer_options"))
return out
out = blocks_replace[("double_block", i)]({"img": img,
"txt": txt,
"vec": vec,
"pe": pe,
"attn_mask": attn_mask,
"transformer_options": transformer_options},
{"original_block": block_wrap})
txt = out["txt"]
img = out["img"]
else:
img, txt = block(img=img,
txt=txt,
vec=vec,
pe=pe,
attn_mask=attn_mask,
transformer_options=transformer_options)
if control is not None: # Controlnet
control_i = control.get("input")
if i < len(control_i):
add = control_i[i]
if add is not None:
img[:, :add.shape[1]] += add
return img, txt
def indiv_single_block_fwd(self, i, block, img, txt, vec, pe, attn_mask, control, blocks_replace, transformer_options):
if ("single_block", i) in blocks_replace:
def block_wrap(args):
out = {}
out["img"] = block(args["img"],
vec=args["vec"],
pe=args["pe"],
attn_mask=args.get("attn_mask"),
transformer_options=args.get("transformer_options"))
return out
out = blocks_replace[("single_block", i)]({"img": img,
"vec": vec,
"pe": pe,
"attn_mask": attn_mask,
"transformer_options": transformer_options},
{"original_block": block_wrap})
img = out["img"]
else:
img = block(img, vec=vec, pe=pe, attn_mask=attn_mask, transformer_options=transformer_options)
if control is not None: # Controlnet
control_o = control.get("output")
if i < len(control_o):
add = control_o[i]
if add is not None:
img[:, txt.shape[1] : txt.shape[1] + add.shape[1], ...] += add
return img
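# Note: indiv_double_block_fwd / indiv_single_block_fwd keep the original
# per-block behaviour (blocks_replace patches and ControlNet residual adds) in
# standalone callables so that FlipFlopModule.execute_blocks can drive the
# iteration over the block lists (see the execute_blocks calls in forward_orig
# below); the flip/flop weight-streaming itself lives in
# comfy.ldm.flipflop_transformer and is assumed here, not shown in this hunk.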
def forward_orig(
self,
@@ -142,6 +170,9 @@ class Flux(nn.Module):
attn_mask: Tensor = None,
) -> Tensor:
if y is None:
y = torch.zeros((img.shape[0], self.params.vec_in_dim), device=img.device, dtype=img.dtype)
patches = transformer_options.get("patches", {})
patches_replace = transformer_options.get("patches_replace", {})
if img.ndim != 3 or txt.ndim != 3:
@@ -154,19 +185,9 @@ class Flux(nn.Module):
if guidance is not None:
vec = vec + self.guidance_in(timestep_embedding(guidance, 256).to(img.dtype))
if self.vector_in is not None:
if y is None:
y = torch.zeros((img.shape[0], self.params.vec_in_dim), device=img.device, dtype=img.dtype)
vec = vec + self.vector_in(y[:, :self.params.vec_in_dim])
if self.txt_norm is not None:
txt = self.txt_norm(txt)
vec = vec + self.vector_in(y[:, :self.params.vec_in_dim])
txt = self.txt_in(txt)
vec_orig = vec
if self.params.global_modulation:
vec = (self.double_stream_modulation_img(vec_orig), self.double_stream_modulation_txt(vec_orig))
if "post_input" in patches:
for p in patches["post_input"]:
out = p({"img": img, "txt": txt, "img_ids": img_ids, "txt_ids": txt_ids})
@@ -182,90 +203,23 @@ class Flux(nn.Module):
pe = None
blocks_replace = patches_replace.get("dit", {})
transformer_options["total_blocks"] = len(self.double_blocks)
transformer_options["block_type"] = "double"
for i, block in enumerate(self.double_blocks):
transformer_options["block_index"] = i
if ("double_block", i) in blocks_replace:
def block_wrap(args):
out = {}
out["img"], out["txt"] = block(img=args["img"],
txt=args["txt"],
vec=args["vec"],
pe=args["pe"],
attn_mask=args.get("attn_mask"),
transformer_options=args.get("transformer_options"))
return out
out = blocks_replace[("double_block", i)]({"img": img,
"txt": txt,
"vec": vec,
"pe": pe,
"attn_mask": attn_mask,
"transformer_options": transformer_options},
{"original_block": block_wrap})
txt = out["txt"]
img = out["img"]
else:
img, txt = block(img=img,
txt=txt,
vec=vec,
pe=pe,
attn_mask=attn_mask,
transformer_options=transformer_options)
if control is not None: # Controlnet
control_i = control.get("input")
if i < len(control_i):
add = control_i[i]
if add is not None:
img[:, :add.shape[1]] += add
# execute double blocks
img, txt = self.execute_blocks("double_blocks", self.indiv_double_block_fwd, (img, txt), vec, pe, attn_mask, control, blocks_replace, transformer_options)
if img.dtype == torch.float16:
img = torch.nan_to_num(img, nan=0.0, posinf=65504, neginf=-65504)
img = torch.cat((txt, img), 1)
if self.params.global_modulation:
vec, _ = self.single_stream_modulation(vec_orig)
transformer_options["total_blocks"] = len(self.single_blocks)
transformer_options["block_type"] = "single"
for i, block in enumerate(self.single_blocks):
transformer_options["block_index"] = i
if ("single_block", i) in blocks_replace:
def block_wrap(args):
out = {}
out["img"] = block(args["img"],
vec=args["vec"],
pe=args["pe"],
attn_mask=args.get("attn_mask"),
transformer_options=args.get("transformer_options"))
return out
out = blocks_replace[("single_block", i)]({"img": img,
"vec": vec,
"pe": pe,
"attn_mask": attn_mask,
"transformer_options": transformer_options},
{"original_block": block_wrap})
img = out["img"]
else:
img = block(img, vec=vec, pe=pe, attn_mask=attn_mask, transformer_options=transformer_options)
if control is not None: # Controlnet
control_o = control.get("output")
if i < len(control_o):
add = control_o[i]
if add is not None:
img[:, txt.shape[1] : txt.shape[1] + add.shape[1], ...] += add
# execute single blocks
img = self.execute_blocks("single_blocks", self.indiv_single_block_fwd, img, txt, vec, pe, attn_mask, control, blocks_replace, transformer_options)
img = img[:, txt.shape[1] :, ...]
img = self.final_layer(img, vec_orig) # (N, T, patch_size ** 2 * out_channels)
img = self.final_layer(img, vec) # (N, T, patch_size ** 2 * out_channels)
return img
def process_img(self, x, index=0, h_offset=0, w_offset=0, transformer_options={}):
def process_img(self, x, index=0, h_offset=0, w_offset=0):
bs, c, h, w = x.shape
patch_size = self.patch_size
x = comfy.ldm.common_dit.pad_to_patch_size(x, (patch_size, patch_size))
@@ -277,22 +231,10 @@ class Flux(nn.Module):
h_offset = ((h_offset + (patch_size // 2)) // patch_size)
w_offset = ((w_offset + (patch_size // 2)) // patch_size)
steps_h = h_len
steps_w = w_len
rope_options = transformer_options.get("rope_options", None)
if rope_options is not None:
h_len = (h_len - 1.0) * rope_options.get("scale_y", 1.0) + 1.0
w_len = (w_len - 1.0) * rope_options.get("scale_x", 1.0) + 1.0
index += rope_options.get("shift_t", 0.0)
h_offset += rope_options.get("shift_y", 0.0)
w_offset += rope_options.get("shift_x", 0.0)
img_ids = torch.zeros((steps_h, steps_w, len(self.params.axes_dim)), device=x.device, dtype=torch.float32)
img_ids = torch.zeros((h_len, w_len, 3), device=x.device, dtype=x.dtype)
img_ids[:, :, 0] = img_ids[:, :, 1] + index
img_ids[:, :, 1] = img_ids[:, :, 1] + torch.linspace(h_offset, h_len - 1 + h_offset, steps=steps_h, device=x.device, dtype=torch.float32).unsqueeze(1)
img_ids[:, :, 2] = img_ids[:, :, 2] + torch.linspace(w_offset, w_len - 1 + w_offset, steps=steps_w, device=x.device, dtype=torch.float32).unsqueeze(0)
img_ids[:, :, 1] = img_ids[:, :, 1] + torch.linspace(h_offset, h_len - 1 + h_offset, steps=h_len, device=x.device, dtype=x.dtype).unsqueeze(1)
img_ids[:, :, 2] = img_ids[:, :, 2] + torch.linspace(w_offset, w_len - 1 + w_offset, steps=w_len, device=x.device, dtype=x.dtype).unsqueeze(0)
return img, repeat(img_ids, "h w c -> b (h w) c", b=bs)
def forward(self, x, timestep, context, y=None, guidance=None, ref_latents=None, control=None, transformer_options={}, **kwargs):
@@ -308,16 +250,16 @@ class Flux(nn.Module):
h_len = ((h_orig + (patch_size // 2)) // patch_size)
w_len = ((w_orig + (patch_size // 2)) // patch_size)
img, img_ids = self.process_img(x, transformer_options=transformer_options)
img, img_ids = self.process_img(x)
img_tokens = img.shape[1]
if ref_latents is not None:
h = 0
w = 0
index = 0
ref_latents_method = kwargs.get("ref_latents_method", self.params.default_ref_method)
ref_latents_method = kwargs.get("ref_latents_method", "offset")
for ref in ref_latents:
if ref_latents_method == "index":
index += self.params.ref_index_scale
index += 1
h_offset = 0
w_offset = 0
elif ref_latents_method == "uxo":
@@ -341,12 +283,7 @@ class Flux(nn.Module):
img = torch.cat([img, kontext], dim=1)
img_ids = torch.cat([img_ids, kontext_ids], dim=1)
txt_ids = torch.zeros((bs, context.shape[1], len(self.params.axes_dim)), device=x.device, dtype=torch.float32)
if len(self.params.txt_ids_dims) > 0:
for i in self.params.txt_ids_dims:
txt_ids[:, :, i] = torch.linspace(0, context.shape[1] - 1, steps=context.shape[1], device=x.device, dtype=torch.float32)
txt_ids = torch.zeros((bs, context.shape[1], 3), device=x.device, dtype=x.dtype)
out = self.forward_orig(img, img_ids, context, txt_ids, timestep, y, guidance, control, transformer_options, attn_mask=kwargs.get("attention_mask", None))
out = out[:, :img_tokens]
return rearrange(out, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=h_len, w=w_len, ph=self.patch_size, pw=self.patch_size)[:,:,:h_orig,:w_orig]
return rearrange(out, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=h_len, w=w_len, ph=2, pw=2)[:,:,:h_orig,:w_orig]

View File

@@ -6,6 +6,7 @@ import comfy.ldm.flux.layers
import comfy.ldm.modules.diffusionmodules.mmdit
from comfy.ldm.modules.attention import optimized_attention
from dataclasses import dataclass
from einops import repeat
@@ -41,9 +42,6 @@ class HunyuanVideoParams:
guidance_embed: bool
byt5: bool
meanflow: bool
use_cond_type_embedding: bool
vision_in_dim: int
meanflow_sum: bool
class SelfAttentionRef(nn.Module):
@@ -159,10 +157,7 @@ class TokenRefiner(nn.Module):
t = self.t_embedder(timestep_embedding(timesteps, 256, time_factor=1.0).to(x.dtype))
# m = mask.float().unsqueeze(-1)
# c = (x.float() * m).sum(dim=1) / m.sum(dim=1) # TODO: the following works when x's sequence length matches the token count but might break otherwise
if x.dtype == torch.float16:
c = x.float().sum(dim=1) / x.shape[1]
else:
c = x.sum(dim=1) / x.shape[1]
c = x.sum(dim=1) / x.shape[1]
c = t + self.c_embedder(c.to(x.dtype))
x = self.input_embedder(x)
@@ -201,15 +196,11 @@ class HunyuanVideo(nn.Module):
def __init__(self, image_model=None, final_layer=True, dtype=None, device=None, operations=None, **kwargs):
super().__init__()
self.dtype = dtype
operation_settings = {"operations": operations, "device": device, "dtype": dtype}
params = HunyuanVideoParams(**kwargs)
self.params = params
self.patch_size = params.patch_size
self.in_channels = params.in_channels
self.out_channels = params.out_channels
self.use_cond_type_embedding = params.use_cond_type_embedding
self.vision_in_dim = params.vision_in_dim
if params.hidden_size % params.num_heads != 0:
raise ValueError(
f"Hidden size {params.hidden_size} must be divisible by num_heads {params.num_heads}"
@@ -241,6 +232,7 @@ class HunyuanVideo(nn.Module):
self.num_heads,
mlp_ratio=params.mlp_ratio,
qkv_bias=params.qkv_bias,
flipped_img_txt=True,
dtype=dtype, device=device, operations=operations
)
for _ in range(params.depth)
@@ -274,18 +266,6 @@ class HunyuanVideo(nn.Module):
if final_layer:
self.final_layer = LastLayer(self.hidden_size, self.patch_size[-1], self.out_channels, dtype=dtype, device=device, operations=operations)
# HunyuanVideo 1.5 specific modules
if self.vision_in_dim is not None:
from comfy.ldm.wan.model import MLPProj
self.vision_in = MLPProj(in_dim=self.vision_in_dim, out_dim=self.hidden_size, operation_settings=operation_settings)
else:
self.vision_in = None
if self.use_cond_type_embedding:
# 0: text_encoder feature, 1: byt5 feature, 2: vision_encoder feature
self.cond_type_embedding = nn.Embedding(3, self.hidden_size)
else:
self.cond_type_embedding = None
def forward_orig(
self,
img: Tensor,
@@ -296,7 +276,6 @@ class HunyuanVideo(nn.Module):
timesteps: Tensor,
y: Tensor = None,
txt_byt5=None,
clip_fea=None,
guidance: Tensor = None,
guiding_frame_index=None,
ref_latent=None,
@@ -317,7 +296,7 @@ class HunyuanVideo(nn.Module):
timesteps_r = transformer_options['sample_sigmas'][w[0] + 1]
timesteps_r = timesteps_r.unsqueeze(0).to(device=timesteps.device, dtype=timesteps.dtype)
vec_r = self.time_r_in(timestep_embedding(timesteps_r, 256, time_factor=1000.0).to(img.dtype))
vec = (vec + vec_r) if self.params.meanflow_sum else (vec + vec_r) / 2
vec = (vec + vec_r) / 2
if ref_latent is not None:
ref_latent_ids = self.img_ids(ref_latent)
@@ -352,47 +331,25 @@ class HunyuanVideo(nn.Module):
txt = self.txt_in(txt, timesteps, txt_mask, transformer_options=transformer_options)
if self.cond_type_embedding is not None:
self.cond_type_embedding.to(txt.device)
cond_emb = self.cond_type_embedding(torch.zeros_like(txt[:, :, 0], device=txt.device, dtype=torch.long))
txt = txt + cond_emb.to(txt.dtype)
if self.byt5_in is not None and txt_byt5 is not None:
txt_byt5 = self.byt5_in(txt_byt5)
if self.cond_type_embedding is not None:
cond_emb = self.cond_type_embedding(torch.ones_like(txt_byt5[:, :, 0], device=txt_byt5.device, dtype=torch.long))
txt_byt5 = txt_byt5 + cond_emb.to(txt_byt5.dtype)
txt = torch.cat((txt_byt5, txt), dim=1) # byt5 first for HunyuanVideo1.5
else:
txt = torch.cat((txt, txt_byt5), dim=1)
txt_byt5_ids = torch.zeros((txt_ids.shape[0], txt_byt5.shape[1], txt_ids.shape[-1]), device=txt_ids.device, dtype=txt_ids.dtype)
txt = torch.cat((txt, txt_byt5), dim=1)
txt_ids = torch.cat((txt_ids, txt_byt5_ids), dim=1)
if clip_fea is not None:
txt_vision_states = self.vision_in(clip_fea)
if self.cond_type_embedding is not None:
cond_emb = self.cond_type_embedding(2 * torch.ones_like(txt_vision_states[:, :, 0], dtype=torch.long, device=txt_vision_states.device))
txt_vision_states = txt_vision_states + cond_emb
txt = torch.cat((txt_vision_states.to(txt.dtype), txt), dim=1)
extra_txt_ids = torch.zeros((txt_ids.shape[0], txt_vision_states.shape[1], txt_ids.shape[-1]), device=txt_ids.device, dtype=txt_ids.dtype)
txt_ids = torch.cat((txt_ids, extra_txt_ids), dim=1)
ids = torch.cat((txt_ids, img_ids), dim=1)
ids = torch.cat((img_ids, txt_ids), dim=1)
pe = self.pe_embedder(ids)
img_len = img.shape[1]
if txt_mask is not None:
attn_mask_len = img_len + txt.shape[1]
attn_mask = torch.zeros((1, 1, attn_mask_len), dtype=img.dtype, device=img.device)
attn_mask[:, 0, :txt.shape[1]] = txt_mask
attn_mask[:, 0, img_len:] = txt_mask
else:
attn_mask = None
blocks_replace = patches_replace.get("dit", {})
transformer_options["total_blocks"] = len(self.double_blocks)
transformer_options["block_type"] = "double"
for i, block in enumerate(self.double_blocks):
transformer_options["block_index"] = i
if ("double_block", i) in blocks_replace:
def block_wrap(args):
out = {}
@@ -412,12 +369,9 @@ class HunyuanVideo(nn.Module):
if add is not None:
img += add
img = torch.cat((txt, img), 1)
img = torch.cat((img, txt), 1)
transformer_options["total_blocks"] = len(self.single_blocks)
transformer_options["block_type"] = "single"
for i, block in enumerate(self.single_blocks):
transformer_options["block_index"] = i
if ("single_block", i) in blocks_replace:
def block_wrap(args):
out = {}
@@ -434,9 +388,9 @@ class HunyuanVideo(nn.Module):
if i < len(control_o):
add = control_o[i]
if add is not None:
img[:, txt.shape[1]: img_len + txt.shape[1]] += add
img[:, : img_len] += add
img = img[:, txt.shape[1]: img_len + txt.shape[1]]
img = img[:, : img_len]
if ref_latent is not None:
img = img[:, ref_latent.shape[1]:]
@@ -476,14 +430,14 @@ class HunyuanVideo(nn.Module):
img_ids[:, :, 1] = img_ids[:, :, 1] + torch.linspace(0, w_len - 1, steps=w_len, device=x.device, dtype=x.dtype).unsqueeze(0)
return repeat(img_ids, "h w c -> b (h w) c", b=bs)
def forward(self, x, timestep, context, y=None, txt_byt5=None, clip_fea=None, guidance=None, attention_mask=None, guiding_frame_index=None, ref_latent=None, disable_time_r=False, control=None, transformer_options={}, **kwargs):
def forward(self, x, timestep, context, y=None, txt_byt5=None, guidance=None, attention_mask=None, guiding_frame_index=None, ref_latent=None, disable_time_r=False, control=None, transformer_options={}, **kwargs):
return comfy.patcher_extension.WrapperExecutor.new_class_executor(
self._forward,
self,
comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.DIFFUSION_MODEL, transformer_options)
).execute(x, timestep, context, y, txt_byt5, clip_fea, guidance, attention_mask, guiding_frame_index, ref_latent, disable_time_r, control, transformer_options, **kwargs)
).execute(x, timestep, context, y, txt_byt5, guidance, attention_mask, guiding_frame_index, ref_latent, disable_time_r, control, transformer_options, **kwargs)
def _forward(self, x, timestep, context, y=None, txt_byt5=None, clip_fea=None, guidance=None, attention_mask=None, guiding_frame_index=None, ref_latent=None, disable_time_r=False, control=None, transformer_options={}, **kwargs):
def _forward(self, x, timestep, context, y=None, txt_byt5=None, guidance=None, attention_mask=None, guiding_frame_index=None, ref_latent=None, disable_time_r=False, control=None, transformer_options={}, **kwargs):
bs = x.shape[0]
if len(self.patch_size) == 3:
img_ids = self.img_ids(x)
@@ -491,5 +445,5 @@ class HunyuanVideo(nn.Module):
else:
img_ids = self.img_ids_2d(x)
txt_ids = torch.zeros((bs, context.shape[1], 2), device=x.device, dtype=x.dtype)
out = self.forward_orig(x, img_ids, context, txt_ids, attention_mask, timestep, y, txt_byt5, clip_fea, guidance, guiding_frame_index, ref_latent, disable_time_r=disable_time_r, control=control, transformer_options=transformer_options)
out = self.forward_orig(x, img_ids, context, txt_ids, attention_mask, timestep, y, txt_byt5, guidance, guiding_frame_index, ref_latent, disable_time_r=disable_time_r, control=control, transformer_options=transformer_options)
return out

View File

@@ -1,122 +0,0 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from comfy.ldm.modules.diffusionmodules.model import ResnetBlock, VideoConv3d
from comfy.ldm.hunyuan_video.vae_refiner import RMS_norm
import comfy.model_management
import comfy.model_patcher
class SRResidualCausalBlock3D(nn.Module):
def __init__(self, channels: int):
super().__init__()
self.block = nn.Sequential(
VideoConv3d(channels, channels, kernel_size=3),
nn.SiLU(inplace=True),
VideoConv3d(channels, channels, kernel_size=3),
nn.SiLU(inplace=True),
VideoConv3d(channels, channels, kernel_size=3),
)
def forward(self, x: torch.Tensor) -> torch.Tensor:
return x + self.block(x)
class SRModel3DV2(nn.Module):
def __init__(
self,
in_channels: int,
out_channels: int,
hidden_channels: int = 64,
num_blocks: int = 6,
global_residual: bool = False,
):
super().__init__()
self.in_conv = VideoConv3d(in_channels, hidden_channels, kernel_size=3)
self.blocks = nn.ModuleList([SRResidualCausalBlock3D(hidden_channels) for _ in range(num_blocks)])
self.out_conv = VideoConv3d(hidden_channels, out_channels, kernel_size=3)
self.global_residual = bool(global_residual)
def forward(self, x: torch.Tensor) -> torch.Tensor:
residual = x
y = self.in_conv(x)
for blk in self.blocks:
y = blk(y)
y = self.out_conv(y)
if self.global_residual and (y.shape == residual.shape):
y = y + residual
return y
class Upsampler(nn.Module):
def __init__(
self,
z_channels: int,
out_channels: int,
block_out_channels: tuple[int, ...],
num_res_blocks: int = 2,
):
super().__init__()
self.num_res_blocks = num_res_blocks
self.block_out_channels = block_out_channels
self.z_channels = z_channels
ch = block_out_channels[0]
self.conv_in = VideoConv3d(z_channels, ch, kernel_size=3)
self.up = nn.ModuleList()
for i, tgt in enumerate(block_out_channels):
stage = nn.Module()
stage.block = nn.ModuleList([ResnetBlock(in_channels=ch if j == 0 else tgt,
out_channels=tgt,
temb_channels=0,
conv_shortcut=False,
conv_op=VideoConv3d, norm_op=RMS_norm)
for j in range(num_res_blocks + 1)])
ch = tgt
self.up.append(stage)
self.norm_out = RMS_norm(ch)
self.conv_out = VideoConv3d(ch, out_channels, kernel_size=3)
def forward(self, z):
"""
Args:
z: (B, C, T, H, W)
target_shape: (H, W)
"""
# z to block_in
repeats = self.block_out_channels[0] // (self.z_channels)
x = self.conv_in(z) + z.repeat_interleave(repeats=repeats, dim=1)
# upsampling
for stage in self.up:
for blk in stage.block:
x = blk(x)
out = self.conv_out(F.silu(self.norm_out(x)))
return out
UPSAMPLERS = {
"720p": SRModel3DV2,
"1080p": Upsampler,
}
class HunyuanVideo15SRModel():
def __init__(self, model_type, config):
self.load_device = comfy.model_management.vae_device()
offload_device = comfy.model_management.vae_offload_device()
self.dtype = comfy.model_management.vae_dtype(self.load_device)
self.model_class = UPSAMPLERS.get(model_type)
self.model = self.model_class(**config).eval()
self.patcher = comfy.model_patcher.CoreModelPatcher(self.model, load_device=self.load_device, offload_device=offload_device)
def load_sd(self, sd):
return self.model.load_state_dict(sd, strict=True, assign=self.patcher.is_dynamic())
def get_sd(self):
return self.model.state_dict()
def resample_latent(self, latent):
comfy.model_management.load_model_gpu(self.patcher)
return self.model(latent.to(self.load_device))

View File

@@ -1,13 +1,11 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from comfy.ldm.modules.diffusionmodules.model import ResnetBlock, AttnBlock, CarriedConv3d, Normalize, conv_carry_causal_3d, torch_cat_if_needed
from comfy.ldm.modules.diffusionmodules.model import ResnetBlock, AttnBlock, VideoConv3d, Normalize
import comfy.ops
import comfy.ldm.models.autoencoder
import comfy.model_management
ops = comfy.ops.disable_weight_init
class RMS_norm(nn.Module):
def __init__(self, dim):
super().__init__()
@@ -16,10 +14,10 @@ class RMS_norm(nn.Module):
self.gamma = nn.Parameter(torch.empty(shape))
def forward(self, x):
return F.normalize(x, dim=1) * self.scale * comfy.model_management.cast_to(self.gamma, dtype=x.dtype, device=x.device)
return F.normalize(x, dim=1) * self.scale * self.gamma
class DnSmpl(nn.Module):
def __init__(self, ic, oc, tds, refiner_vae, op):
def __init__(self, ic, oc, tds=True, refiner_vae=True, op=VideoConv3d):
super().__init__()
fct = 2 * 2 * 2 if tds else 1 * 2 * 2
assert oc % fct == 0
@@ -29,12 +27,11 @@ class DnSmpl(nn.Module):
self.tds = tds
self.gs = fct * ic // oc
def forward(self, x, conv_carry_in=None, conv_carry_out=None):
def forward(self, x):
r1 = 2 if self.tds else 1
h = conv_carry_causal_3d([x], self.conv, conv_carry_in, conv_carry_out)
if self.tds and self.refiner_vae and conv_carry_in is None:
h = self.conv(x)
if self.tds and self.refiner_vae:
hf = h[:, :, :1, :, :]
b, c, f, ht, wd = hf.shape
hf = hf.reshape(b, c, f, ht // 2, 2, wd // 2, 2)
@@ -42,7 +39,14 @@ class DnSmpl(nn.Module):
hf = hf.reshape(b, 2 * 2 * c, f, ht // 2, wd // 2)
hf = torch.cat([hf, hf], dim=1)
h = h[:, :, 1:, :, :]
hn = h[:, :, 1:, :, :]
b, c, frms, ht, wd = hn.shape
nf = frms // r1
hn = hn.reshape(b, c, nf, r1, ht // 2, 2, wd // 2, 2)
hn = hn.permute(0, 3, 5, 7, 1, 2, 4, 6)
hn = hn.reshape(b, r1 * 2 * 2 * c, nf, ht // 2, wd // 2)
h = torch.cat([hf, hn], dim=2)
xf = x[:, :, :1, :, :]
b, ci, f, ht, wd = xf.shape
@@ -50,36 +54,38 @@ class DnSmpl(nn.Module):
xf = xf.permute(0, 4, 6, 1, 2, 3, 5)
xf = xf.reshape(b, 2 * 2 * ci, f, ht // 2, wd // 2)
B, C, T, H, W = xf.shape
xf = xf.view(B, hf.shape[1], self.gs // 2, T, H, W).mean(dim=2)
xf = xf.view(B, h.shape[1], self.gs // 2, T, H, W).mean(dim=2)
x = x[:, :, 1:, :, :]
xn = x[:, :, 1:, :, :]
b, ci, frms, ht, wd = xn.shape
nf = frms // r1
xn = xn.reshape(b, ci, nf, r1, ht // 2, 2, wd // 2, 2)
xn = xn.permute(0, 3, 5, 7, 1, 2, 4, 6)
xn = xn.reshape(b, r1 * 2 * 2 * ci, nf, ht // 2, wd // 2)
B, C, T, H, W = xn.shape
xn = xn.view(B, h.shape[1], self.gs, T, H, W).mean(dim=2)
sc = torch.cat([xf, xn], dim=2)
else:
b, c, frms, ht, wd = h.shape
if h.shape[2] == 0:
return hf + xf
nf = frms // r1
h = h.reshape(b, c, nf, r1, ht // 2, 2, wd // 2, 2)
h = h.permute(0, 3, 5, 7, 1, 2, 4, 6)
h = h.reshape(b, r1 * 2 * 2 * c, nf, ht // 2, wd // 2)
b, c, frms, ht, wd = h.shape
nf = frms // r1
h = h.reshape(b, c, nf, r1, ht // 2, 2, wd // 2, 2)
h = h.permute(0, 3, 5, 7, 1, 2, 4, 6)
h = h.reshape(b, r1 * 2 * 2 * c, nf, ht // 2, wd // 2)
b, ci, frms, ht, wd = x.shape
nf = frms // r1
sc = x.reshape(b, ci, nf, r1, ht // 2, 2, wd // 2, 2)
sc = sc.permute(0, 3, 5, 7, 1, 2, 4, 6)
sc = sc.reshape(b, r1 * 2 * 2 * ci, nf, ht // 2, wd // 2)
B, C, T, H, W = sc.shape
sc = sc.view(B, h.shape[1], self.gs, T, H, W).mean(dim=2)
b, ci, frms, ht, wd = x.shape
nf = frms // r1
x = x.reshape(b, ci, nf, r1, ht // 2, 2, wd // 2, 2)
x = x.permute(0, 3, 5, 7, 1, 2, 4, 6)
x = x.reshape(b, r1 * 2 * 2 * ci, nf, ht // 2, wd // 2)
B, C, T, H, W = x.shape
x = x.view(B, h.shape[1], self.gs, T, H, W).mean(dim=2)
if self.tds and self.refiner_vae and conv_carry_in is None:
h = torch.cat([hf, h], dim=2)
x = torch.cat([xf, x], dim=2)
return h + x
return h + sc
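# Reading of DnSmpl (sketch, inferred from the reshapes above): the conv output
# is downsampled by a pixel-unshuffle that folds each 2x2 spatial window (and a
# pair of frames when tds is set) into the channel dim, while the skip path
# applies the same fold to the raw input and then averages channel groups
# (self.gs) so the shapes match before the residual sum. In the refiner-VAE
# branch the first frame is folded on its own, presumably to keep the temporal
# grouping causal.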
class UpSmpl(nn.Module):
def __init__(self, ic, oc, tus, refiner_vae, op):
def __init__(self, ic, oc, tus=True, refiner_vae=True, op=VideoConv3d):
super().__init__()
fct = 2 * 2 * 2 if tus else 1 * 2 * 2
self.conv = op(ic, oc * fct, kernel_size=3, stride=1, padding=1)
@@ -88,11 +94,11 @@ class UpSmpl(nn.Module):
self.tus = tus
self.rp = fct * oc // ic
def forward(self, x, conv_carry_in=None, conv_carry_out=None):
def forward(self, x):
r1 = 2 if self.tus else 1
h = conv_carry_causal_3d([x], self.conv, conv_carry_in, conv_carry_out)
h = self.conv(x)
if self.tus and self.refiner_vae and conv_carry_in is None:
if self.tus and self.refiner_vae:
hf = h[:, :, :1, :, :]
b, c, f, ht, wd = hf.shape
nc = c // (2 * 2)
@@ -101,7 +107,14 @@ class UpSmpl(nn.Module):
hf = hf.reshape(b, nc, f, ht * 2, wd * 2)
hf = hf[:, : hf.shape[1] // 2]
h = h[:, :, 1:, :, :]
hn = h[:, :, 1:, :, :]
b, c, frms, ht, wd = hn.shape
nc = c // (r1 * 2 * 2)
hn = hn.reshape(b, r1, 2, 2, nc, frms, ht, wd)
hn = hn.permute(0, 4, 5, 1, 6, 2, 7, 3)
hn = hn.reshape(b, nc, frms * r1, ht * 2, wd * 2)
h = torch.cat([hf, hn], dim=2)
xf = x[:, :, :1, :, :]
b, ci, f, ht, wd = xf.shape
@@ -112,26 +125,29 @@ class UpSmpl(nn.Module):
xf = xf.permute(0, 3, 4, 5, 1, 6, 2)
xf = xf.reshape(b, nc, f, ht * 2, wd * 2)
x = x[:, :, 1:, :, :]
xn = x[:, :, 1:, :, :]
xn = xn.repeat_interleave(repeats=self.rp, dim=1)
b, c, frms, ht, wd = xn.shape
nc = c // (r1 * 2 * 2)
xn = xn.reshape(b, r1, 2, 2, nc, frms, ht, wd)
xn = xn.permute(0, 4, 5, 1, 6, 2, 7, 3)
xn = xn.reshape(b, nc, frms * r1, ht * 2, wd * 2)
sc = torch.cat([xf, xn], dim=2)
else:
b, c, frms, ht, wd = h.shape
nc = c // (r1 * 2 * 2)
h = h.reshape(b, r1, 2, 2, nc, frms, ht, wd)
h = h.permute(0, 4, 5, 1, 6, 2, 7, 3)
h = h.reshape(b, nc, frms * r1, ht * 2, wd * 2)
b, c, frms, ht, wd = h.shape
nc = c // (r1 * 2 * 2)
h = h.reshape(b, r1, 2, 2, nc, frms, ht, wd)
h = h.permute(0, 4, 5, 1, 6, 2, 7, 3)
h = h.reshape(b, nc, frms * r1, ht * 2, wd * 2)
sc = x.repeat_interleave(repeats=self.rp, dim=1)
b, c, frms, ht, wd = sc.shape
nc = c // (r1 * 2 * 2)
sc = sc.reshape(b, r1, 2, 2, nc, frms, ht, wd)
sc = sc.permute(0, 4, 5, 1, 6, 2, 7, 3)
sc = sc.reshape(b, nc, frms * r1, ht * 2, wd * 2)
x = x.repeat_interleave(repeats=self.rp, dim=1)
b, c, frms, ht, wd = x.shape
nc = c // (r1 * 2 * 2)
x = x.reshape(b, r1, 2, 2, nc, frms, ht, wd)
x = x.permute(0, 4, 5, 1, 6, 2, 7, 3)
x = x.reshape(b, nc, frms * r1, ht * 2, wd * 2)
if self.tus and self.refiner_vae and conv_carry_in is None:
h = torch.cat([hf, h], dim=2)
x = torch.cat([xf, x], dim=2)
return h + x
return h + sc
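# UpSmpl mirrors DnSmpl (sketch): a depth-to-space rearrangement expands the
# conv output back to 2x spatial (and 2x temporal when tus is set) resolution,
# while the skip path repeat_interleaves the input channels by self.rp and
# applies the same rearrangement, again with the first frame handled separately
# in the refiner-VAE branch.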
class Encoder(nn.Module):
def __init__(self, in_channels, z_channels, block_out_channels, num_res_blocks,
@@ -144,7 +160,7 @@ class Encoder(nn.Module):
self.refiner_vae = refiner_vae
if self.refiner_vae:
conv_op = CarriedConv3d
conv_op = VideoConv3d
norm_op = RMS_norm
else:
conv_op = ops.Conv3d
@@ -172,9 +188,9 @@ class Encoder(nn.Module):
self.down.append(stage)
self.mid = nn.Module()
self.mid.block_1 = ResnetBlock(in_channels=ch, out_channels=ch, conv_op=conv_op, norm_op=norm_op)
self.mid.block_1 = ResnetBlock(in_channels=ch, out_channels=ch, temb_channels=0, conv_op=conv_op, norm_op=norm_op)
self.mid.attn_1 = AttnBlock(ch, conv_op=ops.Conv3d, norm_op=norm_op)
self.mid.block_2 = ResnetBlock(in_channels=ch, out_channels=ch, conv_op=conv_op, norm_op=norm_op)
self.mid.block_2 = ResnetBlock(in_channels=ch, out_channels=ch, temb_channels=0, conv_op=conv_op, norm_op=norm_op)
self.norm_out = norm_op(ch)
self.conv_out = conv_op(ch, z_channels << 1, 3, 1, 1)
@@ -185,48 +201,31 @@ class Encoder(nn.Module):
if not self.refiner_vae and x.shape[2] == 1:
x = x.expand(-1, -1, self.ffactor_temporal, -1, -1)
if self.refiner_vae:
xl = [x[:, :, :1, :, :]]
if x.shape[2] > self.ffactor_temporal:
xl += torch.split(x[:, :, 1: 1 + ((x.shape[2] - 1) // self.ffactor_temporal) * self.ffactor_temporal, :, :], self.ffactor_temporal * 2, dim=2)
x = xl
else:
x = [x]
out = []
x = self.conv_in(x)
conv_carry_in = None
for stage in self.down:
for blk in stage.block:
x = blk(x)
if hasattr(stage, 'downsample'):
x = stage.downsample(x)
for i, x1 in enumerate(x):
conv_carry_out = []
if i == len(x) - 1:
conv_carry_out = None
x1 = [ x1 ]
x1 = conv_carry_causal_3d(x1, self.conv_in, conv_carry_in, conv_carry_out)
for stage in self.down:
for blk in stage.block:
x1 = blk(x1, None, conv_carry_in, conv_carry_out)
if hasattr(stage, 'downsample'):
x1 = stage.downsample(x1, conv_carry_in, conv_carry_out)
out.append(x1)
conv_carry_in = conv_carry_out
out = torch_cat_if_needed(out, dim=2)
x = self.mid.block_2(self.mid.attn_1(self.mid.block_1(out)))
del out
x = self.mid.block_2(self.mid.attn_1(self.mid.block_1(x)))
b, c, t, h, w = x.shape
grp = c // (self.z_channels << 1)
skip = x.view(b, c // grp, grp, t, h, w).mean(2)
out = conv_carry_causal_3d([F.silu(self.norm_out(x))], self.conv_out) + skip
out = self.conv_out(F.silu(self.norm_out(x))) + skip
if self.refiner_vae:
out = self.regul(out)[0]
out = torch.cat((out[:, :, :1], out), dim=2)
out = out.permute(0, 2, 1, 3, 4)
b, f_times_2, c, h, w = out.shape
out = out.reshape(b, f_times_2 // 2, 2 * c, h, w)
out = out.permute(0, 2, 1, 3, 4).contiguous()
return out
class Decoder(nn.Module):
@@ -240,7 +239,7 @@ class Decoder(nn.Module):
self.refiner_vae = refiner_vae
if self.refiner_vae:
conv_op = CarriedConv3d
conv_op = VideoConv3d
norm_op = RMS_norm
else:
conv_op = ops.Conv3d
@@ -250,9 +249,9 @@ class Decoder(nn.Module):
self.conv_in = conv_op(z_channels, ch, kernel_size=3, stride=1, padding=1)
self.mid = nn.Module()
self.mid.block_1 = ResnetBlock(in_channels=ch, out_channels=ch, conv_op=conv_op, norm_op=norm_op)
self.mid.block_1 = ResnetBlock(in_channels=ch, out_channels=ch, temb_channels=0, conv_op=conv_op, norm_op=norm_op)
self.mid.attn_1 = AttnBlock(ch, conv_op=ops.Conv3d, norm_op=norm_op)
self.mid.block_2 = ResnetBlock(in_channels=ch, out_channels=ch, conv_op=conv_op, norm_op=norm_op)
self.mid.block_2 = ResnetBlock(in_channels=ch, out_channels=ch, temb_channels=0, conv_op=conv_op, norm_op=norm_op)
self.up = nn.ModuleList()
depth = (ffactor_spatial >> 1).bit_length()
@@ -276,38 +275,27 @@ class Decoder(nn.Module):
self.conv_out = conv_op(ch, out_channels, 3, stride=1, padding=1)
def forward(self, z):
x = conv_carry_causal_3d([z], self.conv_in) + z.repeat_interleave(self.block_out_channels[0] // self.z_channels, 1)
if self.refiner_vae:
z = z.permute(0, 2, 1, 3, 4)
b, f, c, h, w = z.shape
z = z.reshape(b, f, 2, c // 2, h, w)
z = z.permute(0, 1, 2, 3, 4, 5).reshape(b, f * 2, c // 2, h, w)
z = z.permute(0, 2, 1, 3, 4)
z = z[:, :, 1:]
x = self.conv_in(z) + z.repeat_interleave(self.block_out_channels[0] // self.z_channels, 1)
x = self.mid.block_2(self.mid.attn_1(self.mid.block_1(x)))
if self.refiner_vae:
x = torch.split(x, 2, dim=2)
else:
x = [ x ]
out = []
for stage in self.up:
for blk in stage.block:
x = blk(x)
if hasattr(stage, 'upsample'):
x = stage.upsample(x)
conv_carry_in = None
for i, x1 in enumerate(x):
conv_carry_out = []
if i == len(x) - 1:
conv_carry_out = None
for stage in self.up:
for blk in stage.block:
x1 = blk(x1, None, conv_carry_in, conv_carry_out)
if hasattr(stage, 'upsample'):
x1 = stage.upsample(x1, conv_carry_in, conv_carry_out)
x1 = [ F.silu(self.norm_out(x1)) ]
x1 = conv_carry_causal_3d(x1, self.conv_out, conv_carry_in, conv_carry_out)
out.append(x1)
conv_carry_in = conv_carry_out
del x
out = torch_cat_if_needed(out, dim=2)
out = self.conv_out(F.silu(self.norm_out(x)))
if not self.refiner_vae:
if z.shape[-3] == 1:
out = out[:, :, -1:]
return out

View File

@@ -1,413 +0,0 @@
import torch
from torch import nn
import math
import comfy.ldm.common_dit
from comfy.ldm.modules.attention import optimized_attention
from comfy.ldm.flux.math import apply_rope1
from comfy.ldm.flux.layers import EmbedND
def attention(q, k, v, heads, transformer_options={}):
return optimized_attention(
q.transpose(1, 2),
k.transpose(1, 2),
v.transpose(1, 2),
heads=heads,
skip_reshape=True,
transformer_options=transformer_options
)
def apply_scale_shift_norm(norm, x, scale, shift):
return torch.addcmul(shift, norm(x), scale + 1.0)
def apply_gate_sum(x, out, gate):
return torch.addcmul(x, gate, out)
def get_shift_scale_gate(params):
shift, scale, gate = torch.chunk(params, 3, dim=-1)
return tuple(x.unsqueeze(1) for x in (shift, scale, gate))
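# These helpers implement the usual adaLN-style modulation: parameters
# predicted from the time embedding are chunked into (shift, scale, gate) and
# each sub-layer f is applied as, roughly,
#   x = x + gate * f(norm(x) * (1 + scale) + shift)
# which matches how the Transformer*Block.forward methods below combine
# apply_scale_shift_norm and apply_gate_sum.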
def get_freqs(dim, max_period=10000.0):
return torch.exp(-math.log(max_period) * torch.arange(start=0, end=dim, dtype=torch.float32) / dim)
class TimeEmbeddings(nn.Module):
def __init__(self, model_dim, time_dim, max_period=10000.0, operation_settings=None):
super().__init__()
assert model_dim % 2 == 0
self.model_dim = model_dim
self.max_period = max_period
self.register_buffer("freqs", get_freqs(model_dim // 2, max_period), persistent=False)
operations = operation_settings.get("operations")
self.in_layer = operations.Linear(model_dim, time_dim, bias=True, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
self.activation = nn.SiLU()
self.out_layer = operations.Linear(time_dim, time_dim, bias=True, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
def forward(self, timestep, dtype):
args = torch.outer(timestep, self.freqs.to(device=timestep.device))
time_embed = torch.cat([torch.cos(args), torch.sin(args)], dim=-1).to(dtype)
time_embed = self.out_layer(self.activation(self.in_layer(time_embed)))
return time_embed
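# Standard sinusoidal timestep embedding (sketch): freqs holds
# exp(-log(max_period) * i / (model_dim // 2)); the outer product with the
# timestep gives the angles, cos/sin are concatenated up to model_dim, and a
# two-layer SiLU MLP projects the result to time_dim.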
class TextEmbeddings(nn.Module):
def __init__(self, text_dim, model_dim, operation_settings=None):
super().__init__()
operations = operation_settings.get("operations")
self.in_layer = operations.Linear(text_dim, model_dim, bias=True, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
self.norm = operations.LayerNorm(model_dim, elementwise_affine=True, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
def forward(self, text_embed):
text_embed = self.in_layer(text_embed)
return self.norm(text_embed).type_as(text_embed)
class VisualEmbeddings(nn.Module):
def __init__(self, visual_dim, model_dim, patch_size, operation_settings=None):
super().__init__()
self.patch_size = patch_size
operations = operation_settings.get("operations")
self.in_layer = operations.Linear(visual_dim, model_dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
def forward(self, x):
x = x.movedim(1, -1) # B C T H W -> B T H W C
B, T, H, W, dim = x.shape
pt, ph, pw = self.patch_size
x = x.view(
B,
T // pt, pt,
H // ph, ph,
W // pw, pw,
dim,
).permute(0, 1, 3, 5, 2, 4, 6, 7).flatten(4, 7)
return self.in_layer(x)
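# VisualEmbeddings patchifies the latent (sketch): B C T H W is moved
# channels-last and folded into a (T/pt, H/ph, W/pw) grid of patches of size
# pt*ph*pw*C, which in_layer then projects to model_dim.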
class Modulation(nn.Module):
def __init__(self, time_dim, model_dim, num_params, operation_settings=None):
super().__init__()
self.activation = nn.SiLU()
self.out_layer = operation_settings.get("operations").Linear(time_dim, num_params * model_dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
def forward(self, x):
return self.out_layer(self.activation(x))
class SelfAttention(nn.Module):
def __init__(self, num_channels, head_dim, operation_settings=None):
super().__init__()
assert num_channels % head_dim == 0
self.num_heads = num_channels // head_dim
self.head_dim = head_dim
operations = operation_settings.get("operations")
self.to_query = operations.Linear(num_channels, num_channels, bias=True, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
self.to_key = operations.Linear(num_channels, num_channels, bias=True, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
self.to_value = operations.Linear(num_channels, num_channels, bias=True, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
self.query_norm = operations.RMSNorm(head_dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
self.key_norm = operations.RMSNorm(head_dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
self.out_layer = operations.Linear(num_channels, num_channels, bias=True, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
self.num_chunks = 2
def _compute_qk(self, x, freqs, proj_fn, norm_fn):
result = proj_fn(x).view(*x.shape[:-1], self.num_heads, -1)
return apply_rope1(norm_fn(result), freqs)
def _forward(self, x, freqs, transformer_options={}):
q = self._compute_qk(x, freqs, self.to_query, self.query_norm)
k = self._compute_qk(x, freqs, self.to_key, self.key_norm)
v = self.to_value(x).view(*x.shape[:-1], self.num_heads, -1)
out = attention(q, k, v, self.num_heads, transformer_options=transformer_options)
return self.out_layer(out)
def _forward_chunked(self, x, freqs, transformer_options={}):
def process_chunks(proj_fn, norm_fn):
x_chunks = torch.chunk(x, self.num_chunks, dim=1)
freqs_chunks = torch.chunk(freqs, self.num_chunks, dim=1)
chunks = []
for x_chunk, freqs_chunk in zip(x_chunks, freqs_chunks):
chunks.append(self._compute_qk(x_chunk, freqs_chunk, proj_fn, norm_fn))
return torch.cat(chunks, dim=1)
q = process_chunks(self.to_query, self.query_norm)
k = process_chunks(self.to_key, self.key_norm)
v = self.to_value(x).view(*x.shape[:-1], self.num_heads, -1)
out = attention(q, k, v, self.num_heads, transformer_options=transformer_options)
return self.out_layer(out)
def forward(self, x, freqs, transformer_options={}):
if x.shape[1] > 8192:
return self._forward_chunked(x, freqs, transformer_options=transformer_options)
else:
return self._forward(x, freqs, transformer_options=transformer_options)
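# The 8192-token threshold above switches to a chunked path as a memory
# optimisation: the q/k projections and RoPE are computed per sequence chunk
# and concatenated, lowering peak activation memory, while v and the attention
# itself still run over the full sequence.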
class CrossAttention(SelfAttention):
def get_qkv(self, x, context):
q = self.to_query(x).view(*x.shape[:-1], self.num_heads, -1)
k = self.to_key(context).view(*context.shape[:-1], self.num_heads, -1)
v = self.to_value(context).view(*context.shape[:-1], self.num_heads, -1)
return q, k, v
def forward(self, x, context, transformer_options={}):
q, k, v = self.get_qkv(x, context)
out = attention(self.query_norm(q), self.key_norm(k), v, self.num_heads, transformer_options=transformer_options)
return self.out_layer(out)
class FeedForward(nn.Module):
def __init__(self, dim, ff_dim, operation_settings=None):
super().__init__()
operations = operation_settings.get("operations")
self.in_layer = operations.Linear(dim, ff_dim, bias=False, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
self.activation = nn.GELU()
self.out_layer = operations.Linear(ff_dim, dim, bias=False, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
self.num_chunks = 4
def _forward(self, x):
return self.out_layer(self.activation(self.in_layer(x)))
def _forward_chunked(self, x):
chunks = torch.chunk(x, self.num_chunks, dim=1)
output_chunks = []
for chunk in chunks:
output_chunks.append(self._forward(chunk))
return torch.cat(output_chunks, dim=1)
def forward(self, x):
if x.shape[1] > 8192:
return self._forward_chunked(x)
else:
return self._forward(x)
class OutLayer(nn.Module):
def __init__(self, model_dim, time_dim, visual_dim, patch_size, operation_settings=None):
super().__init__()
self.patch_size = patch_size
self.modulation = Modulation(time_dim, model_dim, 2, operation_settings=operation_settings)
operations = operation_settings.get("operations")
self.norm = operations.LayerNorm(model_dim, elementwise_affine=False, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
self.out_layer = operations.Linear(model_dim, math.prod(patch_size) * visual_dim, bias=True, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
def forward(self, visual_embed, time_embed):
B, T, H, W, _ = visual_embed.shape
shift, scale = torch.chunk(self.modulation(time_embed), 2, dim=-1)
scale = scale[:, None, None, None, :]
shift = shift[:, None, None, None, :]
visual_embed = apply_scale_shift_norm(self.norm, visual_embed, scale, shift)
x = self.out_layer(visual_embed)
out_dim = x.shape[-1] // (self.patch_size[0] * self.patch_size[1] * self.patch_size[2])
x = x.view(
B, T, H, W,
out_dim,
self.patch_size[0], self.patch_size[1], self.patch_size[2]
)
return x.permute(0, 4, 1, 5, 2, 6, 3, 7).flatten(2, 3).flatten(3, 4).flatten(4, 5)
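# OutLayer un-patchifies (sketch): after the shift/scale-modulated norm and the
# final linear projection, the per-patch output is reshaped from
# (B, T, H, W, out_dim*pt*ph*pw) back to (B, out_dim, T*pt, H*ph, W*pw),
# inverting the patching done in VisualEmbeddings.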
class TransformerEncoderBlock(nn.Module):
def __init__(self, model_dim, time_dim, ff_dim, head_dim, operation_settings=None):
super().__init__()
self.text_modulation = Modulation(time_dim, model_dim, 6, operation_settings=operation_settings)
operations = operation_settings.get("operations")
self.self_attention_norm = operations.LayerNorm(model_dim, elementwise_affine=False, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
self.self_attention = SelfAttention(model_dim, head_dim, operation_settings=operation_settings)
self.feed_forward_norm = operations.LayerNorm(model_dim, elementwise_affine=False, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
self.feed_forward = FeedForward(model_dim, ff_dim, operation_settings=operation_settings)
def forward(self, x, time_embed, freqs, transformer_options={}):
self_attn_params, ff_params = torch.chunk(self.text_modulation(time_embed), 2, dim=-1)
shift, scale, gate = get_shift_scale_gate(self_attn_params)
out = apply_scale_shift_norm(self.self_attention_norm, x, scale, shift)
out = self.self_attention(out, freqs, transformer_options=transformer_options)
x = apply_gate_sum(x, out, gate)
shift, scale, gate = get_shift_scale_gate(ff_params)
out = apply_scale_shift_norm(self.feed_forward_norm, x, scale, shift)
out = self.feed_forward(out)
x = apply_gate_sum(x, out, gate)
return x
class TransformerDecoderBlock(nn.Module):
def __init__(self, model_dim, time_dim, ff_dim, head_dim, operation_settings=None):
super().__init__()
self.visual_modulation = Modulation(time_dim, model_dim, 9, operation_settings=operation_settings)
operations = operation_settings.get("operations")
self.self_attention_norm = operations.LayerNorm(model_dim, elementwise_affine=False, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
self.self_attention = SelfAttention(model_dim, head_dim, operation_settings=operation_settings)
self.cross_attention_norm = operations.LayerNorm(model_dim, elementwise_affine=False, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
self.cross_attention = CrossAttention(model_dim, head_dim, operation_settings=operation_settings)
self.feed_forward_norm = operations.LayerNorm(model_dim, elementwise_affine=False, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
self.feed_forward = FeedForward(model_dim, ff_dim, operation_settings=operation_settings)
def forward(self, visual_embed, text_embed, time_embed, freqs, transformer_options={}):
self_attn_params, cross_attn_params, ff_params = torch.chunk(self.visual_modulation(time_embed), 3, dim=-1)
# self attention
shift, scale, gate = get_shift_scale_gate(self_attn_params)
visual_out = apply_scale_shift_norm(self.self_attention_norm, visual_embed, scale, shift)
visual_out = self.self_attention(visual_out, freqs, transformer_options=transformer_options)
visual_embed = apply_gate_sum(visual_embed, visual_out, gate)
# cross attention
shift, scale, gate = get_shift_scale_gate(cross_attn_params)
visual_out = apply_scale_shift_norm(self.cross_attention_norm, visual_embed, scale, shift)
visual_out = self.cross_attention(visual_out, text_embed, transformer_options=transformer_options)
visual_embed = apply_gate_sum(visual_embed, visual_out, gate)
# feed forward
shift, scale, gate = get_shift_scale_gate(ff_params)
visual_out = apply_scale_shift_norm(self.feed_forward_norm, visual_embed, scale, shift)
visual_out = self.feed_forward(visual_out)
visual_embed = apply_gate_sum(visual_embed, visual_out, gate)
return visual_embed
class Kandinsky5(nn.Module):
def __init__(
self,
in_visual_dim=16, out_visual_dim=16, in_text_dim=3584, in_text_dim2=768, time_dim=512,
model_dim=1792, ff_dim=7168, visual_embed_dim=132, patch_size=(1, 2, 2), num_text_blocks=2, num_visual_blocks=32,
axes_dims=(16, 24, 24), rope_scale_factor=(1.0, 2.0, 2.0),
dtype=None, device=None, operations=None, **kwargs
):
super().__init__()
head_dim = sum(axes_dims)
self.rope_scale_factor = rope_scale_factor
self.in_visual_dim = in_visual_dim
self.model_dim = model_dim
self.patch_size = patch_size
self.visual_embed_dim = visual_embed_dim
self.dtype = dtype
self.device = device
operation_settings = {"operations": operations, "device": device, "dtype": dtype}
self.time_embeddings = TimeEmbeddings(model_dim, time_dim, operation_settings=operation_settings)
self.text_embeddings = TextEmbeddings(in_text_dim, model_dim, operation_settings=operation_settings)
self.pooled_text_embeddings = TextEmbeddings(in_text_dim2, time_dim, operation_settings=operation_settings)
self.visual_embeddings = VisualEmbeddings(visual_embed_dim, model_dim, patch_size, operation_settings=operation_settings)
self.text_transformer_blocks = nn.ModuleList(
[TransformerEncoderBlock(model_dim, time_dim, ff_dim, head_dim, operation_settings=operation_settings) for _ in range(num_text_blocks)]
)
self.visual_transformer_blocks = nn.ModuleList(
[TransformerDecoderBlock(model_dim, time_dim, ff_dim, head_dim, operation_settings=operation_settings) for _ in range(num_visual_blocks)]
)
self.out_layer = OutLayer(model_dim, time_dim, out_visual_dim, patch_size, operation_settings=operation_settings)
self.rope_embedder_3d = EmbedND(dim=head_dim, theta=10000.0, axes_dim=axes_dims)
self.rope_embedder_1d = EmbedND(dim=head_dim, theta=10000.0, axes_dim=[head_dim])
def rope_encode_1d(self, seq_len, seq_start=0, steps=None, device=None, dtype=None, transformer_options={}):
steps = seq_len if steps is None else steps
seq_ids = torch.linspace(seq_start, seq_start + (seq_len - 1), steps=steps, device=device, dtype=dtype)
seq_ids = seq_ids.reshape(-1, 1).unsqueeze(0) # Shape: (1, steps, 1)
freqs = self.rope_embedder_1d(seq_ids).movedim(1, 2)
return freqs
def rope_encode_3d(self, t, h, w, t_start=0, steps_t=None, steps_h=None, steps_w=None, device=None, dtype=None, transformer_options={}):
patch_size = self.patch_size
t_len = ((t + (patch_size[0] // 2)) // patch_size[0])
h_len = ((h + (patch_size[1] // 2)) // patch_size[1])
w_len = ((w + (patch_size[2] // 2)) // patch_size[2])
if steps_t is None:
steps_t = t_len
if steps_h is None:
steps_h = h_len
if steps_w is None:
steps_w = w_len
h_start = 0
w_start = 0
rope_options = transformer_options.get("rope_options", None)
if rope_options is not None:
t_len = (t_len - 1.0) * rope_options.get("scale_t", 1.0) + 1.0
h_len = (h_len - 1.0) * rope_options.get("scale_y", 1.0) + 1.0
w_len = (w_len - 1.0) * rope_options.get("scale_x", 1.0) + 1.0
t_start += rope_options.get("shift_t", 0.0)
h_start += rope_options.get("shift_y", 0.0)
w_start += rope_options.get("shift_x", 0.0)
else:
rope_scale_factor = self.rope_scale_factor
if self.model_dim == 4096: # pro video model uses different rope scaling at higher resolutions
if h * w >= 14080:
rope_scale_factor = (1.0, 3.16, 3.16)
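            # Dividing the (length - 1) extent by the scale factor maps larger grids back onto a smaller positional range (position-interpolation style RoPE scaling).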
t_len = (t_len - 1.0) / rope_scale_factor[0] + 1.0
h_len = (h_len - 1.0) / rope_scale_factor[1] + 1.0
w_len = (w_len - 1.0) / rope_scale_factor[2] + 1.0
img_ids = torch.zeros((steps_t, steps_h, steps_w, 3), device=device, dtype=dtype)
img_ids[:, :, :, 0] = img_ids[:, :, :, 0] + torch.linspace(t_start, t_start + (t_len - 1), steps=steps_t, device=device, dtype=dtype).reshape(-1, 1, 1)
img_ids[:, :, :, 1] = img_ids[:, :, :, 1] + torch.linspace(h_start, h_start + (h_len - 1), steps=steps_h, device=device, dtype=dtype).reshape(1, -1, 1)
img_ids[:, :, :, 2] = img_ids[:, :, :, 2] + torch.linspace(w_start, w_start + (w_len - 1), steps=steps_w, device=device, dtype=dtype).reshape(1, 1, -1)
img_ids = img_ids.reshape(1, -1, img_ids.shape[-1])
freqs = self.rope_embedder_3d(img_ids).movedim(1, 2)
return freqs
def forward_orig(self, x, timestep, context, y, freqs, freqs_text, transformer_options={}, **kwargs):
patches_replace = transformer_options.get("patches_replace", {})
context = self.text_embeddings(context)
time_embed = self.time_embeddings(timestep, x.dtype) + self.pooled_text_embeddings(y)
for block in self.text_transformer_blocks:
context = block(context, time_embed, freqs_text, transformer_options=transformer_options)
visual_embed = self.visual_embeddings(x)
visual_shape = visual_embed.shape[:-1]
visual_embed = visual_embed.flatten(1, -2)
blocks_replace = patches_replace.get("dit", {})
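        # Patches registered under ("dit", ("double_block", i)) in transformer_options can wrap or replace individual blocks; block_wrap hands them the original block.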
transformer_options["total_blocks"] = len(self.visual_transformer_blocks)
transformer_options["block_type"] = "double"
for i, block in enumerate(self.visual_transformer_blocks):
transformer_options["block_index"] = i
if ("double_block", i) in blocks_replace:
def block_wrap(args):
return block(x=args["x"], context=args["context"], time_embed=args["time_embed"], freqs=args["freqs"], transformer_options=args.get("transformer_options"))
visual_embed = blocks_replace[("double_block", i)]({"x": visual_embed, "context": context, "time_embed": time_embed, "freqs": freqs, "transformer_options": transformer_options}, {"original_block": block_wrap})["x"]
else:
visual_embed = block(visual_embed, context, time_embed, freqs=freqs, transformer_options=transformer_options)
visual_embed = visual_embed.reshape(*visual_shape, -1)
return self.out_layer(visual_embed, time_embed)
def _forward(self, x, timestep, context, y, time_dim_replace=None, transformer_options={}, **kwargs):
original_dims = x.ndim
if original_dims == 4:
x = x.unsqueeze(2)
bs, c, t_len, h, w = x.shape
x = comfy.ldm.common_dit.pad_to_patch_size(x, self.patch_size)
if time_dim_replace is not None:
time_dim_replace = comfy.ldm.common_dit.pad_to_patch_size(time_dim_replace, self.patch_size)
x[:, :time_dim_replace.shape[1], :time_dim_replace.shape[2]] = time_dim_replace
freqs = self.rope_encode_3d(t_len, h, w, device=x.device, dtype=x.dtype, transformer_options=transformer_options)
freqs_text = self.rope_encode_1d(context.shape[1], device=x.device, dtype=x.dtype, transformer_options=transformer_options)
out = self.forward_orig(x, timestep, context, y, freqs, freqs_text, transformer_options=transformer_options, **kwargs)
if original_dims == 4:
out = out.squeeze(2)
return out
def forward(self, x, timestep, context, y, time_dim_replace=None, transformer_options={}, **kwargs):
return comfy.patcher_extension.WrapperExecutor.new_class_executor(
self._forward,
self,
comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.DIFFUSION_MODEL, transformer_options)
).execute(x, timestep, context, y, time_dim_replace=time_dim_replace, transformer_options=transformer_options, **kwargs)

View File

@@ -1,871 +0,0 @@
from typing import Tuple
import torch
import torch.nn as nn
from comfy.ldm.lightricks.model import (
CrossAttention,
FeedForward,
AdaLayerNormSingle,
PixArtAlphaTextProjection,
LTXVModel,
)
from comfy.ldm.lightricks.symmetric_patchifier import AudioPatchifier
import comfy.ldm.common_dit
class CompressedTimestep:
"""Store video timestep embeddings in compressed form using per-frame indexing."""
__slots__ = ('data', 'batch_size', 'num_frames', 'patches_per_frame', 'feature_dim')
def __init__(self, tensor: torch.Tensor, patches_per_frame: int):
"""
tensor: [batch_size, num_tokens, feature_dim] tensor where num_tokens = num_frames * patches_per_frame
patches_per_frame: Number of spatial patches per frame (height * width in latent space), or None to disable compression
"""
self.batch_size, num_tokens, self.feature_dim = tensor.shape
# Check if compression is valid (num_tokens must be divisible by patches_per_frame)
if patches_per_frame is not None and num_tokens % patches_per_frame == 0 and num_tokens >= patches_per_frame:
self.patches_per_frame = patches_per_frame
self.num_frames = num_tokens // patches_per_frame
# Reshape to [batch, frames, patches_per_frame, feature_dim] and store one value per frame
# All patches in a frame are identical, so we only keep the first one
reshaped = tensor.view(self.batch_size, self.num_frames, patches_per_frame, self.feature_dim)
self.data = reshaped[:, :, 0, :].contiguous() # [batch, frames, feature_dim]
else:
# Not divisible or too small - store directly without compression
self.patches_per_frame = 1
self.num_frames = num_tokens
self.data = tensor
def expand(self):
"""Expand back to original tensor."""
if self.patches_per_frame == 1:
return self.data
# [batch, frames, feature_dim] -> [batch, frames, patches_per_frame, feature_dim] -> [batch, tokens, feature_dim]
expanded = self.data.unsqueeze(2).expand(self.batch_size, self.num_frames, self.patches_per_frame, self.feature_dim)
return expanded.reshape(self.batch_size, -1, self.feature_dim)
def expand_for_computation(self, scale_shift_table: torch.Tensor, batch_size: int, indices: slice = slice(None, None)):
"""Compute ada values on compressed per-frame data, then expand spatially."""
num_ada_params = scale_shift_table.shape[0]
# No compression - compute directly
if self.patches_per_frame == 1:
num_tokens = self.data.shape[1]
dim_per_param = self.feature_dim // num_ada_params
reshaped = self.data.reshape(batch_size, num_tokens, num_ada_params, dim_per_param)[:, :, indices, :]
table_values = scale_shift_table[indices].unsqueeze(0).unsqueeze(0).to(device=self.data.device, dtype=self.data.dtype)
ada_values = (table_values + reshaped).unbind(dim=2)
return ada_values
# Compressed: compute on per-frame data then expand spatially
# Reshape: [batch, frames, feature_dim] -> [batch, frames, num_ada_params, dim_per_param]
frame_reshaped = self.data.reshape(batch_size, self.num_frames, num_ada_params, -1)[:, :, indices, :]
table_values = scale_shift_table[indices].unsqueeze(0).unsqueeze(0).to(
device=self.data.device, dtype=self.data.dtype
)
frame_ada = (table_values + frame_reshaped).unbind(dim=2)
# Expand each ada parameter spatially: [batch, frames, dim] -> [batch, frames, patches, dim] -> [batch, tokens, dim]
return tuple(
frame_val.unsqueeze(2).expand(batch_size, self.num_frames, self.patches_per_frame, -1)
.reshape(batch_size, -1, frame_val.shape[-1])
for frame_val in frame_ada
)
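# A minimal round-trip sketch (hypothetical, illustrative sizes; not part of the original module): builds a
# timestep tensor whose patches within each frame share one embedding, compresses it per frame, and checks
# that expand() restores the original layout losslessly.
def _compressed_timestep_roundtrip_sketch():
    batch, frames, patches_per_frame, dim = 2, 4, 6, 8
    per_frame = torch.randn(batch, frames, 1, dim)
    full = per_frame.expand(batch, frames, patches_per_frame, dim).reshape(batch, -1, dim)
    compressed = CompressedTimestep(full, patches_per_frame)
    assert compressed.data.shape == (batch, frames, dim)  # storage shrinks by a factor of patches_per_frame
    assert torch.equal(compressed.expand(), full)  # lossless because all patches in a frame were identical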
class BasicAVTransformerBlock(nn.Module):
def __init__(
self,
v_dim,
a_dim,
v_heads,
a_heads,
vd_head,
ad_head,
v_context_dim=None,
a_context_dim=None,
attn_precision=None,
dtype=None,
device=None,
operations=None,
):
super().__init__()
self.attn_precision = attn_precision
self.attn1 = CrossAttention(
query_dim=v_dim,
heads=v_heads,
dim_head=vd_head,
context_dim=None,
attn_precision=self.attn_precision,
dtype=dtype,
device=device,
operations=operations,
)
self.audio_attn1 = CrossAttention(
query_dim=a_dim,
heads=a_heads,
dim_head=ad_head,
context_dim=None,
attn_precision=self.attn_precision,
dtype=dtype,
device=device,
operations=operations,
)
self.attn2 = CrossAttention(
query_dim=v_dim,
context_dim=v_context_dim,
heads=v_heads,
dim_head=vd_head,
attn_precision=self.attn_precision,
dtype=dtype,
device=device,
operations=operations,
)
self.audio_attn2 = CrossAttention(
query_dim=a_dim,
context_dim=a_context_dim,
heads=a_heads,
dim_head=ad_head,
attn_precision=self.attn_precision,
dtype=dtype,
device=device,
operations=operations,
)
# Q: Video, K,V: Audio
self.audio_to_video_attn = CrossAttention(
query_dim=v_dim,
context_dim=a_dim,
heads=a_heads,
dim_head=ad_head,
attn_precision=self.attn_precision,
dtype=dtype,
device=device,
operations=operations,
)
# Q: Audio, K,V: Video
self.video_to_audio_attn = CrossAttention(
query_dim=a_dim,
context_dim=v_dim,
heads=a_heads,
dim_head=ad_head,
attn_precision=self.attn_precision,
dtype=dtype,
device=device,
operations=operations,
)
self.ff = FeedForward(
v_dim, dim_out=v_dim, glu=True, dtype=dtype, device=device, operations=operations
)
self.audio_ff = FeedForward(
a_dim, dim_out=a_dim, glu=True, dtype=dtype, device=device, operations=operations
)
self.scale_shift_table = nn.Parameter(torch.empty(6, v_dim, device=device, dtype=dtype))
self.audio_scale_shift_table = nn.Parameter(
torch.empty(6, a_dim, device=device, dtype=dtype)
)
self.scale_shift_table_a2v_ca_audio = nn.Parameter(
torch.empty(5, a_dim, device=device, dtype=dtype)
)
self.scale_shift_table_a2v_ca_video = nn.Parameter(
torch.empty(5, v_dim, device=device, dtype=dtype)
)
def get_ada_values(
self, scale_shift_table: torch.Tensor, batch_size: int, timestep: torch.Tensor, indices: slice = slice(None, None)
):
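        # AdaLN-style modulation: the per-token timestep embedding is reshaped into num_ada_params groups,
        # offset by the learned table, and unbound into one tensor per shift/scale/gate parameter.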
if isinstance(timestep, CompressedTimestep):
return timestep.expand_for_computation(scale_shift_table, batch_size, indices)
num_ada_params = scale_shift_table.shape[0]
ada_values = (
scale_shift_table[indices].unsqueeze(0).unsqueeze(0).to(device=timestep.device, dtype=timestep.dtype)
+ timestep.reshape(batch_size, timestep.shape[1], num_ada_params, -1)[:, :, indices, :]
).unbind(dim=2)
return ada_values
def get_av_ca_ada_values(
self,
scale_shift_table: torch.Tensor,
batch_size: int,
scale_shift_timestep: torch.Tensor,
gate_timestep: torch.Tensor,
num_scale_shift_values: int = 4,
):
scale_shift_ada_values = self.get_ada_values(
scale_shift_table[:num_scale_shift_values, :],
batch_size,
scale_shift_timestep,
)
gate_ada_values = self.get_ada_values(
scale_shift_table[num_scale_shift_values:, :],
batch_size,
gate_timestep,
)
return (*scale_shift_ada_values, *gate_ada_values)
def forward(
self, x: Tuple[torch.Tensor, torch.Tensor], v_context=None, a_context=None, attention_mask=None, v_timestep=None, a_timestep=None,
v_pe=None, a_pe=None, v_cross_pe=None, a_cross_pe=None, v_cross_scale_shift_timestep=None, a_cross_scale_shift_timestep=None,
v_cross_gate_timestep=None, a_cross_gate_timestep=None, transformer_options=None,
) -> Tuple[torch.Tensor, torch.Tensor]:
run_vx = transformer_options.get("run_vx", True)
run_ax = transformer_options.get("run_ax", True)
vx, ax = x
run_ax = run_ax and ax.numel() > 0
run_a2v = run_vx and transformer_options.get("a2v_cross_attn", True) and ax.numel() > 0
run_v2a = run_ax and transformer_options.get("v2a_cross_attn", True)
# video
if run_vx:
# video self-attention
vshift_msa, vscale_msa = (self.get_ada_values(self.scale_shift_table, vx.shape[0], v_timestep, slice(0, 2)))
norm_vx = comfy.ldm.common_dit.rms_norm(vx) * (1 + vscale_msa) + vshift_msa
del vshift_msa, vscale_msa
attn1_out = self.attn1(norm_vx, pe=v_pe, transformer_options=transformer_options)
del norm_vx
# video cross-attention
vgate_msa = self.get_ada_values(self.scale_shift_table, vx.shape[0], v_timestep, slice(2, 3))[0]
vx.addcmul_(attn1_out, vgate_msa)
del vgate_msa, attn1_out
vx.add_(self.attn2(comfy.ldm.common_dit.rms_norm(vx), context=v_context, mask=attention_mask, transformer_options=transformer_options))
# audio
if run_ax:
# audio self-attention
ashift_msa, ascale_msa = (self.get_ada_values(self.audio_scale_shift_table, ax.shape[0], a_timestep, slice(0, 2)))
norm_ax = comfy.ldm.common_dit.rms_norm(ax) * (1 + ascale_msa) + ashift_msa
del ashift_msa, ascale_msa
attn1_out = self.audio_attn1(norm_ax, pe=a_pe, transformer_options=transformer_options)
del norm_ax
# audio cross-attention
agate_msa = self.get_ada_values(self.audio_scale_shift_table, ax.shape[0], a_timestep, slice(2, 3))[0]
ax.addcmul_(attn1_out, agate_msa)
del agate_msa, attn1_out
ax.add_(self.audio_attn2(comfy.ldm.common_dit.rms_norm(ax), context=a_context, mask=attention_mask, transformer_options=transformer_options))
# video - audio cross attention.
if run_a2v or run_v2a:
vx_norm3 = comfy.ldm.common_dit.rms_norm(vx)
ax_norm3 = comfy.ldm.common_dit.rms_norm(ax)
# audio to video cross attention
if run_a2v:
scale_ca_audio_hidden_states_a2v, shift_ca_audio_hidden_states_a2v = self.get_ada_values(
self.scale_shift_table_a2v_ca_audio[:4, :], ax.shape[0], a_cross_scale_shift_timestep)[:2]
scale_ca_video_hidden_states_a2v_v, shift_ca_video_hidden_states_a2v_v = self.get_ada_values(
self.scale_shift_table_a2v_ca_video[:4, :], vx.shape[0], v_cross_scale_shift_timestep)[:2]
vx_scaled = vx_norm3 * (1 + scale_ca_video_hidden_states_a2v_v) + shift_ca_video_hidden_states_a2v_v
ax_scaled = ax_norm3 * (1 + scale_ca_audio_hidden_states_a2v) + shift_ca_audio_hidden_states_a2v
del scale_ca_video_hidden_states_a2v_v, shift_ca_video_hidden_states_a2v_v, scale_ca_audio_hidden_states_a2v, shift_ca_audio_hidden_states_a2v
a2v_out = self.audio_to_video_attn(vx_scaled, context=ax_scaled, pe=v_cross_pe, k_pe=a_cross_pe, transformer_options=transformer_options)
del vx_scaled, ax_scaled
gate_out_a2v = self.get_ada_values(self.scale_shift_table_a2v_ca_video[4:, :], vx.shape[0], v_cross_gate_timestep)[0]
vx.addcmul_(a2v_out, gate_out_a2v)
del gate_out_a2v, a2v_out
# video to audio cross attention
if run_v2a:
scale_ca_audio_hidden_states_v2a, shift_ca_audio_hidden_states_v2a = self.get_ada_values(
self.scale_shift_table_a2v_ca_audio[:4, :], ax.shape[0], a_cross_scale_shift_timestep)[2:4]
scale_ca_video_hidden_states_v2a, shift_ca_video_hidden_states_v2a = self.get_ada_values(
self.scale_shift_table_a2v_ca_video[:4, :], vx.shape[0], v_cross_scale_shift_timestep)[2:4]
ax_scaled = ax_norm3 * (1 + scale_ca_audio_hidden_states_v2a) + shift_ca_audio_hidden_states_v2a
vx_scaled = vx_norm3 * (1 + scale_ca_video_hidden_states_v2a) + shift_ca_video_hidden_states_v2a
del scale_ca_video_hidden_states_v2a, shift_ca_video_hidden_states_v2a, scale_ca_audio_hidden_states_v2a, shift_ca_audio_hidden_states_v2a
v2a_out = self.video_to_audio_attn(ax_scaled, context=vx_scaled, pe=a_cross_pe, k_pe=v_cross_pe, transformer_options=transformer_options)
del ax_scaled, vx_scaled
gate_out_v2a = self.get_ada_values(self.scale_shift_table_a2v_ca_audio[4:, :], ax.shape[0], a_cross_gate_timestep)[0]
ax.addcmul_(v2a_out, gate_out_v2a)
del gate_out_v2a, v2a_out
del vx_norm3, ax_norm3
# video feedforward
if run_vx:
vshift_mlp, vscale_mlp = self.get_ada_values(self.scale_shift_table, vx.shape[0], v_timestep, slice(3, 5))
vx_scaled = comfy.ldm.common_dit.rms_norm(vx) * (1 + vscale_mlp) + vshift_mlp
del vshift_mlp, vscale_mlp
ff_out = self.ff(vx_scaled)
del vx_scaled
vgate_mlp = self.get_ada_values(self.scale_shift_table, vx.shape[0], v_timestep, slice(5, 6))[0]
vx.addcmul_(ff_out, vgate_mlp)
del vgate_mlp, ff_out
# audio feedforward
if run_ax:
ashift_mlp, ascale_mlp = self.get_ada_values(self.audio_scale_shift_table, ax.shape[0], a_timestep, slice(3, 5))
ax_scaled = comfy.ldm.common_dit.rms_norm(ax) * (1 + ascale_mlp) + ashift_mlp
del ashift_mlp, ascale_mlp
ff_out = self.audio_ff(ax_scaled)
del ax_scaled
agate_mlp = self.get_ada_values(self.audio_scale_shift_table, ax.shape[0], a_timestep, slice(5, 6))[0]
ax.addcmul_(ff_out, agate_mlp)
del agate_mlp, ff_out
return vx, ax
class LTXAVModel(LTXVModel):
"""LTXAV model for audio-video generation."""
def __init__(
self,
in_channels=128,
audio_in_channels=128,
cross_attention_dim=4096,
audio_cross_attention_dim=2048,
attention_head_dim=128,
audio_attention_head_dim=64,
num_attention_heads=32,
audio_num_attention_heads=32,
caption_channels=3840,
num_layers=48,
positional_embedding_theta=10000.0,
positional_embedding_max_pos=[20, 2048, 2048],
audio_positional_embedding_max_pos=[20],
causal_temporal_positioning=False,
vae_scale_factors=(8, 32, 32),
use_middle_indices_grid=False,
timestep_scale_multiplier=1000.0,
av_ca_timestep_scale_multiplier=1.0,
dtype=None,
device=None,
operations=None,
**kwargs,
):
# Store audio-specific parameters
self.audio_in_channels = audio_in_channels
self.audio_cross_attention_dim = audio_cross_attention_dim
self.audio_attention_head_dim = audio_attention_head_dim
self.audio_num_attention_heads = audio_num_attention_heads
self.audio_positional_embedding_max_pos = audio_positional_embedding_max_pos
# Calculate audio dimensions
self.audio_inner_dim = audio_num_attention_heads * audio_attention_head_dim
self.audio_out_channels = audio_in_channels
# Audio-specific constants
self.num_audio_channels = 8
self.audio_frequency_bins = 16
self.av_ca_timestep_scale_multiplier = av_ca_timestep_scale_multiplier
super().__init__(
in_channels=in_channels,
cross_attention_dim=cross_attention_dim,
attention_head_dim=attention_head_dim,
num_attention_heads=num_attention_heads,
caption_channels=caption_channels,
num_layers=num_layers,
positional_embedding_theta=positional_embedding_theta,
positional_embedding_max_pos=positional_embedding_max_pos,
causal_temporal_positioning=causal_temporal_positioning,
vae_scale_factors=vae_scale_factors,
use_middle_indices_grid=use_middle_indices_grid,
timestep_scale_multiplier=timestep_scale_multiplier,
dtype=dtype,
device=device,
operations=operations,
**kwargs,
)
def _init_model_components(self, device, dtype, **kwargs):
"""Initialize LTXAV-specific components."""
# Audio-specific projections
self.audio_patchify_proj = self.operations.Linear(
self.audio_in_channels, self.audio_inner_dim, bias=True, dtype=dtype, device=device
)
# Audio-specific AdaLN
self.audio_adaln_single = AdaLayerNormSingle(
self.audio_inner_dim,
use_additional_conditions=False,
dtype=dtype,
device=device,
operations=self.operations,
)
num_scale_shift_values = 4
self.av_ca_video_scale_shift_adaln_single = AdaLayerNormSingle(
self.inner_dim,
use_additional_conditions=False,
embedding_coefficient=num_scale_shift_values,
dtype=dtype,
device=device,
operations=self.operations,
)
self.av_ca_a2v_gate_adaln_single = AdaLayerNormSingle(
self.inner_dim,
use_additional_conditions=False,
embedding_coefficient=1,
dtype=dtype,
device=device,
operations=self.operations,
)
self.av_ca_audio_scale_shift_adaln_single = AdaLayerNormSingle(
self.audio_inner_dim,
use_additional_conditions=False,
embedding_coefficient=num_scale_shift_values,
dtype=dtype,
device=device,
operations=self.operations,
)
self.av_ca_v2a_gate_adaln_single = AdaLayerNormSingle(
self.audio_inner_dim,
use_additional_conditions=False,
embedding_coefficient=1,
dtype=dtype,
device=device,
operations=self.operations,
)
# Audio caption projection
self.audio_caption_projection = PixArtAlphaTextProjection(
in_features=self.caption_channels,
hidden_size=self.audio_inner_dim,
dtype=dtype,
device=device,
operations=self.operations,
)
def _init_transformer_blocks(self, device, dtype, **kwargs):
"""Initialize transformer blocks for LTXAV."""
self.transformer_blocks = nn.ModuleList(
[
BasicAVTransformerBlock(
v_dim=self.inner_dim,
a_dim=self.audio_inner_dim,
v_heads=self.num_attention_heads,
a_heads=self.audio_num_attention_heads,
vd_head=self.attention_head_dim,
ad_head=self.audio_attention_head_dim,
v_context_dim=self.cross_attention_dim,
a_context_dim=self.audio_cross_attention_dim,
dtype=dtype,
device=device,
operations=self.operations,
)
for _ in range(self.num_layers)
]
)
def _init_output_components(self, device, dtype):
"""Initialize output components for LTXAV."""
# Video output components
super()._init_output_components(device, dtype)
# Audio output components
self.audio_scale_shift_table = nn.Parameter(
torch.empty(2, self.audio_inner_dim, dtype=dtype, device=device)
)
self.audio_norm_out = self.operations.LayerNorm(
self.audio_inner_dim, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device
)
self.audio_proj_out = self.operations.Linear(
self.audio_inner_dim, self.audio_out_channels, dtype=dtype, device=device
)
self.a_patchifier = AudioPatchifier(1, start_end=True)
def separate_audio_and_video_latents(self, x, audio_length):
"""Separate audio and video latents from combined input."""
# vx = x[:, : self.in_channels]
# ax = x[:, self.in_channels :]
#
# ax = ax.reshape(ax.shape[0], -1)
# ax = ax[:, : audio_length * self.num_audio_channels * self.audio_frequency_bins]
#
# ax = ax.reshape(
# ax.shape[0], self.num_audio_channels, audio_length, self.audio_frequency_bins
# )
vx = x[0]
ax = x[1] if len(x) > 1 else torch.zeros(
(vx.shape[0], self.num_audio_channels, 0, self.audio_frequency_bins),
device=vx.device, dtype=vx.dtype
)
return vx, ax
    def recombine_audio_and_video_latents(self, vx, ax, target_shape=None):
        """Recombine audio and video latents for output."""
        if ax.numel() == 0:
            return vx
        else:
            return [vx, ax]
# if ax.device != vx.device or ax.dtype != vx.dtype:
# logging.warning("Audio and video latents are on different devices or dtypes.")
# ax = ax.to(device=vx.device, dtype=vx.dtype)
# logging.warning(f"Audio audio latent moved to device: {ax.device}, dtype: {ax.dtype}")
#
# ax = ax.reshape(ax.shape[0], -1)
# # pad to f x h x w of the video latents
# divisor = vx.shape[-1] * vx.shape[-2] * vx.shape[-3]
# if target_shape is None:
# repetitions = math.ceil(ax.shape[-1] / divisor)
# else:
# repetitions = target_shape[1] - vx.shape[1]
# padded_len = repetitions * divisor
# ax = F.pad(ax, (0, padded_len - ax.shape[-1]))
# ax = ax.reshape(ax.shape[0], -1, vx.shape[-3], vx.shape[-2], vx.shape[-1])
# return torch.cat([vx, ax], dim=1)
def _process_input(self, x, keyframe_idxs, denoise_mask, **kwargs):
"""Process input for LTXAV - separate audio and video, then patchify."""
audio_length = kwargs.get("audio_length", 0)
# Separate audio and video latents
vx, ax = self.separate_audio_and_video_latents(x, audio_length)
has_spatial_mask = False
if denoise_mask is not None:
# check if any frame has spatial variation (inpainting)
for frame_idx in range(denoise_mask.shape[2]):
frame_mask = denoise_mask[0, 0, frame_idx]
if frame_mask.numel() > 0 and frame_mask.min() != frame_mask.max():
has_spatial_mask = True
break
[vx, v_pixel_coords, additional_args] = super()._process_input(
vx, keyframe_idxs, denoise_mask, **kwargs
)
additional_args["has_spatial_mask"] = has_spatial_mask
ax, a_latent_coords = self.a_patchifier.patchify(ax)
ax = self.audio_patchify_proj(ax)
# additional_args.update({"av_orig_shape": list(x.shape)})
return [vx, ax], [v_pixel_coords, a_latent_coords], additional_args
def _prepare_timestep(self, timestep, batch_size, hidden_dtype, **kwargs):
"""Prepare timestep embeddings."""
# TODO: some code reuse is needed here.
grid_mask = kwargs.get("grid_mask", None)
if grid_mask is not None:
timestep = timestep[:, grid_mask]
timestep_scaled = timestep * self.timestep_scale_multiplier
v_timestep, v_embedded_timestep = self.adaln_single(
timestep_scaled.flatten(),
{"resolution": None, "aspect_ratio": None},
batch_size=batch_size,
hidden_dtype=hidden_dtype,
)
# Calculate patches_per_frame from orig_shape: [batch, channels, frames, height, width]
# Video tokens are arranged as (frames * height * width), so patches_per_frame = height * width
orig_shape = kwargs.get("orig_shape")
has_spatial_mask = kwargs.get("has_spatial_mask", None)
v_patches_per_frame = None
if not has_spatial_mask and orig_shape is not None and len(orig_shape) == 5:
# orig_shape[3] = height, orig_shape[4] = width (in latent space)
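            # e.g. a 16 x 16 latent grid gives patches_per_frame = 256, so one stored embedding replaces 256 identical per-patch copies in every frame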
v_patches_per_frame = orig_shape[3] * orig_shape[4]
# Reshape to [batch_size, num_tokens, dim] and compress for storage
v_timestep = CompressedTimestep(v_timestep.view(batch_size, -1, v_timestep.shape[-1]), v_patches_per_frame)
v_embedded_timestep = CompressedTimestep(v_embedded_timestep.view(batch_size, -1, v_embedded_timestep.shape[-1]), v_patches_per_frame)
# Prepare audio timestep
a_timestep = kwargs.get("a_timestep")
if a_timestep is not None:
a_timestep_scaled = a_timestep * self.timestep_scale_multiplier
a_timestep_flat = a_timestep_scaled.flatten()
timestep_flat = timestep_scaled.flatten()
av_ca_factor = self.av_ca_timestep_scale_multiplier / self.timestep_scale_multiplier
# Cross-attention timesteps - compress these too
av_ca_audio_scale_shift_timestep, _ = self.av_ca_audio_scale_shift_adaln_single(
a_timestep_flat,
{"resolution": None, "aspect_ratio": None},
batch_size=batch_size,
hidden_dtype=hidden_dtype,
)
av_ca_video_scale_shift_timestep, _ = self.av_ca_video_scale_shift_adaln_single(
timestep_flat,
{"resolution": None, "aspect_ratio": None},
batch_size=batch_size,
hidden_dtype=hidden_dtype,
)
av_ca_a2v_gate_noise_timestep, _ = self.av_ca_a2v_gate_adaln_single(
timestep_flat * av_ca_factor,
{"resolution": None, "aspect_ratio": None},
batch_size=batch_size,
hidden_dtype=hidden_dtype,
)
av_ca_v2a_gate_noise_timestep, _ = self.av_ca_v2a_gate_adaln_single(
a_timestep_flat * av_ca_factor,
{"resolution": None, "aspect_ratio": None},
batch_size=batch_size,
hidden_dtype=hidden_dtype,
)
# Compress cross-attention timesteps (only video side, audio is too small to benefit)
# v_patches_per_frame is None for spatial masks, set for temporal masks or no mask
cross_av_timestep_ss = [
av_ca_audio_scale_shift_timestep.view(batch_size, -1, av_ca_audio_scale_shift_timestep.shape[-1]),
CompressedTimestep(av_ca_video_scale_shift_timestep.view(batch_size, -1, av_ca_video_scale_shift_timestep.shape[-1]), v_patches_per_frame), # video - compressed if possible
CompressedTimestep(av_ca_a2v_gate_noise_timestep.view(batch_size, -1, av_ca_a2v_gate_noise_timestep.shape[-1]), v_patches_per_frame), # video - compressed if possible
av_ca_v2a_gate_noise_timestep.view(batch_size, -1, av_ca_v2a_gate_noise_timestep.shape[-1]),
]
a_timestep, a_embedded_timestep = self.audio_adaln_single(
a_timestep_flat,
{"resolution": None, "aspect_ratio": None},
batch_size=batch_size,
hidden_dtype=hidden_dtype,
)
# Audio timesteps
a_timestep = a_timestep.view(batch_size, -1, a_timestep.shape[-1])
a_embedded_timestep = a_embedded_timestep.view(batch_size, -1, a_embedded_timestep.shape[-1])
else:
a_timestep = timestep_scaled
a_embedded_timestep = kwargs.get("embedded_timestep")
cross_av_timestep_ss = []
return [v_timestep, a_timestep, cross_av_timestep_ss], [
v_embedded_timestep,
a_embedded_timestep,
]
def _prepare_context(self, context, batch_size, x, attention_mask=None):
vx = x[0]
ax = x[1]
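        # the caption embedding carries the video and audio conditioning halves concatenated along the channel dim; split them apart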
v_context, a_context = torch.split(
context, int(context.shape[-1] / 2), len(context.shape) - 1
)
v_context, attention_mask = super()._prepare_context(
v_context, batch_size, vx, attention_mask
)
if self.audio_caption_projection is not None:
a_context = self.audio_caption_projection(a_context)
a_context = a_context.view(batch_size, -1, ax.shape[-1])
return [v_context, a_context], attention_mask
def _prepare_positional_embeddings(self, pixel_coords, frame_rate, x_dtype):
v_pixel_coords = pixel_coords[0]
v_pe = super()._prepare_positional_embeddings(v_pixel_coords, frame_rate, x_dtype)
a_latent_coords = pixel_coords[1]
a_pe = self._precompute_freqs_cis(
a_latent_coords,
dim=self.audio_inner_dim,
out_dtype=x_dtype,
max_pos=self.audio_positional_embedding_max_pos,
use_middle_indices_grid=self.use_middle_indices_grid,
num_attention_heads=self.audio_num_attention_heads,
)
# calculate positional embeddings for the middle of the token duration, to use in av cross attention layers.
max_pos = max(
self.positional_embedding_max_pos[0], self.audio_positional_embedding_max_pos[0]
)
v_pixel_coords = v_pixel_coords.to(torch.float32)
v_pixel_coords[:, 0] = v_pixel_coords[:, 0] * (1.0 / frame_rate)
av_cross_video_freq_cis = self._precompute_freqs_cis(
v_pixel_coords[:, 0:1, :],
dim=self.audio_cross_attention_dim,
out_dtype=x_dtype,
max_pos=[max_pos],
use_middle_indices_grid=True,
num_attention_heads=self.audio_num_attention_heads,
)
av_cross_audio_freq_cis = self._precompute_freqs_cis(
a_latent_coords[:, 0:1, :],
dim=self.audio_cross_attention_dim,
out_dtype=x_dtype,
max_pos=[max_pos],
use_middle_indices_grid=True,
num_attention_heads=self.audio_num_attention_heads,
)
return [(v_pe, av_cross_video_freq_cis), (a_pe, av_cross_audio_freq_cis)]
def _process_transformer_blocks(
self, x, context, attention_mask, timestep, pe, transformer_options={}, **kwargs
):
        """Process transformer blocks for LTXAV."""
        vx = x[0]
ax = x[1]
v_context = context[0]
a_context = context[1]
v_timestep = timestep[0]
a_timestep = timestep[1]
v_pe, av_cross_video_freq_cis = pe[0]
a_pe, av_cross_audio_freq_cis = pe[1]
(
av_ca_audio_scale_shift_timestep,
av_ca_video_scale_shift_timestep,
av_ca_a2v_gate_noise_timestep,
av_ca_v2a_gate_noise_timestep,
) = timestep[2]
"""Process transformer blocks for LTXAV."""
patches_replace = transformer_options.get("patches_replace", {})
blocks_replace = patches_replace.get("dit", {})
# Process transformer blocks
for i, block in enumerate(self.transformer_blocks):
if ("double_block", i) in blocks_replace:
def block_wrap(args):
out = {}
out["img"] = block(
args["img"],
v_context=args["v_context"],
a_context=args["a_context"],
attention_mask=args["attention_mask"],
v_timestep=args["v_timestep"],
a_timestep=args["a_timestep"],
v_pe=args["v_pe"],
a_pe=args["a_pe"],
v_cross_pe=args["v_cross_pe"],
a_cross_pe=args["a_cross_pe"],
v_cross_scale_shift_timestep=args["v_cross_scale_shift_timestep"],
a_cross_scale_shift_timestep=args["a_cross_scale_shift_timestep"],
v_cross_gate_timestep=args["v_cross_gate_timestep"],
a_cross_gate_timestep=args["a_cross_gate_timestep"],
transformer_options=args["transformer_options"],
)
return out
out = blocks_replace[("double_block", i)](
{
"img": (vx, ax),
"v_context": v_context,
"a_context": a_context,
"attention_mask": attention_mask,
"v_timestep": v_timestep,
"a_timestep": a_timestep,
"v_pe": v_pe,
"a_pe": a_pe,
"v_cross_pe": av_cross_video_freq_cis,
"a_cross_pe": av_cross_audio_freq_cis,
"v_cross_scale_shift_timestep": av_ca_video_scale_shift_timestep,
"a_cross_scale_shift_timestep": av_ca_audio_scale_shift_timestep,
"v_cross_gate_timestep": av_ca_a2v_gate_noise_timestep,
"a_cross_gate_timestep": av_ca_v2a_gate_noise_timestep,
"transformer_options": transformer_options,
},
{"original_block": block_wrap},
)
vx, ax = out["img"]
else:
vx, ax = block(
(vx, ax),
v_context=v_context,
a_context=a_context,
attention_mask=attention_mask,
v_timestep=v_timestep,
a_timestep=a_timestep,
v_pe=v_pe,
a_pe=a_pe,
v_cross_pe=av_cross_video_freq_cis,
a_cross_pe=av_cross_audio_freq_cis,
v_cross_scale_shift_timestep=av_ca_video_scale_shift_timestep,
a_cross_scale_shift_timestep=av_ca_audio_scale_shift_timestep,
v_cross_gate_timestep=av_ca_a2v_gate_noise_timestep,
a_cross_gate_timestep=av_ca_v2a_gate_noise_timestep,
transformer_options=transformer_options,
)
return [vx, ax]
def _process_output(self, x, embedded_timestep, keyframe_idxs, **kwargs):
vx = x[0]
ax = x[1]
v_embedded_timestep = embedded_timestep[0]
a_embedded_timestep = embedded_timestep[1]
# Expand compressed video timestep if needed
if isinstance(v_embedded_timestep, CompressedTimestep):
v_embedded_timestep = v_embedded_timestep.expand()
vx = super()._process_output(vx, v_embedded_timestep, keyframe_idxs, **kwargs)
# Process audio output
a_scale_shift_values = (
self.audio_scale_shift_table[None, None].to(device=a_embedded_timestep.device, dtype=a_embedded_timestep.dtype)
+ a_embedded_timestep[:, :, None]
)
a_shift, a_scale = a_scale_shift_values[:, :, 0], a_scale_shift_values[:, :, 1]
ax = self.audio_norm_out(ax)
ax = ax * (1 + a_scale) + a_shift
ax = self.audio_proj_out(ax)
# Unpatchify audio
ax = self.a_patchifier.unpatchify(
ax, channels=self.num_audio_channels, freq=self.audio_frequency_bins
)
# Recombine audio and video
original_shape = kwargs.get("av_orig_shape")
return self.recombine_audio_and_video_latents(vx, ax, original_shape)
def forward(
self,
x,
timestep,
context,
attention_mask=None,
frame_rate=25,
transformer_options={},
keyframe_idxs=None,
**kwargs,
):
"""
Forward pass for LTXAV model.
Args:
            x: List of [video_latent, audio_latent] tensors (the audio latent may be empty)
timestep: Tuple of (video_timestep, audio_timestep) or single timestep
context: Context tensor (e.g., text embeddings)
attention_mask: Attention mask tensor
frame_rate: Frame rate for temporal processing
transformer_options: Additional options for transformer blocks
keyframe_idxs: Keyframe indices for temporal processing
**kwargs: Additional keyword arguments including audio_length
Returns:
            The video latent, or a [video_latent, audio_latent] list when audio is present
"""
# Handle timestep format
if isinstance(timestep, (tuple, list)) and len(timestep) == 2:
v_timestep, a_timestep = timestep
kwargs["a_timestep"] = a_timestep
timestep = v_timestep
else:
kwargs["a_timestep"] = timestep
# Call parent forward method
return super().forward(
x,
timestep,
context,
attention_mask,
frame_rate,
transformer_options,
keyframe_idxs,
**kwargs,
)

View File

@@ -1,305 +0,0 @@
import math
from typing import Optional
import comfy.ldm.common_dit
import torch
from comfy.ldm.lightricks.model import (
CrossAttention,
FeedForward,
generate_freq_grid_np,
interleaved_freqs_cis,
split_freqs_cis,
)
from torch import nn
class BasicTransformerBlock1D(nn.Module):
r"""
A basic Transformer block.
    Parameters:
        dim (`int`): The number of channels in the input and output.
        n_heads (`int`): The number of heads to use for multi-head attention.
        d_head (`int`): The number of channels in each attention head.
        context_dim (`int`, *optional*): Accepted for interface compatibility; this 1D block only performs self-attention.
        attn_precision (*optional*): Precision override for the attention computation.
        dtype, device, operations (*optional*): Passed through to the attention and feed-forward sub-layers.
    """
def __init__(
self,
dim,
n_heads,
d_head,
context_dim=None,
attn_precision=None,
dtype=None,
device=None,
operations=None,
):
super().__init__()
# Define 3 blocks. Each block has its own normalization layer.
# 1. Self-Attn
self.attn1 = CrossAttention(
query_dim=dim,
heads=n_heads,
dim_head=d_head,
context_dim=None,
dtype=dtype,
device=device,
operations=operations,
)
# 3. Feed-forward
self.ff = FeedForward(
dim,
dim_out=dim,
glu=True,
dtype=dtype,
device=device,
operations=operations,
)
def forward(self, hidden_states, attention_mask=None, pe=None) -> torch.FloatTensor:
# Notice that normalization is always applied before the real computation in the following blocks.
# 1. Normalization Before Self-Attention
norm_hidden_states = comfy.ldm.common_dit.rms_norm(hidden_states)
norm_hidden_states = norm_hidden_states.squeeze(1)
# 2. Self-Attention
attn_output = self.attn1(norm_hidden_states, mask=attention_mask, pe=pe)
hidden_states = attn_output + hidden_states
if hidden_states.ndim == 4:
hidden_states = hidden_states.squeeze(1)
# 3. Normalization before Feed-Forward
norm_hidden_states = comfy.ldm.common_dit.rms_norm(hidden_states)
# 4. Feed-forward
ff_output = self.ff(norm_hidden_states)
hidden_states = ff_output + hidden_states
if hidden_states.ndim == 4:
hidden_states = hidden_states.squeeze(1)
return hidden_states
class Embeddings1DConnector(nn.Module):
_supports_gradient_checkpointing = True
def __init__(
self,
in_channels=128,
cross_attention_dim=2048,
attention_head_dim=128,
num_attention_heads=30,
num_layers=2,
positional_embedding_theta=10000.0,
positional_embedding_max_pos=[4096],
causal_temporal_positioning=False,
num_learnable_registers: Optional[int] = 128,
dtype=None,
device=None,
operations=None,
split_rope=False,
double_precision_rope=False,
**kwargs,
):
super().__init__()
self.dtype = dtype
self.out_channels = in_channels
self.num_attention_heads = num_attention_heads
self.inner_dim = num_attention_heads * attention_head_dim
self.causal_temporal_positioning = causal_temporal_positioning
self.positional_embedding_theta = positional_embedding_theta
self.positional_embedding_max_pos = positional_embedding_max_pos
self.split_rope = split_rope
self.double_precision_rope = double_precision_rope
self.transformer_1d_blocks = nn.ModuleList(
[
BasicTransformerBlock1D(
self.inner_dim,
num_attention_heads,
attention_head_dim,
context_dim=cross_attention_dim,
dtype=dtype,
device=device,
operations=operations,
)
for _ in range(num_layers)
]
)
inner_dim = num_attention_heads * attention_head_dim
self.num_learnable_registers = num_learnable_registers
if self.num_learnable_registers:
self.learnable_registers = nn.Parameter(
torch.rand(
self.num_learnable_registers, inner_dim, dtype=dtype, device=device
)
* 2.0
- 1.0
)
def get_fractional_positions(self, indices_grid):
fractional_positions = torch.stack(
[
indices_grid[:, i] / self.positional_embedding_max_pos[i]
for i in range(1)
],
dim=-1,
)
return fractional_positions
def precompute_freqs(self, indices_grid, spacing):
source_dtype = indices_grid.dtype
dtype = (
torch.float32
if source_dtype in (torch.bfloat16, torch.float16)
else source_dtype
)
fractional_positions = self.get_fractional_positions(indices_grid)
indices = (
generate_freq_grid_np(
self.positional_embedding_theta,
indices_grid.shape[1],
self.inner_dim,
)
if self.double_precision_rope
else self.generate_freq_grid(spacing, dtype, fractional_positions.device)
).to(device=fractional_positions.device)
if spacing == "exp_2":
freqs = (
(indices * fractional_positions.unsqueeze(-1))
.transpose(-1, -2)
.flatten(2)
)
else:
freqs = (
(indices * (fractional_positions.unsqueeze(-1) * 2 - 1))
.transpose(-1, -2)
.flatten(2)
)
return freqs
def generate_freq_grid(self, spacing, dtype, device):
dim = self.inner_dim
theta = self.positional_embedding_theta
n_pos_dims = 1
        n_elem = 2 * n_pos_dims  # 2 entries (cos and sin) per positional dimension, e.g. 3 dims -> 6; here n_pos_dims is 1
start = 1
end = theta
if spacing == "exp":
indices = theta ** (torch.arange(0, dim, n_elem, device="cpu", dtype=torch.float32) / (dim - n_elem))
indices = indices.to(dtype=dtype, device=device)
elif spacing == "exp_2":
indices = 1.0 / theta ** (torch.arange(0, dim, n_elem, device=device) / dim)
indices = indices.to(dtype=dtype)
elif spacing == "linear":
indices = torch.linspace(
start, end, dim // n_elem, device=device, dtype=dtype
)
elif spacing == "sqrt":
indices = torch.linspace(
start**2, end**2, dim // n_elem, device=device, dtype=dtype
).sqrt()
indices = indices * math.pi / 2
return indices
def precompute_freqs_cis(self, indices_grid, spacing="exp"):
dim = self.inner_dim
n_elem = 2 # 2 because of cos and sin
freqs = self.precompute_freqs(indices_grid, spacing)
if self.split_rope:
expected_freqs = dim // 2
current_freqs = freqs.shape[-1]
pad_size = expected_freqs - current_freqs
cos_freq, sin_freq = split_freqs_cis(
freqs, pad_size, self.num_attention_heads
)
else:
cos_freq, sin_freq = interleaved_freqs_cis(freqs, dim % n_elem)
return cos_freq.to(self.dtype), sin_freq.to(self.dtype), self.split_rope
def forward(
self,
hidden_states: torch.Tensor,
attention_mask: Optional[torch.Tensor] = None,
):
"""
        Forward pass of the 1D embeddings connector.
        Args:
            hidden_states (`torch.Tensor` of shape `(batch, tokens, inner_dim)`): Input token embeddings.
            attention_mask (`torch.Tensor`, *optional*): Mask over the input tokens. When learnable registers
                are appended, the mask is replaced by a zero bias over the padded sequence.
        Returns:
            A tuple of the rms-normalized hidden states and the (possibly updated) attention mask.
        """
# 1. Input
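        # Append tiled learnable register tokens after the real tokens so the padded length is a multiple of
        # num_learnable_registers and at least 1024; the attention mask is then reset to a zero bias over the padded sequence.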
if self.num_learnable_registers:
num_registers_duplications = math.ceil(
max(1024, hidden_states.shape[1]) / self.num_learnable_registers
)
learnable_registers = torch.tile(
self.learnable_registers.to(hidden_states), (num_registers_duplications, 1)
)
hidden_states = torch.cat((hidden_states, learnable_registers[hidden_states.shape[1]:].unsqueeze(0).repeat(hidden_states.shape[0], 1, 1)), dim=1)
if attention_mask is not None:
attention_mask = torch.zeros([1, 1, 1, hidden_states.shape[1]], dtype=attention_mask.dtype, device=attention_mask.device)
indices_grid = torch.arange(
hidden_states.shape[1], dtype=torch.float32, device=hidden_states.device
)
indices_grid = indices_grid[None, None, :]
freqs_cis = self.precompute_freqs_cis(indices_grid)
# 2. Blocks
for block_idx, block in enumerate(self.transformer_1d_blocks):
hidden_states = block(
hidden_states, attention_mask=attention_mask, pe=freqs_cis
)
# 3. Output
# if self.output_scale is not None:
# hidden_states = hidden_states / self.output_scale
hidden_states = comfy.ldm.common_dit.rms_norm(hidden_states)
return hidden_states, attention_mask

View File

@@ -1,292 +0,0 @@
from typing import Optional, Tuple
import torch
import torch.nn as nn
import torch.nn.functional as F
from einops import rearrange
def _rational_for_scale(scale: float) -> Tuple[int, int]:
mapping = {0.75: (3, 4), 1.5: (3, 2), 2.0: (2, 1), 4.0: (4, 1)}
if float(scale) not in mapping:
raise ValueError(
f"Unsupported spatial_scale {scale}. Choose from {list(mapping.keys())}"
)
return mapping[float(scale)]
class PixelShuffleND(nn.Module):
def __init__(self, dims, upscale_factors=(2, 2, 2)):
super().__init__()
assert dims in [1, 2, 3], "dims must be 1, 2, or 3"
self.dims = dims
self.upscale_factors = upscale_factors
def forward(self, x):
if self.dims == 3:
return rearrange(
x,
"b (c p1 p2 p3) d h w -> b c (d p1) (h p2) (w p3)",
p1=self.upscale_factors[0],
p2=self.upscale_factors[1],
p3=self.upscale_factors[2],
)
elif self.dims == 2:
return rearrange(
x,
"b (c p1 p2) h w -> b c (h p1) (w p2)",
p1=self.upscale_factors[0],
p2=self.upscale_factors[1],
)
elif self.dims == 1:
return rearrange(
x,
"b (c p1) f h w -> b c (f p1) h w",
p1=self.upscale_factors[0],
)
class BlurDownsample(nn.Module):
"""
Anti-aliased spatial downsampling by integer stride using a fixed separable binomial kernel.
Applies only on H,W. Works for dims=2 or dims=3 (per-frame).
"""
def __init__(self, dims: int, stride: int):
super().__init__()
assert dims in (2, 3)
assert stride >= 1 and isinstance(stride, int)
self.dims = dims
self.stride = stride
# 5x5 separable binomial kernel [1,4,6,4,1] (outer product), normalized
k = torch.tensor([1.0, 4.0, 6.0, 4.0, 1.0])
k2d = k[:, None] @ k[None, :]
k2d = (k2d / k2d.sum()).float() # shape (5,5)
self.register_buffer("kernel", k2d[None, None, :, :]) # (1,1,5,5)
def forward(self, x: torch.Tensor) -> torch.Tensor:
if self.stride == 1:
return x
def _apply_2d(x2d: torch.Tensor) -> torch.Tensor:
# x2d: (B, C, H, W)
B, C, H, W = x2d.shape
weight = self.kernel.expand(C, 1, 5, 5) # depthwise
x2d = F.conv2d(
x2d, weight=weight, bias=None, stride=self.stride, padding=2, groups=C
)
return x2d
if self.dims == 2:
return _apply_2d(x)
else:
# dims == 3: apply per-frame on H,W
b, c, f, h, w = x.shape
x = rearrange(x, "b c f h w -> (b f) c h w")
x = _apply_2d(x)
h2, w2 = x.shape[-2:]
x = rearrange(x, "(b f) c h w -> b c f h w", b=b, f=f, h=h2, w=w2)
return x
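# Shape sketch (illustrative): with stride 2 the depthwise 5x5 blur halves H and W, e.g. (1, 3, 8, 8) -> (1, 3, 4, 4).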
class SpatialRationalResampler(nn.Module):
"""
Fully-learned rational spatial scaling: up by 'num' via PixelShuffle, then anti-aliased
downsample by 'den' using fixed blur + stride. Operates on H,W only.
    For dims==3, the module works per-frame for spatial scaling (the temporal axis is untouched).
"""
def __init__(self, mid_channels: int, scale: float):
super().__init__()
self.scale = float(scale)
self.num, self.den = _rational_for_scale(self.scale)
self.conv = nn.Conv2d(
mid_channels, (self.num**2) * mid_channels, kernel_size=3, padding=1
)
self.pixel_shuffle = PixelShuffleND(2, upscale_factors=(self.num, self.num))
self.blur_down = BlurDownsample(dims=2, stride=self.den)
def forward(self, x: torch.Tensor) -> torch.Tensor:
b, c, f, h, w = x.shape
x = rearrange(x, "b c f h w -> (b f) c h w")
x = self.conv(x)
x = self.pixel_shuffle(x)
x = self.blur_down(x)
x = rearrange(x, "(b f) c h w -> b c f h w", b=b, f=f)
return x
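# A minimal shape sketch (hypothetical, illustrative sizes): for scale 1.5 the resampler upsamples H and W by 3
# via PixelShuffleND, then blur-downsamples by stride 2, so an 8x8 frame becomes 12x12; the temporal axis is untouched.
def _rational_resampler_shape_sketch():
    resampler = SpatialRationalResampler(mid_channels=32, scale=1.5)  # num=3, den=2
    x = torch.randn(1, 32, 2, 8, 8)  # (batch, channels, frames, H, W)
    assert resampler(x).shape == (1, 32, 2, 12, 12)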
class ResBlock(nn.Module):
def __init__(
self, channels: int, mid_channels: Optional[int] = None, dims: int = 3
):
super().__init__()
if mid_channels is None:
mid_channels = channels
Conv = nn.Conv2d if dims == 2 else nn.Conv3d
self.conv1 = Conv(channels, mid_channels, kernel_size=3, padding=1)
self.norm1 = nn.GroupNorm(32, mid_channels)
self.conv2 = Conv(mid_channels, channels, kernel_size=3, padding=1)
self.norm2 = nn.GroupNorm(32, channels)
self.activation = nn.SiLU()
def forward(self, x: torch.Tensor) -> torch.Tensor:
residual = x
x = self.conv1(x)
x = self.norm1(x)
x = self.activation(x)
x = self.conv2(x)
x = self.norm2(x)
x = self.activation(x + residual)
return x
class LatentUpsampler(nn.Module):
"""
Model to spatially upsample VAE latents.
Args:
in_channels (`int`): Number of channels in the input latent
mid_channels (`int`): Number of channels in the middle layers
num_blocks_per_stage (`int`): Number of ResBlocks to use in each stage (pre/post upsampling)
dims (`int`): Number of dimensions for convolutions (2 or 3)
spatial_upsample (`bool`): Whether to spatially upsample the latent
temporal_upsample (`bool`): Whether to temporally upsample the latent
"""
def __init__(
self,
in_channels: int = 128,
mid_channels: int = 512,
num_blocks_per_stage: int = 4,
dims: int = 3,
spatial_upsample: bool = True,
temporal_upsample: bool = False,
spatial_scale: float = 2.0,
rational_resampler: bool = False,
):
super().__init__()
self.in_channels = in_channels
self.mid_channels = mid_channels
self.num_blocks_per_stage = num_blocks_per_stage
self.dims = dims
self.spatial_upsample = spatial_upsample
self.temporal_upsample = temporal_upsample
self.spatial_scale = float(spatial_scale)
self.rational_resampler = rational_resampler
Conv = nn.Conv2d if dims == 2 else nn.Conv3d
self.initial_conv = Conv(in_channels, mid_channels, kernel_size=3, padding=1)
self.initial_norm = nn.GroupNorm(32, mid_channels)
self.initial_activation = nn.SiLU()
self.res_blocks = nn.ModuleList(
[ResBlock(mid_channels, dims=dims) for _ in range(num_blocks_per_stage)]
)
if spatial_upsample and temporal_upsample:
self.upsampler = nn.Sequential(
nn.Conv3d(mid_channels, 8 * mid_channels, kernel_size=3, padding=1),
PixelShuffleND(3),
)
elif spatial_upsample:
if rational_resampler:
self.upsampler = SpatialRationalResampler(
mid_channels=mid_channels, scale=self.spatial_scale
)
else:
self.upsampler = nn.Sequential(
nn.Conv2d(mid_channels, 4 * mid_channels, kernel_size=3, padding=1),
PixelShuffleND(2),
)
elif temporal_upsample:
self.upsampler = nn.Sequential(
nn.Conv3d(mid_channels, 2 * mid_channels, kernel_size=3, padding=1),
PixelShuffleND(1),
)
else:
raise ValueError(
"Either spatial_upsample or temporal_upsample must be True"
)
self.post_upsample_res_blocks = nn.ModuleList(
[ResBlock(mid_channels, dims=dims) for _ in range(num_blocks_per_stage)]
)
self.final_conv = Conv(mid_channels, in_channels, kernel_size=3, padding=1)
def forward(self, latent: torch.Tensor) -> torch.Tensor:
b, c, f, h, w = latent.shape
if self.dims == 2:
x = rearrange(latent, "b c f h w -> (b f) c h w")
x = self.initial_conv(x)
x = self.initial_norm(x)
x = self.initial_activation(x)
for block in self.res_blocks:
x = block(x)
x = self.upsampler(x)
for block in self.post_upsample_res_blocks:
x = block(x)
x = self.final_conv(x)
x = rearrange(x, "(b f) c h w -> b c f h w", b=b, f=f)
else:
x = self.initial_conv(latent)
x = self.initial_norm(x)
x = self.initial_activation(x)
for block in self.res_blocks:
x = block(x)
if self.temporal_upsample:
x = self.upsampler(x)
x = x[:, :, 1:, :, :]
else:
if isinstance(self.upsampler, SpatialRationalResampler):
x = self.upsampler(x)
else:
x = rearrange(x, "b c f h w -> (b f) c h w")
x = self.upsampler(x)
x = rearrange(x, "(b f) c h w -> b c f h w", b=b, f=f)
for block in self.post_upsample_res_blocks:
x = block(x)
x = self.final_conv(x)
return x
@classmethod
def from_config(cls, config):
return cls(
in_channels=config.get("in_channels", 4),
mid_channels=config.get("mid_channels", 128),
num_blocks_per_stage=config.get("num_blocks_per_stage", 4),
dims=config.get("dims", 2),
spatial_upsample=config.get("spatial_upsample", True),
temporal_upsample=config.get("temporal_upsample", False),
spatial_scale=config.get("spatial_scale", 2.0),
rational_resampler=config.get("rational_resampler", False),
)
def config(self):
return {
"_class_name": "LatentUpsampler",
"in_channels": self.in_channels,
"mid_channels": self.mid_channels,
"num_blocks_per_stage": self.num_blocks_per_stage,
"dims": self.dims,
"spatial_upsample": self.spatial_upsample,
"temporal_upsample": self.temporal_upsample,
"spatial_scale": self.spatial_scale,
"rational_resampler": self.rational_resampler,
}
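# A minimal usage sketch (hypothetical sizes): a spatial-only 3D upsampler doubles H and W while keeping the
# frame count; mid_channels must stay divisible by 32 because of the GroupNorm(32, ...) layers.
def _latent_upsampler_shape_sketch():
    upsampler = LatentUpsampler(
        in_channels=4, mid_channels=32, num_blocks_per_stage=1,
        dims=3, spatial_upsample=True, temporal_upsample=False,
    )
    latent = torch.randn(1, 4, 2, 8, 8)  # (batch, channels, frames, H, W)
    assert upsampler(latent).shape == (1, 4, 2, 16, 16)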

View File

@@ -1,47 +1,14 @@
from abc import ABC, abstractmethod
from enum import Enum
import functools
import math
from typing import Dict, Optional, Tuple
from einops import rearrange
import numpy as np
import torch
from torch import nn
import comfy.patcher_extension
import comfy.ldm.modules.attention
import comfy.ldm.common_dit
from einops import rearrange
import math
from typing import Dict, Optional, Tuple
from .symmetric_patchifier import SymmetricPatchifier, latent_to_pixel_coords
def _log_base(x, base):
return np.log(x) / np.log(base)
class LTXRopeType(str, Enum):
INTERLEAVED = "interleaved"
SPLIT = "split"
KEY = "rope_type"
@classmethod
def from_dict(cls, kwargs, default=None):
if default is None:
default = cls.INTERLEAVED
return cls(kwargs.get(cls.KEY, default))
class LTXFrequenciesPrecision(str, Enum):
FLOAT32 = "float32"
FLOAT64 = "float64"
KEY = "frequencies_precision"
@classmethod
def from_dict(cls, kwargs, default=None):
if default is None:
default = cls.FLOAT32
return cls(kwargs.get(cls.KEY, default))
def get_timestep_embedding(
timesteps: torch.Tensor,
@@ -73,7 +40,9 @@ def get_timestep_embedding(
assert len(timesteps.shape) == 1, "Timesteps should be a 1d-array"
half_dim = embedding_dim // 2
exponent = -math.log(max_period) * torch.arange(start=0, end=half_dim, dtype=torch.float32, device=timesteps.device)
exponent = -math.log(max_period) * torch.arange(
start=0, end=half_dim, dtype=torch.float32, device=timesteps.device
)
exponent = exponent / (half_dim - downscale_freq_shift)
emb = torch.exp(exponent)
@@ -105,9 +74,7 @@ class TimestepEmbedding(nn.Module):
post_act_fn: Optional[str] = None,
cond_proj_dim=None,
sample_proj_bias=True,
dtype=None,
device=None,
operations=None,
dtype=None, device=None, operations=None,
):
super().__init__()
@@ -124,9 +91,7 @@ class TimestepEmbedding(nn.Module):
time_embed_dim_out = out_dim
else:
time_embed_dim_out = time_embed_dim
self.linear_2 = operations.Linear(
time_embed_dim, time_embed_dim_out, sample_proj_bias, dtype=dtype, device=device
)
self.linear_2 = operations.Linear(time_embed_dim, time_embed_dim_out, sample_proj_bias, dtype=dtype, device=device)
if post_act_fn is None:
self.post_act = None
@@ -175,22 +140,12 @@ class PixArtAlphaCombinedTimestepSizeEmbeddings(nn.Module):
https://github.com/PixArt-alpha/PixArt-alpha/blob/0f55e922376d8b797edd44d25d0e7464b260dcab/diffusion/model/nets/PixArtMS.py#L164C9-L168C29
"""
def __init__(
self,
embedding_dim,
size_emb_dim,
use_additional_conditions: bool = False,
dtype=None,
device=None,
operations=None,
):
def __init__(self, embedding_dim, size_emb_dim, use_additional_conditions: bool = False, dtype=None, device=None, operations=None):
super().__init__()
self.outdim = size_emb_dim
self.time_proj = Timesteps(num_channels=256, flip_sin_to_cos=True, downscale_freq_shift=0)
self.timestep_embedder = TimestepEmbedding(
in_channels=256, time_embed_dim=embedding_dim, dtype=dtype, device=device, operations=operations
)
self.timestep_embedder = TimestepEmbedding(in_channels=256, time_embed_dim=embedding_dim, dtype=dtype, device=device, operations=operations)
def forward(self, timestep, resolution, aspect_ratio, batch_size, hidden_dtype):
timesteps_proj = self.time_proj(timestep)
@@ -209,22 +164,15 @@ class AdaLayerNormSingle(nn.Module):
use_additional_conditions (`bool`): To use additional conditions for normalization or not.
"""
def __init__(
self, embedding_dim: int, embedding_coefficient: int = 6, use_additional_conditions: bool = False, dtype=None, device=None, operations=None
):
def __init__(self, embedding_dim: int, use_additional_conditions: bool = False, dtype=None, device=None, operations=None):
super().__init__()
self.emb = PixArtAlphaCombinedTimestepSizeEmbeddings(
embedding_dim,
size_emb_dim=embedding_dim // 3,
use_additional_conditions=use_additional_conditions,
dtype=dtype,
device=device,
operations=operations,
embedding_dim, size_emb_dim=embedding_dim // 3, use_additional_conditions=use_additional_conditions, dtype=dtype, device=device, operations=operations
)
self.silu = nn.SiLU()
self.linear = operations.Linear(embedding_dim, embedding_coefficient * embedding_dim, bias=True, dtype=dtype, device=device)
self.linear = operations.Linear(embedding_dim, 6 * embedding_dim, bias=True, dtype=dtype, device=device)
def forward(
self,
@@ -238,7 +186,6 @@ class AdaLayerNormSingle(nn.Module):
embedded_timestep = self.emb(timestep, **added_cond_kwargs, batch_size=batch_size, hidden_dtype=hidden_dtype)
return self.linear(self.silu(embedded_timestep)), embedded_timestep
class PixArtAlphaTextProjection(nn.Module):
"""
Projects caption embeddings. Also handles dropout for classifier-free guidance.
@@ -246,24 +193,18 @@ class PixArtAlphaTextProjection(nn.Module):
Adapted from https://github.com/PixArt-alpha/PixArt-alpha/blob/master/diffusion/model/nets/PixArt_blocks.py
"""
def __init__(
self, in_features, hidden_size, out_features=None, act_fn="gelu_tanh", dtype=None, device=None, operations=None
):
def __init__(self, in_features, hidden_size, out_features=None, act_fn="gelu_tanh", dtype=None, device=None, operations=None):
super().__init__()
if out_features is None:
out_features = hidden_size
self.linear_1 = operations.Linear(
in_features=in_features, out_features=hidden_size, bias=True, dtype=dtype, device=device
)
self.linear_1 = operations.Linear(in_features=in_features, out_features=hidden_size, bias=True, dtype=dtype, device=device)
if act_fn == "gelu_tanh":
self.act_1 = nn.GELU(approximate="tanh")
elif act_fn == "silu":
self.act_1 = nn.SiLU()
else:
raise ValueError(f"Unknown activation function: {act_fn}")
self.linear_2 = operations.Linear(
in_features=hidden_size, out_features=out_features, bias=True, dtype=dtype, device=device
)
self.linear_2 = operations.Linear(in_features=hidden_size, out_features=out_features, bias=True, dtype=dtype, device=device)
def forward(self, caption):
hidden_states = self.linear_1(caption)
@@ -282,28 +223,25 @@ class GELU_approx(nn.Module):
class FeedForward(nn.Module):
def __init__(self, dim, dim_out, mult=4, glu=False, dropout=0.0, dtype=None, device=None, operations=None):
def __init__(self, dim, dim_out, mult=4, glu=False, dropout=0., dtype=None, device=None, operations=None):
super().__init__()
inner_dim = int(dim * mult)
project_in = GELU_approx(dim, inner_dim, dtype=dtype, device=device, operations=operations)
self.net = nn.Sequential(
project_in, nn.Dropout(dropout), operations.Linear(inner_dim, dim_out, dtype=dtype, device=device)
project_in,
nn.Dropout(dropout),
operations.Linear(inner_dim, dim_out, dtype=dtype, device=device)
)
def forward(self, x):
return self.net(x)
def apply_rotary_emb(input_tensor, freqs_cis):
cos_freqs, sin_freqs = freqs_cis[0], freqs_cis[1]
split_pe = freqs_cis[2] if len(freqs_cis) > 2 else False
return (
apply_split_rotary_emb(input_tensor, cos_freqs, sin_freqs)
if split_pe else
apply_interleaved_rotary_emb(input_tensor, cos_freqs, sin_freqs)
)
def apply_interleaved_rotary_emb(input_tensor, cos_freqs, sin_freqs): # TODO: remove duplicate funcs and pick the best/fastest one
def apply_rotary_emb(input_tensor, freqs_cis): #TODO: remove duplicate funcs and pick the best/fastest one
cos_freqs = freqs_cis[0]
sin_freqs = freqs_cis[1]
t_dup = rearrange(input_tensor, "... (d r) -> ... d r", r=2)
t1, t2 = t_dup.unbind(dim=-1)
t_dup = torch.stack((-t2, t1), dim=-1)
@@ -313,37 +251,9 @@ def apply_interleaved_rotary_emb(input_tensor, cos_freqs, sin_freqs): # TODO: r
return out
def apply_split_rotary_emb(input_tensor, cos, sin):
needs_reshape = False
if input_tensor.ndim != 4 and cos.ndim == 4:
B, H, T, _ = cos.shape
input_tensor = input_tensor.reshape(B, T, H, -1).swapaxes(1, 2)
needs_reshape = True
split_input = rearrange(input_tensor, "... (d r) -> ... d r", d=2)
first_half_input = split_input[..., :1, :]
second_half_input = split_input[..., 1:, :]
output = split_input * cos.unsqueeze(-2)
first_half_output = output[..., :1, :]
second_half_output = output[..., 1:, :]
first_half_output.addcmul_(-sin.unsqueeze(-2), second_half_input)
second_half_output.addcmul_(sin.unsqueeze(-2), first_half_input)
output = rearrange(output, "... d r -> ... (d r)")
return output.swapaxes(1, 2).reshape(B, T, -1) if needs_reshape else output
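# Shape-level sketch (not part of the diff): contrasting the two RoPE paths above.
# Interleaved RoPE expects cos/sin repeat-interleaved to the full head dim, while the
# split variant pairs the first and second halves of the head dim. Sizes are
# illustrative; only output shapes are checked here because part of the interleaved
# helper's body is elided by the hunk boundary above.
import torch

B, H, T, D = 1, 2, 4, 8                              # batch, heads, tokens, head_dim (even)
q = torch.randn(B, H, T, D)
freqs = torch.randn(B, H, T, D // 2)

cos_i = freqs.cos().repeat_interleave(2, dim=-1)     # (B, H, T, D)
sin_i = freqs.sin().repeat_interleave(2, dim=-1)
q_interleaved = apply_interleaved_rotary_emb(q, cos_i, sin_i)

q_split = apply_split_rotary_emb(q, freqs.cos(), freqs.sin())  # cos/sin stay (B, H, T, D // 2)

assert q_interleaved.shape == q_split.shape == q.shape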
class CrossAttention(nn.Module):
def __init__(
self,
query_dim,
context_dim=None,
heads=8,
dim_head=64,
dropout=0.0,
attn_precision=None,
dtype=None,
device=None,
operations=None,
):
def __init__(self, query_dim, context_dim=None, heads=8, dim_head=64, dropout=0., attn_precision=None, dtype=None, device=None, operations=None):
super().__init__()
inner_dim = dim_head * heads
context_dim = query_dim if context_dim is None else context_dim
@@ -359,11 +269,9 @@ class CrossAttention(nn.Module):
self.to_k = operations.Linear(context_dim, inner_dim, bias=True, dtype=dtype, device=device)
self.to_v = operations.Linear(context_dim, inner_dim, bias=True, dtype=dtype, device=device)
self.to_out = nn.Sequential(
operations.Linear(inner_dim, query_dim, dtype=dtype, device=device), nn.Dropout(dropout)
)
self.to_out = nn.Sequential(operations.Linear(inner_dim, query_dim, dtype=dtype, device=device), nn.Dropout(dropout))
def forward(self, x, context=None, mask=None, pe=None, k_pe=None, transformer_options={}):
def forward(self, x, context=None, mask=None, pe=None, transformer_options={}):
q = self.to_q(x)
context = x if context is None else context
k = self.to_k(context)
@@ -374,7 +282,7 @@ class CrossAttention(nn.Module):
if pe is not None:
q = apply_rotary_emb(q, pe)
k = apply_rotary_emb(k, pe if k_pe is None else k_pe)
k = apply_rotary_emb(k, pe)
if mask is None:
out = comfy.ldm.modules.attention.optimized_attention(q, k, v, self.heads, attn_precision=self.attn_precision, transformer_options=transformer_options)
@@ -384,495 +292,146 @@ class CrossAttention(nn.Module):
class BasicTransformerBlock(nn.Module):
def __init__(
self, dim, n_heads, d_head, context_dim=None, attn_precision=None, dtype=None, device=None, operations=None
):
def __init__(self, dim, n_heads, d_head, context_dim=None, attn_precision=None, dtype=None, device=None, operations=None):
super().__init__()
self.attn_precision = attn_precision
self.attn1 = CrossAttention(
query_dim=dim,
heads=n_heads,
dim_head=d_head,
context_dim=None,
attn_precision=self.attn_precision,
dtype=dtype,
device=device,
operations=operations,
)
self.attn1 = CrossAttention(query_dim=dim, heads=n_heads, dim_head=d_head, context_dim=None, attn_precision=self.attn_precision, dtype=dtype, device=device, operations=operations)
self.ff = FeedForward(dim, dim_out=dim, glu=True, dtype=dtype, device=device, operations=operations)
self.attn2 = CrossAttention(
query_dim=dim,
context_dim=context_dim,
heads=n_heads,
dim_head=d_head,
attn_precision=self.attn_precision,
dtype=dtype,
device=device,
operations=operations,
)
self.attn2 = CrossAttention(query_dim=dim, context_dim=context_dim, heads=n_heads, dim_head=d_head, attn_precision=self.attn_precision, dtype=dtype, device=device, operations=operations)
self.scale_shift_table = nn.Parameter(torch.empty(6, dim, device=device, dtype=dtype))
def forward(self, x, context=None, attention_mask=None, timestep=None, pe=None, transformer_options={}):
shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp = (self.scale_shift_table[None, None].to(device=x.device, dtype=x.dtype) + timestep.reshape(x.shape[0], timestep.shape[1], self.scale_shift_table.shape[0], -1)).unbind(dim=2)
attn1_input = comfy.ldm.common_dit.rms_norm(x)
attn1_input = torch.addcmul(attn1_input, attn1_input, scale_msa).add_(shift_msa)
attn1_input = self.attn1(attn1_input, pe=pe, transformer_options=transformer_options)
x.addcmul_(attn1_input, gate_msa)
del attn1_input
x += self.attn1(comfy.ldm.common_dit.rms_norm(x) * (1 + scale_msa) + shift_msa, pe=pe, transformer_options=transformer_options) * gate_msa
x += self.attn2(x, context=context, mask=attention_mask, transformer_options=transformer_options)
y = comfy.ldm.common_dit.rms_norm(x)
y = torch.addcmul(y, y, scale_mlp).add_(shift_mlp)
x.addcmul_(self.ff(y), gate_mlp)
y = comfy.ldm.common_dit.rms_norm(x) * (1 + scale_mlp) + shift_mlp
x += self.ff(y) * gate_mlp
return x
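# Standalone numerical check (illustrative): the fused in-place form used above,
# torch.addcmul(x, x, scale).add_(shift), matches the explicit modulation
# x * (1 + scale) + shift written out on the other side of this diff.
import torch

x = torch.randn(2, 4, 8)
scale = torch.randn(2, 1, 8)
shift = torch.randn(2, 1, 8)

explicit = x * (1 + scale) + shift
fused = torch.addcmul(x, x, scale).add_(shift)       # x + x*scale, then += shift
assert torch.allclose(explicit, fused, atol=1e-6)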
def get_fractional_positions(indices_grid, max_pos):
n_pos_dims = indices_grid.shape[1]
assert n_pos_dims == len(max_pos), f'Number of position dimensions ({n_pos_dims}) must match max_pos length ({len(max_pos)})'
fractional_positions = torch.stack(
[indices_grid[:, i] / max_pos[i] for i in range(n_pos_dims)],
axis=-1,
[
indices_grid[:, i] / max_pos[i]
for i in range(3)
],
dim=-1,
)
return fractional_positions
@functools.lru_cache(maxsize=5)
def generate_freq_grid_np(positional_embedding_theta, positional_embedding_max_pos_count, inner_dim, _ = None):
theta = positional_embedding_theta
def precompute_freqs_cis(indices_grid, dim, out_dtype, theta=10000.0, max_pos=[20, 2048, 2048]):
dtype = torch.float32 #self.dtype
fractional_positions = get_fractional_positions(indices_grid, max_pos)
start = 1
end = theta
n_elem = 2 * positional_embedding_max_pos_count
pow_indices = np.power(
theta,
np.linspace(
_log_base(start, theta),
_log_base(end, theta),
inner_dim // n_elem,
dtype=np.float64,
),
)
return torch.tensor(pow_indices * math.pi / 2, dtype=torch.float32)
def generate_freq_grid_pytorch(positional_embedding_theta, positional_embedding_max_pos_count, inner_dim, device):
theta = positional_embedding_theta
start = 1
end = theta
n_elem = 2 * positional_embedding_max_pos_count
device = fractional_positions.device
indices = theta ** (
torch.linspace(
math.log(start, theta),
math.log(end, theta),
inner_dim // n_elem,
dim // 6,
device=device,
dtype=torch.float32,
dtype=dtype,
)
)
indices = indices.to(dtype=torch.float32)
indices = indices.to(dtype=dtype)
indices = indices * math.pi / 2
return indices
def generate_freqs(indices, indices_grid, max_pos, use_middle_indices_grid):
if use_middle_indices_grid:
assert len(indices_grid.shape) == 4 and indices_grid.shape[-1] == 2
indices_grid_start, indices_grid_end = indices_grid[..., 0], indices_grid[..., 1]
indices_grid = (indices_grid_start + indices_grid_end) / 2.0
elif len(indices_grid.shape) == 4:
indices_grid = indices_grid[..., 0]
# Get fractional positions and compute frequency indices
fractional_positions = get_fractional_positions(indices_grid, max_pos)
indices = indices.to(device=fractional_positions.device)
freqs = (
(indices * (fractional_positions.unsqueeze(-1) * 2 - 1))
.transpose(-1, -2)
.flatten(2)
)
return freqs
def interleaved_freqs_cis(freqs, pad_size):
cos_freq = freqs.cos().repeat_interleave(2, dim=-1)
sin_freq = freqs.sin().repeat_interleave(2, dim=-1)
if pad_size != 0:
cos_padding = torch.ones_like(cos_freq[:, :, : pad_size])
sin_padding = torch.zeros_like(cos_freq[:, :, : pad_size])
if dim % 6 != 0:
cos_padding = torch.ones_like(cos_freq[:, :, : dim % 6])
sin_padding = torch.zeros_like(cos_freq[:, :, : dim % 6])
cos_freq = torch.cat([cos_padding, cos_freq], dim=-1)
sin_freq = torch.cat([sin_padding, sin_freq], dim=-1)
return cos_freq, sin_freq
return cos_freq.to(out_dtype), sin_freq.to(out_dtype)
def split_freqs_cis(freqs, pad_size, num_attention_heads):
cos_freq = freqs.cos()
sin_freq = freqs.sin()
if pad_size != 0:
cos_padding = torch.ones_like(cos_freq[:, :, :pad_size])
sin_padding = torch.zeros_like(sin_freq[:, :, :pad_size])
class LTXVModel(torch.nn.Module):
def __init__(self,
in_channels=128,
cross_attention_dim=2048,
attention_head_dim=64,
num_attention_heads=32,
cos_freq = torch.concatenate([cos_padding, cos_freq], axis=-1)
sin_freq = torch.concatenate([sin_padding, sin_freq], axis=-1)
caption_channels=4096,
num_layers=28,
# Reshape freqs to be compatible with multi-head attention
B, T, half_HD = cos_freq.shape
cos_freq = cos_freq.reshape(B, T, num_attention_heads, half_HD // num_attention_heads)
sin_freq = sin_freq.reshape(B, T, num_attention_heads, half_HD // num_attention_heads)
cos_freq = torch.swapaxes(cos_freq, 1, 2) # (B,H,T,D//2)
sin_freq = torch.swapaxes(sin_freq, 1, 2) # (B,H,T,D//2)
return cos_freq, sin_freq
class LTXBaseModel(torch.nn.Module, ABC):
"""
Abstract base class for LTX models (Lightricks Transformer models).
This class defines the common interface and shared functionality for all LTX models,
including LTXV (video) and LTXAV (audio-video) variants.
"""
def __init__(
self,
in_channels: int,
cross_attention_dim: int,
attention_head_dim: int,
num_attention_heads: int,
caption_channels: int,
num_layers: int,
positional_embedding_theta: float = 10000.0,
positional_embedding_max_pos: list = [20, 2048, 2048],
causal_temporal_positioning: bool = False,
vae_scale_factors: tuple = (8, 32, 32),
use_middle_indices_grid=False,
timestep_scale_multiplier = 1000.0,
dtype=None,
device=None,
operations=None,
**kwargs,
):
positional_embedding_theta=10000.0,
positional_embedding_max_pos=[20, 2048, 2048],
causal_temporal_positioning=False,
vae_scale_factors=(8, 32, 32),
dtype=None, device=None, operations=None, **kwargs):
super().__init__()
self.generator = None
self.vae_scale_factors = vae_scale_factors
self.use_middle_indices_grid = use_middle_indices_grid
self.dtype = dtype
self.in_channels = in_channels
self.cross_attention_dim = cross_attention_dim
self.attention_head_dim = attention_head_dim
self.num_attention_heads = num_attention_heads
self.caption_channels = caption_channels
self.num_layers = num_layers
self.positional_embedding_theta = positional_embedding_theta
self.positional_embedding_max_pos = positional_embedding_max_pos
self.split_positional_embedding = LTXRopeType.from_dict(kwargs)
self.freq_grid_generator = (
generate_freq_grid_np if LTXFrequenciesPrecision.from_dict(kwargs) == LTXFrequenciesPrecision.FLOAT64
else generate_freq_grid_pytorch
)
self.causal_temporal_positioning = causal_temporal_positioning
self.operations = operations
self.timestep_scale_multiplier = timestep_scale_multiplier
# Common dimensions
self.inner_dim = num_attention_heads * attention_head_dim
self.out_channels = in_channels
self.inner_dim = num_attention_heads * attention_head_dim
self.causal_temporal_positioning = causal_temporal_positioning
# Initialize common components
self._init_common_components(device, dtype)
# Initialize model-specific components
self._init_model_components(device, dtype, **kwargs)
# Initialize transformer blocks
self._init_transformer_blocks(device, dtype, **kwargs)
# Initialize output components
self._init_output_components(device, dtype)
def _init_common_components(self, device, dtype):
"""Initialize components common to all LTX models
- patchify_proj: Linear projection for patchifying input
- adaln_single: AdaLN layer for timestep embedding
- caption_projection: Linear projection for caption embedding
"""
self.patchify_proj = self.operations.Linear(
self.in_channels, self.inner_dim, bias=True, dtype=dtype, device=device
)
self.patchify_proj = operations.Linear(in_channels, self.inner_dim, bias=True, dtype=dtype, device=device)
self.adaln_single = AdaLayerNormSingle(
self.inner_dim, use_additional_conditions=False, dtype=dtype, device=device, operations=self.operations
self.inner_dim, use_additional_conditions=False, dtype=dtype, device=device, operations=operations
)
# self.adaln_single.linear = operations.Linear(self.inner_dim, 4 * self.inner_dim, bias=True, dtype=dtype, device=device)
self.caption_projection = PixArtAlphaTextProjection(
in_features=self.caption_channels,
hidden_size=self.inner_dim,
dtype=dtype,
device=device,
operations=self.operations,
in_features=caption_channels, hidden_size=self.inner_dim, dtype=dtype, device=device, operations=operations
)
@abstractmethod
def _init_model_components(self, device, dtype, **kwargs):
"""Initialize model-specific components. Must be implemented by subclasses."""
pass
@abstractmethod
def _init_transformer_blocks(self, device, dtype, **kwargs):
"""Initialize transformer blocks. Must be implemented by subclasses."""
pass
@abstractmethod
def _init_output_components(self, device, dtype):
"""Initialize output components. Must be implemented by subclasses."""
pass
@abstractmethod
def _process_input(self, x, keyframe_idxs, denoise_mask, **kwargs):
"""Process input data. Must be implemented by subclasses."""
pass
@abstractmethod
def _process_transformer_blocks(self, x, context, attention_mask, timestep, pe, **kwargs):
"""Process transformer blocks. Must be implemented by subclasses."""
pass
@abstractmethod
def _process_output(self, x, embedded_timestep, keyframe_idxs, **kwargs):
"""Process output data. Must be implemented by subclasses."""
pass
def _prepare_timestep(self, timestep, batch_size, hidden_dtype, **kwargs):
"""Prepare timestep embeddings."""
grid_mask = kwargs.get("grid_mask", None)
if grid_mask is not None:
timestep = timestep[:, grid_mask]
timestep = timestep * self.timestep_scale_multiplier
timestep, embedded_timestep = self.adaln_single(
timestep.flatten(),
{"resolution": None, "aspect_ratio": None},
batch_size=batch_size,
hidden_dtype=hidden_dtype,
)
# Second dimension is 1 or number of tokens (if timestep_per_token)
timestep = timestep.view(batch_size, -1, timestep.shape[-1])
embedded_timestep = embedded_timestep.view(batch_size, -1, embedded_timestep.shape[-1])
return timestep, embedded_timestep
def _prepare_context(self, context, batch_size, x, attention_mask=None):
"""Prepare context for transformer blocks."""
if self.caption_projection is not None:
context = self.caption_projection(context)
context = context.view(batch_size, -1, x.shape[-1])
return context, attention_mask
def _precompute_freqs_cis(
self,
indices_grid,
dim,
out_dtype,
theta=10000.0,
max_pos=[20, 2048, 2048],
use_middle_indices_grid=False,
num_attention_heads=32,
):
split_mode = self.split_positional_embedding == LTXRopeType.SPLIT
indices = self.freq_grid_generator(theta, indices_grid.shape[1], dim, indices_grid.device)
freqs = generate_freqs(indices, indices_grid, max_pos, use_middle_indices_grid)
if split_mode:
expected_freqs = dim // 2
current_freqs = freqs.shape[-1]
pad_size = expected_freqs - current_freqs
cos_freq, sin_freq = split_freqs_cis(freqs, pad_size, num_attention_heads)
else:
# 2 for cos and sin, times the number of position dims: 3 for (t, x, y) or 1 for temporal-only
n_elem = 2 * indices_grid.shape[1]
cos_freq, sin_freq = interleaved_freqs_cis(freqs, dim % n_elem)
return cos_freq.to(out_dtype), sin_freq.to(out_dtype), split_mode
def _prepare_positional_embeddings(self, pixel_coords, frame_rate, x_dtype):
"""Prepare positional embeddings."""
fractional_coords = pixel_coords.to(torch.float32)
fractional_coords[:, 0] = fractional_coords[:, 0] * (1.0 / frame_rate)
pe = self._precompute_freqs_cis(
fractional_coords,
dim=self.inner_dim,
out_dtype=x_dtype,
max_pos=self.positional_embedding_max_pos,
use_middle_indices_grid=self.use_middle_indices_grid,
num_attention_heads=self.num_attention_heads,
)
return pe
def _prepare_attention_mask(self, attention_mask, x_dtype):
"""Prepare attention mask."""
if attention_mask is not None and not torch.is_floating_point(attention_mask):
attention_mask = (attention_mask - 1).to(x_dtype).reshape(
(attention_mask.shape[0], 1, -1, attention_mask.shape[-1])
) * torch.finfo(x_dtype).max
return attention_mask
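# Tiny standalone example (shapes illustrative) of the additive-mask construction in
# _prepare_attention_mask above: a 0/1 keep-mask becomes 0 where attention is allowed
# and a very large negative bias where it is masked, ready to be added to the
# attention logits.
import torch

keep = torch.tensor([[1, 1, 0]])                     # (batch, seq): 1 = attend, 0 = masked
bias = (keep - 1).to(torch.float32).reshape(keep.shape[0], 1, -1, keep.shape[-1]) * torch.finfo(torch.float32).max
# bias -> [[[[0.0, 0.0, -3.4028e+38]]]]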
def forward(
self, x, timestep, context, attention_mask, frame_rate=25, transformer_options={}, keyframe_idxs=None, denoise_mask=None, **kwargs
):
"""
Forward pass for LTX models.
Args:
x: Input tensor
timestep: Timestep tensor
context: Context tensor (e.g., text embeddings)
attention_mask: Attention mask tensor
frame_rate: Frame rate for temporal processing
transformer_options: Additional options for transformer blocks
keyframe_idxs: Keyframe indices for temporal processing
denoise_mask: Per-token denoise mask; tokens with negative mask values are dropped when keyframe_idxs is given
**kwargs: Additional keyword arguments
Returns:
Processed output tensor
"""
return comfy.patcher_extension.WrapperExecutor.new_class_executor(
self._forward,
self,
comfy.patcher_extension.get_all_wrappers(
comfy.patcher_extension.WrappersMP.DIFFUSION_MODEL, transformer_options
),
).execute(x, timestep, context, attention_mask, frame_rate, transformer_options, keyframe_idxs, denoise_mask=denoise_mask, **kwargs)
def _forward(
self, x, timestep, context, attention_mask, frame_rate=25, transformer_options={}, keyframe_idxs=None, denoise_mask=None, **kwargs
):
"""
Internal forward pass for LTX models.
Args:
x: Input tensor
timestep: Timestep tensor
context: Context tensor (e.g., text embeddings)
attention_mask: Attention mask tensor
frame_rate: Frame rate for temporal processing
transformer_options: Additional options for transformer blocks
keyframe_idxs: Keyframe indices for temporal processing
denoise_mask: Per-token denoise mask; tokens with negative mask values are dropped when keyframe_idxs is given
**kwargs: Additional keyword arguments
Returns:
Processed output tensor
"""
if isinstance(x, list):
input_dtype = x[0].dtype
batch_size = x[0].shape[0]
else:
input_dtype = x.dtype
batch_size = x.shape[0]
# Process input
merged_args = {**transformer_options, **kwargs}
x, pixel_coords, additional_args = self._process_input(x, keyframe_idxs, denoise_mask, **merged_args)
merged_args.update(additional_args)
# Prepare timestep and context
timestep, embedded_timestep = self._prepare_timestep(timestep, batch_size, input_dtype, **merged_args)
context, attention_mask = self._prepare_context(context, batch_size, x, attention_mask)
# Prepare attention mask and positional embeddings
attention_mask = self._prepare_attention_mask(attention_mask, input_dtype)
pe = self._prepare_positional_embeddings(pixel_coords, frame_rate, input_dtype)
# Process transformer blocks
x = self._process_transformer_blocks(
x, context, attention_mask, timestep, pe, transformer_options=transformer_options, **merged_args
)
# Process output
x = self._process_output(x, embedded_timestep, keyframe_idxs, **merged_args)
return x
class LTXVModel(LTXBaseModel):
"""LTXV model for video generation."""
def __init__(
self,
in_channels=128,
cross_attention_dim=2048,
attention_head_dim=64,
num_attention_heads=32,
caption_channels=4096,
num_layers=28,
positional_embedding_theta=10000.0,
positional_embedding_max_pos=[20, 2048, 2048],
causal_temporal_positioning=False,
vae_scale_factors=(8, 32, 32),
use_middle_indices_grid=False,
timestep_scale_multiplier = 1000.0,
dtype=None,
device=None,
operations=None,
**kwargs,
):
super().__init__(
in_channels=in_channels,
cross_attention_dim=cross_attention_dim,
attention_head_dim=attention_head_dim,
num_attention_heads=num_attention_heads,
caption_channels=caption_channels,
num_layers=num_layers,
positional_embedding_theta=positional_embedding_theta,
positional_embedding_max_pos=positional_embedding_max_pos,
causal_temporal_positioning=causal_temporal_positioning,
vae_scale_factors=vae_scale_factors,
use_middle_indices_grid=use_middle_indices_grid,
timestep_scale_multiplier=timestep_scale_multiplier,
dtype=dtype,
device=device,
operations=operations,
**kwargs,
)
def _init_model_components(self, device, dtype, **kwargs):
"""Initialize LTXV-specific components."""
# No additional components needed for LTXV beyond base class
pass
def _init_transformer_blocks(self, device, dtype, **kwargs):
"""Initialize transformer blocks for LTXV."""
self.transformer_blocks = nn.ModuleList(
[
BasicTransformerBlock(
self.inner_dim,
self.num_attention_heads,
self.attention_head_dim,
context_dim=self.cross_attention_dim,
dtype=dtype,
device=device,
operations=self.operations,
num_attention_heads,
attention_head_dim,
context_dim=cross_attention_dim,
# attn_precision=attn_precision,
dtype=dtype, device=device, operations=operations
)
for _ in range(self.num_layers)
for d in range(num_layers)
]
)
def _init_output_components(self, device, dtype):
"""Initialize output components for LTXV."""
self.scale_shift_table = nn.Parameter(torch.empty(2, self.inner_dim, dtype=dtype, device=device))
self.norm_out = self.operations.LayerNorm(
self.inner_dim, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device
)
self.proj_out = self.operations.Linear(self.inner_dim, self.out_channels, dtype=dtype, device=device)
self.patchifier = SymmetricPatchifier(1, start_end=True)
self.norm_out = operations.LayerNorm(self.inner_dim, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
self.proj_out = operations.Linear(self.inner_dim, self.out_channels, dtype=dtype, device=device)
self.patchifier = SymmetricPatchifier(1)
def forward(self, x, timestep, context, attention_mask, frame_rate=25, transformer_options={}, keyframe_idxs=None, **kwargs):
return comfy.patcher_extension.WrapperExecutor.new_class_executor(
self._forward,
self,
comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.DIFFUSION_MODEL, transformer_options)
).execute(x, timestep, context, attention_mask, frame_rate, transformer_options, keyframe_idxs, **kwargs)
def _forward(self, x, timestep, context, attention_mask, frame_rate=25, transformer_options={}, keyframe_idxs=None, **kwargs):
patches_replace = transformer_options.get("patches_replace", {})
orig_shape = list(x.shape)
def _process_input(self, x, keyframe_idxs, denoise_mask, **kwargs):
"""Process input for LTXV."""
additional_args = {"orig_shape": list(x.shape)}
x, latent_coords = self.patchifier.patchify(x)
pixel_coords = latent_to_pixel_coords(
latent_coords=latent_coords,
@@ -880,30 +439,44 @@ class LTXVModel(LTXBaseModel):
causal_fix=self.causal_temporal_positioning,
)
grid_mask = None
if keyframe_idxs is not None:
additional_args.update({ "orig_patchified_shape": list(x.shape)})
denoise_mask = self.patchifier.patchify(denoise_mask)[0]
grid_mask = ~torch.any(denoise_mask < 0, dim=-1)[0]
additional_args.update({"grid_mask": grid_mask})
x = x[:, grid_mask, :]
pixel_coords = pixel_coords[:, :, grid_mask, ...]
pixel_coords[:, :, -keyframe_idxs.shape[2]:] = keyframe_idxs
kf_grid_mask = grid_mask[-keyframe_idxs.shape[2]:]
keyframe_idxs = keyframe_idxs[..., kf_grid_mask, :]
pixel_coords[:, :, -keyframe_idxs.shape[2]:, :] = keyframe_idxs
fractional_coords = pixel_coords.to(torch.float32)
fractional_coords[:, 0] = fractional_coords[:, 0] * (1.0 / frame_rate)
x = self.patchify_proj(x)
return x, pixel_coords, additional_args
timestep = timestep * 1000.0
if attention_mask is not None and not torch.is_floating_point(attention_mask):
attention_mask = (attention_mask - 1).to(x.dtype).reshape((attention_mask.shape[0], 1, -1, attention_mask.shape[-1])) * torch.finfo(x.dtype).max
pe = precompute_freqs_cis(fractional_coords, dim=self.inner_dim, out_dtype=x.dtype)
batch_size = x.shape[0]
timestep, embedded_timestep = self.adaln_single(
timestep.flatten(),
{"resolution": None, "aspect_ratio": None},
batch_size=batch_size,
hidden_dtype=x.dtype,
)
# Second dimension is 1 or number of tokens (if timestep_per_token)
timestep = timestep.view(batch_size, -1, timestep.shape[-1])
embedded_timestep = embedded_timestep.view(
batch_size, -1, embedded_timestep.shape[-1]
)
# 2. Blocks
if self.caption_projection is not None:
batch_size = x.shape[0]
context = self.caption_projection(context)
context = context.view(
batch_size, -1, x.shape[-1]
)
def _process_transformer_blocks(self, x, context, attention_mask, timestep, pe, transformer_options={}, **kwargs):
"""Process transformer blocks for LTXV."""
patches_replace = transformer_options.get("patches_replace", {})
blocks_replace = patches_replace.get("dit", {})
for i, block in enumerate(self.transformer_blocks):
if ("double_block", i) in blocks_replace:
def block_wrap(args):
out = {}
out["img"] = block(args["img"], context=args["txt"], attention_mask=args["attention_mask"], timestep=args["vec"], pe=args["pe"], transformer_options=args["transformer_options"])
@@ -921,28 +494,16 @@ class LTXVModel(LTXBaseModel):
transformer_options=transformer_options,
)
return x
def _process_output(self, x, embedded_timestep, keyframe_idxs, **kwargs):
"""Process output for LTXV."""
# Apply scale-shift modulation
# 3. Output
scale_shift_values = (
self.scale_shift_table[None, None].to(device=x.device, dtype=x.dtype) + embedded_timestep[:, :, None]
)
shift, scale = scale_shift_values[:, :, 0], scale_shift_values[:, :, 1]
x = self.norm_out(x)
# Modulation
x = x * (1 + scale) + shift
x = self.proj_out(x)
if keyframe_idxs is not None:
grid_mask = kwargs["grid_mask"]
orig_patchified_shape = kwargs["orig_patchified_shape"]
full_x = torch.zeros(orig_patchified_shape, dtype=x.dtype, device=x.device)
full_x[:, grid_mask, :] = x
x = full_x
# Unpatchify to restore original dimensions
orig_shape = kwargs["orig_shape"]
x = self.patchifier.unpatchify(
latents=x,
output_height=orig_shape[3],

Some files were not shown because too many files have changed in this diff.