Add the websocket library for automated tests

Add a missing file
It looks like this file was being excluded by .gitignore. There's probably a better place to put it, but I'm not sure where that would be.
2026-04-16 20:51:34 +00:00 · 2025-06-13 21:51:32 -07:00 · 2025-06-13 21:45:21 -07:00 · 2025-06-13 21:39:26 -07:00 · 2025-06-13 21:39:26 -07:00
96 changed files with 1193 additions and 158328 deletions
--- a/.ci/windows_base_files/README_VERY_IMPORTANT.txt
+++ b/.ci/windows_base_files/README_VERY_IMPORTANT.txt
@@ -4,9 +4,6 @@ if you have a NVIDIA gpu:

 run_nvidia_gpu.bat

-if you want to enable the fast fp16 accumulation (faster for fp16 models with slightly less quality):
-
-run_nvidia_gpu_fast_fp16_accumulation.bat


 To run it in slow CPU mode:
--- a/.github/workflows/check-line-endings.yml
+++ b/.github/workflows/check-line-endings.yml
@@ -1,40 +0,0 @@
-name: Check for Windows Line Endings
-
-on:
-  pull_request:
-    branches: ['*'] # Trigger on all pull requests to any branch
-
-jobs:
-  check-line-endings:
-    runs-on: ubuntu-latest
-
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0 # Fetch all history to compare changes
-
-      - name: Check for Windows line endings (CRLF)
-        run: |
-          # Get the list of changed files in the PR
-          CHANGED_FILES=$(git diff --name-only ${{ github.event.pull_request.base.sha }}..${{ github.event.pull_request.head.sha }})
-
-          # Flag to track if CRLF is found
-          CRLF_FOUND=false
-
-          # Loop through each changed file
-          for FILE in $CHANGED_FILES; do
-            # Check if the file exists and is a text file
-            if [ -f "$FILE" ] && file "$FILE" | grep -q "text"; then
-              # Check for CRLF line endings
-              if grep -UP '\r$' "$FILE"; then
-                echo "Error: Windows line endings (CRLF) detected in $FILE"
-                CRLF_FOUND=true
-              fi
-            fi
-          done
-
-          # Exit with error if CRLF was found
-          if [ "$CRLF_FOUND" = true ]; then
-            exit 1
-          fi
--- a/.github/workflows/release-webhook.yml
+++ b/.github/workflows/release-webhook.yml
@@ -1,108 +0,0 @@
-name: Release Webhook
-
-on:
-  release:
-    types: [published]
-
-jobs:
-  send-webhook:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Send release webhook
-        env:
-          WEBHOOK_URL: ${{ secrets.RELEASE_GITHUB_WEBHOOK_URL }}
-          WEBHOOK_SECRET: ${{ secrets.RELEASE_GITHUB_WEBHOOK_SECRET }}
-        run: |
-          # Generate UUID for delivery ID
-          DELIVERY_ID=$(uuidgen)
-          HOOK_ID="release-webhook-$(date +%s)"
-          
-          # Create webhook payload matching GitHub release webhook format
-          PAYLOAD=$(cat <<EOF
-          {
-            "action": "published",
-            "release": {
-              "id": ${{ github.event.release.id }},
-              "node_id": "${{ github.event.release.node_id }}",
-              "url": "${{ github.event.release.url }}",
-              "html_url": "${{ github.event.release.html_url }}",
-              "assets_url": "${{ github.event.release.assets_url }}",
-              "upload_url": "${{ github.event.release.upload_url }}",
-              "tag_name": "${{ github.event.release.tag_name }}",
-              "target_commitish": "${{ github.event.release.target_commitish }}",
-              "name": ${{ toJSON(github.event.release.name) }},
-              "body": ${{ toJSON(github.event.release.body) }},
-              "draft": ${{ github.event.release.draft }},
-              "prerelease": ${{ github.event.release.prerelease }},
-              "created_at": "${{ github.event.release.created_at }}",
-              "published_at": "${{ github.event.release.published_at }}",
-              "author": {
-                "login": "${{ github.event.release.author.login }}",
-                "id": ${{ github.event.release.author.id }},
-                "node_id": "${{ github.event.release.author.node_id }}",
-                "avatar_url": "${{ github.event.release.author.avatar_url }}",
-                "url": "${{ github.event.release.author.url }}",
-                "html_url": "${{ github.event.release.author.html_url }}",
-                "type": "${{ github.event.release.author.type }}",
-                "site_admin": ${{ github.event.release.author.site_admin }}
-              },
-              "tarball_url": "${{ github.event.release.tarball_url }}",
-              "zipball_url": "${{ github.event.release.zipball_url }}",
-              "assets": ${{ toJSON(github.event.release.assets) }}
-            },
-            "repository": {
-              "id": ${{ github.event.repository.id }},
-              "node_id": "${{ github.event.repository.node_id }}",
-              "name": "${{ github.event.repository.name }}",
-              "full_name": "${{ github.event.repository.full_name }}",
-              "private": ${{ github.event.repository.private }},
-              "owner": {
-                "login": "${{ github.event.repository.owner.login }}",
-                "id": ${{ github.event.repository.owner.id }},
-                "node_id": "${{ github.event.repository.owner.node_id }}",
-                "avatar_url": "${{ github.event.repository.owner.avatar_url }}",
-                "url": "${{ github.event.repository.owner.url }}",
-                "html_url": "${{ github.event.repository.owner.html_url }}",
-                "type": "${{ github.event.repository.owner.type }}",
-                "site_admin": ${{ github.event.repository.owner.site_admin }}
-              },
-              "html_url": "${{ github.event.repository.html_url }}",
-              "clone_url": "${{ github.event.repository.clone_url }}",
-              "git_url": "${{ github.event.repository.git_url }}",
-              "ssh_url": "${{ github.event.repository.ssh_url }}",
-              "url": "${{ github.event.repository.url }}",
-              "created_at": "${{ github.event.repository.created_at }}",
-              "updated_at": "${{ github.event.repository.updated_at }}",
-              "pushed_at": "${{ github.event.repository.pushed_at }}",
-              "default_branch": "${{ github.event.repository.default_branch }}",
-              "fork": ${{ github.event.repository.fork }}
-            },
-            "sender": {
-              "login": "${{ github.event.sender.login }}",
-              "id": ${{ github.event.sender.id }},
-              "node_id": "${{ github.event.sender.node_id }}",
-              "avatar_url": "${{ github.event.sender.avatar_url }}",
-              "url": "${{ github.event.sender.url }}",
-              "html_url": "${{ github.event.sender.html_url }}",
-              "type": "${{ github.event.sender.type }}",
-              "site_admin": ${{ github.event.sender.site_admin }}
-            }
-          }
-          EOF
-          )
-          
-          # Generate HMAC-SHA256 signature
-          SIGNATURE=$(echo -n "$PAYLOAD" | openssl dgst -sha256 -hmac "$WEBHOOK_SECRET" -hex | cut -d' ' -f2)
-          
-          # Send webhook with required headers
-          curl -X POST "$WEBHOOK_URL" \
-            -H "Content-Type: application/json" \
-            -H "X-GitHub-Event: release" \
-            -H "X-GitHub-Delivery: $DELIVERY_ID" \
-            -H "X-GitHub-Hook-ID: $HOOK_ID" \
-            -H "X-Hub-Signature-256: sha256=$SIGNATURE" \
-            -H "User-Agent: GitHub-Actions-Webhook/1.0" \
-            -d "$PAYLOAD" \
-            --fail --silent --show-error
-          
-          echo "✅ Release webhook sent successfully"
--- a/.github/workflows/stable-release.yml
+++ b/.github/workflows/stable-release.yml
@@ -102,4 +102,5 @@ jobs:
          file: ComfyUI_windows_portable_nvidia.7z
          tag: ${{ inputs.git_tag }}
          overwrite: true
-          draft: true
+          prerelease: true
+          make_latest: false
--- a/.github/workflows/test-unit.yml
+++ b/.github/workflows/test-unit.yml
@@ -28,3 +28,7 @@ jobs:
      run: |
        pip install -r tests-unit/requirements.txt
        python -m pytest tests-unit
+    - name: Run Execution Model Tests
+      run: |
+        python -m pytest tests/inference/test_execution.py
+
--- a/.github/workflows/windows_release_nightly_pytorch.yml
+++ b/.github/workflows/windows_release_nightly_pytorch.yml
@@ -7,7 +7,7 @@ on:
        description: 'cuda version'
        required: true
        type: string
-        default: "129"
+        default: "128"

      python_minor:
        description: 'python minor version'
@@ -19,7 +19,7 @@ on:
        description: 'python patch version'
        required: true
        type: string
-        default: "5"
+        default: "2"
 #  push:
 #    branches:
 #      - master
@@ -53,8 +53,6 @@ jobs:
            ls ../temp_wheel_dir
            ./python.exe -s -m pip install --pre ../temp_wheel_dir/*
            sed -i '1i../ComfyUI' ./python3${{ inputs.python_minor }}._pth
-
-            rm ./Lib/site-packages/torch/lib/dnnl.lib #I don't think this is actually used and I need the space
            cd ..

            git clone --depth 1 https://github.com/comfyanonymous/taesd
--- a/README.md
+++ b/README.md
@@ -55,7 +55,7 @@ See what ComfyUI can do with the [example workflows](https://comfyanonymous.gith
 ## Features
 - Nodes/graph/flowchart interface to experiment and create complex Stable Diffusion workflows without needing to code anything.
 - Image Models
-   - SD1.x, SD2.x ([unCLIP](https://comfyanonymous.github.io/ComfyUI_examples/unclip/))
+   - SD1.x, SD2.x,
   - [SDXL](https://comfyanonymous.github.io/ComfyUI_examples/sdxl/), [SDXL Turbo](https://comfyanonymous.github.io/ComfyUI_examples/sdturbo/)
   - [Stable Cascade](https://comfyanonymous.github.io/ComfyUI_examples/stable_cascade/)
   - [SD3 and SD3.5](https://comfyanonymous.github.io/ComfyUI_examples/sd3/)
@@ -65,19 +65,13 @@ See what ComfyUI can do with the [example workflows](https://comfyanonymous.gith
   - [Flux](https://comfyanonymous.github.io/ComfyUI_examples/flux/)
   - [Lumina Image 2.0](https://comfyanonymous.github.io/ComfyUI_examples/lumina2/)
   - [HiDream](https://comfyanonymous.github.io/ComfyUI_examples/hidream/)
-   - [Cosmos Predict2](https://comfyanonymous.github.io/ComfyUI_examples/cosmos_predict2/)
- Image Editing Models
-   - [Omnigen 2](https://comfyanonymous.github.io/ComfyUI_examples/omnigen/)
-   - [Flux Kontext](https://comfyanonymous.github.io/ComfyUI_examples/flux/#flux-kontext-image-editing-model)
-   - [HiDream E1.1](https://comfyanonymous.github.io/ComfyUI_examples/hidream/#hidream-e11)
 - Video Models
   - [Stable Video Diffusion](https://comfyanonymous.github.io/ComfyUI_examples/video/)
   - [Mochi](https://comfyanonymous.github.io/ComfyUI_examples/mochi/)
   - [LTX-Video](https://comfyanonymous.github.io/ComfyUI_examples/ltxv/)
   - [Hunyuan Video](https://comfyanonymous.github.io/ComfyUI_examples/hunyuan_video/)
-   - [Nvidia Cosmos](https://comfyanonymous.github.io/ComfyUI_examples/cosmos/) and [Cosmos Predict2](https://comfyanonymous.github.io/ComfyUI_examples/cosmos_predict2/)
+   - [Nvidia Cosmos](https://comfyanonymous.github.io/ComfyUI_examples/cosmos/)
   - [Wan 2.1](https://comfyanonymous.github.io/ComfyUI_examples/wan/)
-   - [Wan 2.2](https://comfyanonymous.github.io/ComfyUI_examples/wan22/)
 - Audio Models
   - [Stable Audio](https://comfyanonymous.github.io/ComfyUI_examples/audio/)
   - [ACE Step](https://comfyanonymous.github.io/ComfyUI_examples/audio/)
@@ -85,10 +79,9 @@ See what ComfyUI can do with the [example workflows](https://comfyanonymous.gith
   - [Hunyuan3D 2.0](https://docs.comfy.org/tutorials/3d/hunyuan3D-2)
 - Asynchronous Queue system
 - Many optimizations: Only re-executes the parts of the workflow that changes between executions.
- Smart memory management: can automatically run large models on GPUs with as low as 1GB vram with smart offloading.
+- Smart memory management: can automatically run models on GPUs with as low as 1GB vram.
 - Works even if you don't have a GPU with: ```--cpu``` (slow)
- Can load ckpt and safetensors: All in one checkpoints or standalone diffusion models, VAEs and CLIP models.
- Safe loading of ckpt, pt, pth, etc.. files.
+- Can load ckpt, safetensors and diffusers models/checkpoints. Standalone VAEs and CLIP models.
 - Embeddings/Textual inversion
 - [Loras (regular, locon and loha)](https://comfyanonymous.github.io/ComfyUI_examples/lora/)
 - [Hypernetworks](https://comfyanonymous.github.io/ComfyUI_examples/hypernetworks/)
@@ -99,10 +92,12 @@ See what ComfyUI can do with the [example workflows](https://comfyanonymous.gith
 - [Inpainting](https://comfyanonymous.github.io/ComfyUI_examples/inpaint/) with both regular and inpainting models.
 - [ControlNet and T2I-Adapter](https://comfyanonymous.github.io/ComfyUI_examples/controlnet/)
 - [Upscale Models (ESRGAN, ESRGAN variants, SwinIR, Swin2SR, etc...)](https://comfyanonymous.github.io/ComfyUI_examples/upscale_models/)
+- [unCLIP Models](https://comfyanonymous.github.io/ComfyUI_examples/unclip/)
 - [GLIGEN](https://comfyanonymous.github.io/ComfyUI_examples/gligen/)
 - [Model Merging](https://comfyanonymous.github.io/ComfyUI_examples/model_merging/)
 - [LCM models and Loras](https://comfyanonymous.github.io/ComfyUI_examples/lcm/)
 - Latent previews with [TAESD](#how-to-show-high-quality-previews)
+- Starts up very fast.
 - Works fully offline: core will never download anything unless you want to.
 - Optional API nodes to use paid models from external providers through the online [Comfy API](https://docs.comfy.org/tutorials/api-nodes/overview).
 - [Config file](extra_model_paths.yaml.example) to set the search paths for models.
@@ -179,6 +174,10 @@ If you have trouble extracting it, right click the file -> properties -> unblock

 See the [Config file](extra_model_paths.yaml.example) to set the search paths for models. In the standalone windows build you can find this file in the ComfyUI directory. Rename this file to extra_model_paths.yaml and edit it with your favorite text editor.

+## Jupyter Notebook
+
+To run it on services like paperspace, kaggle or colab you can use my [Jupyter Notebook](notebooks/comfyui_colab.ipynb)
+

 ## [comfy-cli](https://docs.comfy.org/comfy-cli/getting-started)

@@ -240,7 +239,7 @@ Nvidia users should install stable pytorch using this command:

 This is the command to install pytorch nightly instead which might have performance improvements.

-```pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu129```
+```pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu128```

 #### Troubleshooting

@@ -273,8 +272,6 @@ You can install ComfyUI in Apple Mac silicon (M1 or M2) with any recent macOS ve

 #### DirectML (AMD Cards on Windows)

-This is very badly supported and is not recommended. There are some unofficial builds of pytorch ROCm on windows that exist that will give you a much better experience than this. This readme will be updated once official pytorch ROCm builds for windows come out.
-
 ```pip install torch-directml``` Then you can launch ComfyUI with: ```python main.py --directml```

 #### Ascend NPUs
@@ -294,13 +291,6 @@ For models compatible with Cambricon Extension for PyTorch (torch_mlu). Here's a
 2. Next, install the PyTorch(torch_mlu) following the instructions on the [Installation](https://www.cambricon.com/docs/sdk_1.15.0/cambricon_pytorch_1.17.0/user_guide_1.9/index.html)
 3. Launch ComfyUI by running `python main.py`

-#### Iluvatar Corex
-
-For models compatible with Iluvatar Extension for PyTorch. Here's a step-by-step guide tailored to your platform and installation method:
-
-1. Install the Iluvatar Corex Toolkit by adhering to the platform-specific instructions on the [Installation](https://support.iluvatar.com/#/DocumentCentre?id=1&nameCenter=2&productId=520117912052801536)
-2. Launch ComfyUI by running `python main.py`
-
 # Running

 ```python main.py```
--- a/app/frontend_management.py
+++ b/app/frontend_management.py
@@ -29,48 +29,18 @@ def frontend_install_warning_message():
 This error is happening because the ComfyUI frontend is no longer shipped as part of the main repo but as a pip package instead.
 """.strip()

-def parse_version(version: str) -> tuple[int, int, int]:
-        return tuple(map(int, version.split(".")))
-
-def is_valid_version(version: str) -> bool:
-    """Validate if a string is a valid semantic version (X.Y.Z format)."""
-    pattern = r"^(\d+)\.(\d+)\.(\d+)$"
-    return bool(re.match(pattern, version))
-
-def get_installed_frontend_version():
-    """Get the currently installed frontend package version."""
-    frontend_version_str = version("comfyui-frontend-package")
-    return frontend_version_str
-
-def get_required_frontend_version():
-    """Get the required frontend version from requirements.txt."""
-    try:
-        with open(requirements_path, "r", encoding="utf-8") as f:
-            for line in f:
-                line = line.strip()
-                if line.startswith("comfyui-frontend-package=="):
-                    version_str = line.split("==")[-1]
-                    if not is_valid_version(version_str):
-                        logging.error(f"Invalid version format in requirements.txt: {version_str}")
-                        return None
-                    return version_str
-            logging.error("comfyui-frontend-package not found in requirements.txt")
-            return None
-    except FileNotFoundError:
-        logging.error("requirements.txt not found. Cannot determine required frontend version.")
-        return None
-    except Exception as e:
-        logging.error(f"Error reading requirements.txt: {e}")
-        return None

 def check_frontend_version():
    """Check if the frontend version is up to date."""

+    def parse_version(version: str) -> tuple[int, int, int]:
+        return tuple(map(int, version.split(".")))
+
    try:
-        frontend_version_str = get_installed_frontend_version()
+        frontend_version_str = version("comfyui-frontend-package")
        frontend_version = parse_version(frontend_version_str)
-        required_frontend_str = get_required_frontend_version()
-        required_frontend = parse_version(required_frontend_str)
+        with open(requirements_path, "r", encoding="utf-8") as f:
+            required_frontend = parse_version(f.readline().split("=")[-1])
        if frontend_version < required_frontend:
            app.logger.log_startup_warning(
                f"""
@@ -198,11 +168,6 @@ def download_release_asset_zip(release: Release, destination_path: str) -> None:
 class FrontendManager:
    CUSTOM_FRONTENDS_ROOT = str(Path(__file__).parents[1] / "web_custom_versions")

-    @classmethod
-    def get_required_frontend_version(cls) -> str:
-        """Get the required frontend package version."""
-        return get_required_frontend_version()
-
    @classmethod
    def default_frontend_path(cls) -> str:
        try:
--- a/comfy/cli_args.py
+++ b/comfy/cli_args.py
@@ -49,8 +49,7 @@ parser.add_argument("--temp-directory", type=str, default=None, help="Set the Co
 parser.add_argument("--input-directory", type=str, default=None, help="Set the ComfyUI input directory. Overrides --base-directory.")
 parser.add_argument("--auto-launch", action="store_true", help="Automatically launch ComfyUI in the default browser.")
 parser.add_argument("--disable-auto-launch", action="store_true", help="Disable auto launching the browser.")
-parser.add_argument("--cuda-device", type=int, default=None, metavar="DEVICE_ID", help="Set the id of the cuda device this instance will use. All other devices will not be visible.")
-parser.add_argument("--default-device", type=int, default=None, metavar="DEFAULT_DEVICE_ID", help="Set the id of the default device, all other devices will stay visible.")
+parser.add_argument("--cuda-device", type=int, default=None, metavar="DEVICE_ID", help="Set the id of the cuda device this instance will use.")
 cm_group = parser.add_mutually_exclusive_group()
 cm_group.add_argument("--cuda-malloc", action="store_true", help="Enable cudaMallocAsync (enabled by default for torch 2.0 and up).")
 cm_group.add_argument("--disable-cuda-malloc", action="store_true", help="Disable cudaMallocAsync.")
@@ -145,7 +144,6 @@ class PerformanceFeature(enum.Enum):
 parser.add_argument("--fast", nargs="*", type=PerformanceFeature, help="Enable some untested and potentially quality deteriorating optimizations. --fast with no arguments enables everything. You can pass a list specific optimizations if you only want to enable specific ones. Current valid optimizations: fp16_accumulation fp8_matrix_mult cublas_ops")

 parser.add_argument("--mmap-torch-files", action="store_true", help="Use mmap when loading ckpt/pt files.")
-parser.add_argument("--disable-mmap", action="store_true", help="Don't use mmap when loading safetensors.")

 parser.add_argument("--dont-print-server", action="store_true", help="Don't print server output.")
 parser.add_argument("--quick-test-for-ci", action="store_true", help="Quick test for CI.")
@@ -153,7 +151,6 @@ parser.add_argument("--windows-standalone-build", action="store_true", help="Win

 parser.add_argument("--disable-metadata", action="store_true", help="Disable saving prompt metadata in files.")
 parser.add_argument("--disable-all-custom-nodes", action="store_true", help="Disable loading all custom nodes.")
-parser.add_argument("--whitelist-custom-nodes", type=str, nargs='+', default=[], help="Specify custom node folders to load even when --disable-all-custom-nodes is enabled.")
 parser.add_argument("--disable-api-nodes", action="store_true", help="Disable loading all api nodes.")

 parser.add_argument("--multi-user", action="store_true", help="Enables per-user storage.")
--- a/comfy/gligen.py
+++ b/comfy/gligen.py
@@ -1,10 +1,55 @@
 import math
 import torch
 from torch import nn
-from .ldm.modules.attention import CrossAttention, FeedForward
+from .ldm.modules.attention import CrossAttention
+from inspect import isfunction
 import comfy.ops
 ops = comfy.ops.manual_cast

+def exists(val):
+    return val is not None
+
+
+def uniq(arr):
+    return{el: True for el in arr}.keys()
+
+
+def default(val, d):
+    if exists(val):
+        return val
+    return d() if isfunction(d) else d
+
+
+# feedforward
+class GEGLU(nn.Module):
+    def __init__(self, dim_in, dim_out):
+        super().__init__()
+        self.proj = ops.Linear(dim_in, dim_out * 2)
+
+    def forward(self, x):
+        x, gate = self.proj(x).chunk(2, dim=-1)
+        return x * torch.nn.functional.gelu(gate)
+
+
+class FeedForward(nn.Module):
+    def __init__(self, dim, dim_out=None, mult=4, glu=False, dropout=0.):
+        super().__init__()
+        inner_dim = int(dim * mult)
+        dim_out = default(dim_out, dim)
+        project_in = nn.Sequential(
+            ops.Linear(dim, inner_dim),
+            nn.GELU()
+        ) if not glu else GEGLU(dim, inner_dim)
+
+        self.net = nn.Sequential(
+            project_in,
+            nn.Dropout(dropout),
+            ops.Linear(inner_dim, dim_out)
+        )
+
+    def forward(self, x):
+        return self.net(x)
+

 class GatedCrossAttentionDense(nn.Module):
    def __init__(self, query_dim, context_dim, n_heads, d_head):
--- a/comfy/k_diffusion/sa_solver.py
+++ b/comfy/k_diffusion/sa_solver.py
@@ -1,121 +0,0 @@
-# SA-Solver: Stochastic Adams Solver (NeurIPS 2023, arXiv:2309.05019)
-# Conference: https://proceedings.neurips.cc/paper_files/paper/2023/file/f4a6806490d31216a3ba667eb240c897-Paper-Conference.pdf
-# Codebase ref: https://github.com/scxue/SA-Solver
-
-import math
-from typing import Union, Callable
-import torch
-
-
-def compute_exponential_coeffs(s: torch.Tensor, t: torch.Tensor, solver_order: int, tau_t: float) -> torch.Tensor:
-    """Compute (1 + tau^2) * integral of exp((1 + tau^2) * x) * x^p dx from s to t with exp((1 + tau^2) * t) factored out, using integration by parts.
-
-    Integral of exp((1 + tau^2) * x) * x^p dx
-        = product_terms[p] - (p / (1 + tau^2)) * integral of exp((1 + tau^2) * x) * x^(p-1) dx,
-    with base case p=0 where integral equals product_terms[0].
-
-    where
-        product_terms[p] = x^p * exp((1 + tau^2) * x) / (1 + tau^2).
-
-    Construct a recursive coefficient matrix following the above recursive relation to compute all integral terms up to p = (solver_order - 1).
-    Return coefficients used by the SA-Solver in data prediction mode.
-
-    Args:
-        s: Start time s.
-        t: End time t.
-        solver_order: Current order of the solver.
-        tau_t: Stochastic strength parameter in the SDE.
-
-    Returns:
-        Exponential coefficients used in data prediction, with exp((1 + tau^2) * t) factored out, ordered from p=0 to p=solver_order−1, shape (solver_order,).
-    """
-    tau_mul = 1 + tau_t ** 2
-    h = t - s
-    p = torch.arange(solver_order, dtype=s.dtype, device=s.device)
-
-    # product_terms after factoring out exp((1 + tau^2) * t)
-    # Includes (1 + tau^2) factor from outside the integral
-    product_terms_factored = (t ** p - s ** p * (-tau_mul * h).exp())
-
-    # Lower triangular recursive coefficient matrix
-    # Accumulates recursive coefficients based on p / (1 + tau^2)
-    recursive_depth_mat = p.unsqueeze(1) - p.unsqueeze(0)
-    log_factorial = (p + 1).lgamma()
-    recursive_coeff_mat = log_factorial.unsqueeze(1) - log_factorial.unsqueeze(0)
-    if tau_t > 0:
-        recursive_coeff_mat = recursive_coeff_mat - (recursive_depth_mat * math.log(tau_mul))
-    signs = torch.where(recursive_depth_mat % 2 == 0, 1.0, -1.0)
-    recursive_coeff_mat = (recursive_coeff_mat.exp() * signs).tril()
-
-    return recursive_coeff_mat @ product_terms_factored
-
-
-def compute_simple_stochastic_adams_b_coeffs(sigma_next: torch.Tensor, curr_lambdas: torch.Tensor, lambda_s: torch.Tensor, lambda_t: torch.Tensor, tau_t: float, is_corrector_step: bool = False) -> torch.Tensor:
-    """Compute simple order-2 b coefficients from SA-Solver paper (Appendix D. Implementation Details)."""
-    tau_mul = 1 + tau_t ** 2
-    h = lambda_t - lambda_s
-    alpha_t = sigma_next * lambda_t.exp()
-    if is_corrector_step:
-        # Simplified 1-step (order-2) corrector
-        b_1 = alpha_t * (0.5 * tau_mul * h)
-        b_2 = alpha_t * (-h * tau_mul).expm1().neg() - b_1
-    else:
-        # Simplified 2-step predictor
-        b_2 = alpha_t * (0.5 * tau_mul * h ** 2) / (curr_lambdas[-2] - lambda_s)
-        b_1 = alpha_t * (-h * tau_mul).expm1().neg() - b_2
-    return torch.stack([b_2, b_1])
-
-
-def compute_stochastic_adams_b_coeffs(sigma_next: torch.Tensor, curr_lambdas: torch.Tensor, lambda_s: torch.Tensor, lambda_t: torch.Tensor, tau_t: float, simple_order_2: bool = False, is_corrector_step: bool = False) -> torch.Tensor:
-    """Compute b_i coefficients for the SA-Solver (see eqs. 15 and 18).
-
-    The solver order corresponds to the number of input lambdas (half-logSNR points).
-
-    Args:
-        sigma_next: Sigma at end time t.
-        curr_lambdas: Lambda time points used to construct the Lagrange basis, shape (N,).
-        lambda_s: Lambda at start time s.
-        lambda_t: Lambda at end time t.
-        tau_t: Stochastic strength parameter in the SDE.
-        simple_order_2: Whether to enable the simple order-2 scheme.
-        is_corrector_step: Flag for corrector step in simple order-2 mode.
-
-    Returns:
-        b_i coefficients for the SA-Solver, shape (N,), where N is the solver order.
-    """
-    num_timesteps = curr_lambdas.shape[0]
-
-    if simple_order_2 and num_timesteps == 2:
-        return compute_simple_stochastic_adams_b_coeffs(sigma_next, curr_lambdas, lambda_s, lambda_t, tau_t, is_corrector_step)
-
-    # Compute coefficients by solving a linear system from Lagrange basis interpolation
-    exp_integral_coeffs = compute_exponential_coeffs(lambda_s, lambda_t, num_timesteps, tau_t)
-    vandermonde_matrix_T = torch.vander(curr_lambdas, num_timesteps, increasing=True).T
-    lagrange_integrals = torch.linalg.solve(vandermonde_matrix_T, exp_integral_coeffs)
-
-    # (sigma_t * exp(-tau^2 * lambda_t)) * exp((1 + tau^2) * lambda_t)
-    # = sigma_t * exp(lambda_t) = alpha_t
-    # exp((1 + tau^2) * lambda_t) is extracted from the integral
-    alpha_t = sigma_next * lambda_t.exp()
-    return alpha_t * lagrange_integrals
-
-
-def get_tau_interval_func(start_sigma: float, end_sigma: float, eta: float = 1.0) -> Callable[[Union[torch.Tensor, float]], float]:
-    """Return a function that controls the stochasticity of SA-Solver.
-
-    When eta = 0, SA-Solver runs as ODE. The official approach uses
-    time t to determine the SDE interval, while here we use sigma instead.
-
-    See:
-        https://github.com/scxue/SA-Solver/blob/main/README.md
-    """
-
-    def tau_func(sigma: Union[torch.Tensor, float]) -> float:
-        if eta <= 0:
-            return 0.0  # ODE
-
-        if isinstance(sigma, torch.Tensor):
-            sigma = sigma.item()
-        return eta if start_sigma >= sigma >= end_sigma else 0.0
-
-    return tau_func
--- a/comfy/k_diffusion/sampling.py
+++ b/comfy/k_diffusion/sampling.py
@@ -1,5 +1,4 @@
 import math
-from functools import partial

 from scipy import integrate
 import torch
@@ -9,7 +8,6 @@ from tqdm.auto import trange, tqdm

 from . import utils
 from . import deis
-from . import sa_solver
 import comfy.model_patcher
 import comfy.model_sampling

@@ -144,33 +142,6 @@ class BrownianTreeNoiseSampler:
        return self.tree(t0, t1) / (t1 - t0).abs().sqrt()


-def sigma_to_half_log_snr(sigma, model_sampling):
-    """Convert sigma to half-logSNR log(alpha_t / sigma_t)."""
-    if isinstance(model_sampling, comfy.model_sampling.CONST):
-        # log((1 - t) / t) = log((1 - sigma) / sigma)
-        return sigma.logit().neg()
-    return sigma.log().neg()
-
-
-def half_log_snr_to_sigma(half_log_snr, model_sampling):
-    """Convert half-logSNR log(alpha_t / sigma_t) to sigma."""
-    if isinstance(model_sampling, comfy.model_sampling.CONST):
-        # 1 / (1 + exp(half_log_snr))
-        return half_log_snr.neg().sigmoid()
-    return half_log_snr.neg().exp()
-
-
-def offset_first_sigma_for_snr(sigmas, model_sampling, percent_offset=1e-4):
-    """Adjust the first sigma to avoid invalid logSNR."""
-    if len(sigmas) <= 1:
-        return sigmas
-    if isinstance(model_sampling, comfy.model_sampling.CONST):
-        if sigmas[0] >= 1:
-            sigmas = sigmas.clone()
-            sigmas[0] = model_sampling.percent_to_sigma(percent_offset)
-    return sigmas
-
-
@torch.no_grad()
 def sample_euler(model, x, sigmas, extra_args=None, callback=None, disable=None, s_churn=0., s_tmin=0., s_tmax=float('inf'), s_noise=1.):
    """Implements Algorithm 2 (Euler steps) from Karras et al. (2022)."""
@@ -413,13 +384,9 @@ def sample_lms(model, x, sigmas, extra_args=None, callback=None, disable=None, o
            ds.pop(0)
        if callback is not None:
            callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised})
-        if sigmas[i + 1] == 0:
-            # Denoising step
-            x = denoised
-        else:
-            cur_order = min(i + 1, order)
-            coeffs = [linear_multistep_coeff(cur_order, sigmas_cpu, i, j) for j in range(cur_order)]
-            x = x + sum(coeff * d for coeff, d in zip(coeffs, reversed(ds)))
+        cur_order = min(i + 1, order)
+        coeffs = [linear_multistep_coeff(cur_order, sigmas_cpu, i, j) for j in range(cur_order)]
+        x = x + sum(coeff * d for coeff, d in zip(coeffs, reversed(ds)))
    return x


@@ -715,7 +682,6 @@ def sample_dpmpp_2s_ancestral_RF(model, x, sigmas, extra_args=None, callback=Non
        # logged_x = torch.cat((logged_x, x.unsqueeze(0)), dim=0)
    return x

-
@torch.no_grad()
 def sample_dpmpp_sde(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, r=1 / 2):
    """DPM-Solver++ (stochastic)."""
@@ -727,49 +693,38 @@ def sample_dpmpp_sde(model, x, sigmas, extra_args=None, callback=None, disable=N
    seed = extra_args.get("seed", None)
    noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=seed, cpu=True) if noise_sampler is None else noise_sampler
    s_in = x.new_ones([x.shape[0]])
-
-    model_sampling = model.inner_model.model_patcher.get_model_object('model_sampling')
-    sigma_fn = partial(half_log_snr_to_sigma, model_sampling=model_sampling)
-    lambda_fn = partial(sigma_to_half_log_snr, model_sampling=model_sampling)
-    sigmas = offset_first_sigma_for_snr(sigmas, model_sampling)
+    sigma_fn = lambda t: t.neg().exp()
+    t_fn = lambda sigma: sigma.log().neg()

    for i in trange(len(sigmas) - 1, disable=disable):
        denoised = model(x, sigmas[i] * s_in, **extra_args)
        if callback is not None:
            callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised})
        if sigmas[i + 1] == 0:
-            # Denoising step
-            x = denoised
+            # Euler method
+            d = to_d(x, sigmas[i], denoised)
+            dt = sigmas[i + 1] - sigmas[i]
+            x = x + d * dt
        else:
            # DPM-Solver++
-            lambda_s, lambda_t = lambda_fn(sigmas[i]), lambda_fn(sigmas[i + 1])
-            h = lambda_t - lambda_s
-            lambda_s_1 = lambda_s + r * h
+            t, t_next = t_fn(sigmas[i]), t_fn(sigmas[i + 1])
+            h = t_next - t
+            s = t + h * r
            fac = 1 / (2 * r)

-            sigma_s_1 = sigma_fn(lambda_s_1)
-
-            alpha_s = sigmas[i] * lambda_s.exp()
-            alpha_s_1 = sigma_s_1 * lambda_s_1.exp()
-            alpha_t = sigmas[i + 1] * lambda_t.exp()
-
            # Step 1
-            sd, su = get_ancestral_step(lambda_s.neg().exp(), lambda_s_1.neg().exp(), eta)
-            lambda_s_1_ = sd.log().neg()
-            h_ = lambda_s_1_ - lambda_s
-            x_2 = (alpha_s_1 / alpha_s) * (-h_).exp() * x - alpha_s_1 * (-h_).expm1() * denoised
-            if eta > 0 and s_noise > 0:
-                x_2 = x_2 + alpha_s_1 * noise_sampler(sigmas[i], sigma_s_1) * s_noise * su
-            denoised_2 = model(x_2, sigma_s_1 * s_in, **extra_args)
+            sd, su = get_ancestral_step(sigma_fn(t), sigma_fn(s), eta)
+            s_ = t_fn(sd)
+            x_2 = (sigma_fn(s_) / sigma_fn(t)) * x - (t - s_).expm1() * denoised
+            x_2 = x_2 + noise_sampler(sigma_fn(t), sigma_fn(s)) * s_noise * su
+            denoised_2 = model(x_2, sigma_fn(s) * s_in, **extra_args)

            # Step 2
-            sd, su = get_ancestral_step(lambda_s.neg().exp(), lambda_t.neg().exp(), eta)
-            lambda_t_ = sd.log().neg()
-            h_ = lambda_t_ - lambda_s
+            sd, su = get_ancestral_step(sigma_fn(t), sigma_fn(t_next), eta)
+            t_next_ = t_fn(sd)
            denoised_d = (1 - fac) * denoised + fac * denoised_2
-            x = (alpha_t / alpha_s) * (-h_).exp() * x - alpha_t * (-h_).expm1() * denoised_d
-            if eta > 0 and s_noise > 0:
-                x = x + alpha_t * noise_sampler(sigmas[i], sigmas[i + 1]) * s_noise * su
+            x = (sigma_fn(t_next_) / sigma_fn(t)) * x - (t - t_next_).expm1() * denoised_d
+            x = x + noise_sampler(sigma_fn(t), sigma_fn(t_next)) * s_noise * su
    return x


@@ -798,7 +753,6 @@ def sample_dpmpp_2m(model, x, sigmas, extra_args=None, callback=None, disable=No
        old_denoised = denoised
    return x

-
@torch.no_grad()
 def sample_dpmpp_2m_sde(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, solver_type='midpoint'):
    """DPM-Solver++(2M) SDE."""
@@ -814,12 +768,9 @@ def sample_dpmpp_2m_sde(model, x, sigmas, extra_args=None, callback=None, disabl
    noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=seed, cpu=True) if noise_sampler is None else noise_sampler
    s_in = x.new_ones([x.shape[0]])

-    model_sampling = model.inner_model.model_patcher.get_model_object('model_sampling')
-    lambda_fn = partial(sigma_to_half_log_snr, model_sampling=model_sampling)
-    sigmas = offset_first_sigma_for_snr(sigmas, model_sampling)
-
    old_denoised = None
-    h, h_last = None, None
+    h_last = None
+    h = None

    for i in trange(len(sigmas) - 1, disable=disable):
        denoised = model(x, sigmas[i] * s_in, **extra_args)
@@ -830,29 +781,26 @@ def sample_dpmpp_2m_sde(model, x, sigmas, extra_args=None, callback=None, disabl
            x = denoised
        else:
            # DPM-Solver++(2M) SDE
-            lambda_s, lambda_t = lambda_fn(sigmas[i]), lambda_fn(sigmas[i + 1])
-            h = lambda_t - lambda_s
-            h_eta = h * (eta + 1)
+            t, s = -sigmas[i].log(), -sigmas[i + 1].log()
+            h = s - t
+            eta_h = eta * h

-            alpha_t = sigmas[i + 1] * lambda_t.exp()
-
-            x = sigmas[i + 1] / sigmas[i] * (-h * eta).exp() * x + alpha_t * (-h_eta).expm1().neg() * denoised
+            x = sigmas[i + 1] / sigmas[i] * (-eta_h).exp() * x + (-h - eta_h).expm1().neg() * denoised

            if old_denoised is not None:
                r = h_last / h
                if solver_type == 'heun':
-                    x = x + alpha_t * ((-h_eta).expm1().neg() / (-h_eta) + 1) * (1 / r) * (denoised - old_denoised)
+                    x = x + ((-h - eta_h).expm1().neg() / (-h - eta_h) + 1) * (1 / r) * (denoised - old_denoised)
                elif solver_type == 'midpoint':
-                    x = x + 0.5 * alpha_t * (-h_eta).expm1().neg() * (1 / r) * (denoised - old_denoised)
+                    x = x + 0.5 * (-h - eta_h).expm1().neg() * (1 / r) * (denoised - old_denoised)

-            if eta > 0 and s_noise > 0:
-                x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * sigmas[i + 1] * (-2 * h * eta).expm1().neg().sqrt() * s_noise
+            if eta:
+                x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * sigmas[i + 1] * (-2 * eta_h).expm1().neg().sqrt() * s_noise

        old_denoised = denoised
        h_last = h
    return x

-
@torch.no_grad()
 def sample_dpmpp_3m_sde(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None):
    """DPM-Solver++(3M) SDE."""
@@ -866,10 +814,6 @@ def sample_dpmpp_3m_sde(model, x, sigmas, extra_args=None, callback=None, disabl
    noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=seed, cpu=True) if noise_sampler is None else noise_sampler
    s_in = x.new_ones([x.shape[0]])

-    model_sampling = model.inner_model.model_patcher.get_model_object('model_sampling')
-    lambda_fn = partial(sigma_to_half_log_snr, model_sampling=model_sampling)
-    sigmas = offset_first_sigma_for_snr(sigmas, model_sampling)
-
    denoised_1, denoised_2 = None, None
    h, h_1, h_2 = None, None, None

@@ -881,16 +825,13 @@ def sample_dpmpp_3m_sde(model, x, sigmas, extra_args=None, callback=None, disabl
            # Denoising step
            x = denoised
        else:
-            lambda_s, lambda_t = lambda_fn(sigmas[i]), lambda_fn(sigmas[i + 1])
-            h = lambda_t - lambda_s
+            t, s = -sigmas[i].log(), -sigmas[i + 1].log()
+            h = s - t
            h_eta = h * (eta + 1)

-            alpha_t = sigmas[i + 1] * lambda_t.exp()
-
-            x = sigmas[i + 1] / sigmas[i] * (-h * eta).exp() * x + alpha_t * (-h_eta).expm1().neg() * denoised
+            x = torch.exp(-h_eta) * x + (-h_eta).expm1().neg() * denoised

            if h_2 is not None:
-                # DPM-Solver++(3M) SDE
                r0 = h_1 / h
                r1 = h_2 / h
                d1_0 = (denoised - denoised_1) / r0
@@ -899,22 +840,20 @@ def sample_dpmpp_3m_sde(model, x, sigmas, extra_args=None, callback=None, disabl
                d2 = (d1_0 - d1_1) / (r0 + r1)
                phi_2 = h_eta.neg().expm1() / h_eta + 1
                phi_3 = phi_2 / h_eta - 0.5
-                x = x + (alpha_t * phi_2) * d1 - (alpha_t * phi_3) * d2
+                x = x + phi_2 * d1 - phi_3 * d2
            elif h_1 is not None:
-                # DPM-Solver++(2M) SDE
                r = h_1 / h
                d = (denoised - denoised_1) / r
                phi_2 = h_eta.neg().expm1() / h_eta + 1
-                x = x + (alpha_t * phi_2) * d
+                x = x + phi_2 * d

-            if eta > 0 and s_noise > 0:
+            if eta:
                x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * sigmas[i + 1] * (-2 * h * eta).expm1().neg().sqrt() * s_noise

        denoised_1, denoised_2 = denoised, denoised_1
        h_1, h_2 = h, h_1
    return x

-
@torch.no_grad()
 def sample_dpmpp_3m_sde_gpu(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None):
    if len(sigmas) <= 1:
@@ -924,7 +863,6 @@ def sample_dpmpp_3m_sde_gpu(model, x, sigmas, extra_args=None, callback=None, di
    noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=extra_args.get("seed", None), cpu=False) if noise_sampler is None else noise_sampler
    return sample_dpmpp_3m_sde(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, eta=eta, s_noise=s_noise, noise_sampler=noise_sampler)

-
@torch.no_grad()
 def sample_dpmpp_2m_sde_gpu(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, solver_type='midpoint'):
    if len(sigmas) <= 1:
@@ -934,7 +872,6 @@ def sample_dpmpp_2m_sde_gpu(model, x, sigmas, extra_args=None, callback=None, di
    noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=extra_args.get("seed", None), cpu=False) if noise_sampler is None else noise_sampler
    return sample_dpmpp_2m_sde(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, eta=eta, s_noise=s_noise, noise_sampler=noise_sampler, solver_type=solver_type)

-
@torch.no_grad()
 def sample_dpmpp_sde_gpu(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, r=1 / 2):
    if len(sigmas) <= 1:
@@ -1072,9 +1009,7 @@ def sample_ipndm(model, x, sigmas, extra_args=None, callback=None, disable=None,
        d_cur = (x_cur - denoised) / t_cur

        order = min(max_order, i+1)
-        if t_next == 0:     # Denoising step
-            x_next = denoised
-        elif order == 1:    # First Euler step.
+        if order == 1:      # First Euler step.
            x_next = x_cur + (t_next - t_cur) * d_cur
        elif order == 2:    # Use one history point.
            x_next = x_cur + (t_next - t_cur) * (3 * d_cur - buffer_model[-1]) / 2
@@ -1092,7 +1027,6 @@ def sample_ipndm(model, x, sigmas, extra_args=None, callback=None, disable=None,

    return x_next

-
 #From https://github.com/zju-pi/diff-sampler/blob/main/diff-solvers-main/solvers.py
 #under Apache 2 license
 def sample_ipndm_v(model, x, sigmas, extra_args=None, callback=None, disable=None, max_order=4):
@@ -1116,9 +1050,7 @@ def sample_ipndm_v(model, x, sigmas, extra_args=None, callback=None, disable=Non
        d_cur = (x_cur - denoised) / t_cur

        order = min(max_order, i+1)
-        if t_next == 0:     # Denoising step
-            x_next = denoised
-        elif order == 1:    # First Euler step.
+        if order == 1:      # First Euler step.
            x_next = x_cur + (t_next - t_cur) * d_cur
        elif order == 2:    # Use one history point.
            h_n = (t_next - t_cur)
@@ -1158,7 +1090,6 @@ def sample_ipndm_v(model, x, sigmas, extra_args=None, callback=None, disable=Non

    return x_next

-
 #From https://github.com/zju-pi/diff-sampler/blob/main/diff-solvers-main/solvers.py
 #under Apache 2 license
@torch.no_grad()
@@ -1209,22 +1140,39 @@ def sample_deis(model, x, sigmas, extra_args=None, callback=None, disable=None,

    return x_next

+@torch.no_grad()
+def sample_euler_cfg_pp(model, x, sigmas, extra_args=None, callback=None, disable=None):
+    extra_args = {} if extra_args is None else extra_args
+
+    temp = [0]
+    def post_cfg_function(args):
+        temp[0] = args["uncond_denoised"]
+        return args["denoised"]
+
+    model_options = extra_args.get("model_options", {}).copy()
+    extra_args["model_options"] = comfy.model_patcher.set_model_options_post_cfg_function(model_options, post_cfg_function, disable_cfg1_optimization=True)
+
+    s_in = x.new_ones([x.shape[0]])
+    for i in trange(len(sigmas) - 1, disable=disable):
+        sigma_hat = sigmas[i]
+        denoised = model(x, sigma_hat * s_in, **extra_args)
+        d = to_d(x, sigma_hat, temp[0])
+        if callback is not None:
+            callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigma_hat, 'denoised': denoised})
+        # Euler method
+        x = denoised + d * sigmas[i + 1]
+    return x

@torch.no_grad()
 def sample_euler_ancestral_cfg_pp(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None):
-    """Ancestral sampling with Euler method steps (CFG++)."""
+    """Ancestral sampling with Euler method steps."""
    extra_args = {} if extra_args is None else extra_args
    seed = extra_args.get("seed", None)
    noise_sampler = default_noise_sampler(x, seed=seed) if noise_sampler is None else noise_sampler

-    model_sampling = model.inner_model.model_patcher.get_model_object("model_sampling")
-    lambda_fn = partial(sigma_to_half_log_snr, model_sampling=model_sampling)
-
-    uncond_denoised = None
-
+    temp = [0]
    def post_cfg_function(args):
-        nonlocal uncond_denoised
-        uncond_denoised = args["uncond_denoised"]
+        temp[0] = args["uncond_denoised"]
        return args["denoised"]

    model_options = extra_args.get("model_options", {}).copy()
@@ -1233,33 +1181,15 @@ def sample_euler_ancestral_cfg_pp(model, x, sigmas, extra_args=None, callback=No
    s_in = x.new_ones([x.shape[0]])
    for i in trange(len(sigmas) - 1, disable=disable):
        denoised = model(x, sigmas[i] * s_in, **extra_args)
+        sigma_down, sigma_up = get_ancestral_step(sigmas[i], sigmas[i + 1], eta=eta)
        if callback is not None:
            callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised})
-        if sigmas[i + 1] == 0:
-            # Denoising step
-            x = denoised
-        else:
-            alpha_s = sigmas[i] * lambda_fn(sigmas[i]).exp()
-            alpha_t = sigmas[i + 1] * lambda_fn(sigmas[i + 1]).exp()
-            d = to_d(x, sigmas[i], alpha_s * uncond_denoised)   # to noise
-
-            # DDIM stochastic sampling
-            sigma_down, sigma_up = get_ancestral_step(sigmas[i] / alpha_s, sigmas[i + 1] / alpha_t, eta=eta)
-            sigma_down = alpha_t * sigma_down
-
-            # Euler method
-            x = alpha_t * denoised + sigma_down * d
-            if eta > 0 and s_noise > 0:
-                x = x + alpha_t * noise_sampler(sigmas[i], sigmas[i + 1]) * s_noise * sigma_up
+        d = to_d(x, sigmas[i], temp[0])
+        # Euler method
+        x = denoised + d * sigma_down
+        if sigmas[i + 1] > 0:
+            x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * s_noise * sigma_up
    return x
-
-
-@torch.no_grad()
-def sample_euler_cfg_pp(model, x, sigmas, extra_args=None, callback=None, disable=None):
-    """Euler method steps (CFG++)."""
-    return sample_euler_ancestral_cfg_pp(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, eta=0.0, s_noise=0.0, noise_sampler=None)
-
-
@torch.no_grad()
 def sample_dpmpp_2s_ancestral_cfg_pp(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None):
    """Ancestral sampling with DPM-Solver++(2S) second-order steps."""
@@ -1416,7 +1346,6 @@ def sample_res_multistep_ancestral(model, x, sigmas, extra_args=None, callback=N
 def sample_res_multistep_ancestral_cfg_pp(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None):
    return res_multistep(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, s_noise=s_noise, noise_sampler=noise_sampler, eta=eta, cfg_pp=True)

-
@torch.no_grad()
 def sample_gradient_estimation(model, x, sigmas, extra_args=None, callback=None, disable=None, ge_gamma=2., cfg_pp=False):
    """Gradient-estimation sampler. Paper: https://openreview.net/pdf?id=o2ND9v0CeK"""
@@ -1443,32 +1372,31 @@ def sample_gradient_estimation(model, x, sigmas, extra_args=None, callback=None,
        if callback is not None:
            callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised})
        dt = sigmas[i + 1] - sigmas[i]
-        if sigmas[i + 1] == 0:
-            # Denoising step
-            x = denoised
-        else:
+        if i == 0:
            # Euler method
            if cfg_pp:
                x = denoised + d * sigmas[i + 1]
            else:
                x = x + d * dt
-
-            if i >= 1:
-                # Gradient estimation
+        else:
+            # Gradient estimation
+            if cfg_pp:
                d_bar = (ge_gamma - 1) * (d - old_d)
+                x = denoised + d * sigmas[i + 1] + d_bar * dt
+            else:
+                d_bar = ge_gamma * d + (1 - ge_gamma) * old_d
                x = x + d_bar * dt
        old_d = d
    return x

-
@torch.no_grad()
 def sample_gradient_estimation_cfg_pp(model, x, sigmas, extra_args=None, callback=None, disable=None, ge_gamma=2.):
    return sample_gradient_estimation(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, ge_gamma=ge_gamma, cfg_pp=True)

-
@torch.no_grad()
-def sample_er_sde(model, x, sigmas, extra_args=None, callback=None, disable=None, s_noise=1.0, noise_sampler=None, noise_scaler=None, max_stage=3):
-    """Extended Reverse-Time SDE solver (VP ER-SDE-Solver-3). arXiv: https://arxiv.org/abs/2309.06169.
+def sample_er_sde(model, x, sigmas, extra_args=None, callback=None, disable=None, s_noise=1., noise_sampler=None, noise_scaler=None, max_stage=3):
+    """
+    Extended Reverse-Time SDE solver (VE ER-SDE-Solver-3). Arxiv: https://arxiv.org/abs/2309.06169.
    Code reference: https://github.com/QinpengCui/ER-SDE-Solver/blob/main/er_sde_solver.py.
    """
    extra_args = {} if extra_args is None else extra_args
@@ -1476,18 +1404,12 @@ def sample_er_sde(model, x, sigmas, extra_args=None, callback=None, disable=None
    noise_sampler = default_noise_sampler(x, seed=seed) if noise_sampler is None else noise_sampler
    s_in = x.new_ones([x.shape[0]])

-    def default_er_sde_noise_scaler(x):
-        return x * ((x ** 0.3).exp() + 10.0)
-
-    noise_scaler = default_er_sde_noise_scaler if noise_scaler is None else noise_scaler
+    def default_noise_scaler(sigma):
+        return sigma * ((sigma ** 0.3).exp() + 10.0)
+    noise_scaler = default_noise_scaler if noise_scaler is None else noise_scaler
    num_integration_points = 200.0
    point_indice = torch.arange(0, num_integration_points, dtype=torch.float32, device=x.device)

-    model_sampling = model.inner_model.model_patcher.get_model_object("model_sampling")
-    sigmas = offset_first_sigma_for_snr(sigmas, model_sampling)
-    half_log_snrs = sigma_to_half_log_snr(sigmas, model_sampling)
-    er_lambdas = half_log_snrs.neg().exp()  # er_lambda_t = sigma_t / alpha_t
-
    old_denoised = None
    old_denoised_d = None

@@ -1498,45 +1420,41 @@ def sample_er_sde(model, x, sigmas, extra_args=None, callback=None, disable=None
        stage_used = min(max_stage, i + 1)
        if sigmas[i + 1] == 0:
            x = denoised
+        elif stage_used == 1:
+            r = noise_scaler(sigmas[i + 1]) / noise_scaler(sigmas[i])
+            x = r * x + (1 - r) * denoised
        else:
-            er_lambda_s, er_lambda_t = er_lambdas[i], er_lambdas[i + 1]
-            alpha_s = sigmas[i] / er_lambda_s
-            alpha_t = sigmas[i + 1] / er_lambda_t
-            r_alpha = alpha_t / alpha_s
-            r = noise_scaler(er_lambda_t) / noise_scaler(er_lambda_s)
+            r = noise_scaler(sigmas[i + 1]) / noise_scaler(sigmas[i])
+            x = r * x + (1 - r) * denoised

-            # Stage 1 Euler
-            x = r_alpha * r * x + alpha_t * (1 - r) * denoised
+            dt = sigmas[i + 1] - sigmas[i]
+            sigma_step_size = -dt / num_integration_points
+            sigma_pos = sigmas[i + 1] + point_indice * sigma_step_size
+            scaled_pos = noise_scaler(sigma_pos)

-            if stage_used >= 2:
-                dt = er_lambda_t - er_lambda_s
-                lambda_step_size = -dt / num_integration_points
-                lambda_pos = er_lambda_t + point_indice * lambda_step_size
-                scaled_pos = noise_scaler(lambda_pos)
+            # Stage 2
+            s = torch.sum(1 / scaled_pos) * sigma_step_size
+            denoised_d = (denoised - old_denoised) / (sigmas[i] - sigmas[i - 1])
+            x = x + (dt + s * noise_scaler(sigmas[i + 1])) * denoised_d

-                # Stage 2
-                s = torch.sum(1 / scaled_pos) * lambda_step_size
-                denoised_d = (denoised - old_denoised) / (er_lambda_s - er_lambdas[i - 1])
-                x = x + alpha_t * (dt + s * noise_scaler(er_lambda_t)) * denoised_d
+            if stage_used >= 3:
+                # Stage 3
+                s_u = torch.sum((sigma_pos - sigmas[i]) / scaled_pos) * sigma_step_size
+                denoised_u = (denoised_d - old_denoised_d) / ((sigmas[i] - sigmas[i - 2]) / 2)
+                x = x + ((dt ** 2) / 2 + s_u * noise_scaler(sigmas[i + 1])) * denoised_u
+            old_denoised_d = denoised_d

-                if stage_used >= 3:
-                    # Stage 3
-                    s_u = torch.sum((lambda_pos - er_lambda_s) / scaled_pos) * lambda_step_size
-                    denoised_u = (denoised_d - old_denoised_d) / ((er_lambda_s - er_lambdas[i - 2]) / 2)
-                    x = x + alpha_t * ((dt ** 2) / 2 + s_u * noise_scaler(er_lambda_t)) * denoised_u
-                old_denoised_d = denoised_d
-
-            if s_noise > 0:
-                x = x + alpha_t * noise_sampler(sigmas[i], sigmas[i + 1]) * s_noise * (er_lambda_t ** 2 - er_lambda_s ** 2 * r ** 2).sqrt().nan_to_num(nan=0.0)
+        if s_noise != 0 and sigmas[i + 1] > 0:
+            x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * s_noise * (sigmas[i + 1] ** 2 - sigmas[i] ** 2 * r ** 2).sqrt().nan_to_num(nan=0.0)
        old_denoised = denoised
    return x

-
@torch.no_grad()
 def sample_seeds_2(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, r=0.5):
-    """SEEDS-2 - Stochastic Explicit Exponential Derivative-free Solvers (VP Data Prediction) stage 2.
-    arXiv: https://arxiv.org/abs/2305.14267
-    """
+    '''
+    SEEDS-2 - Stochastic Explicit Exponential Derivative-free Solvers (VE Data Prediction) stage 2
+    Arxiv: https://arxiv.org/abs/2305.14267
+    '''
    extra_args = {} if extra_args is None else extra_args
    seed = extra_args.get("seed", None)
    noise_sampler = default_noise_sampler(x, seed=seed) if noise_sampler is None else noise_sampler
@@ -1544,11 +1462,6 @@ def sample_seeds_2(model, x, sigmas, extra_args=None, callback=None, disable=Non

    inject_noise = eta > 0 and s_noise > 0

-    model_sampling = model.inner_model.model_patcher.get_model_object('model_sampling')
-    sigma_fn = partial(half_log_snr_to_sigma, model_sampling=model_sampling)
-    lambda_fn = partial(sigma_to_half_log_snr, model_sampling=model_sampling)
-    sigmas = offset_first_sigma_for_snr(sigmas, model_sampling)
-
    for i in trange(len(sigmas) - 1, disable=disable):
        denoised = model(x, sigmas[i] * s_in, **extra_args)
        if callback is not None:
@@ -1556,43 +1469,38 @@ def sample_seeds_2(model, x, sigmas, extra_args=None, callback=None, disable=Non
        if sigmas[i + 1] == 0:
            x = denoised
        else:
-            lambda_s, lambda_t = lambda_fn(sigmas[i]), lambda_fn(sigmas[i + 1])
-            h = lambda_t - lambda_s
+            t, t_next = -sigmas[i].log(), -sigmas[i + 1].log()
+            h = t_next - t
            h_eta = h * (eta + 1)
-            lambda_s_1 = lambda_s + r * h
+            s = t + r * h
            fac = 1 / (2 * r)
-            sigma_s_1 = sigma_fn(lambda_s_1)
-
-            # alpha_t = sigma_t * exp(log(alpha_t / sigma_t)) = sigma_t * exp(lambda_t)
-            alpha_s_1 = sigma_s_1 * lambda_s_1.exp()
-            alpha_t = sigmas[i + 1] * lambda_t.exp()
+            sigma_s = s.neg().exp()

            coeff_1, coeff_2 = (-r * h_eta).expm1(), (-h_eta).expm1()
            if inject_noise:
-                # 0 < r < 1
                noise_coeff_1 = (-2 * r * h * eta).expm1().neg().sqrt()
-                noise_coeff_2 = (-r * h * eta).exp() * (-2 * (1 - r) * h * eta).expm1().neg().sqrt()
-                noise_1, noise_2 = noise_sampler(sigmas[i], sigma_s_1), noise_sampler(sigma_s_1, sigmas[i + 1])
+                noise_coeff_2 = ((-2 * r * h * eta).expm1() - (-2 * h * eta).expm1()).sqrt()
+                noise_1, noise_2 = noise_sampler(sigmas[i], sigma_s), noise_sampler(sigma_s, sigmas[i + 1])

            # Step 1
-            x_2 = sigma_s_1 / sigmas[i] * (-r * h * eta).exp() * x - alpha_s_1 * coeff_1 * denoised
+            x_2 = (coeff_1 + 1) * x - coeff_1 * denoised
            if inject_noise:
-                x_2 = x_2 + sigma_s_1 * (noise_coeff_1 * noise_1) * s_noise
-            denoised_2 = model(x_2, sigma_s_1 * s_in, **extra_args)
+                x_2 = x_2 + sigma_s * (noise_coeff_1 * noise_1) * s_noise
+            denoised_2 = model(x_2, sigma_s * s_in, **extra_args)

            # Step 2
            denoised_d = (1 - fac) * denoised + fac * denoised_2
-            x = sigmas[i + 1] / sigmas[i] * (-h * eta).exp() * x - alpha_t * coeff_2 * denoised_d
+            x = (coeff_2 + 1) * x - coeff_2 * denoised_d
            if inject_noise:
                x = x + sigmas[i + 1] * (noise_coeff_2 * noise_1 + noise_coeff_1 * noise_2) * s_noise
    return x

-
@torch.no_grad()
 def sample_seeds_3(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, r_1=1./3, r_2=2./3):
-    """SEEDS-3 - Stochastic Explicit Exponential Derivative-free Solvers (VP Data Prediction) stage 3.
-    arXiv: https://arxiv.org/abs/2305.14267
-    """
+    '''
+    SEEDS-3 - Stochastic Explicit Exponential Derivative-free Solvers (VE Data Prediction) stage 3
+    Arxiv: https://arxiv.org/abs/2305.14267
+    '''
    extra_args = {} if extra_args is None else extra_args
    seed = extra_args.get("seed", None)
    noise_sampler = default_noise_sampler(x, seed=seed) if noise_sampler is None else noise_sampler
@@ -1600,11 +1508,6 @@ def sample_seeds_3(model, x, sigmas, extra_args=None, callback=None, disable=Non

    inject_noise = eta > 0 and s_noise > 0

-    model_sampling = model.inner_model.model_patcher.get_model_object('model_sampling')
-    sigma_fn = partial(half_log_snr_to_sigma, model_sampling=model_sampling)
-    lambda_fn = partial(sigma_to_half_log_snr, model_sampling=model_sampling)
-    sigmas = offset_first_sigma_for_snr(sigmas, model_sampling)
-
    for i in trange(len(sigmas) - 1, disable=disable):
        denoised = model(x, sigmas[i] * s_in, **extra_args)
        if callback is not None:
@@ -1612,150 +1515,34 @@ def sample_seeds_3(model, x, sigmas, extra_args=None, callback=None, disable=Non
        if sigmas[i + 1] == 0:
            x = denoised
        else:
-            lambda_s, lambda_t = lambda_fn(sigmas[i]), lambda_fn(sigmas[i + 1])
-            h = lambda_t - lambda_s
+            t, t_next = -sigmas[i].log(), -sigmas[i + 1].log()
+            h = t_next - t
            h_eta = h * (eta + 1)
-            lambda_s_1 = lambda_s + r_1 * h
-            lambda_s_2 = lambda_s + r_2 * h
-            sigma_s_1, sigma_s_2 = sigma_fn(lambda_s_1), sigma_fn(lambda_s_2)
-
-            # alpha_t = sigma_t * exp(log(alpha_t / sigma_t)) = sigma_t * exp(lambda_t)
-            alpha_s_1 = sigma_s_1 * lambda_s_1.exp()
-            alpha_s_2 = sigma_s_2 * lambda_s_2.exp()
-            alpha_t = sigmas[i + 1] * lambda_t.exp()
+            s_1 = t + r_1 * h
+            s_2 = t + r_2 * h
+            sigma_s_1, sigma_s_2 = s_1.neg().exp(), s_2.neg().exp()

            coeff_1, coeff_2, coeff_3 = (-r_1 * h_eta).expm1(), (-r_2 * h_eta).expm1(), (-h_eta).expm1()
            if inject_noise:
-                # 0 < r_1 < r_2 < 1
                noise_coeff_1 = (-2 * r_1 * h * eta).expm1().neg().sqrt()
-                noise_coeff_2 = (-r_1 * h * eta).exp() * (-2 * (r_2 - r_1) * h * eta).expm1().neg().sqrt()
-                noise_coeff_3 = (-r_2 * h * eta).exp() * (-2 * (1 - r_2) * h * eta).expm1().neg().sqrt()
+                noise_coeff_2 = ((-2 * r_1 * h * eta).expm1() - (-2 * r_2 * h * eta).expm1()).sqrt()
+                noise_coeff_3 = ((-2 * r_2 * h * eta).expm1() - (-2 * h * eta).expm1()).sqrt()
                noise_1, noise_2, noise_3 = noise_sampler(sigmas[i], sigma_s_1), noise_sampler(sigma_s_1, sigma_s_2), noise_sampler(sigma_s_2, sigmas[i + 1])

            # Step 1
-            x_2 = sigma_s_1 / sigmas[i] * (-r_1 * h * eta).exp() * x - alpha_s_1 * coeff_1 * denoised
+            x_2 = (coeff_1 + 1) * x - coeff_1 * denoised
            if inject_noise:
                x_2 = x_2 + sigma_s_1 * (noise_coeff_1 * noise_1) * s_noise
            denoised_2 = model(x_2, sigma_s_1 * s_in, **extra_args)

            # Step 2
-            x_3 = sigma_s_2 / sigmas[i] * (-r_2 * h * eta).exp() * x - alpha_s_2 * coeff_2 * denoised + (r_2 / r_1) * alpha_s_2 * (coeff_2 / (r_2 * h_eta) + 1) * (denoised_2 - denoised)
+            x_3 = (coeff_2 + 1) * x - coeff_2 * denoised + (r_2 / r_1) * (coeff_2 / (r_2 * h_eta) + 1) * (denoised_2 - denoised)
            if inject_noise:
                x_3 = x_3 + sigma_s_2 * (noise_coeff_2 * noise_1 + noise_coeff_1 * noise_2) * s_noise
            denoised_3 = model(x_3, sigma_s_2 * s_in, **extra_args)

            # Step 3
-            x = sigmas[i + 1] / sigmas[i] * (-h * eta).exp() * x - alpha_t * coeff_3 * denoised + (1. / r_2) * alpha_t * (coeff_3 / h_eta + 1) * (denoised_3 - denoised)
+            x = (coeff_3 + 1) * x - coeff_3 * denoised + (1. / r_2) * (coeff_3 / h_eta + 1) * (denoised_3 - denoised)
            if inject_noise:
                x = x + sigmas[i + 1] * (noise_coeff_3 * noise_1 + noise_coeff_2 * noise_2 + noise_coeff_1 * noise_3) * s_noise
    return x
-
-
-@torch.no_grad()
-def sample_sa_solver(model, x, sigmas, extra_args=None, callback=None, disable=False, tau_func=None, s_noise=1.0, noise_sampler=None, predictor_order=3, corrector_order=4, use_pece=False, simple_order_2=False):
-    """Stochastic Adams Solver with predictor-corrector method (NeurIPS 2023)."""
-    if len(sigmas) <= 1:
-        return x
-    extra_args = {} if extra_args is None else extra_args
-    seed = extra_args.get("seed", None)
-    noise_sampler = default_noise_sampler(x, seed=seed) if noise_sampler is None else noise_sampler
-    s_in = x.new_ones([x.shape[0]])
-
-    model_sampling = model.inner_model.model_patcher.get_model_object("model_sampling")
-    sigmas = offset_first_sigma_for_snr(sigmas, model_sampling)
-    lambdas = sigma_to_half_log_snr(sigmas, model_sampling=model_sampling)
-
-    if tau_func is None:
-        # Use default interval for stochastic sampling
-        start_sigma = model_sampling.percent_to_sigma(0.2)
-        end_sigma = model_sampling.percent_to_sigma(0.8)
-        tau_func = sa_solver.get_tau_interval_func(start_sigma, end_sigma, eta=1.0)
-
-    max_used_order = max(predictor_order, corrector_order)
-    x_pred = x  # x: current state, x_pred: predicted next state
-
-    h = 0.0
-    tau_t = 0.0
-    noise = 0.0
-    pred_list = []
-
-    # Lower order near the end to improve stability
-    lower_order_to_end = sigmas[-1].item() == 0
-
-    for i in trange(len(sigmas) - 1, disable=disable):
-        # Evaluation
-        denoised = model(x_pred, sigmas[i] * s_in, **extra_args)
-        if callback is not None:
-            callback({"x": x_pred, "i": i, "sigma": sigmas[i], "sigma_hat": sigmas[i], "denoised": denoised})
-        pred_list.append(denoised)
-        pred_list = pred_list[-max_used_order:]
-
-        predictor_order_used = min(predictor_order, len(pred_list))
-        if i == 0 or (sigmas[i + 1] == 0 and not use_pece):
-            corrector_order_used = 0
-        else:
-            corrector_order_used = min(corrector_order, len(pred_list))
-
-        if lower_order_to_end:
-            predictor_order_used = min(predictor_order_used, len(sigmas) - 2 - i)
-            corrector_order_used = min(corrector_order_used, len(sigmas) - 1 - i)
-
-        # Corrector
-        if corrector_order_used == 0:
-            # Update by the predicted state
-            x = x_pred
-        else:
-            curr_lambdas = lambdas[i - corrector_order_used + 1:i + 1]
-            b_coeffs = sa_solver.compute_stochastic_adams_b_coeffs(
-                sigmas[i],
-                curr_lambdas,
-                lambdas[i - 1],
-                lambdas[i],
-                tau_t,
-                simple_order_2,
-                is_corrector_step=True,
-            )
-            pred_mat = torch.stack(pred_list[-corrector_order_used:], dim=1)    # (B, K, ...)
-            corr_res = torch.tensordot(pred_mat, b_coeffs, dims=([1], [0]))  # (B, ...)
-            x = sigmas[i] / sigmas[i - 1] * (-(tau_t ** 2) * h).exp() * x + corr_res
-
-            if tau_t > 0 and s_noise > 0:
-                # The noise from the previous predictor step
-                x = x + noise
-
-            if use_pece:
-                # Evaluate the corrected state
-                denoised = model(x, sigmas[i] * s_in, **extra_args)
-                pred_list[-1] = denoised
-
-        # Predictor
-        if sigmas[i + 1] == 0:
-            # Denoising step
-            x = denoised
-        else:
-            tau_t = tau_func(sigmas[i + 1])
-            curr_lambdas = lambdas[i - predictor_order_used + 1:i + 1]
-            b_coeffs = sa_solver.compute_stochastic_adams_b_coeffs(
-                sigmas[i + 1],
-                curr_lambdas,
-                lambdas[i],
-                lambdas[i + 1],
-                tau_t,
-                simple_order_2,
-                is_corrector_step=False,
-            )
-            pred_mat = torch.stack(pred_list[-predictor_order_used:], dim=1)    # (B, K, ...)
-            pred_res = torch.tensordot(pred_mat, b_coeffs, dims=([1], [0]))  # (B, ...)
-            h = lambdas[i + 1] - lambdas[i]
-            x_pred = sigmas[i + 1] / sigmas[i] * (-(tau_t ** 2) * h).exp() * x + pred_res
-
-            if tau_t > 0 and s_noise > 0:
-                noise = noise_sampler(sigmas[i], sigmas[i + 1]) * sigmas[i + 1] * (-2 * tau_t ** 2 * h).expm1().neg().sqrt() * s_noise
-                x_pred = x_pred + noise
-    return x
-
-
-@torch.no_grad()
-def sample_sa_solver_pece(model, x, sigmas, extra_args=None, callback=None, disable=False, tau_func=None, s_noise=1.0, noise_sampler=None, predictor_order=3, corrector_order=4, simple_order_2=False):
-    """Stochastic Adams Solver with PECE (Predict–Evaluate–Correct–Evaluate) mode (NeurIPS 2023)."""
-    return sample_sa_solver(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, tau_func=tau_func, s_noise=s_noise, noise_sampler=noise_sampler, predictor_order=predictor_order, corrector_order=corrector_order, use_pece=True, simple_order_2=simple_order_2)
--- a/comfy/latent_formats.py
+++ b/comfy/latent_formats.py
@@ -457,82 +457,6 @@ class Wan21(LatentFormat):
        latents_std = self.latents_std.to(latent.device, latent.dtype)
        return latent * latents_std / self.scale_factor + latents_mean

-class Wan22(Wan21):
-    latent_channels = 48
-    latent_dimensions = 3
-
-    latent_rgb_factors = [
-            [ 0.0119,  0.0103,  0.0046],
-            [-0.1062, -0.0504,  0.0165],
-            [ 0.0140,  0.0409,  0.0491],
-            [-0.0813, -0.0677,  0.0607],
-            [ 0.0656,  0.0851,  0.0808],
-            [ 0.0264,  0.0463,  0.0912],
-            [ 0.0295,  0.0326,  0.0590],
-            [-0.0244, -0.0270,  0.0025],
-            [ 0.0443, -0.0102,  0.0288],
-            [-0.0465, -0.0090, -0.0205],
-            [ 0.0359,  0.0236,  0.0082],
-            [-0.0776,  0.0854,  0.1048],
-            [ 0.0564,  0.0264,  0.0561],
-            [ 0.0006,  0.0594,  0.0418],
-            [-0.0319, -0.0542, -0.0637],
-            [-0.0268,  0.0024,  0.0260],
-            [ 0.0539,  0.0265,  0.0358],
-            [-0.0359, -0.0312, -0.0287],
-            [-0.0285, -0.1032, -0.1237],
-            [ 0.1041,  0.0537,  0.0622],
-            [-0.0086, -0.0374, -0.0051],
-            [ 0.0390,  0.0670,  0.2863],
-            [ 0.0069,  0.0144,  0.0082],
-            [ 0.0006, -0.0167,  0.0079],
-            [ 0.0313, -0.0574, -0.0232],
-            [-0.1454, -0.0902, -0.0481],
-            [ 0.0714,  0.0827,  0.0447],
-            [-0.0304, -0.0574, -0.0196],
-            [ 0.0401,  0.0384,  0.0204],
-            [-0.0758, -0.0297, -0.0014],
-            [ 0.0568,  0.1307,  0.1372],
-            [-0.0055, -0.0310, -0.0380],
-            [ 0.0239, -0.0305,  0.0325],
-            [-0.0663, -0.0673, -0.0140],
-            [-0.0416, -0.0047, -0.0023],
-            [ 0.0166,  0.0112, -0.0093],
-            [-0.0211,  0.0011,  0.0331],
-            [ 0.1833,  0.1466,  0.2250],
-            [-0.0368,  0.0370,  0.0295],
-            [-0.3441, -0.3543, -0.2008],
-            [-0.0479, -0.0489, -0.0420],
-            [-0.0660, -0.0153,  0.0800],
-            [-0.0101,  0.0068,  0.0156],
-            [-0.0690, -0.0452, -0.0927],
-            [-0.0145,  0.0041,  0.0015],
-            [ 0.0421,  0.0451,  0.0373],
-            [ 0.0504, -0.0483, -0.0356],
-            [-0.0837,  0.0168,  0.0055]
-        ]
-
-    latent_rgb_factors_bias = [0.0317, -0.0878, -0.1388]
-
-    def __init__(self):
-        self.scale_factor = 1.0
-        self.latents_mean = torch.tensor([
-                -0.2289, -0.0052, -0.1323, -0.2339, -0.2799, 0.0174, 0.1838, 0.1557,
-                -0.1382, 0.0542, 0.2813, 0.0891, 0.1570, -0.0098, 0.0375, -0.1825,
-                -0.2246, -0.1207, -0.0698, 0.5109, 0.2665, -0.2108, -0.2158, 0.2502,
-                -0.2055, -0.0322, 0.1109, 0.1567, -0.0729, 0.0899, -0.2799, -0.1230,
-                -0.0313, -0.1649, 0.0117, 0.0723, -0.2839, -0.2083, -0.0520, 0.3748,
-                0.0152, 0.1957, 0.1433, -0.2944, 0.3573, -0.0548, -0.1681, -0.0667,
-            ]).view(1, self.latent_channels, 1, 1, 1)
-        self.latents_std = torch.tensor([
-                0.4765, 1.0364, 0.4514, 1.1677, 0.5313, 0.4990, 0.4818, 0.5013,
-                0.8158, 1.0344, 0.5894, 1.0901, 0.6885, 0.6165, 0.8454, 0.4978,
-                0.5759, 0.3523, 0.7135, 0.6804, 0.5833, 1.4146, 0.8986, 0.5659,
-                0.7069, 0.5338, 0.4889, 0.4917, 0.4069, 0.4999, 0.6866, 0.4093,
-                0.5709, 0.6065, 0.6415, 0.4944, 0.5726, 1.2042, 0.5458, 1.6887,
-                0.3971, 1.0600, 0.3943, 0.5537, 0.5444, 0.4089, 0.7468, 0.7744
-            ]).view(1, self.latent_channels, 1, 1, 1)
-
 class Hunyuan3Dv2(LatentFormat):
    latent_channels = 64
    latent_dimensions = 1
--- a/comfy/ldm/chroma/model.py
+++ b/comfy/ldm/chroma/model.py
@@ -254,12 +254,13 @@ class Chroma(nn.Module):

    def forward(self, x, timestep, context, guidance, control=None, transformer_options={}, **kwargs):
        bs, c, h, w = x.shape
-        x = comfy.ldm.common_dit.pad_to_patch_size(x, (self.patch_size, self.patch_size))
+        patch_size = 2
+        x = comfy.ldm.common_dit.pad_to_patch_size(x, (patch_size, patch_size))

-        img = rearrange(x, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=self.patch_size, pw=self.patch_size)
+        img = rearrange(x, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size)

-        h_len = ((h + (self.patch_size // 2)) // self.patch_size)
-        w_len = ((w + (self.patch_size // 2)) // self.patch_size)
+        h_len = ((h + (patch_size // 2)) // patch_size)
+        w_len = ((w + (patch_size // 2)) // patch_size)
        img_ids = torch.zeros((h_len, w_len, 3), device=x.device, dtype=x.dtype)
        img_ids[:, :, 1] = img_ids[:, :, 1] + torch.linspace(0, h_len - 1, steps=h_len, device=x.device, dtype=x.dtype).unsqueeze(1)
        img_ids[:, :, 2] = img_ids[:, :, 2] + torch.linspace(0, w_len - 1, steps=w_len, device=x.device, dtype=x.dtype).unsqueeze(0)
@@ -267,4 +268,4 @@ class Chroma(nn.Module):

        txt_ids = torch.zeros((bs, context.shape[1], 3), device=x.device, dtype=x.dtype)
        out = self.forward_orig(img, img_ids, context, txt_ids, timestep, guidance, control, transformer_options, attn_mask=kwargs.get("attention_mask", None))
-        return rearrange(out, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=h_len, w=w_len, ph=self.patch_size, pw=self.patch_size)[:,:,:h,:w]
+        return rearrange(out, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=h_len, w=w_len, ph=2, pw=2)[:,:,:h,:w]
--- a/comfy/ldm/cosmos/predict2.py
+++ b/comfy/ldm/cosmos/predict2.py
@@ -70,7 +70,11 @@ def torch_attention_op(q_B_S_H_D: torch.Tensor, k_B_S_H_D: torch.Tensor, v_B_S_H
    q_B_H_S_D = rearrange(q_B_S_H_D, "b ... h k -> b h ... k").view(in_q_shape[0], in_q_shape[-2], -1, in_q_shape[-1])
    k_B_H_S_D = rearrange(k_B_S_H_D, "b ... h v -> b h ... v").view(in_k_shape[0], in_k_shape[-2], -1, in_k_shape[-1])
    v_B_H_S_D = rearrange(v_B_S_H_D, "b ... h v -> b h ... v").view(in_k_shape[0], in_k_shape[-2], -1, in_k_shape[-1])
-    return optimized_attention(q_B_H_S_D, k_B_H_S_D, v_B_H_S_D, in_q_shape[-2], skip_reshape=True)
+    result_B_S_HD = rearrange(
+        optimized_attention(q_B_H_S_D, k_B_H_S_D, v_B_H_S_D, in_q_shape[-2], skip_reshape=True, skip_output_reshape=True), "b h ... l -> b ... (h l)"
+    )
+
+    return result_B_S_HD


 class Attention(nn.Module):
--- a/comfy/ldm/flux/controlnet.py
+++ b/comfy/ldm/flux/controlnet.py
@@ -123,8 +123,6 @@ class ControlNetFlux(Flux):

        if y is None:
            y = torch.zeros((img.shape[0], self.params.vec_in_dim), device=img.device, dtype=img.dtype)
-        else:
-            y = y[:, :self.params.vec_in_dim]

        # running on sequences img
        img = self.img_in(img)
--- a/comfy/ldm/flux/layers.py
+++ b/comfy/ldm/flux/layers.py
@@ -118,7 +118,7 @@ class Modulation(nn.Module):
 def apply_mod(tensor, m_mult, m_add=None, modulation_dims=None):
    if modulation_dims is None:
        if m_add is not None:
-            return torch.addcmul(m_add, tensor, m_mult)
+            return tensor * m_mult + m_add
        else:
            return tensor * m_mult
    else:
--- a/comfy/ldm/flux/model.py
+++ b/comfy/ldm/flux/model.py
@@ -195,50 +195,20 @@ class Flux(nn.Module):
        img = self.final_layer(img, vec)  # (N, T, patch_size ** 2 * out_channels)
        return img

-    def process_img(self, x, index=0, h_offset=0, w_offset=0):
+    def forward(self, x, timestep, context, y=None, guidance=None, control=None, transformer_options={}, **kwargs):
        bs, c, h, w = x.shape
        patch_size = self.patch_size
        x = comfy.ldm.common_dit.pad_to_patch_size(x, (patch_size, patch_size))

        img = rearrange(x, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size)
+
        h_len = ((h + (patch_size // 2)) // patch_size)
        w_len = ((w + (patch_size // 2)) // patch_size)
-
-        h_offset = ((h_offset + (patch_size // 2)) // patch_size)
-        w_offset = ((w_offset + (patch_size // 2)) // patch_size)
-
        img_ids = torch.zeros((h_len, w_len, 3), device=x.device, dtype=x.dtype)
-        img_ids[:, :, 0] = img_ids[:, :, 1] + index
-        img_ids[:, :, 1] = img_ids[:, :, 1] + torch.linspace(h_offset, h_len - 1 + h_offset, steps=h_len, device=x.device, dtype=x.dtype).unsqueeze(1)
-        img_ids[:, :, 2] = img_ids[:, :, 2] + torch.linspace(w_offset, w_len - 1 + w_offset, steps=w_len, device=x.device, dtype=x.dtype).unsqueeze(0)
-        return img, repeat(img_ids, "h w c -> b (h w) c", b=bs)
-
-    def forward(self, x, timestep, context, y=None, guidance=None, ref_latents=None, control=None, transformer_options={}, **kwargs):
-        bs, c, h_orig, w_orig = x.shape
-        patch_size = self.patch_size
-
-        h_len = ((h_orig + (patch_size // 2)) // patch_size)
-        w_len = ((w_orig + (patch_size // 2)) // patch_size)
-        img, img_ids = self.process_img(x)
-        img_tokens = img.shape[1]
-        if ref_latents is not None:
-            h = 0
-            w = 0
-            for ref in ref_latents:
-                h_offset = 0
-                w_offset = 0
-                if ref.shape[-2] + h > ref.shape[-1] + w:
-                    w_offset = w
-                else:
-                    h_offset = h
-
-                kontext, kontext_ids = self.process_img(ref, index=1, h_offset=h_offset, w_offset=w_offset)
-                img = torch.cat([img, kontext], dim=1)
-                img_ids = torch.cat([img_ids, kontext_ids], dim=1)
-                h = max(h, ref.shape[-2] + h_offset)
-                w = max(w, ref.shape[-1] + w_offset)
+        img_ids[:, :, 1] = img_ids[:, :, 1] + torch.linspace(0, h_len - 1, steps=h_len, device=x.device, dtype=x.dtype).unsqueeze(1)
+        img_ids[:, :, 2] = img_ids[:, :, 2] + torch.linspace(0, w_len - 1, steps=w_len, device=x.device, dtype=x.dtype).unsqueeze(0)
+        img_ids = repeat(img_ids, "h w c -> b (h w) c", b=bs)

        txt_ids = torch.zeros((bs, context.shape[1], 3), device=x.device, dtype=x.dtype)
        out = self.forward_orig(img, img_ids, context, txt_ids, timestep, y, guidance, control, transformer_options, attn_mask=kwargs.get("attention_mask", None))
-        out = out[:, :img_tokens]
-        return rearrange(out, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=h_len, w=w_len, ph=2, pw=2)[:,:,:h_orig,:w_orig]
+        return rearrange(out, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=h_len, w=w_len, ph=2, pw=2)[:,:,:h,:w]
--- a/comfy/ldm/lightricks/model.py
+++ b/comfy/ldm/lightricks/model.py
@@ -261,8 +261,8 @@ class CrossAttention(nn.Module):
        self.heads = heads
        self.dim_head = dim_head

-        self.q_norm = operations.RMSNorm(inner_dim, eps=1e-5, dtype=dtype, device=device)
-        self.k_norm = operations.RMSNorm(inner_dim, eps=1e-5, dtype=dtype, device=device)
+        self.q_norm = operations.RMSNorm(inner_dim, dtype=dtype, device=device)
+        self.k_norm = operations.RMSNorm(inner_dim, dtype=dtype, device=device)

        self.to_q = operations.Linear(query_dim, inner_dim, bias=True, dtype=dtype, device=device)
        self.to_k = operations.Linear(context_dim, inner_dim, bias=True, dtype=dtype, device=device)
--- a/comfy/ldm/lightricks/vae/causal_video_autoencoder.py
+++ b/comfy/ldm/lightricks/vae/causal_video_autoencoder.py
@@ -973,7 +973,7 @@ class VideoVAE(nn.Module):
            norm_layer=config.get("norm_layer", "group_norm"),
            causal=config.get("causal_decoder", False),
            timestep_conditioning=self.timestep_conditioning,
-            spatial_padding_mode=config.get("spatial_padding_mode", "reflect"),
+            spatial_padding_mode=config.get("spatial_padding_mode", "zeros"),
        )

        self.per_channel_statistics = processor()
--- a/comfy/ldm/models/autoencoder.py
+++ b/comfy/ldm/models/autoencoder.py
@@ -11,7 +11,7 @@ from comfy.ldm.modules.ema import LitEma
 import comfy.ops

 class DiagonalGaussianRegularizer(torch.nn.Module):
-    def __init__(self, sample: bool = False):
+    def __init__(self, sample: bool = True):
        super().__init__()
        self.sample = sample

@@ -19,12 +19,16 @@ class DiagonalGaussianRegularizer(torch.nn.Module):
        yield from ()

    def forward(self, z: torch.Tensor) -> Tuple[torch.Tensor, dict]:
+        log = dict()
        posterior = DiagonalGaussianDistribution(z)
        if self.sample:
            z = posterior.sample()
        else:
            z = posterior.mode()
-        return z, None
+        kl_loss = posterior.kl()
+        kl_loss = torch.sum(kl_loss) / kl_loss.shape[0]
+        log["kl_loss"] = kl_loss
+        return z, log


 class AbstractAutoencoder(torch.nn.Module):
--- a/comfy/ldm/modules/sub_quadratic_attention.py
+++ b/comfy/ldm/modules/sub_quadratic_attention.py
@@ -31,7 +31,7 @@ def dynamic_slice(
    starts: List[int],
    sizes: List[int],
 ) -> Tensor:
-    slicing = tuple(slice(start, start + size) for start, size in zip(starts, sizes))
+    slicing = [slice(start, start + size) for start, size in zip(starts, sizes)]
    return x[slicing]

 class AttnChunk(NamedTuple):
--- a/comfy/ldm/omnigen/omnigen2.py
+++ b/comfy/ldm/omnigen/omnigen2.py
@@ -1,469 +0,0 @@
-# Original code: https://github.com/VectorSpaceLab/OmniGen2
-
-from typing import Optional, Tuple
-
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from einops import rearrange, repeat
-from comfy.ldm.lightricks.model import Timesteps
-from comfy.ldm.flux.layers import EmbedND
-from comfy.ldm.modules.attention import optimized_attention_masked
-import comfy.model_management
-import comfy.ldm.common_dit
-
-
-def apply_rotary_emb(x, freqs_cis):
-    if x.shape[1] == 0:
-        return x
-
-    t_ = x.reshape(*x.shape[:-1], -1, 1, 2)
-    t_out = freqs_cis[..., 0] * t_[..., 0] + freqs_cis[..., 1] * t_[..., 1]
-    return t_out.reshape(*x.shape).to(dtype=x.dtype)
-
-
-def swiglu(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
-    return F.silu(x) * y
-
-
-class TimestepEmbedding(nn.Module):
-    def __init__(self, in_channels: int, time_embed_dim: int, dtype=None, device=None, operations=None):
-        super().__init__()
-        self.linear_1 = operations.Linear(in_channels, time_embed_dim, dtype=dtype, device=device)
-        self.act = nn.SiLU()
-        self.linear_2 = operations.Linear(time_embed_dim, time_embed_dim, dtype=dtype, device=device)
-
-    def forward(self, sample: torch.Tensor) -> torch.Tensor:
-        sample = self.linear_1(sample)
-        sample = self.act(sample)
-        sample = self.linear_2(sample)
-        return sample
-
-
-class LuminaRMSNormZero(nn.Module):
-    def __init__(self, embedding_dim: int, norm_eps: float = 1e-5, dtype=None, device=None, operations=None):
-        super().__init__()
-        self.silu = nn.SiLU()
-        self.linear = operations.Linear(min(embedding_dim, 1024), 4 * embedding_dim, dtype=dtype, device=device)
-        self.norm = operations.RMSNorm(embedding_dim, eps=norm_eps, dtype=dtype, device=device)
-
-    def forward(self, x: torch.Tensor, emb: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
-        emb = self.linear(self.silu(emb))
-        scale_msa, gate_msa, scale_mlp, gate_mlp = emb.chunk(4, dim=1)
-        x = self.norm(x) * (1 + scale_msa[:, None])
-        return x, gate_msa, scale_mlp, gate_mlp
-
-
-class LuminaLayerNormContinuous(nn.Module):
-    def __init__(self, embedding_dim: int, conditioning_embedding_dim: int, elementwise_affine: bool = False, eps: float = 1e-6, out_dim: Optional[int] = None, dtype=None, device=None, operations=None):
-        super().__init__()
-        self.silu = nn.SiLU()
-        self.linear_1 = operations.Linear(conditioning_embedding_dim, embedding_dim, dtype=dtype, device=device)
-        self.norm = operations.LayerNorm(embedding_dim, eps, elementwise_affine, dtype=dtype, device=device)
-        self.linear_2 = operations.Linear(embedding_dim, out_dim, bias=True, dtype=dtype, device=device) if out_dim is not None else None
-
-    def forward(self, x: torch.Tensor, conditioning_embedding: torch.Tensor) -> torch.Tensor:
-        emb = self.linear_1(self.silu(conditioning_embedding).to(x.dtype))
-        x = self.norm(x) * (1 + emb)[:, None, :]
-        if self.linear_2 is not None:
-            x = self.linear_2(x)
-        return x
-
-
-class LuminaFeedForward(nn.Module):
-    def __init__(self, dim: int, inner_dim: int, multiple_of: int = 256, dtype=None, device=None, operations=None):
-        super().__init__()
-        inner_dim = multiple_of * ((inner_dim + multiple_of - 1) // multiple_of)
-        self.linear_1 = operations.Linear(dim, inner_dim, bias=False, dtype=dtype, device=device)
-        self.linear_2 = operations.Linear(inner_dim, dim, bias=False, dtype=dtype, device=device)
-        self.linear_3 = operations.Linear(dim, inner_dim, bias=False, dtype=dtype, device=device)
-
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        h1, h2 = self.linear_1(x), self.linear_3(x)
-        return self.linear_2(swiglu(h1, h2))
-
-
-class Lumina2CombinedTimestepCaptionEmbedding(nn.Module):
-    def __init__(self, hidden_size: int = 4096, text_feat_dim: int = 2048, frequency_embedding_size: int = 256, norm_eps: float = 1e-5, timestep_scale: float = 1.0, dtype=None, device=None, operations=None):
-        super().__init__()
-        self.time_proj = Timesteps(num_channels=frequency_embedding_size, flip_sin_to_cos=True, downscale_freq_shift=0.0, scale=timestep_scale)
-        self.timestep_embedder = TimestepEmbedding(in_channels=frequency_embedding_size, time_embed_dim=min(hidden_size, 1024), dtype=dtype, device=device, operations=operations)
-        self.caption_embedder = nn.Sequential(
-            operations.RMSNorm(text_feat_dim, eps=norm_eps, dtype=dtype, device=device),
-            operations.Linear(text_feat_dim, hidden_size, bias=True, dtype=dtype, device=device),
-        )
-
-    def forward(self, timestep: torch.Tensor, text_hidden_states: torch.Tensor, dtype: torch.dtype) -> Tuple[torch.Tensor, torch.Tensor]:
-        timestep_proj = self.time_proj(timestep).to(dtype=dtype)
-        time_embed = self.timestep_embedder(timestep_proj)
-        caption_embed = self.caption_embedder(text_hidden_states)
-        return time_embed, caption_embed
-
-
-class Attention(nn.Module):
-    def __init__(self, query_dim: int, dim_head: int, heads: int, kv_heads: int, eps: float = 1e-5, bias: bool = False, dtype=None, device=None, operations=None):
-        super().__init__()
-        self.heads = heads
-        self.kv_heads = kv_heads
-        self.dim_head = dim_head
-        self.scale = dim_head ** -0.5
-
-        self.to_q = operations.Linear(query_dim, heads * dim_head, bias=bias, dtype=dtype, device=device)
-        self.to_k = operations.Linear(query_dim, kv_heads * dim_head, bias=bias, dtype=dtype, device=device)
-        self.to_v = operations.Linear(query_dim, kv_heads * dim_head, bias=bias, dtype=dtype, device=device)
-
-        self.norm_q = operations.RMSNorm(dim_head, eps=eps, dtype=dtype, device=device)
-        self.norm_k = operations.RMSNorm(dim_head, eps=eps, dtype=dtype, device=device)
-
-        self.to_out = nn.Sequential(
-            operations.Linear(heads * dim_head, query_dim, bias=bias, dtype=dtype, device=device),
-            nn.Dropout(0.0)
-        )
-
-    def forward(self, hidden_states: torch.Tensor, encoder_hidden_states: torch.Tensor, attention_mask: Optional[torch.Tensor] = None, image_rotary_emb: Optional[torch.Tensor] = None) -> torch.Tensor:
-        batch_size, sequence_length, _ = hidden_states.shape
-
-        query = self.to_q(hidden_states)
-        key = self.to_k(encoder_hidden_states)
-        value = self.to_v(encoder_hidden_states)
-
-        query = query.view(batch_size, -1, self.heads, self.dim_head)
-        key = key.view(batch_size, -1, self.kv_heads, self.dim_head)
-        value = value.view(batch_size, -1, self.kv_heads, self.dim_head)
-
-        query = self.norm_q(query)
-        key = self.norm_k(key)
-
-        if image_rotary_emb is not None:
-            query = apply_rotary_emb(query, image_rotary_emb)
-            key = apply_rotary_emb(key, image_rotary_emb)
-
-        query = query.transpose(1, 2)
-        key = key.transpose(1, 2)
-        value = value.transpose(1, 2)
-
-        if self.kv_heads < self.heads:
-            key = key.repeat_interleave(self.heads // self.kv_heads, dim=1)
-            value = value.repeat_interleave(self.heads // self.kv_heads, dim=1)
-
-        hidden_states = optimized_attention_masked(query, key, value, self.heads, attention_mask, skip_reshape=True)
-        hidden_states = self.to_out[0](hidden_states)
-        return hidden_states
-
-
-class OmniGen2TransformerBlock(nn.Module):
-    def __init__(self, dim: int, num_attention_heads: int, num_kv_heads: int, multiple_of: int, ffn_dim_multiplier: float, norm_eps: float, modulation: bool = True, dtype=None, device=None, operations=None):
-        super().__init__()
-        self.modulation = modulation
-
-        self.attn = Attention(
-            query_dim=dim,
-            dim_head=dim // num_attention_heads,
-            heads=num_attention_heads,
-            kv_heads=num_kv_heads,
-            eps=1e-5,
-            bias=False,
-            dtype=dtype, device=device, operations=operations,
-        )
-
-        self.feed_forward = LuminaFeedForward(
-            dim=dim,
-            inner_dim=4 * dim,
-            multiple_of=multiple_of,
-            dtype=dtype, device=device, operations=operations
-        )
-
-        if modulation:
-            self.norm1 = LuminaRMSNormZero(embedding_dim=dim, norm_eps=norm_eps, dtype=dtype, device=device, operations=operations)
-        else:
-            self.norm1 = operations.RMSNorm(dim, eps=norm_eps, dtype=dtype, device=device)
-
-        self.ffn_norm1 = operations.RMSNorm(dim, eps=norm_eps, dtype=dtype, device=device)
-        self.norm2 = operations.RMSNorm(dim, eps=norm_eps, dtype=dtype, device=device)
-        self.ffn_norm2 = operations.RMSNorm(dim, eps=norm_eps, dtype=dtype, device=device)
-
-    def forward(self, hidden_states: torch.Tensor, attention_mask: torch.Tensor, image_rotary_emb: torch.Tensor, temb: Optional[torch.Tensor] = None) -> torch.Tensor:
-        if self.modulation:
-            norm_hidden_states, gate_msa, scale_mlp, gate_mlp = self.norm1(hidden_states, temb)
-            attn_output = self.attn(norm_hidden_states, norm_hidden_states, attention_mask, image_rotary_emb)
-            hidden_states = hidden_states + gate_msa.unsqueeze(1).tanh() * self.norm2(attn_output)
-            mlp_output = self.feed_forward(self.ffn_norm1(hidden_states) * (1 + scale_mlp.unsqueeze(1)))
-            hidden_states = hidden_states + gate_mlp.unsqueeze(1).tanh() * self.ffn_norm2(mlp_output)
-        else:
-            norm_hidden_states = self.norm1(hidden_states)
-            attn_output = self.attn(norm_hidden_states, norm_hidden_states, attention_mask, image_rotary_emb)
-            hidden_states = hidden_states + self.norm2(attn_output)
-            mlp_output = self.feed_forward(self.ffn_norm1(hidden_states))
-            hidden_states = hidden_states + self.ffn_norm2(mlp_output)
-        return hidden_states
-
-
-class OmniGen2RotaryPosEmbed(nn.Module):
-    def __init__(self, theta: int, axes_dim: Tuple[int, int, int], axes_lens: Tuple[int, int, int] = (300, 512, 512), patch_size: int = 2):
-        super().__init__()
-        self.theta = theta
-        self.axes_dim = axes_dim
-        self.axes_lens = axes_lens
-        self.patch_size = patch_size
-        self.rope_embedder = EmbedND(dim=sum(axes_dim), theta=self.theta, axes_dim=axes_dim)
-
-    def forward(self, batch_size, encoder_seq_len, l_effective_cap_len, l_effective_ref_img_len, l_effective_img_len, ref_img_sizes, img_sizes, device):
-        p = self.patch_size
-
-        seq_lengths = [cap_len + sum(ref_img_len) + img_len for cap_len, ref_img_len, img_len in zip(l_effective_cap_len, l_effective_ref_img_len, l_effective_img_len)]
-
-        max_seq_len = max(seq_lengths)
-        max_ref_img_len = max([sum(ref_img_len) for ref_img_len in l_effective_ref_img_len])
-        max_img_len = max(l_effective_img_len)
-
-        position_ids = torch.zeros(batch_size, max_seq_len, 3, dtype=torch.int32, device=device)
-
-        for i, (cap_seq_len, seq_len) in enumerate(zip(l_effective_cap_len, seq_lengths)):
-            position_ids[i, :cap_seq_len] = repeat(torch.arange(cap_seq_len, dtype=torch.int32, device=device), "l -> l 3")
-
-            pe_shift = cap_seq_len
-            pe_shift_len = cap_seq_len
-
-            if ref_img_sizes[i] is not None:
-                for ref_img_size, ref_img_len in zip(ref_img_sizes[i], l_effective_ref_img_len[i]):
-                    H, W = ref_img_size
-                    ref_H_tokens, ref_W_tokens = H // p, W // p
-
-                    row_ids = repeat(torch.arange(ref_H_tokens, dtype=torch.int32, device=device), "h -> h w", w=ref_W_tokens).flatten()
-                    col_ids = repeat(torch.arange(ref_W_tokens, dtype=torch.int32, device=device), "w -> h w", h=ref_H_tokens).flatten()
-                    position_ids[i, pe_shift_len:pe_shift_len + ref_img_len, 0] = pe_shift
-                    position_ids[i, pe_shift_len:pe_shift_len + ref_img_len, 1] = row_ids
-                    position_ids[i, pe_shift_len:pe_shift_len + ref_img_len, 2] = col_ids
-
-                    pe_shift += max(ref_H_tokens, ref_W_tokens)
-                    pe_shift_len += ref_img_len
-
-            H, W = img_sizes[i]
-            H_tokens, W_tokens = H // p, W // p
-
-            row_ids = repeat(torch.arange(H_tokens, dtype=torch.int32, device=device), "h -> h w", w=W_tokens).flatten()
-            col_ids = repeat(torch.arange(W_tokens, dtype=torch.int32, device=device), "w -> h w", h=H_tokens).flatten()
-
-            position_ids[i, pe_shift_len: seq_len, 0] = pe_shift
-            position_ids[i, pe_shift_len: seq_len, 1] = row_ids
-            position_ids[i, pe_shift_len: seq_len, 2] = col_ids
-
-        freqs_cis = self.rope_embedder(position_ids).movedim(1, 2)
-
-        cap_freqs_cis_shape = list(freqs_cis.shape)
-        cap_freqs_cis_shape[1] = encoder_seq_len
-        cap_freqs_cis = torch.zeros(*cap_freqs_cis_shape, device=device, dtype=freqs_cis.dtype)
-
-        ref_img_freqs_cis_shape = list(freqs_cis.shape)
-        ref_img_freqs_cis_shape[1] = max_ref_img_len
-        ref_img_freqs_cis = torch.zeros(*ref_img_freqs_cis_shape, device=device, dtype=freqs_cis.dtype)
-
-        img_freqs_cis_shape = list(freqs_cis.shape)
-        img_freqs_cis_shape[1] = max_img_len
-        img_freqs_cis = torch.zeros(*img_freqs_cis_shape, device=device, dtype=freqs_cis.dtype)
-
-        for i, (cap_seq_len, ref_img_len, img_len, seq_len) in enumerate(zip(l_effective_cap_len, l_effective_ref_img_len, l_effective_img_len, seq_lengths)):
-            cap_freqs_cis[i, :cap_seq_len] = freqs_cis[i, :cap_seq_len]
-            ref_img_freqs_cis[i, :sum(ref_img_len)] = freqs_cis[i, cap_seq_len:cap_seq_len + sum(ref_img_len)]
-            img_freqs_cis[i, :img_len] = freqs_cis[i, cap_seq_len + sum(ref_img_len):cap_seq_len + sum(ref_img_len) + img_len]
-
-        return cap_freqs_cis, ref_img_freqs_cis, img_freqs_cis, freqs_cis, l_effective_cap_len, seq_lengths
-
-
-class OmniGen2Transformer2DModel(nn.Module):
-    def __init__(
-        self,
-        patch_size: int = 2,
-        in_channels: int = 16,
-        out_channels: Optional[int] = None,
-        hidden_size: int = 2304,
-        num_layers: int = 26,
-        num_refiner_layers: int = 2,
-        num_attention_heads: int = 24,
-        num_kv_heads: int = 8,
-        multiple_of: int = 256,
-        ffn_dim_multiplier: Optional[float] = None,
-        norm_eps: float = 1e-5,
-        axes_dim_rope: Tuple[int, int, int] = (32, 32, 32),
-        axes_lens: Tuple[int, int, int] = (300, 512, 512),
-        text_feat_dim: int = 1024,
-        timestep_scale: float = 1.0,
-        image_model=None,
-        device=None,
-        dtype=None,
-        operations=None,
-    ):
-        super().__init__()
-
-        self.patch_size = patch_size
-        self.out_channels = out_channels or in_channels
-        self.hidden_size = hidden_size
-        self.dtype = dtype
-
-        self.rope_embedder = OmniGen2RotaryPosEmbed(
-            theta=10000,
-            axes_dim=axes_dim_rope,
-            axes_lens=axes_lens,
-            patch_size=patch_size,
-        )
-
-        self.x_embedder = operations.Linear(patch_size * patch_size * in_channels, hidden_size, dtype=dtype, device=device)
-        self.ref_image_patch_embedder = operations.Linear(patch_size * patch_size * in_channels, hidden_size, dtype=dtype, device=device)
-
-        self.time_caption_embed = Lumina2CombinedTimestepCaptionEmbedding(
-            hidden_size=hidden_size,
-            text_feat_dim=text_feat_dim,
-            norm_eps=norm_eps,
-            timestep_scale=timestep_scale, dtype=dtype, device=device, operations=operations
-        )
-
-        self.noise_refiner = nn.ModuleList([
-            OmniGen2TransformerBlock(
-                hidden_size, num_attention_heads, num_kv_heads,
-                multiple_of, ffn_dim_multiplier, norm_eps, modulation=True, dtype=dtype, device=device, operations=operations
-            ) for _ in range(num_refiner_layers)
-        ])
-
-        self.ref_image_refiner = nn.ModuleList([
-            OmniGen2TransformerBlock(
-                hidden_size, num_attention_heads, num_kv_heads,
-                multiple_of, ffn_dim_multiplier, norm_eps, modulation=True, dtype=dtype, device=device, operations=operations
-            ) for _ in range(num_refiner_layers)
-        ])
-
-        self.context_refiner = nn.ModuleList([
-            OmniGen2TransformerBlock(
-                hidden_size, num_attention_heads, num_kv_heads,
-                multiple_of, ffn_dim_multiplier, norm_eps, modulation=False, dtype=dtype, device=device, operations=operations
-            ) for _ in range(num_refiner_layers)
-        ])
-
-        self.layers = nn.ModuleList([
-            OmniGen2TransformerBlock(
-                hidden_size, num_attention_heads, num_kv_heads,
-                multiple_of, ffn_dim_multiplier, norm_eps, modulation=True, dtype=dtype, device=device, operations=operations
-            ) for _ in range(num_layers)
-        ])
-
-        self.norm_out = LuminaLayerNormContinuous(
-            embedding_dim=hidden_size,
-            conditioning_embedding_dim=min(hidden_size, 1024),
-            elementwise_affine=False,
-            eps=1e-6,
-            out_dim=patch_size * patch_size * self.out_channels, dtype=dtype, device=device, operations=operations
-        )
-
-        self.image_index_embedding = nn.Parameter(torch.empty(5, hidden_size, device=device, dtype=dtype))
-
-    def flat_and_pad_to_seq(self, hidden_states, ref_image_hidden_states):
-        batch_size = len(hidden_states)
-        p = self.patch_size
-
-        img_sizes = [(img.size(1), img.size(2)) for img in hidden_states]
-        l_effective_img_len = [(H // p) * (W // p) for (H, W) in img_sizes]
-
-        if ref_image_hidden_states is not None:
-            ref_image_hidden_states = list(map(lambda ref: comfy.ldm.common_dit.pad_to_patch_size(ref, (p, p)), ref_image_hidden_states))
-            ref_img_sizes = [[(imgs.size(2), imgs.size(3)) if imgs is not None else None for imgs in ref_image_hidden_states]] * batch_size
-            l_effective_ref_img_len = [[(ref_img_size[0] // p) * (ref_img_size[1] // p) for ref_img_size in _ref_img_sizes] if _ref_img_sizes is not None else [0] for _ref_img_sizes in ref_img_sizes]
-        else:
-            ref_img_sizes = [None for _ in range(batch_size)]
-            l_effective_ref_img_len = [[0] for _ in range(batch_size)]
-
-        flat_ref_img_hidden_states = None
-        if ref_image_hidden_states is not None:
-            imgs = []
-            for ref_img in ref_image_hidden_states:
-                B, C, H, W = ref_img.size()
-                ref_img = rearrange(ref_img, 'b c (h p1) (w p2) -> b (h w) (p1 p2 c)', p1=p, p2=p)
-                imgs.append(ref_img)
-            flat_ref_img_hidden_states = torch.cat(imgs, dim=1)
-
-        img = hidden_states
-        B, C, H, W = img.size()
-        flat_hidden_states = rearrange(img, 'b c (h p1) (w p2) -> b (h w) (p1 p2 c)', p1=p, p2=p)
-
-        return (
-            flat_hidden_states, flat_ref_img_hidden_states,
-            None, None,
-            l_effective_ref_img_len, l_effective_img_len,
-            ref_img_sizes, img_sizes,
-        )
-
-    def img_patch_embed_and_refine(self, hidden_states, ref_image_hidden_states, padded_img_mask, padded_ref_img_mask, noise_rotary_emb, ref_img_rotary_emb, l_effective_ref_img_len, l_effective_img_len, temb):
-        batch_size = len(hidden_states)
-
-        hidden_states = self.x_embedder(hidden_states)
-        if ref_image_hidden_states is not None:
-            ref_image_hidden_states = self.ref_image_patch_embedder(ref_image_hidden_states)
-            image_index_embedding = comfy.model_management.cast_to(self.image_index_embedding, dtype=hidden_states.dtype, device=hidden_states.device)
-
-            for i in range(batch_size):
-                shift = 0
-                for j, ref_img_len in enumerate(l_effective_ref_img_len[i]):
-                    ref_image_hidden_states[i, shift:shift + ref_img_len, :] = ref_image_hidden_states[i, shift:shift + ref_img_len, :] + image_index_embedding[j]
-                    shift += ref_img_len
-
-        for layer in self.noise_refiner:
-            hidden_states = layer(hidden_states, padded_img_mask, noise_rotary_emb, temb)
-
-        if ref_image_hidden_states is not None:
-            for layer in self.ref_image_refiner:
-                ref_image_hidden_states = layer(ref_image_hidden_states, padded_ref_img_mask, ref_img_rotary_emb, temb)
-
-            hidden_states = torch.cat([ref_image_hidden_states, hidden_states], dim=1)
-
-        return hidden_states
-
-    def forward(self, x, timesteps, context, num_tokens, ref_latents=None, attention_mask=None, **kwargs):
-        B, C, H, W = x.shape
-        hidden_states = comfy.ldm.common_dit.pad_to_patch_size(x, (self.patch_size, self.patch_size))
-        _, _, H_padded, W_padded = hidden_states.shape
-        timestep = 1.0 - timesteps
-        text_hidden_states = context
-        text_attention_mask = attention_mask
-        ref_image_hidden_states = ref_latents
-        device = hidden_states.device
-
-        temb, text_hidden_states = self.time_caption_embed(timestep, text_hidden_states, hidden_states[0].dtype)
-
-        (
-            hidden_states, ref_image_hidden_states,
-            img_mask, ref_img_mask,
-            l_effective_ref_img_len, l_effective_img_len,
-            ref_img_sizes, img_sizes,
-        ) = self.flat_and_pad_to_seq(hidden_states, ref_image_hidden_states)
-
-        (
-            context_rotary_emb, ref_img_rotary_emb, noise_rotary_emb,
-            rotary_emb, encoder_seq_lengths, seq_lengths,
-        ) = self.rope_embedder(
-            hidden_states.shape[0], text_hidden_states.shape[1], [num_tokens] * text_hidden_states.shape[0],
-            l_effective_ref_img_len, l_effective_img_len,
-            ref_img_sizes, img_sizes, device,
-        )
-
-        for layer in self.context_refiner:
-            text_hidden_states = layer(text_hidden_states, text_attention_mask, context_rotary_emb)
-
-        img_len = hidden_states.shape[1]
-        combined_img_hidden_states = self.img_patch_embed_and_refine(
-            hidden_states, ref_image_hidden_states,
-            img_mask, ref_img_mask,
-            noise_rotary_emb, ref_img_rotary_emb,
-            l_effective_ref_img_len, l_effective_img_len,
-            temb,
-        )
-
-        hidden_states = torch.cat([text_hidden_states, combined_img_hidden_states], dim=1)
-        attention_mask = None
-
-        for layer in self.layers:
-            hidden_states = layer(hidden_states, attention_mask, rotary_emb, temb)
-
-        hidden_states = self.norm_out(hidden_states, temb)
-
-        p = self.patch_size
-        output = rearrange(hidden_states[:, -img_len:], 'b (h w) (p1 p2 c) -> b c (h p1) (w p2)',  h=H_padded // p, w=W_padded// p, p1=p, p2=p)[:, :, :H, :W]
-
-        return -output
--- a/comfy/ldm/pixart/pixartms.py
+++ b/comfy/ldm/pixart/pixartms.py
@@ -1,256 +1,256 @@
-# Based on:
-# https://github.com/PixArt-alpha/PixArt-alpha [Apache 2.0 license]
-# https://github.com/PixArt-alpha/PixArt-sigma [Apache 2.0 license]
-import torch
-import torch.nn as nn
-
-from .blocks import (
-    t2i_modulate,
-    CaptionEmbedder,
-    AttentionKVCompress,
-    MultiHeadCrossAttention,
-    T2IFinalLayer,
-    SizeEmbedder,
-)
-from comfy.ldm.modules.diffusionmodules.mmdit import TimestepEmbedder, PatchEmbed, Mlp, get_1d_sincos_pos_embed_from_grid_torch
-
-
-def get_2d_sincos_pos_embed_torch(embed_dim, w, h, pe_interpolation=1.0, base_size=16, device=None, dtype=torch.float32):
-    grid_h, grid_w = torch.meshgrid(
-        torch.arange(h, device=device, dtype=dtype) / (h/base_size) / pe_interpolation,
-        torch.arange(w, device=device, dtype=dtype) / (w/base_size) / pe_interpolation,
-        indexing='ij'
-    )
-    emb_h = get_1d_sincos_pos_embed_from_grid_torch(embed_dim // 2, grid_h, device=device, dtype=dtype)
-    emb_w = get_1d_sincos_pos_embed_from_grid_torch(embed_dim // 2, grid_w, device=device, dtype=dtype)
-    emb = torch.cat([emb_w, emb_h], dim=1)  # (H*W, D)
-    return emb
-
-class PixArtMSBlock(nn.Module):
-    """
-    A PixArt block with adaptive layer norm zero (adaLN-Zero) conditioning.
-    """
-    def __init__(self, hidden_size, num_heads, mlp_ratio=4.0, drop_path=0., input_size=None,
-                 sampling=None, sr_ratio=1, qk_norm=False, dtype=None, device=None, operations=None, **block_kwargs):
-        super().__init__()
-        self.hidden_size = hidden_size
-        self.norm1 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
-        self.attn = AttentionKVCompress(
-            hidden_size, num_heads=num_heads, qkv_bias=True, sampling=sampling, sr_ratio=sr_ratio,
-            qk_norm=qk_norm, dtype=dtype, device=device, operations=operations, **block_kwargs
-        )
-        self.cross_attn = MultiHeadCrossAttention(
-            hidden_size, num_heads, dtype=dtype, device=device, operations=operations, **block_kwargs
-        )
-        self.norm2 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
-        # to be compatible with lower version pytorch
-        approx_gelu = lambda: nn.GELU(approximate="tanh")
-        self.mlp = Mlp(
-            in_features=hidden_size, hidden_features=int(hidden_size * mlp_ratio), act_layer=approx_gelu,
-            dtype=dtype, device=device, operations=operations
-        )
-        self.scale_shift_table = nn.Parameter(torch.randn(6, hidden_size) / hidden_size ** 0.5)
-
-    def forward(self, x, y, t, mask=None, HW=None, **kwargs):
-        B, N, C = x.shape
-
-        shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp = (self.scale_shift_table[None].to(dtype=x.dtype, device=x.device) + t.reshape(B, 6, -1)).chunk(6, dim=1)
-        x = x + (gate_msa * self.attn(t2i_modulate(self.norm1(x), shift_msa, scale_msa), HW=HW))
-        x = x + self.cross_attn(x, y, mask)
-        x = x + (gate_mlp * self.mlp(t2i_modulate(self.norm2(x), shift_mlp, scale_mlp)))
-
-        return x
-
-
-### Core PixArt Model ###
-class PixArtMS(nn.Module):
-    """
-    Diffusion model with a Transformer backbone.
-    """
-    def __init__(
-            self,
-            input_size=32,
-            patch_size=2,
-            in_channels=4,
-            hidden_size=1152,
-            depth=28,
-            num_heads=16,
-            mlp_ratio=4.0,
-            class_dropout_prob=0.1,
-            learn_sigma=True,
-            pred_sigma=True,
-            drop_path: float = 0.,
-            caption_channels=4096,
-            pe_interpolation=None,
-            pe_precision=None,
-            config=None,
-            model_max_length=120,
-            micro_condition=True,
-            qk_norm=False,
-            kv_compress_config=None,
-            dtype=None,
-            device=None,
-            operations=None,
-            **kwargs,
-    ):
-        nn.Module.__init__(self)
-        self.dtype = dtype
-        self.pred_sigma = pred_sigma
-        self.in_channels = in_channels
-        self.out_channels = in_channels * 2 if pred_sigma else in_channels
-        self.patch_size = patch_size
-        self.num_heads = num_heads
-        self.pe_interpolation = pe_interpolation
-        self.pe_precision = pe_precision
-        self.hidden_size = hidden_size
-        self.depth = depth
-
-        approx_gelu = lambda: nn.GELU(approximate="tanh")
-        self.t_block = nn.Sequential(
-            nn.SiLU(),
-            operations.Linear(hidden_size, 6 * hidden_size, bias=True, dtype=dtype, device=device)
-        )
-        self.x_embedder = PatchEmbed(
-            patch_size=patch_size,
-            in_chans=in_channels,
-            embed_dim=hidden_size,
-            bias=True,
-            dtype=dtype,
-            device=device,
-            operations=operations
-        )
-        self.t_embedder = TimestepEmbedder(
-            hidden_size, dtype=dtype, device=device, operations=operations,
-        )
-        self.y_embedder = CaptionEmbedder(
-            in_channels=caption_channels, hidden_size=hidden_size, uncond_prob=class_dropout_prob,
-            act_layer=approx_gelu, token_num=model_max_length,
-            dtype=dtype, device=device, operations=operations,
-        )
-
-        self.micro_conditioning = micro_condition
-        if self.micro_conditioning:
-            self.csize_embedder = SizeEmbedder(hidden_size//3, dtype=dtype, device=device, operations=operations)
-            self.ar_embedder = SizeEmbedder(hidden_size//3, dtype=dtype, device=device, operations=operations)
-
-        # For fixed sin-cos embedding:
-        # num_patches = (input_size // patch_size) * (input_size // patch_size)
-        # self.base_size = input_size // self.patch_size
-        # self.register_buffer("pos_embed", torch.zeros(1, num_patches, hidden_size))
-
-        drop_path = [x.item() for x in torch.linspace(0, drop_path, depth)]  # stochastic depth decay rule
-        if kv_compress_config is None:
-            kv_compress_config = {
-                'sampling': None,
-                'scale_factor': 1,
-                'kv_compress_layer': [],
-            }
-        self.blocks = nn.ModuleList([
-            PixArtMSBlock(
-                hidden_size, num_heads, mlp_ratio=mlp_ratio, drop_path=drop_path[i],
-                sampling=kv_compress_config['sampling'],
-                sr_ratio=int(kv_compress_config['scale_factor']) if i in kv_compress_config['kv_compress_layer'] else 1,
-                qk_norm=qk_norm,
-                dtype=dtype,
-                device=device,
-                operations=operations,
-            )
-            for i in range(depth)
-        ])
-        self.final_layer = T2IFinalLayer(
-            hidden_size, patch_size, self.out_channels, dtype=dtype, device=device, operations=operations
-        )
-
-    def forward_orig(self, x, timestep, y, mask=None, c_size=None, c_ar=None, **kwargs):
-        """
-        Original forward pass of PixArt.
-        x: (N, C, H, W) tensor of spatial inputs (images or latent representations of images)
-        t: (N,) tensor of diffusion timesteps
-        y: (N, 1, 120, C) conditioning
-        ar: (N, 1): aspect ratio
-        cs: (N ,2) size conditioning for height/width
-        """
-        B, C, H, W = x.shape
-        c_res = (H + W) // 2
-        pe_interpolation = self.pe_interpolation
-        if pe_interpolation is None or self.pe_precision is not None:
-            # calculate pe_interpolation on-the-fly
-            pe_interpolation = round(c_res / (512/8.0), self.pe_precision or 0)
-
-        pos_embed = get_2d_sincos_pos_embed_torch(
-            self.hidden_size,
-            h=(H // self.patch_size),
-            w=(W // self.patch_size),
-            pe_interpolation=pe_interpolation,
-            base_size=((round(c_res / 64) * 64) // self.patch_size),
-            device=x.device,
-            dtype=x.dtype,
-        ).unsqueeze(0)
-
-        x = self.x_embedder(x) + pos_embed  # (N, T, D), where T = H * W / patch_size ** 2
-        t = self.t_embedder(timestep, x.dtype)  # (N, D)
-
-        if self.micro_conditioning and (c_size is not None and c_ar is not None):
-            bs = x.shape[0]
-            c_size = self.csize_embedder(c_size, bs)  # (N, D)
-            c_ar = self.ar_embedder(c_ar, bs)  # (N, D)
-            t = t + torch.cat([c_size, c_ar], dim=1)
-
-        t0 = self.t_block(t)
-        y = self.y_embedder(y, self.training)  # (N, D)
-
-        if mask is not None:
-            if mask.shape[0] != y.shape[0]:
-                mask = mask.repeat(y.shape[0] // mask.shape[0], 1)
-            mask = mask.squeeze(1).squeeze(1)
-            y = y.squeeze(1).masked_select(mask.unsqueeze(-1) != 0).view(1, -1, x.shape[-1])
-            y_lens = mask.sum(dim=1).tolist()
-        else:
-            y_lens = None
-            y = y.squeeze(1).view(1, -1, x.shape[-1])
-        for block in self.blocks:
-            x = block(x, y, t0, y_lens, (H, W), **kwargs)  # (N, T, D)
-
-        x = self.final_layer(x, t)  # (N, T, patch_size ** 2 * out_channels)
-        x = self.unpatchify(x, H, W)  # (N, out_channels, H, W)
-
-        return x
-
-    def forward(self, x, timesteps, context, c_size=None, c_ar=None, **kwargs):
-        B, C, H, W = x.shape
-
-        # Fallback for missing microconds
-        if self.micro_conditioning:
-            if c_size is None:
-                c_size = torch.tensor([H*8, W*8], dtype=x.dtype, device=x.device).repeat(B, 1)
-
-            if c_ar is None:
-                c_ar = torch.tensor([H/W], dtype=x.dtype, device=x.device).repeat(B, 1)
-
-        ## Still accepts the input w/o that dim but returns garbage
-        if len(context.shape) == 3:
-            context = context.unsqueeze(1)
-
-        ## run original forward pass
-        out = self.forward_orig(x, timesteps, context, c_size=c_size, c_ar=c_ar)
-
-        ## only return EPS
-        if self.pred_sigma:
-            return out[:, :self.in_channels]
-        return out
-
-    def unpatchify(self, x, h, w):
-        """
-        x: (N, T, patch_size**2 * C)
-        imgs: (N, H, W, C)
-        """
-        c = self.out_channels
-        p = self.x_embedder.patch_size[0]
-        h = h // self.patch_size
-        w = w // self.patch_size
-        assert h * w == x.shape[1]
-
-        x = x.reshape(shape=(x.shape[0], h, w, p, p, c))
-        x = torch.einsum('nhwpqc->nchpwq', x)
-        imgs = x.reshape(shape=(x.shape[0], c, h * p, w * p))
-        return imgs
+# Based on:
+# https://github.com/PixArt-alpha/PixArt-alpha [Apache 2.0 license]
+# https://github.com/PixArt-alpha/PixArt-sigma [Apache 2.0 license]
+import torch
+import torch.nn as nn
+
+from .blocks import (
+    t2i_modulate,
+    CaptionEmbedder,
+    AttentionKVCompress,
+    MultiHeadCrossAttention,
+    T2IFinalLayer,
+    SizeEmbedder,
+)
+from comfy.ldm.modules.diffusionmodules.mmdit import TimestepEmbedder, PatchEmbed, Mlp, get_1d_sincos_pos_embed_from_grid_torch
+
+
+def get_2d_sincos_pos_embed_torch(embed_dim, w, h, pe_interpolation=1.0, base_size=16, device=None, dtype=torch.float32):
+    grid_h, grid_w = torch.meshgrid(
+        torch.arange(h, device=device, dtype=dtype) / (h/base_size) / pe_interpolation,
+        torch.arange(w, device=device, dtype=dtype) / (w/base_size) / pe_interpolation,
+        indexing='ij'
+    )
+    emb_h = get_1d_sincos_pos_embed_from_grid_torch(embed_dim // 2, grid_h, device=device, dtype=dtype)
+    emb_w = get_1d_sincos_pos_embed_from_grid_torch(embed_dim // 2, grid_w, device=device, dtype=dtype)
+    emb = torch.cat([emb_w, emb_h], dim=1)  # (H*W, D)
+    return emb
+
+class PixArtMSBlock(nn.Module):
+    """
+    A PixArt block with adaptive layer norm zero (adaLN-Zero) conditioning.
+    """
+    def __init__(self, hidden_size, num_heads, mlp_ratio=4.0, drop_path=0., input_size=None,
+                 sampling=None, sr_ratio=1, qk_norm=False, dtype=None, device=None, operations=None, **block_kwargs):
+        super().__init__()
+        self.hidden_size = hidden_size
+        self.norm1 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
+        self.attn = AttentionKVCompress(
+            hidden_size, num_heads=num_heads, qkv_bias=True, sampling=sampling, sr_ratio=sr_ratio,
+            qk_norm=qk_norm, dtype=dtype, device=device, operations=operations, **block_kwargs
+        )
+        self.cross_attn = MultiHeadCrossAttention(
+            hidden_size, num_heads, dtype=dtype, device=device, operations=operations, **block_kwargs
+        )
+        self.norm2 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
+        # to be compatible with lower version pytorch
+        approx_gelu = lambda: nn.GELU(approximate="tanh")
+        self.mlp = Mlp(
+            in_features=hidden_size, hidden_features=int(hidden_size * mlp_ratio), act_layer=approx_gelu,
+            dtype=dtype, device=device, operations=operations
+        )
+        self.scale_shift_table = nn.Parameter(torch.randn(6, hidden_size) / hidden_size ** 0.5)
+
+    def forward(self, x, y, t, mask=None, HW=None, **kwargs):
+        B, N, C = x.shape
+
+        shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp = (self.scale_shift_table[None].to(dtype=x.dtype, device=x.device) + t.reshape(B, 6, -1)).chunk(6, dim=1)
+        x = x + (gate_msa * self.attn(t2i_modulate(self.norm1(x), shift_msa, scale_msa), HW=HW))
+        x = x + self.cross_attn(x, y, mask)
+        x = x + (gate_mlp * self.mlp(t2i_modulate(self.norm2(x), shift_mlp, scale_mlp)))
+
+        return x
+
+
+### Core PixArt Model ###
+class PixArtMS(nn.Module):
+    """
+    Diffusion model with a Transformer backbone.
+    """
+    def __init__(
+            self,
+            input_size=32,
+            patch_size=2,
+            in_channels=4,
+            hidden_size=1152,
+            depth=28,
+            num_heads=16,
+            mlp_ratio=4.0,
+            class_dropout_prob=0.1,
+            learn_sigma=True,
+            pred_sigma=True,
+            drop_path: float = 0.,
+            caption_channels=4096,
+            pe_interpolation=None,
+            pe_precision=None,
+            config=None,
+            model_max_length=120,
+            micro_condition=True,
+            qk_norm=False,
+            kv_compress_config=None,
+            dtype=None,
+            device=None,
+            operations=None,
+            **kwargs,
+    ):
+        nn.Module.__init__(self)
+        self.dtype = dtype
+        self.pred_sigma = pred_sigma
+        self.in_channels = in_channels
+        self.out_channels = in_channels * 2 if pred_sigma else in_channels
+        self.patch_size = patch_size
+        self.num_heads = num_heads
+        self.pe_interpolation = pe_interpolation
+        self.pe_precision = pe_precision
+        self.hidden_size = hidden_size
+        self.depth = depth
+
+        approx_gelu = lambda: nn.GELU(approximate="tanh")
+        self.t_block = nn.Sequential(
+            nn.SiLU(),
+            operations.Linear(hidden_size, 6 * hidden_size, bias=True, dtype=dtype, device=device)
+        )
+        self.x_embedder = PatchEmbed(
+            patch_size=patch_size,
+            in_chans=in_channels,
+            embed_dim=hidden_size,
+            bias=True,
+            dtype=dtype,
+            device=device,
+            operations=operations
+        )
+        self.t_embedder = TimestepEmbedder(
+            hidden_size, dtype=dtype, device=device, operations=operations,
+        )
+        self.y_embedder = CaptionEmbedder(
+            in_channels=caption_channels, hidden_size=hidden_size, uncond_prob=class_dropout_prob,
+            act_layer=approx_gelu, token_num=model_max_length,
+            dtype=dtype, device=device, operations=operations,
+        )
+
+        self.micro_conditioning = micro_condition
+        if self.micro_conditioning:
+            self.csize_embedder = SizeEmbedder(hidden_size//3, dtype=dtype, device=device, operations=operations)
+            self.ar_embedder = SizeEmbedder(hidden_size//3, dtype=dtype, device=device, operations=operations)
+
+        # For fixed sin-cos embedding:
+        # num_patches = (input_size // patch_size) * (input_size // patch_size)
+        # self.base_size = input_size // self.patch_size
+        # self.register_buffer("pos_embed", torch.zeros(1, num_patches, hidden_size))
+
+        drop_path = [x.item() for x in torch.linspace(0, drop_path, depth)]  # stochastic depth decay rule
+        if kv_compress_config is None:
+            kv_compress_config = {
+                'sampling': None,
+                'scale_factor': 1,
+                'kv_compress_layer': [],
+            }
+        self.blocks = nn.ModuleList([
+            PixArtMSBlock(
+                hidden_size, num_heads, mlp_ratio=mlp_ratio, drop_path=drop_path[i],
+                sampling=kv_compress_config['sampling'],
+                sr_ratio=int(kv_compress_config['scale_factor']) if i in kv_compress_config['kv_compress_layer'] else 1,
+                qk_norm=qk_norm,
+                dtype=dtype,
+                device=device,
+                operations=operations,
+            )
+            for i in range(depth)
+        ])
+        self.final_layer = T2IFinalLayer(
+            hidden_size, patch_size, self.out_channels, dtype=dtype, device=device, operations=operations
+        )
+
+    def forward_orig(self, x, timestep, y, mask=None, c_size=None, c_ar=None, **kwargs):
+        """
+        Original forward pass of PixArt.
+        x: (N, C, H, W) tensor of spatial inputs (images or latent representations of images)
+        t: (N,) tensor of diffusion timesteps
+        y: (N, 1, 120, C) conditioning
+        ar: (N, 1): aspect ratio
+        cs: (N ,2) size conditioning for height/width
+        """
+        B, C, H, W = x.shape
+        c_res = (H + W) // 2
+        pe_interpolation = self.pe_interpolation
+        if pe_interpolation is None or self.pe_precision is not None:
+            # calculate pe_interpolation on-the-fly
+            pe_interpolation = round(c_res / (512/8.0), self.pe_precision or 0)
+
+        pos_embed = get_2d_sincos_pos_embed_torch(
+            self.hidden_size,
+            h=(H // self.patch_size),
+            w=(W // self.patch_size),
+            pe_interpolation=pe_interpolation,
+            base_size=((round(c_res / 64) * 64) // self.patch_size),
+            device=x.device,
+            dtype=x.dtype,
+        ).unsqueeze(0)
+
+        x = self.x_embedder(x) + pos_embed  # (N, T, D), where T = H * W / patch_size ** 2
+        t = self.t_embedder(timestep, x.dtype)  # (N, D)
+
+        if self.micro_conditioning and (c_size is not None and c_ar is not None):
+            bs = x.shape[0]
+            c_size = self.csize_embedder(c_size, bs)  # (N, D)
+            c_ar = self.ar_embedder(c_ar, bs)  # (N, D)
+            t = t + torch.cat([c_size, c_ar], dim=1)
+
+        t0 = self.t_block(t)
+        y = self.y_embedder(y, self.training)  # (N, D)
+
+        if mask is not None:
+            if mask.shape[0] != y.shape[0]:
+                mask = mask.repeat(y.shape[0] // mask.shape[0], 1)
+            mask = mask.squeeze(1).squeeze(1)
+            y = y.squeeze(1).masked_select(mask.unsqueeze(-1) != 0).view(1, -1, x.shape[-1])
+            y_lens = mask.sum(dim=1).tolist()
+        else:
+            y_lens = None
+            y = y.squeeze(1).view(1, -1, x.shape[-1])
+        for block in self.blocks:
+            x = block(x, y, t0, y_lens, (H, W), **kwargs)  # (N, T, D)
+
+        x = self.final_layer(x, t)  # (N, T, patch_size ** 2 * out_channels)
+        x = self.unpatchify(x, H, W)  # (N, out_channels, H, W)
+
+        return x
+
+    def forward(self, x, timesteps, context, c_size=None, c_ar=None, **kwargs):
+        B, C, H, W = x.shape
+
+        # Fallback for missing microconds
+        if self.micro_conditioning:
+            if c_size is None:
+                c_size = torch.tensor([H*8, W*8], dtype=x.dtype, device=x.device).repeat(B, 1)
+
+            if c_ar is None:
+                c_ar = torch.tensor([H/W], dtype=x.dtype, device=x.device).repeat(B, 1)
+
+        ## Still accepts the input w/o that dim but returns garbage
+        if len(context.shape) == 3:
+            context = context.unsqueeze(1)
+
+        ## run original forward pass
+        out = self.forward_orig(x, timesteps, context, c_size=c_size, c_ar=c_ar)
+
+        ## only return EPS
+        if self.pred_sigma:
+            return out[:, :self.in_channels]
+        return out
+
+    def unpatchify(self, x, h, w):
+        """
+        x: (N, T, patch_size**2 * C)
+        imgs: (N, H, W, C)
+        """
+        c = self.out_channels
+        p = self.x_embedder.patch_size[0]
+        h = h // self.patch_size
+        w = w // self.patch_size
+        assert h * w == x.shape[1]
+
+        x = x.reshape(shape=(x.shape[0], h, w, p, p, c))
+        x = torch.einsum('nhwpqc->nchpwq', x)
+        imgs = x.reshape(shape=(x.shape[0], c, h * p, w * p))
+        return imgs
--- a/comfy/ldm/wan/model.py
+++ b/comfy/ldm/wan/model.py
@@ -146,15 +146,6 @@ WAN_CROSSATTENTION_CLASSES = {
 }


-def repeat_e(e, x):
-    repeats = 1
-    if e.shape[1] > 1:
-        repeats = x.shape[1] // e.shape[1]
-    if repeats == 1:
-        return e
-    return torch.repeat_interleave(e, repeats, dim=1)
-
-
 class WanAttentionBlock(nn.Module):

    def __init__(self,
@@ -211,23 +202,20 @@ class WanAttentionBlock(nn.Module):
        """
        # assert e.dtype == torch.float32

-        if e.ndim < 4:
-            e = (comfy.model_management.cast_to(self.modulation, dtype=x.dtype, device=x.device) + e).chunk(6, dim=1)
-        else:
-            e = (comfy.model_management.cast_to(self.modulation, dtype=x.dtype, device=x.device).unsqueeze(0) + e).unbind(2)
+        e = (comfy.model_management.cast_to(self.modulation, dtype=x.dtype, device=x.device) + e).chunk(6, dim=1)
        # assert e[0].dtype == torch.float32

        # self-attention
        y = self.self_attn(
-            self.norm1(x) * (1 + repeat_e(e[1], x)) + repeat_e(e[0], x),
+            self.norm1(x) * (1 + e[1]) + e[0],
            freqs)

-        x = x + y * repeat_e(e[2], x)
+        x = x + y * e[2]

        # cross-attention & ffn
        x = x + self.cross_attn(self.norm3(x), context, context_img_len=context_img_len)
-        y = self.ffn(self.norm2(x) * (1 + repeat_e(e[4], x)) + repeat_e(e[3], x))
-        x = x + y * repeat_e(e[5], x)
+        y = self.ffn(self.norm2(x) * (1 + e[4]) + e[3])
+        x = x + y * e[5]
        return x


@@ -337,12 +325,8 @@ class Head(nn.Module):
            e(Tensor): Shape [B, C]
        """
        # assert e.dtype == torch.float32
-        if e.ndim < 3:
-            e = (comfy.model_management.cast_to(self.modulation, dtype=x.dtype, device=x.device) + e.unsqueeze(1)).chunk(2, dim=1)
-        else:
-            e = (comfy.model_management.cast_to(self.modulation, dtype=x.dtype, device=x.device).unsqueeze(0) + e.unsqueeze(2)).unbind(2)
-
-        x = (self.head(self.norm(x) * (1 + repeat_e(e[1], x)) + repeat_e(e[0], x)))
+        e = (comfy.model_management.cast_to(self.modulation, dtype=x.dtype, device=x.device) + e.unsqueeze(1)).chunk(2, dim=1)
+        x = (self.head(self.norm(x) * (1 + e[1]) + e[0]))
        return x


@@ -522,9 +506,8 @@ class WanModel(torch.nn.Module):

        # time embeddings
        e = self.time_embedding(
-            sinusoidal_embedding_1d(self.freq_dim, t.flatten()).to(dtype=x[0].dtype))
-        e = e.reshape(t.shape[0], -1, e.shape[-1])
-        e0 = self.time_projection(e).unflatten(2, (6, self.dim))
+            sinusoidal_embedding_1d(self.freq_dim, t).to(dtype=x[0].dtype))
+        e0 = self.time_projection(e).unflatten(1, (6, self.dim))

        # context
        context = self.text_embedding(context)
--- a/comfy/ldm/wan/vae.py
+++ b/comfy/ldm/wan/vae.py
@@ -52,6 +52,15 @@ class RMS_norm(nn.Module):
            x, dim=(1 if self.channel_first else -1)) * self.scale * self.gamma.to(x) + (self.bias.to(x) if self.bias is not None else 0)


+class Upsample(nn.Upsample):
+
+    def forward(self, x):
+        """
+        Upsample in float32, then cast back to the input's type (fixes bfloat16 support for nearest neighbor interpolation).
+        """
+        return super().forward(x.float()).type_as(x)
+
+
 class Resample(nn.Module):

    def __init__(self, dim, mode):
@@ -64,11 +73,11 @@ class Resample(nn.Module):
        # layers
        if mode == 'upsample2d':
            self.resample = nn.Sequential(
-                nn.Upsample(scale_factor=(2., 2.), mode='nearest-exact'),
+                Upsample(scale_factor=(2., 2.), mode='nearest-exact'),
                ops.Conv2d(dim, dim // 2, 3, padding=1))
        elif mode == 'upsample3d':
            self.resample = nn.Sequential(
-                nn.Upsample(scale_factor=(2., 2.), mode='nearest-exact'),
+                Upsample(scale_factor=(2., 2.), mode='nearest-exact'),
                ops.Conv2d(dim, dim // 2, 3, padding=1))
            self.time_conv = CausalConv3d(
                dim, dim * 2, (3, 1, 1), padding=(1, 0, 0))
@@ -148,6 +157,29 @@ class Resample(nn.Module):
                    feat_idx[0] += 1
        return x

+    def init_weight(self, conv):  # zero the conv weight, write torch.eye(c1, c2) at kernel index [1, 0, 0], zero the bias
+        conv_weight = conv.weight  # the same tensor object as conv.weight, not a copy
+        nn.init.zeros_(conv_weight)
+        c1, c2, t, h, w = conv_weight.size()  # unpacking requires a 5-D weight
+        one_matrix = torch.eye(c1, c2)
+        init_matrix = one_matrix
+        nn.init.zeros_(conv_weight)  # repeats the zeroing already done above
+        #conv_weight.data[:,:,-1,1,1] = init_matrix * 0.5
+        conv_weight.data[:, :, 1, 0, 0] = init_matrix  #* 0.5
+        conv.weight.data.copy_(conv_weight)  # conv_weight is conv.weight itself, so this copies the data onto itself
+        nn.init.zeros_(conv.bias.data)
+
+    def init_weight2(self, conv):  # zero the conv weight, write the same eye block into both halves of dim 0, zero the bias
+        conv_weight = conv.weight.data
+        nn.init.zeros_(conv_weight)
+        c1, c2, t, h, w = conv_weight.size()  # unpacking requires a 5-D weight
+        init_matrix = torch.eye(c1 // 2, c2)
+        #init_matrix = repeat(init_matrix, 'o ... -> (o 2) ...').permute(1,0,2).contiguous().reshape(c1,c2)
+        conv_weight[:c1 // 2, :, -1, 0, 0] = init_matrix  # first half of dim 0, last index of the t dimension
+        conv_weight[c1 // 2:, :, -1, 0, 0] = init_matrix  # second half; shapes only match when c1 is even
+        conv.weight.data.copy_(conv_weight)
+        nn.init.zeros_(conv.bias.data)
+

 class ResidualBlock(nn.Module):

@@ -462,6 +494,12 @@ class WanVAE(nn.Module):
        self.decoder = Decoder3d(dim, z_dim, dim_mult, num_res_blocks,
                                 attn_scales, self.temperal_upsample, dropout)

+    def forward(self, x):  # encode, draw a latent, decode; returns (x_recon, mu, log_var)
+        mu, log_var = self.encode(x)  # NOTE(review): assumes encode returns a (mu, log_var) pair - confirm
+        z = self.reparameterize(mu, log_var)
+        x_recon = self.decode(z)
+        return x_recon, mu, log_var
+
    def encode(self, x):
        self.clear_cache()
        ## cache
@@ -507,6 +545,18 @@ class WanVAE(nn.Module):
        self.clear_cache()
        return out

+    def reparameterize(self, mu, log_var):  # returns mu + std * eps, with eps drawn by torch.randn_like
+        std = torch.exp(0.5 * log_var)  # log-variance -> standard deviation
+        eps = torch.randn_like(std)  # random noise with the same shape, dtype and device as std
+        return eps * std + mu
+
+    def sample(self, imgs, deterministic=False):  # returns mu if deterministic, otherwise a random draw around mu
+        mu, log_var = self.encode(imgs)  # NOTE(review): assumes encode returns a (mu, log_var) pair - confirm
+        if deterministic:
+            return mu
+        std = torch.exp(0.5 * log_var.clamp(-30.0, 20.0))  # log-variance is clamped to [-30, 20] before exponentiating
+        return mu + std * torch.randn_like(std)
+
    def clear_cache(self):
        self._conv_num = count_conv3d(self.decoder)
        self._conv_idx = [0]
--- a/comfy/ldm/wan/vae2_2.py
+++ b/comfy/ldm/wan/vae2_2.py
@@ -1,726 +0,0 @@
-# original version: https://github.com/Wan-Video/Wan2.2/blob/main/wan/modules/vae2_2.py
-# Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
-
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from einops import rearrange
-from .vae import AttentionBlock, CausalConv3d, RMS_norm
-
-import comfy.ops
-ops = comfy.ops.disable_weight_init
-
-CACHE_T = 2
-
-
-class Resample(nn.Module):
-
-    def __init__(self, dim, mode):
-        assert mode in (
-            "none",
-            "upsample2d",
-            "upsample3d",
-            "downsample2d",
-            "downsample3d",
-        )
-        super().__init__()
-        self.dim = dim
-        self.mode = mode
-
-        # layers
-        if mode == "upsample2d":
-            self.resample = nn.Sequential(
-                nn.Upsample(scale_factor=(2.0, 2.0), mode="nearest-exact"),
-                ops.Conv2d(dim, dim, 3, padding=1),
-            )
-        elif mode == "upsample3d":
-            self.resample = nn.Sequential(
-                nn.Upsample(scale_factor=(2.0, 2.0), mode="nearest-exact"),
-                ops.Conv2d(dim, dim, 3, padding=1),
-                # ops.Conv2d(dim, dim//2, 3, padding=1)
-            )
-            self.time_conv = CausalConv3d(
-                dim, dim * 2, (3, 1, 1), padding=(1, 0, 0))
-        elif mode == "downsample2d":
-            self.resample = nn.Sequential(
-                nn.ZeroPad2d((0, 1, 0, 1)),
-                ops.Conv2d(dim, dim, 3, stride=(2, 2)))
-        elif mode == "downsample3d":
-            self.resample = nn.Sequential(
-                nn.ZeroPad2d((0, 1, 0, 1)),
-                ops.Conv2d(dim, dim, 3, stride=(2, 2)))
-            self.time_conv = CausalConv3d(
-                dim, dim, (3, 1, 1), stride=(2, 1, 1), padding=(0, 0, 0))
-        else:
-            self.resample = nn.Identity()
-
-    def forward(self, x, feat_cache=None, feat_idx=[0]):
-        b, c, t, h, w = x.size()
-        if self.mode == "upsample3d":
-            if feat_cache is not None:
-                idx = feat_idx[0]
-                if feat_cache[idx] is None:
-                    feat_cache[idx] = "Rep"
-                    feat_idx[0] += 1
-                else:
-                    cache_x = x[:, :, -CACHE_T:, :, :].clone()
-                    if (cache_x.shape[2] < 2 and feat_cache[idx] is not None and
-                            feat_cache[idx] != "Rep"):
-                        # cache last frame of last two chunk
-                        cache_x = torch.cat(
-                            [
-                                feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to(
-                                    cache_x.device),
-                                cache_x,
-                            ],
-                            dim=2,
-                        )
-                    if (cache_x.shape[2] < 2 and feat_cache[idx] is not None and
-                            feat_cache[idx] == "Rep"):
-                        cache_x = torch.cat(
-                            [
-                                torch.zeros_like(cache_x).to(cache_x.device),
-                                cache_x
-                            ],
-                            dim=2,
-                        )
-                    if feat_cache[idx] == "Rep":
-                        x = self.time_conv(x)
-                    else:
-                        x = self.time_conv(x, feat_cache[idx])
-                    feat_cache[idx] = cache_x
-                    feat_idx[0] += 1
-                    x = x.reshape(b, 2, c, t, h, w)
-                    x = torch.stack((x[:, 0, :, :, :, :], x[:, 1, :, :, :, :]),
-                                    3)
-                    x = x.reshape(b, c, t * 2, h, w)
-        t = x.shape[2]
-        x = rearrange(x, "b c t h w -> (b t) c h w")
-        x = self.resample(x)
-        x = rearrange(x, "(b t) c h w -> b c t h w", t=t)
-
-        if self.mode == "downsample3d":
-            if feat_cache is not None:
-                idx = feat_idx[0]
-                if feat_cache[idx] is None:
-                    feat_cache[idx] = x.clone()
-                    feat_idx[0] += 1
-                else:
-                    cache_x = x[:, :, -1:, :, :].clone()
-                    x = self.time_conv(
-                        torch.cat([feat_cache[idx][:, :, -1:, :, :], x], 2))
-                    feat_cache[idx] = cache_x
-                    feat_idx[0] += 1
-        return x
-
-
-class ResidualBlock(nn.Module):
-
-    def __init__(self, in_dim, out_dim, dropout=0.0):
-        super().__init__()
-        self.in_dim = in_dim
-        self.out_dim = out_dim
-
-        # layers
-        self.residual = nn.Sequential(
-            RMS_norm(in_dim, images=False),
-            nn.SiLU(),
-            CausalConv3d(in_dim, out_dim, 3, padding=1),
-            RMS_norm(out_dim, images=False),
-            nn.SiLU(),
-            nn.Dropout(dropout),
-            CausalConv3d(out_dim, out_dim, 3, padding=1),
-        )
-        self.shortcut = (
-            CausalConv3d(in_dim, out_dim, 1)
-            if in_dim != out_dim else nn.Identity())
-
-    def forward(self, x, feat_cache=None, feat_idx=[0]):
-        old_x = x
-        for layer in self.residual:
-            if isinstance(layer, CausalConv3d) and feat_cache is not None:
-                idx = feat_idx[0]
-                cache_x = x[:, :, -CACHE_T:, :, :].clone()
-                if cache_x.shape[2] < 2 and feat_cache[idx] is not None:
-                    # cache last frame of last two chunk
-                    cache_x = torch.cat(
-                        [
-                            feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to(
-                                cache_x.device),
-                            cache_x,
-                        ],
-                        dim=2,
-                    )
-                x = layer(x, feat_cache[idx])
-                feat_cache[idx] = cache_x
-                feat_idx[0] += 1
-            else:
-                x = layer(x)
-        return x + self.shortcut(old_x)
-
-
-def patchify(x, patch_size):
-    if patch_size == 1:
-        return x
-    if x.dim() == 4:
-        x = rearrange(
-            x, "b c (h q) (w r) -> b (c r q) h w", q=patch_size, r=patch_size)
-    elif x.dim() == 5:
-        x = rearrange(
-            x,
-            "b c f (h q) (w r) -> b (c r q) f h w",
-            q=patch_size,
-            r=patch_size,
-        )
-    else:
-        raise ValueError(f"Invalid input shape: {x.shape}")
-
-    return x
-
-
-def unpatchify(x, patch_size):
-    if patch_size == 1:
-        return x
-
-    if x.dim() == 4:
-        x = rearrange(
-            x, "b (c r q) h w -> b c (h q) (w r)", q=patch_size, r=patch_size)
-    elif x.dim() == 5:
-        x = rearrange(
-            x,
-            "b (c r q) f h w -> b c f (h q) (w r)",
-            q=patch_size,
-            r=patch_size,
-        )
-    return x
-
-
-class AvgDown3D(nn.Module):
-
-    def __init__(
-        self,
-        in_channels,
-        out_channels,
-        factor_t,
-        factor_s=1,
-    ):
-        super().__init__()
-        self.in_channels = in_channels
-        self.out_channels = out_channels
-        self.factor_t = factor_t
-        self.factor_s = factor_s
-        self.factor = self.factor_t * self.factor_s * self.factor_s
-
-        assert in_channels * self.factor % out_channels == 0
-        self.group_size = in_channels * self.factor // out_channels
-
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        pad_t = (self.factor_t - x.shape[2] % self.factor_t) % self.factor_t
-        pad = (0, 0, 0, 0, pad_t, 0)
-        x = F.pad(x, pad)
-        B, C, T, H, W = x.shape
-        x = x.view(
-            B,
-            C,
-            T // self.factor_t,
-            self.factor_t,
-            H // self.factor_s,
-            self.factor_s,
-            W // self.factor_s,
-            self.factor_s,
-        )
-        x = x.permute(0, 1, 3, 5, 7, 2, 4, 6).contiguous()
-        x = x.view(
-            B,
-            C * self.factor,
-            T // self.factor_t,
-            H // self.factor_s,
-            W // self.factor_s,
-        )
-        x = x.view(
-            B,
-            self.out_channels,
-            self.group_size,
-            T // self.factor_t,
-            H // self.factor_s,
-            W // self.factor_s,
-        )
-        x = x.mean(dim=2)
-        return x
-
-
-class DupUp3D(nn.Module):
-
-    def __init__(
-        self,
-        in_channels: int,
-        out_channels: int,
-        factor_t,
-        factor_s=1,
-    ):
-        super().__init__()
-        self.in_channels = in_channels
-        self.out_channels = out_channels
-
-        self.factor_t = factor_t
-        self.factor_s = factor_s
-        self.factor = self.factor_t * self.factor_s * self.factor_s
-
-        assert out_channels * self.factor % in_channels == 0
-        self.repeats = out_channels * self.factor // in_channels
-
-    def forward(self, x: torch.Tensor, first_chunk=False) -> torch.Tensor:
-        x = x.repeat_interleave(self.repeats, dim=1)
-        x = x.view(
-            x.size(0),
-            self.out_channels,
-            self.factor_t,
-            self.factor_s,
-            self.factor_s,
-            x.size(2),
-            x.size(3),
-            x.size(4),
-        )
-        x = x.permute(0, 1, 5, 2, 6, 3, 7, 4).contiguous()
-        x = x.view(
-            x.size(0),
-            self.out_channels,
-            x.size(2) * self.factor_t,
-            x.size(4) * self.factor_s,
-            x.size(6) * self.factor_s,
-        )
-        if first_chunk:
-            x = x[:, :, self.factor_t - 1:, :, :]
-        return x
-
-
-class Down_ResidualBlock(nn.Module):
-
-    def __init__(self,
-                 in_dim,
-                 out_dim,
-                 dropout,
-                 mult,
-                 temperal_downsample=False,
-                 down_flag=False):
-        super().__init__()
-
-        # Shortcut path with downsample
-        self.avg_shortcut = AvgDown3D(
-            in_dim,
-            out_dim,
-            factor_t=2 if temperal_downsample else 1,
-            factor_s=2 if down_flag else 1,
-        )
-
-        # Main path with residual blocks and downsample
-        downsamples = []
-        for _ in range(mult):
-            downsamples.append(ResidualBlock(in_dim, out_dim, dropout))
-            in_dim = out_dim
-
-        # Add the final downsample block
-        if down_flag:
-            mode = "downsample3d" if temperal_downsample else "downsample2d"
-            downsamples.append(Resample(out_dim, mode=mode))
-
-        self.downsamples = nn.Sequential(*downsamples)
-
-    def forward(self, x, feat_cache=None, feat_idx=[0]):
-        x_copy = x
-        for module in self.downsamples:
-            x = module(x, feat_cache, feat_idx)
-
-        return x + self.avg_shortcut(x_copy)
-
-
-class Up_ResidualBlock(nn.Module):
-
-    def __init__(self,
-                 in_dim,
-                 out_dim,
-                 dropout,
-                 mult,
-                 temperal_upsample=False,
-                 up_flag=False):
-        super().__init__()
-        # Shortcut path with upsample
-        if up_flag:
-            self.avg_shortcut = DupUp3D(
-                in_dim,
-                out_dim,
-                factor_t=2 if temperal_upsample else 1,
-                factor_s=2 if up_flag else 1,
-            )
-        else:
-            self.avg_shortcut = None
-
-        # Main path with residual blocks and upsample
-        upsamples = []
-        for _ in range(mult):
-            upsamples.append(ResidualBlock(in_dim, out_dim, dropout))
-            in_dim = out_dim
-
-        # Add the final upsample block
-        if up_flag:
-            mode = "upsample3d" if temperal_upsample else "upsample2d"
-            upsamples.append(Resample(out_dim, mode=mode))
-
-        self.upsamples = nn.Sequential(*upsamples)
-
-    def forward(self, x, feat_cache=None, feat_idx=[0], first_chunk=False):
-        x_main = x
-        for module in self.upsamples:
-            x_main = module(x_main, feat_cache, feat_idx)
-        if self.avg_shortcut is not None:
-            x_shortcut = self.avg_shortcut(x, first_chunk)
-            return x_main + x_shortcut
-        else:
-            return x_main
-
-
-class Encoder3d(nn.Module):
-
-    def __init__(
-        self,
-        dim=128,
-        z_dim=4,
-        dim_mult=[1, 2, 4, 4],
-        num_res_blocks=2,
-        attn_scales=[],
-        temperal_downsample=[True, True, False],
-        dropout=0.0,
-    ):
-        super().__init__()
-        self.dim = dim
-        self.z_dim = z_dim
-        self.dim_mult = dim_mult
-        self.num_res_blocks = num_res_blocks
-        self.attn_scales = attn_scales
-        self.temperal_downsample = temperal_downsample
-
-        # dimensions
-        dims = [dim * u for u in [1] + dim_mult]
-        scale = 1.0
-
-        # init block
-        self.conv1 = CausalConv3d(12, dims[0], 3, padding=1)
-
-        # downsample blocks
-        downsamples = []
-        for i, (in_dim, out_dim) in enumerate(zip(dims[:-1], dims[1:])):
-            t_down_flag = (
-                temperal_downsample[i]
-                if i < len(temperal_downsample) else False)
-            downsamples.append(
-                Down_ResidualBlock(
-                    in_dim=in_dim,
-                    out_dim=out_dim,
-                    dropout=dropout,
-                    mult=num_res_blocks,
-                    temperal_downsample=t_down_flag,
-                    down_flag=i != len(dim_mult) - 1,
-                ))
-            scale /= 2.0
-        self.downsamples = nn.Sequential(*downsamples)
-
-        # middle blocks
-        self.middle = nn.Sequential(
-            ResidualBlock(out_dim, out_dim, dropout),
-            AttentionBlock(out_dim),
-            ResidualBlock(out_dim, out_dim, dropout),
-        )
-
-        # # output blocks
-        self.head = nn.Sequential(
-            RMS_norm(out_dim, images=False),
-            nn.SiLU(),
-            CausalConv3d(out_dim, z_dim, 3, padding=1),
-        )
-
-    def forward(self, x, feat_cache=None, feat_idx=[0]):
-
-        if feat_cache is not None:
-            idx = feat_idx[0]
-            cache_x = x[:, :, -CACHE_T:, :, :].clone()
-            if cache_x.shape[2] < 2 and feat_cache[idx] is not None:
-                cache_x = torch.cat(
-                    [
-                        feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to(
-                            cache_x.device),
-                        cache_x,
-                    ],
-                    dim=2,
-                )
-            x = self.conv1(x, feat_cache[idx])
-            feat_cache[idx] = cache_x
-            feat_idx[0] += 1
-        else:
-            x = self.conv1(x)
-
-        ## downsamples
-        for layer in self.downsamples:
-            if feat_cache is not None:
-                x = layer(x, feat_cache, feat_idx)
-            else:
-                x = layer(x)
-
-        ## middle
-        for layer in self.middle:
-            if isinstance(layer, ResidualBlock) and feat_cache is not None:
-                x = layer(x, feat_cache, feat_idx)
-            else:
-                x = layer(x)
-
-        ## head
-        for layer in self.head:
-            if isinstance(layer, CausalConv3d) and feat_cache is not None:
-                idx = feat_idx[0]
-                cache_x = x[:, :, -CACHE_T:, :, :].clone()
-                if cache_x.shape[2] < 2 and feat_cache[idx] is not None:
-                    cache_x = torch.cat(
-                        [
-                            feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to(
-                                cache_x.device),
-                            cache_x,
-                        ],
-                        dim=2,
-                    )
-                x = layer(x, feat_cache[idx])
-                feat_cache[idx] = cache_x
-                feat_idx[0] += 1
-            else:
-                x = layer(x)
-
-        return x
-
-
-class Decoder3d(nn.Module):
-
-    def __init__(
-        self,
-        dim=128,
-        z_dim=4,
-        dim_mult=[1, 2, 4, 4],
-        num_res_blocks=2,
-        attn_scales=[],
-        temperal_upsample=[False, True, True],
-        dropout=0.0,
-    ):
-        super().__init__()
-        self.dim = dim
-        self.z_dim = z_dim
-        self.dim_mult = dim_mult
-        self.num_res_blocks = num_res_blocks
-        self.attn_scales = attn_scales
-        self.temperal_upsample = temperal_upsample
-
-        # dimensions
-        dims = [dim * u for u in [dim_mult[-1]] + dim_mult[::-1]]
-        # init block
-        self.conv1 = CausalConv3d(z_dim, dims[0], 3, padding=1)
-
-        # middle blocks
-        self.middle = nn.Sequential(
-            ResidualBlock(dims[0], dims[0], dropout),
-            AttentionBlock(dims[0]),
-            ResidualBlock(dims[0], dims[0], dropout),
-        )
-
-        # upsample blocks
-        upsamples = []
-        for i, (in_dim, out_dim) in enumerate(zip(dims[:-1], dims[1:])):
-            t_up_flag = temperal_upsample[i] if i < len(
-                temperal_upsample) else False
-            upsamples.append(
-                Up_ResidualBlock(
-                    in_dim=in_dim,
-                    out_dim=out_dim,
-                    dropout=dropout,
-                    mult=num_res_blocks + 1,
-                    temperal_upsample=t_up_flag,
-                    up_flag=i != len(dim_mult) - 1,
-                ))
-        self.upsamples = nn.Sequential(*upsamples)
-
-        # output blocks
-        self.head = nn.Sequential(
-            RMS_norm(out_dim, images=False),
-            nn.SiLU(),
-            CausalConv3d(out_dim, 12, 3, padding=1),
-        )
-
-    def forward(self, x, feat_cache=None, feat_idx=[0], first_chunk=False):
-        if feat_cache is not None:
-            idx = feat_idx[0]
-            cache_x = x[:, :, -CACHE_T:, :, :].clone()
-            if cache_x.shape[2] < 2 and feat_cache[idx] is not None:
-                cache_x = torch.cat(
-                    [
-                        feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to(
-                            cache_x.device),
-                        cache_x,
-                    ],
-                    dim=2,
-                )
-            x = self.conv1(x, feat_cache[idx])
-            feat_cache[idx] = cache_x
-            feat_idx[0] += 1
-        else:
-            x = self.conv1(x)
-
-        for layer in self.middle:
-            if isinstance(layer, ResidualBlock) and feat_cache is not None:
-                x = layer(x, feat_cache, feat_idx)
-            else:
-                x = layer(x)
-
-        ## upsamples
-        for layer in self.upsamples:
-            if feat_cache is not None:
-                x = layer(x, feat_cache, feat_idx, first_chunk)
-            else:
-                x = layer(x)
-
-        ## head
-        for layer in self.head:
-            if isinstance(layer, CausalConv3d) and feat_cache is not None:
-                idx = feat_idx[0]
-                cache_x = x[:, :, -CACHE_T:, :, :].clone()
-                if cache_x.shape[2] < 2 and feat_cache[idx] is not None:
-                    cache_x = torch.cat(
-                        [
-                            feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to(
-                                cache_x.device),
-                            cache_x,
-                        ],
-                        dim=2,
-                    )
-                x = layer(x, feat_cache[idx])
-                feat_cache[idx] = cache_x
-                feat_idx[0] += 1
-            else:
-                x = layer(x)
-        return x
-
-
-def count_conv3d(model):
-    count = 0
-    for m in model.modules():
-        if isinstance(m, CausalConv3d):
-            count += 1
-    return count
-
-
-class WanVAE(nn.Module):
-
-    def __init__(
-        self,
-        dim=160,
-        dec_dim=256,
-        z_dim=16,
-        dim_mult=[1, 2, 4, 4],
-        num_res_blocks=2,
-        attn_scales=[],
-        temperal_downsample=[True, True, False],
-        dropout=0.0,
-    ):
-        super().__init__()
-        self.dim = dim
-        self.z_dim = z_dim
-        self.dim_mult = dim_mult
-        self.num_res_blocks = num_res_blocks
-        self.attn_scales = attn_scales
-        self.temperal_downsample = temperal_downsample
-        self.temperal_upsample = temperal_downsample[::-1]
-
-        # modules
-        self.encoder = Encoder3d(
-            dim,
-            z_dim * 2,
-            dim_mult,
-            num_res_blocks,
-            attn_scales,
-            self.temperal_downsample,
-            dropout,
-        )
-        self.conv1 = CausalConv3d(z_dim * 2, z_dim * 2, 1)
-        self.conv2 = CausalConv3d(z_dim, z_dim, 1)
-        self.decoder = Decoder3d(
-            dec_dim,
-            z_dim,
-            dim_mult,
-            num_res_blocks,
-            attn_scales,
-            self.temperal_upsample,
-            dropout,
-        )
-
-    def encode(self, x):
-        self.clear_cache()
-        x = patchify(x, patch_size=2)
-        t = x.shape[2]
-        iter_ = 1 + (t - 1) // 4
-        for i in range(iter_):
-            self._enc_conv_idx = [0]
-            if i == 0:
-                out = self.encoder(
-                    x[:, :, :1, :, :],
-                    feat_cache=self._enc_feat_map,
-                    feat_idx=self._enc_conv_idx,
-                )
-            else:
-                out_ = self.encoder(
-                    x[:, :, 1 + 4 * (i - 1):1 + 4 * i, :, :],
-                    feat_cache=self._enc_feat_map,
-                    feat_idx=self._enc_conv_idx,
-                )
-                out = torch.cat([out, out_], 2)
-        mu, log_var = self.conv1(out).chunk(2, dim=1)
-        self.clear_cache()
-        return mu
-
-    def decode(self, z):
-        self.clear_cache()
-        iter_ = z.shape[2]
-        x = self.conv2(z)
-        for i in range(iter_):
-            self._conv_idx = [0]
-            if i == 0:
-                out = self.decoder(
-                    x[:, :, i:i + 1, :, :],
-                    feat_cache=self._feat_map,
-                    feat_idx=self._conv_idx,
-                    first_chunk=True,
-                )
-            else:
-                out_ = self.decoder(
-                    x[:, :, i:i + 1, :, :],
-                    feat_cache=self._feat_map,
-                    feat_idx=self._conv_idx,
-                )
-                out = torch.cat([out, out_], 2)
-        out = unpatchify(out, patch_size=2)
-        self.clear_cache()
-        return out
-
-    def reparameterize(self, mu, log_var):
-        std = torch.exp(0.5 * log_var)
-        eps = torch.randn_like(std)
-        return eps * std + mu
-
-    def sample(self, imgs, deterministic=False):
-        mu, log_var = self.encode(imgs)
-        if deterministic:
-            return mu
-        std = torch.exp(0.5 * log_var.clamp(-30.0, 20.0))
-        return mu + std * torch.randn_like(std)
-
-    def clear_cache(self):
-        self._conv_num = count_conv3d(self.decoder)
-        self._conv_idx = [0]
-        self._feat_map = [None] * self._conv_num
-        # cache encode
-        self._enc_conv_num = count_conv3d(self.encoder)
-        self._enc_conv_idx = [0]
-        self._enc_feat_map = [None] * self._enc_conv_num
--- a/comfy/model_base.py
+++ b/comfy/model_base.py
@@ -41,7 +41,6 @@ import comfy.ldm.hunyuan3d.model
 import comfy.ldm.hidream.model
 import comfy.ldm.chroma.model
 import comfy.ldm.ace.model
-import comfy.ldm.omnigen.omnigen2

 import comfy.model_management
 import comfy.patcher_extension
@@ -816,7 +815,6 @@ class PixArt(BaseModel):
 class Flux(BaseModel):
    def __init__(self, model_config, model_type=ModelType.FLUX, device=None, unet_model=comfy.ldm.flux.model.Flux):
        super().__init__(model_config, model_type, device=device, unet_model=unet_model)
-        self.memory_usage_factor_conds = ("ref_latents",)

    def concat_cond(self, **kwargs):
        try:
@@ -877,23 +875,8 @@ class Flux(BaseModel):
        guidance = kwargs.get("guidance", 3.5)
        if guidance is not None:
            out['guidance'] = comfy.conds.CONDRegular(torch.FloatTensor([guidance]))
-
-        ref_latents = kwargs.get("reference_latents", None)
-        if ref_latents is not None:
-            latents = []
-            for lat in ref_latents:
-                latents.append(self.process_latent_in(lat))
-            out['ref_latents'] = comfy.conds.CONDList(latents)
        return out

-    def extra_conds_shapes(self, **kwargs):
-        out = {}
-        ref_latents = kwargs.get("reference_latents", None)
-        if ref_latents is not None:
-            out['ref_latents'] = list([1, 16, sum(map(lambda a: math.prod(a.size()), ref_latents)) // 16])
-        return out
-
-
 class GenmoMochi(BaseModel):
    def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
        super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.genmo.joint_model.asymm_models_joint.AsymmDiTJoint)
@@ -1031,32 +1014,9 @@ class CosmosPredict2(BaseModel):
        if cross_attn is not None:
            out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)

-        denoise_mask = kwargs.get("concat_mask", kwargs.get("denoise_mask", None))
-        if denoise_mask is not None:
-            out["denoise_mask"] = comfy.conds.CONDRegular(denoise_mask)
-
        out['fps'] = comfy.conds.CONDConstant(kwargs.get("frame_rate", None))
        return out

-    def process_timestep(self, timestep, x, denoise_mask=None, **kwargs):
-        if denoise_mask is None:
-            return timestep
-        if denoise_mask.ndim <= 4:
-            return timestep
-        condition_video_mask_B_1_T_1_1 = denoise_mask.mean(dim=[1, 3, 4], keepdim=True)
-        c_noise_B_1_T_1_1 = 0.0 * (1.0 - condition_video_mask_B_1_T_1_1) + timestep.reshape(timestep.shape[0], 1, 1, 1, 1) * condition_video_mask_B_1_T_1_1
-        out = c_noise_B_1_T_1_1.squeeze(dim=[1, 3, 4])
-        return out
-
-    def scale_latent_inpaint(self, sigma, noise, latent_image, **kwargs):
-        sigma = sigma.reshape([sigma.shape[0]] + [1] * (len(noise.shape) - 1))
-        sigma_noise_augmentation = 0 #TODO
-        if sigma_noise_augmentation != 0:
-            latent_image = latent_image + noise
-        latent_image = self.model_sampling.calculate_input(torch.tensor([sigma_noise_augmentation], device=latent_image.device, dtype=latent_image.dtype), latent_image)
-        sigma = (sigma / (sigma + 1))
-        return latent_image / (1.0 - sigma)
-
 class Lumina2(BaseModel):
    def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
        super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.lumina.model.NextDiT)
@@ -1097,9 +1057,8 @@ class WAN21(BaseModel):
                image[:, i: i + 16] = self.process_latent_in(image[:, i: i + 16])
            image = utils.resize_to_batch_size(image, noise.shape[0])

-        if extra_channels != image.shape[1] + 4:
-            if not self.image_to_video or extra_channels == image.shape[1]:
-                return image
+        if not self.image_to_video or extra_channels == image.shape[1]:
+            return image

        if image.shape[1] > (extra_channels - 4):
            image = image[:, :(extra_channels - 4)]
@@ -1183,31 +1142,6 @@ class WAN21_Camera(WAN21):
            out['camera_conditions'] = comfy.conds.CONDRegular(camera_conditions)
        return out

-class WAN22(BaseModel):
-    def __init__(self, model_config, model_type=ModelType.FLOW, image_to_video=False, device=None):
-        super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.wan.model.WanModel)
-        self.image_to_video = image_to_video
-
-    def extra_conds(self, **kwargs):
-        out = super().extra_conds(**kwargs)
-        cross_attn = kwargs.get("cross_attn", None)
-        if cross_attn is not None:
-            out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)
-
-        denoise_mask = kwargs.get("concat_mask", kwargs.get("denoise_mask", None))
-        if denoise_mask is not None:
-            out["denoise_mask"] = comfy.conds.CONDRegular(denoise_mask)
-        return out
-
-    def process_timestep(self, timestep, x, denoise_mask=None, **kwargs):
-        if denoise_mask is None:
-            return timestep
-        temp_ts = (torch.mean(denoise_mask[:, :, :, :, :], dim=(1, 3, 4), keepdim=True) * timestep.view([timestep.shape[0]] + [1] * (denoise_mask.ndim - 1))).reshape(timestep.shape[0], -1)
-        return temp_ts
-
-    def scale_latent_inpaint(self, sigma, noise, latent_image, **kwargs):
-        return latent_image
-
 class Hunyuan3Dv2(BaseModel):
    def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
        super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.hunyuan3d.model.Hunyuan3Dv2)
@@ -1273,33 +1207,3 @@ class ACEStep(BaseModel):
        out['speaker_embeds'] = comfy.conds.CONDRegular(torch.zeros(noise.shape[0], 512, device=noise.device, dtype=noise.dtype))
        out['lyrics_strength'] = comfy.conds.CONDConstant(kwargs.get("lyrics_strength", 1.0))
        return out
-
-class Omnigen2(BaseModel):
-    def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
-        super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.omnigen.omnigen2.OmniGen2Transformer2DModel)
-        self.memory_usage_factor_conds = ("ref_latents",)
-
-    def extra_conds(self, **kwargs):
-        out = super().extra_conds(**kwargs)
-        attention_mask = kwargs.get("attention_mask", None)
-        if attention_mask is not None:
-            if torch.numel(attention_mask) != attention_mask.sum():
-                out['attention_mask'] = comfy.conds.CONDRegular(attention_mask)
-            out['num_tokens'] = comfy.conds.CONDConstant(max(1, torch.sum(attention_mask).item()))
-        cross_attn = kwargs.get("cross_attn", None)
-        if cross_attn is not None:
-            out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)
-        ref_latents = kwargs.get("reference_latents", None)
-        if ref_latents is not None:
-            latents = []
-            for lat in ref_latents:
-                latents.append(self.process_latent_in(lat))
-            out['ref_latents'] = comfy.conds.CONDList(latents)
-        return out
-
-    def extra_conds_shapes(self, **kwargs):
-        out = {}
-        ref_latents = kwargs.get("reference_latents", None)
-        if ref_latents is not None:
-            out['ref_latents'] = list([1, 16, sum(map(lambda a: math.prod(a.size()), ref_latents)) // 16])
-        return out
--- a/comfy/model_detection.py
+++ b/comfy/model_detection.py
@@ -346,9 +346,7 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
        dit_config = {}
        dit_config["image_model"] = "wan2.1"
        dim = state_dict['{}head.modulation'.format(key_prefix)].shape[-1]
-        out_dim = state_dict['{}head.head.weight'.format(key_prefix)].shape[0] // 4
        dit_config["dim"] = dim
-        dit_config["out_dim"] = out_dim
        dit_config["num_heads"] = dim // 128
        dit_config["ffn_dim"] = state_dict['{}blocks.0.ffn.0.weight'.format(key_prefix)].shape[0]
        dit_config["num_layers"] = count_blocks(state_dict_keys, '{}blocks.'.format(key_prefix) + '{}.')
@@ -443,16 +441,11 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
            dit_config["rope_h_extrapolation_ratio"] = 4.0
            dit_config["rope_w_extrapolation_ratio"] = 4.0
            dit_config["rope_t_extrapolation_ratio"] = 1.0
-        elif dit_config["in_channels"] == 17: # img to video
-            if dit_config["model_channels"] == 2048:
-                dit_config["extra_per_block_abs_pos_emb"] = False
-                dit_config["rope_h_extrapolation_ratio"] = 3.0
-                dit_config["rope_w_extrapolation_ratio"] = 3.0
-                dit_config["rope_t_extrapolation_ratio"] = 1.0
-            elif dit_config["model_channels"] == 5120:
-                dit_config["rope_h_extrapolation_ratio"] = 2.0
-                dit_config["rope_w_extrapolation_ratio"] = 2.0
-                dit_config["rope_t_extrapolation_ratio"] = 0.8333333333333334
+        elif dit_config["in_channels"] == 17:
+            dit_config["extra_per_block_abs_pos_emb"] = False
+            dit_config["rope_h_extrapolation_ratio"] = 3.0
+            dit_config["rope_w_extrapolation_ratio"] = 3.0
+            dit_config["rope_t_extrapolation_ratio"] = 1.0

        dit_config["extra_h_extrapolation_ratio"] = 1.0
        dit_config["extra_w_extrapolation_ratio"] = 1.0
@@ -461,26 +454,6 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):

        return dit_config

-    if '{}time_caption_embed.timestep_embedder.linear_1.bias'.format(key_prefix) in state_dict_keys:  # Omnigen2
-        dit_config = {}
-        dit_config["image_model"] = "omnigen2"
-        dit_config["axes_dim_rope"] = [40, 40, 40]
-        dit_config["axes_lens"] = [1024, 1664, 1664]
-        dit_config["ffn_dim_multiplier"] = None
-        dit_config["hidden_size"] = 2520
-        dit_config["in_channels"] = 16
-        dit_config["multiple_of"] = 256
-        dit_config["norm_eps"] = 1e-05
-        dit_config["num_attention_heads"] = 21
-        dit_config["num_kv_heads"] = 7
-        dit_config["num_layers"] = 32
-        dit_config["num_refiner_layers"] = 2
-        dit_config["out_channels"] = None
-        dit_config["patch_size"] = 2
-        dit_config["text_feat_dim"] = 2048
-        dit_config["timestep_scale"] = 1000.0
-        return dit_config
-
    if '{}input_blocks.0.0.weight'.format(key_prefix) not in state_dict_keys:
        return None

--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -101,7 +101,7 @@ if args.directml is not None:
    lowvram_available = False #TODO: need to find a way to get free memory in directml before this can be enabled by default.

 try:
-    import intel_extension_for_pytorch as ipex  # noqa: F401
+    import intel_extension_for_pytorch as ipex
    _ = torch.xpu.device_count()
    xpu_available = xpu_available or torch.xpu.is_available()
 except:
@@ -128,11 +128,6 @@ try:
 except:
    mlu_available = False

-try:
-    ixuca_available = hasattr(torch, "corex")
-except:
-    ixuca_available = False
-
 if args.cpu:
    cpu_state = CPUState.CPU

@@ -156,12 +151,6 @@ def is_mlu():
        return True
    return False

-def is_ixuca():
-    global ixuca_available
-    if ixuca_available:
-        return True
-    return False
-
 def get_torch_device():
    global directml_enabled
    global cpu_state
@@ -197,9 +186,8 @@ def get_total_memory(dev=None, torch_total_too=False):
        elif is_intel_xpu():
            stats = torch.xpu.memory_stats(dev)
            mem_reserved = stats['reserved_bytes.all.current']
-            mem_total_xpu = torch.xpu.get_device_properties(dev).total_memory
            mem_total_torch = mem_reserved
-            mem_total = mem_total_xpu
+            mem_total = torch.xpu.get_device_properties(dev).total_memory
        elif is_ascend_npu():
            stats = torch.npu.memory_stats(dev)
            mem_reserved = stats['reserved_bytes.all.current']
@@ -300,7 +288,7 @@ try:
        if torch_version_numeric[0] >= 2:
            if ENABLE_PYTORCH_ATTENTION == False and args.use_split_cross_attention == False and args.use_quad_cross_attention == False:
                ENABLE_PYTORCH_ATTENTION = True
-    if is_intel_xpu() or is_ascend_npu() or is_mlu() or is_ixuca():
+    if is_intel_xpu() or is_ascend_npu() or is_mlu():
        if args.use_split_cross_attention == False and args.use_quad_cross_attention == False:
            ENABLE_PYTORCH_ATTENTION = True
 except:
@@ -319,10 +307,7 @@ try:
        logging.info("ROCm version: {}".format(rocm_version))
        if args.use_split_cross_attention == False and args.use_quad_cross_attention == False:
            if torch_version_numeric >= (2, 7):  # works on 2.6 but doesn't actually seem to improve much
-                if any((a in arch) for a in ["gfx90a", "gfx942", "gfx1100", "gfx1101", "gfx1151"]):  # TODO: more arches, TODO: gfx950
-                    ENABLE_PYTORCH_ATTENTION = True
-            if torch_version_numeric >= (2, 8):
-                if any((a in arch) for a in ["gfx1201"]):
+                if any((a in arch) for a in ["gfx90a", "gfx942", "gfx1100", "gfx1101", "gfx1151"]):  # TODO: more arches, TODO: gfx1201 and gfx950
                    ENABLE_PYTORCH_ATTENTION = True
        if torch_version_numeric >= (2, 7) and rocm_version >= (6, 4):
            if any((a in arch) for a in ["gfx1201", "gfx942", "gfx950"]):  # TODO: more arches
@@ -392,8 +377,6 @@ def get_torch_device_name(device):
            except:
                allocator_backend = ""
            return "{} {} : {}".format(device, torch.cuda.get_device_name(device), allocator_backend)
-        elif device.type == "xpu":
-            return "{} {}".format(device, torch.xpu.get_device_name(device))
        else:
            return "{}".format(device.type)
    elif is_intel_xpu():
@@ -529,8 +512,6 @@ WINDOWS = any(platform.win32_ver())
 EXTRA_RESERVED_VRAM = 400 * 1024 * 1024
 if WINDOWS:
    EXTRA_RESERVED_VRAM = 600 * 1024 * 1024 #Windows is higher because of the shared vram issue
-    if total_vram > (15 * 1024):  # more extra reserved vram on 16GB+ cards
-        EXTRA_RESERVED_VRAM += 100 * 1024 * 1024

 if args.reserve_vram is not None:
    EXTRA_RESERVED_VRAM = args.reserve_vram * 1024 * 1024 * 1024
@@ -895,7 +876,6 @@ def vae_dtype(device=None, allowed_dtypes=[]):
            return d

        # NOTE: bfloat16 seems to work on AMD for the VAE but is extremely slow in some cases compared to fp32
-        # slowness still a problem on pytorch nightly 2.9.0.dev20250720+rocm6.4 tested on RDNA3
        if d == torch.bfloat16 and (not is_amd()) and should_use_bf16(device):
            return d

@@ -949,7 +929,7 @@ def device_supports_non_blocking(device):
    if is_device_mps(device):
        return False #pytorch bug? mps doesn't support non blocking
    if is_intel_xpu():
-        return True
+        return False
    if args.deterministic: #TODO: figure out why deterministic breaks non blocking from gpu to cpu (previews)
        return False
    if directml_enabled:
@@ -988,8 +968,6 @@ def get_offload_stream(device):
        stream_counter = (stream_counter + 1) % len(ss)
        if is_device_cuda(device):
            ss[stream_counter].wait_stream(torch.cuda.current_stream())
-        elif is_device_xpu(device):
-            ss[stream_counter].wait_stream(torch.xpu.current_stream())
        stream_counters[device] = stream_counter
        return s
    elif is_device_cuda(device):
@@ -1001,15 +979,6 @@ def get_offload_stream(device):
        stream_counter = (stream_counter + 1) % len(ss)
        stream_counters[device] = stream_counter
        return s
-    elif is_device_xpu(device):
-        ss = []
-        for k in range(NUM_STREAMS):
-            ss.append(torch.xpu.Stream(device=device, priority=0))
-        STREAMS[device] = ss
-        s = ss[stream_counter]
-        stream_counter = (stream_counter + 1) % len(ss)
-        stream_counters[device] = stream_counter
-        return s
    return None

 def sync_stream(device, stream):
@@ -1017,8 +986,6 @@ def sync_stream(device, stream):
        return
    if is_device_cuda(device):
        torch.cuda.current_stream().wait_stream(stream)
-    elif is_device_xpu(device):
-        torch.xpu.current_stream().wait_stream(stream)

 def cast_to(weight, dtype=None, device=None, non_blocking=False, copy=False, stream=None):
    if device is None or weight.device == device:
@@ -1060,8 +1027,6 @@ def xformers_enabled():
        return False
    if is_mlu():
        return False
-    if is_ixuca():
-        return False
    if directml_enabled:
        return False
    return XFORMERS_IS_AVAILABLE
@@ -1097,8 +1062,6 @@ def pytorch_attention_flash_attention():
            return True
        if is_amd():
            return True #if you have pytorch attention enabled on AMD it probably supports at least mem efficient attention
-        if is_ixuca():
-            return True
    return False

 def force_upcast_attention_dtype():
@@ -1129,8 +1092,8 @@ def get_free_memory(dev=None, torch_free_too=False):
            stats = torch.xpu.memory_stats(dev)
            mem_active = stats['active_bytes.all.current']
            mem_reserved = stats['reserved_bytes.all.current']
-            mem_free_xpu = torch.xpu.get_device_properties(dev).total_memory - mem_reserved
            mem_free_torch = mem_reserved - mem_active
+            mem_free_xpu = torch.xpu.get_device_properties(dev).total_memory - mem_reserved
            mem_free_total = mem_free_xpu + mem_free_torch
        elif is_ascend_npu():
            stats = torch.npu.memory_stats(dev)
@@ -1179,9 +1142,6 @@ def is_device_cpu(device):
 def is_device_mps(device):
    return is_device_type(device, 'mps')

-def is_device_xpu(device):
-    return is_device_type(device, 'xpu')
-
 def is_device_cuda(device):
    return is_device_type(device, 'cuda')

@@ -1213,10 +1173,7 @@ def should_use_fp16(device=None, model_params=0, prioritize_performance=True, ma
        return False

    if is_intel_xpu():
-        if torch_version_numeric < (2, 3):
-            return True
-        else:
-            return torch.xpu.get_device_properties(device).has_fp16
+        return True

    if is_ascend_npu():
        return True
@@ -1224,9 +1181,6 @@ def should_use_fp16(device=None, model_params=0, prioritize_performance=True, ma
    if is_mlu():
        return True

-    if is_ixuca():
-        return True
-
    if torch.version.hip:
        return True

@@ -1282,15 +1236,9 @@ def should_use_bf16(device=None, model_params=0, prioritize_performance=True, ma
        return False

    if is_intel_xpu():
-        if torch_version_numeric < (2, 6):
-            return True
-        else:
-            return torch.xpu.get_device_capability(device)['has_bfloat16_conversions']
-
-    if is_ascend_npu():
        return True

-    if is_ixuca():
+    if is_ascend_npu():
        return True

    if is_amd():
@@ -1342,13 +1290,6 @@ def supports_fp8_compute(device=None):

    return True

-def extended_fp16_support():
-    # TODO: check why some models work with fp16 on newer torch versions but not on older
-    if torch_version_numeric < (2, 7):
-        return False
-
-    return True
-
 def soft_empty_cache(force=False):
    global cpu_state
    if cpu_state == CPUState.MPS:
--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@@ -379,9 +379,6 @@ class ModelPatcher:
    def set_model_sampler_pre_cfg_function(self, pre_cfg_function, disable_cfg1_optimization=False):
        self.model_options = set_model_options_pre_cfg_function(self.model_options, pre_cfg_function, disable_cfg1_optimization)

-    def set_model_sampler_calc_cond_batch_function(self, sampler_calc_cond_batch_function):
-        self.model_options["sampler_calc_cond_batch_function"] = sampler_calc_cond_batch_function
-
    def set_model_unet_function_wrapper(self, unet_wrapper_function: UnetWrapperFunction):
        self.model_options["model_function_wrapper"] = unet_wrapper_function

--- a/comfy/ops.py
+++ b/comfy/ops.py
@@ -336,12 +336,9 @@ class fp8_ops(manual_cast):
            return None

        def forward_comfy_cast_weights(self, input):
-            try:
-                out = fp8_linear(self, input)
-                if out is not None:
-                    return out
-            except Exception as e:
-                logging.info("Exception during fp8 op: {}".format(e))
+            out = fp8_linear(self, input)
+            if out is not None:
+                return out

            weight, bias = cast_bias_weight(self, input)
            return torch.nn.functional.linear(input, weight, bias)
--- a/comfy/samplers.py
+++ b/comfy/samplers.py
@@ -373,11 +373,7 @@ def sampling_function(model, x, timestep, uncond, cond, cond_scale, model_option
        uncond_ = uncond

    conds = [cond, uncond_]
-    if "sampler_calc_cond_batch_function" in model_options:
-        args = {"conds": conds, "input": x, "sigma": timestep, "model": model, "model_options": model_options}
-        out = model_options["sampler_calc_cond_batch_function"](args)
-    else:
-        out = calc_cond_batch(model, conds, x, timestep, model_options)
+    out = calc_cond_batch(model, conds, x, timestep, model_options)

    for fn in model_options.get("sampler_pre_cfg_function", []):
        args = {"conds":conds, "conds_out": out, "cond_scale": cond_scale, "timestep": timestep,
@@ -720,7 +716,7 @@ KSAMPLER_NAMES = ["euler", "euler_cfg_pp", "euler_ancestral", "euler_ancestral_c
                  "lms", "dpm_fast", "dpm_adaptive", "dpmpp_2s_ancestral", "dpmpp_2s_ancestral_cfg_pp", "dpmpp_sde", "dpmpp_sde_gpu",
                  "dpmpp_2m", "dpmpp_2m_cfg_pp", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "dpmpp_3m_sde", "dpmpp_3m_sde_gpu", "ddpm", "lcm",
                  "ipndm", "ipndm_v", "deis", "res_multistep", "res_multistep_cfg_pp", "res_multistep_ancestral", "res_multistep_ancestral_cfg_pp",
-                  "gradient_estimation", "gradient_estimation_cfg_pp", "er_sde", "seeds_2", "seeds_3", "sa_solver", "sa_solver_pece"]
+                  "gradient_estimation", "gradient_estimation_cfg_pp", "er_sde", "seeds_2", "seeds_3"]

 class KSAMPLER(Sampler):
    def __init__(self, sampler_function, extra_options={}, inpaint_options={}):
@@ -1043,13 +1039,13 @@ class SchedulerHandler(NamedTuple):
    use_ms: bool = True

 SCHEDULER_HANDLERS = {
-    "simple": SchedulerHandler(simple_scheduler),
-    "sgm_uniform": SchedulerHandler(partial(normal_scheduler, sgm=True)),
+    "normal": SchedulerHandler(normal_scheduler),
    "karras": SchedulerHandler(k_diffusion_sampling.get_sigmas_karras, use_ms=False),
    "exponential": SchedulerHandler(k_diffusion_sampling.get_sigmas_exponential, use_ms=False),
+    "sgm_uniform": SchedulerHandler(partial(normal_scheduler, sgm=True)),
+    "simple": SchedulerHandler(simple_scheduler),
    "ddim_uniform": SchedulerHandler(ddim_scheduler),
    "beta": SchedulerHandler(beta_scheduler),
-    "normal": SchedulerHandler(normal_scheduler),
    "linear_quadratic": SchedulerHandler(linear_quadratic_schedule),
    "kl_optimal": SchedulerHandler(kl_optimal_scheduler, use_ms=False),
 }
--- a/comfy/sd.py
+++ b/comfy/sd.py
@@ -14,12 +14,10 @@ import comfy.ldm.genmo.vae.model
 import comfy.ldm.lightricks.vae.causal_video_autoencoder
 import comfy.ldm.cosmos.vae
 import comfy.ldm.wan.vae
-import comfy.ldm.wan.vae2_2
 import comfy.ldm.hunyuan3d.vae
 import comfy.ldm.ace.vae.music_dcae_pipeline
 import yaml
 import math
-import os

 import comfy.utils

@@ -46,7 +44,6 @@ import comfy.text_encoders.lumina2
 import comfy.text_encoders.wan
 import comfy.text_encoders.hidream
 import comfy.text_encoders.ace
-import comfy.text_encoders.omnigen2

 import comfy.model_patcher
 import comfy.lora
@@ -421,30 +418,17 @@ class VAE:
                self.memory_used_encode = lambda shape, dtype: (50 * (round((shape[2] + 7) / 8) * 8) * shape[3] * shape[4]) * model_management.dtype_size(dtype)
                self.working_dtypes = [torch.bfloat16, torch.float32]
            elif "decoder.middle.0.residual.0.gamma" in sd:
-                if "decoder.upsamples.0.upsamples.0.residual.2.weight" in sd:  # Wan 2.2 VAE
-                    self.upscale_ratio = (lambda a: max(0, a * 4 - 3), 16, 16)
-                    self.upscale_index_formula = (4, 16, 16)
-                    self.downscale_ratio = (lambda a: max(0, math.floor((a + 3) / 4)), 16, 16)
-                    self.downscale_index_formula = (4, 16, 16)
-                    self.latent_dim = 3
-                    self.latent_channels = 48
-                    ddconfig = {"dim": 160, "z_dim": self.latent_channels, "dim_mult": [1, 2, 4, 4], "num_res_blocks": 2, "attn_scales": [], "temperal_downsample": [False, True, True], "dropout": 0.0}
-                    self.first_stage_model = comfy.ldm.wan.vae2_2.WanVAE(**ddconfig)
-                    self.working_dtypes = [torch.bfloat16, torch.float16, torch.float32]
-                    self.memory_used_encode = lambda shape, dtype: 3300 * shape[3] * shape[4] * model_management.dtype_size(dtype)
-                    self.memory_used_decode = lambda shape, dtype: 8000 * shape[3] * shape[4] * (16 * 16) * model_management.dtype_size(dtype)
-                else:  # Wan 2.1 VAE
-                    self.upscale_ratio = (lambda a: max(0, a * 4 - 3), 8, 8)
-                    self.upscale_index_formula = (4, 8, 8)
-                    self.downscale_ratio = (lambda a: max(0, math.floor((a + 3) / 4)), 8, 8)
-                    self.downscale_index_formula = (4, 8, 8)
-                    self.latent_dim = 3
-                    self.latent_channels = 16
-                    ddconfig = {"dim": 96, "z_dim": self.latent_channels, "dim_mult": [1, 2, 4, 4], "num_res_blocks": 2, "attn_scales": [], "temperal_downsample": [False, True, True], "dropout": 0.0}
-                    self.first_stage_model = comfy.ldm.wan.vae.WanVAE(**ddconfig)
-                    self.working_dtypes = [torch.bfloat16, torch.float16, torch.float32]
-                    self.memory_used_encode = lambda shape, dtype: 6000 * shape[3] * shape[4] * model_management.dtype_size(dtype)
-                    self.memory_used_decode = lambda shape, dtype: 7000 * shape[3] * shape[4] * (8 * 8) * model_management.dtype_size(dtype)
+                self.upscale_ratio = (lambda a: max(0, a * 4 - 3), 8, 8)
+                self.upscale_index_formula = (4, 8, 8)
+                self.downscale_ratio = (lambda a: max(0, math.floor((a + 3) / 4)), 8, 8)
+                self.downscale_index_formula = (4, 8, 8)
+                self.latent_dim = 3
+                self.latent_channels = 16
+                ddconfig = {"dim": 96, "z_dim": self.latent_channels, "dim_mult": [1, 2, 4, 4], "num_res_blocks": 2, "attn_scales": [], "temperal_downsample": [False, True, True], "dropout": 0.0}
+                self.first_stage_model = comfy.ldm.wan.vae.WanVAE(**ddconfig)
+                self.working_dtypes = [torch.bfloat16, torch.float16, torch.float32]
+                self.memory_used_encode = lambda shape, dtype: 6000 * shape[3] * shape[4] * model_management.dtype_size(dtype)
+                self.memory_used_decode = lambda shape, dtype: 7000 * shape[3] * shape[4] * (8 * 8) * model_management.dtype_size(dtype)
            elif "geo_decoder.cross_attn_decoder.ln_1.bias" in sd:
                self.latent_dim = 1
                ln_post = "geo_decoder.ln_post.weight" in sd
@@ -770,7 +754,6 @@ class CLIPType(Enum):
    HIDREAM = 14
    CHROMA = 15
    ACE = 16
-    OMNIGEN2 = 17


 def load_clip(ckpt_paths, embedding_directory=None, clip_type=CLIPType.STABLE_DIFFUSION, model_options={}):
@@ -790,7 +773,6 @@ class TEModel(Enum):
    LLAMA3_8 = 7
    T5_XXL_OLD = 8
    GEMMA_2_2B = 9
-    QWEN25_3B = 10

 def detect_te_model(sd):
    if "text_model.encoder.layers.30.mlp.fc1.weight" in sd:
@@ -811,8 +793,6 @@ def detect_te_model(sd):
        return TEModel.T5_BASE
    if 'model.layers.0.post_feedforward_layernorm.weight' in sd:
        return TEModel.GEMMA_2_2B
-    if 'model.layers.0.self_attn.k_proj.bias' in sd:
-        return TEModel.QWEN25_3B
    if "model.layers.0.post_attention_layernorm.weight" in sd:
        return TEModel.LLAMA3_8
    return None
@@ -914,9 +894,6 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip
            clip_target.clip = comfy.text_encoders.hidream.hidream_clip(**llama_detect(clip_data),
                                                                        clip_l=False, clip_g=False, t5=False, llama=True, dtype_t5=None, t5xxl_scaled_fp8=None)
            clip_target.tokenizer = comfy.text_encoders.hidream.HiDreamTokenizer
-        elif te_model == TEModel.QWEN25_3B:
-            clip_target.clip = comfy.text_encoders.omnigen2.te(**llama_detect(clip_data))
-            clip_target.tokenizer = comfy.text_encoders.omnigen2.Omnigen2Tokenizer
        else:
            # clip_l
            if clip_type == CLIPType.SD3:
@@ -992,12 +969,6 @@ def load_gligen(ckpt_path):
        model = model.half()
    return comfy.model_patcher.ModelPatcher(model, load_device=model_management.get_torch_device(), offload_device=model_management.unet_offload_device())

-def model_detection_error_hint(path, state_dict):
-    filename = os.path.basename(path)
-    if 'lora' in filename.lower():
-        return "\nHINT: This seems to be a Lora file and Lora files should be put in the lora folder and loaded with a lora loader node.."
-    return ""
-
 def load_checkpoint(config_path=None, ckpt_path=None, output_vae=True, output_clip=True, embedding_directory=None, state_dict=None, config=None):
    logging.warning("Warning: The load checkpoint with config function is deprecated and will eventually be removed, please use the other one.")
    model, clip, vae, _ = load_checkpoint_guess_config(ckpt_path, output_vae=output_vae, output_clip=output_clip, output_clipvision=False, embedding_directory=embedding_directory, output_model=True)
@@ -1026,7 +997,7 @@ def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, o
    sd, metadata = comfy.utils.load_torch_file(ckpt_path, return_metadata=True)
    out = load_state_dict_guess_config(sd, output_vae, output_clip, output_clipvision, embedding_directory, output_model, model_options, te_model_options=te_model_options, metadata=metadata)
    if out is None:
-        raise RuntimeError("ERROR: Could not detect model type of: {}\n{}".format(ckpt_path, model_detection_error_hint(ckpt_path, sd)))
+        raise RuntimeError("ERROR: Could not detect model type of: {}".format(ckpt_path))
    return out

 def load_state_dict_guess_config(sd, output_vae=True, output_clip=True, output_clipvision=False, embedding_directory=None, output_model=True, model_options={}, te_model_options={}, metadata=None):
@@ -1189,7 +1160,7 @@ def load_diffusion_model_state_dict(sd, model_options={}):
    model.load_model_weights(new_sd, "")
    left_over = sd.keys()
    if len(left_over) > 0:
-        logging.info("left over keys in diffusion model: {}".format(left_over))
+        logging.info("left over keys in unet: {}".format(left_over))
    return comfy.model_patcher.ModelPatcher(model, load_device=load_device, offload_device=offload_device)


@@ -1197,8 +1168,8 @@ def load_diffusion_model(unet_path, model_options={}):
    sd = comfy.utils.load_torch_file(unet_path)
    model = load_diffusion_model_state_dict(sd, model_options=model_options)
    if model is None:
-        logging.error("ERROR UNSUPPORTED DIFFUSION MODEL {}".format(unet_path))
-        raise RuntimeError("ERROR: Could not detect model type of: {}\n{}".format(unet_path, model_detection_error_hint(unet_path, sd)))
+        logging.error("ERROR UNSUPPORTED UNET {}".format(unet_path))
+        raise RuntimeError("ERROR: Could not detect model type of: {}".format(unet_path))
    return model

 def load_unet(unet_path, dtype=None):
--- a/comfy/sd1_clip.py
+++ b/comfy/sd1_clip.py
@@ -462,7 +462,7 @@ class SDTokenizer:
            tokenizer_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "sd1_tokenizer")
        self.tokenizer = tokenizer_class.from_pretrained(tokenizer_path, **tokenizer_args)
        self.max_length = tokenizer_data.get("{}_max_length".format(embedding_key), max_length)
-        self.min_length = tokenizer_data.get("{}_min_length".format(embedding_key), min_length)
+        self.min_length = min_length
        self.end_token = None
        self.min_padding = min_padding

@@ -482,8 +482,7 @@ class SDTokenizer:
            if end_token is not None:
                self.end_token = end_token
            else:
-                if has_end_token:
-                    self.end_token = empty[0]
+                self.end_token = empty[0]

        if pad_token is not None:
            self.pad_token = pad_token
--- a/comfy/sd1_tokenizer/tokenizer_config.json
+++ b/comfy/sd1_tokenizer/tokenizer_config.json
@@ -18,7 +18,7 @@
    "single_word": false
  },
  "errors": "replace",
-  "model_max_length": 8192,
+  "model_max_length": 77,
  "name_or_path": "openai/clip-vit-large-patch14",
  "pad_token": "<|endoftext|>",
  "special_tokens_map_file": "./special_tokens_map.json",
--- a/comfy/supported_models.py
+++ b/comfy/supported_models.py
@@ -18,7 +18,6 @@ import comfy.text_encoders.cosmos
 import comfy.text_encoders.lumina2
 import comfy.text_encoders.wan
 import comfy.text_encoders.ace
-import comfy.text_encoders.omnigen2

 from . import supported_models_base
 from . import latent_formats
@@ -1059,19 +1058,6 @@ class WAN21_Vace(WAN21_T2V):
        out = model_base.WAN21_Vace(self, image_to_video=False, device=device)
        return out

-class WAN22_T2V(WAN21_T2V):
-    unet_config = {
-        "image_model": "wan2.1",
-        "model_type": "t2v",
-        "out_dim": 48,
-    }
-
-    latent_format = latent_formats.Wan22
-
-    def get_model(self, state_dict, prefix="", device=None):
-        out = model_base.WAN22(self, image_to_video=True, device=device)
-        return out
-
 class Hunyuan3Dv2(supported_models_base.BASE):
    unet_config = {
        "image_model": "hunyuan3d2",
@@ -1195,41 +1181,6 @@ class ACEStep(supported_models_base.BASE):
    def clip_target(self, state_dict={}):
        return supported_models_base.ClipTarget(comfy.text_encoders.ace.AceT5Tokenizer, comfy.text_encoders.ace.AceT5Model)

-class Omnigen2(supported_models_base.BASE):
-    unet_config = {
-        "image_model": "omnigen2",
-    }
-
-    sampling_settings = {
-        "multiplier": 1.0,
-        "shift": 2.6,
-    }
-
-    memory_usage_factor = 1.65 #TODO
-
-    unet_extra_config = {}
-    latent_format = latent_formats.Flux
-
-    supported_inference_dtypes = [torch.bfloat16, torch.float32]
-
-    vae_key_prefix = ["vae."]
-    text_encoder_key_prefix = ["text_encoders."]
-
-    def __init__(self, unet_config):
-        super().__init__(unet_config)
-        if comfy.model_management.extended_fp16_support():
-            self.supported_inference_dtypes = [torch.float16] + self.supported_inference_dtypes
-
-    def get_model(self, state_dict, prefix="", device=None):
-        out = model_base.Omnigen2(self, device=device)
-        return out
-
-    def clip_target(self, state_dict={}):
-        pref = self.text_encoder_key_prefix[0]
-        hunyuan_detect = comfy.text_encoders.hunyuan_video.llama_detect(state_dict, "{}qwen25_3b.transformer.".format(pref))
-        return supported_models_base.ClipTarget(comfy.text_encoders.omnigen2.Omnigen2Tokenizer, comfy.text_encoders.omnigen2.te(**hunyuan_detect))
-
-
-models = [LotusD, Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, FluxSchnell, GenmoMochi, LTXV, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, CosmosT2IPredict2, CosmosI2VPredict2, Lumina2, WAN22_T2V, WAN21_T2V, WAN21_I2V, WAN21_FunControl2V, WAN21_Vace, WAN21_Camera, Hunyuan3Dv2mini, Hunyuan3Dv2, HiDream, Chroma, ACEStep, Omnigen2]
+models = [LotusD, Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, FluxSchnell, GenmoMochi, LTXV, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, CosmosT2IPredict2, CosmosI2VPredict2, Lumina2, WAN21_T2V, WAN21_I2V, WAN21_FunControl2V, WAN21_Vace, WAN21_Camera, Hunyuan3Dv2mini, Hunyuan3Dv2, HiDream, Chroma, ACEStep]

 models += [SVD_img2vid]
--- a/comfy/text_encoders/llama.py
+++ b/comfy/text_encoders/llama.py
@@ -24,24 +24,6 @@ class Llama2Config:
    head_dim = 128
    rms_norm_add = False
    mlp_activation = "silu"
-    qkv_bias = False
-
-@dataclass
-class Qwen25_3BConfig:
-    vocab_size: int = 151936
-    hidden_size: int = 2048
-    intermediate_size: int = 11008
-    num_hidden_layers: int = 36
-    num_attention_heads: int = 16
-    num_key_value_heads: int = 2
-    max_position_embeddings: int = 128000
-    rms_norm_eps: float = 1e-6
-    rope_theta: float = 1000000.0
-    transformer_type: str = "llama"
-    head_dim = 128
-    rms_norm_add = False
-    mlp_activation = "silu"
-    qkv_bias = True

@dataclass
 class Gemma2_2B_Config:
@@ -58,7 +40,6 @@ class Gemma2_2B_Config:
    head_dim = 256
    rms_norm_add = True
    mlp_activation = "gelu_pytorch_tanh"
-    qkv_bias = False

 class RMSNorm(nn.Module):
    def __init__(self, dim: int, eps: float = 1e-5, add=False, device=None, dtype=None):
@@ -117,9 +98,9 @@ class Attention(nn.Module):
        self.inner_size = self.num_heads * self.head_dim

        ops = ops or nn
-        self.q_proj = ops.Linear(config.hidden_size, self.inner_size, bias=config.qkv_bias, device=device, dtype=dtype)
-        self.k_proj = ops.Linear(config.hidden_size, self.num_kv_heads * self.head_dim, bias=config.qkv_bias, device=device, dtype=dtype)
-        self.v_proj = ops.Linear(config.hidden_size, self.num_kv_heads * self.head_dim, bias=config.qkv_bias, device=device, dtype=dtype)
+        self.q_proj = ops.Linear(config.hidden_size, self.inner_size, bias=False, device=device, dtype=dtype)
+        self.k_proj = ops.Linear(config.hidden_size, self.num_kv_heads * self.head_dim, bias=False, device=device, dtype=dtype)
+        self.v_proj = ops.Linear(config.hidden_size, self.num_kv_heads * self.head_dim, bias=False, device=device, dtype=dtype)
        self.o_proj = ops.Linear(self.inner_size, config.hidden_size, bias=False, device=device, dtype=dtype)

    def forward(
@@ -339,14 +320,6 @@ class Llama2(BaseLlama, torch.nn.Module):
        self.model = Llama2_(config, device=device, dtype=dtype, ops=operations)
        self.dtype = dtype

-class Qwen25_3B(BaseLlama, torch.nn.Module):
-    def __init__(self, config_dict, dtype, device, operations):
-        super().__init__()
-        config = Qwen25_3BConfig(**config_dict)
-        self.num_layers = config.num_hidden_layers
-
-        self.model = Llama2_(config, device=device, dtype=dtype, ops=operations)
-        self.dtype = dtype

 class Gemma2_2B(BaseLlama, torch.nn.Module):
    def __init__(self, config_dict, dtype, device, operations):
--- a/comfy/text_encoders/omnigen2.py
+++ b/comfy/text_encoders/omnigen2.py
@@ -1,44 +0,0 @@
-from transformers import Qwen2Tokenizer
-from comfy import sd1_clip
-import comfy.text_encoders.llama
-import os
-
-
-class Qwen25_3BTokenizer(sd1_clip.SDTokenizer):
-    def __init__(self, embedding_directory=None, tokenizer_data={}):
-        tokenizer_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "qwen25_tokenizer")
-        super().__init__(tokenizer_path, pad_with_end=False, embedding_size=2048, embedding_key='qwen25_3b', tokenizer_class=Qwen2Tokenizer, has_start_token=False, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=1, pad_token=151643, tokenizer_data=tokenizer_data)
-
-
-class Omnigen2Tokenizer(sd1_clip.SD1Tokenizer):
-    def __init__(self, embedding_directory=None, tokenizer_data={}):
-        super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, name="qwen25_3b", tokenizer=Qwen25_3BTokenizer)
-        self.llama_template = '<|im_start|>system\nYou are a helpful assistant that generates high-quality images based on user instructions.<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n'
-
-    def tokenize_with_weights(self, text, return_word_ids=False, llama_template=None,**kwargs):
-        if llama_template is None:
-            llama_text = self.llama_template.format(text)
-        else:
-            llama_text = llama_template.format(text)
-        return super().tokenize_with_weights(llama_text, return_word_ids=return_word_ids, **kwargs)
-
-class Qwen25_3BModel(sd1_clip.SDClipModel):
-    def __init__(self, device="cpu", layer="last", layer_idx=None, dtype=None, attention_mask=True, model_options={}):
-        super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config={}, dtype=dtype, special_tokens={"pad": 151643}, layer_norm_hidden_state=False, model_class=comfy.text_encoders.llama.Qwen25_3B, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options)
-
-
-class Omnigen2Model(sd1_clip.SD1ClipModel):
-    def __init__(self, device="cpu", dtype=None, model_options={}):
-        super().__init__(device=device, dtype=dtype, name="qwen25_3b", clip_model=Qwen25_3BModel, model_options=model_options)
-
-
-def te(dtype_llama=None, llama_scaled_fp8=None):
-    class Omnigen2TEModel_(Omnigen2Model):
-        def __init__(self, device="cpu", dtype=None, model_options={}):
-            if llama_scaled_fp8 is not None and "scaled_fp8" not in model_options:
-                model_options = model_options.copy()
-                model_options["scaled_fp8"] = llama_scaled_fp8
-            if dtype_llama is not None:
-                dtype = dtype_llama
-            super().__init__(device=device, dtype=dtype, model_options=model_options)
-    return Omnigen2TEModel_
--- a/comfy/text_encoders/pixart_t5.py
+++ b/comfy/text_encoders/pixart_t5.py
@@ -1,42 +1,42 @@
-import os
-
-from comfy import sd1_clip
-import comfy.text_encoders.t5
-import comfy.text_encoders.sd3_clip
-from comfy.sd1_clip import gen_empty_tokens
-
-from transformers import T5TokenizerFast
-
-class T5XXLModel(comfy.text_encoders.sd3_clip.T5XXLModel):
-    def __init__(self, **kwargs):
-        super().__init__(**kwargs)
-
-    def gen_empty_tokens(self, special_tokens, *args, **kwargs):
-        # PixArt expects the negative to be all pad tokens
-        special_tokens = special_tokens.copy()
-        special_tokens.pop("end")
-        return gen_empty_tokens(special_tokens, *args, **kwargs)
-
-class PixArtT5XXL(sd1_clip.SD1ClipModel):
-    def __init__(self, device="cpu", dtype=None, model_options={}):
-        super().__init__(device=device, dtype=dtype, name="t5xxl", clip_model=T5XXLModel, model_options=model_options)
-
-class T5XXLTokenizer(sd1_clip.SDTokenizer):
-    def __init__(self, embedding_directory=None, tokenizer_data={}):
-        tokenizer_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "t5_tokenizer")
-        super().__init__(tokenizer_path, embedding_directory=embedding_directory, pad_with_end=False, embedding_size=4096, embedding_key='t5xxl', tokenizer_class=T5TokenizerFast, has_start_token=False, pad_to_max_length=False, max_length=99999999, min_length=1, tokenizer_data=tokenizer_data) # no padding
-
-class PixArtTokenizer(sd1_clip.SD1Tokenizer):
-    def __init__(self, embedding_directory=None, tokenizer_data={}):
-        super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, clip_name="t5xxl", tokenizer=T5XXLTokenizer)
-
-def pixart_te(dtype_t5=None, t5xxl_scaled_fp8=None):
-    class PixArtTEModel_(PixArtT5XXL):
-        def __init__(self, device="cpu", dtype=None, model_options={}):
-            if t5xxl_scaled_fp8 is not None and "t5xxl_scaled_fp8" not in model_options:
-                model_options = model_options.copy()
-                model_options["t5xxl_scaled_fp8"] = t5xxl_scaled_fp8
-            if dtype is None:
-                dtype = dtype_t5
-            super().__init__(device=device, dtype=dtype, model_options=model_options)
-    return PixArtTEModel_
+import os
+
+from comfy import sd1_clip
+import comfy.text_encoders.t5
+import comfy.text_encoders.sd3_clip
+from comfy.sd1_clip import gen_empty_tokens
+
+from transformers import T5TokenizerFast
+
+class T5XXLModel(comfy.text_encoders.sd3_clip.T5XXLModel):
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+    def gen_empty_tokens(self, special_tokens, *args, **kwargs):
+        # PixArt expects the negative to be all pad tokens
+        special_tokens = special_tokens.copy()
+        special_tokens.pop("end")
+        return gen_empty_tokens(special_tokens, *args, **kwargs)
+
+class PixArtT5XXL(sd1_clip.SD1ClipModel):
+    def __init__(self, device="cpu", dtype=None, model_options={}):
+        super().__init__(device=device, dtype=dtype, name="t5xxl", clip_model=T5XXLModel, model_options=model_options)
+
+class T5XXLTokenizer(sd1_clip.SDTokenizer):
+    def __init__(self, embedding_directory=None, tokenizer_data={}):
+        tokenizer_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "t5_tokenizer")
+        super().__init__(tokenizer_path, embedding_directory=embedding_directory, pad_with_end=False, embedding_size=4096, embedding_key='t5xxl', tokenizer_class=T5TokenizerFast, has_start_token=False, pad_to_max_length=False, max_length=99999999, min_length=1, tokenizer_data=tokenizer_data) # no padding
+
+class PixArtTokenizer(sd1_clip.SD1Tokenizer):
+    def __init__(self, embedding_directory=None, tokenizer_data={}):
+        super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, clip_name="t5xxl", tokenizer=T5XXLTokenizer)
+
+def pixart_te(dtype_t5=None, t5xxl_scaled_fp8=None):
+    class PixArtTEModel_(PixArtT5XXL):
+        def __init__(self, device="cpu", dtype=None, model_options={}):
+            if t5xxl_scaled_fp8 is not None and "t5xxl_scaled_fp8" not in model_options:
+                model_options = model_options.copy()
+                model_options["t5xxl_scaled_fp8"] = t5xxl_scaled_fp8
+            if dtype is None:
+                dtype = dtype_t5
+            super().__init__(device=device, dtype=dtype, model_options=model_options)
+    return PixArtTEModel_
--- a/comfy/text_encoders/qwen25_tokenizer/merges.txt
+++ b/comfy/text_encoders/qwen25_tokenizer/merges.txt
--- a/comfy/text_encoders/qwen25_tokenizer/tokenizer_config.json
+++ b/comfy/text_encoders/qwen25_tokenizer/tokenizer_config.json
@@ -1,241 +0,0 @@
-{
-  "add_bos_token": false,
-  "add_prefix_space": false,
-  "added_tokens_decoder": {
-    "151643": {
-      "content": "<|endoftext|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151644": {
-      "content": "<|im_start|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151645": {
-      "content": "<|im_end|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151646": {
-      "content": "<|object_ref_start|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151647": {
-      "content": "<|object_ref_end|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151648": {
-      "content": "<|box_start|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151649": {
-      "content": "<|box_end|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151650": {
-      "content": "<|quad_start|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151651": {
-      "content": "<|quad_end|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151652": {
-      "content": "<|vision_start|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151653": {
-      "content": "<|vision_end|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151654": {
-      "content": "<|vision_pad|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151655": {
-      "content": "<|image_pad|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151656": {
-      "content": "<|video_pad|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151657": {
-      "content": "<tool_call>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "151658": {
-      "content": "</tool_call>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "151659": {
-      "content": "<|fim_prefix|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "151660": {
-      "content": "<|fim_middle|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "151661": {
-      "content": "<|fim_suffix|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "151662": {
-      "content": "<|fim_pad|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "151663": {
-      "content": "<|repo_name|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "151664": {
-      "content": "<|file_sep|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "151665": {
-      "content": "<|img|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151666": {
-      "content": "<|endofimg|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151667": {
-      "content": "<|meta|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151668": {
-      "content": "<|endofmeta|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    }
-  },
-  "additional_special_tokens": [
-    "<|im_start|>",
-    "<|im_end|>",
-    "<|object_ref_start|>",
-    "<|object_ref_end|>",
-    "<|box_start|>",
-    "<|box_end|>",
-    "<|quad_start|>",
-    "<|quad_end|>",
-    "<|vision_start|>",
-    "<|vision_end|>",
-    "<|vision_pad|>",
-    "<|image_pad|>",
-    "<|video_pad|>"
-  ],
-  "bos_token": null,
-  "chat_template": "{%- if tools %}\n    {{- '<|im_start|>system\\n' }}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- messages[0]['content'] }}\n    {%- else %}\n        {{- 'You are a helpful assistant.' }}\n    {%- endif %}\n    {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n    {%- for tool in tools %}\n        {{- \"\\n\" }}\n        {{- tool | tojson }}\n    {%- endfor %}\n    {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n    {%- else %}\n        {{- '<|im_start|>system\\nYou are a helpful assistant.<|im_end|>\\n' }}\n    {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n    {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n        {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n    {%- elif message.role == \"assistant\" %}\n        {{- '<|im_start|>' + message.role }}\n        {%- if message.content %}\n            {{- '\\n' + message.content }}\n        {%- endif %}\n        {%- for tool_call in message.tool_calls %}\n            {%- if tool_call.function is defined %}\n                {%- set tool_call = tool_call.function %}\n            {%- endif %}\n            {{- '\\n<tool_call>\\n{\"name\": \"' }}\n            {{- tool_call.name }}\n            {{- '\", \"arguments\": ' }}\n            {{- tool_call.arguments | tojson }}\n            {{- '}\\n</tool_call>' }}\n        {%- endfor %}\n        {{- 
'<|im_end|>\\n' }}\n    {%- elif message.role == \"tool\" %}\n        {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n            {{- '<|im_start|>user' }}\n        {%- endif %}\n        {{- '\\n<tool_response>\\n' }}\n        {{- message.content }}\n        {{- '\\n</tool_response>' }}\n        {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n            {{- '<|im_end|>\\n' }}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
-  "clean_up_tokenization_spaces": false,
-  "eos_token": "<|im_end|>",
-  "errors": "replace",
-  "extra_special_tokens": {},
-  "model_max_length": 131072,
-  "pad_token": "<|endoftext|>",
-  "processor_class": "Qwen2_5_VLProcessor",
-  "split_special_tokens": false,
-  "tokenizer_class": "Qwen2Tokenizer",
-  "unk_token": null
-}
--- a/comfy/text_encoders/qwen25_tokenizer/vocab.json
+++ b/comfy/text_encoders/qwen25_tokenizer/vocab.json
--- a/comfy/text_encoders/t5.py
+++ b/comfy/text_encoders/t5.py
@@ -146,7 +146,7 @@ class T5Attention(torch.nn.Module):
        )
        values = self.relative_attention_bias(relative_position_bucket, out_dtype=dtype)  # shape (query_length, key_length, num_heads)
        values = values.permute([2, 0, 1]).unsqueeze(0)  # shape (1, num_heads, query_length, key_length)
-        return values.contiguous()
+        return values

    def forward(self, x, mask=None, past_bias=None, optimized_attention=None):
        q = self.q(x)
--- a/comfy/utils.py
+++ b/comfy/utils.py
@@ -31,7 +31,6 @@ from einops import rearrange
 from comfy.cli_args import args

 MMAP_TORCH_FILES = args.mmap_torch_files
-DISABLE_MMAP = args.disable_mmap

 ALWAYS_SAFE_LOAD = False
 if hasattr(torch.serialization, "add_safe_globals"):  # TODO: this was added in pytorch 2.4, the unsafe path should be removed once earlier versions are deprecated
@@ -59,10 +58,7 @@ def load_torch_file(ckpt, safe_load=False, device=None, return_metadata=False):
            with safetensors.safe_open(ckpt, framework="pt", device=device.type) as f:
                sd = {}
                for k in f.keys():
-                    tensor = f.get_tensor(k)
-                    if DISABLE_MMAP:  # TODO: Not sure if this is the best way to bypass the mmap issues
-                        tensor = tensor.to(device=device, copy=True)
-                    sd[k] = tensor
+                    sd[k] = f.get_tensor(k)
                if return_metadata:
                    metadata = f.metadata()
        except Exception as e:
@@ -81,7 +77,6 @@ def load_torch_file(ckpt, safe_load=False, device=None, return_metadata=False):
        if safe_load or ALWAYS_SAFE_LOAD:
            pl_sd = torch.load(ckpt, map_location=device, weights_only=True, **torch_args)
        else:
-            logging.warning("WARNING: loading {} unsafely, upgrade your pytorch to 2.4 or newer to load this file safely.".format(ckpt))
            pl_sd = torch.load(ckpt, map_location=device, pickle_module=comfy.checkpoint_pickle)
        if "state_dict" in pl_sd:
            sd = pl_sd["state_dict"]
@@ -698,26 +693,6 @@ def resize_to_batch_size(tensor, batch_size):

    return output

-def resize_list_to_batch_size(l, batch_size):
-    in_batch_size = len(l)
-    if in_batch_size == batch_size or in_batch_size == 0:
-        return l
-
-    if batch_size <= 1:
-        return l[:batch_size]
-
-    output = []
-    if batch_size < in_batch_size:
-        scale = (in_batch_size - 1) / (batch_size - 1)
-        for i in range(batch_size):
-            output.append(l[min(round(i * scale), in_batch_size - 1)])
-    else:
-        scale = in_batch_size / batch_size
-        for i in range(batch_size):
-           output.append(l[min(math.floor((i + 0.5) * scale), in_batch_size - 1)])
-
-    return output
-
 def convert_sd_to(state_dict, dtype):
    keys = list(state_dict.keys())
    for k in keys:
--- a/comfy/weight_adapter/init.py
+++ b/comfy/weight_adapter/init.py
@@ -15,20 +15,9 @@ adapters: list[type[WeightAdapterBase]] = [
    OFTAdapter,
    BOFTAdapter,
 ]
-adapter_maps: dict[str, type[WeightAdapterBase]] = {
-    "LoRA": LoRAAdapter,
-    "LoHa": LoHaAdapter,
-    "LoKr": LoKrAdapter,
-    "OFT": OFTAdapter,
-    ## We disable not implemented algo for now
-    # "GLoRA": GLoRAAdapter,
-    # "BOFT": BOFTAdapter,
-}
-

 __all__ = [
    "WeightAdapterBase",
    "WeightAdapterTrainBase",
-    "adapters",
-    "adapter_maps",
+    "adapters"
 ] + [a.__name__ for a in adapters]
--- a/comfy/weight_adapter/base.py
+++ b/comfy/weight_adapter/base.py
@@ -133,43 +133,3 @@ def tucker_weight_from_conv(up, down, mid):
 def tucker_weight(wa, wb, t):
    temp = torch.einsum("i j ..., j r -> i r ...", t, wb)
    return torch.einsum("i j ..., i r -> r j ...", temp, wa)
-
-
-def factorization(dimension: int, factor: int = -1) -> tuple[int, int]:
-    """
-    return a tuple of two value of input dimension decomposed by the number closest to factor
-    second value is higher or equal than first value.
-
-    examples)
-    factor
-        -1               2                4               8               16               ...
-    127 -> 1, 127   127 -> 1, 127    127 -> 1, 127   127 -> 1, 127   127 -> 1, 127
-    128 -> 8, 16    128 -> 2, 64     128 -> 4, 32    128 -> 8, 16    128 -> 8, 16
-    250 -> 10, 25   250 -> 2, 125    250 -> 2, 125   250 -> 5, 50    250 -> 10, 25
-    360 -> 8, 45    360 -> 2, 180    360 -> 4, 90    360 -> 8, 45    360 -> 12, 30
-    512 -> 16, 32   512 -> 2, 256    512 -> 4, 128   512 -> 8, 64    512 -> 16, 32
-    1024 -> 32, 32  1024 -> 2, 512   1024 -> 4, 256  1024 -> 8, 128  1024 -> 16, 64
-    """
-
-    if factor > 0 and (dimension % factor) == 0 and dimension >= factor**2:
-        m = factor
-        n = dimension // factor
-        if m > n:
-            n, m = m, n
-        return m, n
-    if factor < 0:
-        factor = dimension
-    m, n = 1, dimension
-    length = m + n
-    while m < n:
-        new_m = m + 1
-        while dimension % new_m != 0:
-            new_m += 1
-        new_n = dimension // new_m
-        if new_m + new_n > length or new_m > factor:
-            break
-        else:
-            m, n = new_m, new_n
-    if m > n:
-        n, m = m, n
-    return m, n
--- a/comfy/weight_adapter/loha.py
+++ b/comfy/weight_adapter/loha.py
@@ -3,120 +3,7 @@ from typing import Optional

 import torch
 import comfy.model_management
-from .base import WeightAdapterBase, WeightAdapterTrainBase, weight_decompose
-
-
-class HadaWeight(torch.autograd.Function):
-    @staticmethod
-    def forward(ctx, w1u, w1d, w2u, w2d, scale=torch.tensor(1)):
-        ctx.save_for_backward(w1d, w1u, w2d, w2u, scale)
-        diff_weight = ((w1u @ w1d) * (w2u @ w2d)) * scale
-        return diff_weight
-
-    @staticmethod
-    def backward(ctx, grad_out):
-        (w1d, w1u, w2d, w2u, scale) = ctx.saved_tensors
-        grad_out = grad_out * scale
-        temp = grad_out * (w2u @ w2d)
-        grad_w1u = temp @ w1d.T
-        grad_w1d = w1u.T @ temp
-
-        temp = grad_out * (w1u @ w1d)
-        grad_w2u = temp @ w2d.T
-        grad_w2d = w2u.T @ temp
-
-        del temp
-        return grad_w1u, grad_w1d, grad_w2u, grad_w2d, None
-
-
-class HadaWeightTucker(torch.autograd.Function):
-    @staticmethod
-    def forward(ctx, t1, w1u, w1d, t2, w2u, w2d, scale=torch.tensor(1)):
-        ctx.save_for_backward(t1, w1d, w1u, t2, w2d, w2u, scale)
-
-        rebuild1 = torch.einsum("i j ..., j r, i p -> p r ...", t1, w1d, w1u)
-        rebuild2 = torch.einsum("i j ..., j r, i p -> p r ...", t2, w2d, w2u)
-
-        return rebuild1 * rebuild2 * scale
-
-    @staticmethod
-    def backward(ctx, grad_out):
-        (t1, w1d, w1u, t2, w2d, w2u, scale) = ctx.saved_tensors
-        grad_out = grad_out * scale
-
-        temp = torch.einsum("i j ..., j r -> i r ...", t2, w2d)
-        rebuild = torch.einsum("i j ..., i r -> r j ...", temp, w2u)
-
-        grad_w = rebuild * grad_out
-        del rebuild
-
-        grad_w1u = torch.einsum("r j ..., i j ... -> r i", temp, grad_w)
-        grad_temp = torch.einsum("i j ..., i r -> r j ...", grad_w, w1u.T)
-        del grad_w, temp
-
-        grad_w1d = torch.einsum("i r ..., i j ... -> r j", t1, grad_temp)
-        grad_t1 = torch.einsum("i j ..., j r -> i r ...", grad_temp, w1d.T)
-        del grad_temp
-
-        temp = torch.einsum("i j ..., j r -> i r ...", t1, w1d)
-        rebuild = torch.einsum("i j ..., i r -> r j ...", temp, w1u)
-
-        grad_w = rebuild * grad_out
-        del rebuild
-
-        grad_w2u = torch.einsum("r j ..., i j ... -> r i", temp, grad_w)
-        grad_temp = torch.einsum("i j ..., i r -> r j ...", grad_w, w2u.T)
-        del grad_w, temp
-
-        grad_w2d = torch.einsum("i r ..., i j ... -> r j", t2, grad_temp)
-        grad_t2 = torch.einsum("i j ..., j r -> i r ...", grad_temp, w2d.T)
-        del grad_temp
-        return grad_t1, grad_w1u, grad_w1d, grad_t2, grad_w2u, grad_w2d, None
-
-
-class LohaDiff(WeightAdapterTrainBase):
-    def __init__(self, weights):
-        super().__init__()
-        # Unpack weights tuple from LoHaAdapter
-        w1a, w1b, alpha, w2a, w2b, t1, t2, _ = weights
-
-        # Create trainable parameters
-        self.hada_w1_a = torch.nn.Parameter(w1a)
-        self.hada_w1_b = torch.nn.Parameter(w1b)
-        self.hada_w2_a = torch.nn.Parameter(w2a)
-        self.hada_w2_b = torch.nn.Parameter(w2b)
-
-        self.use_tucker = False
-        if t1 is not None and t2 is not None:
-            self.use_tucker = True
-            self.hada_t1 = torch.nn.Parameter(t1)
-            self.hada_t2 = torch.nn.Parameter(t2)
-        else:
-            # Keep the attributes for consistent access
-            self.hada_t1 = None
-            self.hada_t2 = None
-
-        # Store rank and non-trainable alpha
-        self.rank = w1b.shape[0]
-        self.alpha = torch.nn.Parameter(torch.tensor(alpha), requires_grad=False)
-
-    def __call__(self, w):
-        org_dtype = w.dtype
-
-        scale = self.alpha / self.rank
-        if self.use_tucker:
-            diff_weight = HadaWeightTucker.apply(self.hada_t1, self.hada_w1_a, self.hada_w1_b, self.hada_t2, self.hada_w2_a, self.hada_w2_b, scale)
-        else:
-            diff_weight = HadaWeight.apply(self.hada_w1_a, self.hada_w1_b, self.hada_w2_a, self.hada_w2_b, scale)
-
-        # Add the scaled difference to the original weight
-        weight = w.to(diff_weight) + diff_weight.reshape(w.shape)
-
-        return weight.to(org_dtype)
-
-    def passive_memory_usage(self):
-        """Calculates memory usage of the trainable parameters."""
-        return sum(param.numel() * param.element_size() for param in self.parameters())
+from .base import WeightAdapterBase, weight_decompose


 class LoHaAdapter(WeightAdapterBase):
@@ -126,25 +13,6 @@ class LoHaAdapter(WeightAdapterBase):
        self.loaded_keys = loaded_keys
        self.weights = weights

-    @classmethod
-    def create_train(cls, weight, rank=1, alpha=1.0):
-        out_dim = weight.shape[0]
-        in_dim = weight.shape[1:].numel()
-        mat1 = torch.empty(out_dim, rank, device=weight.device, dtype=weight.dtype)
-        mat2 = torch.empty(rank, in_dim, device=weight.device, dtype=weight.dtype)
-        torch.nn.init.normal_(mat1, 0.1)
-        torch.nn.init.constant_(mat2, 0.0)
-        mat3 = torch.empty(out_dim, rank, device=weight.device, dtype=weight.dtype)
-        mat4 = torch.empty(rank, in_dim, device=weight.device, dtype=weight.dtype)
-        torch.nn.init.normal_(mat3, 0.1)
-        torch.nn.init.normal_(mat4, 0.01)
-        return LohaDiff(
-            (mat1, mat2, alpha, mat3, mat4, None, None, None)
-        )
-
-    def to_train(self):
-        return LohaDiff(self.weights)
-
    @classmethod
    def load(
        cls,
--- a/comfy/weight_adapter/lokr.py
+++ b/comfy/weight_adapter/lokr.py
@@ -3,77 +3,7 @@ from typing import Optional

 import torch
 import comfy.model_management
-from .base import (
-    WeightAdapterBase,
-    WeightAdapterTrainBase,
-    weight_decompose,
-    factorization,
-)
-
-
-class LokrDiff(WeightAdapterTrainBase):
-    def __init__(self, weights):
-        super().__init__()
-        (lokr_w1, lokr_w2, alpha, lokr_w1_a, lokr_w1_b, lokr_w2_a, lokr_w2_b, lokr_t2, dora_scale) = weights
-        self.use_tucker = False
-        if lokr_w1_a is not None:
-            _, rank_a = lokr_w1_a.shape[0], lokr_w1_a.shape[1]
-            rank_a, _ = lokr_w1_b.shape[0], lokr_w1_b.shape[1]
-            self.lokr_w1_a = torch.nn.Parameter(lokr_w1_a)
-            self.lokr_w1_b = torch.nn.Parameter(lokr_w1_b)
-            self.w1_rebuild = True
-            self.ranka = rank_a
-
-        if lokr_w2_a is not None:
-            _, rank_b = lokr_w2_a.shape[0], lokr_w2_a.shape[1]
-            rank_b, _ = lokr_w2_b.shape[0], lokr_w2_b.shape[1]
-            self.lokr_w2_a = torch.nn.Parameter(lokr_w2_a)
-            self.lokr_w2_b = torch.nn.Parameter(lokr_w2_b)
-            if lokr_t2 is not None:
-                self.use_tucker = True
-                self.lokr_t2 = torch.nn.Parameter(lokr_t2)
-            self.w2_rebuild = True
-            self.rankb = rank_b
-
-        if lokr_w1 is not None:
-            self.lokr_w1 = torch.nn.Parameter(lokr_w1)
-            self.w1_rebuild = False
-
-        if lokr_w2 is not None:
-            self.lokr_w2 = torch.nn.Parameter(lokr_w2)
-            self.w2_rebuild = False
-
-        self.alpha = torch.nn.Parameter(torch.tensor(alpha), requires_grad=False)
-
-    @property
-    def w1(self):
-        if self.w1_rebuild:
-            return (self.lokr_w1_a @ self.lokr_w1_b) * (self.alpha / self.ranka)
-        else:
-            return self.lokr_w1
-
-    @property
-    def w2(self):
-        if self.w2_rebuild:
-            if self.use_tucker:
-                w2 = torch.einsum(
-                    'i j k l, j r, i p -> p r k l',
-                    self.lokr_t2,
-                    self.lokr_w2_b,
-                    self.lokr_w2_a
-                )
-            else:
-                w2 = self.lokr_w2_a @ self.lokr_w2_b
-            return w2 * (self.alpha / self.rankb)
-        else:
-            return self.lokr_w2
-
-    def __call__(self, w):
-        diff = torch.kron(self.w1, self.w2)
-        return w + diff.reshape(w.shape).to(w)
-
-    def passive_memory_usage(self):
-        return sum(param.numel() * param.element_size() for param in self.parameters())
+from .base import WeightAdapterBase, weight_decompose


 class LoKrAdapter(WeightAdapterBase):
@@ -83,20 +13,6 @@ class LoKrAdapter(WeightAdapterBase):
        self.loaded_keys = loaded_keys
        self.weights = weights

-    @classmethod
-    def create_train(cls, weight, rank=1, alpha=1.0):
-        out_dim = weight.shape[0]
-        in_dim = weight.shape[1:].numel()
-        out1, out2 = factorization(out_dim, rank)
-        in1, in2 = factorization(in_dim, rank)
-        mat1 = torch.empty(out1, in1, device=weight.device, dtype=weight.dtype)
-        mat2 = torch.empty(out2, in2, device=weight.device, dtype=weight.dtype)
-        torch.nn.init.kaiming_uniform_(mat2, a=5**0.5)
-        torch.nn.init.constant_(mat1, 0.0)
-        return LokrDiff(
-            (mat1, mat2, alpha, None, None, None, None, None, None)
-        )
-
    @classmethod
    def load(
        cls,
--- a/comfy/weight_adapter/oft.py
+++ b/comfy/weight_adapter/oft.py
@@ -3,58 +3,7 @@ from typing import Optional

 import torch
 import comfy.model_management
-from .base import WeightAdapterBase, WeightAdapterTrainBase, weight_decompose, factorization
-
-
-class OFTDiff(WeightAdapterTrainBase):
-    def __init__(self, weights):
-        super().__init__()
-        # Unpack weights tuple from LoHaAdapter
-        blocks, rescale, alpha, _ = weights
-
-        # Create trainable parameters
-        self.oft_blocks = torch.nn.Parameter(blocks)
-        if rescale is not None:
-            self.rescale = torch.nn.Parameter(rescale)
-            self.rescaled = True
-        else:
-            self.rescaled = False
-        self.block_num, self.block_size, _ = blocks.shape
-        self.constraint = float(alpha)
-        self.alpha = torch.nn.Parameter(torch.tensor(alpha), requires_grad=False)
-
-    def __call__(self, w):
-        org_dtype = w.dtype
-        I = torch.eye(self.block_size, device=self.oft_blocks.device)
-
-        ## generate r
-        # for Q = -Q^T
-        q = self.oft_blocks - self.oft_blocks.transpose(1, 2)
-        normed_q = q
-        if self.constraint:
-            q_norm = torch.norm(q) + 1e-8
-            if q_norm > self.constraint:
-                normed_q = q * self.constraint / q_norm
-        # use float() to prevent unsupported type
-        r = (I + normed_q) @ (I - normed_q).float().inverse()
-
-        ## Apply chunked matmul on weight
-        _, *shape = w.shape
-        org_weight = w.to(dtype=r.dtype)
-        org_weight = org_weight.unflatten(0, (self.block_num, self.block_size))
-        # Init R=0, so add I on it to ensure the output of step0 is original model output
-        weight = torch.einsum(
-            "k n m, k n ... -> k m ...",
-            r,
-            org_weight,
-        ).flatten(0, 1)
-        if self.rescaled:
-            weight = self.rescale * weight
-        return weight.to(org_dtype)
-
-    def passive_memory_usage(self):
-        """Calculates memory usage of the trainable parameters."""
-        return sum(param.numel() * param.element_size() for param in self.parameters())
+from .base import WeightAdapterBase, weight_decompose


 class OFTAdapter(WeightAdapterBase):
@@ -64,18 +13,6 @@ class OFTAdapter(WeightAdapterBase):
        self.loaded_keys = loaded_keys
        self.weights = weights

-    @classmethod
-    def create_train(cls, weight, rank=1, alpha=1.0):
-        out_dim = weight.shape[0]
-        block_size, block_num = factorization(out_dim, rank)
-        block = torch.zeros(block_num, block_size, block_size, device=weight.device, dtype=weight.dtype)
-        return OFTDiff(
-            (block, None, alpha, None)
-        )
-
-    def to_train(self):
-        return OFTDiff(self.weights)
-
    @classmethod
    def load(
        cls,
@@ -123,8 +60,6 @@ class OFTAdapter(WeightAdapterBase):
        blocks = v[0]
        rescale = v[1]
        alpha = v[2]
-        if alpha is None:
-            alpha = 0
        dora_scale = v[3]

        blocks = comfy.model_management.cast_to_device(blocks, weight.device, intermediate_dtype)
--- a/comfy_api/feature_flags.py
+++ b/comfy_api/feature_flags.py
@@ -1,69 +0,0 @@
-"""
-Feature flags module for ComfyUI WebSocket protocol negotiation.
-
-This module handles capability negotiation between frontend and backend,
-allowing graceful protocol evolution while maintaining backward compatibility.
-"""
-
-from typing import Any, Dict
-
-from comfy.cli_args import args
-
-# Default server capabilities
-SERVER_FEATURE_FLAGS: Dict[str, Any] = {
-    "supports_preview_metadata": True,
-    "max_upload_size": args.max_upload_size * 1024 * 1024, # Convert MB to bytes
-}
-
-
-def get_connection_feature(
-    sockets_metadata: Dict[str, Dict[str, Any]],
-    sid: str,
-    feature_name: str,
-    default: Any = False
-) -> Any:
-    """
-    Get a feature flag value for a specific connection.
-
-    Args:
-        sockets_metadata: Dictionary of socket metadata
-        sid: Session ID of the connection
-        feature_name: Name of the feature to check
-        default: Default value if feature not found
-
-    Returns:
-        Feature value or default if not found
-    """
-    if sid not in sockets_metadata:
-        return default
-
-    return sockets_metadata[sid].get("feature_flags", {}).get(feature_name, default)
-
-
-def supports_feature(
-    sockets_metadata: Dict[str, Dict[str, Any]],
-    sid: str,
-    feature_name: str
-) -> bool:
-    """
-    Check if a connection supports a specific feature.
-
-    Args:
-        sockets_metadata: Dictionary of socket metadata
-        sid: Session ID of the connection
-        feature_name: Name of the feature to check
-
-    Returns:
-        Boolean indicating if feature is supported
-    """
-    return get_connection_feature(sockets_metadata, sid, feature_name, False) is True
-
-
-def get_server_features() -> Dict[str, Any]:
-    """
-    Get the server's feature flags.
-
-    Returns:
-        Dictionary of server feature flags
-    """
-    return SERVER_FEATURE_FLAGS.copy()
--- a/comfy_api/input/video_types.py
+++ b/comfy_api/input/video_types.py
@@ -1,8 +1,6 @@
 from __future__ import annotations
 from abc import ABC, abstractmethod
-from typing import Optional, Union
-import io
-import av
+from typing import Optional
 from comfy_api.util import VideoContainer, VideoCodec, VideoComponents

 class VideoInput(ABC):
@@ -33,22 +31,6 @@ class VideoInput(ABC):
        """
        pass

-    def get_stream_source(self) -> Union[str, io.BytesIO]:
-        """
-        Get a streamable source for the video. This allows processing without
-        loading the entire video into memory.
-
-        Returns:
-            Either a file path (str) or a BytesIO object that can be opened with av.
-
-        Default implementation creates a BytesIO buffer, but subclasses should
-        override this for better performance when possible.
-        """
-        buffer = io.BytesIO()
-        self.save_to(buffer)
-        buffer.seek(0)
-        return buffer
-
    # Provide a default implementation, but subclasses can provide optimized versions
    # if possible.
    def get_dimensions(self) -> tuple[int, int]:
@@ -71,15 +53,3 @@ class VideoInput(ABC):
        components = self.get_components()
        frame_count = components.images.shape[0]
        return float(frame_count / components.frame_rate)
-
-    def get_container_format(self) -> str:
-        """
-        Returns the container format of the video (e.g., 'mp4', 'mov', 'avi').
-
-        Returns:
-            Container format as string
-        """
-        # Default implementation - subclasses should override for better performance
-        source = self.get_stream_source()
-        with av.open(source, mode="r") as container:
-            return container.format.name
--- a/comfy_api/input_impl/video_types.py
+++ b/comfy_api/input_impl/video_types.py
@@ -64,15 +64,6 @@ class VideoFromFile(VideoInput):
        """
        self.__file = file

-    def get_stream_source(self) -> str | io.BytesIO:
-        """
-        Return the underlying file source for efficient streaming.
-        This avoids unnecessary memory copies when the source is already a file path.
-        """
-        if isinstance(self.__file, io.BytesIO):
-            self.__file.seek(0)
-        return self.__file
-
    def get_dimensions(self) -> tuple[int, int]:
        """
        Returns the dimensions of the video input.
@@ -121,18 +112,6 @@ class VideoFromFile(VideoInput):

        raise ValueError(f"Could not determine duration for file '{self.__file}'")

-    def get_container_format(self) -> str:
-        """
-        Returns the container format of the video (e.g., 'mp4', 'mov', 'avi').
-
-        Returns:
-            Container format as string
-        """
-        if isinstance(self.__file, io.BytesIO):
-            self.__file.seek(0)
-        with av.open(self.__file, mode='r') as container:
-            return container.format.name
-
    def get_components_internal(self, container: InputContainer) -> VideoComponents:
        # Get video frames
        frames = []
--- a/comfy_api_nodes/README.md
+++ b/comfy_api_nodes/README.md
@@ -2,7 +2,7 @@

 ## Introduction 

-Below are a collection of nodes that work by calling external APIs. More information available in our [docs](https://docs.comfy.org/tutorials/api-nodes/overview).
+Below are a collection of nodes that work by calling external APIs. More information available in our [docs](https://docs.comfy.org/tutorials/api-nodes/overview#api-nodes).

 ## Development

--- a/comfy_api_nodes/apis/init.py
+++ b/comfy_api_nodes/apis/init.py
@@ -1,6 +1,6 @@
 # generated by datamodel-codegen:
 #   filename:  filtered-openapi.yaml
-#   timestamp: 2025-07-06T09:47:31+00:00
+#   timestamp: 2025-05-19T21:38:55+00:00

 from __future__ import annotations

@@ -1355,158 +1355,6 @@ class ModelResponseProperties(BaseModel):
    )


-class Keyframes(BaseModel):
-    image_url: Optional[str] = None
-
-
-class MoonvalleyPromptResponse(BaseModel):
-    error: Optional[Dict[str, Any]] = None
-    frame_conditioning: Optional[Dict[str, Any]] = None
-    id: Optional[str] = None
-    inference_params: Optional[Dict[str, Any]] = None
-    meta: Optional[Dict[str, Any]] = None
-    model_params: Optional[Dict[str, Any]] = None
-    output_url: Optional[str] = None
-    prompt_text: Optional[str] = None
-    status: Optional[str] = None
-
-
-class MoonvalleyTextToVideoInferenceParams(BaseModel):
-    add_quality_guidance: Optional[bool] = Field(
-        True, description='Whether to add quality guidance'
-    )
-    caching_coefficient: Optional[float] = Field(
-        0.3, description='Caching coefficient for optimization'
-    )
-    caching_cooldown: Optional[int] = Field(
-        3, description='Number of caching cooldown steps'
-    )
-    caching_warmup: Optional[int] = Field(
-        3, description='Number of caching warmup steps'
-    )
-    clip_value: Optional[float] = Field(
-        3, description='CLIP value for generation control'
-    )
-    conditioning_frame_index: Optional[int] = Field(
-        0, description='Index of the conditioning frame'
-    )
-    cooldown_steps: Optional[int] = Field(
-        None, description='Number of cooldown steps (calculated based on num_frames)'
-    )
-    fps: Optional[int] = Field(
-        24, description='Frames per second of the generated video'
-    )
-    guidance_scale: Optional[float] = Field(
-        12.5, description='Guidance scale for generation control'
-    )
-    height: Optional[int] = Field(
-        1080, description='Height of the generated video in pixels'
-    )
-    negative_prompt: Optional[str] = Field(None, description='Negative prompt text')
-    num_frames: Optional[int] = Field(64, description='Number of frames to generate')
-    seed: Optional[int] = Field(
-        None, description='Random seed for generation (default: random)'
-    )
-    shift_value: Optional[float] = Field(
-        3, description='Shift value for generation control'
-    )
-    steps: Optional[int] = Field(80, description='Number of denoising steps')
-    use_guidance_schedule: Optional[bool] = Field(
-        True, description='Whether to use guidance scheduling'
-    )
-    use_negative_prompts: Optional[bool] = Field(
-        False, description='Whether to use negative prompts'
-    )
-    use_timestep_transform: Optional[bool] = Field(
-        True, description='Whether to use timestep transformation'
-    )
-    warmup_steps: Optional[int] = Field(
-        None, description='Number of warmup steps (calculated based on num_frames)'
-    )
-    width: Optional[int] = Field(
-        1920, description='Width of the generated video in pixels'
-    )
-
-
-class MoonvalleyTextToVideoRequest(BaseModel):
-    image_url: Optional[str] = None
-    inference_params: Optional[MoonvalleyTextToVideoInferenceParams] = None
-    prompt_text: Optional[str] = None
-    webhook_url: Optional[str] = None
-
-
-class MoonvalleyUploadFileRequest(BaseModel):
-    file: Optional[StrictBytes] = None
-
-
-class MoonvalleyUploadFileResponse(BaseModel):
-    access_url: Optional[str] = None
-
-
-class MoonvalleyVideoToVideoInferenceParams(BaseModel):
-    add_quality_guidance: Optional[bool] = Field(
-        True, description='Whether to add quality guidance'
-    )
-    caching_coefficient: Optional[float] = Field(
-        0.3, description='Caching coefficient for optimization'
-    )
-    caching_cooldown: Optional[int] = Field(
-        3, description='Number of caching cooldown steps'
-    )
-    caching_warmup: Optional[int] = Field(
-        3, description='Number of caching warmup steps'
-    )
-    clip_value: Optional[float] = Field(
-        3, description='CLIP value for generation control'
-    )
-    conditioning_frame_index: Optional[int] = Field(
-        0, description='Index of the conditioning frame'
-    )
-    cooldown_steps: Optional[int] = Field(
-        None, description='Number of cooldown steps (calculated based on num_frames)'
-    )
-    guidance_scale: Optional[float] = Field(
-        12.5, description='Guidance scale for generation control'
-    )
-    negative_prompt: Optional[str] = Field(None, description='Negative prompt text')
-    seed: Optional[int] = Field(
-        None, description='Random seed for generation (default: random)'
-    )
-    shift_value: Optional[float] = Field(
-        3, description='Shift value for generation control'
-    )
-    steps: Optional[int] = Field(80, description='Number of denoising steps')
-    use_guidance_schedule: Optional[bool] = Field(
-        True, description='Whether to use guidance scheduling'
-    )
-    use_negative_prompts: Optional[bool] = Field(
-        False, description='Whether to use negative prompts'
-    )
-    use_timestep_transform: Optional[bool] = Field(
-        True, description='Whether to use timestep transformation'
-    )
-    warmup_steps: Optional[int] = Field(
-        None, description='Number of warmup steps (calculated based on num_frames)'
-    )
-
-
-class ControlType(str, Enum):
-    motion_control = 'motion_control'
-    pose_control = 'pose_control'
-
-
-class MoonvalleyVideoToVideoRequest(BaseModel):
-    control_type: ControlType = Field(
-        ..., description='Supported types for video control'
-    )
-    inference_params: Optional[MoonvalleyVideoToVideoInferenceParams] = None
-    prompt_text: str = Field(..., description='Describes the video to generate')
-    video_url: str = Field(..., description='Url to control video')
-    webhook_url: Optional[str] = Field(
-        None, description='Optional webhook URL for notifications'
-    )
-
-
 class Moderation(str, Enum):
    low = 'low'
    auto = 'auto'
@@ -3259,23 +3107,6 @@ class LumaUpscaleVideoGenerationRequest(BaseModel):
    resolution: Optional[LumaVideoModelOutputResolution] = None


-class MoonvalleyImageToVideoRequest(MoonvalleyTextToVideoRequest):
-    keyframes: Optional[Dict[str, Keyframes]] = None
-
-
-class MoonvalleyResizeVideoRequest(MoonvalleyVideoToVideoRequest):
-    frame_position: Optional[List[int]] = Field(None, max_length=2, min_length=2)
-    frame_resolution: Optional[List[int]] = Field(None, max_length=2, min_length=2)
-    scale: Optional[List[int]] = Field(None, max_length=2, min_length=2)
-
-
-class MoonvalleyTextToImageRequest(BaseModel):
-    image_url: Optional[str] = None
-    inference_params: Optional[MoonvalleyTextToVideoInferenceParams] = None
-    prompt_text: Optional[str] = None
-    webhook_url: Optional[str] = None
-
-
 class OutputContent(RootModel[Union[OutputTextContent, OutputAudioContent]]):
    root: Union[OutputTextContent, OutputAudioContent]

--- a/comfy_api_nodes/nodes_gemini.py
+++ b/comfy_api_nodes/nodes_gemini.py
@@ -406,7 +406,7 @@ class GeminiInputFiles(ComfyNodeABC):

    def create_file_part(self, file_path: str) -> GeminiPart:
        mime_type = (
-            GeminiMimeType.application_pdf
+            GeminiMimeType.pdf
            if file_path.endswith(".pdf")
            else GeminiMimeType.text_plain
        )
--- a/comfy_api_nodes/nodes_kling.py
+++ b/comfy_api_nodes/nodes_kling.py
@@ -132,8 +132,6 @@ def poll_until_finished(
        result_url_extractor=result_url_extractor,
        estimated_duration=estimated_duration,
        node_id=node_id,
-        poll_interval=16.0,
-        max_poll_attempts=256,
    ).execute()


--- a/comfy_api_nodes/nodes_moonvalley.py
+++ b/comfy_api_nodes/nodes_moonvalley.py
@@ -1,743 +0,0 @@
-import logging
-from typing import Any, Callable, Optional, TypeVar
-import random
-import torch
-from comfy_api_nodes.util.validation_utils import (
-    get_image_dimensions,
-    validate_image_dimensions,
-)
-
-
-from comfy_api_nodes.apis import (
-    MoonvalleyTextToVideoRequest,
-    MoonvalleyTextToVideoInferenceParams,
-    MoonvalleyVideoToVideoInferenceParams,
-    MoonvalleyVideoToVideoRequest,
-    MoonvalleyPromptResponse,
-)
-from comfy_api_nodes.apis.client import (
-    ApiEndpoint,
-    HttpMethod,
-    SynchronousOperation,
-    PollingOperation,
-    EmptyRequest,
-)
-from comfy_api_nodes.apinode_utils import (
-    download_url_to_video_output,
-    upload_images_to_comfyapi,
-    upload_video_to_comfyapi,
-)
-from comfy_api_nodes.mapper_utils import model_field_to_node_input
-
-from comfy_api.input.video_types import VideoInput
-from comfy.comfy_types.node_typing import IO
-from comfy_api.input_impl import VideoFromFile
-import av
-import io
-
-API_UPLOADS_ENDPOINT = "/proxy/moonvalley/uploads"
-API_PROMPTS_ENDPOINT = "/proxy/moonvalley/prompts"
-API_VIDEO2VIDEO_ENDPOINT = "/proxy/moonvalley/prompts/video-to-video"
-API_TXT2VIDEO_ENDPOINT = "/proxy/moonvalley/prompts/text-to-video"
-API_IMG2VIDEO_ENDPOINT = "/proxy/moonvalley/prompts/image-to-video"
-
-MIN_WIDTH = 300
-MIN_HEIGHT = 300
-
-MAX_WIDTH = 10000
-MAX_HEIGHT = 10000
-
-MIN_VID_WIDTH = 300
-MIN_VID_HEIGHT = 300
-
-MAX_VID_WIDTH = 10000
-MAX_VID_HEIGHT = 10000
-
-MAX_VIDEO_SIZE = 1024 * 1024 * 1024  # 1 GB max for in-memory video processing
-
-MOONVALLEY_MAREY_MAX_PROMPT_LENGTH = 5000
-R = TypeVar("R")
-
-
-class MoonvalleyApiError(Exception):
-    """Base exception for Moonvalley API errors."""
-
-    pass
-
-
-def is_valid_task_creation_response(response: MoonvalleyPromptResponse) -> bool:
-    """Verifies that the initial response contains a task ID."""
-    return bool(response.id)
-
-
-def validate_task_creation_response(response) -> None:
-    if not is_valid_task_creation_response(response):
-        error_msg = f"Moonvalley Marey API: Initial request failed. Code: {response.code}, Message: {response.message}, Data: {response}"
-        logging.error(error_msg)
-        raise MoonvalleyApiError(error_msg)
-
-
-def get_video_from_response(response):
-    video = response.output_url
-    logging.info(
-        "Moonvalley Marey API: Task %s succeeded. Video URL: %s", response.id, video
-    )
-    return video
-
-
-def get_video_url_from_response(response) -> Optional[str]:
-    """Returns the first video url from the Moonvalley video generation task result.
-    Will not raise an error if the response is not valid.
-    """
-    if response:
-        return str(get_video_from_response(response))
-    else:
-        return None
-
-
-def poll_until_finished(
-    auth_kwargs: dict[str, str],
-    api_endpoint: ApiEndpoint[Any, R],
-    result_url_extractor: Optional[Callable[[R], str]] = None,
-    node_id: Optional[str] = None,
-) -> R:
-    """Polls the Moonvalley API endpoint until the task reaches a terminal state, then returns the response."""
-    return PollingOperation(
-        poll_endpoint=api_endpoint,
-        completed_statuses=[
-            "completed",
-        ],
-        max_poll_attempts=240,  # 64 minutes with 16s interval
-        poll_interval=16.0,
-        failed_statuses=["error"],
-        status_extractor=lambda response: (
-            response.status if response and response.status else None
-        ),
-        auth_kwargs=auth_kwargs,
-        result_url_extractor=result_url_extractor,
-        node_id=node_id,
-    ).execute()
-
-
-def validate_prompts(
-    prompt: str, negative_prompt: str, max_length=MOONVALLEY_MAREY_MAX_PROMPT_LENGTH
-):
-    """Verifies that the prompt isn't empty and that neither prompt is too long."""
-    if not prompt:
-        raise ValueError("Positive prompt is empty")
-    if len(prompt) > max_length:
-        raise ValueError(f"Positive prompt is too long: {len(prompt)} characters")
-    if negative_prompt and len(negative_prompt) > max_length:
-        raise ValueError(
-            f"Negative prompt is too long: {len(negative_prompt)} characters"
-        )
-    return True
-
-
-def validate_input_media(width, height, with_frame_conditioning, num_frames_in=None):
-    # inference validation
-    # T = num_frames
-    # in all cases, the following must be true: T divisible by 16 and H,W by 8. in addition...
-    # with image conditioning: H*W must be divisible by 8192
-    # without image conditioning: T divisible by 32
-    if num_frames_in and not num_frames_in % 16 == 0:
-        return False, ("The input video total frame count must be divisible by 16!")
-
-    if height % 8 != 0 or width % 8 != 0:
-        return False, (
-            f"Height ({height}) and width ({width}) must be " "divisible by 8"
-        )
-
-    if with_frame_conditioning:
-        if (height * width) % 8192 != 0:
-            return False, (
-                f"Height * width ({height * width}) must be "
-                "divisible by 8192 for frame conditioning"
-            )
-    else:
-        if num_frames_in and not num_frames_in % 32 == 0:
-            return False, ("The input video total frame count must be divisible by 32!")
-
-
-def validate_input_image(
-    image: torch.Tensor, with_frame_conditioning: bool = False
-) -> None:
-    """
-    Validates the input image adheres to the expectations of the API:
-    - The image resolution should not be less than 300*300px
-    - The aspect ratio of the image should be between 1:2.5 ~ 2.5:1
-
-    """
-    height, width = get_image_dimensions(image)
-    validate_input_media(width, height, with_frame_conditioning)
-    validate_image_dimensions(
-        image, min_width=300, min_height=300, max_height=MAX_HEIGHT, max_width=MAX_WIDTH
-    )
-
-
-def validate_video_to_video_input(video: VideoInput) -> VideoInput:
-    """
-    Validates and processes video input for Moonvalley Video-to-Video generation.
-
-    Args:
-        video: Input video to validate
-
-    Returns:
-        Validated and potentially trimmed video
-
-    Raises:
-        ValueError: If video doesn't meet requirements
-        MoonvalleyApiError: If video duration is too short
-    """
-    width, height = _get_video_dimensions(video)
-    _validate_video_dimensions(width, height)
-    _validate_container_format(video)
-
-    return _validate_and_trim_duration(video)
-
-
-def _get_video_dimensions(video: VideoInput) -> tuple[int, int]:
-    """Extracts video dimensions with error handling."""
-    try:
-        return video.get_dimensions()
-    except Exception as e:
-        logging.error("Error getting dimensions of video: %s", e)
-        raise ValueError(f"Cannot get video dimensions: {e}") from e
-
-
-def _validate_video_dimensions(width: int, height: int) -> None:
-    """Validates video dimensions meet Moonvalley V2V requirements."""
-    supported_resolutions = {
-        (1920, 1080), (1080, 1920), (1152, 1152),
-        (1536, 1152), (1152, 1536)
-    }
-
-    if (width, height) not in supported_resolutions:
-        supported_list = ', '.join([f'{w}x{h}' for w, h in sorted(supported_resolutions)])
-        raise ValueError(f"Resolution {width}x{height} not supported. Supported: {supported_list}")
-
-
-def _validate_container_format(video: VideoInput) -> None:
-    """Validates video container format is MP4."""
-    container_format = video.get_container_format()
-    if container_format not in ['mp4', 'mov,mp4,m4a,3gp,3g2,mj2']:
-        raise ValueError(f"Only MP4 container format supported. Got: {container_format}")
-
-
-def _validate_and_trim_duration(video: VideoInput) -> VideoInput:
-    """Validates video duration and trims to 5 seconds if needed."""
-    duration = video.get_duration()
-    _validate_minimum_duration(duration)
-    return _trim_if_too_long(video, duration)
-
-
-def _validate_minimum_duration(duration: float) -> None:
-    """Ensures video is at least 5 seconds long."""
-    if duration < 5:
-        raise MoonvalleyApiError("Input video must be at least 5 seconds long.")
-
-
-def _trim_if_too_long(video: VideoInput, duration: float) -> VideoInput:
-    """Trims video to 5 seconds if longer."""
-    if duration > 5:
-        return trim_video(video, 5)
-    return video
-
-
-
-def trim_video(video: VideoInput, duration_sec: float) -> VideoInput:
-    """
-    Returns a new VideoInput object trimmed from the beginning to the specified duration,
-    using av to avoid loading entire video into memory.
-
-    Args:
-        video: Input video to trim
-        duration_sec: Duration in seconds to keep from the beginning
-
-    Returns:
-        VideoFromFile object that owns the output buffer
-    """
-    output_buffer = io.BytesIO()
-
-    input_container = None
-    output_container = None
-
-    try:
-        # Get the stream source - this avoids loading entire video into memory
-        # when the source is already a file path
-        input_source = video.get_stream_source()
-
-        # Open containers
-        input_container = av.open(input_source, mode="r")
-        output_container = av.open(output_buffer, mode="w", format="mp4")
-
-        # Set up output streams for re-encoding
-        video_stream = None
-        audio_stream = None
-
-        for stream in input_container.streams:
-            logging.info(f"Found stream: type={stream.type}, class={type(stream)}")
-            if isinstance(stream, av.VideoStream):
-                # Create output video stream with same parameters
-                video_stream = output_container.add_stream(
-                    "h264", rate=stream.average_rate
-                )
-                video_stream.width = stream.width
-                video_stream.height = stream.height
-                video_stream.pix_fmt = "yuv420p"
-                logging.info(
-                    f"Added video stream: {stream.width}x{stream.height} @ {stream.average_rate}fps"
-                )
-            elif isinstance(stream, av.AudioStream):
-                # Create output audio stream with same parameters
-                audio_stream = output_container.add_stream(
-                    "aac", rate=stream.sample_rate
-                )
-                audio_stream.sample_rate = stream.sample_rate
-                audio_stream.layout = stream.layout
-                logging.info(
-                    f"Added audio stream: {stream.sample_rate}Hz, {stream.channels} channels"
-                )
-
-        # Calculate target frame count that's divisible by 16
-        fps = input_container.streams.video[0].average_rate
-        estimated_frames = int(duration_sec * fps)
-        target_frames = (estimated_frames // 16) * 16  # Round down to nearest multiple of 16
-
-        if target_frames == 0:
-            raise ValueError("Video too short: need at least 16 frames for Moonvalley")
-
-        frame_count = 0
-        audio_frame_count = 0
-
-        # Decode and re-encode video frames
-        if video_stream:
-            for frame in input_container.decode(video=0):
-                if frame_count >= target_frames:
-                    break
-
-                # Re-encode frame
-                for packet in video_stream.encode(frame):
-                    output_container.mux(packet)
-                frame_count += 1
-
-            # Flush encoder
-            for packet in video_stream.encode():
-                output_container.mux(packet)
-
-            logging.info(
-                f"Encoded {frame_count} video frames (target: {target_frames})"
-            )
-
-        # Decode and re-encode audio frames
-        if audio_stream:
-            input_container.seek(0)  # Reset to beginning for audio
-            for frame in input_container.decode(audio=0):
-                if frame.time >= duration_sec:
-                    break
-
-                # Re-encode frame
-                for packet in audio_stream.encode(frame):
-                    output_container.mux(packet)
-                audio_frame_count += 1
-
-            # Flush encoder
-            for packet in audio_stream.encode():
-                output_container.mux(packet)
-
-            logging.info(f"Encoded {audio_frame_count} audio frames")
-
-        # Close containers
-        output_container.close()
-        input_container.close()
-
-        # Return as VideoFromFile using the buffer
-        output_buffer.seek(0)
-        return VideoFromFile(output_buffer)
-
-    except Exception as e:
-        # Clean up on error
-        if input_container is not None:
-            input_container.close()
-        if output_container is not None:
-            output_container.close()
-        raise RuntimeError(f"Failed to trim video: {str(e)}") from e
-
-
-# --- BaseMoonvalleyVideoNode ---
-class BaseMoonvalleyVideoNode:
-    def parseWidthHeightFromRes(self, resolution: str):
-        # Accepts a string like "16:9 (1920 x 1080)" and returns width, height as a dict
-        res_map = {
-            "16:9 (1920 x 1080)": {"width": 1920, "height": 1080},
-            "9:16 (1080 x 1920)": {"width": 1080, "height": 1920},
-            "1:1 (1152 x 1152)": {"width": 1152, "height": 1152},
-            "4:3 (1536 x 1152)": {"width": 1536, "height": 1152},
-            "3:4 (1152 x 1536)": {"width": 1152, "height": 1536},
-            "21:9 (2560 x 1080)": {"width": 2560, "height": 1080},
-        }
-        if resolution in res_map:
-            return res_map[resolution]
-        else:
-            # Default to 1920x1080 if unknown
-            return {"width": 1920, "height": 1080}
-
-    def parseControlParameter(self, value):
-        control_map = {
-            "Motion Transfer": "motion_control",
-            "Canny": "canny_control",
-            "Pose Transfer": "pose_control",
-            "Depth": "depth_control",
-        }
-        if value in control_map:
-            return control_map[value]
-        else:
-            return control_map["Motion Transfer"]
-
-    def get_response(
-        self, task_id: str, auth_kwargs: dict[str, str], node_id: Optional[str] = None
-    ) -> MoonvalleyPromptResponse:
-        return poll_until_finished(
-            auth_kwargs,
-            ApiEndpoint(
-                path=f"{API_PROMPTS_ENDPOINT}/{task_id}",
-                method=HttpMethod.GET,
-                request_model=EmptyRequest,
-                response_model=MoonvalleyPromptResponse,
-            ),
-            result_url_extractor=get_video_url_from_response,
-            node_id=node_id,
-        )
-
-    @classmethod
-    def INPUT_TYPES(cls):
-        return {
-            "required": {
-                "prompt": model_field_to_node_input(
-                    IO.STRING,
-                    MoonvalleyTextToVideoRequest,
-                    "prompt_text",
-                    multiline=True,
-                ),
-                "negative_prompt": model_field_to_node_input(
-                    IO.STRING,
-                    MoonvalleyTextToVideoInferenceParams,
-                    "negative_prompt",
-                    multiline=True,
-                    default="low-poly, flat shader, bad rigging, stiff animation, uncanny eyes, low-quality textures, looping glitch, cheap effect, overbloom, bloom spam, default lighting, game asset, stiff face, ugly specular, AI artifacts",
-                ),
-                "resolution": (
-                    IO.COMBO,
-                    {
-                        "options": [
-                            "16:9 (1920 x 1080)",
-                            "9:16 (1080 x 1920)",
-                            "1:1 (1152 x 1152)",
-                            "4:3 (1440 x 1080)",
-                            "3:4 (1080 x 1440)",
-                            "21:9 (2560 x 1080)",
-                        ],
-                        "default": "16:9 (1920 x 1080)",
-                        "tooltip": "Resolution of the output video",
-                    },
-                ),
-                # "length": (IO.COMBO,{"options":['5s','10s'], "default": '5s'}),
-                "prompt_adherence": model_field_to_node_input(
-                    IO.FLOAT,
-                    MoonvalleyTextToVideoInferenceParams,
-                    "guidance_scale",
-                    default=7.0,
-                    step=1,
-                    min=1,
-                    max=20,
-                ),
-                "seed": model_field_to_node_input(
-                    IO.INT,
-                    MoonvalleyTextToVideoInferenceParams,
-                    "seed",
-                    default=random.randint(0, 2**32 - 1),
-                    min=0,
-                    max=4294967295,
-                    step=1,
-                    display="number",
-                    tooltip="Random seed value",
-                    control_after_generate=True,
-                ),
-                "steps": model_field_to_node_input(
-                    IO.INT,
-                    MoonvalleyTextToVideoInferenceParams,
-                    "steps",
-                    default=100,
-                    min=1,
-                    max=100,
-                ),
-            },
-            "hidden": {
-                "auth_token": "AUTH_TOKEN_COMFY_ORG",
-                "comfy_api_key": "API_KEY_COMFY_ORG",
-                "unique_id": "UNIQUE_ID",
-            },
-            "optional": {
-                "image": model_field_to_node_input(
-                    IO.IMAGE,
-                    MoonvalleyTextToVideoRequest,
-                    "image_url",
-                    tooltip="The reference image used to generate the video",
-                ),
-            },
-        }
-
-    RETURN_TYPES = ("STRING",)
-    FUNCTION = "generate"
-    CATEGORY = "api node/video/Moonvalley Marey"
-    API_NODE = True
-
-    def generate(self, **kwargs):
-        return None
-
-
-# --- MoonvalleyImg2VideoNode ---
-class MoonvalleyImg2VideoNode(BaseMoonvalleyVideoNode):
-
-    @classmethod
-    def INPUT_TYPES(cls):
-        return super().INPUT_TYPES()
-
-    RETURN_TYPES = ("VIDEO",)
-    RETURN_NAMES = ("video",)
-    DESCRIPTION = "Moonvalley Marey Image to Video Node"
-
-    def generate(
-        self, prompt, negative_prompt, unique_id: Optional[str] = None, **kwargs
-    ):
-        image = kwargs.get("image", None)
-        if image is None:
-            raise MoonvalleyApiError("image is required")
-
-        validate_input_image(image, True)
-        validate_prompts(prompt, negative_prompt, MOONVALLEY_MAREY_MAX_PROMPT_LENGTH)
-        width_height = self.parseWidthHeightFromRes(kwargs.get("resolution"))
-
-        inference_params = MoonvalleyTextToVideoInferenceParams(
-            negative_prompt=negative_prompt,
-            steps=kwargs.get("steps"),
-            seed=kwargs.get("seed"),
-            guidance_scale=kwargs.get("prompt_adherence"),
-            num_frames=128,
-            width=width_height.get("width"),
-            height=width_height.get("height"),
-            use_negative_prompts=True,
-        )
-        """Upload image to comfy backend to have a URL available for further processing"""
-        # Get MIME type from tensor - assuming PNG format for image tensors
-        mime_type = "image/png"
-
-        image_url = upload_images_to_comfyapi(
-            image, max_images=1, auth_kwargs=kwargs, mime_type=mime_type
-        )[0]
-
-        request = MoonvalleyTextToVideoRequest(
-            image_url=image_url, prompt_text=prompt, inference_params=inference_params
-        )
-        initial_operation = SynchronousOperation(
-            endpoint=ApiEndpoint(
-                path=API_IMG2VIDEO_ENDPOINT,
-                method=HttpMethod.POST,
-                request_model=MoonvalleyTextToVideoRequest,
-                response_model=MoonvalleyPromptResponse,
-            ),
-            request=request,
-            auth_kwargs=kwargs,
-        )
-        task_creation_response = initial_operation.execute()
-        validate_task_creation_response(task_creation_response)
-        task_id = task_creation_response.id
-
-        final_response = self.get_response(
-            task_id, auth_kwargs=kwargs, node_id=unique_id
-        )
-        video = download_url_to_video_output(final_response.output_url)
-        return (video,)
-
-
-# --- MoonvalleyVid2VidNode ---
-class MoonvalleyVideo2VideoNode(BaseMoonvalleyVideoNode):
-    def __init__(self):
-        super().__init__()
-
-    @classmethod
-    def INPUT_TYPES(cls):
-        return {
-            "required": {
-                "prompt": model_field_to_node_input(
-                    IO.STRING, MoonvalleyVideoToVideoRequest, "prompt_text",
-                    multiline=True
-                ),
-                "negative_prompt": model_field_to_node_input(
-                    IO.STRING,
-                    MoonvalleyVideoToVideoInferenceParams,
-                    "negative_prompt",
-                    multiline=True,
-                    default="low-poly, flat shader, bad rigging, stiff animation, uncanny eyes, low-quality textures, looping glitch, cheap effect, overbloom, bloom spam, default lighting, game asset, stiff face, ugly specular, AI artifacts"
-                ),
-                "seed": model_field_to_node_input(IO.INT,MoonvalleyVideoToVideoInferenceParams, "seed", default=random.randint(0, 2**32 - 1), min=0, max=4294967295, step=1, display="number", tooltip="Random seed value", control_after_generate=True),
-            },
-            "hidden": {
-                "auth_token": "AUTH_TOKEN_COMFY_ORG",
-                "comfy_api_key": "API_KEY_COMFY_ORG",
-                "unique_id": "UNIQUE_ID",
-            },
-            "optional": {
-                "video": (IO.VIDEO, {"default": "", "multiline": False, "tooltip": "The reference video used to generate the output video. Must be at least 5 seconds long. Videos longer than 5s will be automatically trimmed. Only MP4 format supported."}),
-                "control_type": (
-                    ["Motion Transfer", "Pose Transfer"],
-                    {"default": "Motion Transfer"},
-                ),
-                "motion_intensity": (
-                    "INT",
-                    {
-                        "default": 100,
-                        "step": 1,
-                        "min": 0,
-                        "max": 100,
-                        "tooltip": "Only used if control_type is 'Motion Transfer'",
-                    },
-                )
-            }
-        }
-
-    RETURN_TYPES = ("VIDEO",)
-    RETURN_NAMES = ("video",)
-
-    def generate(
-        self, prompt, negative_prompt, unique_id: Optional[str] = None, **kwargs
-    ):
-        video = kwargs.get("video")
-
-        if not video:
-            raise MoonvalleyApiError("video is required")
-
-        video_url = ""
-        if video:
-            validated_video = validate_video_to_video_input(video)
-            video_url = upload_video_to_comfyapi(validated_video, auth_kwargs=kwargs)
-
-        control_type = kwargs.get("control_type")
-        motion_intensity = kwargs.get("motion_intensity")
-
-        """Validate prompts and inference input"""
-        validate_prompts(prompt, negative_prompt)
-
-        # Only include motion_intensity for Motion Transfer
-        control_params = {}
-        if control_type == "Motion Transfer" and motion_intensity is not None:
-            control_params['motion_intensity'] = motion_intensity
-
-        inference_params=MoonvalleyVideoToVideoInferenceParams(
-            negative_prompt=negative_prompt,
-            seed=kwargs.get("seed"),
-            control_params=control_params
-        )
-
-        control = self.parseControlParameter(control_type)
-
-        request = MoonvalleyVideoToVideoRequest(
-            control_type=control,
-            video_url=video_url,
-            prompt_text=prompt,
-            inference_params=inference_params,
-        )
-
-        initial_operation = SynchronousOperation(
-            endpoint=ApiEndpoint(
-                path=API_VIDEO2VIDEO_ENDPOINT,
-                method=HttpMethod.POST,
-                request_model=MoonvalleyVideoToVideoRequest,
-                response_model=MoonvalleyPromptResponse,
-            ),
-            request=request,
-            auth_kwargs=kwargs,
-        )
-        task_creation_response = initial_operation.execute()
-        validate_task_creation_response(task_creation_response)
-        task_id = task_creation_response.id
-
-        final_response = self.get_response(
-            task_id, auth_kwargs=kwargs, node_id=unique_id
-        )
-
-        video = download_url_to_video_output(final_response.output_url)
-
-        return (video,)
-
-
-# --- MoonvalleyTxt2VideoNode ---
-class MoonvalleyTxt2VideoNode(BaseMoonvalleyVideoNode):
-    def __init__(self):
-        super().__init__()
-
-    RETURN_TYPES = ("VIDEO",)
-    RETURN_NAMES = ("video",)
-
-    @classmethod
-    def INPUT_TYPES(cls):
-        input_types = super().INPUT_TYPES()
-        # Remove image-specific parameters
-        for param in ["image"]:
-            if param in input_types["optional"]:
-                del input_types["optional"][param]
-        return input_types
-
-    def generate(
-        self, prompt, negative_prompt, unique_id: Optional[str] = None, **kwargs
-    ):
-        validate_prompts(prompt, negative_prompt, MOONVALLEY_MAREY_MAX_PROMPT_LENGTH)
-        width_height = self.parseWidthHeightFromRes(kwargs.get("resolution"))
-
-        inference_params=MoonvalleyTextToVideoInferenceParams(
-                    negative_prompt=negative_prompt,
-                    steps=kwargs.get("steps"),
-                    seed=kwargs.get("seed"),
-                    guidance_scale=kwargs.get("prompt_adherence"),
-                    num_frames=128,
-                    width=width_height.get("width"),
-                    height=width_height.get("height"),
-                )
-        request = MoonvalleyTextToVideoRequest(
-            prompt_text=prompt, inference_params=inference_params
-        )
-
-        initial_operation = SynchronousOperation(
-            endpoint=ApiEndpoint(
-                path=API_TXT2VIDEO_ENDPOINT,
-                method=HttpMethod.POST,
-                request_model=MoonvalleyTextToVideoRequest,
-                response_model=MoonvalleyPromptResponse,
-            ),
-            request=request,
-            auth_kwargs=kwargs,
-        )
-        task_creation_response = initial_operation.execute()
-        validate_task_creation_response(task_creation_response)
-        task_id = task_creation_response.id
-
-        final_response = self.get_response(
-            task_id, auth_kwargs=kwargs, node_id=unique_id
-        )
-
-        video = download_url_to_video_output(final_response.output_url)
-        return (video,)
-
-
-NODE_CLASS_MAPPINGS = {
-    "MoonvalleyImg2VideoNode": MoonvalleyImg2VideoNode,
-    "MoonvalleyTxt2VideoNode": MoonvalleyTxt2VideoNode,
-    "MoonvalleyVideo2VideoNode": MoonvalleyVideo2VideoNode,
-}
-
-
-NODE_DISPLAY_NAME_MAPPINGS = {
-    "MoonvalleyImg2VideoNode": "Moonvalley Marey Image to Video",
-    "MoonvalleyTxt2VideoNode": "Moonvalley Marey Text to Video",
-    "MoonvalleyVideo2VideoNode": "Moonvalley Marey Video to Video",
-}
--- a/comfy_config/config_parser.py
+++ b/comfy_config/config_parser.py
@@ -11,43 +11,6 @@ from comfy_config.types import (
    PyProjectSettings
 )

-def validate_and_extract_os_classifiers(classifiers: list) -> list:
-    os_classifiers = [c for c in classifiers if c.startswith("Operating System :: ")]
-    if not os_classifiers:
-        return []
-
-    os_values = [c[len("Operating System :: ") :] for c in os_classifiers]
-    valid_os_prefixes = {"Microsoft", "POSIX", "MacOS", "OS Independent"}
-
-    for os_value in os_values:
-        if not any(os_value.startswith(prefix) for prefix in valid_os_prefixes):
-            return []
-
-    return os_values
-
-
-def validate_and_extract_accelerator_classifiers(classifiers: list) -> list:
-    accelerator_classifiers = [c for c in classifiers if c.startswith("Environment ::")]
-    if not accelerator_classifiers:
-        return []
-
-    accelerator_values = [c[len("Environment :: ") :] for c in accelerator_classifiers]
-
-    valid_accelerators = {
-        "GPU :: NVIDIA CUDA",
-        "GPU :: AMD ROCm",
-        "GPU :: Intel Arc",
-        "NPU :: Huawei Ascend",
-        "GPU :: Apple Metal",
-    }
-
-    for accelerator_value in accelerator_values:
-        if accelerator_value not in valid_accelerators:
-            return []
-
-    return accelerator_values
-
-
 """
 Extract configuration from a custom node directory's pyproject.toml file or a Python file.

@@ -115,24 +78,6 @@ def extract_node_configuration(path) -> Optional[PyProjectConfig]:
    tool_data = raw_settings.tool
    comfy_data = tool_data.get("comfy", {}) if tool_data else {}

-    dependencies = project_data.get("dependencies", [])
-    supported_comfyui_frontend_version = ""
-    for dep in dependencies:
-        if isinstance(dep, str) and dep.startswith("comfyui-frontend-package"):
-            supported_comfyui_frontend_version = dep.removeprefix("comfyui-frontend-package")
-            break
-
-    supported_comfyui_version = comfy_data.get("requires-comfyui", "")
-
-    classifiers = project_data.get('classifiers', [])
-    supported_os = validate_and_extract_os_classifiers(classifiers)
-    supported_accelerators = validate_and_extract_accelerator_classifiers(classifiers)
-
-    project_data['supported_os'] = supported_os
-    project_data['supported_accelerators'] = supported_accelerators
-    project_data['supported_comfyui_frontend_version'] = supported_comfyui_frontend_version
-    project_data['supported_comfyui_version'] = supported_comfyui_version
-
    return PyProjectConfig(project=project_data, tool_comfy=comfy_data)


--- a/comfy_config/types.py
+++ b/comfy_config/types.py
@@ -51,7 +51,7 @@ class ComfyConfig(BaseModel):
    models: List[Model] = Field(default_factory=list, alias="Models")
    includes: List[str] = Field(default_factory=list)
    web: Optional[str] = None
-    banner_url: str = ""
+

 class License(BaseModel):
    file: str = ""
@@ -66,10 +66,6 @@ class ProjectConfig(BaseModel):
    dependencies: List[str] = Field(default_factory=list)
    license: License = Field(default_factory=License)
    urls: URLs = Field(default_factory=URLs)
-    supported_os: List[str] = Field(default_factory=list)
-    supported_accelerators: List[str] = Field(default_factory=list)
-    supported_comfyui_version: str = ""
-    supported_comfyui_frontend_version: str = ""

    @field_validator('license', mode='before')
    @classmethod
--- a/comfy_execution/graph.py
+++ b/comfy_execution/graph.py
@@ -3,7 +3,6 @@ from typing import Type, Literal

 import nodes
 import asyncio
-import inspect
 from comfy_execution.graph_utils import is_link
 from comfy.comfy_types.node_typing import ComfyNodeABC, InputTypeDict, InputTypeOptions

@@ -240,15 +239,8 @@ class ExecutionList(TopologicalSort):
                return True
            return False

-        # If an available node is async, do that first.
-        # This will execute the asynchronous function earlier, reducing the overall time.
-        def is_async(node_id):
-            class_type = self.dynprompt.get_node(node_id)["class_type"]
-            class_def = nodes.NODE_CLASS_MAPPINGS[class_type]
-            return inspect.iscoroutinefunction(getattr(class_def, class_def.FUNCTION))
-
        for node_id in node_list:
-            if is_output(node_id) or is_async(node_id):
+            if is_output(node_id):
                return node_id

        #This should handle the VAEDecode -> preview case
--- a/comfy_execution/progress.py
+++ b/comfy_execution/progress.py
@@ -4,12 +4,8 @@ from PIL import Image
 from enum import Enum
 from abc import ABC
 from tqdm import tqdm
-from typing import TYPE_CHECKING
-if TYPE_CHECKING:
-    from comfy_execution.graph import DynamicPrompt
+from comfy_execution.graph import DynamicPrompt
 from protocol import BinaryEventTypes
-from comfy_api import feature_flags
-

 class NodeState(Enum):
    Pending = "pending"
@@ -17,23 +13,19 @@ class NodeState(Enum):
    Finished = "finished"
    Error = "error"

-
 class NodeProgressState(TypedDict):
    """
    A class to represent the state of a node's progress.
    """
-
    state: NodeState
    value: float
    max: float

-
 class ProgressHandler(ABC):
    """
    Abstract base class for progress handlers.
    Progress handlers receive progress updates and display them in various ways.
    """
-
    def __init__(self, name: str):
        self.name = name
        self.enabled = True
@@ -45,15 +37,8 @@ class ProgressHandler(ABC):
        """Called when a node starts processing"""
        pass

-    def update_handler(
-        self,
-        node_id: str,
-        value: float,
-        max_value: float,
-        state: NodeProgressState,
-        prompt_id: str,
-        image: Optional[Image.Image] = None,
-    ):
+    def update_handler(self, node_id: str, value: float, max_value: float,
+                      state: NodeProgressState, prompt_id: str, image: Optional[Image.Image] = None):
        """Called when a node's progress is updated"""
        pass

@@ -73,12 +58,10 @@ class ProgressHandler(ABC):
        """Disable this handler"""
        self.enabled = False

-
 class CLIProgressHandler(ProgressHandler):
    """
    Handler that displays progress using tqdm progress bars in the CLI.
    """
-
    def __init__(self):
        super().__init__("cli")
        self.progress_bars: Dict[str, tqdm] = {}
@@ -92,19 +75,12 @@ class CLIProgressHandler(ProgressHandler):
                desc=f"Node {node_id}",
                unit="steps",
                leave=True,
-                position=len(self.progress_bars),
+                position=len(self.progress_bars)
            )

    @override
-    def update_handler(
-        self,
-        node_id: str,
-        value: float,
-        max_value: float,
-        state: NodeProgressState,
-        prompt_id: str,
-        image: Optional[Image.Image] = None,
-    ):
+    def update_handler(self, node_id: str, value: float, max_value: float,
+                      state: NodeProgressState, prompt_id: str, image: Optional[Image.Image] = None):
        # Handle case where start_handler wasn't called
        if node_id not in self.progress_bars:
            self.progress_bars[node_id] = tqdm(
@@ -112,7 +88,7 @@ class CLIProgressHandler(ProgressHandler):
                desc=f"Node {node_id}",
                unit="steps",
                leave=True,
-                position=len(self.progress_bars),
+                position=len(self.progress_bars)
            )
            self.progress_bars[node_id].update(value)
        else:
@@ -143,12 +119,10 @@ class CLIProgressHandler(ProgressHandler):
            bar.close()
        self.progress_bars.clear()

-
 class WebUIProgressHandler(ProgressHandler):
    """
    Handler that sends progress updates to the WebUI via WebSockets.
    """
-
    def __init__(self, server_instance):
        super().__init__("webui")
        self.server_instance = server_instance
@@ -171,16 +145,17 @@ class WebUIProgressHandler(ProgressHandler):
                "prompt_id": prompt_id,
                "display_node_id": self.registry.dynprompt.get_display_node_id(node_id),
                "parent_node_id": self.registry.dynprompt.get_parent_node_id(node_id),
-                "real_node_id": self.registry.dynprompt.get_real_node_id(node_id),
+                "real_node_id": self.registry.dynprompt.get_real_node_id(node_id)
            }
            for node_id, state in nodes.items()
            if state["state"] != NodeState.Pending
        }

        # Send a combined progress_state message with all node states
-        self.server_instance.send_sync(
-            "progress_state", {"prompt_id": prompt_id, "nodes": active_nodes}
-        )
+        self.server_instance.send_sync("progress_state", {
+            "prompt_id": prompt_id,
+            "nodes": active_nodes
+        })

    @override
    def start_handler(self, node_id: str, state: NodeProgressState, prompt_id: str):
@@ -189,41 +164,21 @@ class WebUIProgressHandler(ProgressHandler):
            self._send_progress_state(prompt_id, self.registry.nodes)

    @override
-    def update_handler(
-        self,
-        node_id: str,
-        value: float,
-        max_value: float,
-        state: NodeProgressState,
-        prompt_id: str,
-        image: Optional[Image.Image] = None,
-    ):
+    def update_handler(self, node_id: str, value: float, max_value: float,
+                      state: NodeProgressState, prompt_id: str, image: Optional[Image.Image] = None):
        # Send progress state of all nodes
        if self.registry:
            self._send_progress_state(prompt_id, self.registry.nodes)
        if image:
-            # Only send new format if client supports it
-            if feature_flags.supports_feature(
-                self.server_instance.sockets_metadata,
-                self.server_instance.client_id,
-                "supports_preview_metadata",
-            ):
-                metadata = {
-                    "node_id": node_id,
-                    "prompt_id": prompt_id,
-                    "display_node_id": self.registry.dynprompt.get_display_node_id(
-                        node_id
-                    ),
-                    "parent_node_id": self.registry.dynprompt.get_parent_node_id(
-                        node_id
-                    ),
-                    "real_node_id": self.registry.dynprompt.get_real_node_id(node_id),
-                }
-                self.server_instance.send_sync(
-                    BinaryEventTypes.PREVIEW_IMAGE_WITH_METADATA,
-                    (image, metadata),
-                    self.server_instance.client_id,
-                )
+            metadata = {
+                "node_id": node_id,
+                "prompt_id": prompt_id,
+                "display_node_id": self.registry.dynprompt.get_display_node_id(node_id),
+                "parent_node_id": self.registry.dynprompt.get_parent_node_id(node_id),
+                "real_node_id": self.registry.dynprompt.get_real_node_id(node_id)
+            }
+            self.server_instance.send_sync(BinaryEventTypes.PREVIEW_IMAGE_WITH_METADATA, (image, metadata), self.server_instance.client_id)
+

    @override
    def finish_handler(self, node_id: str, state: NodeProgressState, prompt_id: str):
@@ -231,13 +186,11 @@ class WebUIProgressHandler(ProgressHandler):
        if self.registry:
            self._send_progress_state(prompt_id, self.registry.nodes)

-
 class ProgressRegistry:
    """
    Registry that maintains node progress state and notifies registered handlers.
    """
-
-    def __init__(self, prompt_id: str, dynprompt: "DynamicPrompt"):
+    def __init__(self, prompt_id: str, dynprompt: DynamicPrompt):
        self.prompt_id = prompt_id
        self.dynprompt = dynprompt
        self.nodes: Dict[str, NodeProgressState] = {}
@@ -268,7 +221,9 @@ class ProgressRegistry:
        """Ensure a node entry exists"""
        if node_id not in self.nodes:
            self.nodes[node_id] = NodeProgressState(
-                state=NodeState.Pending, value=0, max=1
+                state = NodeState.Pending,
+                value = 0,
+                max = 1
            )
        return self.nodes[node_id]

@@ -284,9 +239,7 @@ class ProgressRegistry:
            if handler.enabled:
                handler.start_handler(node_id, entry, self.prompt_id)

-    def update_progress(
-        self, node_id: str, value: float, max_value: float, image: Optional[Image.Image]
-    ) -> None:
+    def update_progress(self, node_id: str, value: float, max_value: float, image: Optional[Image.Image]) -> None:
        """Update progress for a node"""
        entry = self.ensure_entry(node_id)
        entry["state"] = NodeState.Running
@@ -296,9 +249,7 @@ class ProgressRegistry:
        # Notify all enabled handlers
        for handler in self.handlers.values():
            if handler.enabled:
-                handler.update_handler(
-                    node_id, value, max_value, entry, self.prompt_id, image
-                )
+                handler.update_handler(node_id, value, max_value, entry, self.prompt_id, image)

    def finish_progress(self, node_id: str) -> None:
        """Finish progress tracking for a node"""
@@ -317,9 +268,9 @@ class ProgressRegistry:
            handler.reset()

 # Global registry instance
-global_progress_registry: ProgressRegistry = None
+global_progress_registry: ProgressRegistry = ProgressRegistry(prompt_id="", dynprompt=DynamicPrompt({}))

-def reset_progress_state(prompt_id: str, dynprompt: "DynamicPrompt") -> None:
+def reset_progress_state(prompt_id: str, dynprompt: DynamicPrompt) -> None:
    global global_progress_registry

    # Reset existing handlers if registry exists
@@ -329,19 +280,9 @@ def reset_progress_state(prompt_id: str, dynprompt: "DynamicPrompt") -> None:
    # Create new registry
    global_progress_registry = ProgressRegistry(prompt_id, dynprompt)

-
 def add_progress_handler(handler: ProgressHandler) -> None:
-    registry = get_progress_state()
-    handler.set_registry(registry)
-    registry.register_handler(handler)
-
+    handler.set_registry(global_progress_registry)
+    global_progress_registry.register_handler(handler)

 def get_progress_state() -> ProgressRegistry:
-    global global_progress_registry
-    if global_progress_registry is None:
-        from comfy_execution.graph import DynamicPrompt
-
-        global_progress_registry = ProgressRegistry(
-            prompt_id="", dynprompt=DynamicPrompt({})
-        )
    return global_progress_registry
--- a/comfy_extras/nodes_audio.py
+++ b/comfy_extras/nodes_audio.py
@@ -133,6 +133,14 @@ def save_audio(self, audio, filename_prefix="ComfyUI", format="flac", prompt=Non
            if sample_rate != audio["sample_rate"]:
                waveform = torchaudio.functional.resample(waveform, audio["sample_rate"], sample_rate)

+        # Create in-memory WAV buffer
+        wav_buffer = io.BytesIO()
+        torchaudio.save(wav_buffer, waveform, sample_rate, format="WAV")
+        wav_buffer.seek(0)  # Rewind for reading
+
+        # Use PyAV to convert and add metadata
+        input_container = av.open(wav_buffer)
+
        # Create output with specified format
        output_buffer = io.BytesIO()
        output_container = av.open(output_buffer, mode='w', format=format)
@@ -142,6 +150,7 @@ def save_audio(self, audio, filename_prefix="ComfyUI", format="flac", prompt=Non
            output_container.metadata[key] = value

        # Set up the output stream with appropriate properties
+        input_container.streams.audio[0]
        if format == "opus":
            out_stream = output_container.add_stream("libopus", rate=sample_rate)
            if quality == "64k":
@@ -166,16 +175,18 @@ def save_audio(self, audio, filename_prefix="ComfyUI", format="flac", prompt=Non
        else: #format == "flac":
            out_stream = output_container.add_stream("flac", rate=sample_rate)

-        frame = av.AudioFrame.from_ndarray(waveform.movedim(0, 1).reshape(1, -1).float().numpy(), format='flt', layout='mono' if waveform.shape[0] == 1 else 'stereo')
-        frame.sample_rate = sample_rate
-        frame.pts = 0
-        output_container.mux(out_stream.encode(frame))
+
+        # Copy frames from input to output
+        for frame in input_container.decode(audio=0):
+            frame.pts = None  # Let PyAV handle timestamps
+            output_container.mux(out_stream.encode(frame))

        # Flush encoder
        output_container.mux(out_stream.encode(None))

        # Close containers
        output_container.close()
+        input_container.close()

        # Write the output to file
        output_buffer.seek(0)
@@ -278,42 +289,6 @@ class PreviewAudio(SaveAudio):
                "hidden": {"prompt": "PROMPT", "extra_pnginfo": "EXTRA_PNGINFO"},
                }

-def f32_pcm(wav: torch.Tensor) -> torch.Tensor:
-    """Convert audio to float 32 bits PCM format."""
-    if wav.dtype.is_floating_point:
-        return wav
-    elif wav.dtype == torch.int16:
-        return wav.float() / (2 ** 15)
-    elif wav.dtype == torch.int32:
-        return wav.float() / (2 ** 31)
-    raise ValueError(f"Unsupported wav dtype: {wav.dtype}")
-
-def load(filepath: str) -> tuple[torch.Tensor, int]:
-    with av.open(filepath) as af:
-        if not af.streams.audio:
-            raise ValueError("No audio stream found in the file.")
-
-        stream = af.streams.audio[0]
-        sr = stream.codec_context.sample_rate
-        n_channels = stream.channels
-
-        frames = []
-        length = 0
-        for frame in af.decode(streams=stream.index):
-            buf = torch.from_numpy(frame.to_ndarray())
-            if buf.shape[0] != n_channels:
-                buf = buf.view(-1, n_channels).t()
-
-            frames.append(buf)
-            length += buf.shape[1]
-
-        if not frames:
-            raise ValueError("No audio frames decoded.")
-
-        wav = torch.cat(frames, dim=1)
-        wav = f32_pcm(wav)
-        return wav, sr
-
 class LoadAudio:
    @classmethod
    def INPUT_TYPES(s):
@@ -328,7 +303,7 @@ class LoadAudio:

    def load(self, audio):
        audio_path = folder_paths.get_annotated_filepath(audio)
-        waveform, sample_rate = load(audio_path)
+        waveform, sample_rate = torchaudio.load(audio_path)
        audio = {"waveform": waveform.unsqueeze(0), "sample_rate": sample_rate}
        return (audio, )

--- a/comfy_extras/nodes_cfg.py
+++ b/comfy_extras/nodes_cfg.py
@@ -40,33 +40,6 @@ class CFGZeroStar:
        m.set_model_sampler_post_cfg_function(cfg_zero_star)
        return (m, )

-class CFGNorm:
-    @classmethod
-    def INPUT_TYPES(s):
-        return {"required": {"model": ("MODEL",),
-                             "strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0, "step": 0.01}),
-                            }}
-    RETURN_TYPES = ("MODEL",)
-    RETURN_NAMES = ("patched_model",)
-    FUNCTION = "patch"
-    CATEGORY = "advanced/guidance"
-    EXPERIMENTAL = True
-
-    def patch(self, model, strength):
-        m = model.clone()
-        def cfg_norm(args):
-            cond_p = args['cond_denoised']
-            pred_text_ = args["denoised"]
-
-            norm_full_cond = torch.norm(cond_p, dim=1, keepdim=True)
-            norm_pred_text = torch.norm(pred_text_, dim=1, keepdim=True)
-            scale = (norm_full_cond / (norm_pred_text + 1e-8)).clamp(min=0.0, max=1.0)
-            return pred_text_ * scale * strength
-
-        m.set_model_sampler_post_cfg_function(cfg_norm)
-        return (m, )
-
 NODE_CLASS_MAPPINGS = {
-    "CFGZeroStar": CFGZeroStar,
-    "CFGNorm": CFGNorm,
+    "CFGZeroStar": CFGZeroStar
 }
--- a/comfy_extras/nodes_cosmos.py
+++ b/comfy_extras/nodes_cosmos.py
@@ -2,7 +2,6 @@ import nodes
 import torch
 import comfy.model_management
 import comfy.utils
-import comfy.latent_formats


 class EmptyCosmosLatentVideo:
@@ -76,53 +75,8 @@ class CosmosImageToVideoLatent:
        out_latent["noise_mask"] = mask.repeat((batch_size, ) + (1,) * (mask.ndim - 1))
        return (out_latent,)

-class CosmosPredict2ImageToVideoLatent:
-    @classmethod
-    def INPUT_TYPES(s):
-        return {"required": {"vae": ("VAE", ),
-                             "width": ("INT", {"default": 848, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 16}),
-                             "height": ("INT", {"default": 480, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 16}),
-                             "length": ("INT", {"default": 93, "min": 1, "max": nodes.MAX_RESOLUTION, "step": 4}),
-                             "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
-                },
-                "optional": {"start_image": ("IMAGE", ),
-                             "end_image": ("IMAGE", ),
-                }}
-
-
-    RETURN_TYPES = ("LATENT",)
-    FUNCTION = "encode"
-
-    CATEGORY = "conditioning/inpaint"
-
-    def encode(self, vae, width, height, length, batch_size, start_image=None, end_image=None):
-        latent = torch.zeros([1, 16, ((length - 1) // 4) + 1, height // 8, width // 8], device=comfy.model_management.intermediate_device())
-        if start_image is None and end_image is None:
-            out_latent = {}
-            out_latent["samples"] = latent
-            return (out_latent,)
-
-        mask = torch.ones([latent.shape[0], 1, ((length - 1) // 4) + 1, latent.shape[-2], latent.shape[-1]], device=comfy.model_management.intermediate_device())
-
-        if start_image is not None:
-            latent_temp = vae_encode_with_padding(vae, start_image, width, height, length, padding=1)
-            latent[:, :, :latent_temp.shape[-3]] = latent_temp
-            mask[:, :, :latent_temp.shape[-3]] *= 0.0
-
-        if end_image is not None:
-            latent_temp = vae_encode_with_padding(vae, end_image, width, height, length, padding=0)
-            latent[:, :, -latent_temp.shape[-3]:] = latent_temp
-            mask[:, :, -latent_temp.shape[-3]:] *= 0.0
-
-        out_latent = {}
-        latent_format = comfy.latent_formats.Wan21()
-        latent = latent_format.process_out(latent) * mask + latent * (1.0 - mask)
-        out_latent["samples"] = latent.repeat((batch_size, ) + (1,) * (latent.ndim - 1))
-        out_latent["noise_mask"] = mask.repeat((batch_size, ) + (1,) * (mask.ndim - 1))
-        return (out_latent,)

 NODE_CLASS_MAPPINGS = {
    "EmptyCosmosLatentVideo": EmptyCosmosLatentVideo,
    "CosmosImageToVideoLatent": CosmosImageToVideoLatent,
-    "CosmosPredict2ImageToVideoLatent": CosmosPredict2ImageToVideoLatent,
 }
--- a/comfy_extras/nodes_custom_sampler.py
+++ b/comfy_extras/nodes_custom_sampler.py
@@ -2,8 +2,6 @@ import math
 import comfy.samplers
 import comfy.sample
 from comfy.k_diffusion import sampling as k_diffusion_sampling
-from comfy.k_diffusion import sa_solver
-from comfy.comfy_types import IO, ComfyNodeABC, InputTypeDict
 import latent_preview
 import torch
 import comfy.utils
@@ -301,35 +299,6 @@ class ExtendIntermediateSigmas:

        return (extended_sigmas,)

-
-class SamplingPercentToSigma:
-    @classmethod
-    def INPUT_TYPES(cls) -> InputTypeDict:
-        return {
-            "required": {
-                "model": (IO.MODEL, {}),
-                "sampling_percent": (IO.FLOAT, {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.0001}),
-                "return_actual_sigma": (IO.BOOLEAN, {"default": False, "tooltip": "Return the actual sigma value instead of the value used for interval checks.\nThis only affects results at 0.0 and 1.0."}),
-            }
-        }
-
-    RETURN_TYPES = (IO.FLOAT,)
-    RETURN_NAMES = ("sigma_value",)
-    CATEGORY = "sampling/custom_sampling/sigmas"
-
-    FUNCTION = "get_sigma"
-
-    def get_sigma(self, model, sampling_percent, return_actual_sigma):
-        model_sampling = model.get_model_object("model_sampling")
-        sigma_val = model_sampling.percent_to_sigma(sampling_percent)
-        if return_actual_sigma:
-            if sampling_percent == 0.0:
-                sigma_val = model_sampling.sigma_max.item()
-            elif sampling_percent == 1.0:
-                sigma_val = model_sampling.sigma_min.item()
-        return (sigma_val,)
-
-
 class KSamplerSelect:
    @classmethod
    def INPUT_TYPES(s):
@@ -511,89 +480,6 @@ class SamplerDPMAdaptative:
                                                              "s_noise":s_noise })
        return (sampler, )

-
-class SamplerER_SDE(ComfyNodeABC):
-    @classmethod
-    def INPUT_TYPES(cls) -> InputTypeDict:
-        return {
-            "required": {
-                "solver_type": (IO.COMBO, {"options": ["ER-SDE", "Reverse-time SDE", "ODE"]}),
-                "max_stage": (IO.INT, {"default": 3, "min": 1, "max": 3}),
-                "eta": (
-                    IO.FLOAT,
-                    {"default": 1.0, "min": 0.0, "max": 100.0, "step": 0.01, "round": False, "tooltip": "Stochastic strength of reverse-time SDE.\nWhen eta=0, it reduces to deterministic ODE. This setting doesn't apply to ER-SDE solver type."},
-                ),
-                "s_noise": (IO.FLOAT, {"default": 1.0, "min": 0.0, "max": 100.0, "step": 0.01, "round": False}),
-            }
-        }
-
-    RETURN_TYPES = (IO.SAMPLER,)
-    CATEGORY = "sampling/custom_sampling/samplers"
-
-    FUNCTION = "get_sampler"
-
-    def get_sampler(self, solver_type, max_stage, eta, s_noise):
-        if solver_type == "ODE" or (solver_type == "Reverse-time SDE" and eta == 0):
-            eta = 0
-            s_noise = 0
-
-        def reverse_time_sde_noise_scaler(x):
-            return x ** (eta + 1)
-
-        if solver_type == "ER-SDE":
-            # Use the default one in sample_er_sde()
-            noise_scaler = None
-        else:
-            noise_scaler = reverse_time_sde_noise_scaler
-
-        sampler_name = "er_sde"
-        sampler = comfy.samplers.ksampler(sampler_name, {"s_noise": s_noise, "noise_scaler": noise_scaler, "max_stage": max_stage})
-        return (sampler,)
-
-
-class SamplerSASolver(ComfyNodeABC):
-    @classmethod
-    def INPUT_TYPES(cls) -> InputTypeDict:
-        return {
-            "required": {
-                "model": (IO.MODEL, {}),
-                "eta": (IO.FLOAT, {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01, "round": False},),
-                "sde_start_percent": (IO.FLOAT, {"default": 0.2, "min": 0.0, "max": 1.0, "step": 0.001},),
-                "sde_end_percent": (IO.FLOAT, {"default": 0.8, "min": 0.0, "max": 1.0, "step": 0.001},),
-                "s_noise": (IO.FLOAT, {"default": 1.0, "min": 0.0, "max": 100.0, "step": 0.01, "round": False},),
-                "predictor_order": (IO.INT, {"default": 3, "min": 1, "max": 6}),
-                "corrector_order": (IO.INT, {"default": 4, "min": 0, "max": 6}),
-                "use_pece": (IO.BOOLEAN, {}),
-                "simple_order_2": (IO.BOOLEAN, {}),
-            }
-        }
-
-    RETURN_TYPES = (IO.SAMPLER,)
-    CATEGORY = "sampling/custom_sampling/samplers"
-
-    FUNCTION = "get_sampler"
-
-    def get_sampler(self, model, eta, sde_start_percent, sde_end_percent, s_noise, predictor_order, corrector_order, use_pece, simple_order_2):
-        model_sampling = model.get_model_object("model_sampling")
-        start_sigma = model_sampling.percent_to_sigma(sde_start_percent)
-        end_sigma = model_sampling.percent_to_sigma(sde_end_percent)
-        tau_func = sa_solver.get_tau_interval_func(start_sigma, end_sigma, eta=eta)
-
-        sampler_name = "sa_solver"
-        sampler = comfy.samplers.ksampler(
-            sampler_name,
-            {
-                "tau_func": tau_func,
-                "s_noise": s_noise,
-                "predictor_order": predictor_order,
-                "corrector_order": corrector_order,
-                "use_pece": use_pece,
-                "simple_order_2": simple_order_2,
-            },
-        )
-        return (sampler,)
-
-
 class Noise_EmptyNoise:
    def __init__(self):
        self.seed = 0
@@ -712,10 +598,9 @@ class CFGGuider:
        return (guider,)

 class Guider_DualCFG(comfy.samplers.CFGGuider):
-    def set_cfg(self, cfg1, cfg2, nested=False):
+    def set_cfg(self, cfg1, cfg2):
        self.cfg1 = cfg1
        self.cfg2 = cfg2
-        self.nested = nested

    def set_conds(self, positive, middle, negative):
        middle = node_helpers.conditioning_set_values(middle, {"prompt_type": "negative"})
@@ -724,21 +609,9 @@ class Guider_DualCFG(comfy.samplers.CFGGuider):
    def predict_noise(self, x, timestep, model_options={}, seed=None):
        negative_cond = self.conds.get("negative", None)
        middle_cond = self.conds.get("middle", None)
-        positive_cond = self.conds.get("positive", None)

-        if self.nested:
-            out = comfy.samplers.calc_cond_batch(self.inner_model, [negative_cond, middle_cond, positive_cond], x, timestep, model_options)
-            pred_text = comfy.samplers.cfg_function(self.inner_model, out[2], out[1], self.cfg1, x, timestep, model_options=model_options, cond=positive_cond, uncond=middle_cond)
-            return out[0] + self.cfg2 * (pred_text - out[0])
-        else:
-            if model_options.get("disable_cfg1_optimization", False) == False:
-                if math.isclose(self.cfg2, 1.0):
-                    negative_cond = None
-                    if math.isclose(self.cfg1, 1.0):
-                        middle_cond = None
-
-            out = comfy.samplers.calc_cond_batch(self.inner_model, [negative_cond, middle_cond, positive_cond], x, timestep, model_options)
-            return comfy.samplers.cfg_function(self.inner_model, out[1], out[0], self.cfg2, x, timestep, model_options=model_options, cond=middle_cond, uncond=negative_cond) + (out[2] - out[1]) * self.cfg1
+        out = comfy.samplers.calc_cond_batch(self.inner_model, [negative_cond, middle_cond, self.conds.get("positive", None)], x, timestep, model_options)
+        return comfy.samplers.cfg_function(self.inner_model, out[1], out[0], self.cfg2, x, timestep, model_options=model_options, cond=middle_cond, uncond=negative_cond) + (out[2] - out[1]) * self.cfg1

 class DualCFGGuider:
    @classmethod
@@ -750,7 +623,6 @@ class DualCFGGuider:
                    "negative": ("CONDITIONING", ),
                    "cfg_conds": ("FLOAT", {"default": 8.0, "min": 0.0, "max": 100.0, "step":0.1, "round": 0.01}),
                    "cfg_cond2_negative": ("FLOAT", {"default": 8.0, "min": 0.0, "max": 100.0, "step":0.1, "round": 0.01}),
-                    "style": (["regular", "nested"],),
                     }
                }

@@ -759,10 +631,10 @@ class DualCFGGuider:
    FUNCTION = "get_guider"
    CATEGORY = "sampling/custom_sampling/guiders"

-    def get_guider(self, model, cond1, cond2, negative, cfg_conds, cfg_cond2_negative, style):
+    def get_guider(self, model, cond1, cond2, negative, cfg_conds, cfg_cond2_negative):
        guider = Guider_DualCFG(model)
        guider.set_conds(cond1, cond2, negative)
-        guider.set_cfg(cfg_conds, cfg_cond2_negative, nested=(style == "nested"))
+        guider.set_cfg(cfg_conds, cfg_cond2_negative)
        return (guider,)

 class DisableNoise:
@@ -909,14 +781,11 @@ NODE_CLASS_MAPPINGS = {
    "SamplerDPMPP_SDE": SamplerDPMPP_SDE,
    "SamplerDPMPP_2S_Ancestral": SamplerDPMPP_2S_Ancestral,
    "SamplerDPMAdaptative": SamplerDPMAdaptative,
-    "SamplerER_SDE": SamplerER_SDE,
-    "SamplerSASolver": SamplerSASolver,
    "SplitSigmas": SplitSigmas,
    "SplitSigmasDenoise": SplitSigmasDenoise,
    "FlipSigmas": FlipSigmas,
    "SetFirstSigma": SetFirstSigma,
    "ExtendIntermediateSigmas": ExtendIntermediateSigmas,
-    "SamplingPercentToSigma": SamplingPercentToSigma,

    "CFGGuider": CFGGuider,
    "DualCFGGuider": DualCFGGuider,
--- a/comfy_extras/nodes_edit_model.py
+++ b/comfy_extras/nodes_edit_model.py
@@ -1,26 +0,0 @@
-import node_helpers
-
-
-class ReferenceLatent:
-    @classmethod
-    def INPUT_TYPES(s):
-        return {"required": {"conditioning": ("CONDITIONING", ),
-                            },
-                "optional": {"latent": ("LATENT", ),}
-               }
-
-    RETURN_TYPES = ("CONDITIONING",)
-    FUNCTION = "append"
-
-    CATEGORY = "advanced/conditioning/edit_models"
-    DESCRIPTION = "This node sets the guiding latent for an edit model. If the model supports it you can chain multiple to set multiple reference images."
-
-    def append(self, conditioning, latent=None):
-        if latent is not None:
-            conditioning = node_helpers.conditioning_set_values(conditioning, {"reference_latents": [latent["samples"]]}, append=True)
-        return (conditioning, )
-
-
-NODE_CLASS_MAPPINGS = {
-    "ReferenceLatent": ReferenceLatent,
-}
--- a/comfy_extras/nodes_flux.py
+++ b/comfy_extras/nodes_flux.py
@@ -1,5 +1,4 @@
 import node_helpers
-import comfy.utils

 class CLIPTextEncodeFlux:
    @classmethod
@@ -57,52 +56,8 @@ class FluxDisableGuidance:
        return (c, )


-PREFERED_KONTEXT_RESOLUTIONS = [
-    (672, 1568),
-    (688, 1504),
-    (720, 1456),
-    (752, 1392),
-    (800, 1328),
-    (832, 1248),
-    (880, 1184),
-    (944, 1104),
-    (1024, 1024),
-    (1104, 944),
-    (1184, 880),
-    (1248, 832),
-    (1328, 800),
-    (1392, 752),
-    (1456, 720),
-    (1504, 688),
-    (1568, 672),
-]
-
-
-class FluxKontextImageScale:
-    @classmethod
-    def INPUT_TYPES(s):
-        return {"required": {"image": ("IMAGE", ),
-                            },
-               }
-
-    RETURN_TYPES = ("IMAGE",)
-    FUNCTION = "scale"
-
-    CATEGORY = "advanced/conditioning/flux"
-    DESCRIPTION = "This node resizes the image to one that is more optimal for flux kontext."
-
-    def scale(self, image):
-        width = image.shape[2]
-        height = image.shape[1]
-        aspect_ratio = width / height
-        _, width, height = min((abs(aspect_ratio - w / h), w, h) for w, h in PREFERED_KONTEXT_RESOLUTIONS)
-        image = comfy.utils.common_upscale(image.movedim(-1, 1), width, height, "lanczos", "center").movedim(1, -1)
-        return (image, )
-
-
 NODE_CLASS_MAPPINGS = {
    "CLIPTextEncodeFlux": CLIPTextEncodeFlux,
    "FluxGuidance": FluxGuidance,
    "FluxDisableGuidance": FluxDisableGuidance,
-    "FluxKontextImageScale": FluxKontextImageScale,
 }
--- a/comfy_extras/nodes_fresca.py
+++ b/comfy_extras/nodes_fresca.py
@@ -71,11 +71,8 @@ class FreSca:
    DESCRIPTION = "Applies frequency-dependent scaling to the guidance"
    def patch(self, model, scale_low, scale_high, freq_cutoff):
        def custom_cfg_function(args):
-            conds_out = args["conds_out"]
-            if len(conds_out) <= 1 or None in args["conds"][:2]:
-                return conds_out
-            cond = conds_out[0]
-            uncond = conds_out[1]
+            cond = args["conds_out"][0]
+            uncond = args["conds_out"][1]

            guidance = cond - uncond
            filtered_guidance = Fourier_filter(
@@ -86,7 +83,7 @@ class FreSca:
            )
            filtered_cond = filtered_guidance + uncond

-            return [filtered_cond, uncond] + conds_out[2:]
+            return [filtered_cond, uncond]

        m = model.clone()
        m.set_model_sampler_pre_cfg_function(custom_cfg_function)
--- a/comfy_extras/nodes_images.py
+++ b/comfy_extras/nodes_images.py
@@ -304,23 +304,10 @@ Optional spacing can be added between images.
                image2.movedim(-1, 1), target_w, target_h, "lanczos", "disabled"
            ).movedim(1, -1)

-        color_map = {
-            "white": 1.0,
-            "black": 0.0,
-            "red": (1.0, 0.0, 0.0),
-            "green": (0.0, 1.0, 0.0),
-            "blue": (0.0, 0.0, 1.0),
-        }
-
-        color_val = color_map[spacing_color]
-
        # When not matching sizes, pad to align non-concat dimensions
        if not match_image_size:
            h1, w1 = image1.shape[1:3]
            h2, w2 = image2.shape[1:3]
-            pad_value = 0.0
-            if not isinstance(color_val, tuple):
-                pad_value = color_val

            if direction in ["left", "right"]:
                # For horizontal concat, pad heights to match
@@ -329,11 +316,11 @@ Optional spacing can be added between images.
                    if h1 < target_h:
                        pad_h = target_h - h1
                        pad_top, pad_bottom = pad_h // 2, pad_h - pad_h // 2
-                        image1 = torch.nn.functional.pad(image1, (0, 0, 0, 0, pad_top, pad_bottom), mode='constant', value=pad_value)
+                        image1 = torch.nn.functional.pad(image1, (0, 0, 0, 0, pad_top, pad_bottom), mode='constant', value=0.0)
                    if h2 < target_h:
                        pad_h = target_h - h2
                        pad_top, pad_bottom = pad_h // 2, pad_h - pad_h // 2
-                        image2 = torch.nn.functional.pad(image2, (0, 0, 0, 0, pad_top, pad_bottom), mode='constant', value=pad_value)
+                        image2 = torch.nn.functional.pad(image2, (0, 0, 0, 0, pad_top, pad_bottom), mode='constant', value=0.0)
            else:  # up, down
                # For vertical concat, pad widths to match
                if w1 != w2:
@@ -341,11 +328,11 @@ Optional spacing can be added between images.
                    if w1 < target_w:
                        pad_w = target_w - w1
                        pad_left, pad_right = pad_w // 2, pad_w - pad_w // 2
-                        image1 = torch.nn.functional.pad(image1, (0, 0, pad_left, pad_right), mode='constant', value=pad_value)
+                        image1 = torch.nn.functional.pad(image1, (0, 0, pad_left, pad_right), mode='constant', value=0.0)
                    if w2 < target_w:
                        pad_w = target_w - w2
                        pad_left, pad_right = pad_w // 2, pad_w - pad_w // 2
-                        image2 = torch.nn.functional.pad(image2, (0, 0, pad_left, pad_right), mode='constant', value=pad_value)
+                        image2 = torch.nn.functional.pad(image2, (0, 0, pad_left, pad_right), mode='constant', value=0.0)

        # Ensure same number of channels
        if image1.shape[-1] != image2.shape[-1]:
@@ -379,6 +366,15 @@ Optional spacing can be added between images.
        if spacing_width > 0:
            spacing_width = spacing_width + (spacing_width % 2)  # Ensure even

+            color_map = {
+                "white": 1.0,
+                "black": 0.0,
+                "red": (1.0, 0.0, 0.0),
+                "green": (0.0, 1.0, 0.0),
+                "blue": (0.0, 0.0, 1.0),
+            }
+            color_val = color_map[spacing_color]
+
            if direction in ["left", "right"]:
                spacing_shape = (
                    image1.shape[0],
@@ -414,62 +410,6 @@ Optional spacing can be added between images.
        concat_dim = 2 if direction in ["left", "right"] else 1
        return (torch.cat(images, dim=concat_dim),)

-class ResizeAndPadImage:
-    @classmethod
-    def INPUT_TYPES(cls):
-        return {
-            "required": {
-                "image": ("IMAGE",),
-                "target_width": ("INT", {
-                    "default": 512,
-                    "min": 1,
-                    "max": MAX_RESOLUTION,
-                    "step": 1
-                }),
-                "target_height": ("INT", {
-                    "default": 512,
-                    "min": 1,
-                    "max": MAX_RESOLUTION,
-                    "step": 1
-                }),
-                "padding_color": (["white", "black"],),
-                "interpolation": (["area", "bicubic", "nearest-exact", "bilinear", "lanczos"],),
-            }
-        }
-
-    RETURN_TYPES = ("IMAGE",)
-    FUNCTION = "resize_and_pad"
-    CATEGORY = "image/transform"
-
-    def resize_and_pad(self, image, target_width, target_height, padding_color, interpolation):
-        batch_size, orig_height, orig_width, channels = image.shape
-
-        scale_w = target_width / orig_width
-        scale_h = target_height / orig_height
-        scale = min(scale_w, scale_h)
-
-        new_width = int(orig_width * scale)
-        new_height = int(orig_height * scale)
-
-        image_permuted = image.permute(0, 3, 1, 2)
-
-        resized = comfy.utils.common_upscale(image_permuted, new_width, new_height, interpolation, "disabled")
-
-        pad_value = 0.0 if padding_color == "black" else 1.0
-        padded = torch.full(
-            (batch_size, channels, target_height, target_width),
-            pad_value,
-            dtype=image.dtype,
-            device=image.device
-        )
-
-        y_offset = (target_height - new_height) // 2
-        x_offset = (target_width - new_width) // 2
-
-        padded[:, :, y_offset:y_offset + new_height, x_offset:x_offset + new_width] = resized
-
-        output = padded.permute(0, 2, 3, 1)
-        return (output,)

 class SaveSVGNode:
    """
@@ -583,49 +523,6 @@ class GetImageSize:

        return width, height, batch_size

-class ImageRotate:
-    @classmethod
-    def INPUT_TYPES(s):
-        return {"required": { "image": (IO.IMAGE,),
-                              "rotation": (["none", "90 degrees", "180 degrees", "270 degrees"],),
-                              }}
-    RETURN_TYPES = (IO.IMAGE,)
-    FUNCTION = "rotate"
-
-    CATEGORY = "image/transform"
-
-    def rotate(self, image, rotation):
-        rotate_by = 0
-        if rotation.startswith("90"):
-            rotate_by = 1
-        elif rotation.startswith("180"):
-            rotate_by = 2
-        elif rotation.startswith("270"):
-            rotate_by = 3
-
-        image = torch.rot90(image, k=rotate_by, dims=[2, 1])
-        return (image,)
-
-class ImageFlip:
-    @classmethod
-    def INPUT_TYPES(s):
-        return {"required": { "image": (IO.IMAGE,),
-                              "flip_method": (["x-axis: vertically", "y-axis: horizontally"],),
-                              }}
-    RETURN_TYPES = (IO.IMAGE,)
-    FUNCTION = "flip"
-
-    CATEGORY = "image/transform"
-
-    def flip(self, image, flip_method):
-        if flip_method.startswith("x"):
-            image = torch.flip(image, dims=[1])
-        elif flip_method.startswith("y"):
-            image = torch.flip(image, dims=[2])
-
-        return (image,)
-
-
 NODE_CLASS_MAPPINGS = {
    "ImageCrop": ImageCrop,
    "RepeatImageBatch": RepeatImageBatch,
@@ -635,8 +532,5 @@ NODE_CLASS_MAPPINGS = {
    "SaveAnimatedPNG": SaveAnimatedPNG,
    "SaveSVGNode": SaveSVGNode,
    "ImageStitch": ImageStitch,
-    "ResizeAndPadImage": ResizeAndPadImage,
    "GetImageSize": GetImageSize,
-    "ImageRotate": ImageRotate,
-    "ImageFlip": ImageFlip,
 }
--- a/comfy_extras/nodes_load_3d.py
+++ b/comfy_extras/nodes_load_3d.py
@@ -5,8 +5,6 @@ import os
 from comfy.comfy_types import IO
 from comfy_api.input_impl import VideoFromFile

-from pathlib import Path
-

 def normalize_path(path):
    return path.replace('\\', '/')
@@ -18,14 +16,7 @@ class Load3D():

        os.makedirs(input_dir, exist_ok=True)

-        input_path = Path(input_dir)
-        base_path = Path(folder_paths.get_input_directory())
-
-        files = [
-            normalize_path(str(file_path.relative_to(base_path)))
-            for file_path in input_path.rglob("*")
-            if file_path.suffix.lower() in {'.gltf', '.glb', '.obj', '.fbx', '.stl'}
-        ]
+        files = [normalize_path(os.path.join("3d", f)) for f in os.listdir(input_dir) if f.endswith(('.gltf', '.glb', '.obj', '.fbx', '.stl'))]

        return {"required": {
            "model_file": (sorted(files), {"file_upload": True}),
@@ -70,14 +61,7 @@ class Load3DAnimation():

        os.makedirs(input_dir, exist_ok=True)

-        input_path = Path(input_dir)
-        base_path = Path(folder_paths.get_input_directory())
-
-        files = [
-            normalize_path(str(file_path.relative_to(base_path)))
-            for file_path in input_path.rglob("*")
-            if file_path.suffix.lower() in {'.gltf', '.glb', '.fbx'}
-        ]
+        files = [normalize_path(os.path.join("3d", f)) for f in os.listdir(input_dir) if f.endswith(('.gltf', '.glb', '.fbx'))]

        return {"required": {
            "model_file": (sorted(files), {"file_upload": True}),
--- a/comfy_extras/nodes_lt.py
+++ b/comfy_extras/nodes_lt.py
@@ -134,8 +134,8 @@ class LTXVAddGuide:
        _, num_keyframes = get_keyframe_idxs(cond)
        latent_count = latent_length - num_keyframes
        frame_idx = frame_idx if frame_idx >= 0 else max((latent_count - 1) * time_scale_factor + 1 + frame_idx, 0)
-        if guide_length > 1 and frame_idx != 0:
-            frame_idx = (frame_idx - 1) // time_scale_factor * time_scale_factor + 1 # frame index - 1 must be divisible by 8 or frame_idx == 0
+        if guide_length > 1:
+            frame_idx = frame_idx // time_scale_factor * time_scale_factor # frame index must be divisible by 8

        latent_idx = (frame_idx + time_scale_factor - 1) // time_scale_factor

@@ -144,7 +144,7 @@ class LTXVAddGuide:
    def add_keyframe_index(self, cond, frame_idx, guiding_latent, scale_factors):
        keyframe_idxs, _ = get_keyframe_idxs(cond)
        _, latent_coords = self._patchifier.patchify(guiding_latent)
-        pixel_coords = latent_to_pixel_coords(latent_coords, scale_factors, causal_fix=frame_idx == 0)  # we need the causal fix only if we're placing the new latents at index 0
+        pixel_coords = latent_to_pixel_coords(latent_coords, scale_factors, True)
        pixel_coords[:, 0] += frame_idx
        if keyframe_idxs is None:
            keyframe_idxs = pixel_coords
--- a/comfy_extras/nodes_mask.py
+++ b/comfy_extras/nodes_mask.py
@@ -152,7 +152,7 @@ class ImageColorToMask:
    def image_to_mask(self, image, color):
        temp = (torch.clamp(image, 0, 1.0) * 255.0).round().to(torch.int)
        temp = torch.bitwise_left_shift(temp[:,:,:,0], 16) + torch.bitwise_left_shift(temp[:,:,:,1], 8) + temp[:,:,:,2]
-        mask = torch.where(temp == color, 1.0, 0).float()
+        mask = torch.where(temp == color, 255, 0).float()
        return (mask,)

 class SolidMask:
@@ -247,7 +247,7 @@ class MaskComposite:
        visible_width, visible_height = (right - left, bottom - top,)

        source_portion = source[:, :visible_height, :visible_width]
-        destination_portion = output[:, top:bottom, left:right]
+        destination_portion = destination[:, top:bottom, left:right]

        if operation == "multiply":
            output[:, top:bottom, left:right] = destination_portion * source_portion
--- a/comfy_extras/nodes_model_merging_model_specific.py
+++ b/comfy_extras/nodes_model_merging_model_specific.py
@@ -268,52 +268,6 @@ class ModelMergeWAN2_1(comfy_extras.nodes_model_merging.ModelMergeBlocks):

        return {"required": arg_dict}

-class ModelMergeCosmosPredict2_2B(comfy_extras.nodes_model_merging.ModelMergeBlocks):
-    CATEGORY = "advanced/model_merging/model_specific"
-
-    @classmethod
-    def INPUT_TYPES(s):
-        arg_dict = { "model1": ("MODEL",),
-                              "model2": ("MODEL",)}
-
-        argument = ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01})
-
-        arg_dict["pos_embedder."] = argument
-        arg_dict["x_embedder."] = argument
-        arg_dict["t_embedder."] = argument
-        arg_dict["t_embedding_norm."] = argument
-
-
-        for i in range(28):
-            arg_dict["blocks.{}.".format(i)] = argument
-
-        arg_dict["final_layer."] = argument
-
-        return {"required": arg_dict}
-
-class ModelMergeCosmosPredict2_14B(comfy_extras.nodes_model_merging.ModelMergeBlocks):
-    CATEGORY = "advanced/model_merging/model_specific"
-
-    @classmethod
-    def INPUT_TYPES(s):
-        arg_dict = { "model1": ("MODEL",),
-                              "model2": ("MODEL",)}
-
-        argument = ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01})
-
-        arg_dict["pos_embedder."] = argument
-        arg_dict["x_embedder."] = argument
-        arg_dict["t_embedder."] = argument
-        arg_dict["t_embedding_norm."] = argument
-
-
-        for i in range(36):
-            arg_dict["blocks.{}.".format(i)] = argument
-
-        arg_dict["final_layer."] = argument
-
-        return {"required": arg_dict}
-
 NODE_CLASS_MAPPINGS = {
    "ModelMergeSD1": ModelMergeSD1,
    "ModelMergeSD2": ModelMergeSD1, #SD1 and SD2 have the same blocks
@@ -327,6 +281,4 @@ NODE_CLASS_MAPPINGS = {
    "ModelMergeCosmos7B": ModelMergeCosmos7B,
    "ModelMergeCosmos14B": ModelMergeCosmos14B,
    "ModelMergeWAN2_1": ModelMergeWAN2_1,
-    "ModelMergeCosmosPredict2_2B": ModelMergeCosmosPredict2_2B,
-    "ModelMergeCosmosPredict2_14B": ModelMergeCosmosPredict2_14B,
 }
--- a/comfy_extras/nodes_perpneg.py
+++ b/comfy_extras/nodes_perpneg.py
@@ -4,7 +4,6 @@ import comfy.sampler_helpers
 import comfy.samplers
 import comfy.utils
 import node_helpers
-import math

 def perp_neg(x, noise_pred_pos, noise_pred_neg, noise_pred_nocond, neg_scale, cond_scale):
    pos = noise_pred_pos - noise_pred_nocond
@@ -70,23 +69,8 @@ class Guider_PerpNeg(comfy.samplers.CFGGuider):
        negative_cond = self.conds.get("negative", None)
        empty_cond = self.conds.get("empty_negative_prompt", None)

-        if model_options.get("disable_cfg1_optimization", False) == False:
-            if math.isclose(self.neg_scale, 0.0):
-                negative_cond = None
-                if math.isclose(self.cfg, 1.0):
-                    empty_cond = None
-
-        conds = [positive_cond, negative_cond, empty_cond]
-
-        out = comfy.samplers.calc_cond_batch(self.inner_model, conds, x, timestep, model_options)
-
-        # Apply pre_cfg_functions since sampling_function() is skipped
-        for fn in model_options.get("sampler_pre_cfg_function", []):
-            args = {"conds":conds, "conds_out": out, "cond_scale": self.cfg, "timestep": timestep,
-                    "input": x, "sigma": timestep, "model": self.inner_model, "model_options": model_options}
-            out = fn(args)
-
-        noise_pred_pos, noise_pred_neg, noise_pred_empty = out
+        (noise_pred_pos, noise_pred_neg, noise_pred_empty) = \
+            comfy.samplers.calc_cond_batch(self.inner_model, [positive_cond, negative_cond, empty_cond], x, timestep, model_options)
        cfg_result = perp_neg(x, noise_pred_pos, noise_pred_neg, noise_pred_empty, self.neg_scale, self.cfg)

        # normally this would be done in cfg_function, but we skipped
@@ -98,7 +82,6 @@ class Guider_PerpNeg(comfy.samplers.CFGGuider):
                "denoised": cfg_result,
                "cond": positive_cond,
                "uncond": negative_cond,
-                "cond_scale": self.cfg,
                "model": self.inner_model,
                "uncond_denoised": noise_pred_neg,
                "cond_denoised": noise_pred_pos,
--- a/comfy_extras/nodes_pixart.py
+++ b/comfy_extras/nodes_pixart.py
@@ -1,24 +1,24 @@
-from nodes import MAX_RESOLUTION
-
-class CLIPTextEncodePixArtAlpha:
-    @classmethod
-    def INPUT_TYPES(s):
-        return {"required": {
-            "width": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}),
-            "height": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}),
-            # "aspect_ratio": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}),
-            "text": ("STRING", {"multiline": True, "dynamicPrompts": True}), "clip": ("CLIP", ),
-            }}
-
-    RETURN_TYPES = ("CONDITIONING",)
-    FUNCTION = "encode"
-    CATEGORY = "advanced/conditioning"
-    DESCRIPTION = "Encodes text and sets the resolution conditioning for PixArt Alpha. Does not apply to PixArt Sigma."
-
-    def encode(self, clip, width, height, text):
-        tokens = clip.tokenize(text)
-        return (clip.encode_from_tokens_scheduled(tokens, add_dict={"width": width, "height": height}),)
-
-NODE_CLASS_MAPPINGS = {
-    "CLIPTextEncodePixArtAlpha": CLIPTextEncodePixArtAlpha,
-}
+from nodes import MAX_RESOLUTION
+
+class CLIPTextEncodePixArtAlpha:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": {
+            "width": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}),
+            "height": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}),
+            # "aspect_ratio": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}),
+            "text": ("STRING", {"multiline": True, "dynamicPrompts": True}), "clip": ("CLIP", ),
+            }}
+
+    RETURN_TYPES = ("CONDITIONING",)
+    FUNCTION = "encode"
+    CATEGORY = "advanced/conditioning"
+    DESCRIPTION = "Encodes text and sets the resolution conditioning for PixArt Alpha. Does not apply to PixArt Sigma."
+
+    def encode(self, clip, width, height, text):
+        tokens = clip.tokenize(text)
+        return (clip.encode_from_tokens_scheduled(tokens, add_dict={"width": width, "height": height}),)
+
+NODE_CLASS_MAPPINGS = {
+    "CLIPTextEncodePixArtAlpha": CLIPTextEncodePixArtAlpha,
+}
--- a/comfy_extras/nodes_slg.py
+++ b/comfy_extras/nodes_slg.py
@@ -78,75 +78,7 @@ class SkipLayerGuidanceDiT:

        return (m, )

-class SkipLayerGuidanceDiTSimple:
-    '''
-    Simple version of the SkipLayerGuidanceDiT node that only modifies the uncond pass.
-    '''
-    @classmethod
-    def INPUT_TYPES(s):
-        return {"required": {"model": ("MODEL", ),
-                             "double_layers": ("STRING", {"default": "7, 8, 9", "multiline": False}),
-                             "single_layers": ("STRING", {"default": "7, 8, 9", "multiline": False}),
-                             "start_percent": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.001}),
-                             "end_percent": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.001}),
-                                }}
-    RETURN_TYPES = ("MODEL",)
-    FUNCTION = "skip_guidance"
-    EXPERIMENTAL = True
-
-    DESCRIPTION = "Simple version of the SkipLayerGuidanceDiT node that only modifies the uncond pass."
-
-    CATEGORY = "advanced/guidance"
-
-    def skip_guidance(self, model, start_percent, end_percent, double_layers="", single_layers=""):
-        def skip(args, extra_args):
-            return args
-
-        model_sampling = model.get_model_object("model_sampling")
-        sigma_start = model_sampling.percent_to_sigma(start_percent)
-        sigma_end = model_sampling.percent_to_sigma(end_percent)
-
-        double_layers = re.findall(r'\d+', double_layers)
-        double_layers = [int(i) for i in double_layers]
-
-        single_layers = re.findall(r'\d+', single_layers)
-        single_layers = [int(i) for i in single_layers]
-
-        if len(double_layers) == 0 and len(single_layers) == 0:
-            return (model, )
-
-        def calc_cond_batch_function(args):
-            x = args["input"]
-            model = args["model"]
-            conds = args["conds"]
-            sigma = args["sigma"]
-
-            model_options = args["model_options"]
-            slg_model_options = model_options.copy()
-
-            for layer in double_layers:
-                slg_model_options = comfy.model_patcher.set_model_options_patch_replace(slg_model_options, skip, "dit", "double_block", layer)
-
-            for layer in single_layers:
-                slg_model_options = comfy.model_patcher.set_model_options_patch_replace(slg_model_options, skip, "dit", "single_block", layer)
-
-            cond, uncond = conds
-            sigma_ = sigma[0].item()
-            if sigma_ >= sigma_end and sigma_ <= sigma_start and uncond is not None:
-                cond_out, _ = comfy.samplers.calc_cond_batch(model, [cond, None], x, sigma, model_options)
-                _, uncond_out = comfy.samplers.calc_cond_batch(model, [None, uncond], x, sigma, slg_model_options)
-                out = [cond_out, uncond_out]
-            else:
-                out = comfy.samplers.calc_cond_batch(model, conds, x, sigma, model_options)
-
-            return out
-
-        m = model.clone()
-        m.set_model_sampler_calc_cond_batch_function(calc_cond_batch_function)
-
-        return (m, )

 NODE_CLASS_MAPPINGS = {
    "SkipLayerGuidanceDiT": SkipLayerGuidanceDiT,
-    "SkipLayerGuidanceDiTSimple": SkipLayerGuidanceDiTSimple,
 }
--- a/comfy_extras/nodes_tcfg.py
+++ b/comfy_extras/nodes_tcfg.py
@@ -1,71 +0,0 @@
-# TCFG: Tangential Damping Classifier-free Guidance - (arXiv: https://arxiv.org/abs/2503.18137)
-
-import torch
-
-from comfy.comfy_types import IO, ComfyNodeABC, InputTypeDict
-
-
-def score_tangential_damping(cond_score: torch.Tensor, uncond_score: torch.Tensor) -> torch.Tensor:
-    """Drop tangential components from uncond score to align with cond score."""
-    # (B, 1, ...)
-    batch_num = cond_score.shape[0]
-    cond_score_flat = cond_score.reshape(batch_num, 1, -1).float()
-    uncond_score_flat = uncond_score.reshape(batch_num, 1, -1).float()
-
-    # Score matrix A (B, 2, ...)
-    score_matrix = torch.cat((uncond_score_flat, cond_score_flat), dim=1)
-    try:
-        _, _, Vh = torch.linalg.svd(score_matrix, full_matrices=False)
-    except RuntimeError:
-        # Fallback to CPU
-        _, _, Vh = torch.linalg.svd(score_matrix.cpu(), full_matrices=False)
-
-    # Drop the tangential components
-    v1 = Vh[:, 0:1, :].to(uncond_score_flat.device)  # (B, 1, ...)
-    uncond_score_td = (uncond_score_flat @ v1.transpose(-2, -1)) * v1
-    return uncond_score_td.reshape_as(uncond_score).to(uncond_score.dtype)
-
-
-class TCFG(ComfyNodeABC):
-    @classmethod
-    def INPUT_TYPES(cls) -> InputTypeDict:
-        return {
-            "required": {
-                "model": (IO.MODEL, {}),
-            }
-        }
-
-    RETURN_TYPES = (IO.MODEL,)
-    RETURN_NAMES = ("patched_model",)
-    FUNCTION = "patch"
-
-    CATEGORY = "advanced/guidance"
-    DESCRIPTION = "TCFG – Tangential Damping CFG (2503.18137)\n\nRefine the uncond (negative) to align with the cond (positive) for improving quality."
-
-    def patch(self, model):
-        m = model.clone()
-
-        def tangential_damping_cfg(args):
-            #  Assume [cond, uncond, ...]
-            x = args["input"]
-            conds_out = args["conds_out"]
-            if len(conds_out) <= 1 or None in args["conds"][:2]:
-                # Skip when either cond or uncond is None
-                return conds_out
-            cond_pred = conds_out[0]
-            uncond_pred = conds_out[1]
-            uncond_td = score_tangential_damping(x - cond_pred, x - uncond_pred)
-            uncond_pred_td = x - uncond_td
-            return [cond_pred, uncond_pred_td] + conds_out[2:]
-
-        m.set_model_sampler_pre_cfg_function(tangential_damping_cfg)
-        return (m,)
-
-
-NODE_CLASS_MAPPINGS = {
-    "TCFG": TCFG,
-}
-
-NODE_DISPLAY_NAME_MAPPINGS = {
-    "TCFG": "Tangential Damping CFG",
-}
--- a/comfy_extras/nodes_train.py
+++ b/comfy_extras/nodes_train.py
@@ -20,82 +20,41 @@ import folder_paths
 import node_helpers
 from comfy.cli_args import args
 from comfy.comfy_types.node_typing import IO
-from comfy.weight_adapter import adapters, adapter_maps
-
-
-def make_batch_extra_option_dict(d, indicies, full_size=None):
-    new_dict = {}
-    for k, v in d.items():
-        newv = v
-        if isinstance(v, dict):
-            newv = make_batch_extra_option_dict(v, indicies, full_size=full_size)
-        elif isinstance(v, torch.Tensor):
-            if full_size is None or v.size(0) == full_size:
-                newv = v[indicies]
-        elif isinstance(v, (list, tuple)) and len(v) == full_size:
-            newv = [v[i] for i in indicies]
-        new_dict[k] = newv
-    return new_dict
+from comfy.weight_adapter import adapters


 class TrainSampler(comfy.samplers.Sampler):
-    def __init__(self, loss_fn, optimizer, loss_callback=None, batch_size=1, grad_acc=1, total_steps=1, seed=0, training_dtype=torch.bfloat16):
+
+    def __init__(self, loss_fn, optimizer, loss_callback=None):
        self.loss_fn = loss_fn
        self.optimizer = optimizer
        self.loss_callback = loss_callback
-        self.batch_size = batch_size
-        self.total_steps = total_steps
-        self.grad_acc = grad_acc
-        self.seed = seed
-        self.training_dtype = training_dtype

    def sample(self, model_wrap, sigmas, extra_args, callback, noise, latent_image=None, denoise_mask=None, disable_pbar=False):
-        cond = model_wrap.conds["positive"]
-        dataset_size = sigmas.size(0)
-        torch.cuda.empty_cache()
-        for i in (pbar:=tqdm.trange(self.total_steps, desc="Training LoRA", smoothing=0.01, disable=not comfy.utils.PROGRESS_BAR_ENABLED)):
-            noisegen = comfy_extras.nodes_custom_sampler.Noise_RandomNoise(self.seed + i * 1000)
-            indicies = torch.randperm(dataset_size)[:self.batch_size].tolist()
+        self.optimizer.zero_grad()
+        noise = model_wrap.inner_model.model_sampling.noise_scaling(sigmas, noise, latent_image, False)
+        latent = model_wrap.inner_model.model_sampling.noise_scaling(
+            torch.zeros_like(sigmas),
+            torch.zeros_like(noise, requires_grad=True),
+            latent_image,
+            False
+        )

-            batch_latent = torch.stack([latent_image[i] for i in indicies])
-            batch_noise = noisegen.generate_noise({"samples": batch_latent}).to(batch_latent.device)
-            batch_sigmas = [
-                model_wrap.inner_model.model_sampling.percent_to_sigma(
-                    torch.rand((1,)).item()
-                ) for _ in range(min(self.batch_size, dataset_size))
-            ]
-            batch_sigmas = torch.tensor(batch_sigmas).to(batch_latent.device)
+        # Ensure model is in training mode and computing gradients
+        # x0 pred
+        denoised = model_wrap(noise, sigmas, **extra_args)
+        try:
+            loss = self.loss_fn(denoised, latent.clone())
+        except RuntimeError as e:
+            if "does not require grad and does not have a grad_fn" in str(e):
+                logging.info("WARNING: This is likely due to the model is loaded in inference mode.")
+        loss.backward()
+        if self.loss_callback:
+            self.loss_callback(loss.item())

-            xt = model_wrap.inner_model.model_sampling.noise_scaling(
-                batch_sigmas,
-                batch_noise,
-                batch_latent,
-                False
-            )
-            x0 = model_wrap.inner_model.model_sampling.noise_scaling(
-                torch.zeros_like(batch_sigmas),
-                torch.zeros_like(batch_noise),
-                batch_latent,
-                False
-            )
-
-            model_wrap.conds["positive"] = [
-                cond[i] for i in indicies
-            ]
-            batch_extra_args = make_batch_extra_option_dict(extra_args, indicies, full_size=dataset_size)
-
-            with torch.autocast(xt.device.type, dtype=self.training_dtype):
-                x0_pred = model_wrap(xt, batch_sigmas, **batch_extra_args)
-                loss = self.loss_fn(x0_pred, x0)
-            loss.backward()
-            if self.loss_callback:
-                self.loss_callback(loss.item())
-            pbar.set_postfix({"loss": f"{loss.item():.4f}"})
-
-            if (i+1) % self.grad_acc == 0:
-                self.optimizer.step()
-                self.optimizer.zero_grad()
-        torch.cuda.empty_cache()
+        self.optimizer.step()
+        # torch.cuda.memory._dump_snapshot("trainn.pickle")
+        # torch.cuda.memory._record_memory_history(enabled=None)
        return torch.zeros_like(latent_image)


@@ -116,7 +75,7 @@ class BiasDiff(torch.nn.Module):
        return self.passive_memory_usage()


-def load_and_process_images(image_files, input_dir, resize_method="None", w=None, h=None):
+def load_and_process_images(image_files, input_dir, resize_method="None"):
    """Utility function to load and process a list of images.

    Args:
@@ -131,6 +90,7 @@ def load_and_process_images(image_files, input_dir, resize_method="None", w=None
        raise ValueError("No valid images found in input")

    output_images = []
+    w, h = None, None

    for file in image_files:
        image_path = os.path.join(input_dir, file)
@@ -246,103 +206,6 @@ class LoadImageSetFromFolderNode:
        return (output_tensor,)


-class LoadImageTextSetFromFolderNode:
-    @classmethod
-    def INPUT_TYPES(s):
-        return {
-            "required": {
-                "folder": (folder_paths.get_input_subfolders(), {"tooltip": "The folder to load images from."}),
-                "clip": (IO.CLIP, {"tooltip": "The CLIP model used for encoding the text."}),
-            },
-            "optional": {
-                "resize_method": (
-                    ["None", "Stretch", "Crop", "Pad"],
-                    {"default": "None"},
-                ),
-                "width": (
-                    IO.INT,
-                    {
-                        "default": -1,
-                        "min": -1,
-                        "max": 10000,
-                        "step": 1,
-                        "tooltip": "The width to resize the images to. -1 means use the original width.",
-                    },
-                ),
-                "height": (
-                    IO.INT,
-                    {
-                        "default": -1,
-                        "min": -1,
-                        "max": 10000,
-                        "step": 1,
-                        "tooltip": "The height to resize the images to. -1 means use the original height.",
-                    },
-                )
-            },
-        }
-
-    RETURN_TYPES = ("IMAGE", IO.CONDITIONING,)
-    FUNCTION = "load_images"
-    CATEGORY = "loaders"
-    EXPERIMENTAL = True
-    DESCRIPTION = "Loads a batch of images and caption from a directory for training."
-
-    def load_images(self, folder, clip, resize_method, width=None, height=None):
-        if clip is None:
-            raise RuntimeError("ERROR: clip input is invalid: None\n\nIf the clip is from a checkpoint loader node your checkpoint does not contain a valid clip or text encoder model.")
-
-        logging.info(f"Loading images from folder: {folder}")
-
-        sub_input_dir = os.path.join(folder_paths.get_input_directory(), folder)
-        valid_extensions = [".png", ".jpg", ".jpeg", ".webp"]
-
-        image_files = []
-        for item in os.listdir(sub_input_dir):
-            path = os.path.join(sub_input_dir, item)
-            if any(item.lower().endswith(ext) for ext in valid_extensions):
-                image_files.append(path)
-            elif os.path.isdir(path):
-                # Support kohya-ss/sd-scripts folder structure
-                repeat = 1
-                if item.split("_")[0].isdigit():
-                    repeat = int(item.split("_")[0])
-                image_files.extend([
-                    os.path.join(path, f) for f in os.listdir(path) if any(f.lower().endswith(ext) for ext in valid_extensions)
-                ] * repeat)
-
-        caption_file_path = [
-            f.replace(os.path.splitext(f)[1], ".txt")
-            for f in image_files
-        ]
-        captions = []
-        for caption_file in caption_file_path:
-            caption_path = os.path.join(sub_input_dir, caption_file)
-            if os.path.exists(caption_path):
-                with open(caption_path, "r", encoding="utf-8") as f:
-                    caption = f.read().strip()
-                    captions.append(caption)
-            else:
-                captions.append("")
-
-        width = width if width != -1 else None
-        height = height if height != -1 else None
-        output_tensor = load_and_process_images(image_files, sub_input_dir, resize_method, width, height)
-
-        logging.info(f"Loaded {len(output_tensor)} images from {sub_input_dir}.")
-
-        logging.info(f"Encoding captions from {sub_input_dir}.")
-        conditions = []
-        empty_cond = clip.encode_from_tokens_scheduled(clip.tokenize(""))
-        for text in captions:
-            if text == "":
-                conditions.append(empty_cond)
-            tokens = clip.tokenize(text)
-            conditions.extend(clip.encode_from_tokens_scheduled(tokens))
-        logging.info(f"Encoded {len(conditions)} captions from {sub_input_dir}.")
-        return (output_tensor, conditions)
-
-
 def draw_loss_graph(loss_map, steps):
    width, height = 500, 300
    img = Image.new("RGB", (width, height), "white")
@@ -420,16 +283,6 @@ class TrainLoraNode:
                        "tooltip": "The batch size to use for training.",
                    },
                ),
-                "grad_accumulation_steps": (
-                    IO.INT,
-                    {
-                        "default": 1,
-                        "min": 1,
-                        "max": 1024,
-                        "step": 1,
-                        "tooltip": "The number of gradient accumulation steps to use for training.",
-                    }
-                ),
                "steps": (
                    IO.INT,
                    {
@@ -489,17 +342,6 @@ class TrainLoraNode:
                    ["bf16", "fp32"],
                    {"default": "bf16", "tooltip": "The dtype to use for lora."},
                ),
-                "algorithm": (
-                    list(adapter_maps.keys()),
-                    {"default": list(adapter_maps.keys())[0], "tooltip": "The algorithm to use for training."},
-                ),
-                "gradient_checkpointing": (
-                    IO.BOOLEAN,
-                    {
-                        "default": True,
-                        "tooltip": "Use gradient checkpointing for training.",
-                    }
-                ),
                "existing_lora": (
                    folder_paths.get_filename_list("loras") + ["[None]"],
                    {
@@ -523,7 +365,6 @@ class TrainLoraNode:
        positive,
        batch_size,
        steps,
-        grad_accumulation_steps,
        learning_rate,
        rank,
        optimizer,
@@ -531,8 +372,6 @@ class TrainLoraNode:
        seed,
        training_dtype,
        lora_dtype,
-        algorithm,
-        gradient_checkpointing,
        existing_lora,
    ):
        mp = model.clone()
@@ -542,13 +381,6 @@ class TrainLoraNode:

        latents = latents["samples"].to(dtype)
        num_images = latents.shape[0]
-        logging.info(f"Total Images: {num_images}, Total Captions: {len(positive)}")
-        if len(positive) == 1 and num_images > 1:
-            positive = positive * num_images
-        elif len(positive) != num_images:
-            raise ValueError(
-                f"Number of positive conditions ({len(positive)}) does not match number of images ({num_images})."
-            )

        with torch.inference_mode(False):
            lora_sd = {}
@@ -583,8 +415,10 @@ class TrainLoraNode:
                                if existing_adapter is not None:
                                    break
                            else:
+                                # If no existing adapter found, use LoRA
+                                # We will add algo option in the future
                                existing_adapter = None
-                                adapter_cls = adapter_maps[algorithm]
+                                adapter_cls = adapters[0]

                            if existing_adapter is not None:
                                train_adapter = existing_adapter.to_train().to(lora_dtype)
@@ -638,45 +472,45 @@ class TrainLoraNode:
                criterion = torch.nn.SmoothL1Loss()

            # setup models
-            if gradient_checkpointing:
-                for m in find_all_highest_child_module_with_forward(mp.model.diffusion_model):
-                    patch(m)
-            mp.model.requires_grad_(False)
+            for m in find_all_highest_child_module_with_forward(mp.model.diffusion_model):
+                patch(m)
            comfy.model_management.load_models_gpu([mp], memory_required=1e20, force_full_load=True)

            # Setup sampler and guider like in test script
            loss_map = {"loss": []}
            def loss_callback(loss):
                loss_map["loss"].append(loss)
+                pbar.set_postfix({"loss": f"{loss:.4f}"})
            train_sampler = TrainSampler(
-                criterion,
-                optimizer,
-                loss_callback=loss_callback,
-                batch_size=batch_size,
-                grad_acc=grad_accumulation_steps,
-                total_steps=steps*grad_accumulation_steps,
-                seed=seed,
-                training_dtype=dtype
+                criterion, optimizer, loss_callback=loss_callback
            )
            guider = comfy_extras.nodes_custom_sampler.Guider_Basic(mp)
            guider.set_conds(positive)  # Set conditioning from input
+            ss = comfy_extras.nodes_custom_sampler.SamplerCustomAdvanced()
+
+            # yoland: this currently resize to the first image in the dataset

            # Training loop
+            torch.cuda.empty_cache()
            try:
-                # Generate dummy sigmas and noise
-                sigmas = torch.tensor(range(num_images))
-                noise = comfy_extras.nodes_custom_sampler.Noise_RandomNoise(seed)
-                guider.sample(
-                    noise.generate_noise({"samples": latents}),
-                    latents,
-                    train_sampler,
-                    sigmas,
-                    seed=noise.seed
-                )
+                for step in (pbar:=tqdm.trange(steps, desc="Training LoRA", smoothing=0.01, disable=not comfy.utils.PROGRESS_BAR_ENABLED)):
+                    # Generate random sigma
+                    sigma = mp.model.model_sampling.percent_to_sigma(
+                        torch.rand((1,)).item()
+                    )
+                    sigma = torch.tensor([sigma])
+
+                    noise = comfy_extras.nodes_custom_sampler.Noise_RandomNoise(step * 1000 + seed)
+
+                    indices = torch.randperm(num_images)[:batch_size]
+                    ss.sample(
+                        noise, guider, train_sampler, sigma, {"samples": latents[indices].clone()}
+                    )
            finally:
                for m in mp.model.modules():
                    unpatch(m)
-            del train_sampler, optimizer
+            del ss, train_sampler, optimizer
+            torch.cuda.empty_cache()

            for adapter in all_weight_adapters:
                adapter.requires_grad_(False)
@@ -718,9 +552,6 @@ class LoraModelLoader:


 class SaveLoRA:
-    def __init__(self):
-        self.output_dir = folder_paths.get_output_directory()
-
    @classmethod
    def INPUT_TYPES(s):
        return {
@@ -734,7 +565,7 @@ class SaveLoRA:
                "prefix": (
                    "STRING",
                    {
-                        "default": "loras/ComfyUI_trained_lora",
+                        "default": "trained_lora",
                        "tooltip": "The prefix to use for the saved LoRA file.",
                    },
                ),
@@ -757,13 +588,12 @@ class SaveLoRA:
    OUTPUT_NODE = True

    def save(self, lora, prefix, steps=None):
-        full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path(prefix, self.output_dir)
+        date = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        if steps is None:
-            output_checkpoint = f"{filename}_{counter:05}_.safetensors"
+            output_file = f"models/loras/{prefix}_{date}_lora.safetensors"
        else:
-            output_checkpoint = f"{filename}_{steps}_steps_{counter:05}_.safetensors"
-        output_checkpoint = os.path.join(full_output_folder, output_checkpoint)
-        safetensors.torch.save_file(lora, output_checkpoint)
+            output_file = f"models/loras/{prefix}_{steps}_steps_{date}_lora.safetensors"
+        safetensors.torch.save_file(lora, output_file)
        return {}


@@ -863,7 +693,6 @@ NODE_CLASS_MAPPINGS = {
    "SaveLoRANode": SaveLoRA,
    "LoraModelLoader": LoraModelLoader,
    "LoadImageSetFromFolderNode": LoadImageSetFromFolderNode,
-    "LoadImageTextSetFromFolderNode": LoadImageTextSetFromFolderNode,
    "LossGraphNode": LossGraphNode,
 }

@@ -872,6 +701,5 @@ NODE_DISPLAY_NAME_MAPPINGS = {
    "SaveLoRANode": "Save LoRA Weights",
    "LoraModelLoader": "Load LoRA Model",
    "LoadImageSetFromFolderNode": "Load Image Dataset from Folder",
-    "LoadImageTextSetFromFolderNode": "Load Image and Text Dataset from Folder",
    "LossGraphNode": "Plot Loss Graph",
 }
--- a/comfy_extras/nodes_wan.py
+++ b/comfy_extras/nodes_wan.py
@@ -1,4 +1,3 @@
-import math
 import nodes
 import node_helpers
 import torch
@@ -6,9 +5,7 @@ import comfy.model_management
 import comfy.utils
 import comfy.latent_formats
 import comfy.clip_vision
-import json
-import numpy as np
-from typing import Tuple
+

 class WanImageToVideo:
    @classmethod
@@ -386,350 +383,7 @@ class WanPhantomSubjectToVideo:
        out_latent["samples"] = latent
        return (positive, cond2, negative, out_latent)

-def parse_json_tracks(tracks):
-    """Parse JSON track data into a standardized format"""
-    tracks_data = []
-    try:
-        # If tracks is a string, try to parse it as JSON
-        if isinstance(tracks, str):
-            parsed = json.loads(tracks.replace("'", '"'))
-            tracks_data.extend(parsed)
-        else:
-            # If tracks is a list of strings, parse each one
-            for track_str in tracks:
-                parsed = json.loads(track_str.replace("'", '"'))
-                tracks_data.append(parsed)
-
-        # Check if we have a single track (dict with x,y) or a list of tracks
-        if tracks_data and isinstance(tracks_data[0], dict) and 'x' in tracks_data[0]:
-            # Single track detected, wrap it in a list
-            tracks_data = [tracks_data]
-        elif tracks_data and isinstance(tracks_data[0], list) and tracks_data[0] and isinstance(tracks_data[0][0], dict) and 'x' in tracks_data[0][0]:
-            # Already a list of tracks, nothing to do
-            pass
-        else:
-            # Unexpected format
-            pass
-
-    except json.JSONDecodeError:
-        tracks_data = []
-    return tracks_data
-
-def process_tracks(tracks_np: np.ndarray, frame_size: Tuple[int, int], num_frames, quant_multi: int = 8, **kwargs):
-    # tracks: shape [t, h, w, 3] => samples align with 24 fps, model trained with 16 fps.
-    # frame_size: tuple (W, H)
-    tracks = torch.from_numpy(tracks_np).float()
-
-    if tracks.shape[1] == 121:
-        tracks = torch.permute(tracks, (1, 0, 2, 3))
-
-    tracks, visibles = tracks[..., :2], tracks[..., 2:3]
-
-    short_edge = min(*frame_size)
-
-    frame_center = torch.tensor([*frame_size]).type_as(tracks) / 2
-    tracks = tracks - frame_center
-
-    tracks = tracks / short_edge * 2
-
-    visibles = visibles * 2 - 1
-
-    trange = torch.linspace(-1, 1, tracks.shape[0]).view(-1, 1, 1, 1).expand(*visibles.shape)
-
-    out_ = torch.cat([trange, tracks, visibles], dim=-1).view(121, -1, 4)
-
-    out_0 = out_[:1]
-
-    out_l = out_[1:] # 121 => 120 | 1
-    a = 120 // math.gcd(120, num_frames)
-    b = num_frames // math.gcd(120, num_frames)
-    out_l = torch.repeat_interleave(out_l, b, dim=0)[1::a]  # 120 => 120 * b => 120 * b / a == F
-
-    final_result = torch.cat([out_0, out_l], dim=0)
-
-    return final_result
-
-FIXED_LENGTH = 121
-def pad_pts(tr):
-    """Convert list of {x,y} to (FIXED_LENGTH,1,3) array, padding/truncating."""
-    pts = np.array([[p['x'], p['y'], 1] for p in tr], dtype=np.float32)
-    n = pts.shape[0]
-    if n < FIXED_LENGTH:
-        pad = np.zeros((FIXED_LENGTH - n, 3), dtype=np.float32)
-        pts = np.vstack((pts, pad))
-    else:
-        pts = pts[:FIXED_LENGTH]
-    return pts.reshape(FIXED_LENGTH, 1, 3)
-
-def ind_sel(target: torch.Tensor, ind: torch.Tensor, dim: int = 1):
-    """Index selection utility function"""
-    assert (
-        len(ind.shape) > dim
-    ), "Index must have the target dim, but get dim: %d, ind shape: %s" % (dim, str(ind.shape))
-
-    target = target.expand(
-        *tuple(
-            [ind.shape[k] if target.shape[k] == 1 else -1 for k in range(dim)]
-            + [
-                -1,
-            ]
-            * (len(target.shape) - dim)
-        )
-    )
-
-    ind_pad = ind
-
-    if len(target.shape) > dim + 1:
-        for _ in range(len(target.shape) - (dim + 1)):
-            ind_pad = ind_pad.unsqueeze(-1)
-        ind_pad = ind_pad.expand(*(-1,) * (dim + 1), *target.shape[(dim + 1) : :])
-
-    return torch.gather(target, dim=dim, index=ind_pad)
-
-def merge_final(vert_attr: torch.Tensor, weight: torch.Tensor, vert_assign: torch.Tensor):
-    """Merge vertex attributes with weights"""
-    target_dim = len(vert_assign.shape) - 1
-    if len(vert_attr.shape) == 2:
-        assert vert_attr.shape[0] > vert_assign.max()
-        new_shape = [1] * target_dim + list(vert_attr.shape)
-        tensor = vert_attr.reshape(new_shape)
-        sel_attr = ind_sel(tensor, vert_assign.type(torch.long), dim=target_dim)
-    else:
-        assert vert_attr.shape[1] > vert_assign.max()
-        new_shape = [vert_attr.shape[0]] + [1] * (target_dim - 1) + list(vert_attr.shape[1:])
-        tensor = vert_attr.reshape(new_shape)
-        sel_attr = ind_sel(tensor, vert_assign.type(torch.long), dim=target_dim)
-
-    final_attr = torch.sum(sel_attr * weight.unsqueeze(-1), dim=-2)
-    return final_attr
-
-
-def _patch_motion_single(
-    tracks: torch.FloatTensor,  # (B, T, N, 4)
-    vid: torch.FloatTensor,     # (C, T, H, W)
-    temperature: float,
-    vae_divide: tuple,
-    topk: int,
-):
-    """Apply motion patching based on tracks"""
-    _, T, H, W = vid.shape
-    N = tracks.shape[2]
-    _, tracks_xy, visible = torch.split(
-        tracks, [1, 2, 1], dim=-1
-    )  # (B, T, N, 2) | (B, T, N, 1)
-    tracks_n = tracks_xy / torch.tensor([W / min(H, W), H / min(H, W)], device=tracks_xy.device)
-    tracks_n = tracks_n.clamp(-1, 1)
-    visible = visible.clamp(0, 1)
-
-    xx = torch.linspace(-W / min(H, W), W / min(H, W), W)
-    yy = torch.linspace(-H / min(H, W), H / min(H, W), H)
-
-    grid = torch.stack(torch.meshgrid(yy, xx, indexing="ij")[::-1], dim=-1).to(
-        tracks_xy.device
-    )
-
-    tracks_pad = tracks_xy[:, 1:]
-    visible_pad = visible[:, 1:]
-
-    visible_align = visible_pad.view(T - 1, 4, *visible_pad.shape[2:]).sum(1)
-    tracks_align = (tracks_pad * visible_pad).view(T - 1, 4, *tracks_pad.shape[2:]).sum(
-        1
-    ) / (visible_align + 1e-5)
-    dist_ = (
-        (tracks_align[:, None, None] - grid[None, :, :, None]).pow(2).sum(-1)
-    )  # T, H, W, N
-    weight = torch.exp(-dist_ * temperature) * visible_align.clamp(0, 1).view(
-        T - 1, 1, 1, N
-    )
-    vert_weight, vert_index = torch.topk(
-        weight, k=min(topk, weight.shape[-1]), dim=-1
-    )
-
-    grid_mode = "bilinear"
-    point_feature = torch.nn.functional.grid_sample(
-        vid.permute(1, 0, 2, 3)[:1],
-        tracks_n[:, :1].type(vid.dtype),
-        mode=grid_mode,
-        padding_mode="zeros",
-        align_corners=False,
-    )
-    point_feature = point_feature.squeeze(0).squeeze(1).permute(1, 0) # N, C=16
-
-    out_feature = merge_final(point_feature, vert_weight, vert_index).permute(3, 0, 1, 2) # T - 1, H, W, C => C, T - 1, H, W
-    out_weight = vert_weight.sum(-1) # T - 1, H, W
-
-    # out feature -> already soft weighted
-    mix_feature = out_feature + vid[:, 1:] * (1 - out_weight.clamp(0, 1))
-
-    out_feature_full = torch.cat([vid[:, :1], mix_feature], dim=1) # C, T, H, W
-    out_mask_full = torch.cat([torch.ones_like(out_weight[:1]), out_weight], dim=0)  # T, H, W
-
-    return out_mask_full[None].expand(vae_divide[0], -1, -1, -1), out_feature_full
-
-
-def patch_motion(
-    tracks: torch.FloatTensor,  # (B, TB, T, N, 4)
-    vid: torch.FloatTensor,     # (C, T, H, W)
-    temperature: float = 220.0,
-    vae_divide: tuple = (4, 16),
-    topk: int = 2,
-):
-    B = len(tracks)
-
-    # Process each batch separately
-    out_masks = []
-    out_features = []
-
-    for b in range(B):
-        mask, feature = _patch_motion_single(
-            tracks[b],  # (T, N, 4)
-            vid[b],        # (C, T, H, W)
-            temperature,
-            vae_divide,
-            topk
-        )
-        out_masks.append(mask)
-        out_features.append(feature)
-
-    # Stack results: (B, C, T, H, W)
-    out_mask_full = torch.stack(out_masks, dim=0)
-    out_feature_full = torch.stack(out_features, dim=0)
-
-    return out_mask_full, out_feature_full
-
-class WanTrackToVideo:
-    @classmethod
-    def INPUT_TYPES(s):
-        return {"required": {
-                    "positive": ("CONDITIONING", ),
-                    "negative": ("CONDITIONING", ),
-                    "vae": ("VAE", ),
-                    "tracks": ("STRING", {"multiline": True, "default": "[]"}),
-                    "width": ("INT", {"default": 832, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 16}),
-                    "height": ("INT", {"default": 480, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 16}),
-                    "length": ("INT", {"default": 81, "min": 1, "max": nodes.MAX_RESOLUTION, "step": 4}),
-                    "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
-                    "temperature": ("FLOAT", {"default": 220.0, "min": 1.0, "max": 1000.0, "step": 0.1}),
-                    "topk": ("INT", {"default": 2, "min": 1, "max": 10}),
-                    "start_image": ("IMAGE", ),
-                },
-                "optional": {
-                    "clip_vision_output": ("CLIP_VISION_OUTPUT", ),
-                }}
-
-    RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
-    RETURN_NAMES = ("positive", "negative", "latent")
-    FUNCTION = "encode"
-
-    CATEGORY = "conditioning/video_models"
-
-    def encode(self, positive, negative, vae, tracks, width, height, length, batch_size,
-               temperature, topk, start_image=None, clip_vision_output=None):
-
-        tracks_data = parse_json_tracks(tracks)
-
-        if not tracks_data:
-            return WanImageToVideo().encode(positive, negative, vae, width, height, length, batch_size, start_image=start_image, clip_vision_output=clip_vision_output)
-
-        latent = torch.zeros([batch_size, 16, ((length - 1) // 4) + 1, height // 8, width // 8],
-                           device=comfy.model_management.intermediate_device())
-
-        if isinstance(tracks_data[0][0], dict):
-            tracks_data = [tracks_data]
-
-        processed_tracks = []
-        for batch in tracks_data:
-            arrs = []
-            for track in batch:
-                pts = pad_pts(track)
-                arrs.append(pts)
-
-            tracks_np = np.stack(arrs, axis=0)
-            processed_tracks.append(process_tracks(tracks_np, (width, height), length - 1).unsqueeze(0))
-
-        if start_image is not None:
-            start_image = comfy.utils.common_upscale(start_image[:batch_size].movedim(-1, 1), width, height, "bilinear", "center").movedim(1, -1)
-            videos = torch.ones((start_image.shape[0], length, height, width, start_image.shape[-1]), device=start_image.device, dtype=start_image.dtype) * 0.5
-            for i in range(start_image.shape[0]):
-                videos[i, 0] = start_image[i]
-
-            latent_videos = []
-            videos = comfy.utils.resize_to_batch_size(videos, batch_size)
-            for i in range(batch_size):
-                latent_videos += [vae.encode(videos[i, :, :, :, :3])]
-            y = torch.cat(latent_videos, dim=0)
-
-            # Scale latent since patch_motion is non-linear
-            y = comfy.latent_formats.Wan21().process_in(y)
-
-            processed_tracks = comfy.utils.resize_list_to_batch_size(processed_tracks, batch_size)
-            res = patch_motion(
-                processed_tracks, y, temperature=temperature, topk=topk, vae_divide=(4, 16)
-            )
-
-            mask, concat_latent_image = res
-            concat_latent_image = comfy.latent_formats.Wan21().process_out(concat_latent_image)
-            mask = -mask + 1.0  # Invert mask to match expected format
-            positive = node_helpers.conditioning_set_values(positive,
-                                                            {"concat_mask": mask,
-                                                            "concat_latent_image": concat_latent_image})
-            negative = node_helpers.conditioning_set_values(negative,
-                                                            {"concat_mask": mask,
-                                                            "concat_latent_image": concat_latent_image})
-
-        if clip_vision_output is not None:
-            positive = node_helpers.conditioning_set_values(positive, {"clip_vision_output": clip_vision_output})
-            negative = node_helpers.conditioning_set_values(negative, {"clip_vision_output": clip_vision_output})
-
-        out_latent = {}
-        out_latent["samples"] = latent
-        return (positive, negative, out_latent)
-
-
-class Wan22ImageToVideoLatent:
-    @classmethod
-    def INPUT_TYPES(s):
-        return {"required": {"vae": ("VAE", ),
-                             "width": ("INT", {"default": 1280, "min": 32, "max": nodes.MAX_RESOLUTION, "step": 32}),
-                             "height": ("INT", {"default": 704, "min": 32, "max": nodes.MAX_RESOLUTION, "step": 32}),
-                             "length": ("INT", {"default": 49, "min": 1, "max": nodes.MAX_RESOLUTION, "step": 4}),
-                             "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
-                },
-                "optional": {"start_image": ("IMAGE", ),
-                }}
-
-
-    RETURN_TYPES = ("LATENT",)
-    FUNCTION = "encode"
-
-    CATEGORY = "conditioning/inpaint"
-
-    def encode(self, vae, width, height, length, batch_size, start_image=None):
-        latent = torch.zeros([1, 48, ((length - 1) // 4) + 1, height // 16, width // 16], device=comfy.model_management.intermediate_device())
-
-        if start_image is None:
-            out_latent = {}
-            out_latent["samples"] = latent
-            return (out_latent,)
-
-        mask = torch.ones([latent.shape[0], 1, ((length - 1) // 4) + 1, latent.shape[-2], latent.shape[-1]], device=comfy.model_management.intermediate_device())
-
-        if start_image is not None:
-            start_image = comfy.utils.common_upscale(start_image[:length].movedim(-1, 1), width, height, "bilinear", "center").movedim(1, -1)
-            latent_temp = vae.encode(start_image)
-            latent[:, :, :latent_temp.shape[-3]] = latent_temp
-            mask[:, :, :latent_temp.shape[-3]] *= 0.0
-
-        out_latent = {}
-        latent_format = comfy.latent_formats.Wan22()
-        latent = latent_format.process_out(latent) * mask + latent * (1.0 - mask)
-        out_latent["samples"] = latent.repeat((batch_size, ) + (1,) * (latent.ndim - 1))
-        out_latent["noise_mask"] = mask.repeat((batch_size, ) + (1,) * (mask.ndim - 1))
-        return (out_latent,)
-
-
 NODE_CLASS_MAPPINGS = {
-    "WanTrackToVideo": WanTrackToVideo,
    "WanImageToVideo": WanImageToVideo,
    "WanFunControlToVideo": WanFunControlToVideo,
    "WanFunInpaintToVideo": WanFunInpaintToVideo,
@@ -738,5 +392,4 @@ NODE_CLASS_MAPPINGS = {
    "TrimVideoLatent": TrimVideoLatent,
    "WanCameraImageToVideo": WanCameraImageToVideo,
    "WanPhantomSubjectToVideo": WanPhantomSubjectToVideo,
-    "Wan22ImageToVideoLatent": Wan22ImageToVideoLatent,
 }
--- a/comfyui_version.py
+++ b/comfyui_version.py
@@ -1,3 +1,3 @@
 # This file is automatically generated by the build process when version is
 # updated in pyproject.toml.
-__version__ = "0.3.47"
+__version__ = "0.3.40"
--- a/cuda_malloc.py
+++ b/cuda_malloc.py
@@ -74,8 +74,7 @@ if not args.cuda_malloc:
                module = importlib.util.module_from_spec(spec)
                spec.loader.exec_module(module)
                version = module.__version__
-
-        if int(version[0]) >= 2 and "+cu" in version: #enable by default for torch version 2.0 and up only on cuda torch
+        if int(version[0]) >= 2: #enable by default for torch version 2.0 and up
            args.cuda_malloc = cuda_malloc_supported()
    except:
        pass
--- a/execution.py
+++ b/execution.py
@@ -123,8 +123,6 @@ class CacheSet:
        }
        return result

-SENSITIVE_EXTRA_DATA_KEYS = ("auth_token_comfy_org", "api_key_comfy_org")
-
 def get_input_data(inputs, class_def, unique_id, outputs=None, dynprompt=None, extra_data={}):
    valid_inputs = class_def.INPUT_TYPES()
    input_data_all = {}
@@ -345,17 +343,7 @@ async def execute(server, dynprompt, caches, current_item, extra_data, executed,
    input_data_all = None
    try:
        if unique_id in pending_async_nodes:
-            results = []
-            for r in pending_async_nodes[unique_id]:
-                if isinstance(r, asyncio.Task):
-                    try:
-                        results.append(r.result())
-                    except Exception as ex:
-                        # An async task failed - propagate the exception up
-                        del pending_async_nodes[unique_id]
-                        raise ex
-                else:
-                    results.append(r)
+            results = [r.result() if isinstance(r, asyncio.Task) else r for r in pending_async_nodes[unique_id]]
            del pending_async_nodes[unique_id]
            output_data, output_ui, has_subgraph = get_output_from_returns(results, class_def)
        elif unique_id in pending_subgraph_results:
@@ -430,7 +418,7 @@ async def execute(server, dynprompt, caches, current_item, extra_data, executed,
                unblock = execution_list.add_external_block(unique_id)
                async def await_completion():
                    tasks = [x for x in output_data if isinstance(x, asyncio.Task)]
-                    await asyncio.gather(*tasks, return_exceptions=True)
+                    await asyncio.gather(*tasks)
                    unblock()
                asyncio.create_task(await_completion())
                return (ExecutionResult.PENDING, None, None)
@@ -505,20 +493,17 @@ async def execute(server, dynprompt, caches, current_item, extra_data, executed,

        logging.error(f"!!! Exception during processing !!! {ex}")
        logging.error(traceback.format_exc())
-        tips = ""
-
-        if isinstance(ex, comfy.model_management.OOM_EXCEPTION):
-            tips = "This error means you ran out of memory on your GPU.\n\nTIPS: If the workflow worked before you might have accidentally set the batch_size to a large number."
-            logging.error("Got an OOM, unloading all loaded models.")
-            comfy.model_management.unload_all_models()

        error_details = {
            "node_id": real_node_id,
-            "exception_message": "{}\n{}".format(ex, tips),
+            "exception_message": str(ex),
            "exception_type": exception_type,
            "traceback": traceback.format_tb(tb),
            "current_inputs": input_data_formatted
        }
+        if isinstance(ex, comfy.model_management.OOM_EXCEPTION):
+            logging.error("Got an OOM, unloading all loaded models.")
+            comfy.model_management.unload_all_models()

        return (ExecutionResult.FAILURE, error_details, ex)

@@ -1047,11 +1032,6 @@ class PromptQueue:
            if status is not None:
                status_dict = copy.deepcopy(status._asdict())

-            # Remove sensitive data from extra_data before storing in history
-            for sensitive_val in SENSITIVE_EXTRA_DATA_KEYS:
-                if sensitive_val in prompt[3]:
-                    prompt[3].pop(sensitive_val)
-
            self.history[prompt[1]] = {
                "prompt": prompt,
                "outputs": {},
@@ -1097,7 +1077,7 @@ class PromptQueue:
                    return True
        return False

-    def get_history(self, prompt_id=None, max_items=None, offset=-1, map_function=None):
+    def get_history(self, prompt_id=None, max_items=None, offset=-1):
        with self.mutex:
            if prompt_id is None:
                out = {}
@@ -1106,21 +1086,13 @@ class PromptQueue:
                    offset = len(self.history) - max_items
                for k in self.history:
                    if i >= offset:
-                        p = self.history[k]
-                        if map_function is not None:
-                            p = map_function(p)
-                        out[k] = p
+                        out[k] = self.history[k]
                        if max_items is not None and len(out) >= max_items:
                            break
                    i += 1
                return out
            elif prompt_id in self.history:
-                p = self.history[prompt_id]
-                if map_function is None:
-                    p = copy.deepcopy(p)
-                else:
-                    p = map_function(p)
-                return {prompt_id: p}
+                return {prompt_id: copy.deepcopy(self.history[prompt_id])}
            else:
                return {}

--- a/main.py
+++ b/main.py
@@ -13,7 +13,6 @@ import logging
 import sys
 from comfy_execution.progress import get_progress_state
 from comfy_execution.utils import get_executing_context
-from comfy_api import feature_flags

 if __name__ == "__main__":
    #NOTE: These do not do anything on core ComfyUI, they are for custom nodes.
@@ -58,9 +57,6 @@ def apply_custom_paths():


 def execute_prestartup_script():
-    if args.disable_all_custom_nodes and len(args.whitelist_custom_nodes) == 0:
-        return
-
    def execute_script(script_path):
        module_name = os.path.splitext(script_path)[0]
        try:
@@ -72,6 +68,9 @@ def execute_prestartup_script():
            logging.error(f"Failed to execute startup-script: {script_path} / {e}")
        return False

+    if args.disable_all_custom_nodes:
+        return
+
    node_paths = folder_paths.get_folder_paths("custom_nodes")
    for custom_node_path in node_paths:
        possible_modules = os.listdir(custom_node_path)
@@ -84,9 +83,6 @@ def execute_prestartup_script():

            script_path = os.path.join(module_path, "prestartup_script.py")
            if os.path.exists(script_path):
-                if args.disable_all_custom_nodes and possible_module not in args.whitelist_custom_nodes:
-                    logging.info(f"Prestartup Skipping {possible_module} due to disable_all_custom_nodes and whitelist_custom_nodes")
-                    continue
                time_before = time.perf_counter()
                success = execute_script(script_path)
                node_prestartup_times.append((time.perf_counter() - time_before, module_path, success))
@@ -115,15 +111,6 @@ if os.name == "nt":
    logging.getLogger("xformers").addFilter(lambda record: 'A matching Triton is not available' not in record.getMessage())

 if __name__ == "__main__":
-    if args.default_device is not None:
-        default_dev = args.default_device
-        devices = list(range(32))
-        devices.remove(default_dev)
-        devices.insert(0, default_dev)
-        devices = ','.join(map(str, devices))
-        os.environ['CUDA_VISIBLE_DEVICES'] = str(devices)
-        os.environ['HIP_VISIBLE_DEVICES'] = str(devices)
-
    if args.cuda_device is not None:
        os.environ['CUDA_VISIBLE_DEVICES'] = str(args.cuda_device)
        os.environ['HIP_VISIBLE_DEVICES'] = str(args.cuda_device)
@@ -139,9 +126,6 @@ if __name__ == "__main__":

    import cuda_malloc

-if 'torch' in sys.modules:
-    logging.warning("WARNING: Potential Error in code: Torch already imported, torch should never be imported before this point.")
-
 import comfy.utils

 import execution
@@ -203,13 +187,7 @@ def prompt_worker(q, server_instance):

            current_time = time.perf_counter()
            execution_time = current_time - execution_start_time
-
-            # Log Time in a more readable way after 10 minutes
-            if execution_time > 600:
-                execution_time = time.strftime("%H:%M:%S", time.gmtime(execution_time))
-                logging.info(f"Prompt executed in {execution_time}")
-            else:
-                logging.info("Prompt executed in {:.2f} seconds".format(execution_time))
+            logging.info("Prompt executed in {:.2f} seconds".format(execution_time))

        flags = q.get_flags()
        free_memory = flags.get("free_memory", False)
@@ -259,17 +237,9 @@ def hijack_progress(server_instance):

        server_instance.send_sync("progress", progress, server_instance.client_id)
        if preview_image is not None:
-            # Only send old method if client doesn't support preview metadata
-            if not feature_flags.supports_feature(
-                server_instance.sockets_metadata,
-                server_instance.client_id,
-                "supports_preview_metadata",
-            ):
-                server_instance.send_sync(
-                    BinaryEventTypes.UNENCODED_PREVIEW_IMAGE,
-                    preview_image,
-                    server_instance.client_id,
-                )
+            # Also send old method for backward compatibility
+            # TODO - Remove after this repo is updated to frontend with metadata support
+            server_instance.send_sync(BinaryEventTypes.UNENCODED_PREVIEW_IMAGE, preview_image, server_instance.client_id)

    comfy.utils.set_progress_bar_global_hook(hook)

@@ -313,10 +283,7 @@ def start_comfyui(asyncio_loop=None):
    prompt_server = server.PromptServer(asyncio_loop)

    hook_breaker_ac10a0.save_functions()
-    nodes.init_extra_nodes(
-        init_custom_nodes=(not args.disable_all_custom_nodes) or len(args.whitelist_custom_nodes) > 0,
-        init_api_nodes=not args.disable_api_nodes
-    )
+    nodes.init_extra_nodes(init_custom_nodes=not args.disable_all_custom_nodes, init_api_nodes=not args.disable_api_nodes)
    hook_breaker_ac10a0.restore_functions()

    cuda_malloc_warning()
--- a/nodes.py
+++ b/nodes.py
@@ -920,7 +920,7 @@ class CLIPLoader:
    @classmethod
    def INPUT_TYPES(s):
        return {"required": { "clip_name": (folder_paths.get_filename_list("text_encoders"), ),
-                              "type": (["stable_diffusion", "stable_cascade", "sd3", "stable_audio", "mochi", "ltxv", "pixart", "cosmos", "lumina2", "wan", "hidream", "chroma", "ace", "omnigen2"], ),
+                              "type": (["stable_diffusion", "stable_cascade", "sd3", "stable_audio", "mochi", "ltxv", "pixart", "cosmos", "lumina2", "wan", "hidream", "chroma", "ace"], ),
                              },
                "optional": {
                              "device": (["default", "cpu"], {"advanced": True}),
@@ -930,7 +930,7 @@ class CLIPLoader:

    CATEGORY = "advanced/loaders"

-    DESCRIPTION = "[Recipes]\n\nstable_diffusion: clip-l\nstable_cascade: clip-g\nsd3: t5 xxl/ clip-g / clip-l\nstable_audio: t5 base\nmochi: t5 xxl\ncosmos: old t5 xxl\nlumina2: gemma 2 2B\nwan: umt5 xxl\n hidream: llama-3.1 (Recommend) or t5\nomnigen2: qwen vl 2.5 3B"
+    DESCRIPTION = "[Recipes]\n\nstable_diffusion: clip-l\nstable_cascade: clip-g\nsd3: t5 xxl/ clip-g / clip-l\nstable_audio: t5 base\nmochi: t5 xxl\ncosmos: old t5 xxl\nlumina2: gemma 2 2B\nwan: umt5 xxl\n hidream: llama-3.1 (Recommend) or t5"

    def load_clip(self, clip_name, type="stable_diffusion", device="default"):
        clip_type = getattr(comfy.sd.CLIPType, type.upper(), comfy.sd.CLIPType.STABLE_DIFFUSION)
@@ -2187,9 +2187,6 @@ def init_external_custom_nodes():
            module_path = os.path.join(custom_node_path, possible_module)
            if os.path.isfile(module_path) and os.path.splitext(module_path)[1] != ".py": continue
            if module_path.endswith(".disabled"): continue
-            if args.disable_all_custom_nodes and possible_module not in args.whitelist_custom_nodes:
-                logging.info(f"Skipping {possible_module} due to disable_all_custom_nodes and whitelist_custom_nodes")
-                continue
            time_before = time.perf_counter()
            success = load_custom_node(module_path, base_node_names, module_parent="custom_nodes")
            node_import_times.append((time.perf_counter() - time_before, module_path, success))
@@ -2282,8 +2279,6 @@ def init_builtin_extra_nodes():
        "nodes_ace.py",
        "nodes_string.py",
        "nodes_camera_trajectory.py",
-        "nodes_edit_model.py",
-        "nodes_tcfg.py"
    ]

    import_failed = []
@@ -2310,7 +2305,6 @@ def init_builtin_api_nodes():
        "nodes_pika.py",
        "nodes_runway.py",
        "nodes_tripo.py",
-        "nodes_moonvalley.py",
        "nodes_rodin.py",
        "nodes_gemini.py",
    ]
--- a/notebooks/comfyui_colab.ipynb
+++ b/notebooks/comfyui_colab.ipynb
@@ -0,0 +1,322 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "aaaaaaaaaa"
+   },
+   "source": [
+    "Git clone the repo and install the requirements. (ignore the pip errors about protobuf)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "bbbbbbbbbb"
+   },
+   "outputs": [],
+   "source": [
+    "#@title Environment Setup\n",
+    "\n",
+    "\n",
+    "OPTIONS = {}\n",
+    "\n",
+    "USE_GOOGLE_DRIVE = False  #@param {type:\"boolean\"}\n",
+    "UPDATE_COMFY_UI = True  #@param {type:\"boolean\"}\n",
+    "WORKSPACE = 'ComfyUI'\n",
+    "OPTIONS['USE_GOOGLE_DRIVE'] = USE_GOOGLE_DRIVE\n",
+    "OPTIONS['UPDATE_COMFY_UI'] = UPDATE_COMFY_UI\n",
+    "\n",
+    "if OPTIONS['USE_GOOGLE_DRIVE']:\n",
+    "    !echo \"Mounting Google Drive...\"\n",
+    "    %cd /\n",
+    "    \n",
+    "    from google.colab import drive\n",
+    "    drive.mount('/content/drive')\n",
+    "\n",
+    "    WORKSPACE = \"/content/drive/MyDrive/ComfyUI\"\n",
+    "    %cd /content/drive/MyDrive\n",
+    "\n",
+    "![ ! -d $WORKSPACE ] && echo -= Initial setup ComfyUI =- && git clone https://github.com/comfyanonymous/ComfyUI\n",
+    "%cd $WORKSPACE\n",
+    "\n",
+    "if OPTIONS['UPDATE_COMFY_UI']:\n",
+    "  !echo -= Updating ComfyUI =-\n",
+    "  !git pull\n",
+    "\n",
+    "!echo -= Install dependencies =-\n",
+    "!pip install xformers!=0.0.18 -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu121 --extra-index-url https://download.pytorch.org/whl/cu118 --extra-index-url https://download.pytorch.org/whl/cu117"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "cccccccccc"
+   },
+   "source": [
+    "Download some models/checkpoints/vae or custom comfyui nodes (uncomment the commands for the ones you want)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "dddddddddd"
+   },
+   "outputs": [],
+   "source": [
+    "# Checkpoints\n",
+    "\n",
+    "### SDXL\n",
+    "### I recommend these workflow examples: https://comfyanonymous.github.io/ComfyUI_examples/sdxl/\n",
+    "\n",
+    "#!wget -c https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors -P ./models/checkpoints/\n",
+    "#!wget -c https://huggingface.co/stabilityai/stable-diffusion-xl-refiner-1.0/resolve/main/sd_xl_refiner_1.0.safetensors -P ./models/checkpoints/\n",
+    "\n",
+    "# SDXL ReVision\n",
+    "#!wget -c https://huggingface.co/comfyanonymous/clip_vision_g/resolve/main/clip_vision_g.safetensors -P ./models/clip_vision/\n",
+    "\n",
+    "# SD1.5\n",
+    "!wget -c https://huggingface.co/Comfy-Org/stable-diffusion-v1-5-archive/resolve/main/v1-5-pruned-emaonly-fp16.safetensors -P ./models/checkpoints/\n",
+    "\n",
+    "# SD2\n",
+    "#!wget -c https://huggingface.co/stabilityai/stable-diffusion-2-1-base/resolve/main/v2-1_512-ema-pruned.safetensors -P ./models/checkpoints/\n",
+    "#!wget -c https://huggingface.co/stabilityai/stable-diffusion-2-1/resolve/main/v2-1_768-ema-pruned.safetensors -P ./models/checkpoints/\n",
+    "\n",
+    "# Some SD1.5 anime style\n",
+    "#!wget -c https://huggingface.co/WarriorMama777/OrangeMixs/resolve/main/Models/AbyssOrangeMix2/AbyssOrangeMix2_hard.safetensors -P ./models/checkpoints/\n",
+    "#!wget -c https://huggingface.co/WarriorMama777/OrangeMixs/resolve/main/Models/AbyssOrangeMix3/AOM3A1_orangemixs.safetensors -P ./models/checkpoints/\n",
+    "#!wget -c https://huggingface.co/WarriorMama777/OrangeMixs/resolve/main/Models/AbyssOrangeMix3/AOM3A3_orangemixs.safetensors -P ./models/checkpoints/\n",
+    "#!wget -c https://huggingface.co/Linaqruf/anything-v3.0/resolve/main/anything-v3-fp16-pruned.safetensors -P ./models/checkpoints/\n",
+    "\n",
+    "# Waifu Diffusion 1.5 (anime style SD2.x 768-v)\n",
+    "#!wget -c https://huggingface.co/waifu-diffusion/wd-1-5-beta3/resolve/main/wd-illusion-fp16.safetensors -P ./models/checkpoints/\n",
+    "\n",
+    "\n",
+    "# unCLIP models\n",
+    "#!wget -c https://huggingface.co/comfyanonymous/illuminatiDiffusionV1_v11_unCLIP/resolve/main/illuminatiDiffusionV1_v11-unclip-h-fp16.safetensors -P ./models/checkpoints/\n",
+    "#!wget -c https://huggingface.co/comfyanonymous/wd-1.5-beta2_unCLIP/resolve/main/wd-1-5-beta2-aesthetic-unclip-h-fp16.safetensors -P ./models/checkpoints/\n",
+    "\n",
+    "\n",
+    "# VAE\n",
+    "!wget -c https://huggingface.co/stabilityai/sd-vae-ft-mse-original/resolve/main/vae-ft-mse-840000-ema-pruned.safetensors -P ./models/vae/\n",
+    "#!wget -c https://huggingface.co/WarriorMama777/OrangeMixs/resolve/main/VAEs/orangemix.vae.pt -P ./models/vae/\n",
+    "#!wget -c https://huggingface.co/hakurei/waifu-diffusion-v1-4/resolve/main/vae/kl-f8-anime2.ckpt -P ./models/vae/\n",
+    "\n",
+    "\n",
+    "# Loras\n",
+    "#!wget -c https://civitai.com/api/download/models/10350 -O ./models/loras/theovercomer8sContrastFix_sd21768.safetensors #theovercomer8sContrastFix SD2.x 768-v\n",
+    "#!wget -c https://civitai.com/api/download/models/10638 -O ./models/loras/theovercomer8sContrastFix_sd15.safetensors #theovercomer8sContrastFix SD1.x\n",
+    "#!wget -c https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_offset_example-lora_1.0.safetensors -P ./models/loras/ #SDXL offset noise lora\n",
+    "\n",
+    "\n",
+    "# T2I-Adapter\n",
+    "#!wget -c https://huggingface.co/TencentARC/T2I-Adapter/resolve/main/models/t2iadapter_depth_sd14v1.pth -P ./models/controlnet/\n",
+    "#!wget -c https://huggingface.co/TencentARC/T2I-Adapter/resolve/main/models/t2iadapter_seg_sd14v1.pth -P ./models/controlnet/\n",
+    "#!wget -c https://huggingface.co/TencentARC/T2I-Adapter/resolve/main/models/t2iadapter_sketch_sd14v1.pth -P ./models/controlnet/\n",
+    "#!wget -c https://huggingface.co/TencentARC/T2I-Adapter/resolve/main/models/t2iadapter_keypose_sd14v1.pth -P ./models/controlnet/\n",
+    "#!wget -c https://huggingface.co/TencentARC/T2I-Adapter/resolve/main/models/t2iadapter_openpose_sd14v1.pth -P ./models/controlnet/\n",
+    "#!wget -c https://huggingface.co/TencentARC/T2I-Adapter/resolve/main/models/t2iadapter_color_sd14v1.pth -P ./models/controlnet/\n",
+    "#!wget -c https://huggingface.co/TencentARC/T2I-Adapter/resolve/main/models/t2iadapter_canny_sd14v1.pth -P ./models/controlnet/\n",
+    "\n",
+    "# T2I Styles Model\n",
+    "#!wget -c https://huggingface.co/TencentARC/T2I-Adapter/resolve/main/models/t2iadapter_style_sd14v1.pth -P ./models/style_models/\n",
+    "\n",
+    "# CLIPVision model (needed for styles model)\n",
+    "#!wget -c https://huggingface.co/openai/clip-vit-large-patch14/resolve/main/pytorch_model.bin -O ./models/clip_vision/clip_vit14.bin\n",
+    "\n",
+    "\n",
+    "# ControlNet\n",
+    "#!wget -c https://huggingface.co/comfyanonymous/ControlNet-v1-1_fp16_safetensors/resolve/main/control_v11e_sd15_ip2p_fp16.safetensors -P ./models/controlnet/\n",
+    "#!wget -c https://huggingface.co/comfyanonymous/ControlNet-v1-1_fp16_safetensors/resolve/main/control_v11e_sd15_shuffle_fp16.safetensors -P ./models/controlnet/\n",
+    "#!wget -c https://huggingface.co/comfyanonymous/ControlNet-v1-1_fp16_safetensors/resolve/main/control_v11p_sd15_canny_fp16.safetensors -P ./models/controlnet/\n",
+    "#!wget -c https://huggingface.co/comfyanonymous/ControlNet-v1-1_fp16_safetensors/resolve/main/control_v11f1p_sd15_depth_fp16.safetensors -P ./models/controlnet/\n",
+    "#!wget -c https://huggingface.co/comfyanonymous/ControlNet-v1-1_fp16_safetensors/resolve/main/control_v11p_sd15_inpaint_fp16.safetensors -P ./models/controlnet/\n",
+    "#!wget -c https://huggingface.co/comfyanonymous/ControlNet-v1-1_fp16_safetensors/resolve/main/control_v11p_sd15_lineart_fp16.safetensors -P ./models/controlnet/\n",
+    "#!wget -c https://huggingface.co/comfyanonymous/ControlNet-v1-1_fp16_safetensors/resolve/main/control_v11p_sd15_mlsd_fp16.safetensors -P ./models/controlnet/\n",
+    "#!wget -c https://huggingface.co/comfyanonymous/ControlNet-v1-1_fp16_safetensors/resolve/main/control_v11p_sd15_normalbae_fp16.safetensors -P ./models/controlnet/\n",
+    "#!wget -c https://huggingface.co/comfyanonymous/ControlNet-v1-1_fp16_safetensors/resolve/main/control_v11p_sd15_openpose_fp16.safetensors -P ./models/controlnet/\n",
+    "#!wget -c https://huggingface.co/comfyanonymous/ControlNet-v1-1_fp16_safetensors/resolve/main/control_v11p_sd15_scribble_fp16.safetensors -P ./models/controlnet/\n",
+    "#!wget -c https://huggingface.co/comfyanonymous/ControlNet-v1-1_fp16_safetensors/resolve/main/control_v11p_sd15_seg_fp16.safetensors -P ./models/controlnet/\n",
+    "#!wget -c https://huggingface.co/comfyanonymous/ControlNet-v1-1_fp16_safetensors/resolve/main/control_v11p_sd15_softedge_fp16.safetensors -P ./models/controlnet/\n",
+    "#!wget -c https://huggingface.co/comfyanonymous/ControlNet-v1-1_fp16_safetensors/resolve/main/control_v11p_sd15s2_lineart_anime_fp16.safetensors -P ./models/controlnet/\n",
+    "#!wget -c https://huggingface.co/comfyanonymous/ControlNet-v1-1_fp16_safetensors/resolve/main/control_v11u_sd15_tile_fp16.safetensors -P ./models/controlnet/\n",
+    "\n",
+    "# ControlNet SDXL\n",
+    "#!wget -c https://huggingface.co/stabilityai/control-lora/resolve/main/control-LoRAs-rank256/control-lora-canny-rank256.safetensors -P ./models/controlnet/\n",
+    "#!wget -c https://huggingface.co/stabilityai/control-lora/resolve/main/control-LoRAs-rank256/control-lora-depth-rank256.safetensors -P ./models/controlnet/\n",
+    "#!wget -c https://huggingface.co/stabilityai/control-lora/resolve/main/control-LoRAs-rank256/control-lora-recolor-rank256.safetensors -P ./models/controlnet/\n",
+    "#!wget -c https://huggingface.co/stabilityai/control-lora/resolve/main/control-LoRAs-rank256/control-lora-sketch-rank256.safetensors -P ./models/controlnet/\n",
+    "\n",
+    "# Controlnet Preprocessor nodes by Fannovel16\n",
+    "#!cd custom_nodes && git clone https://github.com/Fannovel16/comfy_controlnet_preprocessors; cd comfy_controlnet_preprocessors && python install.py\n",
+    "\n",
+    "\n",
+    "# GLIGEN\n",
+    "#!wget -c https://huggingface.co/comfyanonymous/GLIGEN_pruned_safetensors/resolve/main/gligen_sd14_textbox_pruned_fp16.safetensors -P ./models/gligen/\n",
+    "\n",
+    "\n",
+    "# ESRGAN upscale model\n",
+    "#!wget -c https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth -P ./models/upscale_models/\n",
+    "#!wget -c https://huggingface.co/sberbank-ai/Real-ESRGAN/resolve/main/RealESRGAN_x2.pth -P ./models/upscale_models/\n",
+    "#!wget -c https://huggingface.co/sberbank-ai/Real-ESRGAN/resolve/main/RealESRGAN_x4.pth -P ./models/upscale_models/\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "kkkkkkkkkkkkkkk"
+   },
+   "source": [
+    "### Run ComfyUI with cloudflared (Recommended Way)\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "jjjjjjjjjjjjjj"
+   },
+   "outputs": [],
+   "source": [
+    "!wget https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64.deb\n",
+    "!dpkg -i cloudflared-linux-amd64.deb\n",
+    "\n",
+    "import subprocess\n",
+    "import threading\n",
+    "import time\n",
+    "import socket\n",
+    "import urllib.request\n",
+    "\n",
+    "def iframe_thread(port):\n",
+    "  # Poll the local port until the ComfyUI server accepts connections.\n",
+    "  while True:\n",
+    "      time.sleep(0.5)\n",
+    "      sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)\n",
+    "      result = sock.connect_ex(('127.0.0.1', port))\n",
+    "      sock.close()  # always close the probe socket, including on the successful attempt\n",
+    "      if result == 0:\n",
+    "        break\n",
+    "  print(\"\\nComfyUI finished loading, trying to launch cloudflared (if it gets stuck here cloudflared is having issues)\\n\")\n",
+    "  # stdout is discarded (it is never read here); only stderr is scanned for the tunnel URL.\n",
+    "  p = subprocess.Popen([\"cloudflared\", \"tunnel\", \"--url\", \"http://127.0.0.1:{}\".format(port)], stdout=subprocess.DEVNULL, stderr=subprocess.PIPE)\n",
+    "  for line in p.stderr:\n",
+    "    l = line.decode()\n",
+    "    if \"trycloudflare.com \" in l:\n",
+    "      print(\"This is the URL to access ComfyUI:\", l[l.find(\"http\"):], end='')\n",
+    "    #print(l, end='')\n",
+    "\n",
+    "\n",
+    "threading.Thread(target=iframe_thread, daemon=True, args=(8188,)).start()\n",
+    "\n",
+    "!python main.py --dont-print-server"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "kkkkkkkkkkkkkk"
+   },
+   "source": [
+    "### Run ComfyUI with localtunnel\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "jjjjjjjjjjjjj"
+   },
+   "outputs": [],
+   "source": [
+    "!npm install -g localtunnel\n",
+    "\n",
+    "import socket, subprocess, threading, time, urllib.request  # imported here so this cell also works when run on its own\n",
+    "\n",
+    "def iframe_thread(port):\n",
+    "  # Poll the local port until the ComfyUI server accepts connections.\n",
+    "  while True:\n",
+    "      time.sleep(0.5)\n",
+    "      sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)\n",
+    "      result = sock.connect_ex(('127.0.0.1', port))\n",
+    "      sock.close()  # always close the probe socket, including on the successful attempt\n",
+    "      if result == 0:\n",
+    "        break\n",
+    "  print(\"\\nComfyUI finished loading, trying to launch localtunnel (if it gets stuck here localtunnel is having issues)\\n\")\n",
+    "\n",
+    "  print(\"The password/endpoint ip for localtunnel is:\", urllib.request.urlopen('https://ipv4.icanhazip.com').read().decode('utf8').strip(\"\\n\"))\n",
+    "  p = subprocess.Popen([\"lt\", \"--port\", \"{}\".format(port)], stdout=subprocess.PIPE)\n",
+    "  for line in p.stdout:\n",
+    "    print(line.decode(), end='')\n",
+    "\n",
+    "\n",
+    "threading.Thread(target=iframe_thread, daemon=True, args=(8188,)).start()\n",
+    "\n",
+    "!python main.py --dont-print-server"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "gggggggggg"
+   },
+   "source": [
+    "### Run ComfyUI with colab iframe (use only in case the previous way with localtunnel doesn't work)\n",
+    "\n",
+    "You should see the ui appear in an iframe. If you get a 403 error, it's your firefox settings or an extension that's messing things up.\n",
+    "\n",
+    "If you want to open it in another window use the link.\n",
+    "\n",
+    "Note that some UI features like live image previews won't work because the colab iframe blocks websockets."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "hhhhhhhhhh"
+   },
+   "outputs": [],
+   "source": [
+    "import socket, threading, time  # imported here so this cell also works when run on its own\n",
+    "def iframe_thread(port):\n",
+    "  while True:\n",
+    "      time.sleep(0.5)\n",
+    "      sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)\n",
+    "      result = sock.connect_ex(('127.0.0.1', port))\n",
+    "      sock.close()  # always close the probe socket, including on the successful attempt\n",
+    "      if result == 0:\n",
+    "        break\n",
+    "  from google.colab import output\n",
+    "  output.serve_kernel_port_as_iframe(port, height=1024)\n",
+    "  print(\"to open it in a window you can open this link here:\")\n",
+    "  output.serve_kernel_port_as_window(port)\n",
+    "\n",
+    "threading.Thread(target=iframe_thread, daemon=True, args=(8188,)).start()\n",
+    "\n",
+    "!python main.py --dont-print-server"
+   ]
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "provenance": []
+  },
+  "gpuClass": "standard",
+  "kernelspec": {
+   "display_name": "Python 3",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "ComfyUI"
-version = "0.3.47"
+version = "0.3.40"
 readme = "README.md"
 license = { file = "LICENSE" }
 requires-python = ">=3.9"
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,13 +1,13 @@
-comfyui-frontend-package==1.23.4
-comfyui-workflow-templates==0.1.41
-comfyui-embedded-docs==0.2.4
+comfyui-frontend-package==1.21.7
+comfyui-workflow-templates==0.1.28
+comfyui-embedded-docs==0.2.2
 torch
 torchsde
 torchvision
 torchaudio
 numpy>=1.25.0
 einops
-transformers>=4.37.2
+transformers>=4.28.1
 tokenizers>=0.13.3
 sentencepiece
 safetensors>=0.4.2
--- a/script_examples/websockets_api_example.py
+++ b/script_examples/websockets_api_example.py
@@ -10,11 +10,11 @@ import urllib.parse
 server_address = "127.0.0.1:8188"
 client_id = str(uuid.uuid4())

-def queue_prompt(prompt, prompt_id):
-    p = {"prompt": prompt, "client_id": client_id, "prompt_id": prompt_id}
+def queue_prompt(prompt):
+    p = {"prompt": prompt, "client_id": client_id}
    data = json.dumps(p).encode('utf-8')
-    req = urllib.request.Request("http://{}/prompt".format(server_address), data=data)
-    urllib.request.urlopen(req).read()
+    req =  urllib.request.Request("http://{}/prompt".format(server_address), data=data)
+    return json.loads(urllib.request.urlopen(req).read())

 def get_image(filename, subfolder, folder_type):
    data = {"filename": filename, "subfolder": subfolder, "type": folder_type}
@@ -27,8 +27,7 @@ def get_history(prompt_id):
        return json.loads(response.read())

 def get_images(ws, prompt):
-    prompt_id = str(uuid.uuid4())
-    queue_prompt(prompt, prompt_id)
+    prompt_id = queue_prompt(prompt)['prompt_id']
    output_images = {}
    while True:
        out = ws.recv()
--- a/server.py
+++ b/server.py
@@ -26,7 +26,6 @@ import mimetypes
 from comfy.cli_args import args
 import comfy.utils
 import comfy.model_management
-from comfy_api import feature_flags
 import node_helpers
 from comfyui_version import __version__
 from app.frontend_management import FrontendManager
@@ -175,7 +174,6 @@ class PromptServer():
        max_upload_size = round(args.max_upload_size * 1024 * 1024)
        self.app = web.Application(client_max_size=max_upload_size, middlewares=middlewares)
        self.sockets = dict()
-        self.sockets_metadata = dict()
        self.web_root = (
            FrontendManager.init_frontend(args.front_end_version)
            if args.front_end_root is None
@@ -200,53 +198,20 @@ class PromptServer():
            else:
                sid = uuid.uuid4().hex

-            # Store WebSocket for backward compatibility
            self.sockets[sid] = ws
-            # Store metadata separately
-            self.sockets_metadata[sid] = {"feature_flags": {}}

            try:
                # Send initial state to the new client
-                await self.send("status", {"status": self.get_queue_info(), "sid": sid}, sid)
+                await self.send("status", { "status": self.get_queue_info(), 'sid': sid }, sid)
                # On reconnect if we are the currently executing client send the current node
                if self.client_id == sid and self.last_node_id is not None:
                    await self.send("executing", { "node": self.last_node_id }, sid)

-                # Flag to track if we've received the first message
-                first_message = True
-
                async for msg in ws:
                    if msg.type == aiohttp.WSMsgType.ERROR:
                        logging.warning('ws connection closed with exception %s' % ws.exception())
-                    elif msg.type == aiohttp.WSMsgType.TEXT:
-                        try:
-                            data = json.loads(msg.data)
-                            # Check if first message is feature flags
-                            if first_message and data.get("type") == "feature_flags":
-                                # Store client feature flags
-                                client_flags = data.get("data", {})
-                                self.sockets_metadata[sid]["feature_flags"] = client_flags
-
-                                # Send server feature flags in response
-                                await self.send(
-                                    "feature_flags",
-                                    feature_flags.get_server_features(),
-                                    sid,
-                                )
-
-                                logging.info(
-                                    f"Feature flags negotiated for client {sid}: {client_flags}"
-                                )
-                            first_message = False
-                        except json.JSONDecodeError:
-                            logging.warning(
-                                f"Invalid JSON received from client {sid}: {msg.data}"
-                            )
-                        except Exception as e:
-                            logging.error(f"Error processing WebSocket message: {e}")
            finally:
                self.sockets.pop(sid, None)
-                self.sockets_metadata.pop(sid, None)
            return ws

        @routes.get("/")
@@ -553,7 +518,6 @@ class PromptServer():
            ram_free = comfy.model_management.get_free_memory(cpu_device)
            vram_total, torch_vram_total = comfy.model_management.get_total_memory(device, torch_total_too=True)
            vram_free, torch_vram_free = comfy.model_management.get_free_memory(device, torch_free_too=True)
-            required_frontend_version = FrontendManager.get_required_frontend_version()

            system_stats = {
                "system": {
@@ -561,7 +525,6 @@ class PromptServer():
                    "ram_total": ram_total,
                    "ram_free": ram_free,
                    "comfyui_version": __version__,
-                    "required_frontend_version": required_frontend_version,
                    "python_version": sys.version,
                    "pytorch_version": comfy.model_management.torch_version,
                    "embedded_python": os.path.split(os.path.split(sys.executable)[0])[1] == "python_embeded",
@@ -581,10 +544,6 @@ class PromptServer():
            }
            return web.json_response(system_stats)

-        @routes.get("/features")
-        async def get_features(request):
-            return web.json_response(feature_flags.get_server_features())
-
        @routes.get("/prompt")
        async def get_prompt(request):
            return web.json_response(self.get_queue_info())
@@ -680,7 +639,7 @@ class PromptServer():

            if "prompt" in json_data:
                prompt = json_data["prompt"]
-                prompt_id = str(json_data.get("prompt_id", uuid.uuid4()))
+                prompt_id = str(uuid.uuid4())
                valid = await execution.validate_prompt(prompt_id, prompt)
                extra_data = {}
                if "extra_data" in json_data:
@@ -923,10 +882,10 @@ class PromptServer():
        ssl_ctx = None
        scheme = "http"
        if args.tls_keyfile and args.tls_certfile:
-            ssl_ctx = ssl.SSLContext(protocol=ssl.PROTOCOL_TLS_SERVER, verify_mode=ssl.CERT_NONE)
-            ssl_ctx.load_cert_chain(certfile=args.tls_certfile,
+                ssl_ctx = ssl.SSLContext(protocol=ssl.PROTOCOL_TLS_SERVER, verify_mode=ssl.CERT_NONE)
+                ssl_ctx.load_cert_chain(certfile=args.tls_certfile,
                                keyfile=args.tls_keyfile)
-            scheme = "https"
+                scheme = "https"

        if verbose:
            logging.info("Starting server\n")
--- a/tests-unit/app_test/frontend_manager_test.py
+++ b/tests-unit/app_test/frontend_manager_test.py
@@ -1,7 +1,7 @@
 import argparse
 import pytest
 from requests.exceptions import HTTPError
-from unittest.mock import patch, mock_open
+from unittest.mock import patch

 from app.frontend_management import (
    FrontendManager,
@@ -172,36 +172,3 @@ def test_init_frontend_fallback_on_error():
    # Assert
    assert frontend_path == "/default/path"
    mock_check.assert_called_once()
-
-
-def test_get_frontend_version():
-    # Arrange
-    expected_version = "1.25.0"
-    mock_requirements_content = """torch
-torchsde
-comfyui-frontend-package==1.25.0
-other-package==1.0.0
-numpy"""
-
-    # Act
-    with patch("builtins.open", mock_open(read_data=mock_requirements_content)):
-        version = FrontendManager.get_required_frontend_version()
-
-    # Assert
-    assert version == expected_version
-
-
-def test_get_frontend_version_invalid_semver():
-    # Arrange
-    mock_requirements_content = """torch
-torchsde
-comfyui-frontend-package==1.29.3.75
-other-package==1.0.0
-numpy"""
-
-    # Act
-    with patch("builtins.open", mock_open(read_data=mock_requirements_content)):
-        version = FrontendManager.get_required_frontend_version()
-
-    # Assert
-    assert version is None
--- a/tests-unit/feature_flags_test.py
+++ b/tests-unit/feature_flags_test.py
@@ -1,98 +0,0 @@
-"""Tests for feature flags functionality."""
-
-from comfy_api.feature_flags import (
-    get_connection_feature,
-    supports_feature,
-    get_server_features,
-    SERVER_FEATURE_FLAGS,
-)
-
-
-class TestFeatureFlags:
-    """Test suite for feature flags functions."""
-
-    def test_get_server_features_returns_copy(self):
-        """Test that get_server_features returns a copy of the server flags."""
-        features = get_server_features()
-        # Verify it's a copy by modifying it
-        features["test_flag"] = True
-        # Original should be unchanged
-        assert "test_flag" not in SERVER_FEATURE_FLAGS
-
-    def test_get_server_features_contains_expected_flags(self):
-        """Test that server features contain expected flags."""
-        features = get_server_features()
-        assert "supports_preview_metadata" in features
-        assert features["supports_preview_metadata"] is True
-        assert "max_upload_size" in features
-        assert isinstance(features["max_upload_size"], (int, float))
-
-    def test_get_connection_feature_with_missing_sid(self):
-        """Test getting feature for non-existent session ID."""
-        sockets_metadata = {}
-        result = get_connection_feature(sockets_metadata, "missing_sid", "some_feature")
-        assert result is False  # Default value
-
-    def test_get_connection_feature_with_custom_default(self):
-        """Test getting feature with custom default value."""
-        sockets_metadata = {}
-        result = get_connection_feature(
-            sockets_metadata, "missing_sid", "some_feature", default="custom_default"
-        )
-        assert result == "custom_default"
-
-    def test_get_connection_feature_with_feature_flags(self):
-        """Test getting feature from connection with feature flags."""
-        sockets_metadata = {
-            "sid1": {
-                "feature_flags": {
-                    "supports_preview_metadata": True,
-                    "custom_feature": "value",
-                },
-            }
-        }
-        result = get_connection_feature(sockets_metadata, "sid1", "supports_preview_metadata")
-        assert result is True
-
-        result = get_connection_feature(sockets_metadata, "sid1", "custom_feature")
-        assert result == "value"
-
-    def test_get_connection_feature_missing_feature(self):
-        """Test getting non-existent feature from connection."""
-        sockets_metadata = {
-            "sid1": {"feature_flags": {"existing_feature": True}}
-        }
-        result = get_connection_feature(sockets_metadata, "sid1", "missing_feature")
-        assert result is False
-
-    def test_supports_feature_returns_boolean(self):
-        """Test that supports_feature always returns boolean."""
-        sockets_metadata = {
-            "sid1": {
-                "feature_flags": {
-                    "bool_feature": True,
-                    "string_feature": "value",
-                    "none_feature": None,
-                },
-            }
-        }
-
-        # True boolean feature
-        assert supports_feature(sockets_metadata, "sid1", "bool_feature") is True
-
-        # Non-boolean values should return False
-        assert supports_feature(sockets_metadata, "sid1", "string_feature") is False
-        assert supports_feature(sockets_metadata, "sid1", "none_feature") is False
-        assert supports_feature(sockets_metadata, "sid1", "missing_feature") is False
-
-    def test_supports_feature_with_missing_connection(self):
-        """Test supports_feature with missing connection."""
-        sockets_metadata = {}
-        assert supports_feature(sockets_metadata, "missing_sid", "any_feature") is False
-
-    def test_empty_feature_flags_dict(self):
-        """Test connection with empty feature flags dictionary."""
-        sockets_metadata = {"sid1": {"feature_flags": {}}}
-        result = get_connection_feature(sockets_metadata, "sid1", "any_feature")
-        assert result is False
-        assert supports_feature(sockets_metadata, "sid1", "any_feature") is False
--- a/tests-unit/websocket_feature_flags_test.py
+++ b/tests-unit/websocket_feature_flags_test.py
@@ -1,77 +0,0 @@
-"""Simplified tests for WebSocket feature flags functionality."""
-from comfy_api import feature_flags
-
-
-class TestWebSocketFeatureFlags:
-    """Test suite for WebSocket feature flags integration."""
-
-    def test_server_feature_flags_response(self):
-        """Test server feature flags are properly formatted."""
-        features = feature_flags.get_server_features()
-
-        # Check expected server features
-        assert "supports_preview_metadata" in features
-        assert features["supports_preview_metadata"] is True
-        assert "max_upload_size" in features
-        assert isinstance(features["max_upload_size"], (int, float))
-
-    def test_progress_py_checks_feature_flags(self):
-        """Test that progress.py checks feature flags before sending metadata."""
-        # This simulates the check in progress.py
-        client_id = "test_client"
-        sockets_metadata = {"test_client": {"feature_flags": {}}}
-
-        # The actual check would be in progress.py
-        supports_metadata = feature_flags.supports_feature(
-            sockets_metadata, client_id, "supports_preview_metadata"
-        )
-
-        assert supports_metadata is False
-
-    def test_multiple_clients_different_features(self):
-        """Test handling multiple clients with different feature support."""
-        sockets_metadata = {
-            "modern_client": {
-                "feature_flags": {"supports_preview_metadata": True}
-            },
-            "legacy_client": {
-                "feature_flags": {}
-            }
-        }
-
-        # Check modern client
-        assert feature_flags.supports_feature(
-            sockets_metadata, "modern_client", "supports_preview_metadata"
-        ) is True
-
-        # Check legacy client
-        assert feature_flags.supports_feature(
-            sockets_metadata, "legacy_client", "supports_preview_metadata"
-        ) is False
-
-    def test_feature_negotiation_message_format(self):
-        """Test the format of feature negotiation messages."""
-        # Client message format
-        client_message = {
-            "type": "feature_flags",
-            "data": {
-                "supports_preview_metadata": True,
-                "api_version": "1.0.0"
-            }
-        }
-
-        # Verify structure
-        assert client_message["type"] == "feature_flags"
-        assert "supports_preview_metadata" in client_message["data"]
-
-        # Server response format (what would be sent)
-        server_features = feature_flags.get_server_features()
-        server_message = {
-            "type": "feature_flags",
-            "data": server_features
-        }
-
-        # Verify structure
-        assert server_message["type"] == "feature_flags"
-        assert "supports_preview_metadata" in server_message["data"]
-        assert server_message["data"]["supports_preview_metadata"] is True
--- a/tests/inference/test_async_nodes.py
+++ b/tests/inference/test_async_nodes.py
@@ -1,410 +0,0 @@
-import pytest
-import time
-import torch
-import urllib.error
-import numpy as np
-import subprocess
-
-from pytest import fixture
-from comfy_execution.graph_utils import GraphBuilder
-from tests.inference.test_execution import ComfyClient
-
-
-@pytest.mark.execution
-class TestAsyncNodes:
-    @fixture(scope="class", autouse=True, params=[
-        (False, 0),
-        (True, 0),
-        (True, 100),
-    ])
-    def _server(self, args_pytest, request):
-        pargs = [
-            'python','main.py',
-            '--output-directory', args_pytest["output_dir"],
-            '--listen', args_pytest["listen"],
-            '--port', str(args_pytest["port"]),
-            '--extra-model-paths-config', 'tests/inference/extra_model_paths.yaml',
-        ]
-        use_lru, lru_size = request.param
-        if use_lru:
-            pargs += ['--cache-lru', str(lru_size)]
-        # Running server with args: pargs
-        p = subprocess.Popen(pargs)
-        yield
-        p.kill()
-        torch.cuda.empty_cache()
-
-    @fixture(scope="class", autouse=True)
-    def shared_client(self, args_pytest, _server):
-        client = ComfyClient()
-        n_tries = 5
-        for i in range(n_tries):
-            time.sleep(4)
-            try:
-                client.connect(listen=args_pytest["listen"], port=args_pytest["port"])
-            except ConnectionRefusedError:
-                # Retrying...
-                pass
-            else:
-                break
-        yield client
-        del client
-        torch.cuda.empty_cache()
-
-    @fixture
-    def client(self, shared_client, request):
-        shared_client.set_test_name(f"async_nodes[{request.node.name}]")
-        yield shared_client
-
-    @fixture
-    def builder(self, request):
-        yield GraphBuilder(prefix=request.node.name)
-
-    # Happy Path Tests
-
-    def test_basic_async_execution(self, client: ComfyClient, builder: GraphBuilder):
-        """Test that a basic async node executes correctly."""
-        g = builder
-        image = g.node("StubImage", content="BLACK", height=512, width=512, batch_size=1)
-        sleep_node = g.node("TestSleep", value=image.out(0), seconds=0.1)
-        output = g.node("SaveImage", images=sleep_node.out(0))
-
-        result = client.run(g)
-
-        # Verify execution completed
-        assert result.did_run(sleep_node), "Async sleep node should have executed"
-        assert result.did_run(output), "Output node should have executed"
-
-        # Verify the image passed through correctly
-        result_images = result.get_images(output)
-        assert len(result_images) == 1, "Should have 1 image"
-        assert np.array(result_images[0]).min() == 0 and np.array(result_images[0]).max() == 0, "Image should be black"
-
-    def test_multiple_async_parallel_execution(self, client: ComfyClient, builder: GraphBuilder):
-        """Test that multiple async nodes execute in parallel."""
-        g = builder
-        image = g.node("StubImage", content="BLACK", height=512, width=512, batch_size=1)
-
-        # Create multiple async sleep nodes with different durations
-        sleep1 = g.node("TestSleep", value=image.out(0), seconds=0.3)
-        sleep2 = g.node("TestSleep", value=image.out(0), seconds=0.4)
-        sleep3 = g.node("TestSleep", value=image.out(0), seconds=0.5)
-
-        # Add outputs for each
-        _output1 = g.node("PreviewImage", images=sleep1.out(0))
-        _output2 = g.node("PreviewImage", images=sleep2.out(0))
-        _output3 = g.node("PreviewImage", images=sleep3.out(0))
-
-        start_time = time.time()
-        result = client.run(g)
-        elapsed_time = time.time() - start_time
-
-        # Should take ~0.5s (max duration) not 1.2s (sum of durations)
-        assert elapsed_time < 0.8, f"Parallel execution took {elapsed_time}s, expected < 0.8s"
-
-        # Verify all nodes executed
-        assert result.did_run(sleep1) and result.did_run(sleep2) and result.did_run(sleep3)
-
-    def test_async_with_dependencies(self, client: ComfyClient, builder: GraphBuilder):
-        """Test async nodes with proper dependency handling."""
-        g = builder
-        image1 = g.node("StubImage", content="BLACK", height=512, width=512, batch_size=1)
-        image2 = g.node("StubImage", content="WHITE", height=512, width=512, batch_size=1)
-
-        # Chain of async operations
-        sleep1 = g.node("TestSleep", value=image1.out(0), seconds=0.2)
-        sleep2 = g.node("TestSleep", value=image2.out(0), seconds=0.2)
-
-        # Average depends on both async results
-        average = g.node("TestVariadicAverage", input1=sleep1.out(0), input2=sleep2.out(0))
-        output = g.node("SaveImage", images=average.out(0))
-
-        result = client.run(g)
-
-        # Verify execution order
-        assert result.did_run(sleep1) and result.did_run(sleep2)
-        assert result.did_run(average) and result.did_run(output)
-
-        # Verify averaged result
-        result_images = result.get_images(output)
-        avg_value = np.array(result_images[0]).mean()
-        assert abs(avg_value - 127.5) < 1, f"Average value {avg_value} should be ~127.5"
-
-    def test_async_validate_inputs(self, client: ComfyClient, builder: GraphBuilder):
-        """Test async VALIDATE_INPUTS function."""
-        g = builder
-        # Create a test node with async validation
-        validation_node = g.node("TestAsyncValidation", value=5.0, threshold=10.0)
-        g.node("SaveImage", images=validation_node.out(0))
-
-        # Should pass validation
-        result = client.run(g)
-        assert result.did_run(validation_node)
-
-        # Test validation failure
-        validation_node.inputs['threshold'] = 3.0  # Will fail since value > threshold
-        with pytest.raises(urllib.error.HTTPError):
-            client.run(g)
-
-    def test_async_lazy_evaluation(self, client: ComfyClient, builder: GraphBuilder):
-        """Test async nodes with lazy evaluation."""
-        g = builder
-        input1 = g.node("StubImage", content="BLACK", height=512, width=512, batch_size=1)
-        input2 = g.node("StubImage", content="WHITE", height=512, width=512, batch_size=1)
-        mask = g.node("StubMask", value=0.0, height=512, width=512, batch_size=1)
-
-        # Create async nodes that will be evaluated lazily
-        sleep1 = g.node("TestSleep", value=input1.out(0), seconds=0.3)
-        sleep2 = g.node("TestSleep", value=input2.out(0), seconds=0.3)
-
-        # Use lazy mix that only needs sleep1 (mask=0.0)
-        lazy_mix = g.node("TestLazyMixImages", image1=sleep1.out(0), image2=sleep2.out(0), mask=mask.out(0))
-        g.node("SaveImage", images=lazy_mix.out(0))
-
-        start_time = time.time()
-        result = client.run(g)
-        elapsed_time = time.time() - start_time
-
-        # Should only execute sleep1, not sleep2
-        assert elapsed_time < 0.5, f"Should skip sleep2, took {elapsed_time}s"
-        assert result.did_run(sleep1), "Sleep1 should have executed"
-        assert not result.did_run(sleep2), "Sleep2 should have been skipped"
-
-    def test_async_check_lazy_status(self, client: ComfyClient, builder: GraphBuilder):
-        """Test async check_lazy_status function."""
-        g = builder
-        # Create a node with async check_lazy_status
-        lazy_node = g.node("TestAsyncLazyCheck",
-                          input1="value1",
-                          input2="value2",
-                          condition=True)
-        g.node("SaveImage", images=lazy_node.out(0))
-
-        result = client.run(g)
-        assert result.did_run(lazy_node)
-
-    # Error Handling Tests
-
-    def test_async_execution_error(self, client: ComfyClient, builder: GraphBuilder):
-        """Test that async execution errors are properly handled."""
-        g = builder
-        image = g.node("StubImage", content="BLACK", height=512, width=512, batch_size=1)
-        # Create an async node that will error
-        error_node = g.node("TestAsyncError", value=image.out(0), error_after=0.1)
-        g.node("SaveImage", images=error_node.out(0))
-
-        try:
-            client.run(g)
-            assert False, "Should have raised an error"
-        except Exception as e:
-            assert 'prompt_id' in e.args[0], f"Did not get proper error message: {e}"
-            assert e.args[0]['node_id'] == error_node.id, "Error should be from async error node"
-
-    def test_async_validation_error(self, client: ComfyClient, builder: GraphBuilder):
-        """Test async validation error handling."""
-        g = builder
-        # Node with async validation that will fail
-        validation_node = g.node("TestAsyncValidationError", value=15.0, max_value=10.0)
-        g.node("SaveImage", images=validation_node.out(0))
-
-        with pytest.raises(urllib.error.HTTPError) as exc_info:
-            client.run(g)
-        # Verify it's a validation error
-        assert exc_info.value.code == 400
-
-    def test_async_timeout_handling(self, client: ComfyClient, builder: GraphBuilder):
-        """Test handling of async operations that timeout."""
-        g = builder
-        image = g.node("StubImage", content="BLACK", height=512, width=512, batch_size=1)
-        # Very long sleep that would timeout
-        timeout_node = g.node("TestAsyncTimeout", value=image.out(0), timeout=0.5, operation_time=2.0)
-        g.node("SaveImage", images=timeout_node.out(0))
-
-        try:
-            client.run(g)
-            assert False, "Should have raised a timeout error"
-        except Exception as e:
-            assert 'timeout' in str(e).lower(), f"Expected timeout error, got: {e}"
-
-    def test_concurrent_async_error_recovery(self, client: ComfyClient, builder: GraphBuilder):
-        """Test that workflow can recover after async errors."""
-        g = builder
-        image = g.node("StubImage", content="BLACK", height=512, width=512, batch_size=1)
-
-        # First run with error
-        error_node = g.node("TestAsyncError", value=image.out(0), error_after=0.1)
-        g.node("SaveImage", images=error_node.out(0))
-
-        try:
-            client.run(g)
-        except Exception:
-            pass  # Expected
-
-        # Second run should succeed
-        g2 = GraphBuilder(prefix="recovery_test")
-        image2 = g2.node("StubImage", content="WHITE", height=512, width=512, batch_size=1)
-        sleep_node = g2.node("TestSleep", value=image2.out(0), seconds=0.1)
-        g2.node("SaveImage", images=sleep_node.out(0))
-
-        result = client.run(g2)
-        assert result.did_run(sleep_node), "Should be able to run after error"
-
-    def test_sync_error_during_async_execution(self, client: ComfyClient, builder: GraphBuilder):
-        """Test handling when sync node errors while async node is executing."""
-        g = builder
-        image = g.node("StubImage", content="BLACK", height=512, width=512, batch_size=1)
-
-        # Async node that takes time
-        sleep_node = g.node("TestSleep", value=image.out(0), seconds=0.5)
-
-        # Sync node that will error immediately
-        error_node = g.node("TestSyncError", value=image.out(0))
-
-        # Both feed into output
-        g.node("PreviewImage", images=sleep_node.out(0))
-        g.node("PreviewImage", images=error_node.out(0))
-
-        try:
-            client.run(g)
-            assert False, "Should have raised an error"
-        except Exception as e:
-            # Verify the sync error was caught even though async was running
-            assert 'prompt_id' in e.args[0]
-
-    # Edge Cases
-
-    def test_async_with_execution_blocker(self, client: ComfyClient, builder: GraphBuilder):
-        """Test async nodes with execution blockers."""
-        g = builder
-        image1 = g.node("StubImage", content="BLACK", height=512, width=512, batch_size=1)
-        image2 = g.node("StubImage", content="WHITE", height=512, width=512, batch_size=1)
-
-        # Async sleep nodes
-        sleep1 = g.node("TestSleep", value=image1.out(0), seconds=0.2)
-        sleep2 = g.node("TestSleep", value=image2.out(0), seconds=0.2)
-
-        # Create list of images
-        image_list = g.node("TestMakeListNode", value1=sleep1.out(0), value2=sleep2.out(0))
-
-        # Create list of blocking conditions - [False, True] to block only the second item
-        int1 = g.node("StubInt", value=1)
-        int2 = g.node("StubInt", value=2)
-        block_list = g.node("TestMakeListNode", value1=int1.out(0), value2=int2.out(0))
-
-        # Compare each value against 2, so first is False (1 != 2) and second is True (2 == 2)
-        compare = g.node("TestIntConditions", a=block_list.out(0), b=2, operation="==")
-
-        # Block based on the comparison results
-        blocker = g.node("TestExecutionBlocker", input=image_list.out(0), block=compare.out(0), verbose=False)
-
-        output = g.node("PreviewImage", images=blocker.out(0))
-
-        result = client.run(g)
-        images = result.get_images(output)
-        assert len(images) == 1, "Should have blocked second image"
-
-    def test_async_caching_behavior(self, client: ComfyClient, builder: GraphBuilder):
-        """Test that async nodes are properly cached."""
-        g = builder
-        image = g.node("StubImage", content="BLACK", height=512, width=512, batch_size=1)
-        sleep_node = g.node("TestSleep", value=image.out(0), seconds=0.2)
-        g.node("SaveImage", images=sleep_node.out(0))
-
-        # First run
-        result1 = client.run(g)
-        assert result1.did_run(sleep_node), "Should run first time"
-
-        # Second run - should be cached
-        start_time = time.time()
-        result2 = client.run(g)
-        elapsed_time = time.time() - start_time
-
-        assert not result2.did_run(sleep_node), "Should be cached"
-        assert elapsed_time < 0.1, f"Cached run took {elapsed_time}s, should be instant"
-
-    def test_async_with_dynamic_prompts(self, client: ComfyClient, builder: GraphBuilder):
-        """Test async nodes within dynamically generated prompts."""
-        g = builder
-        image1 = g.node("StubImage", content="BLACK", height=512, width=512, batch_size=1)
-        image2 = g.node("StubImage", content="WHITE", height=512, width=512, batch_size=1)
-
-        # Node that generates async nodes dynamically
-        dynamic_async = g.node("TestDynamicAsyncGeneration",
-                              image1=image1.out(0),
-                              image2=image2.out(0),
-                              num_async_nodes=3,
-                              sleep_duration=0.2)
-        g.node("SaveImage", images=dynamic_async.out(0))
-
-        start_time = time.time()
-        result = client.run(g)
-        elapsed_time = time.time() - start_time
-
-        # Should execute async nodes in parallel within dynamic prompt
-        assert elapsed_time < 0.5, f"Dynamic async execution took {elapsed_time}s"
-        assert result.did_run(dynamic_async)
-
-    def test_async_resource_cleanup(self, client: ComfyClient, builder: GraphBuilder):
-        """Test that async resources are properly cleaned up."""
-        g = builder
-        image = g.node("StubImage", content="BLACK", height=512, width=512, batch_size=1)
-
-        # Create multiple async nodes that use resources
-        resource_nodes = []
-        for i in range(5):
-            node = g.node("TestAsyncResourceUser",
-                         value=image.out(0),
-                         resource_id=f"resource_{i}",
-                         duration=0.1)
-            resource_nodes.append(node)
-            g.node("PreviewImage", images=node.out(0))
-
-        result = client.run(g)
-
-        # Verify all nodes executed
-        for node in resource_nodes:
-            assert result.did_run(node)
-
-        # Run again to ensure resources were cleaned up
-        result2 = client.run(g)
-        # Should be cached but not error due to resource conflicts
-        for node in resource_nodes:
-            assert not result2.did_run(node), "Should be cached"
-
-    def test_async_cancellation(self, client: ComfyClient, builder: GraphBuilder):
-        """Test cancellation of async operations."""
-        # This would require implementing cancellation in the client
-        # For now, we'll test that long-running async operations can be interrupted
-        pass  # TODO: Implement when cancellation API is available
-
-    def test_mixed_sync_async_execution(self, client: ComfyClient, builder: GraphBuilder):
-        """Test workflows with both sync and async nodes."""
-        g = builder
-        image1 = g.node("StubImage", content="BLACK", height=512, width=512, batch_size=1)
-        image2 = g.node("StubImage", content="WHITE", height=512, width=512, batch_size=1)
-        mask = g.node("StubMask", value=0.5, height=512, width=512, batch_size=1)
-
-        # Mix of sync and async operations
-        # Sync: lazy mix images
-        sync_op1 = g.node("TestLazyMixImages", image1=image1.out(0), image2=image2.out(0), mask=mask.out(0))
-        # Async: sleep
-        async_op1 = g.node("TestSleep", value=sync_op1.out(0), seconds=0.2)
-        # Sync: custom validation
-        sync_op2 = g.node("TestCustomValidation1", input1=async_op1.out(0), input2=0.5)
-        # Async: sleep again
-        async_op2 = g.node("TestSleep", value=sync_op2.out(0), seconds=0.2)
-
-        output = g.node("SaveImage", images=async_op2.out(0))
-
-        result = client.run(g)
-
-        # Verify all nodes executed in correct order
-        assert result.did_run(sync_op1)
-        assert result.did_run(async_op1)
-        assert result.did_run(sync_op2)
-        assert result.did_run(async_op2)
-
-        # Image should be a mix of black and white (gray)
-        result_images = result.get_images(output)
-        avg_value = np.array(result_images[0]).mean()
-        assert abs(avg_value - 63.75) < 5, f"Average value {avg_value} should be ~63.75"
--- a/tests/inference/testing_nodes/testing-pack/init.py
+++ b/tests/inference/testing_nodes/testing-pack/init.py
@@ -1,26 +1,23 @@
-from .specific_tests import TEST_NODE_CLASS_MAPPINGS, TEST_NODE_DISPLAY_NAME_MAPPINGS
-from .flow_control import FLOW_CONTROL_NODE_CLASS_MAPPINGS, FLOW_CONTROL_NODE_DISPLAY_NAME_MAPPINGS
-from .util import UTILITY_NODE_CLASS_MAPPINGS, UTILITY_NODE_DISPLAY_NAME_MAPPINGS
-from .conditions import CONDITION_NODE_CLASS_MAPPINGS, CONDITION_NODE_DISPLAY_NAME_MAPPINGS
-from .stubs import TEST_STUB_NODE_CLASS_MAPPINGS, TEST_STUB_NODE_DISPLAY_NAME_MAPPINGS
-from .async_test_nodes import ASYNC_TEST_NODE_CLASS_MAPPINGS, ASYNC_TEST_NODE_DISPLAY_NAME_MAPPINGS
-
-# NODE_CLASS_MAPPINGS = GENERAL_NODE_CLASS_MAPPINGS.update(COMPONENT_NODE_CLASS_MAPPINGS)
-# NODE_DISPLAY_NAME_MAPPINGS = GENERAL_NODE_DISPLAY_NAME_MAPPINGS.update(COMPONENT_NODE_DISPLAY_NAME_MAPPINGS)
-
-NODE_CLASS_MAPPINGS = {}
-NODE_CLASS_MAPPINGS.update(TEST_NODE_CLASS_MAPPINGS)
-NODE_CLASS_MAPPINGS.update(FLOW_CONTROL_NODE_CLASS_MAPPINGS)
-NODE_CLASS_MAPPINGS.update(UTILITY_NODE_CLASS_MAPPINGS)
-NODE_CLASS_MAPPINGS.update(CONDITION_NODE_CLASS_MAPPINGS)
-NODE_CLASS_MAPPINGS.update(TEST_STUB_NODE_CLASS_MAPPINGS)
-NODE_CLASS_MAPPINGS.update(ASYNC_TEST_NODE_CLASS_MAPPINGS)
-
-NODE_DISPLAY_NAME_MAPPINGS = {}
-NODE_DISPLAY_NAME_MAPPINGS.update(TEST_NODE_DISPLAY_NAME_MAPPINGS)
-NODE_DISPLAY_NAME_MAPPINGS.update(FLOW_CONTROL_NODE_DISPLAY_NAME_MAPPINGS)
-NODE_DISPLAY_NAME_MAPPINGS.update(UTILITY_NODE_DISPLAY_NAME_MAPPINGS)
-NODE_DISPLAY_NAME_MAPPINGS.update(CONDITION_NODE_DISPLAY_NAME_MAPPINGS)
-NODE_DISPLAY_NAME_MAPPINGS.update(TEST_STUB_NODE_DISPLAY_NAME_MAPPINGS)
-NODE_DISPLAY_NAME_MAPPINGS.update(ASYNC_TEST_NODE_DISPLAY_NAME_MAPPINGS)
-
+from .specific_tests import TEST_NODE_CLASS_MAPPINGS, TEST_NODE_DISPLAY_NAME_MAPPINGS
+from .flow_control import FLOW_CONTROL_NODE_CLASS_MAPPINGS, FLOW_CONTROL_NODE_DISPLAY_NAME_MAPPINGS
+from .util import UTILITY_NODE_CLASS_MAPPINGS, UTILITY_NODE_DISPLAY_NAME_MAPPINGS
+from .conditions import CONDITION_NODE_CLASS_MAPPINGS, CONDITION_NODE_DISPLAY_NAME_MAPPINGS
+from .stubs import TEST_STUB_NODE_CLASS_MAPPINGS, TEST_STUB_NODE_DISPLAY_NAME_MAPPINGS
+
+# NODE_CLASS_MAPPINGS = GENERAL_NODE_CLASS_MAPPINGS.update(COMPONENT_NODE_CLASS_MAPPINGS)
+# NODE_DISPLAY_NAME_MAPPINGS = GENERAL_NODE_DISPLAY_NAME_MAPPINGS.update(COMPONENT_NODE_DISPLAY_NAME_MAPPINGS)
+
+NODE_CLASS_MAPPINGS = {}
+NODE_CLASS_MAPPINGS.update(TEST_NODE_CLASS_MAPPINGS)
+NODE_CLASS_MAPPINGS.update(FLOW_CONTROL_NODE_CLASS_MAPPINGS)
+NODE_CLASS_MAPPINGS.update(UTILITY_NODE_CLASS_MAPPINGS)
+NODE_CLASS_MAPPINGS.update(CONDITION_NODE_CLASS_MAPPINGS)
+NODE_CLASS_MAPPINGS.update(TEST_STUB_NODE_CLASS_MAPPINGS)
+
+NODE_DISPLAY_NAME_MAPPINGS = {}
+NODE_DISPLAY_NAME_MAPPINGS.update(TEST_NODE_DISPLAY_NAME_MAPPINGS)
+NODE_DISPLAY_NAME_MAPPINGS.update(FLOW_CONTROL_NODE_DISPLAY_NAME_MAPPINGS)
+NODE_DISPLAY_NAME_MAPPINGS.update(UTILITY_NODE_DISPLAY_NAME_MAPPINGS)
+NODE_DISPLAY_NAME_MAPPINGS.update(CONDITION_NODE_DISPLAY_NAME_MAPPINGS)
+NODE_DISPLAY_NAME_MAPPINGS.update(TEST_STUB_NODE_DISPLAY_NAME_MAPPINGS)
+
--- a/tests/inference/testing_nodes/testing-pack/async_test_nodes.py
+++ b/tests/inference/testing_nodes/testing-pack/async_test_nodes.py
@@ -1,343 +0,0 @@
-import torch
-import asyncio
-from typing import Dict
-from comfy.utils import ProgressBar
-from comfy_execution.graph_utils import GraphBuilder
-from comfy.comfy_types.node_typing import ComfyNodeABC
-from comfy.comfy_types import IO
-
-
-class TestAsyncValidation(ComfyNodeABC):
-    """Test node with async VALIDATE_INPUTS."""
-
-    @classmethod
-    def INPUT_TYPES(cls):
-        return {
-            "required": {
-                "value": ("FLOAT", {"default": 5.0}),
-                "threshold": ("FLOAT", {"default": 10.0}),
-            },
-        }
-
-    RETURN_TYPES = ("IMAGE",)
-    FUNCTION = "process"
-    CATEGORY = "_for_testing/async"
-
-    @classmethod
-    async def VALIDATE_INPUTS(cls, value, threshold):
-        # Simulate async validation (e.g., checking remote service)
-        await asyncio.sleep(0.05)
-
-        if value > threshold:
-            return f"Value {value} exceeds threshold {threshold}"
-        return True
-
-    def process(self, value, threshold):
-        # Create image based on value
-        intensity = value / 10.0
-        image = torch.ones([1, 512, 512, 3]) * intensity
-        return (image,)
-
-
-class TestAsyncError(ComfyNodeABC):
-    """Test node that errors during async execution."""
-
-    @classmethod
-    def INPUT_TYPES(cls):
-        return {
-            "required": {
-                "value": (IO.ANY, {}),
-                "error_after": ("FLOAT", {"default": 0.1, "min": 0.0, "max": 10.0}),
-            },
-        }
-
-    RETURN_TYPES = (IO.ANY,)
-    FUNCTION = "error_execution"
-    CATEGORY = "_for_testing/async"
-
-    async def error_execution(self, value, error_after):
-        await asyncio.sleep(error_after)
-        raise RuntimeError("Intentional async execution error for testing")
-
-
-class TestAsyncValidationError(ComfyNodeABC):
-    """Test node with async validation that always fails."""
-
-    @classmethod
-    def INPUT_TYPES(cls):
-        return {
-            "required": {
-                "value": ("FLOAT", {"default": 5.0}),
-                "max_value": ("FLOAT", {"default": 10.0}),
-            },
-        }
-
-    RETURN_TYPES = ("IMAGE",)
-    FUNCTION = "process"
-    CATEGORY = "_for_testing/async"
-
-    @classmethod
-    async def VALIDATE_INPUTS(cls, value, max_value):
-        await asyncio.sleep(0.05)
-        # Always fail validation for values > max_value
-        if value > max_value:
-            return f"Async validation failed: {value} > {max_value}"
-        return True
-
-    def process(self, value, max_value):
-        # This won't be reached if validation fails
-        image = torch.ones([1, 512, 512, 3]) * (value / max_value)
-        return (image,)
-
-
-class TestAsyncTimeout(ComfyNodeABC):
-    """Test node that simulates timeout scenarios."""
-
-    @classmethod
-    def INPUT_TYPES(cls):
-        return {
-            "required": {
-                "value": (IO.ANY, {}),
-                "timeout": ("FLOAT", {"default": 1.0, "min": 0.1, "max": 10.0}),
-                "operation_time": ("FLOAT", {"default": 2.0, "min": 0.1, "max": 10.0}),
-            },
-        }
-
-    RETURN_TYPES = (IO.ANY,)
-    FUNCTION = "timeout_execution"
-    CATEGORY = "_for_testing/async"
-
-    async def timeout_execution(self, value, timeout, operation_time):
-        try:
-            # This will timeout if operation_time > timeout
-            await asyncio.wait_for(asyncio.sleep(operation_time), timeout=timeout)
-            return (value,)
-        except asyncio.TimeoutError:
-            raise RuntimeError(f"Operation timed out after {timeout} seconds")
-
-
-class TestSyncError(ComfyNodeABC):
-    """Test node that errors synchronously (for mixed sync/async testing)."""
-
-    @classmethod
-    def INPUT_TYPES(cls):
-        return {
-            "required": {
-                "value": (IO.ANY, {}),
-            },
-        }
-
-    RETURN_TYPES = (IO.ANY,)
-    FUNCTION = "sync_error"
-    CATEGORY = "_for_testing/async"
-
-    def sync_error(self, value):
-        raise RuntimeError("Intentional sync execution error for testing")
-
-
-class TestAsyncLazyCheck(ComfyNodeABC):
-    """Test node with async check_lazy_status."""
-
-    @classmethod
-    def INPUT_TYPES(cls):
-        return {
-            "required": {
-                "input1": (IO.ANY, {"lazy": True}),
-                "input2": (IO.ANY, {"lazy": True}),
-                "condition": ("BOOLEAN", {"default": True}),
-            },
-        }
-
-    RETURN_TYPES = ("IMAGE",)
-    FUNCTION = "process"
-    CATEGORY = "_for_testing/async"
-
-    async def check_lazy_status(self, condition, input1, input2):
-        # Simulate async checking (e.g., querying remote service)
-        await asyncio.sleep(0.05)
-
-        needed = []
-        if condition and input1 is None:
-            needed.append("input1")
-        if not condition and input2 is None:
-            needed.append("input2")
-        return needed
-
-    def process(self, input1, input2, condition):
-        # Return a simple image
-        return (torch.ones([1, 512, 512, 3]),)
-
-
-class TestDynamicAsyncGeneration(ComfyNodeABC):
-    """Test node that dynamically generates async nodes."""
-
-    @classmethod
-    def INPUT_TYPES(cls):
-        return {
-            "required": {
-                "image1": ("IMAGE",),
-                "image2": ("IMAGE",),
-                "num_async_nodes": ("INT", {"default": 3, "min": 1, "max": 10}),
-                "sleep_duration": ("FLOAT", {"default": 0.2, "min": 0.1, "max": 1.0}),
-            },
-        }
-
-    RETURN_TYPES = ("IMAGE",)
-    FUNCTION = "generate_async_workflow"
-    CATEGORY = "_for_testing/async"
-
-    def generate_async_workflow(self, image1, image2, num_async_nodes, sleep_duration):
-        g = GraphBuilder()
-
-        # Create multiple async sleep nodes
-        sleep_nodes = []
-        for i in range(num_async_nodes):
-            image = image1 if i % 2 == 0 else image2
-            sleep_node = g.node("TestSleep", value=image, seconds=sleep_duration)
-            sleep_nodes.append(sleep_node)
-
-        # Average all results
-        if len(sleep_nodes) == 1:
-            final_node = sleep_nodes[0]
-        else:
-            avg_inputs = {"input1": sleep_nodes[0].out(0)}
-            for i, node in enumerate(sleep_nodes[1:], 2):
-                avg_inputs[f"input{i}"] = node.out(0)
-            final_node = g.node("TestVariadicAverage", **avg_inputs)
-
-        return {
-            "result": (final_node.out(0),),
-            "expand": g.finalize(),
-        }
-
-
-class TestAsyncResourceUser(ComfyNodeABC):
-    """Test node that uses resources during async execution."""
-
-    # Class-level resource tracking for testing
-    _active_resources: Dict[str, bool] = {}
-
-    @classmethod
-    def INPUT_TYPES(cls):
-        return {
-            "required": {
-                "value": (IO.ANY, {}),
-                "resource_id": ("STRING", {"default": "resource_0"}),
-                "duration": ("FLOAT", {"default": 0.1, "min": 0.0, "max": 1.0}),
-            },
-        }
-
-    RETURN_TYPES = (IO.ANY,)
-    FUNCTION = "use_resource"
-    CATEGORY = "_for_testing/async"
-
-    async def use_resource(self, value, resource_id, duration):
-        # Check if resource is already in use
-        if self._active_resources.get(resource_id, False):
-            raise RuntimeError(f"Resource {resource_id} is already in use!")
-
-        # Mark resource as in use
-        self._active_resources[resource_id] = True
-
-        try:
-            # Simulate resource usage
-            await asyncio.sleep(duration)
-            return (value,)
-        finally:
-            # Always clean up resource
-            self._active_resources[resource_id] = False
-
-
-class TestAsyncBatchProcessing(ComfyNodeABC):
-    """Test async processing of batched inputs."""
-
-    @classmethod
-    def INPUT_TYPES(cls):
-        return {
-            "required": {
-                "images": ("IMAGE",),
-                "process_time_per_item": ("FLOAT", {"default": 0.1, "min": 0.01, "max": 1.0}),
-            },
-            "hidden": {
-                "unique_id": "UNIQUE_ID",
-            },
-        }
-
-    RETURN_TYPES = ("IMAGE",)
-    FUNCTION = "process_batch"
-    CATEGORY = "_for_testing/async"
-
-    async def process_batch(self, images, process_time_per_item, unique_id):
-        batch_size = images.shape[0]
-        pbar = ProgressBar(batch_size, node_id=unique_id)
-
-        # Process each image in the batch
-        processed = []
-        for i in range(batch_size):
-            # Simulate async processing
-            await asyncio.sleep(process_time_per_item)
-
-            # Simple processing: invert the image
-            processed_image = 1.0 - images[i:i+1]
-            processed.append(processed_image)
-
-            pbar.update(1)
-
-        # Stack processed images
-        result = torch.cat(processed, dim=0)
-        return (result,)
-
-
-class TestAsyncConcurrentLimit(ComfyNodeABC):
-    """Test concurrent execution limits for async nodes."""
-
-    _semaphore = asyncio.Semaphore(2)  # Only allow 2 concurrent executions
-
-    @classmethod
-    def INPUT_TYPES(cls):
-        return {
-            "required": {
-                "value": (IO.ANY, {}),
-                "duration": ("FLOAT", {"default": 0.5, "min": 0.1, "max": 2.0}),
-                "node_id": ("INT", {"default": 0}),
-            },
-        }
-
-    RETURN_TYPES = (IO.ANY,)
-    FUNCTION = "limited_execution"
-    CATEGORY = "_for_testing/async"
-
-    async def limited_execution(self, value, duration, node_id):
-        async with self._semaphore:
-            # Node {node_id} acquired semaphore
-            await asyncio.sleep(duration)
-            # Node {node_id} releasing semaphore
-            return (value,)
-
-
-# Add node mappings
-ASYNC_TEST_NODE_CLASS_MAPPINGS = {
-    "TestAsyncValidation": TestAsyncValidation,
-    "TestAsyncError": TestAsyncError,
-    "TestAsyncValidationError": TestAsyncValidationError,
-    "TestAsyncTimeout": TestAsyncTimeout,
-    "TestSyncError": TestSyncError,
-    "TestAsyncLazyCheck": TestAsyncLazyCheck,
-    "TestDynamicAsyncGeneration": TestDynamicAsyncGeneration,
-    "TestAsyncResourceUser": TestAsyncResourceUser,
-    "TestAsyncBatchProcessing": TestAsyncBatchProcessing,
-    "TestAsyncConcurrentLimit": TestAsyncConcurrentLimit,
-}
-
-ASYNC_TEST_NODE_DISPLAY_NAME_MAPPINGS = {
-    "TestAsyncValidation": "Test Async Validation",
-    "TestAsyncError": "Test Async Error",
-    "TestAsyncValidationError": "Test Async Validation Error",
-    "TestAsyncTimeout": "Test Async Timeout",
-    "TestSyncError": "Test Sync Error",
-    "TestAsyncLazyCheck": "Test Async Lazy Check",
-    "TestDynamicAsyncGeneration": "Test Dynamic Async Generation",
-    "TestAsyncResourceUser": "Test Async Resource User",
-    "TestAsyncBatchProcessing": "Test Async Batch Processing",
-    "TestAsyncConcurrentLimit": "Test Async Concurrent Limit",
-}
Author	SHA1	Message	Date
Jacob Segal	1316e608c9	Add the websocket library for automated tests	2025-06-13 21:51:32 -07:00
Jacob Segal	8d28c17369	Add a missing file It looks like this got caught by .gitignore? There's probably a better place to put it, but I'm not sure what that is.	2025-06-13 21:45:21 -07:00
Jacob Segal	6df907c413	Add the execution model tests to CI	2025-06-13 21:39:26 -07:00
Jacob Segal	f1dc13037e	Support for async execution functions This commit adds support for node execution functions defined as async. When a node's execution function is defined as async, we can continue executing other nodes while it is processing. Standard uses of `await` should "just work", but people will still have to be careful if they spawn actual threads. Because torch doesn't really have async/await versions of functions, this won't particularly help with most locally-executing nodes, but it does work for e.g. web requests to other machines. In addition to the execute function, the `VALIDATE_INPUTS` and `check_lazy_status` functions can also be defined as async, though we'll only resolve one node at a time right now for those.	2025-06-13 21:39:26 -07:00