Add the websocket library for automated tests

Add a missing file
It looks like this file was caught by .gitignore. There is probably a better place to put it, but I'm not sure where that would be.
2026-02-12 11:10:03 +00:00 · 2025-06-13 21:51:32 -07:00 · 2025-06-13 21:45:21 -07:00 · 2025-06-13 21:39:26 -07:00 · 2025-06-13 21:39:26 -07:00
47 changed files with 886 additions and 152921 deletions
--- a/.github/workflows/test-unit.yml
+++ b/.github/workflows/test-unit.yml
@@ -28,3 +28,7 @@ jobs:
      run: |
        pip install -r tests-unit/requirements.txt
        python -m pytest tests-unit
+    - name: Run Execution Model Tests
+      run: |
+        python -m pytest tests/inference/test_execution.py
+
--- a/README.md
+++ b/README.md
@@ -65,16 +65,12 @@ See what ComfyUI can do with the [example workflows](https://comfyanonymous.gith
   - [Flux](https://comfyanonymous.github.io/ComfyUI_examples/flux/)
   - [Lumina Image 2.0](https://comfyanonymous.github.io/ComfyUI_examples/lumina2/)
   - [HiDream](https://comfyanonymous.github.io/ComfyUI_examples/hidream/)
-   - [Cosmos Predict2](https://comfyanonymous.github.io/ComfyUI_examples/cosmos_predict2/)
- Image Editing Models
-   - [Omnigen 2](https://comfyanonymous.github.io/ComfyUI_examples/omnigen/)
-   - [Flux Kontext](https://comfyanonymous.github.io/ComfyUI_examples/flux/#flux-kontext-image-editing-model)
 - Video Models
   - [Stable Video Diffusion](https://comfyanonymous.github.io/ComfyUI_examples/video/)
   - [Mochi](https://comfyanonymous.github.io/ComfyUI_examples/mochi/)
   - [LTX-Video](https://comfyanonymous.github.io/ComfyUI_examples/ltxv/)
   - [Hunyuan Video](https://comfyanonymous.github.io/ComfyUI_examples/hunyuan_video/)
-   - [Nvidia Cosmos](https://comfyanonymous.github.io/ComfyUI_examples/cosmos/) and [Cosmos Predict2](https://comfyanonymous.github.io/ComfyUI_examples/cosmos_predict2/)
+   - [Nvidia Cosmos](https://comfyanonymous.github.io/ComfyUI_examples/cosmos/)
   - [Wan 2.1](https://comfyanonymous.github.io/ComfyUI_examples/wan/)
 - Audio Models
   - [Stable Audio](https://comfyanonymous.github.io/ComfyUI_examples/audio/)
@@ -276,8 +272,6 @@ You can install ComfyUI in Apple Mac silicon (M1 or M2) with any recent macOS ve

 #### DirectML (AMD Cards on Windows)

-This is very badly supported and is not recommended. There are some unofficial builds of pytorch ROCm on windows that exist that will give you a much better experience than this. This readme will be updated once official pytorch ROCm builds for windows come out.
-
 ```pip install torch-directml``` Then you can launch ComfyUI with: ```python main.py --directml```

 #### Ascend NPUs
--- a/comfy/k_diffusion/sampling.py
+++ b/comfy/k_diffusion/sampling.py
@@ -1,5 +1,4 @@
 import math
-from functools import partial

 from scipy import integrate
 import torch
@@ -143,33 +142,6 @@ class BrownianTreeNoiseSampler:
        return self.tree(t0, t1) / (t1 - t0).abs().sqrt()


-def sigma_to_half_log_snr(sigma, model_sampling):
-    """Convert sigma to half-logSNR log(alpha_t / sigma_t)."""
-    if isinstance(model_sampling, comfy.model_sampling.CONST):
-        # log((1 - t) / t) = log((1 - sigma) / sigma)
-        return sigma.logit().neg()
-    return sigma.log().neg()
-
-
-def half_log_snr_to_sigma(half_log_snr, model_sampling):
-    """Convert half-logSNR log(alpha_t / sigma_t) to sigma."""
-    if isinstance(model_sampling, comfy.model_sampling.CONST):
-        # 1 / (1 + exp(half_log_snr))
-        return half_log_snr.neg().sigmoid()
-    return half_log_snr.neg().exp()
-
-
-def offset_first_sigma_for_snr(sigmas, model_sampling, percent_offset=1e-4):
-    """Adjust the first sigma to avoid invalid logSNR."""
-    if len(sigmas) <= 1:
-        return sigmas
-    if isinstance(model_sampling, comfy.model_sampling.CONST):
-        if sigmas[0] >= 1:
-            sigmas = sigmas.clone()
-            sigmas[0] = model_sampling.percent_to_sigma(percent_offset)
-    return sigmas
-
-
@torch.no_grad()
 def sample_euler(model, x, sigmas, extra_args=None, callback=None, disable=None, s_churn=0., s_tmin=0., s_tmax=float('inf'), s_noise=1.):
    """Implements Algorithm 2 (Euler steps) from Karras et al. (2022)."""
@@ -710,7 +682,6 @@ def sample_dpmpp_2s_ancestral_RF(model, x, sigmas, extra_args=None, callback=Non
        # logged_x = torch.cat((logged_x, x.unsqueeze(0)), dim=0)
    return x

-
@torch.no_grad()
 def sample_dpmpp_sde(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, r=1 / 2):
    """DPM-Solver++ (stochastic)."""
@@ -722,49 +693,38 @@ def sample_dpmpp_sde(model, x, sigmas, extra_args=None, callback=None, disable=N
    seed = extra_args.get("seed", None)
    noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=seed, cpu=True) if noise_sampler is None else noise_sampler
    s_in = x.new_ones([x.shape[0]])
-
-    model_sampling = model.inner_model.model_patcher.get_model_object('model_sampling')
-    sigma_fn = partial(half_log_snr_to_sigma, model_sampling=model_sampling)
-    lambda_fn = partial(sigma_to_half_log_snr, model_sampling=model_sampling)
-    sigmas = offset_first_sigma_for_snr(sigmas, model_sampling)
+    sigma_fn = lambda t: t.neg().exp()
+    t_fn = lambda sigma: sigma.log().neg()

    for i in trange(len(sigmas) - 1, disable=disable):
        denoised = model(x, sigmas[i] * s_in, **extra_args)
        if callback is not None:
            callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised})
        if sigmas[i + 1] == 0:
-            # Denoising step
-            x = denoised
+            # Euler method
+            d = to_d(x, sigmas[i], denoised)
+            dt = sigmas[i + 1] - sigmas[i]
+            x = x + d * dt
        else:
            # DPM-Solver++
-            lambda_s, lambda_t = lambda_fn(sigmas[i]), lambda_fn(sigmas[i + 1])
-            h = lambda_t - lambda_s
-            lambda_s_1 = lambda_s + r * h
+            t, t_next = t_fn(sigmas[i]), t_fn(sigmas[i + 1])
+            h = t_next - t
+            s = t + h * r
            fac = 1 / (2 * r)

-            sigma_s_1 = sigma_fn(lambda_s_1)
-
-            alpha_s = sigmas[i] * lambda_s.exp()
-            alpha_s_1 = sigma_s_1 * lambda_s_1.exp()
-            alpha_t = sigmas[i + 1] * lambda_t.exp()
-
            # Step 1
-            sd, su = get_ancestral_step(lambda_s.neg().exp(), lambda_s_1.neg().exp(), eta)
-            lambda_s_1_ = sd.log().neg()
-            h_ = lambda_s_1_ - lambda_s
-            x_2 = (alpha_s_1 / alpha_s) * (-h_).exp() * x - alpha_s_1 * (-h_).expm1() * denoised
-            if eta > 0 and s_noise > 0:
-                x_2 = x_2 + alpha_s_1 * noise_sampler(sigmas[i], sigma_s_1) * s_noise * su
-            denoised_2 = model(x_2, sigma_s_1 * s_in, **extra_args)
+            sd, su = get_ancestral_step(sigma_fn(t), sigma_fn(s), eta)
+            s_ = t_fn(sd)
+            x_2 = (sigma_fn(s_) / sigma_fn(t)) * x - (t - s_).expm1() * denoised
+            x_2 = x_2 + noise_sampler(sigma_fn(t), sigma_fn(s)) * s_noise * su
+            denoised_2 = model(x_2, sigma_fn(s) * s_in, **extra_args)

            # Step 2
-            sd, su = get_ancestral_step(lambda_s.neg().exp(), lambda_t.neg().exp(), eta)
-            lambda_t_ = sd.log().neg()
-            h_ = lambda_t_ - lambda_s
+            sd, su = get_ancestral_step(sigma_fn(t), sigma_fn(t_next), eta)
+            t_next_ = t_fn(sd)
            denoised_d = (1 - fac) * denoised + fac * denoised_2
-            x = (alpha_t / alpha_s) * (-h_).exp() * x - alpha_t * (-h_).expm1() * denoised_d
-            if eta > 0 and s_noise > 0:
-                x = x + alpha_t * noise_sampler(sigmas[i], sigmas[i + 1]) * s_noise * su
+            x = (sigma_fn(t_next_) / sigma_fn(t)) * x - (t - t_next_).expm1() * denoised_d
+            x = x + noise_sampler(sigma_fn(t), sigma_fn(t_next)) * s_noise * su
    return x


@@ -793,7 +753,6 @@ def sample_dpmpp_2m(model, x, sigmas, extra_args=None, callback=None, disable=No
        old_denoised = denoised
    return x

-
@torch.no_grad()
 def sample_dpmpp_2m_sde(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, solver_type='midpoint'):
    """DPM-Solver++(2M) SDE."""
@@ -809,12 +768,9 @@ def sample_dpmpp_2m_sde(model, x, sigmas, extra_args=None, callback=None, disabl
    noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=seed, cpu=True) if noise_sampler is None else noise_sampler
    s_in = x.new_ones([x.shape[0]])

-    model_sampling = model.inner_model.model_patcher.get_model_object('model_sampling')
-    lambda_fn = partial(sigma_to_half_log_snr, model_sampling=model_sampling)
-    sigmas = offset_first_sigma_for_snr(sigmas, model_sampling)
-
    old_denoised = None
-    h, h_last = None, None
+    h_last = None
+    h = None

    for i in trange(len(sigmas) - 1, disable=disable):
        denoised = model(x, sigmas[i] * s_in, **extra_args)
@@ -825,29 +781,26 @@ def sample_dpmpp_2m_sde(model, x, sigmas, extra_args=None, callback=None, disabl
            x = denoised
        else:
            # DPM-Solver++(2M) SDE
-            lambda_s, lambda_t = lambda_fn(sigmas[i]), lambda_fn(sigmas[i + 1])
-            h = lambda_t - lambda_s
-            h_eta = h * (eta + 1)
+            t, s = -sigmas[i].log(), -sigmas[i + 1].log()
+            h = s - t
+            eta_h = eta * h

-            alpha_t = sigmas[i + 1] * lambda_t.exp()
-
-            x = sigmas[i + 1] / sigmas[i] * (-h * eta).exp() * x + alpha_t * (-h_eta).expm1().neg() * denoised
+            x = sigmas[i + 1] / sigmas[i] * (-eta_h).exp() * x + (-h - eta_h).expm1().neg() * denoised

            if old_denoised is not None:
                r = h_last / h
                if solver_type == 'heun':
-                    x = x + alpha_t * ((-h_eta).expm1().neg() / (-h_eta) + 1) * (1 / r) * (denoised - old_denoised)
+                    x = x + ((-h - eta_h).expm1().neg() / (-h - eta_h) + 1) * (1 / r) * (denoised - old_denoised)
                elif solver_type == 'midpoint':
-                    x = x + 0.5 * alpha_t * (-h_eta).expm1().neg() * (1 / r) * (denoised - old_denoised)
+                    x = x + 0.5 * (-h - eta_h).expm1().neg() * (1 / r) * (denoised - old_denoised)

-            if eta > 0 and s_noise > 0:
-                x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * sigmas[i + 1] * (-2 * h * eta).expm1().neg().sqrt() * s_noise
+            if eta:
+                x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * sigmas[i + 1] * (-2 * eta_h).expm1().neg().sqrt() * s_noise

        old_denoised = denoised
        h_last = h
    return x

-
@torch.no_grad()
 def sample_dpmpp_3m_sde(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None):
    """DPM-Solver++(3M) SDE."""
@@ -861,10 +814,6 @@ def sample_dpmpp_3m_sde(model, x, sigmas, extra_args=None, callback=None, disabl
    noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=seed, cpu=True) if noise_sampler is None else noise_sampler
    s_in = x.new_ones([x.shape[0]])

-    model_sampling = model.inner_model.model_patcher.get_model_object('model_sampling')
-    lambda_fn = partial(sigma_to_half_log_snr, model_sampling=model_sampling)
-    sigmas = offset_first_sigma_for_snr(sigmas, model_sampling)
-
    denoised_1, denoised_2 = None, None
    h, h_1, h_2 = None, None, None

@@ -876,16 +825,13 @@ def sample_dpmpp_3m_sde(model, x, sigmas, extra_args=None, callback=None, disabl
            # Denoising step
            x = denoised
        else:
-            lambda_s, lambda_t = lambda_fn(sigmas[i]), lambda_fn(sigmas[i + 1])
-            h = lambda_t - lambda_s
+            t, s = -sigmas[i].log(), -sigmas[i + 1].log()
+            h = s - t
            h_eta = h * (eta + 1)

-            alpha_t = sigmas[i + 1] * lambda_t.exp()
-
-            x = sigmas[i + 1] / sigmas[i] * (-h * eta).exp() * x + alpha_t * (-h_eta).expm1().neg() * denoised
+            x = torch.exp(-h_eta) * x + (-h_eta).expm1().neg() * denoised

            if h_2 is not None:
-                # DPM-Solver++(3M) SDE
                r0 = h_1 / h
                r1 = h_2 / h
                d1_0 = (denoised - denoised_1) / r0
@@ -894,22 +840,20 @@ def sample_dpmpp_3m_sde(model, x, sigmas, extra_args=None, callback=None, disabl
                d2 = (d1_0 - d1_1) / (r0 + r1)
                phi_2 = h_eta.neg().expm1() / h_eta + 1
                phi_3 = phi_2 / h_eta - 0.5
-                x = x + (alpha_t * phi_2) * d1 - (alpha_t * phi_3) * d2
+                x = x + phi_2 * d1 - phi_3 * d2
            elif h_1 is not None:
-                # DPM-Solver++(2M) SDE
                r = h_1 / h
                d = (denoised - denoised_1) / r
                phi_2 = h_eta.neg().expm1() / h_eta + 1
-                x = x + (alpha_t * phi_2) * d
+                x = x + phi_2 * d

-            if eta > 0 and s_noise > 0:
+            if eta:
                x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * sigmas[i + 1] * (-2 * h * eta).expm1().neg().sqrt() * s_noise

        denoised_1, denoised_2 = denoised, denoised_1
        h_1, h_2 = h, h_1
    return x

-
@torch.no_grad()
 def sample_dpmpp_3m_sde_gpu(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None):
    if len(sigmas) <= 1:
@@ -919,7 +863,6 @@ def sample_dpmpp_3m_sde_gpu(model, x, sigmas, extra_args=None, callback=None, di
    noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=extra_args.get("seed", None), cpu=False) if noise_sampler is None else noise_sampler
    return sample_dpmpp_3m_sde(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, eta=eta, s_noise=s_noise, noise_sampler=noise_sampler)

-
@torch.no_grad()
 def sample_dpmpp_2m_sde_gpu(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, solver_type='midpoint'):
    if len(sigmas) <= 1:
@@ -929,7 +872,6 @@ def sample_dpmpp_2m_sde_gpu(model, x, sigmas, extra_args=None, callback=None, di
    noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=extra_args.get("seed", None), cpu=False) if noise_sampler is None else noise_sampler
    return sample_dpmpp_2m_sde(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, eta=eta, s_noise=s_noise, noise_sampler=noise_sampler, solver_type=solver_type)

-
@torch.no_grad()
 def sample_dpmpp_sde_gpu(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, r=1 / 2):
    if len(sigmas) <= 1:
@@ -1507,12 +1449,12 @@ def sample_er_sde(model, x, sigmas, extra_args=None, callback=None, disable=None
        old_denoised = denoised
    return x

-
@torch.no_grad()
 def sample_seeds_2(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, r=0.5):
-    """SEEDS-2 - Stochastic Explicit Exponential Derivative-free Solvers (VP Data Prediction) stage 2.
-    arXiv: https://arxiv.org/abs/2305.14267
-    """
+    '''
+    SEEDS-2 - Stochastic Explicit Exponential Derivative-free Solvers (VE Data Prediction) stage 2
+    Arxiv: https://arxiv.org/abs/2305.14267
+    '''
    extra_args = {} if extra_args is None else extra_args
    seed = extra_args.get("seed", None)
    noise_sampler = default_noise_sampler(x, seed=seed) if noise_sampler is None else noise_sampler
@@ -1520,11 +1462,6 @@ def sample_seeds_2(model, x, sigmas, extra_args=None, callback=None, disable=Non

    inject_noise = eta > 0 and s_noise > 0

-    model_sampling = model.inner_model.model_patcher.get_model_object('model_sampling')
-    sigma_fn = partial(half_log_snr_to_sigma, model_sampling=model_sampling)
-    lambda_fn = partial(sigma_to_half_log_snr, model_sampling=model_sampling)
-    sigmas = offset_first_sigma_for_snr(sigmas, model_sampling)
-
    for i in trange(len(sigmas) - 1, disable=disable):
        denoised = model(x, sigmas[i] * s_in, **extra_args)
        if callback is not None:
@@ -1532,43 +1469,38 @@ def sample_seeds_2(model, x, sigmas, extra_args=None, callback=None, disable=Non
        if sigmas[i + 1] == 0:
            x = denoised
        else:
-            lambda_s, lambda_t = lambda_fn(sigmas[i]), lambda_fn(sigmas[i + 1])
-            h = lambda_t - lambda_s
+            t, t_next = -sigmas[i].log(), -sigmas[i + 1].log()
+            h = t_next - t
            h_eta = h * (eta + 1)
-            lambda_s_1 = lambda_s + r * h
+            s = t + r * h
            fac = 1 / (2 * r)
-            sigma_s_1 = sigma_fn(lambda_s_1)
-
-            # alpha_t = sigma_t * exp(log(alpha_t / sigma_t)) = sigma_t * exp(lambda_t)
-            alpha_s_1 = sigma_s_1 * lambda_s_1.exp()
-            alpha_t = sigmas[i + 1] * lambda_t.exp()
+            sigma_s = s.neg().exp()

            coeff_1, coeff_2 = (-r * h_eta).expm1(), (-h_eta).expm1()
            if inject_noise:
-                # 0 < r < 1
                noise_coeff_1 = (-2 * r * h * eta).expm1().neg().sqrt()
-                noise_coeff_2 = (-r * h * eta).exp() * (-2 * (1 - r) * h * eta).expm1().neg().sqrt()
-                noise_1, noise_2 = noise_sampler(sigmas[i], sigma_s_1), noise_sampler(sigma_s_1, sigmas[i + 1])
+                noise_coeff_2 = ((-2 * r * h * eta).expm1() - (-2 * h * eta).expm1()).sqrt()
+                noise_1, noise_2 = noise_sampler(sigmas[i], sigma_s), noise_sampler(sigma_s, sigmas[i + 1])

            # Step 1
-            x_2 = sigma_s_1 / sigmas[i] * (-r * h * eta).exp() * x - alpha_s_1 * coeff_1 * denoised
+            x_2 = (coeff_1 + 1) * x - coeff_1 * denoised
            if inject_noise:
-                x_2 = x_2 + sigma_s_1 * (noise_coeff_1 * noise_1) * s_noise
-            denoised_2 = model(x_2, sigma_s_1 * s_in, **extra_args)
+                x_2 = x_2 + sigma_s * (noise_coeff_1 * noise_1) * s_noise
+            denoised_2 = model(x_2, sigma_s * s_in, **extra_args)

            # Step 2
            denoised_d = (1 - fac) * denoised + fac * denoised_2
-            x = sigmas[i + 1] / sigmas[i] * (-h * eta).exp() * x - alpha_t * coeff_2 * denoised_d
+            x = (coeff_2 + 1) * x - coeff_2 * denoised_d
            if inject_noise:
                x = x + sigmas[i + 1] * (noise_coeff_2 * noise_1 + noise_coeff_1 * noise_2) * s_noise
    return x

-
@torch.no_grad()
 def sample_seeds_3(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, r_1=1./3, r_2=2./3):
-    """SEEDS-3 - Stochastic Explicit Exponential Derivative-free Solvers (VP Data Prediction) stage 3.
-    arXiv: https://arxiv.org/abs/2305.14267
-    """
+    '''
+    SEEDS-3 - Stochastic Explicit Exponential Derivative-free Solvers (VE Data Prediction) stage 3
+    Arxiv: https://arxiv.org/abs/2305.14267
+    '''
    extra_args = {} if extra_args is None else extra_args
    seed = extra_args.get("seed", None)
    noise_sampler = default_noise_sampler(x, seed=seed) if noise_sampler is None else noise_sampler
@@ -1576,11 +1508,6 @@ def sample_seeds_3(model, x, sigmas, extra_args=None, callback=None, disable=Non

    inject_noise = eta > 0 and s_noise > 0

-    model_sampling = model.inner_model.model_patcher.get_model_object('model_sampling')
-    sigma_fn = partial(half_log_snr_to_sigma, model_sampling=model_sampling)
-    lambda_fn = partial(sigma_to_half_log_snr, model_sampling=model_sampling)
-    sigmas = offset_first_sigma_for_snr(sigmas, model_sampling)
-
    for i in trange(len(sigmas) - 1, disable=disable):
        denoised = model(x, sigmas[i] * s_in, **extra_args)
        if callback is not None:
@@ -1588,40 +1515,34 @@ def sample_seeds_3(model, x, sigmas, extra_args=None, callback=None, disable=Non
        if sigmas[i + 1] == 0:
            x = denoised
        else:
-            lambda_s, lambda_t = lambda_fn(sigmas[i]), lambda_fn(sigmas[i + 1])
-            h = lambda_t - lambda_s
+            t, t_next = -sigmas[i].log(), -sigmas[i + 1].log()
+            h = t_next - t
            h_eta = h * (eta + 1)
-            lambda_s_1 = lambda_s + r_1 * h
-            lambda_s_2 = lambda_s + r_2 * h
-            sigma_s_1, sigma_s_2 = sigma_fn(lambda_s_1), sigma_fn(lambda_s_2)
-
-            # alpha_t = sigma_t * exp(log(alpha_t / sigma_t)) = sigma_t * exp(lambda_t)
-            alpha_s_1 = sigma_s_1 * lambda_s_1.exp()
-            alpha_s_2 = sigma_s_2 * lambda_s_2.exp()
-            alpha_t = sigmas[i + 1] * lambda_t.exp()
+            s_1 = t + r_1 * h
+            s_2 = t + r_2 * h
+            sigma_s_1, sigma_s_2 = s_1.neg().exp(), s_2.neg().exp()

            coeff_1, coeff_2, coeff_3 = (-r_1 * h_eta).expm1(), (-r_2 * h_eta).expm1(), (-h_eta).expm1()
            if inject_noise:
-                # 0 < r_1 < r_2 < 1
                noise_coeff_1 = (-2 * r_1 * h * eta).expm1().neg().sqrt()
-                noise_coeff_2 = (-r_1 * h * eta).exp() * (-2 * (r_2 - r_1) * h * eta).expm1().neg().sqrt()
-                noise_coeff_3 = (-r_2 * h * eta).exp() * (-2 * (1 - r_2) * h * eta).expm1().neg().sqrt()
+                noise_coeff_2 = ((-2 * r_1 * h * eta).expm1() - (-2 * r_2 * h * eta).expm1()).sqrt()
+                noise_coeff_3 = ((-2 * r_2 * h * eta).expm1() - (-2 * h * eta).expm1()).sqrt()
                noise_1, noise_2, noise_3 = noise_sampler(sigmas[i], sigma_s_1), noise_sampler(sigma_s_1, sigma_s_2), noise_sampler(sigma_s_2, sigmas[i + 1])

            # Step 1
-            x_2 = sigma_s_1 / sigmas[i] * (-r_1 * h * eta).exp() * x - alpha_s_1 * coeff_1 * denoised
+            x_2 = (coeff_1 + 1) * x - coeff_1 * denoised
            if inject_noise:
                x_2 = x_2 + sigma_s_1 * (noise_coeff_1 * noise_1) * s_noise
            denoised_2 = model(x_2, sigma_s_1 * s_in, **extra_args)

            # Step 2
-            x_3 = sigma_s_2 / sigmas[i] * (-r_2 * h * eta).exp() * x - alpha_s_2 * coeff_2 * denoised + (r_2 / r_1) * alpha_s_2 * (coeff_2 / (r_2 * h_eta) + 1) * (denoised_2 - denoised)
+            x_3 = (coeff_2 + 1) * x - coeff_2 * denoised + (r_2 / r_1) * (coeff_2 / (r_2 * h_eta) + 1) * (denoised_2 - denoised)
            if inject_noise:
                x_3 = x_3 + sigma_s_2 * (noise_coeff_2 * noise_1 + noise_coeff_1 * noise_2) * s_noise
            denoised_3 = model(x_3, sigma_s_2 * s_in, **extra_args)

            # Step 3
-            x = sigmas[i + 1] / sigmas[i] * (-h * eta).exp() * x - alpha_t * coeff_3 * denoised + (1. / r_2) * alpha_t * (coeff_3 / h_eta + 1) * (denoised_3 - denoised)
+            x = (coeff_3 + 1) * x - coeff_3 * denoised + (1. / r_2) * (coeff_3 / h_eta + 1) * (denoised_3 - denoised)
            if inject_noise:
                x = x + sigmas[i + 1] * (noise_coeff_3 * noise_1 + noise_coeff_2 * noise_2 + noise_coeff_1 * noise_3) * s_noise
    return x
--- a/comfy/ldm/cosmos/predict2.py
+++ b/comfy/ldm/cosmos/predict2.py
@@ -70,7 +70,11 @@ def torch_attention_op(q_B_S_H_D: torch.Tensor, k_B_S_H_D: torch.Tensor, v_B_S_H
    q_B_H_S_D = rearrange(q_B_S_H_D, "b ... h k -> b h ... k").view(in_q_shape[0], in_q_shape[-2], -1, in_q_shape[-1])
    k_B_H_S_D = rearrange(k_B_S_H_D, "b ... h v -> b h ... v").view(in_k_shape[0], in_k_shape[-2], -1, in_k_shape[-1])
    v_B_H_S_D = rearrange(v_B_S_H_D, "b ... h v -> b h ... v").view(in_k_shape[0], in_k_shape[-2], -1, in_k_shape[-1])
-    return optimized_attention(q_B_H_S_D, k_B_H_S_D, v_B_H_S_D, in_q_shape[-2], skip_reshape=True)
+    result_B_S_HD = rearrange(
+        optimized_attention(q_B_H_S_D, k_B_H_S_D, v_B_H_S_D, in_q_shape[-2], skip_reshape=True, skip_output_reshape=True), "b h ... l -> b ... (h l)"
+    )
+
+    return result_B_S_HD


 class Attention(nn.Module):
--- a/comfy/ldm/flux/controlnet.py
+++ b/comfy/ldm/flux/controlnet.py
@@ -123,8 +123,6 @@ class ControlNetFlux(Flux):

        if y is None:
            y = torch.zeros((img.shape[0], self.params.vec_in_dim), device=img.device, dtype=img.dtype)
-        else:
-            y = y[:, :self.params.vec_in_dim]

        # running on sequences img
        img = self.img_in(img)
--- a/comfy/ldm/flux/layers.py
+++ b/comfy/ldm/flux/layers.py
@@ -118,7 +118,7 @@ class Modulation(nn.Module):
 def apply_mod(tensor, m_mult, m_add=None, modulation_dims=None):
    if modulation_dims is None:
        if m_add is not None:
-            return torch.addcmul(m_add, tensor, m_mult)
+            return tensor * m_mult + m_add
        else:
            return tensor * m_mult
    else:
--- a/comfy/ldm/flux/model.py
+++ b/comfy/ldm/flux/model.py
@@ -195,50 +195,20 @@ class Flux(nn.Module):
        img = self.final_layer(img, vec)  # (N, T, patch_size ** 2 * out_channels)
        return img

-    def process_img(self, x, index=0, h_offset=0, w_offset=0):
+    def forward(self, x, timestep, context, y=None, guidance=None, control=None, transformer_options={}, **kwargs):
        bs, c, h, w = x.shape
        patch_size = self.patch_size
        x = comfy.ldm.common_dit.pad_to_patch_size(x, (patch_size, patch_size))

        img = rearrange(x, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size)
+
        h_len = ((h + (patch_size // 2)) // patch_size)
        w_len = ((w + (patch_size // 2)) // patch_size)
-
-        h_offset = ((h_offset + (patch_size // 2)) // patch_size)
-        w_offset = ((w_offset + (patch_size // 2)) // patch_size)
-
        img_ids = torch.zeros((h_len, w_len, 3), device=x.device, dtype=x.dtype)
-        img_ids[:, :, 0] = img_ids[:, :, 1] + index
-        img_ids[:, :, 1] = img_ids[:, :, 1] + torch.linspace(h_offset, h_len - 1 + h_offset, steps=h_len, device=x.device, dtype=x.dtype).unsqueeze(1)
-        img_ids[:, :, 2] = img_ids[:, :, 2] + torch.linspace(w_offset, w_len - 1 + w_offset, steps=w_len, device=x.device, dtype=x.dtype).unsqueeze(0)
-        return img, repeat(img_ids, "h w c -> b (h w) c", b=bs)
-
-    def forward(self, x, timestep, context, y=None, guidance=None, ref_latents=None, control=None, transformer_options={}, **kwargs):
-        bs, c, h_orig, w_orig = x.shape
-        patch_size = self.patch_size
-
-        h_len = ((h_orig + (patch_size // 2)) // patch_size)
-        w_len = ((w_orig + (patch_size // 2)) // patch_size)
-        img, img_ids = self.process_img(x)
-        img_tokens = img.shape[1]
-        if ref_latents is not None:
-            h = 0
-            w = 0
-            for ref in ref_latents:
-                h_offset = 0
-                w_offset = 0
-                if ref.shape[-2] + h > ref.shape[-1] + w:
-                    w_offset = w
-                else:
-                    h_offset = h
-
-                kontext, kontext_ids = self.process_img(ref, index=1, h_offset=h_offset, w_offset=w_offset)
-                img = torch.cat([img, kontext], dim=1)
-                img_ids = torch.cat([img_ids, kontext_ids], dim=1)
-                h = max(h, ref.shape[-2] + h_offset)
-                w = max(w, ref.shape[-1] + w_offset)
+        img_ids[:, :, 1] = img_ids[:, :, 1] + torch.linspace(0, h_len - 1, steps=h_len, device=x.device, dtype=x.dtype).unsqueeze(1)
+        img_ids[:, :, 2] = img_ids[:, :, 2] + torch.linspace(0, w_len - 1, steps=w_len, device=x.device, dtype=x.dtype).unsqueeze(0)
+        img_ids = repeat(img_ids, "h w c -> b (h w) c", b=bs)

        txt_ids = torch.zeros((bs, context.shape[1], 3), device=x.device, dtype=x.dtype)
        out = self.forward_orig(img, img_ids, context, txt_ids, timestep, y, guidance, control, transformer_options, attn_mask=kwargs.get("attention_mask", None))
-        out = out[:, :img_tokens]
-        return rearrange(out, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=h_len, w=w_len, ph=2, pw=2)[:,:,:h_orig,:w_orig]
+        return rearrange(out, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=h_len, w=w_len, ph=2, pw=2)[:,:,:h,:w]
--- a/comfy/ldm/lightricks/model.py
+++ b/comfy/ldm/lightricks/model.py
@@ -261,8 +261,8 @@ class CrossAttention(nn.Module):
        self.heads = heads
        self.dim_head = dim_head

-        self.q_norm = operations.RMSNorm(inner_dim, eps=1e-5, dtype=dtype, device=device)
-        self.k_norm = operations.RMSNorm(inner_dim, eps=1e-5, dtype=dtype, device=device)
+        self.q_norm = operations.RMSNorm(inner_dim, dtype=dtype, device=device)
+        self.k_norm = operations.RMSNorm(inner_dim, dtype=dtype, device=device)

        self.to_q = operations.Linear(query_dim, inner_dim, bias=True, dtype=dtype, device=device)
        self.to_k = operations.Linear(context_dim, inner_dim, bias=True, dtype=dtype, device=device)
--- a/comfy/ldm/models/autoencoder.py
+++ b/comfy/ldm/models/autoencoder.py
@@ -11,7 +11,7 @@ from comfy.ldm.modules.ema import LitEma
 import comfy.ops

 class DiagonalGaussianRegularizer(torch.nn.Module):
-    def __init__(self, sample: bool = False):
+    def __init__(self, sample: bool = True):
        super().__init__()
        self.sample = sample

@@ -19,12 +19,16 @@ class DiagonalGaussianRegularizer(torch.nn.Module):
        yield from ()

    def forward(self, z: torch.Tensor) -> Tuple[torch.Tensor, dict]:
+        log = dict()
        posterior = DiagonalGaussianDistribution(z)
        if self.sample:
            z = posterior.sample()
        else:
            z = posterior.mode()
-        return z, None
+        kl_loss = posterior.kl()
+        kl_loss = torch.sum(kl_loss) / kl_loss.shape[0]
+        log["kl_loss"] = kl_loss
+        return z, log


 class AbstractAutoencoder(torch.nn.Module):
--- a/comfy/ldm/modules/sub_quadratic_attention.py
+++ b/comfy/ldm/modules/sub_quadratic_attention.py
@@ -31,7 +31,7 @@ def dynamic_slice(
    starts: List[int],
    sizes: List[int],
 ) -> Tensor:
-    slicing = tuple(slice(start, start + size) for start, size in zip(starts, sizes))
+    slicing = [slice(start, start + size) for start, size in zip(starts, sizes)]
    return x[slicing]

 class AttnChunk(NamedTuple):
--- a/comfy/ldm/omnigen/omnigen2.py
+++ b/comfy/ldm/omnigen/omnigen2.py
@@ -1,469 +0,0 @@
-# Original code: https://github.com/VectorSpaceLab/OmniGen2
-
-from typing import Optional, Tuple
-
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from einops import rearrange, repeat
-from comfy.ldm.lightricks.model import Timesteps
-from comfy.ldm.flux.layers import EmbedND
-from comfy.ldm.modules.attention import optimized_attention_masked
-import comfy.model_management
-import comfy.ldm.common_dit
-
-
-def apply_rotary_emb(x, freqs_cis):
-    if x.shape[1] == 0:
-        return x
-
-    t_ = x.reshape(*x.shape[:-1], -1, 1, 2)
-    t_out = freqs_cis[..., 0] * t_[..., 0] + freqs_cis[..., 1] * t_[..., 1]
-    return t_out.reshape(*x.shape).to(dtype=x.dtype)
-
-
-def swiglu(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
-    return F.silu(x) * y
-
-
-class TimestepEmbedding(nn.Module):
-    def __init__(self, in_channels: int, time_embed_dim: int, dtype=None, device=None, operations=None):
-        super().__init__()
-        self.linear_1 = operations.Linear(in_channels, time_embed_dim, dtype=dtype, device=device)
-        self.act = nn.SiLU()
-        self.linear_2 = operations.Linear(time_embed_dim, time_embed_dim, dtype=dtype, device=device)
-
-    def forward(self, sample: torch.Tensor) -> torch.Tensor:
-        sample = self.linear_1(sample)
-        sample = self.act(sample)
-        sample = self.linear_2(sample)
-        return sample
-
-
-class LuminaRMSNormZero(nn.Module):
-    def __init__(self, embedding_dim: int, norm_eps: float = 1e-5, dtype=None, device=None, operations=None):
-        super().__init__()
-        self.silu = nn.SiLU()
-        self.linear = operations.Linear(min(embedding_dim, 1024), 4 * embedding_dim, dtype=dtype, device=device)
-        self.norm = operations.RMSNorm(embedding_dim, eps=norm_eps, dtype=dtype, device=device)
-
-    def forward(self, x: torch.Tensor, emb: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
-        emb = self.linear(self.silu(emb))
-        scale_msa, gate_msa, scale_mlp, gate_mlp = emb.chunk(4, dim=1)
-        x = self.norm(x) * (1 + scale_msa[:, None])
-        return x, gate_msa, scale_mlp, gate_mlp
-
-
-class LuminaLayerNormContinuous(nn.Module):
-    def __init__(self, embedding_dim: int, conditioning_embedding_dim: int, elementwise_affine: bool = False, eps: float = 1e-6, out_dim: Optional[int] = None, dtype=None, device=None, operations=None):
-        super().__init__()
-        self.silu = nn.SiLU()
-        self.linear_1 = operations.Linear(conditioning_embedding_dim, embedding_dim, dtype=dtype, device=device)
-        self.norm = operations.LayerNorm(embedding_dim, eps, elementwise_affine, dtype=dtype, device=device)
-        self.linear_2 = operations.Linear(embedding_dim, out_dim, bias=True, dtype=dtype, device=device) if out_dim is not None else None
-
-    def forward(self, x: torch.Tensor, conditioning_embedding: torch.Tensor) -> torch.Tensor:
-        emb = self.linear_1(self.silu(conditioning_embedding).to(x.dtype))
-        x = self.norm(x) * (1 + emb)[:, None, :]
-        if self.linear_2 is not None:
-            x = self.linear_2(x)
-        return x
-
-
-class LuminaFeedForward(nn.Module):
-    def __init__(self, dim: int, inner_dim: int, multiple_of: int = 256, dtype=None, device=None, operations=None):
-        super().__init__()
-        inner_dim = multiple_of * ((inner_dim + multiple_of - 1) // multiple_of)
-        self.linear_1 = operations.Linear(dim, inner_dim, bias=False, dtype=dtype, device=device)
-        self.linear_2 = operations.Linear(inner_dim, dim, bias=False, dtype=dtype, device=device)
-        self.linear_3 = operations.Linear(dim, inner_dim, bias=False, dtype=dtype, device=device)
-
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        h1, h2 = self.linear_1(x), self.linear_3(x)
-        return self.linear_2(swiglu(h1, h2))
-
-
-class Lumina2CombinedTimestepCaptionEmbedding(nn.Module):
-    def __init__(self, hidden_size: int = 4096, text_feat_dim: int = 2048, frequency_embedding_size: int = 256, norm_eps: float = 1e-5, timestep_scale: float = 1.0, dtype=None, device=None, operations=None):
-        super().__init__()
-        self.time_proj = Timesteps(num_channels=frequency_embedding_size, flip_sin_to_cos=True, downscale_freq_shift=0.0, scale=timestep_scale)
-        self.timestep_embedder = TimestepEmbedding(in_channels=frequency_embedding_size, time_embed_dim=min(hidden_size, 1024), dtype=dtype, device=device, operations=operations)
-        self.caption_embedder = nn.Sequential(
-            operations.RMSNorm(text_feat_dim, eps=norm_eps, dtype=dtype, device=device),
-            operations.Linear(text_feat_dim, hidden_size, bias=True, dtype=dtype, device=device),
-        )
-
-    def forward(self, timestep: torch.Tensor, text_hidden_states: torch.Tensor, dtype: torch.dtype) -> Tuple[torch.Tensor, torch.Tensor]:
-        timestep_proj = self.time_proj(timestep).to(dtype=dtype)
-        time_embed = self.timestep_embedder(timestep_proj)
-        caption_embed = self.caption_embedder(text_hidden_states)
-        return time_embed, caption_embed
-
-
-class Attention(nn.Module):
-    def __init__(self, query_dim: int, dim_head: int, heads: int, kv_heads: int, eps: float = 1e-5, bias: bool = False, dtype=None, device=None, operations=None):
-        super().__init__()
-        self.heads = heads
-        self.kv_heads = kv_heads
-        self.dim_head = dim_head
-        self.scale = dim_head ** -0.5
-
-        self.to_q = operations.Linear(query_dim, heads * dim_head, bias=bias, dtype=dtype, device=device)
-        self.to_k = operations.Linear(query_dim, kv_heads * dim_head, bias=bias, dtype=dtype, device=device)
-        self.to_v = operations.Linear(query_dim, kv_heads * dim_head, bias=bias, dtype=dtype, device=device)
-
-        self.norm_q = operations.RMSNorm(dim_head, eps=eps, dtype=dtype, device=device)
-        self.norm_k = operations.RMSNorm(dim_head, eps=eps, dtype=dtype, device=device)
-
-        self.to_out = nn.Sequential(
-            operations.Linear(heads * dim_head, query_dim, bias=bias, dtype=dtype, device=device),
-            nn.Dropout(0.0)
-        )
-
-    def forward(self, hidden_states: torch.Tensor, encoder_hidden_states: torch.Tensor, attention_mask: Optional[torch.Tensor] = None, image_rotary_emb: Optional[torch.Tensor] = None) -> torch.Tensor:
-        batch_size, sequence_length, _ = hidden_states.shape
-
-        query = self.to_q(hidden_states)
-        key = self.to_k(encoder_hidden_states)
-        value = self.to_v(encoder_hidden_states)
-
-        query = query.view(batch_size, -1, self.heads, self.dim_head)
-        key = key.view(batch_size, -1, self.kv_heads, self.dim_head)
-        value = value.view(batch_size, -1, self.kv_heads, self.dim_head)
-
-        query = self.norm_q(query)
-        key = self.norm_k(key)
-
-        if image_rotary_emb is not None:
-            query = apply_rotary_emb(query, image_rotary_emb)
-            key = apply_rotary_emb(key, image_rotary_emb)
-
-        query = query.transpose(1, 2)
-        key = key.transpose(1, 2)
-        value = value.transpose(1, 2)
-
-        if self.kv_heads < self.heads:
-            key = key.repeat_interleave(self.heads // self.kv_heads, dim=1)
-            value = value.repeat_interleave(self.heads // self.kv_heads, dim=1)
-
-        hidden_states = optimized_attention_masked(query, key, value, self.heads, attention_mask, skip_reshape=True)
-        hidden_states = self.to_out[0](hidden_states)
-        return hidden_states
-
-
-class OmniGen2TransformerBlock(nn.Module):
-    def __init__(self, dim: int, num_attention_heads: int, num_kv_heads: int, multiple_of: int, ffn_dim_multiplier: float, norm_eps: float, modulation: bool = True, dtype=None, device=None, operations=None):
-        super().__init__()
-        self.modulation = modulation
-
-        self.attn = Attention(
-            query_dim=dim,
-            dim_head=dim // num_attention_heads,
-            heads=num_attention_heads,
-            kv_heads=num_kv_heads,
-            eps=1e-5,
-            bias=False,
-            dtype=dtype, device=device, operations=operations,
-        )
-
-        self.feed_forward = LuminaFeedForward(
-            dim=dim,
-            inner_dim=4 * dim,
-            multiple_of=multiple_of,
-            dtype=dtype, device=device, operations=operations
-        )
-
-        if modulation:
-            self.norm1 = LuminaRMSNormZero(embedding_dim=dim, norm_eps=norm_eps, dtype=dtype, device=device, operations=operations)
-        else:
-            self.norm1 = operations.RMSNorm(dim, eps=norm_eps, dtype=dtype, device=device)
-
-        self.ffn_norm1 = operations.RMSNorm(dim, eps=norm_eps, dtype=dtype, device=device)
-        self.norm2 = operations.RMSNorm(dim, eps=norm_eps, dtype=dtype, device=device)
-        self.ffn_norm2 = operations.RMSNorm(dim, eps=norm_eps, dtype=dtype, device=device)
-
-    def forward(self, hidden_states: torch.Tensor, attention_mask: torch.Tensor, image_rotary_emb: torch.Tensor, temb: Optional[torch.Tensor] = None) -> torch.Tensor:
-        if self.modulation:
-            norm_hidden_states, gate_msa, scale_mlp, gate_mlp = self.norm1(hidden_states, temb)
-            attn_output = self.attn(norm_hidden_states, norm_hidden_states, attention_mask, image_rotary_emb)
-            hidden_states = hidden_states + gate_msa.unsqueeze(1).tanh() * self.norm2(attn_output)
-            mlp_output = self.feed_forward(self.ffn_norm1(hidden_states) * (1 + scale_mlp.unsqueeze(1)))
-            hidden_states = hidden_states + gate_mlp.unsqueeze(1).tanh() * self.ffn_norm2(mlp_output)
-        else:
-            norm_hidden_states = self.norm1(hidden_states)
-            attn_output = self.attn(norm_hidden_states, norm_hidden_states, attention_mask, image_rotary_emb)
-            hidden_states = hidden_states + self.norm2(attn_output)
-            mlp_output = self.feed_forward(self.ffn_norm1(hidden_states))
-            hidden_states = hidden_states + self.ffn_norm2(mlp_output)
-        return hidden_states
-
-
-class OmniGen2RotaryPosEmbed(nn.Module):
-    def __init__(self, theta: int, axes_dim: Tuple[int, int, int], axes_lens: Tuple[int, int, int] = (300, 512, 512), patch_size: int = 2):
-        super().__init__()
-        self.theta = theta
-        self.axes_dim = axes_dim
-        self.axes_lens = axes_lens
-        self.patch_size = patch_size
-        self.rope_embedder = EmbedND(dim=sum(axes_dim), theta=self.theta, axes_dim=axes_dim)
-
-    def forward(self, batch_size, encoder_seq_len, l_effective_cap_len, l_effective_ref_img_len, l_effective_img_len, ref_img_sizes, img_sizes, device):
-        p = self.patch_size
-
-        seq_lengths = [cap_len + sum(ref_img_len) + img_len for cap_len, ref_img_len, img_len in zip(l_effective_cap_len, l_effective_ref_img_len, l_effective_img_len)]
-
-        max_seq_len = max(seq_lengths)
-        max_ref_img_len = max([sum(ref_img_len) for ref_img_len in l_effective_ref_img_len])
-        max_img_len = max(l_effective_img_len)
-
-        position_ids = torch.zeros(batch_size, max_seq_len, 3, dtype=torch.int32, device=device)
-
-        for i, (cap_seq_len, seq_len) in enumerate(zip(l_effective_cap_len, seq_lengths)):
-            position_ids[i, :cap_seq_len] = repeat(torch.arange(cap_seq_len, dtype=torch.int32, device=device), "l -> l 3")
-
-            pe_shift = cap_seq_len
-            pe_shift_len = cap_seq_len
-
-            if ref_img_sizes[i] is not None:
-                for ref_img_size, ref_img_len in zip(ref_img_sizes[i], l_effective_ref_img_len[i]):
-                    H, W = ref_img_size
-                    ref_H_tokens, ref_W_tokens = H // p, W // p
-
-                    row_ids = repeat(torch.arange(ref_H_tokens, dtype=torch.int32, device=device), "h -> h w", w=ref_W_tokens).flatten()
-                    col_ids = repeat(torch.arange(ref_W_tokens, dtype=torch.int32, device=device), "w -> h w", h=ref_H_tokens).flatten()
-                    position_ids[i, pe_shift_len:pe_shift_len + ref_img_len, 0] = pe_shift
-                    position_ids[i, pe_shift_len:pe_shift_len + ref_img_len, 1] = row_ids
-                    position_ids[i, pe_shift_len:pe_shift_len + ref_img_len, 2] = col_ids
-
-                    pe_shift += max(ref_H_tokens, ref_W_tokens)
-                    pe_shift_len += ref_img_len
-
-            H, W = img_sizes[i]
-            H_tokens, W_tokens = H // p, W // p
-
-            row_ids = repeat(torch.arange(H_tokens, dtype=torch.int32, device=device), "h -> h w", w=W_tokens).flatten()
-            col_ids = repeat(torch.arange(W_tokens, dtype=torch.int32, device=device), "w -> h w", h=H_tokens).flatten()
-
-            position_ids[i, pe_shift_len: seq_len, 0] = pe_shift
-            position_ids[i, pe_shift_len: seq_len, 1] = row_ids
-            position_ids[i, pe_shift_len: seq_len, 2] = col_ids
-
-        freqs_cis = self.rope_embedder(position_ids).movedim(1, 2)
-
-        cap_freqs_cis_shape = list(freqs_cis.shape)
-        cap_freqs_cis_shape[1] = encoder_seq_len
-        cap_freqs_cis = torch.zeros(*cap_freqs_cis_shape, device=device, dtype=freqs_cis.dtype)
-
-        ref_img_freqs_cis_shape = list(freqs_cis.shape)
-        ref_img_freqs_cis_shape[1] = max_ref_img_len
-        ref_img_freqs_cis = torch.zeros(*ref_img_freqs_cis_shape, device=device, dtype=freqs_cis.dtype)
-
-        img_freqs_cis_shape = list(freqs_cis.shape)
-        img_freqs_cis_shape[1] = max_img_len
-        img_freqs_cis = torch.zeros(*img_freqs_cis_shape, device=device, dtype=freqs_cis.dtype)
-
-        for i, (cap_seq_len, ref_img_len, img_len, seq_len) in enumerate(zip(l_effective_cap_len, l_effective_ref_img_len, l_effective_img_len, seq_lengths)):
-            cap_freqs_cis[i, :cap_seq_len] = freqs_cis[i, :cap_seq_len]
-            ref_img_freqs_cis[i, :sum(ref_img_len)] = freqs_cis[i, cap_seq_len:cap_seq_len + sum(ref_img_len)]
-            img_freqs_cis[i, :img_len] = freqs_cis[i, cap_seq_len + sum(ref_img_len):cap_seq_len + sum(ref_img_len) + img_len]
-
-        return cap_freqs_cis, ref_img_freqs_cis, img_freqs_cis, freqs_cis, l_effective_cap_len, seq_lengths
-
-
-class OmniGen2Transformer2DModel(nn.Module):
-    def __init__(
-        self,
-        patch_size: int = 2,
-        in_channels: int = 16,
-        out_channels: Optional[int] = None,
-        hidden_size: int = 2304,
-        num_layers: int = 26,
-        num_refiner_layers: int = 2,
-        num_attention_heads: int = 24,
-        num_kv_heads: int = 8,
-        multiple_of: int = 256,
-        ffn_dim_multiplier: Optional[float] = None,
-        norm_eps: float = 1e-5,
-        axes_dim_rope: Tuple[int, int, int] = (32, 32, 32),
-        axes_lens: Tuple[int, int, int] = (300, 512, 512),
-        text_feat_dim: int = 1024,
-        timestep_scale: float = 1.0,
-        image_model=None,
-        device=None,
-        dtype=None,
-        operations=None,
-    ):
-        super().__init__()
-
-        self.patch_size = patch_size
-        self.out_channels = out_channels or in_channels
-        self.hidden_size = hidden_size
-        self.dtype = dtype
-
-        self.rope_embedder = OmniGen2RotaryPosEmbed(
-            theta=10000,
-            axes_dim=axes_dim_rope,
-            axes_lens=axes_lens,
-            patch_size=patch_size,
-        )
-
-        self.x_embedder = operations.Linear(patch_size * patch_size * in_channels, hidden_size, dtype=dtype, device=device)
-        self.ref_image_patch_embedder = operations.Linear(patch_size * patch_size * in_channels, hidden_size, dtype=dtype, device=device)
-
-        self.time_caption_embed = Lumina2CombinedTimestepCaptionEmbedding(
-            hidden_size=hidden_size,
-            text_feat_dim=text_feat_dim,
-            norm_eps=norm_eps,
-            timestep_scale=timestep_scale, dtype=dtype, device=device, operations=operations
-        )
-
-        self.noise_refiner = nn.ModuleList([
-            OmniGen2TransformerBlock(
-                hidden_size, num_attention_heads, num_kv_heads,
-                multiple_of, ffn_dim_multiplier, norm_eps, modulation=True, dtype=dtype, device=device, operations=operations
-            ) for _ in range(num_refiner_layers)
-        ])
-
-        self.ref_image_refiner = nn.ModuleList([
-            OmniGen2TransformerBlock(
-                hidden_size, num_attention_heads, num_kv_heads,
-                multiple_of, ffn_dim_multiplier, norm_eps, modulation=True, dtype=dtype, device=device, operations=operations
-            ) for _ in range(num_refiner_layers)
-        ])
-
-        self.context_refiner = nn.ModuleList([
-            OmniGen2TransformerBlock(
-                hidden_size, num_attention_heads, num_kv_heads,
-                multiple_of, ffn_dim_multiplier, norm_eps, modulation=False, dtype=dtype, device=device, operations=operations
-            ) for _ in range(num_refiner_layers)
-        ])
-
-        self.layers = nn.ModuleList([
-            OmniGen2TransformerBlock(
-                hidden_size, num_attention_heads, num_kv_heads,
-                multiple_of, ffn_dim_multiplier, norm_eps, modulation=True, dtype=dtype, device=device, operations=operations
-            ) for _ in range(num_layers)
-        ])
-
-        self.norm_out = LuminaLayerNormContinuous(
-            embedding_dim=hidden_size,
-            conditioning_embedding_dim=min(hidden_size, 1024),
-            elementwise_affine=False,
-            eps=1e-6,
-            out_dim=patch_size * patch_size * self.out_channels, dtype=dtype, device=device, operations=operations
-        )
-
-        self.image_index_embedding = nn.Parameter(torch.empty(5, hidden_size, device=device, dtype=dtype))
-
-    def flat_and_pad_to_seq(self, hidden_states, ref_image_hidden_states):
-        batch_size = len(hidden_states)
-        p = self.patch_size
-
-        img_sizes = [(img.size(1), img.size(2)) for img in hidden_states]
-        l_effective_img_len = [(H // p) * (W // p) for (H, W) in img_sizes]
-
-        if ref_image_hidden_states is not None:
-            ref_image_hidden_states = list(map(lambda ref: comfy.ldm.common_dit.pad_to_patch_size(ref, (p, p)), ref_image_hidden_states))
-            ref_img_sizes = [[(imgs.size(2), imgs.size(3)) if imgs is not None else None for imgs in ref_image_hidden_states]] * batch_size
-            l_effective_ref_img_len = [[(ref_img_size[0] // p) * (ref_img_size[1] // p) for ref_img_size in _ref_img_sizes] if _ref_img_sizes is not None else [0] for _ref_img_sizes in ref_img_sizes]
-        else:
-            ref_img_sizes = [None for _ in range(batch_size)]
-            l_effective_ref_img_len = [[0] for _ in range(batch_size)]
-
-        flat_ref_img_hidden_states = None
-        if ref_image_hidden_states is not None:
-            imgs = []
-            for ref_img in ref_image_hidden_states:
-                B, C, H, W = ref_img.size()
-                ref_img = rearrange(ref_img, 'b c (h p1) (w p2) -> b (h w) (p1 p2 c)', p1=p, p2=p)
-                imgs.append(ref_img)
-            flat_ref_img_hidden_states = torch.cat(imgs, dim=1)
-
-        img = hidden_states
-        B, C, H, W = img.size()
-        flat_hidden_states = rearrange(img, 'b c (h p1) (w p2) -> b (h w) (p1 p2 c)', p1=p, p2=p)
-
-        return (
-            flat_hidden_states, flat_ref_img_hidden_states,
-            None, None,
-            l_effective_ref_img_len, l_effective_img_len,
-            ref_img_sizes, img_sizes,
-        )
-
-    def img_patch_embed_and_refine(self, hidden_states, ref_image_hidden_states, padded_img_mask, padded_ref_img_mask, noise_rotary_emb, ref_img_rotary_emb, l_effective_ref_img_len, l_effective_img_len, temb):
-        batch_size = len(hidden_states)
-
-        hidden_states = self.x_embedder(hidden_states)
-        if ref_image_hidden_states is not None:
-            ref_image_hidden_states = self.ref_image_patch_embedder(ref_image_hidden_states)
-            image_index_embedding = comfy.model_management.cast_to(self.image_index_embedding, dtype=hidden_states.dtype, device=hidden_states.device)
-
-            for i in range(batch_size):
-                shift = 0
-                for j, ref_img_len in enumerate(l_effective_ref_img_len[i]):
-                    ref_image_hidden_states[i, shift:shift + ref_img_len, :] = ref_image_hidden_states[i, shift:shift + ref_img_len, :] + image_index_embedding[j]
-                    shift += ref_img_len
-
-        for layer in self.noise_refiner:
-            hidden_states = layer(hidden_states, padded_img_mask, noise_rotary_emb, temb)
-
-        if ref_image_hidden_states is not None:
-            for layer in self.ref_image_refiner:
-                ref_image_hidden_states = layer(ref_image_hidden_states, padded_ref_img_mask, ref_img_rotary_emb, temb)
-
-            hidden_states = torch.cat([ref_image_hidden_states, hidden_states], dim=1)
-
-        return hidden_states
-
-    def forward(self, x, timesteps, context, num_tokens, ref_latents=None, attention_mask=None, **kwargs):
-        B, C, H, W = x.shape
-        hidden_states = comfy.ldm.common_dit.pad_to_patch_size(x, (self.patch_size, self.patch_size))
-        _, _, H_padded, W_padded = hidden_states.shape
-        timestep = 1.0 - timesteps
-        text_hidden_states = context
-        text_attention_mask = attention_mask
-        ref_image_hidden_states = ref_latents
-        device = hidden_states.device
-
-        temb, text_hidden_states = self.time_caption_embed(timestep, text_hidden_states, hidden_states[0].dtype)
-
-        (
-            hidden_states, ref_image_hidden_states,
-            img_mask, ref_img_mask,
-            l_effective_ref_img_len, l_effective_img_len,
-            ref_img_sizes, img_sizes,
-        ) = self.flat_and_pad_to_seq(hidden_states, ref_image_hidden_states)
-
-        (
-            context_rotary_emb, ref_img_rotary_emb, noise_rotary_emb,
-            rotary_emb, encoder_seq_lengths, seq_lengths,
-        ) = self.rope_embedder(
-            hidden_states.shape[0], text_hidden_states.shape[1], [num_tokens] * text_hidden_states.shape[0],
-            l_effective_ref_img_len, l_effective_img_len,
-            ref_img_sizes, img_sizes, device,
-        )
-
-        for layer in self.context_refiner:
-            text_hidden_states = layer(text_hidden_states, text_attention_mask, context_rotary_emb)
-
-        img_len = hidden_states.shape[1]
-        combined_img_hidden_states = self.img_patch_embed_and_refine(
-            hidden_states, ref_image_hidden_states,
-            img_mask, ref_img_mask,
-            noise_rotary_emb, ref_img_rotary_emb,
-            l_effective_ref_img_len, l_effective_img_len,
-            temb,
-        )
-
-        hidden_states = torch.cat([text_hidden_states, combined_img_hidden_states], dim=1)
-        attention_mask = None
-
-        for layer in self.layers:
-            hidden_states = layer(hidden_states, attention_mask, rotary_emb, temb)
-
-        hidden_states = self.norm_out(hidden_states, temb)
-
-        p = self.patch_size
-        output = rearrange(hidden_states[:, -img_len:], 'b (h w) (p1 p2 c) -> b c (h p1) (w p2)',  h=H_padded // p, w=W_padded// p, p1=p, p2=p)[:, :, :H, :W]
-
-        return -output
--- a/comfy/model_base.py
+++ b/comfy/model_base.py
@@ -41,7 +41,6 @@ import comfy.ldm.hunyuan3d.model
 import comfy.ldm.hidream.model
 import comfy.ldm.chroma.model
 import comfy.ldm.ace.model
-import comfy.ldm.omnigen.omnigen2

 import comfy.model_management
 import comfy.patcher_extension
@@ -816,7 +815,6 @@ class PixArt(BaseModel):
 class Flux(BaseModel):
    def __init__(self, model_config, model_type=ModelType.FLUX, device=None, unet_model=comfy.ldm.flux.model.Flux):
        super().__init__(model_config, model_type, device=device, unet_model=unet_model)
-        self.memory_usage_factor_conds = ("ref_latents",)

    def concat_cond(self, **kwargs):
        try:
@@ -877,23 +875,8 @@ class Flux(BaseModel):
        guidance = kwargs.get("guidance", 3.5)
        if guidance is not None:
            out['guidance'] = comfy.conds.CONDRegular(torch.FloatTensor([guidance]))
-
-        ref_latents = kwargs.get("reference_latents", None)
-        if ref_latents is not None:
-            latents = []
-            for lat in ref_latents:
-                latents.append(self.process_latent_in(lat))
-            out['ref_latents'] = comfy.conds.CONDList(latents)
        return out

-    def extra_conds_shapes(self, **kwargs):
-        out = {}
-        ref_latents = kwargs.get("reference_latents", None)
-        if ref_latents is not None:
-            out['ref_latents'] = list([1, 16, sum(map(lambda a: math.prod(a.size()), ref_latents)) // 16])
-        return out
-
-
 class GenmoMochi(BaseModel):
    def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
        super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.genmo.joint_model.asymm_models_joint.AsymmDiTJoint)
@@ -1031,32 +1014,9 @@ class CosmosPredict2(BaseModel):
        if cross_attn is not None:
            out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)

-        denoise_mask = kwargs.get("concat_mask", kwargs.get("denoise_mask", None))
-        if denoise_mask is not None:
-            out["denoise_mask"] = comfy.conds.CONDRegular(denoise_mask)
-
        out['fps'] = comfy.conds.CONDConstant(kwargs.get("frame_rate", None))
        return out

-    def process_timestep(self, timestep, x, denoise_mask=None, **kwargs):
-        if denoise_mask is None:
-            return timestep
-        if denoise_mask.ndim <= 4:
-            return timestep
-        condition_video_mask_B_1_T_1_1 = denoise_mask.mean(dim=[1, 3, 4], keepdim=True)
-        c_noise_B_1_T_1_1 = 0.0 * (1.0 - condition_video_mask_B_1_T_1_1) + timestep.reshape(timestep.shape[0], 1, 1, 1, 1) * condition_video_mask_B_1_T_1_1
-        out = c_noise_B_1_T_1_1.squeeze(dim=[1, 3, 4])
-        return out
-
-    def scale_latent_inpaint(self, sigma, noise, latent_image, **kwargs):
-        sigma = sigma.reshape([sigma.shape[0]] + [1] * (len(noise.shape) - 1))
-        sigma_noise_augmentation = 0 #TODO
-        if sigma_noise_augmentation != 0:
-            latent_image = latent_image + noise
-        latent_image = self.model_sampling.calculate_input(torch.tensor([sigma_noise_augmentation], device=latent_image.device, dtype=latent_image.dtype), latent_image)
-        sigma = (sigma / (sigma + 1))
-        return latent_image / (1.0 - sigma)
-
 class Lumina2(BaseModel):
    def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
        super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.lumina.model.NextDiT)
@@ -1247,33 +1207,3 @@ class ACEStep(BaseModel):
        out['speaker_embeds'] = comfy.conds.CONDRegular(torch.zeros(noise.shape[0], 512, device=noise.device, dtype=noise.dtype))
        out['lyrics_strength'] = comfy.conds.CONDConstant(kwargs.get("lyrics_strength", 1.0))
        return out
-
-class Omnigen2(BaseModel):
-    def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
-        super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.omnigen.omnigen2.OmniGen2Transformer2DModel)
-        self.memory_usage_factor_conds = ("ref_latents",)
-
-    def extra_conds(self, **kwargs):
-        out = super().extra_conds(**kwargs)
-        attention_mask = kwargs.get("attention_mask", None)
-        if attention_mask is not None:
-            if torch.numel(attention_mask) != attention_mask.sum():
-                out['attention_mask'] = comfy.conds.CONDRegular(attention_mask)
-            out['num_tokens'] = comfy.conds.CONDConstant(max(1, torch.sum(attention_mask).item()))
-        cross_attn = kwargs.get("cross_attn", None)
-        if cross_attn is not None:
-            out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)
-        ref_latents = kwargs.get("reference_latents", None)
-        if ref_latents is not None:
-            latents = []
-            for lat in ref_latents:
-                latents.append(self.process_latent_in(lat))
-            out['ref_latents'] = comfy.conds.CONDList(latents)
-        return out
-
-    def extra_conds_shapes(self, **kwargs):
-        out = {}
-        ref_latents = kwargs.get("reference_latents", None)
-        if ref_latents is not None:
-            out['ref_latents'] = list([1, 16, sum(map(lambda a: math.prod(a.size()), ref_latents)) // 16])
-        return out
--- a/comfy/model_detection.py
+++ b/comfy/model_detection.py
@@ -441,16 +441,11 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
            dit_config["rope_h_extrapolation_ratio"] = 4.0
            dit_config["rope_w_extrapolation_ratio"] = 4.0
            dit_config["rope_t_extrapolation_ratio"] = 1.0
-        elif dit_config["in_channels"] == 17: # img to video
-            if dit_config["model_channels"] == 2048:
-                dit_config["extra_per_block_abs_pos_emb"] = False
-                dit_config["rope_h_extrapolation_ratio"] = 3.0
-                dit_config["rope_w_extrapolation_ratio"] = 3.0
-                dit_config["rope_t_extrapolation_ratio"] = 1.0
-            elif dit_config["model_channels"] == 5120:
-                dit_config["rope_h_extrapolation_ratio"] = 2.0
-                dit_config["rope_w_extrapolation_ratio"] = 2.0
-                dit_config["rope_t_extrapolation_ratio"] = 0.8333333333333334
+        elif dit_config["in_channels"] == 17:
+            dit_config["extra_per_block_abs_pos_emb"] = False
+            dit_config["rope_h_extrapolation_ratio"] = 3.0
+            dit_config["rope_w_extrapolation_ratio"] = 3.0
+            dit_config["rope_t_extrapolation_ratio"] = 1.0

        dit_config["extra_h_extrapolation_ratio"] = 1.0
        dit_config["extra_w_extrapolation_ratio"] = 1.0
@@ -459,26 +454,6 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):

        return dit_config

-    if '{}time_caption_embed.timestep_embedder.linear_1.bias'.format(key_prefix) in state_dict_keys:  # Omnigen2
-        dit_config = {}
-        dit_config["image_model"] = "omnigen2"
-        dit_config["axes_dim_rope"] = [40, 40, 40]
-        dit_config["axes_lens"] = [1024, 1664, 1664]
-        dit_config["ffn_dim_multiplier"] = None
-        dit_config["hidden_size"] = 2520
-        dit_config["in_channels"] = 16
-        dit_config["multiple_of"] = 256
-        dit_config["norm_eps"] = 1e-05
-        dit_config["num_attention_heads"] = 21
-        dit_config["num_kv_heads"] = 7
-        dit_config["num_layers"] = 32
-        dit_config["num_refiner_layers"] = 2
-        dit_config["out_channels"] = None
-        dit_config["patch_size"] = 2
-        dit_config["text_feat_dim"] = 2048
-        dit_config["timestep_scale"] = 1000.0
-        return dit_config
-
    if '{}input_blocks.0.0.weight'.format(key_prefix) not in state_dict_keys:
        return None

--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -1290,13 +1290,6 @@ def supports_fp8_compute(device=None):

    return True

-def extended_fp16_support():
-    # TODO: check why some models work with fp16 on newer torch versions but not on older
-    if torch_version_numeric < (2, 7):
-        return False
-
-    return True
-
 def soft_empty_cache(force=False):
    global cpu_state
    if cpu_state == CPUState.MPS:
--- a/comfy/sd.py
+++ b/comfy/sd.py
@@ -44,7 +44,6 @@ import comfy.text_encoders.lumina2
 import comfy.text_encoders.wan
 import comfy.text_encoders.hidream
 import comfy.text_encoders.ace
-import comfy.text_encoders.omnigen2

 import comfy.model_patcher
 import comfy.lora
@@ -755,7 +754,6 @@ class CLIPType(Enum):
    HIDREAM = 14
    CHROMA = 15
    ACE = 16
-    OMNIGEN2 = 17


 def load_clip(ckpt_paths, embedding_directory=None, clip_type=CLIPType.STABLE_DIFFUSION, model_options={}):
@@ -775,7 +773,6 @@ class TEModel(Enum):
    LLAMA3_8 = 7
    T5_XXL_OLD = 8
    GEMMA_2_2B = 9
-    QWEN25_3B = 10

 def detect_te_model(sd):
    if "text_model.encoder.layers.30.mlp.fc1.weight" in sd:
@@ -796,8 +793,6 @@ def detect_te_model(sd):
        return TEModel.T5_BASE
    if 'model.layers.0.post_feedforward_layernorm.weight' in sd:
        return TEModel.GEMMA_2_2B
-    if 'model.layers.0.self_attn.k_proj.bias' in sd:
-        return TEModel.QWEN25_3B
    if "model.layers.0.post_attention_layernorm.weight" in sd:
        return TEModel.LLAMA3_8
    return None
@@ -899,9 +894,6 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip
            clip_target.clip = comfy.text_encoders.hidream.hidream_clip(**llama_detect(clip_data),
                                                                        clip_l=False, clip_g=False, t5=False, llama=True, dtype_t5=None, t5xxl_scaled_fp8=None)
            clip_target.tokenizer = comfy.text_encoders.hidream.HiDreamTokenizer
-        elif te_model == TEModel.QWEN25_3B:
-            clip_target.clip = comfy.text_encoders.omnigen2.te(**llama_detect(clip_data))
-            clip_target.tokenizer = comfy.text_encoders.omnigen2.Omnigen2Tokenizer
        else:
            # clip_l
            if clip_type == CLIPType.SD3:
@@ -1168,7 +1160,7 @@ def load_diffusion_model_state_dict(sd, model_options={}):
    model.load_model_weights(new_sd, "")
    left_over = sd.keys()
    if len(left_over) > 0:
-        logging.info("left over keys in diffusion model: {}".format(left_over))
+        logging.info("left over keys in unet: {}".format(left_over))
    return comfy.model_patcher.ModelPatcher(model, load_device=load_device, offload_device=offload_device)


@@ -1176,7 +1168,7 @@ def load_diffusion_model(unet_path, model_options={}):
    sd = comfy.utils.load_torch_file(unet_path)
    model = load_diffusion_model_state_dict(sd, model_options=model_options)
    if model is None:
-        logging.error("ERROR UNSUPPORTED DIFFUSION MODEL {}".format(unet_path))
+        logging.error("ERROR UNSUPPORTED UNET {}".format(unet_path))
        raise RuntimeError("ERROR: Could not detect model type of: {}".format(unet_path))
    return model

--- a/comfy/sd1_clip.py
+++ b/comfy/sd1_clip.py
@@ -462,7 +462,7 @@ class SDTokenizer:
            tokenizer_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "sd1_tokenizer")
        self.tokenizer = tokenizer_class.from_pretrained(tokenizer_path, **tokenizer_args)
        self.max_length = tokenizer_data.get("{}_max_length".format(embedding_key), max_length)
-        self.min_length = tokenizer_data.get("{}_min_length".format(embedding_key), min_length)
+        self.min_length = min_length
        self.end_token = None
        self.min_padding = min_padding

@@ -482,8 +482,7 @@ class SDTokenizer:
            if end_token is not None:
                self.end_token = end_token
            else:
-                if has_end_token:
-                    self.end_token = empty[0]
+                self.end_token = empty[0]

        if pad_token is not None:
            self.pad_token = pad_token
--- a/comfy/supported_models.py
+++ b/comfy/supported_models.py
@@ -18,7 +18,6 @@ import comfy.text_encoders.cosmos
 import comfy.text_encoders.lumina2
 import comfy.text_encoders.wan
 import comfy.text_encoders.ace
-import comfy.text_encoders.omnigen2

 from . import supported_models_base
 from . import latent_formats
@@ -1182,41 +1181,6 @@ class ACEStep(supported_models_base.BASE):
    def clip_target(self, state_dict={}):
        return supported_models_base.ClipTarget(comfy.text_encoders.ace.AceT5Tokenizer, comfy.text_encoders.ace.AceT5Model)

-class Omnigen2(supported_models_base.BASE):
-    unet_config = {
-        "image_model": "omnigen2",
-    }
-
-    sampling_settings = {
-        "multiplier": 1.0,
-        "shift": 2.6,
-    }
-
-    memory_usage_factor = 1.65 #TODO
-
-    unet_extra_config = {}
-    latent_format = latent_formats.Flux
-
-    supported_inference_dtypes = [torch.bfloat16, torch.float32]
-
-    vae_key_prefix = ["vae."]
-    text_encoder_key_prefix = ["text_encoders."]
-
-    def __init__(self, unet_config):
-        super().__init__(unet_config)
-        if comfy.model_management.extended_fp16_support():
-            self.supported_inference_dtypes = [torch.float16] + self.supported_inference_dtypes
-
-    def get_model(self, state_dict, prefix="", device=None):
-        out = model_base.Omnigen2(self, device=device)
-        return out
-
-    def clip_target(self, state_dict={}):
-        pref = self.text_encoder_key_prefix[0]
-        hunyuan_detect = comfy.text_encoders.hunyuan_video.llama_detect(state_dict, "{}qwen25_3b.transformer.".format(pref))
-        return supported_models_base.ClipTarget(comfy.text_encoders.omnigen2.LuminaTokenizer, comfy.text_encoders.omnigen2.te(**hunyuan_detect))
-
-
-models = [LotusD, Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, FluxSchnell, GenmoMochi, LTXV, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, CosmosT2IPredict2, CosmosI2VPredict2, Lumina2, WAN21_T2V, WAN21_I2V, WAN21_FunControl2V, WAN21_Vace, WAN21_Camera, Hunyuan3Dv2mini, Hunyuan3Dv2, HiDream, Chroma, ACEStep, Omnigen2]
+models = [LotusD, Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, FluxSchnell, GenmoMochi, LTXV, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, CosmosT2IPredict2, CosmosI2VPredict2, Lumina2, WAN21_T2V, WAN21_I2V, WAN21_FunControl2V, WAN21_Vace, WAN21_Camera, Hunyuan3Dv2mini, Hunyuan3Dv2, HiDream, Chroma, ACEStep]

 models += [SVD_img2vid]
--- a/comfy/text_encoders/llama.py
+++ b/comfy/text_encoders/llama.py
@@ -24,24 +24,6 @@ class Llama2Config:
    head_dim = 128
    rms_norm_add = False
    mlp_activation = "silu"
-    qkv_bias = False
-
-@dataclass
-class Qwen25_3BConfig:
-    vocab_size: int = 151936
-    hidden_size: int = 2048
-    intermediate_size: int = 11008
-    num_hidden_layers: int = 36
-    num_attention_heads: int = 16
-    num_key_value_heads: int = 2
-    max_position_embeddings: int = 128000
-    rms_norm_eps: float = 1e-6
-    rope_theta: float = 1000000.0
-    transformer_type: str = "llama"
-    head_dim = 128
-    rms_norm_add = False
-    mlp_activation = "silu"
-    qkv_bias = True

@dataclass
 class Gemma2_2B_Config:
@@ -58,7 +40,6 @@ class Gemma2_2B_Config:
    head_dim = 256
    rms_norm_add = True
    mlp_activation = "gelu_pytorch_tanh"
-    qkv_bias = False

 class RMSNorm(nn.Module):
    def __init__(self, dim: int, eps: float = 1e-5, add=False, device=None, dtype=None):
@@ -117,9 +98,9 @@ class Attention(nn.Module):
        self.inner_size = self.num_heads * self.head_dim

        ops = ops or nn
-        self.q_proj = ops.Linear(config.hidden_size, self.inner_size, bias=config.qkv_bias, device=device, dtype=dtype)
-        self.k_proj = ops.Linear(config.hidden_size, self.num_kv_heads * self.head_dim, bias=config.qkv_bias, device=device, dtype=dtype)
-        self.v_proj = ops.Linear(config.hidden_size, self.num_kv_heads * self.head_dim, bias=config.qkv_bias, device=device, dtype=dtype)
+        self.q_proj = ops.Linear(config.hidden_size, self.inner_size, bias=False, device=device, dtype=dtype)
+        self.k_proj = ops.Linear(config.hidden_size, self.num_kv_heads * self.head_dim, bias=False, device=device, dtype=dtype)
+        self.v_proj = ops.Linear(config.hidden_size, self.num_kv_heads * self.head_dim, bias=False, device=device, dtype=dtype)
        self.o_proj = ops.Linear(self.inner_size, config.hidden_size, bias=False, device=device, dtype=dtype)

    def forward(
@@ -339,14 +320,6 @@ class Llama2(BaseLlama, torch.nn.Module):
        self.model = Llama2_(config, device=device, dtype=dtype, ops=operations)
        self.dtype = dtype

-class Qwen25_3B(BaseLlama, torch.nn.Module):
-    def __init__(self, config_dict, dtype, device, operations):
-        super().__init__()
-        config = Qwen25_3BConfig(**config_dict)
-        self.num_layers = config.num_hidden_layers
-
-        self.model = Llama2_(config, device=device, dtype=dtype, ops=operations)
-        self.dtype = dtype

 class Gemma2_2B(BaseLlama, torch.nn.Module):
    def __init__(self, config_dict, dtype, device, operations):
--- a/comfy/text_encoders/omnigen2.py
+++ b/comfy/text_encoders/omnigen2.py
@@ -1,44 +0,0 @@
-from transformers import Qwen2Tokenizer
-from comfy import sd1_clip
-import comfy.text_encoders.llama
-import os
-
-
-class Qwen25_3BTokenizer(sd1_clip.SDTokenizer):
-    def __init__(self, embedding_directory=None, tokenizer_data={}):
-        tokenizer_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "qwen25_tokenizer")
-        super().__init__(tokenizer_path, pad_with_end=False, embedding_size=2048, embedding_key='qwen25_3b', tokenizer_class=Qwen2Tokenizer, has_start_token=False, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=1, pad_token=151643, tokenizer_data=tokenizer_data)
-
-
-class Omnigen2Tokenizer(sd1_clip.SD1Tokenizer):
-    def __init__(self, embedding_directory=None, tokenizer_data={}):
-        super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, name="qwen25_3b", tokenizer=Qwen25_3BTokenizer)
-        self.llama_template = '<|im_start|>system\nYou are a helpful assistant that generates high-quality images based on user instructions.<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n'
-
-    def tokenize_with_weights(self, text, return_word_ids=False, llama_template=None,**kwargs):
-        if llama_template is None:
-            llama_text = self.llama_template.format(text)
-        else:
-            llama_text = llama_template.format(text)
-        return super().tokenize_with_weights(llama_text, return_word_ids=return_word_ids, **kwargs)
-
-class Qwen25_3BModel(sd1_clip.SDClipModel):
-    def __init__(self, device="cpu", layer="last", layer_idx=None, dtype=None, attention_mask=True, model_options={}):
-        super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config={}, dtype=dtype, special_tokens={"pad": 151643}, layer_norm_hidden_state=False, model_class=comfy.text_encoders.llama.Qwen25_3B, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options)
-
-
-class Omnigen2Model(sd1_clip.SD1ClipModel):
-    def __init__(self, device="cpu", dtype=None, model_options={}):
-        super().__init__(device=device, dtype=dtype, name="qwen25_3b", clip_model=Qwen25_3BModel, model_options=model_options)
-
-
-def te(dtype_llama=None, llama_scaled_fp8=None):
-    class Omnigen2TEModel_(Omnigen2Model):
-        def __init__(self, device="cpu", dtype=None, model_options={}):
-            if llama_scaled_fp8 is not None and "scaled_fp8" not in model_options:
-                model_options = model_options.copy()
-                model_options["scaled_fp8"] = llama_scaled_fp8
-            if dtype_llama is not None:
-                dtype = dtype_llama
-            super().__init__(device=device, dtype=dtype, model_options=model_options)
-    return Omnigen2TEModel_
--- a/comfy/text_encoders/qwen25_tokenizer/merges.txt
+++ b/comfy/text_encoders/qwen25_tokenizer/merges.txt
--- a/comfy/text_encoders/qwen25_tokenizer/tokenizer_config.json
+++ b/comfy/text_encoders/qwen25_tokenizer/tokenizer_config.json
@@ -1,241 +0,0 @@
-{
-  "add_bos_token": false,
-  "add_prefix_space": false,
-  "added_tokens_decoder": {
-    "151643": {
-      "content": "<|endoftext|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151644": {
-      "content": "<|im_start|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151645": {
-      "content": "<|im_end|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151646": {
-      "content": "<|object_ref_start|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151647": {
-      "content": "<|object_ref_end|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151648": {
-      "content": "<|box_start|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151649": {
-      "content": "<|box_end|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151650": {
-      "content": "<|quad_start|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151651": {
-      "content": "<|quad_end|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151652": {
-      "content": "<|vision_start|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151653": {
-      "content": "<|vision_end|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151654": {
-      "content": "<|vision_pad|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151655": {
-      "content": "<|image_pad|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151656": {
-      "content": "<|video_pad|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151657": {
-      "content": "<tool_call>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "151658": {
-      "content": "</tool_call>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "151659": {
-      "content": "<|fim_prefix|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "151660": {
-      "content": "<|fim_middle|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "151661": {
-      "content": "<|fim_suffix|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "151662": {
-      "content": "<|fim_pad|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "151663": {
-      "content": "<|repo_name|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "151664": {
-      "content": "<|file_sep|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "151665": {
-      "content": "<|img|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151666": {
-      "content": "<|endofimg|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151667": {
-      "content": "<|meta|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151668": {
-      "content": "<|endofmeta|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    }
-  },
-  "additional_special_tokens": [
-    "<|im_start|>",
-    "<|im_end|>",
-    "<|object_ref_start|>",
-    "<|object_ref_end|>",
-    "<|box_start|>",
-    "<|box_end|>",
-    "<|quad_start|>",
-    "<|quad_end|>",
-    "<|vision_start|>",
-    "<|vision_end|>",
-    "<|vision_pad|>",
-    "<|image_pad|>",
-    "<|video_pad|>"
-  ],
-  "bos_token": null,
-  "chat_template": "{%- if tools %}\n    {{- '<|im_start|>system\\n' }}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- messages[0]['content'] }}\n    {%- else %}\n        {{- 'You are a helpful assistant.' }}\n    {%- endif %}\n    {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n    {%- for tool in tools %}\n        {{- \"\\n\" }}\n        {{- tool | tojson }}\n    {%- endfor %}\n    {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n    {%- else %}\n        {{- '<|im_start|>system\\nYou are a helpful assistant.<|im_end|>\\n' }}\n    {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n    {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n        {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n    {%- elif message.role == \"assistant\" %}\n        {{- '<|im_start|>' + message.role }}\n        {%- if message.content %}\n            {{- '\\n' + message.content }}\n        {%- endif %}\n        {%- for tool_call in message.tool_calls %}\n            {%- if tool_call.function is defined %}\n                {%- set tool_call = tool_call.function %}\n            {%- endif %}\n            {{- '\\n<tool_call>\\n{\"name\": \"' }}\n            {{- tool_call.name }}\n            {{- '\", \"arguments\": ' }}\n            {{- tool_call.arguments | tojson }}\n            {{- '}\\n</tool_call>' }}\n        {%- endfor %}\n        {{- 
'<|im_end|>\\n' }}\n    {%- elif message.role == \"tool\" %}\n        {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n            {{- '<|im_start|>user' }}\n        {%- endif %}\n        {{- '\\n<tool_response>\\n' }}\n        {{- message.content }}\n        {{- '\\n</tool_response>' }}\n        {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n            {{- '<|im_end|>\\n' }}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
-  "clean_up_tokenization_spaces": false,
-  "eos_token": "<|im_end|>",
-  "errors": "replace",
-  "extra_special_tokens": {},
-  "model_max_length": 131072,
-  "pad_token": "<|endoftext|>",
-  "processor_class": "Qwen2_5_VLProcessor",
-  "split_special_tokens": false,
-  "tokenizer_class": "Qwen2Tokenizer",
-  "unk_token": null
-}
--- a/comfy/text_encoders/qwen25_tokenizer/vocab.json
+++ b/comfy/text_encoders/qwen25_tokenizer/vocab.json
--- a/comfy/utils.py
+++ b/comfy/utils.py
@@ -997,11 +997,12 @@ def set_progress_bar_global_hook(function):
    PROGRESS_BAR_HOOK = function

 class ProgressBar:
-    def __init__(self, total):
+    def __init__(self, total, node_id=None):
        global PROGRESS_BAR_HOOK
        self.total = total
        self.current = 0
        self.hook = PROGRESS_BAR_HOOK
+        self.node_id = node_id

    def update_absolute(self, value, total=None, preview=None):
        if total is not None:
@@ -1010,7 +1011,7 @@ class ProgressBar:
            value = self.total
        self.current = value
        if self.hook is not None:
-            self.hook(self.current, self.total, preview)
+            self.hook(self.current, self.total, preview, node_id=self.node_id)

    def update(self, value):
        self.update_absolute(self.current + value)
--- a/comfy_config/config_parser.py
+++ b/comfy_config/config_parser.py
@@ -11,43 +11,6 @@ from comfy_config.types import (
    PyProjectSettings
 )

-def validate_and_extract_os_classifiers(classifiers: list) -> list:
-    os_classifiers = [c for c in classifiers if c.startswith("Operating System :: ")]
-    if not os_classifiers:
-        return []
-
-    os_values = [c[len("Operating System :: ") :] for c in os_classifiers]
-    valid_os_prefixes = {"Microsoft", "POSIX", "MacOS", "OS Independent"}
-
-    for os_value in os_values:
-        if not any(os_value.startswith(prefix) for prefix in valid_os_prefixes):
-            return []
-
-    return os_values
-
-
-def validate_and_extract_accelerator_classifiers(classifiers: list) -> list:
-    accelerator_classifiers = [c for c in classifiers if c.startswith("Environment ::")]
-    if not accelerator_classifiers:
-        return []
-
-    accelerator_values = [c[len("Environment :: ") :] for c in accelerator_classifiers]
-
-    valid_accelerators = {
-        "GPU :: NVIDIA CUDA",
-        "GPU :: AMD ROCm",
-        "GPU :: Intel Arc",
-        "NPU :: Huawei Ascend",
-        "GPU :: Apple Metal",
-    }
-
-    for accelerator_value in accelerator_values:
-        if accelerator_value not in valid_accelerators:
-            return []
-
-    return accelerator_values
-
-
 """
 Extract configuration from a custom node directory's pyproject.toml file or a Python file.

@@ -115,24 +78,6 @@ def extract_node_configuration(path) -> Optional[PyProjectConfig]:
    tool_data = raw_settings.tool
    comfy_data = tool_data.get("comfy", {}) if tool_data else {}

-    dependencies = project_data.get("dependencies", [])
-    supported_comfyui_frontend_version = ""
-    for dep in dependencies:
-        if isinstance(dep, str) and dep.startswith("comfyui-frontend-package"):
-            supported_comfyui_frontend_version = dep.removeprefix("comfyui-frontend-package")
-            break
-
-    supported_comfyui_version = comfy_data.get("requires-comfyui", "")
-
-    classifiers = project_data.get('classifiers', [])
-    supported_os = validate_and_extract_os_classifiers(classifiers)
-    supported_accelerators = validate_and_extract_accelerator_classifiers(classifiers)
-
-    project_data['supported_os'] = supported_os
-    project_data['supported_accelerators'] = supported_accelerators
-    project_data['supported_comfyui_frontend_version'] = supported_comfyui_frontend_version
-    project_data['supported_comfyui_version'] = supported_comfyui_version
-
    return PyProjectConfig(project=project_data, tool_comfy=comfy_data)


--- a/comfy_config/types.py
+++ b/comfy_config/types.py
@@ -51,7 +51,7 @@ class ComfyConfig(BaseModel):
    models: List[Model] = Field(default_factory=list, alias="Models")
    includes: List[str] = Field(default_factory=list)
    web: Optional[str] = None
-    banner_url: str = ""
+

 class License(BaseModel):
    file: str = ""
@@ -66,10 +66,6 @@ class ProjectConfig(BaseModel):
    dependencies: List[str] = Field(default_factory=list)
    license: License = Field(default_factory=License)
    urls: URLs = Field(default_factory=URLs)
-    supported_os: List[str] = Field(default_factory=list)
-    supported_accelerators: List[str] = Field(default_factory=list)
-    supported_comfyui_version: str = ""
-    supported_comfyui_frontend_version: str = ""

    @field_validator('license', mode='before')
    @classmethod
--- a/comfy_execution/caching.py
+++ b/comfy_execution/caching.py
@@ -1,6 +1,7 @@
 import itertools
 from typing import Sequence, Mapping, Dict
 from comfy_execution.graph import DynamicPrompt
+from abc import ABC, abstractmethod

 import nodes

@@ -16,12 +17,13 @@ def include_unique_id_in_input(class_type: str) -> bool:
    NODE_CLASS_CONTAINS_UNIQUE_ID[class_type] = "UNIQUE_ID" in class_def.INPUT_TYPES().get("hidden", {}).values()
    return NODE_CLASS_CONTAINS_UNIQUE_ID[class_type]

-class CacheKeySet:
+class CacheKeySet(ABC):
    def __init__(self, dynprompt, node_ids, is_changed_cache):
        self.keys = {}
        self.subcache_keys = {}

-    def add_keys(self, node_ids):
+    @abstractmethod
+    async def add_keys(self, node_ids):
        raise NotImplementedError()

    def all_node_ids(self):
@@ -60,9 +62,8 @@ class CacheKeySetID(CacheKeySet):
    def __init__(self, dynprompt, node_ids, is_changed_cache):
        super().__init__(dynprompt, node_ids, is_changed_cache)
        self.dynprompt = dynprompt
-        self.add_keys(node_ids)

-    def add_keys(self, node_ids):
+    async def add_keys(self, node_ids):
        for node_id in node_ids:
            if node_id in self.keys:
                continue
@@ -77,37 +78,36 @@ class CacheKeySetInputSignature(CacheKeySet):
        super().__init__(dynprompt, node_ids, is_changed_cache)
        self.dynprompt = dynprompt
        self.is_changed_cache = is_changed_cache
-        self.add_keys(node_ids)

    def include_node_id_in_input(self) -> bool:
        return False

-    def add_keys(self, node_ids):
+    async def add_keys(self, node_ids):
        for node_id in node_ids:
            if node_id in self.keys:
                continue
            if not self.dynprompt.has_node(node_id):
                continue
            node = self.dynprompt.get_node(node_id)
-            self.keys[node_id] = self.get_node_signature(self.dynprompt, node_id)
+            self.keys[node_id] = await self.get_node_signature(self.dynprompt, node_id)
            self.subcache_keys[node_id] = (node_id, node["class_type"])

-    def get_node_signature(self, dynprompt, node_id):
+    async def get_node_signature(self, dynprompt, node_id):
        signature = []
        ancestors, order_mapping = self.get_ordered_ancestry(dynprompt, node_id)
-        signature.append(self.get_immediate_node_signature(dynprompt, node_id, order_mapping))
+        signature.append(await self.get_immediate_node_signature(dynprompt, node_id, order_mapping))
        for ancestor_id in ancestors:
-            signature.append(self.get_immediate_node_signature(dynprompt, ancestor_id, order_mapping))
+            signature.append(await self.get_immediate_node_signature(dynprompt, ancestor_id, order_mapping))
        return to_hashable(signature)

-    def get_immediate_node_signature(self, dynprompt, node_id, ancestor_order_mapping):
+    async def get_immediate_node_signature(self, dynprompt, node_id, ancestor_order_mapping):
        if not dynprompt.has_node(node_id):
            # This node doesn't exist -- we can't cache it.
            return [float("NaN")]
        node = dynprompt.get_node(node_id)
        class_type = node["class_type"]
        class_def = nodes.NODE_CLASS_MAPPINGS[class_type]
-        signature = [class_type, self.is_changed_cache.get(node_id)]
+        signature = [class_type, await self.is_changed_cache.get(node_id)]
        if self.include_node_id_in_input() or (hasattr(class_def, "NOT_IDEMPOTENT") and class_def.NOT_IDEMPOTENT) or include_unique_id_in_input(class_type):
            signature.append(node_id)
        inputs = node["inputs"]
@@ -150,9 +150,10 @@ class BasicCache:
        self.cache = {}
        self.subcaches = {}

-    def set_prompt(self, dynprompt, node_ids, is_changed_cache):
+    async def set_prompt(self, dynprompt, node_ids, is_changed_cache):
        self.dynprompt = dynprompt
        self.cache_key_set = self.key_class(dynprompt, node_ids, is_changed_cache)
+        await self.cache_key_set.add_keys(node_ids)
        self.is_changed_cache = is_changed_cache
        self.initialized = True

@@ -201,13 +202,13 @@ class BasicCache:
        else:
            return None

-    def _ensure_subcache(self, node_id, children_ids):
+    async def _ensure_subcache(self, node_id, children_ids):
        subcache_key = self.cache_key_set.get_subcache_key(node_id)
        subcache = self.subcaches.get(subcache_key, None)
        if subcache is None:
            subcache = BasicCache(self.key_class)
            self.subcaches[subcache_key] = subcache
-        subcache.set_prompt(self.dynprompt, children_ids, self.is_changed_cache)
+        await subcache.set_prompt(self.dynprompt, children_ids, self.is_changed_cache)
        return subcache

    def _get_subcache(self, node_id):
@@ -259,10 +260,10 @@ class HierarchicalCache(BasicCache):
        assert cache is not None
        cache._set_immediate(node_id, value)

-    def ensure_subcache_for(self, node_id, children_ids):
+    async def ensure_subcache_for(self, node_id, children_ids):
        cache = self._get_cache_for(node_id)
        assert cache is not None
-        return cache._ensure_subcache(node_id, children_ids)
+        return await cache._ensure_subcache(node_id, children_ids)

 class LRUCache(BasicCache):
    def __init__(self, key_class, max_size=100):
@@ -273,8 +274,8 @@ class LRUCache(BasicCache):
        self.used_generation = {}
        self.children = {}

-    def set_prompt(self, dynprompt, node_ids, is_changed_cache):
-        super().set_prompt(dynprompt, node_ids, is_changed_cache)
+    async def set_prompt(self, dynprompt, node_ids, is_changed_cache):
+        await super().set_prompt(dynprompt, node_ids, is_changed_cache)
        self.generation += 1
        for node_id in node_ids:
            self._mark_used(node_id)
@@ -303,11 +304,11 @@ class LRUCache(BasicCache):
        self._mark_used(node_id)
        return self._set_immediate(node_id, value)

-    def ensure_subcache_for(self, node_id, children_ids):
+    async def ensure_subcache_for(self, node_id, children_ids):
        # Just uses subcaches for tracking 'live' nodes
-        super()._ensure_subcache(node_id, children_ids)
+        await super()._ensure_subcache(node_id, children_ids)

-        self.cache_key_set.add_keys(children_ids)
+        await self.cache_key_set.add_keys(children_ids)
        self._mark_used(node_id)
        cache_key = self.cache_key_set.get_data_key(node_id)
        self.children[cache_key] = []
@@ -337,7 +338,7 @@ class DependencyAwareCache(BasicCache):
        self.ancestors = {}    # Maps node_id -> set of ancestor node_ids
        self.executed_nodes = set()  # Tracks nodes that have been executed

-    def set_prompt(self, dynprompt, node_ids, is_changed_cache):
+    async def set_prompt(self, dynprompt, node_ids, is_changed_cache):
        """
        Clear the entire cache and rebuild the dependency graph.

@@ -354,7 +355,7 @@ class DependencyAwareCache(BasicCache):
        self.executed_nodes.clear()

        # Call the parent method to initialize the cache with the new prompt
-        super().set_prompt(dynprompt, node_ids, is_changed_cache)
+        await super().set_prompt(dynprompt, node_ids, is_changed_cache)

        # Rebuild the dependency graph
        self._build_dependency_graph(dynprompt, node_ids)
@@ -405,7 +406,7 @@ class DependencyAwareCache(BasicCache):
        """
        return self._get_immediate(node_id)

-    def ensure_subcache_for(self, node_id, children_ids):
+    async def ensure_subcache_for(self, node_id, children_ids):
        """
        Ensure a subcache exists for a node and update dependencies.

@@ -416,7 +417,7 @@ class DependencyAwareCache(BasicCache):
        Returns:
            The subcache object for the node.
        """
-        subcache = super()._ensure_subcache(node_id, children_ids)
+        subcache = await super()._ensure_subcache(node_id, children_ids)
        for child_id in children_ids:
            self.descendants[node_id].add(child_id)
            self.ancestors[child_id].add(node_id)
--- a/comfy_execution/graph.py
+++ b/comfy_execution/graph.py
@@ -2,6 +2,7 @@ from __future__ import annotations
 from typing import Type, Literal

 import nodes
+import asyncio
 from comfy_execution.graph_utils import is_link
 from comfy.comfy_types.node_typing import ComfyNodeABC, InputTypeDict, InputTypeOptions

@@ -100,6 +101,8 @@ class TopologicalSort:
        self.pendingNodes = {}
        self.blockCount = {} # Number of nodes this node is directly blocked by
        self.blocking = {} # Which nodes are blocked by this node
+        self.externalBlocks = 0
+        self.unblockedEvent = asyncio.Event()

    def get_input_info(self, unique_id, input_name):
        class_type = self.dynprompt.get_node(unique_id)["class_type"]
@@ -153,6 +156,16 @@ class TopologicalSort:
        for link in links:
            self.add_strong_link(*link)

+    def add_external_block(self, node_id):
+        """
+        Block a pending node on work that happens outside the graph.
+
+        Increments both the node's entry in `blockCount` and the sort-wide
+        `externalBlocks` counter, then hands back a callable that undoes
+        exactly those two increments.
+
+        Args:
+            node_id: ID of a node that is already tracked in `blockCount`.
+
+        Returns:
+            A zero-argument `unblock` callable. Its first call releases the
+            block and sets `unblockedEvent`; any later call does nothing.
+
+        Raises:
+            AssertionError: If `node_id` is not currently in `blockCount`.
+        """
+        assert node_id in self.blockCount, "Can't add external block to a node that isn't pending"
+        self.externalBlocks += 1
+        self.blockCount[node_id] += 1
+        released = False
+        def unblock():
+            nonlocal released
+            if released:
+                # A second release would push both counters below their true value.
+                return
+            released = True
+            self.externalBlocks -= 1
+            self.blockCount[node_id] -= 1
+            self.unblockedEvent.set()
+        return unblock
+
    def is_cached(self, node_id):
        return False

@@ -181,11 +194,16 @@ class ExecutionList(TopologicalSort):
    def is_cached(self, node_id):
        return self.output_cache.get(node_id) is not None

-    def stage_node_execution(self):
+    async def stage_node_execution(self):
        assert self.staged_node_id is None
        if self.is_empty():
            return None, None, None
        available = self.get_ready_nodes()
+        while len(available) == 0 and self.externalBlocks > 0:
+            # Wait for an external block to be released
+            await self.unblockedEvent.wait()
+            self.unblockedEvent.clear()
+            available = self.get_ready_nodes()
        if len(available) == 0:
            cycled_nodes = self.get_nodes_in_cycle()
            # Because cycles composed entirely of static nodes are caught during initial validation,
--- a/comfy_execution/progress.py
+++ b/comfy_execution/progress.py
@@ -0,0 +1,288 @@
+from typing import TypedDict, Dict, Optional
+from typing_extensions import override
+from PIL import Image
+from enum import Enum
+from abc import ABC
+from tqdm import tqdm
+from comfy_execution.graph import DynamicPrompt
+from protocol import BinaryEventTypes
+
+class NodeState(Enum):
+    """Lifecycle states recorded for a node while its progress is tracked."""
+    Pending = "pending"
+    Running = "running"
+    Finished = "finished"
+    Error = "error"
+
+class NodeProgressState(TypedDict):
+    """
+    A class to represent the state of a node's progress.
+
+    Keys:
+        state: Current lifecycle state of the node.
+        value: Progress reached so far.
+        max: Progress value that corresponds to completion.
+    """
+    state: NodeState
+    value: float
+    max: float
+
+class ProgressHandler(ABC):
+    """
+    Abstract base class for progress handlers.
+    Progress handlers receive progress updates and display them in various ways.
+
+    Every hook below is a no-op by default; subclasses override the ones they need.
+    """
+    def __init__(self, name: str):
+        """
+        Args:
+            name: Identifier for this handler.
+        """
+        self.name = name
+        # Handlers start out enabled; enable()/disable() toggle this flag.
+        self.enabled = True
+
+    def set_registry(self, registry: "ProgressRegistry"):
+        """Receive the registry this handler is attached to (ignored by default)."""
+        pass
+
+    def start_handler(self, node_id: str, state: NodeProgressState, prompt_id: str):
+        """Called when a node starts processing"""
+        pass
+
+    def update_handler(self, node_id: str, value: float, max_value: float,
+                      state: NodeProgressState, prompt_id: str, image: Optional[Image.Image] = None):
+        """Called when a node's progress is updated"""
+        pass
+
+    def finish_handler(self, node_id: str, state: NodeProgressState, prompt_id: str):
+        """Called when a node finishes processing"""
+        pass
+
+    def reset(self):
+        """Called when the progress registry is reset"""
+        pass
+
+    def enable(self):
+        """Enable this handler"""
+        self.enabled = True
+
+    def disable(self):
+        """Disable this handler"""
+        self.enabled = False
+
+class CLIProgressHandler(ProgressHandler):
+    """
+    Handler that displays progress using tqdm progress bars in the CLI.
+
+    One bar is kept per node ID in `progress_bars` and removed again when the
+    node finishes or the handler is reset.
+    """
+    def __init__(self):
+        super().__init__("cli")
+        # Open bars, keyed by node ID.
+        self.progress_bars: Dict[str, tqdm] = {}
+
+    @override
+    def start_handler(self, node_id: str, state: NodeProgressState, prompt_id: str):
+        """Open a bar for `node_id` unless one is already open."""
+        # Create a new tqdm progress bar
+        if node_id not in self.progress_bars:
+            # NOTE(review): position is the count of currently open bars, so it
+            # may repeat a position still in use after an earlier bar was removed — confirm.
+            self.progress_bars[node_id] = tqdm(
+                total=state["max"],
+                desc=f"Node {node_id}",
+                unit="steps",
+                leave=True,
+                position=len(self.progress_bars)
+            )
+
+    @override
+    def update_handler(self, node_id: str, value: float, max_value: float,
+                      state: NodeProgressState, prompt_id: str, image: Optional[Image.Image] = None):
+        """
+        Move the node's bar to `value`, creating the bar first if needed.
+
+        `value` is treated as an absolute position; an existing bar is only
+        ever advanced, never moved backwards. `image` is not used here.
+        """
+        # Handle case where start_handler wasn't called
+        if node_id not in self.progress_bars:
+            self.progress_bars[node_id] = tqdm(
+                total=max_value,
+                desc=f"Node {node_id}",
+                unit="steps",
+                leave=True,
+                position=len(self.progress_bars)
+            )
+            self.progress_bars[node_id].update(value)
+        else:
+            # Update existing progress bar
+            if max_value != self.progress_bars[node_id].total:
+                self.progress_bars[node_id].total = max_value
+            # Calculate the update amount (difference from current position)
+            current_position = self.progress_bars[node_id].n
+            update_amount = value - current_position
+            if update_amount > 0:
+                self.progress_bars[node_id].update(update_amount)
+
+    @override
+    def finish_handler(self, node_id: str, state: NodeProgressState, prompt_id: str):
+        """Fill the node's bar up to `state["max"]`, close it and forget it."""
+        # Complete and close the progress bar if it exists
+        if node_id in self.progress_bars:
+            # Ensure the bar shows 100% completion
+            remaining = state["max"] - self.progress_bars[node_id].n
+            if remaining > 0:
+                self.progress_bars[node_id].update(remaining)
+            self.progress_bars[node_id].close()
+            del self.progress_bars[node_id]
+
+    @override
+    def reset(self):
+        """Close every open bar without completing it and clear the mapping."""
+        # Close all progress bars
+        for bar in self.progress_bars.values():
+            bar.close()
+        self.progress_bars.clear()
+
+class WebUIProgressHandler(ProgressHandler):
+    """
+    Handler that sends progress updates to the WebUI via WebSockets.
+
+    Every start/update/finish notification re-sends the state of all
+    non-pending nodes in a single "progress_state" message. Nothing is sent
+    until a registry has been attached with `set_registry`.
+    """
+    def __init__(self, server_instance):
+        """
+        Args:
+            server_instance: Object providing `send_sync` and `client_id`, or
+                None to make this handler send nothing.
+        """
+        super().__init__("webui")
+        self.server_instance = server_instance
+        # Defined up front so a notification that arrives before
+        # set_registry() is skipped instead of raising AttributeError.
+        self.registry: Optional["ProgressRegistry"] = None
+
+    @override
+    def set_registry(self, registry: "ProgressRegistry"):
+        self.registry = registry
+
+    def _node_identity(self, node_id: str, prompt_id: str) -> dict:
+        """Return the ID fields shared by progress entries and preview metadata."""
+        dynprompt = self.registry.dynprompt
+        return {
+            "node_id": node_id,
+            "prompt_id": prompt_id,
+            "display_node_id": dynprompt.get_display_node_id(node_id),
+            "parent_node_id": dynprompt.get_parent_node_id(node_id),
+            "real_node_id": dynprompt.get_real_node_id(node_id)
+        }
+
+    def _send_progress_state(self, prompt_id: str, nodes: Dict[str, NodeProgressState]):
+        """Send the current progress state to the client"""
+        if self.server_instance is None or self.registry is None:
+            return
+
+        # Only send info for non-pending nodes
+        active_nodes = {
+            node_id: {
+                "value": state["value"],
+                "max": state["max"],
+                "state": state["state"].value,
+                **self._node_identity(node_id, prompt_id)
+            }
+            for node_id, state in nodes.items()
+            if state["state"] != NodeState.Pending
+        }
+
+        # Send a combined progress_state message with all node states
+        self.server_instance.send_sync("progress_state", {
+            "prompt_id": prompt_id,
+            "nodes": active_nodes
+        })
+
+    @override
+    def start_handler(self, node_id: str, state: NodeProgressState, prompt_id: str):
+        # Send progress state of all nodes
+        if self.registry is not None:
+            self._send_progress_state(prompt_id, self.registry.nodes)
+
+    @override
+    def update_handler(self, node_id: str, value: float, max_value: float,
+                      state: NodeProgressState, prompt_id: str, image: Optional[Image.Image] = None):
+        # Send progress state of all nodes
+        if self.registry is not None:
+            self._send_progress_state(prompt_id, self.registry.nodes)
+        # The preview needs both the registry (for node IDs) and a server to
+        # send through; skip it when either is missing.
+        if image and self.registry is not None and self.server_instance is not None:
+            metadata = self._node_identity(node_id, prompt_id)
+            self.server_instance.send_sync(BinaryEventTypes.PREVIEW_IMAGE_WITH_METADATA, (image, metadata), self.server_instance.client_id)
+
+
+    @override
+    def finish_handler(self, node_id: str, state: NodeProgressState, prompt_id: str):
+        # Send progress state of all nodes
+        if self.registry is not None:
+            self._send_progress_state(prompt_id, self.registry.nodes)
+
+class ProgressRegistry:
+    """
+    Holds per-node progress for one prompt and fans every change out to the
+    registered, enabled handlers.
+    """
+    def __init__(self, prompt_id: str, dynprompt: DynamicPrompt):
+        self.prompt_id = prompt_id
+        self.dynprompt = dynprompt
+        self.nodes: Dict[str, NodeProgressState] = {}
+        self.handlers: Dict[str, ProgressHandler] = {}
+
+    def _enabled_handlers(self):
+        """Lazily yield the handlers whose `enabled` flag is set."""
+        for candidate in self.handlers.values():
+            if candidate.enabled:
+                yield candidate
+
+    def register_handler(self, handler: ProgressHandler) -> None:
+        """Store `handler` under its name, replacing any handler with that name."""
+        self.handlers[handler.name] = handler
+
+    def unregister_handler(self, handler_name: str) -> None:
+        """Reset and drop the named handler; unknown names are ignored."""
+        registered = self.handlers.get(handler_name)
+        if registered is None:
+            return
+        # Give the handler a chance to release its resources before removal
+        registered.reset()
+        del self.handlers[handler_name]
+
+    def enable_handler(self, handler_name: str) -> None:
+        """Turn the named handler on; unknown names are ignored."""
+        registered = self.handlers.get(handler_name)
+        if registered is not None:
+            registered.enable()
+
+    def disable_handler(self, handler_name: str) -> None:
+        """Turn the named handler off; unknown names are ignored."""
+        registered = self.handlers.get(handler_name)
+        if registered is not None:
+            registered.disable()
+
+    def ensure_entry(self, node_id: str) -> NodeProgressState:
+        """Return the node's progress entry, creating a pending one if absent."""
+        existing = self.nodes.get(node_id)
+        if existing is None:
+            existing = NodeProgressState(
+                state = NodeState.Pending,
+                value = 0,
+                max = 1
+            )
+            self.nodes[node_id] = existing
+        return existing
+
+    def start_progress(self, node_id: str) -> None:
+        """Mark a node as running at 0 of 1 and notify enabled handlers."""
+        entry = self.ensure_entry(node_id)
+        entry.update(state=NodeState.Running, value=0.0, max=1.0)
+
+        for handler in self._enabled_handlers():
+            handler.start_handler(node_id, entry, self.prompt_id)
+
+    def update_progress(self, node_id: str, value: float, max_value: float, image: Optional[Image.Image]) -> None:
+        """Record new progress for a running node and notify enabled handlers."""
+        entry = self.ensure_entry(node_id)
+        entry.update(state=NodeState.Running, value=value, max=max_value)
+
+        for handler in self._enabled_handlers():
+            handler.update_handler(node_id, value, max_value, entry, self.prompt_id, image)
+
+    def finish_progress(self, node_id: str) -> None:
+        """Mark a node as finished at its maximum and notify enabled handlers."""
+        entry = self.ensure_entry(node_id)
+        entry["state"] = NodeState.Finished
+        entry["value"] = entry["max"]
+
+        for handler in self._enabled_handlers():
+            handler.finish_handler(node_id, entry, self.prompt_id)
+
+    def reset_handlers(self) -> None:
+        """Reset every registered handler, enabled or not."""
+        for registered in self.handlers.values():
+            registered.reset()
+
+# Global registry instance
+global_progress_registry: ProgressRegistry = ProgressRegistry(prompt_id="", dynprompt=DynamicPrompt({}))
+
+def reset_progress_state(prompt_id: str, dynprompt: DynamicPrompt) -> None:
+    """
+    Replace the global progress registry with a fresh one for a new prompt.
+
+    Handlers on the outgoing registry are reset; they are not carried over to
+    the new registry, which starts with no handlers and no node entries.
+    """
+    global global_progress_registry
+
+    # Reset existing handlers if registry exists
+    if global_progress_registry is not None:
+        global_progress_registry.reset_handlers()
+
+    # Create new registry
+    global_progress_registry = ProgressRegistry(prompt_id, dynprompt)
+
+def add_progress_handler(handler: ProgressHandler) -> None:
+    """Attach `handler` to the current global registry and register it there."""
+    handler.set_registry(global_progress_registry)
+    global_progress_registry.register_handler(handler)
+
+def get_progress_state() -> ProgressRegistry:
+    """Return the current global progress registry."""
+    return global_progress_registry
--- a/comfy_execution/utils.py
+++ b/comfy_execution/utils.py
@@ -0,0 +1,46 @@
+import contextvars
+from typing import Optional, NamedTuple
+
+class ExecutionContext(NamedTuple):
+    """
+    Context information about the currently executing node.
+
+    Attributes:
+        prompt_id: The ID of the prompt the executing node belongs to
+        node_id: The ID of the currently executing node
+        list_index: The index in a list being processed (for operations on batches/lists)
+    """
+    prompt_id: str
+    node_id: str
+    list_index: Optional[int]
+
+current_executing_context: contextvars.ContextVar[Optional[ExecutionContext]] = contextvars.ContextVar("current_executing_context", default=None)
+
+def get_executing_context() -> Optional[ExecutionContext]:
+    """Return the execution context set for the current context, or None if none is set."""
+    return current_executing_context.get(None)
+
+class CurrentNodeContext:
+    """
+    Context manager for setting the current executing node context.
+
+    Sets the current_executing_context on enter and resets it on exit.
+
+    Example:
+        with CurrentNodeContext(prompt_id="abc", node_id="123", list_index=0):
+            # Code that should run with the current node context set
+            process_image()
+    """
+    def __init__(self, prompt_id: str, node_id: str, list_index: Optional[int] = None):
+        self.context = ExecutionContext(
+            prompt_id= prompt_id,
+            node_id= node_id,
+            list_index= list_index
+        )
+        # Token returned by ContextVar.set(); None until __enter__ has run.
+        self.token = None
+
+    def __enter__(self):
+        self.token = current_executing_context.set(self.context)
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        # Restore whatever value the variable held before __enter__.
+        if self.token is not None:
+            current_executing_context.reset(self.token)
--- a/comfy_extras/nodes_cosmos.py
+++ b/comfy_extras/nodes_cosmos.py
@@ -2,7 +2,6 @@ import nodes
 import torch
 import comfy.model_management
 import comfy.utils
-import comfy.latent_formats


 class EmptyCosmosLatentVideo:
@@ -76,53 +75,8 @@ class CosmosImageToVideoLatent:
        out_latent["noise_mask"] = mask.repeat((batch_size, ) + (1,) * (mask.ndim - 1))
        return (out_latent,)

-class CosmosPredict2ImageToVideoLatent:
-    @classmethod
-    def INPUT_TYPES(s):
-        return {"required": {"vae": ("VAE", ),
-                             "width": ("INT", {"default": 848, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 16}),
-                             "height": ("INT", {"default": 480, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 16}),
-                             "length": ("INT", {"default": 93, "min": 1, "max": nodes.MAX_RESOLUTION, "step": 4}),
-                             "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
-                },
-                "optional": {"start_image": ("IMAGE", ),
-                             "end_image": ("IMAGE", ),
-                }}
-
-
-    RETURN_TYPES = ("LATENT",)
-    FUNCTION = "encode"
-
-    CATEGORY = "conditioning/inpaint"
-
-    def encode(self, vae, width, height, length, batch_size, start_image=None, end_image=None):
-        latent = torch.zeros([1, 16, ((length - 1) // 4) + 1, height // 8, width // 8], device=comfy.model_management.intermediate_device())
-        if start_image is None and end_image is None:
-            out_latent = {}
-            out_latent["samples"] = latent
-            return (out_latent,)
-
-        mask = torch.ones([latent.shape[0], 1, ((length - 1) // 4) + 1, latent.shape[-2], latent.shape[-1]], device=comfy.model_management.intermediate_device())
-
-        if start_image is not None:
-            latent_temp = vae_encode_with_padding(vae, start_image, width, height, length, padding=1)
-            latent[:, :, :latent_temp.shape[-3]] = latent_temp
-            mask[:, :, :latent_temp.shape[-3]] *= 0.0
-
-        if end_image is not None:
-            latent_temp = vae_encode_with_padding(vae, end_image, width, height, length, padding=0)
-            latent[:, :, -latent_temp.shape[-3]:] = latent_temp
-            mask[:, :, -latent_temp.shape[-3]:] *= 0.0
-
-        out_latent = {}
-        latent_format = comfy.latent_formats.Wan21()
-        latent = latent_format.process_out(latent) * mask + latent * (1.0 - mask)
-        out_latent["samples"] = latent.repeat((batch_size, ) + (1,) * (latent.ndim - 1))
-        out_latent["noise_mask"] = mask.repeat((batch_size, ) + (1,) * (mask.ndim - 1))
-        return (out_latent,)

 NODE_CLASS_MAPPINGS = {
    "EmptyCosmosLatentVideo": EmptyCosmosLatentVideo,
    "CosmosImageToVideoLatent": CosmosImageToVideoLatent,
-    "CosmosPredict2ImageToVideoLatent": CosmosPredict2ImageToVideoLatent,
 }
--- a/comfy_extras/nodes_edit_model.py
+++ b/comfy_extras/nodes_edit_model.py
@@ -1,26 +0,0 @@
-import node_helpers
-
-
-class ReferenceLatent:
-    @classmethod
-    def INPUT_TYPES(s):
-        return {"required": {"conditioning": ("CONDITIONING", ),
-                            },
-                "optional": {"latent": ("LATENT", ),}
-               }
-
-    RETURN_TYPES = ("CONDITIONING",)
-    FUNCTION = "append"
-
-    CATEGORY = "advanced/conditioning/edit_models"
-    DESCRIPTION = "This node sets the guiding latent for an edit model. If the model supports it you can chain multiple to set multiple reference images."
-
-    def append(self, conditioning, latent=None):
-        if latent is not None:
-            conditioning = node_helpers.conditioning_set_values(conditioning, {"reference_latents": [latent["samples"]]}, append=True)
-        return (conditioning, )
-
-
-NODE_CLASS_MAPPINGS = {
-    "ReferenceLatent": ReferenceLatent,
-}
--- a/comfy_extras/nodes_flux.py
+++ b/comfy_extras/nodes_flux.py
@@ -1,5 +1,4 @@
 import node_helpers
-import comfy.utils

 class CLIPTextEncodeFlux:
    @classmethod
@@ -57,52 +56,8 @@ class FluxDisableGuidance:
        return (c, )


-PREFERED_KONTEXT_RESOLUTIONS = [
-    (672, 1568),
-    (688, 1504),
-    (720, 1456),
-    (752, 1392),
-    (800, 1328),
-    (832, 1248),
-    (880, 1184),
-    (944, 1104),
-    (1024, 1024),
-    (1104, 944),
-    (1184, 880),
-    (1248, 832),
-    (1328, 800),
-    (1392, 752),
-    (1456, 720),
-    (1504, 688),
-    (1568, 672),
-]
-
-
-class FluxKontextImageScale:
-    @classmethod
-    def INPUT_TYPES(s):
-        return {"required": {"image": ("IMAGE", ),
-                            },
-               }
-
-    RETURN_TYPES = ("IMAGE",)
-    FUNCTION = "scale"
-
-    CATEGORY = "advanced/conditioning/flux"
-    DESCRIPTION = "This node resizes the image to one that is more optimal for flux kontext."
-
-    def scale(self, image):
-        width = image.shape[2]
-        height = image.shape[1]
-        aspect_ratio = width / height
-        _, width, height = min((abs(aspect_ratio - w / h), w, h) for w, h in PREFERED_KONTEXT_RESOLUTIONS)
-        image = comfy.utils.common_upscale(image.movedim(-1, 1), width, height, "lanczos", "center").movedim(1, -1)
-        return (image, )
-
-
 NODE_CLASS_MAPPINGS = {
    "CLIPTextEncodeFlux": CLIPTextEncodeFlux,
    "FluxGuidance": FluxGuidance,
    "FluxDisableGuidance": FluxDisableGuidance,
-    "FluxKontextImageScale": FluxKontextImageScale,
 }
--- a/comfy_extras/nodes_images.py
+++ b/comfy_extras/nodes_images.py
@@ -304,23 +304,10 @@ Optional spacing can be added between images.
                image2.movedim(-1, 1), target_w, target_h, "lanczos", "disabled"
            ).movedim(1, -1)

-        color_map = {
-            "white": 1.0,
-            "black": 0.0,
-            "red": (1.0, 0.0, 0.0),
-            "green": (0.0, 1.0, 0.0),
-            "blue": (0.0, 0.0, 1.0),
-        }
-
-        color_val = color_map[spacing_color]
-
        # When not matching sizes, pad to align non-concat dimensions
        if not match_image_size:
            h1, w1 = image1.shape[1:3]
            h2, w2 = image2.shape[1:3]
-            pad_value = 0.0
-            if not isinstance(color_val, tuple):
-                pad_value = color_val

            if direction in ["left", "right"]:
                # For horizontal concat, pad heights to match
@@ -329,11 +316,11 @@ Optional spacing can be added between images.
                    if h1 < target_h:
                        pad_h = target_h - h1
                        pad_top, pad_bottom = pad_h // 2, pad_h - pad_h // 2
-                        image1 = torch.nn.functional.pad(image1, (0, 0, 0, 0, pad_top, pad_bottom), mode='constant', value=pad_value)
+                        image1 = torch.nn.functional.pad(image1, (0, 0, 0, 0, pad_top, pad_bottom), mode='constant', value=0.0)
                    if h2 < target_h:
                        pad_h = target_h - h2
                        pad_top, pad_bottom = pad_h // 2, pad_h - pad_h // 2
-                        image2 = torch.nn.functional.pad(image2, (0, 0, 0, 0, pad_top, pad_bottom), mode='constant', value=pad_value)
+                        image2 = torch.nn.functional.pad(image2, (0, 0, 0, 0, pad_top, pad_bottom), mode='constant', value=0.0)
            else:  # up, down
                # For vertical concat, pad widths to match
                if w1 != w2:
@@ -341,11 +328,11 @@ Optional spacing can be added between images.
                    if w1 < target_w:
                        pad_w = target_w - w1
                        pad_left, pad_right = pad_w // 2, pad_w - pad_w // 2
-                        image1 = torch.nn.functional.pad(image1, (0, 0, pad_left, pad_right), mode='constant', value=pad_value)
+                        image1 = torch.nn.functional.pad(image1, (0, 0, pad_left, pad_right), mode='constant', value=0.0)
                    if w2 < target_w:
                        pad_w = target_w - w2
                        pad_left, pad_right = pad_w // 2, pad_w - pad_w // 2
-                        image2 = torch.nn.functional.pad(image2, (0, 0, pad_left, pad_right), mode='constant', value=pad_value)
+                        image2 = torch.nn.functional.pad(image2, (0, 0, pad_left, pad_right), mode='constant', value=0.0)

        # Ensure same number of channels
        if image1.shape[-1] != image2.shape[-1]:
@@ -379,6 +366,15 @@ Optional spacing can be added between images.
        if spacing_width > 0:
            spacing_width = spacing_width + (spacing_width % 2)  # Ensure even

+            color_map = {
+                "white": 1.0,
+                "black": 0.0,
+                "red": (1.0, 0.0, 0.0),
+                "green": (0.0, 1.0, 0.0),
+                "blue": (0.0, 0.0, 1.0),
+            }
+            color_val = color_map[spacing_color]
+
            if direction in ["left", "right"]:
                spacing_shape = (
                    image1.shape[0],
@@ -414,62 +410,6 @@ Optional spacing can be added between images.
        concat_dim = 2 if direction in ["left", "right"] else 1
        return (torch.cat(images, dim=concat_dim),)

-class ResizeAndPadImage:
-    @classmethod
-    def INPUT_TYPES(cls):
-        return {
-            "required": {
-                "image": ("IMAGE",),
-                "target_width": ("INT", {
-                    "default": 512,
-                    "min": 1,
-                    "max": MAX_RESOLUTION,
-                    "step": 1
-                }),
-                "target_height": ("INT", {
-                    "default": 512,
-                    "min": 1,
-                    "max": MAX_RESOLUTION,
-                    "step": 1
-                }),
-                "padding_color": (["white", "black"],),
-                "interpolation": (["area", "bicubic", "nearest-exact", "bilinear", "lanczos"],),
-            }
-        }
-
-    RETURN_TYPES = ("IMAGE",)
-    FUNCTION = "resize_and_pad"
-    CATEGORY = "image/transform"
-
-    def resize_and_pad(self, image, target_width, target_height, padding_color, interpolation):
-        batch_size, orig_height, orig_width, channels = image.shape
-
-        scale_w = target_width / orig_width
-        scale_h = target_height / orig_height
-        scale = min(scale_w, scale_h)
-
-        new_width = int(orig_width * scale)
-        new_height = int(orig_height * scale)
-
-        image_permuted = image.permute(0, 3, 1, 2)
-
-        resized = comfy.utils.common_upscale(image_permuted, new_width, new_height, interpolation, "disabled")
-
-        pad_value = 0.0 if padding_color == "black" else 1.0
-        padded = torch.full(
-            (batch_size, channels, target_height, target_width),
-            pad_value,
-            dtype=image.dtype,
-            device=image.device
-        )
-
-        y_offset = (target_height - new_height) // 2
-        x_offset = (target_width - new_width) // 2
-
-        padded[:, :, y_offset:y_offset + new_height, x_offset:x_offset + new_width] = resized
-
-        output = padded.permute(0, 2, 3, 1)
-        return (output,)

 class SaveSVGNode:
    """
@@ -592,6 +532,5 @@ NODE_CLASS_MAPPINGS = {
    "SaveAnimatedPNG": SaveAnimatedPNG,
    "SaveSVGNode": SaveSVGNode,
    "ImageStitch": ImageStitch,
-    "ResizeAndPadImage": ResizeAndPadImage,
    "GetImageSize": GetImageSize,
 }
--- a/comfy_extras/nodes_model_merging_model_specific.py
+++ b/comfy_extras/nodes_model_merging_model_specific.py
@@ -268,52 +268,6 @@ class ModelMergeWAN2_1(comfy_extras.nodes_model_merging.ModelMergeBlocks):

        return {"required": arg_dict}

-class ModelMergeCosmosPredict2_2B(comfy_extras.nodes_model_merging.ModelMergeBlocks):
-    CATEGORY = "advanced/model_merging/model_specific"
-
-    @classmethod
-    def INPUT_TYPES(s):
-        arg_dict = { "model1": ("MODEL",),
-                              "model2": ("MODEL",)}
-
-        argument = ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01})
-
-        arg_dict["pos_embedder."] = argument
-        arg_dict["x_embedder."] = argument
-        arg_dict["t_embedder."] = argument
-        arg_dict["t_embedding_norm."] = argument
-
-
-        for i in range(28):
-            arg_dict["blocks.{}.".format(i)] = argument
-
-        arg_dict["final_layer."] = argument
-
-        return {"required": arg_dict}
-
-class ModelMergeCosmosPredict2_14B(comfy_extras.nodes_model_merging.ModelMergeBlocks):
-    CATEGORY = "advanced/model_merging/model_specific"
-
-    @classmethod
-    def INPUT_TYPES(s):
-        arg_dict = { "model1": ("MODEL",),
-                              "model2": ("MODEL",)}
-
-        argument = ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01})
-
-        arg_dict["pos_embedder."] = argument
-        arg_dict["x_embedder."] = argument
-        arg_dict["t_embedder."] = argument
-        arg_dict["t_embedding_norm."] = argument
-
-
-        for i in range(36):
-            arg_dict["blocks.{}.".format(i)] = argument
-
-        arg_dict["final_layer."] = argument
-
-        return {"required": arg_dict}
-
 NODE_CLASS_MAPPINGS = {
    "ModelMergeSD1": ModelMergeSD1,
    "ModelMergeSD2": ModelMergeSD1, #SD1 and SD2 have the same blocks
@@ -327,6 +281,4 @@ NODE_CLASS_MAPPINGS = {
    "ModelMergeCosmos7B": ModelMergeCosmos7B,
    "ModelMergeCosmos14B": ModelMergeCosmos14B,
    "ModelMergeWAN2_1": ModelMergeWAN2_1,
-    "ModelMergeCosmosPredict2_2B": ModelMergeCosmosPredict2_2B,
-    "ModelMergeCosmosPredict2_14B": ModelMergeCosmosPredict2_14B,
 }
--- a/comfy_extras/nodes_train.py
+++ b/comfy_extras/nodes_train.py
@@ -552,9 +552,6 @@ class LoraModelLoader:


 class SaveLoRA:
-    def __init__(self):
-        self.output_dir = folder_paths.get_output_directory()
-
    @classmethod
    def INPUT_TYPES(s):
        return {
@@ -568,7 +565,7 @@ class SaveLoRA:
                "prefix": (
                    "STRING",
                    {
-                        "default": "loras/ComfyUI_trained_lora",
+                        "default": "trained_lora",
                        "tooltip": "The prefix to use for the saved LoRA file.",
                    },
                ),
@@ -591,13 +588,12 @@ class SaveLoRA:
    OUTPUT_NODE = True

    def save(self, lora, prefix, steps=None):
-        full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path(prefix, self.output_dir)
+        date = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        if steps is None:
-            output_checkpoint = f"{filename}_{counter:05}_.safetensors"
+            output_file = f"models/loras/{prefix}_{date}_lora.safetensors"
        else:
-            output_checkpoint = f"{filename}_{steps}_steps_{counter:05}_.safetensors"
-        output_checkpoint = os.path.join(full_output_folder, output_checkpoint)
-        safetensors.torch.save_file(lora, output_checkpoint)
+            output_file = f"models/loras/{prefix}_{steps}_steps_{date}_lora.safetensors"
+        safetensors.torch.save_file(lora, output_file)
        return {}


--- a/comfyui_version.py
+++ b/comfyui_version.py
@@ -1,3 +1,3 @@
 # This file is automatically generated by the build process when version is
 # updated in pyproject.toml.
-__version__ = "0.3.43"
+__version__ = "0.3.40"
--- a/execution.py
+++ b/execution.py
@@ -8,12 +8,14 @@ import time
 import traceback
 from enum import Enum
 from typing import List, Literal, NamedTuple, Optional
+import asyncio

 import torch

 import comfy.model_management
 import nodes
 from comfy_execution.caching import (
+    BasicCache,
    CacheKeySetID,
    CacheKeySetInputSignature,
    DependencyAwareCache,
@@ -28,6 +30,8 @@ from comfy_execution.graph import (
 )
 from comfy_execution.graph_utils import GraphBuilder, is_link
 from comfy_execution.validation import validate_node_input
+from comfy_execution.progress import get_progress_state, reset_progress_state, add_progress_handler, WebUIProgressHandler
+from comfy_execution.utils import CurrentNodeContext


 class ExecutionResult(Enum):
@@ -39,12 +43,13 @@ class DuplicateNodeError(Exception):
    pass

 class IsChangedCache:
-    def __init__(self, dynprompt, outputs_cache):
+    def __init__(self, prompt_id: str, dynprompt: DynamicPrompt, outputs_cache: BasicCache):
+        self.prompt_id = prompt_id
        self.dynprompt = dynprompt
        self.outputs_cache = outputs_cache
        self.is_changed = {}

-    def get(self, node_id):
+    async def get(self, node_id):
        if node_id in self.is_changed:
            return self.is_changed[node_id]

@@ -62,7 +67,8 @@ class IsChangedCache:
        # Intentionally do not use cached outputs here. We only want constants in IS_CHANGED
        input_data_all, _ = get_input_data(node["inputs"], class_def, node_id, None)
        try:
-            is_changed = _map_node_over_list(class_def, input_data_all, "IS_CHANGED")
+            is_changed = await _async_map_node_over_list(self.prompt_id, node_id, class_def, input_data_all, "IS_CHANGED")
+            is_changed = await resolve_map_node_over_list_results(is_changed)
            node["is_changed"] = [None if isinstance(x, ExecutionBlocker) else x for x in is_changed]
        except Exception as e:
            logging.warning("WARNING: {}".format(e))
@@ -164,7 +170,19 @@ def get_input_data(inputs, class_def, unique_id, outputs=None, dynprompt=None, e

 map_node_over_list = None #Don't hook this please

-def _map_node_over_list(obj, input_data_all, func, allow_interrupt=False, execution_block_cb=None, pre_execute_cb=None):
+async def resolve_map_node_over_list_results(results):
+    """Wait for any unfinished asyncio tasks in `results` and return plain values in the same order."""
+    unfinished = [r for r in results if isinstance(r, asyncio.Task) and not r.done()]
+    if unfinished:
+        # asyncio.wait() returns once every awaited task is done; re-raise a failure if one occurred.
+        finished, _ = await asyncio.wait(unfinished)
+        for task in finished:
+            failure = task.exception()
+            if failure is not None:
+                raise failure
+    return [r.result() if isinstance(r, asyncio.Task) else r for r in results]
+
+async def _async_map_node_over_list(prompt_id, unique_id, obj, input_data_all, func, allow_interrupt=False, execution_block_cb=None, pre_execute_cb=None):
    # check if node wants the lists
    input_is_list = getattr(obj, "INPUT_IS_LIST", False)

@@ -178,7 +196,7 @@ def _map_node_over_list(obj, input_data_all, func, allow_interrupt=False, execut
        return {k: v[i if len(v) > i else -1] for k, v in d.items()}

    results = []
-    def process_inputs(inputs, index=None, input_is_list=False):
+    async def process_inputs(inputs, index=None, input_is_list=False):
        if allow_interrupt:
            nodes.before_node_execution()
        execution_block = None
@@ -194,20 +212,37 @@ def _map_node_over_list(obj, input_data_all, func, allow_interrupt=False, execut
        if execution_block is None:
            if pre_execute_cb is not None and index is not None:
                pre_execute_cb(index)
-            results.append(getattr(obj, func)(**inputs))
+            f = getattr(obj, func)
+            if inspect.iscoroutinefunction(f):
+                async def async_wrapper(f, prompt_id, unique_id, list_index, args):
+                    with CurrentNodeContext(prompt_id, unique_id, list_index):
+                        return await f(**args)
+                task = asyncio.create_task(async_wrapper(f, prompt_id, unique_id, index, args=inputs))
+                # Give the task a chance to execute without yielding
+                await asyncio.sleep(0)
+                if task.done():
+                    result = task.result()
+                    results.append(result)
+                else:
+                    results.append(task)
+            else:
+                with CurrentNodeContext(prompt_id, unique_id, index):
+                    result = f(**inputs)
+                results.append(result)
        else:
            results.append(execution_block)

    if input_is_list:
-        process_inputs(input_data_all, 0, input_is_list=input_is_list)
+        await process_inputs(input_data_all, 0, input_is_list=input_is_list)
    elif max_len_input == 0:
-        process_inputs({})
+        await process_inputs({})
    else:
        for i in range(max_len_input):
            input_dict = slice_dict(input_data_all, i)
-            process_inputs(input_dict, i)
+            await process_inputs(input_dict, i)
    return results

+
 def merge_result_data(results, obj):
    # check which outputs need concatenating
    output = []
@@ -229,11 +264,18 @@ def merge_result_data(results, obj):
            output.append([o[i] for o in results])
    return output

-def get_output_data(obj, input_data_all, execution_block_cb=None, pre_execute_cb=None):
+async def get_output_data(prompt_id, unique_id, obj, input_data_all, execution_block_cb=None, pre_execute_cb=None):
+    return_values = await _async_map_node_over_list(prompt_id, unique_id, obj, input_data_all, obj.FUNCTION, allow_interrupt=True, execution_block_cb=execution_block_cb, pre_execute_cb=pre_execute_cb)
+    has_pending_task = any(isinstance(r, asyncio.Task) and not r.done() for r in return_values)
+    if has_pending_task:
+        return return_values, {}, False, has_pending_task
+    output, ui, has_subgraph = get_output_from_returns(return_values, obj)
+    return output, ui, has_subgraph, False
+
+def get_output_from_returns(return_values, obj):
    results = []
    uis = []
    subgraph_results = []
-    return_values = _map_node_over_list(obj, input_data_all, obj.FUNCTION, allow_interrupt=True, execution_block_cb=execution_block_cb, pre_execute_cb=pre_execute_cb)
    has_subgraph = False
    for i in range(len(return_values)):
        r = return_values[i]
@@ -267,6 +309,10 @@ def get_output_data(obj, input_data_all, execution_block_cb=None, pre_execute_cb
    else:
        output = []
    ui = dict()
+    # TODO: Think there's an existing bug here
+    # If we're performing a subgraph expansion, we probably shouldn't be returning UI values yet.
+    # They'll get cached without the completed subgraphs. It's an edge case and I'm not aware of
+    # any nodes that use both subgraph expansion and custom UI outputs, but might be a problem in the future.
    if len(uis) > 0:
        ui = {k: [y for x in uis for y in x[k]] for k in uis[0].keys()}
    return output, ui, has_subgraph
@@ -279,7 +325,7 @@ def format_value(x):
    else:
        return str(x)

-def execute(server, dynprompt, caches, current_item, extra_data, executed, prompt_id, execution_list, pending_subgraph_results):
+async def execute(server, dynprompt, caches, current_item, extra_data, executed, prompt_id, execution_list, pending_subgraph_results, pending_async_nodes):
    unique_id = current_item
    real_node_id = dynprompt.get_real_node_id(unique_id)
    display_node_id = dynprompt.get_display_node_id(unique_id)
@@ -291,11 +337,16 @@ def execute(server, dynprompt, caches, current_item, extra_data, executed, promp
        if server.client_id is not None:
            cached_output = caches.ui.get(unique_id) or {}
            server.send_sync("executed", { "node": unique_id, "display_node": display_node_id, "output": cached_output.get("output",None), "prompt_id": prompt_id }, server.client_id)
+        get_progress_state().finish_progress(unique_id)
        return (ExecutionResult.SUCCESS, None, None)

    input_data_all = None
    try:
-        if unique_id in pending_subgraph_results:
+        if unique_id in pending_async_nodes:
+            results = [r.result() if isinstance(r, asyncio.Task) else r for r in pending_async_nodes[unique_id]]
+            del pending_async_nodes[unique_id]
+            output_data, output_ui, has_subgraph = get_output_from_returns(results, class_def)
+        elif unique_id in pending_subgraph_results:
            cached_results = pending_subgraph_results[unique_id]
            resolved_outputs = []
            for is_subgraph, result in cached_results:
@@ -317,6 +368,7 @@ def execute(server, dynprompt, caches, current_item, extra_data, executed, promp
            output_ui = []
            has_subgraph = False
        else:
+            get_progress_state().start_progress(unique_id)
            input_data_all, missing_keys = get_input_data(inputs, class_def, unique_id, caches.outputs, dynprompt, extra_data)
            if server.client_id is not None:
                server.last_node_id = display_node_id
@@ -328,7 +380,8 @@ def execute(server, dynprompt, caches, current_item, extra_data, executed, promp
                caches.objects.set(unique_id, obj)

            if hasattr(obj, "check_lazy_status"):
-                required_inputs = _map_node_over_list(obj, input_data_all, "check_lazy_status", allow_interrupt=True)
+                required_inputs = await _async_map_node_over_list(prompt_id, unique_id, obj, input_data_all, "check_lazy_status", allow_interrupt=True)
+                required_inputs = await resolve_map_node_over_list_results(required_inputs)
                required_inputs = set(sum([r for r in required_inputs if isinstance(r,list)], []))
                required_inputs = [x for x in required_inputs if isinstance(x,str) and (
                    x not in input_data_all or x in missing_keys
@@ -357,8 +410,18 @@ def execute(server, dynprompt, caches, current_item, extra_data, executed, promp
                else:
                    return block
            def pre_execute_cb(call_index):
+                # TODO - How to handle this with async functions without contextvars (which requires Python 3.12)?
                GraphBuilder.set_default_prefix(unique_id, call_index, 0)
-            output_data, output_ui, has_subgraph = get_output_data(obj, input_data_all, execution_block_cb=execution_block_cb, pre_execute_cb=pre_execute_cb)
+            output_data, output_ui, has_subgraph, has_pending_tasks = await get_output_data(prompt_id, unique_id, obj, input_data_all, execution_block_cb=execution_block_cb, pre_execute_cb=pre_execute_cb)
+            if has_pending_tasks:
+                pending_async_nodes[unique_id] = output_data
+                unblock = execution_list.add_external_block(unique_id)
+                async def await_completion():  # waits for this node's pending tasks, then releases its external block
+                    tasks = [x for x in output_data if isinstance(x, asyncio.Task)]
+                    await asyncio.gather(*tasks, return_exceptions=True)  # never skip unblock(); errors resurface via result() on re-run
+                    unblock()
+                asyncio.create_task(await_completion())
+                return (ExecutionResult.PENDING, None, None)
        if len(output_ui) > 0:
            caches.ui.set(unique_id, {
                "meta": {
@@ -401,7 +464,8 @@ def execute(server, dynprompt, caches, current_item, extra_data, executed, promp
                    cached_outputs.append((True, node_outputs))
            new_node_ids = set(new_node_ids)
            for cache in caches.all:
-                cache.ensure_subcache_for(unique_id, new_node_ids).clean_unused()
+                subcache = await cache.ensure_subcache_for(unique_id, new_node_ids)
+                subcache.clean_unused()
            for node_id in new_output_ids:
                execution_list.add_node(node_id)
            for link in new_output_links:
@@ -429,23 +493,21 @@ def execute(server, dynprompt, caches, current_item, extra_data, executed, promp

        logging.error(f"!!! Exception during processing !!! {ex}")
        logging.error(traceback.format_exc())
-        tips = ""
-
-        if isinstance(ex, comfy.model_management.OOM_EXCEPTION):
-            tips = "This error means you ran out of memory on your GPU.\n\nTIPS: If the workflow worked before you might have accidentally set the batch_size to a large number."
-            logging.error("Got an OOM, unloading all loaded models.")
-            comfy.model_management.unload_all_models()

        error_details = {
            "node_id": real_node_id,
-            "exception_message": "{}\n{}".format(ex, tips),
+            "exception_message": str(ex),
            "exception_type": exception_type,
            "traceback": traceback.format_tb(tb),
            "current_inputs": input_data_formatted
        }
+        if isinstance(ex, comfy.model_management.OOM_EXCEPTION):
+            logging.error("Got an OOM, unloading all loaded models.")
+            comfy.model_management.unload_all_models()

        return (ExecutionResult.FAILURE, error_details, ex)

+    get_progress_state().finish_progress(unique_id)
    executed.add(unique_id)

    return (ExecutionResult.SUCCESS, None, None)
@@ -500,6 +562,11 @@ class PromptExecutor:
            self.add_message("execution_error", mes, broadcast=False)

    def execute(self, prompt, prompt_id, extra_data={}, execute_outputs=[]):
+        """Synchronous entry point: run execute_async() to completion on a fresh event loop."""
+        # asyncio.run() creates and closes its own loop; creating another one here would leak it.
+        asyncio.run(self.execute_async(prompt, prompt_id, extra_data, execute_outputs))
+
+    async def execute_async(self, prompt, prompt_id, extra_data={}, execute_outputs=[]):
        nodes.interrupt_processing(False)

        if "client_id" in extra_data:
@@ -512,9 +579,11 @@ class PromptExecutor:

        with torch.inference_mode():
            dynamic_prompt = DynamicPrompt(prompt)
-            is_changed_cache = IsChangedCache(dynamic_prompt, self.caches.outputs)
+            reset_progress_state(prompt_id, dynamic_prompt)
+            add_progress_handler(WebUIProgressHandler(self.server))
+            is_changed_cache = IsChangedCache(prompt_id, dynamic_prompt, self.caches.outputs)
            for cache in self.caches.all:
-                cache.set_prompt(dynamic_prompt, prompt.keys(), is_changed_cache)
+                await cache.set_prompt(dynamic_prompt, prompt.keys(), is_changed_cache)
                cache.clean_unused()

            cached_nodes = []
@@ -527,6 +596,7 @@ class PromptExecutor:
                          { "nodes": cached_nodes, "prompt_id": prompt_id},
                          broadcast=False)
            pending_subgraph_results = {}
+            pending_async_nodes = {} # TODO - Unify this with pending_subgraph_results
            executed = set()
            execution_list = ExecutionList(dynamic_prompt, self.caches.outputs)
            current_outputs = self.caches.outputs.all_node_ids()
@@ -534,12 +604,13 @@ class PromptExecutor:
                execution_list.add_node(node_id)

            while not execution_list.is_empty():
-                node_id, error, ex = execution_list.stage_node_execution()
+                node_id, error, ex = await execution_list.stage_node_execution()
                if error is not None:
                    self.handle_execution_error(prompt_id, dynamic_prompt.original_prompt, current_outputs, executed, error, ex)
                    break

-                result, error, ex = execute(self.server, dynamic_prompt, self.caches, node_id, extra_data, executed, prompt_id, execution_list, pending_subgraph_results)
+                assert node_id is not None, "Node ID should not be None at this point"
+                result, error, ex = await execute(self.server, dynamic_prompt, self.caches, node_id, extra_data, executed, prompt_id, execution_list, pending_subgraph_results, pending_async_nodes)
                self.success = result != ExecutionResult.FAILURE
                if result == ExecutionResult.FAILURE:
                    self.handle_execution_error(prompt_id, dynamic_prompt.original_prompt, current_outputs, executed, error, ex)
@@ -569,7 +640,7 @@ class PromptExecutor:
                comfy.model_management.unload_all_models()


-def validate_inputs(prompt, item, validated):
+async def validate_inputs(prompt_id, prompt, item, validated):
    unique_id = item
    if unique_id in validated:
        return validated[unique_id]
@@ -646,7 +717,7 @@ def validate_inputs(prompt, item, validated):
                errors.append(error)
                continue
            try:
-                r = validate_inputs(prompt, o_id, validated)
+                r = await validate_inputs(prompt_id, prompt, o_id, validated)
                if r[0] is False:
                    # `r` will be set in `validated[o_id]` already
                    valid = False
@@ -771,7 +842,8 @@ def validate_inputs(prompt, item, validated):
            input_filtered['input_types'] = [received_types]

        #ret = obj_class.VALIDATE_INPUTS(**input_filtered)
-        ret = _map_node_over_list(obj_class, input_filtered, "VALIDATE_INPUTS")
+        ret = await _async_map_node_over_list(prompt_id, unique_id, obj_class, input_filtered, "VALIDATE_INPUTS")
+        ret = await resolve_map_node_over_list_results(ret)
        for x in input_filtered:
            for i, r in enumerate(ret):
                if r is not True and not isinstance(r, ExecutionBlocker):
@@ -804,7 +876,7 @@ def full_type_name(klass):
        return klass.__qualname__
    return module + '.' + klass.__qualname__

-def validate_prompt(prompt):
+async def validate_prompt(prompt_id, prompt):
    outputs = set()
    for x in prompt:
        if 'class_type' not in prompt[x]:
@@ -847,7 +919,7 @@ def validate_prompt(prompt):
        valid = False
        reasons = []
        try:
-            m = validate_inputs(prompt, o, validated)
+            m = await validate_inputs(prompt_id, prompt, o, validated)
            valid = m[0]
            reasons = m[1]
        except Exception as ex:
--- a/main.py
+++ b/main.py
@@ -11,6 +11,8 @@ import itertools
 import utils.extra_config
 import logging
 import sys
+from comfy_execution.progress import get_progress_state
+from comfy_execution.utils import get_executing_context

 if __name__ == "__main__":
    #NOTE: These do not do anything on core ComfyUI, they are for custom nodes.
@@ -128,7 +130,7 @@ import comfy.utils

 import execution
 import server
-from server import BinaryEventTypes
+from protocol import BinaryEventTypes
 import nodes
 import comfy.model_management
 import comfyui_version
@@ -185,13 +187,7 @@ def prompt_worker(q, server_instance):

            current_time = time.perf_counter()
            execution_time = current_time - execution_start_time
-
-            # Log Time in a more readable way after 10 minutes
-            if execution_time > 600:
-                execution_time = time.strftime("%H:%M:%S", time.gmtime(execution_time))
-                logging.info(f"Prompt executed in {execution_time}")
-            else:
-                logging.info("Prompt executed in {:.2f} seconds".format(execution_time))
+            logging.info("Prompt executed in {:.2f} seconds".format(execution_time))

        flags = q.get_flags()
        free_memory = flags.get("free_memory", False)
@@ -224,14 +220,25 @@ async def run(server_instance, address='', port=8188, verbose=True, call_on_star
        server_instance.start_multi_address(addresses, call_on_start, verbose), server_instance.publish_loop()
    )

-
 def hijack_progress(server_instance):
-    def hook(value, total, preview_image):
+    def hook(value, total, preview_image, prompt_id=None, node_id=None):
+        executing_context = get_executing_context()
+        if prompt_id is None and executing_context is not None:
+            prompt_id = executing_context.prompt_id
+        if node_id is None and executing_context is not None:
+            node_id = executing_context.node_id
        comfy.model_management.throw_exception_if_processing_interrupted()
-        progress = {"value": value, "max": total, "prompt_id": server_instance.last_prompt_id, "node": server_instance.last_node_id}
+        if prompt_id is None:
+            prompt_id = server_instance.last_prompt_id
+        if node_id is None:
+            node_id = server_instance.last_node_id
+        progress = {"value": value, "max": total, "prompt_id": prompt_id, "node": node_id}
+        get_progress_state().update_progress(node_id, value, total, preview_image)

        server_instance.send_sync("progress", progress, server_instance.client_id)
        if preview_image is not None:
+            # Also send old method for backward compatibility
+            # TODO - Remove after this repo is updated to frontend with metadata support
            server_instance.send_sync(BinaryEventTypes.UNENCODED_PREVIEW_IMAGE, preview_image, server_instance.client_id)

    comfy.utils.set_progress_bar_global_hook(hook)
--- a/nodes.py
+++ b/nodes.py
@@ -920,7 +920,7 @@ class CLIPLoader:
    @classmethod
    def INPUT_TYPES(s):
        return {"required": { "clip_name": (folder_paths.get_filename_list("text_encoders"), ),
-                              "type": (["stable_diffusion", "stable_cascade", "sd3", "stable_audio", "mochi", "ltxv", "pixart", "cosmos", "lumina2", "wan", "hidream", "chroma", "ace", "omnigen2"], ),
+                              "type": (["stable_diffusion", "stable_cascade", "sd3", "stable_audio", "mochi", "ltxv", "pixart", "cosmos", "lumina2", "wan", "hidream", "chroma", "ace"], ),
                              },
                "optional": {
                              "device": (["default", "cpu"], {"advanced": True}),
@@ -930,7 +930,7 @@ class CLIPLoader:

    CATEGORY = "advanced/loaders"

-    DESCRIPTION = "[Recipes]\n\nstable_diffusion: clip-l\nstable_cascade: clip-g\nsd3: t5 xxl/ clip-g / clip-l\nstable_audio: t5 base\nmochi: t5 xxl\ncosmos: old t5 xxl\nlumina2: gemma 2 2B\nwan: umt5 xxl\n hidream: llama-3.1 (Recommend) or t5\nomnigen2: qwen vl 2.5 3B"
+    DESCRIPTION = "[Recipes]\n\nstable_diffusion: clip-l\nstable_cascade: clip-g\nsd3: t5 xxl/ clip-g / clip-l\nstable_audio: t5 base\nmochi: t5 xxl\ncosmos: old t5 xxl\nlumina2: gemma 2 2B\nwan: umt5 xxl\n hidream: llama-3.1 (Recommend) or t5"

    def load_clip(self, clip_name, type="stable_diffusion", device="default"):
        clip_type = getattr(comfy.sd.CLIPType, type.upper(), comfy.sd.CLIPType.STABLE_DIFFUSION)
@@ -2279,7 +2279,6 @@ def init_builtin_extra_nodes():
        "nodes_ace.py",
        "nodes_string.py",
        "nodes_camera_trajectory.py",
-        "nodes_edit_model.py",
    ]

    import_failed = []
--- a/protocol.py
+++ b/protocol.py
@@ -0,0 +1,7 @@
+
+class BinaryEventTypes:  # numeric event ids for binary messages sent by the server
+    PREVIEW_IMAGE = 1  # already-encoded preview image bytes
+    UNENCODED_PREVIEW_IMAGE = 2  # raw preview image; PromptServer.send() encodes it via send_image()
+    TEXT = 3
+    PREVIEW_IMAGE_WITH_METADATA = 4  # (preview_image, metadata) pair; sent as length-prefixed JSON + image bytes
+
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "ComfyUI"
-version = "0.3.43"
+version = "0.3.40"
 readme = "README.md"
 license = { file = "LICENSE" }
 requires-python = ">=3.9"
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,13 +1,13 @@
-comfyui-frontend-package==1.23.4
-comfyui-workflow-templates==0.1.30
-comfyui-embedded-docs==0.2.3
+comfyui-frontend-package==1.21.7
+comfyui-workflow-templates==0.1.28
+comfyui-embedded-docs==0.2.2
 torch
 torchsde
 torchvision
 torchaudio
 numpy>=1.25.0
 einops
-transformers>=4.37.2
+transformers>=4.28.1
 tokenizers>=0.13.3
 sentencepiece
 safetensors>=0.4.2
--- a/server.py
+++ b/server.py
@@ -35,11 +35,7 @@ from app.model_manager import ModelFileManager
 from app.custom_node_manager import CustomNodeManager
 from typing import Optional, Union
 from api_server.routes.internal.internal_routes import InternalRoutes
-
-class BinaryEventTypes:
-    PREVIEW_IMAGE = 1
-    UNENCODED_PREVIEW_IMAGE = 2
-    TEXT = 3
+from protocol import BinaryEventTypes

 async def send_socket_catch_exception(function, message):
    try:
@@ -643,7 +639,8 @@ class PromptServer():

            if "prompt" in json_data:
                prompt = json_data["prompt"]
-                valid = execution.validate_prompt(prompt)
+                prompt_id = str(uuid.uuid4())
+                valid = await execution.validate_prompt(prompt_id, prompt)
                extra_data = {}
                if "extra_data" in json_data:
                    extra_data = json_data["extra_data"]
@@ -651,7 +648,6 @@ class PromptServer():
                if "client_id" in json_data:
                    extra_data["client_id"] = json_data["client_id"]
                if valid[0]:
-                    prompt_id = str(uuid.uuid4())
                    outputs_to_execute = valid[2]
                    self.prompt_queue.put((number, prompt_id, prompt, extra_data, outputs_to_execute))
                    response = {"prompt_id": prompt_id, "number": number, "node_errors": valid[3]}
@@ -766,6 +762,10 @@ class PromptServer():
    async def send(self, event, data, sid=None):
        if event == BinaryEventTypes.UNENCODED_PREVIEW_IMAGE:
            await self.send_image(data, sid=sid)
+        elif event == BinaryEventTypes.PREVIEW_IMAGE_WITH_METADATA:
+            # data is (preview_image, metadata)
+            preview_image, metadata = data
+            await self.send_image_with_metadata(preview_image, metadata, sid=sid)
        elif isinstance(data, (bytes, bytearray)):
            await self.send_bytes(event, data, sid)
        else:
@@ -804,6 +804,43 @@ class PromptServer():
        preview_bytes = bytesIO.getvalue()
        await self.send_bytes(BinaryEventTypes.PREVIEW_IMAGE, preview_bytes, sid=sid)

+    async def send_image_with_metadata(self, image_data, metadata=None, sid=None):
+        """Send a preview image preceded by a JSON metadata header as one binary message.
+
+        image_data is indexed as (image_type, image, max_size); metadata is an optional dict.
+        Payload: 4-byte big-endian metadata length, UTF-8 JSON metadata, encoded image bytes.
+        """
+        image_type, image, max_size = image_data[0], image_data[1], image_data[2]
+        if max_size is not None:
+            if hasattr(Image, 'Resampling'):
+                resampling = Image.Resampling.BILINEAR
+            else:
+                # Pillow without the Resampling enum: use the module-level constant instead
+                resampling = Image.BILINEAR
+
+            image = ImageOps.contain(image, (max_size, max_size), resampling)
+
+        mimetype = "image/png" if image_type == "PNG" else "image/jpeg"
+
+        # Copy the metadata so the caller's dict is not mutated, then record the mimetype
+        metadata = {**(metadata or {}), "image_type": mimetype}
+
+        # Serialize metadata as JSON
+        import json
+        metadata_json = json.dumps(metadata).encode('utf-8')
+
+        # Encode the image
+        bytesIO = BytesIO()
+        image.save(bytesIO, format=image_type, quality=95, compress_level=1)
+
+        # Combine metadata and image
+        combined_data = bytearray()
+        combined_data.extend(struct.pack(">I", len(metadata_json)))
+        combined_data.extend(metadata_json)
+        combined_data.extend(bytesIO.getvalue())
+
+        await self.send_bytes(BinaryEventTypes.PREVIEW_IMAGE_WITH_METADATA, combined_data, sid=sid)
+
    async def send_bytes(self, event, data, sid=None):
        message = self.encode_bytes(event, data)

--- a/tests-unit/requirements.txt
+++ b/tests-unit/requirements.txt
@@ -1,3 +1,4 @@
 pytest>=7.8.0
 pytest-aiohttp
 pytest-asyncio
+websocket-client
--- a/tests/inference/extra_model_paths.yaml
+++ b/tests/inference/extra_model_paths.yaml
@@ -1,4 +1,4 @@
 # Config for testing nodes
 testing:
-    custom_nodes: tests/inference/testing_nodes
+    custom_nodes: testing_nodes

--- a/tests/inference/test_execution.py
+++ b/tests/inference/test_execution.py
@@ -252,7 +252,7 @@ class TestExecution:

    @pytest.mark.parametrize("test_type, test_value", [
        ("StubInt", 5),
-        ("StubFloat", 5.0)
+        ("StubMask", 5.0)
    ])
    def test_validation_error_edge1(self, test_type, test_value, client: ComfyClient, builder: GraphBuilder):
        g = builder
@@ -497,6 +497,69 @@ class TestExecution:
        assert numpy.array(images[0]).min() == 63 and numpy.array(images[0]).max() == 63, "Image should have value 0.25"
        assert not result.did_run(test_node), "The execution should have been cached"

+    def test_parallel_sleep_nodes(self, client: ComfyClient, builder: GraphBuilder):
+        g = builder
+        image = g.node("StubImage", content="BLACK", height=512, width=512, batch_size=1)
+
+        # Create sleep nodes for each duration
+        sleep_node1 = g.node("TestSleep", value=image.out(0), seconds=2.8)
+        sleep_node2 = g.node("TestSleep", value=image.out(0), seconds=2.9)
+        sleep_node3 = g.node("TestSleep", value=image.out(0), seconds=3.0)
+
+        # Add outputs to verify the execution
+        _output1 = g.node("PreviewImage", images=sleep_node1.out(0))
+        _output2 = g.node("PreviewImage", images=sleep_node2.out(0))
+        _output3 = g.node("PreviewImage", images=sleep_node3.out(0))
+
+        start_time = time.time()
+        result = client.run(g)
+        elapsed_time = time.time() - start_time
+
+        # The test should take around 3.0 seconds (the longest sleep duration)
+        # plus some overhead, but definitely less than the sum of all sleeps (8.7s)
+        # We'll allow for up to 4.0s total to account for overhead
+        assert elapsed_time < 4.0, f"Parallel execution took {elapsed_time}s, expected less than 4.0s"
+
+        # Verify that all nodes executed
+        assert result.did_run(sleep_node1), "Sleep node 1 should have run"
+        assert result.did_run(sleep_node2), "Sleep node 2 should have run"
+        assert result.did_run(sleep_node3), "Sleep node 3 should have run"
+
+    def test_parallel_sleep_expansion(self, client: ComfyClient, builder: GraphBuilder):
+        g = builder
+        # Create input images with different values
+        image1 = g.node("StubImage", content="BLACK", height=512, width=512, batch_size=1)
+        image2 = g.node("StubImage", content="WHITE", height=512, width=512, batch_size=1)
+        image3 = g.node("StubImage", content="WHITE", height=512, width=512, batch_size=1)
+
+        # Create a TestParallelSleep node that expands into multiple TestSleep nodes
+        parallel_sleep = g.node("TestParallelSleep",
+                                image1=image1.out(0),
+                                image2=image2.out(0),
+                                image3=image3.out(0),
+                                sleep1=0.4,
+                                sleep2=0.5,
+                                sleep3=0.6)
+        output = g.node("SaveImage", images=parallel_sleep.out(0))
+
+        start_time = time.time()
+        result = client.run(g)
+        elapsed_time = time.time() - start_time
+
+        # The sleeps sum to 1.5s (0.4 + 0.5 + 0.6); if they run in parallel the total should be
+        # close to the longest sleep (0.6s), so 0.8s leaves room for overhead
+        assert elapsed_time < 0.8, f"Expansion execution took {elapsed_time}s, expected less than 0.8s"
+
+        # Verify the parallel sleep node executed
+        assert result.did_run(parallel_sleep), "ParallelSleep node should have run"
+
+        # Verify we get an image as output (blend of the three input images)
+        result_images = result.get_images(output)
+        assert len(result_images) == 1, "Should have 1 image"
+        # One black and two white inputs: average pixel value should be 170 (255 * 2 // 3)
+        avg_value = numpy.array(result_images[0]).mean()
+        assert avg_value == 170, f"Image average value {avg_value} should be 170"
+
    # This tests that nodes with OUTPUT_IS_LIST function correctly when they receive an ExecutionBlocker
    # as input. We also test that when that list (containing an ExecutionBlocker) is passed to a node,
    # only that one entry in the list is blocked.
--- a/tests/inference/testing_nodes/testing-pack/specific_tests.py
+++ b/tests/inference/testing_nodes/testing-pack/specific_tests.py
@@ -1,6 +1,11 @@
 import torch
+import time
+import asyncio
+from comfy.utils import ProgressBar
 from .tools import VariantSupport
 from comfy_execution.graph_utils import GraphBuilder
+from comfy.comfy_types.node_typing import ComfyNodeABC
+from comfy.comfy_types import IO

 class TestLazyMixImages:
    @classmethod
@@ -333,6 +338,131 @@ class TestMixedExpansionReturns:
                "expand": g.finalize(),
            }

+class TestSamplingInExpansion:
+    # Test node whose execution function returns a graph expansion made of two
+    # CLIPTextEncode nodes, an EmptyLatentImage, a KSampler and a VAEDecode.
+    # Output 0 of the VAEDecode node is reported as this node's IMAGE result.
+    RETURN_TYPES = ("IMAGE",)
+    FUNCTION = "sampling_in_expansion"
+
+    CATEGORY = "Testing/Nodes"
+
+    @classmethod
+    def INPUT_TYPES(cls):
+        # Every input is required; both prompts are multiline strings.
+        required = {
+            "model": ("MODEL",),
+            "clip": ("CLIP",),
+            "vae": ("VAE",),
+            "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
+            "steps": ("INT", {"default": 20, "min": 1, "max": 100}),
+            "cfg": ("FLOAT", {"default": 7.0, "min": 0.0, "max": 30.0}),
+            "prompt": ("STRING", {"multiline": True, "default": "a beautiful landscape with mountains and trees"}),
+            "negative_prompt": ("STRING", {"multiline": True, "default": "blurry, bad quality, worst quality"}),
+        }
+        return {"required": required}
+
+    def sampling_in_expansion(self, model, clip, vae, seed, steps, cfg, prompt, negative_prompt):
+        graph = GraphBuilder()
+
+        # Encode the positive and negative prompts with the supplied CLIP model.
+        positive_cond = graph.node("CLIPTextEncode", text=prompt, clip=clip)
+        negative_cond = graph.node("CLIPTextEncode", text=negative_prompt, clip=clip)
+
+        # Fixed-size (512x512, batch of one) empty latent to sample into.
+        latent = graph.node("EmptyLatentImage", width=512, height=512, batch_size=1)
+
+        # Sample with the caller's model/seed/steps/cfg and a hard-coded
+        # sampler ("euler_ancestral") and scheduler ("normal").
+        sampler_inputs = {
+            "model": model,
+            "positive": positive_cond.out(0),
+            "negative": negative_cond.out(0),
+            "latent_image": latent.out(0),
+            "seed": seed,
+            "steps": steps,
+            "cfg": cfg,
+            "sampler_name": "euler_ancestral",
+            "scheduler": "normal",
+        }
+        sampled = graph.node("KSampler", **sampler_inputs)
+
+        # Decode the sampled latent with the supplied VAE.
+        decoded = graph.node("VAEDecode", samples=sampled.out(0), vae=vae)
+        image_link = decoded.out(0)
+
+        return {
+            "result": (image_link,),
+            "expand": graph.finalize(),
+        }
+
+
+class TestSleep(ComfyNodeABC):
+    # Async test node: waits for `seconds`, reporting progress through a
+    # ProgressBar bound to this node's id, then returns `value` unchanged.
+    @classmethod
+    def INPUT_TYPES(cls):
+        return {
+            "required": {
+                "value": (IO.ANY, {}),
+                "seconds": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 9999.0, "step": 0.01, "tooltip": "The amount of seconds to sleep."}),
+            },
+            "hidden": {
+                "unique_id": "UNIQUE_ID",
+            },
+        }
+    RETURN_TYPES = (IO.ANY,)
+    FUNCTION = "sleep"
+
+    CATEGORY = "_for_testing"
+
+    async def sleep(self, value, seconds, unique_id):
+        # Poll interval (in seconds) between progress updates.
+        poll_interval = 0.01
+        pbar = ProgressBar(seconds, node_id=unique_id)
+        # Use the monotonic clock: wall-clock time can jump (NTP, manual
+        # changes), which would shorten or lengthen the measured interval.
+        start = time.monotonic()
+        deadline = start + seconds
+        now = start
+        while now < deadline:
+            now = time.monotonic()
+            pbar.update_absolute(now - start)
+            # Never sleep past the deadline; once it has passed this is a
+            # zero-length sleep that just yields to the event loop once.
+            await asyncio.sleep(min(poll_interval, max(deadline - now, 0.0)))
+        return (value,)
+
+
+class TestParallelSleep(ComfyNodeABC):
+    # Test node that expands into three TestSleep nodes (one per image/duration
+    # pair) whose outputs are combined by a TestVariadicAverage node.
+    RETURN_TYPES = ("IMAGE",)
+    FUNCTION = "parallel_sleep"
+    CATEGORY = "_for_testing"
+    OUTPUT_NODE = True
+
+    @classmethod
+    def INPUT_TYPES(cls):
+        slots = (1, 2, 3)
+        # Three image inputs followed by three sleep durations; each FLOAT
+        # spec is built fresh so no option dict is shared between inputs.
+        required = {f"image{i}": ("IMAGE", ) for i in slots}
+        required.update({
+            f"sleep{i}": ("FLOAT", {"default": 0.5, "min": 0.0, "max": 10.0, "step": 0.01})
+            for i in slots
+        })
+        hidden = {"unique_id": "UNIQUE_ID"}
+        return {"required": required, "hidden": hidden}
+
+    def parallel_sleep(self, image1, image2, image3, sleep1, sleep2, sleep3, unique_id):
+        # `unique_id` is accepted because it is declared as a hidden input; it
+        # is not used when building the expansion.
+        graph = GraphBuilder()
+
+        # One TestSleep node per (image, duration) pair, created in order.
+        pairs = ((image1, sleep1), (image2, sleep2), (image3, sleep3))
+        delayed = [
+            graph.node("TestSleep", value=picture, seconds=duration)
+            for picture, duration in pairs
+        ]
+
+        # Average the three delayed images with TestVariadicAverage.
+        averaged = graph.node("TestVariadicAverage",
+                              input1=delayed[0].out(0),
+                              input2=delayed[1].out(0),
+                              input3=delayed[2].out(0))
+        blended = averaged.out(0)
+
+        return {
+            "result": (blended,),
+            "expand": graph.finalize(),
+        }
+
+
 TEST_NODE_CLASS_MAPPINGS = {
    "TestLazyMixImages": TestLazyMixImages,
    "TestVariadicAverage": TestVariadicAverage,
@@ -345,6 +475,9 @@ TEST_NODE_CLASS_MAPPINGS = {
    "TestCustomValidation5": TestCustomValidation5,
    "TestDynamicDependencyCycle": TestDynamicDependencyCycle,
    "TestMixedExpansionReturns": TestMixedExpansionReturns,
+    "TestSamplingInExpansion": TestSamplingInExpansion,
+    "TestSleep": TestSleep,
+    "TestParallelSleep": TestParallelSleep,
 }

 TEST_NODE_DISPLAY_NAME_MAPPINGS = {
@@ -359,4 +492,7 @@ TEST_NODE_DISPLAY_NAME_MAPPINGS = {
    "TestCustomValidation5": "Custom Validation 5",
    "TestDynamicDependencyCycle": "Dynamic Dependency Cycle",
    "TestMixedExpansionReturns": "Mixed Expansion Returns",
+    "TestSamplingInExpansion": "Sampling In Expansion",
+    "TestSleep": "Test Sleep",
+    "TestParallelSleep": "Test Parallel Sleep",
 }
Author	SHA1	Message	Date
Jacob Segal	1316e608c9	Add the websocket library for automated tests	2025-06-13 21:51:32 -07:00
Jacob Segal	8d28c17369	Add a missing file. It looks like this got caught by .gitignore? There's probably a better place to put it, but I'm not sure what that is.	2025-06-13 21:45:21 -07:00
Jacob Segal	6df907c413	Add the execution model tests to CI	2025-06-13 21:39:26 -07:00
Jacob Segal	f1dc13037e	Support for async execution functions. This commit adds support for node execution functions defined as async. When a node's execution function is defined as async, we can continue executing other nodes while it is processing. Standard uses of `await` should "just work", but people will still have to be careful if they spawn actual threads. Because torch doesn't really have async/await versions of functions, this won't particularly help with most locally-executing nodes, but it does work for e.g. web requests to other machines. In addition to the execute function, the `VALIDATE_INPUTS` and `check_lazy_status` functions can also be defined as async, though we'll only resolve one node at a time right now for those.	2025-06-13 21:39:26 -07:00