Merge branch 'master' into fix-context-window-slicing

Fast preview for hunyuan image. (#9814 )
Update template to 0.1.81 (#9811 )
2026-02-13 11:40:02 +00:00 · 2025-09-11 20:23:31 -07:00 · 2025-09-11 19:33:02 -04:00 · 2025-09-11 14:59:26 -04:00 · 2025-09-10 23:17:34 -04:00 · 2025-09-09 17:45:35 -07:00
5 changed files with 97 additions and 5 deletions
--- a/comfy/context_windows.py
+++ b/comfy/context_windows.py
@@ -146,11 +146,13 @@ class IndexListContextHandler(ContextHandlerABC):
                        # when in dictionary, look for tensors and CONDCrossAttn [comfy/conds.py] (has cond attr that is a tensor)
                        for cond_key, cond_value in new_cond_item.items():
                            if isinstance(cond_value, torch.Tensor):
-                                if cond_value.ndim < self.dim and cond_value.size(0) == x_in.size(self.dim):
+                                if (self.dim < cond_value.ndim and cond_value.size(self.dim) == x_in.size(self.dim)) or \
+                                   (cond_value.ndim < self.dim and cond_value.size(0) == x_in.size(self.dim)):
                                    new_cond_item[cond_key] = window.get_tensor(cond_value, device)
                            # if has cond that is a Tensor, check if needs to be subset
                            elif hasattr(cond_value, "cond") and isinstance(cond_value.cond, torch.Tensor):
-                                if cond_value.cond.ndim < self.dim and cond_value.cond.size(0) == x_in.size(self.dim):
+                                if  (self.dim < cond_value.cond.ndim and cond_value.cond.size(self.dim) == x_in.size(self.dim)) or \
+                                    (cond_value.cond.ndim < self.dim and cond_value.cond.size(0) == x_in.size(self.dim)):
                                    new_cond_item[cond_key] = cond_value._copy_with(window.get_tensor(cond_value.cond, device))
                            elif cond_key == "num_video_frames": # for SVD
                                new_cond_item[cond_key] = cond_value._copy_with(cond_value.cond)
--- a/comfy/latent_formats.py
+++ b/comfy/latent_formats.py
@@ -538,6 +538,74 @@ class HunyuanImage21(LatentFormat):
    latent_dimensions = 2
    scale_factor = 0.75289

+    latent_rgb_factors = [
+        [-0.0154, -0.0397, -0.0521],
+        [ 0.0005,  0.0093,  0.0006],
+        [-0.0805, -0.0773, -0.0586],
+        [-0.0494, -0.0487, -0.0498],
+        [-0.0212, -0.0076, -0.0261],
+        [-0.0179, -0.0417, -0.0505],
+        [ 0.0158,  0.0310,  0.0239],
+        [ 0.0409,  0.0516,  0.0201],
+        [ 0.0350,  0.0553,  0.0036],
+        [-0.0447, -0.0327, -0.0479],
+        [-0.0038, -0.0221, -0.0365],
+        [-0.0423, -0.0718, -0.0654],
+        [ 0.0039,  0.0368,  0.0104],
+        [ 0.0655,  0.0217,  0.0122],
+        [ 0.0490,  0.1638,  0.2053],
+        [ 0.0932,  0.0829,  0.0650],
+        [-0.0186, -0.0209, -0.0135],
+        [-0.0080, -0.0076, -0.0148],
+        [-0.0284, -0.0201,  0.0011],
+        [-0.0642, -0.0294, -0.0777],
+        [-0.0035,  0.0076, -0.0140],
+        [ 0.0519,  0.0731,  0.0887],
+        [-0.0102,  0.0095,  0.0704],
+        [ 0.0068,  0.0218, -0.0023],
+        [-0.0726, -0.0486, -0.0519],
+        [ 0.0260,  0.0295,  0.0263],
+        [ 0.0250,  0.0333,  0.0341],
+        [ 0.0168, -0.0120, -0.0174],
+        [ 0.0226,  0.1037,  0.0114],
+        [ 0.2577,  0.1906,  0.1604],
+        [-0.0646, -0.0137, -0.0018],
+        [-0.0112,  0.0309,  0.0358],
+        [-0.0347,  0.0146, -0.0481],
+        [ 0.0234,  0.0179,  0.0201],
+        [ 0.0157,  0.0313,  0.0225],
+        [ 0.0423,  0.0675,  0.0524],
+        [-0.0031,  0.0027, -0.0255],
+        [ 0.0447,  0.0555,  0.0330],
+        [-0.0152,  0.0103,  0.0299],
+        [-0.0755, -0.0489, -0.0635],
+        [ 0.0853,  0.0788,  0.1017],
+        [-0.0272, -0.0294, -0.0471],
+        [ 0.0440,  0.0400, -0.0137],
+        [ 0.0335,  0.0317, -0.0036],
+        [-0.0344, -0.0621, -0.0984],
+        [-0.0127, -0.0630, -0.0620],
+        [-0.0648,  0.0360,  0.0924],
+        [-0.0781, -0.0801, -0.0409],
+        [ 0.0363,  0.0613,  0.0499],
+        [ 0.0238,  0.0034,  0.0041],
+        [-0.0135,  0.0258,  0.0310],
+        [ 0.0614,  0.1086,  0.0589],
+        [ 0.0428,  0.0350,  0.0205],
+        [ 0.0153,  0.0173, -0.0018],
+        [-0.0288, -0.0455, -0.0091],
+        [ 0.0344,  0.0109, -0.0157],
+        [-0.0205, -0.0247, -0.0187],
+        [ 0.0487,  0.0126,  0.0064],
+        [-0.0220, -0.0013,  0.0074],
+        [-0.0203, -0.0094, -0.0048],
+        [-0.0719,  0.0429, -0.0442],
+        [ 0.1042,  0.0497,  0.0356],
+        [-0.0659, -0.0578, -0.0280],
+        [-0.0060, -0.0322, -0.0234]]
+
+    latent_rgb_factors_bias = [0.0007, -0.0256, -0.0206]
+
 class Hunyuan3Dv2(LatentFormat):
    latent_channels = 64
    latent_dimensions = 1
--- a/comfy/ldm/hunyuan_video/model.py
+++ b/comfy/ldm/hunyuan_video/model.py
@@ -41,6 +41,7 @@ class HunyuanVideoParams:
    qkv_bias: bool
    guidance_embed: bool
    byt5: bool
+    meanflow: bool


 class SelfAttentionRef(nn.Module):
@@ -256,6 +257,11 @@ class HunyuanVideo(nn.Module):
        else:
            self.byt5_in = None

+        if params.meanflow:
+            self.time_r_in = MLPEmbedder(in_dim=256, hidden_dim=self.hidden_size, dtype=dtype, device=device, operations=operations)
+        else:
+            self.time_r_in = None
+
        if final_layer:
            self.final_layer = LastLayer(self.hidden_size, self.patch_size[-1], self.out_channels, dtype=dtype, device=device, operations=operations)

@@ -282,6 +288,14 @@ class HunyuanVideo(nn.Module):
        img = self.img_in(img)
        vec = self.time_in(timestep_embedding(timesteps, 256, time_factor=1.0).to(img.dtype))

+        if self.time_r_in is not None:
+            w = torch.where(transformer_options['sigmas'][0] == transformer_options['sample_sigmas'])[0]  # This most likely could be improved
+            if len(w) > 0:
+                timesteps_r = transformer_options['sample_sigmas'][w[0] + 1]
+                timesteps_r = timesteps_r.unsqueeze(0).to(device=timesteps.device, dtype=timesteps.dtype)
+                vec_r = self.time_r_in(timestep_embedding(timesteps_r, 256, time_factor=1000.0).to(img.dtype))
+                vec = (vec + vec_r) / 2
+
        if ref_latent is not None:
            ref_latent_ids = self.img_ids(ref_latent)
            ref_latent = self.img_in(ref_latent)
--- a/comfy/model_detection.py
+++ b/comfy/model_detection.py
@@ -142,12 +142,20 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
        dit_config["in_channels"] = in_w.shape[1] #SkyReels img2video has 32 input channels
        dit_config["patch_size"] = list(in_w.shape[2:])
        dit_config["out_channels"] = out_w.shape[0] // math.prod(dit_config["patch_size"])
-        if '{}vector_in.in_layer.weight'.format(key_prefix) in state_dict:
+        if any(s.startswith('{}vector_in.'.format(key_prefix)) for s in state_dict_keys):
            dit_config["vec_in_dim"] = 768
-            dit_config["axes_dim"] = [16, 56, 56]
        else:
            dit_config["vec_in_dim"] = None
+
+        if len(dit_config["patch_size"]) == 2:
            dit_config["axes_dim"] = [64, 64]
+        else:
+            dit_config["axes_dim"] = [16, 56, 56]
+
+        if any(s.startswith('{}time_r_in.'.format(key_prefix)) for s in state_dict_keys):
+            dit_config["meanflow"] = True
+        else:
+            dit_config["meanflow"] = False

        dit_config["context_in_dim"] = state_dict['{}txt_in.input_embedder.weight'.format(key_prefix)].shape[1]
        dit_config["hidden_size"] = in_w.shape[0]
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 comfyui-frontend-package==1.25.11
-comfyui-workflow-templates==0.1.78
+comfyui-workflow-templates==0.1.81
 comfyui-embedded-docs==0.2.6
 torch
 torchsde
Author	SHA1	Message	Date
Jedrzej Kosinski	3b54b0256d	Merge branch 'master' into fix-context-window-slicing	2025-09-11 20:23:31 -07:00
comfyanonymous	18de0b2830	Fast preview for hunyuan image. (#9814 )	2025-09-11 19:33:02 -04:00
ComfyUI Wiki	df6850fae8	Update template to 0.1.81 (#9811 )	2025-09-11 14:59:26 -04:00
comfyanonymous	e01e99d075	Support hunyuan image distilled model. (#9807 )	2025-09-10 23:17:34 -04:00
Jedrzej Kosinski	2835f7f63e	Apply cond slice fix	2025-09-09 17:45:35 -07:00