Merge branch 'master' into fix-context-window-slicing

Fast preview for hunyuan image. (#9814)
Update template to 0.1.81 (#9811)
2026-02-15 20:50:01 +00:00 · 2025-09-11 20:23:31 -07:00 · 2025-09-11 19:33:02 -04:00 · 2025-09-11 14:59:26 -04:00 · 2025-09-10 23:17:34 -04:00 · 2025-09-10 17:25:41 -04:00
8 changed files with 306 additions and 8 deletions
--- a/comfy/context_windows.py
+++ b/comfy/context_windows.py
@@ -146,11 +146,14 @@ class IndexListContextHandler(ContextHandlerABC):
                        # when in dictionary, look for tensors and CONDCrossAttn [comfy/conds.py] (has cond attr that is a tensor)
                        for cond_key, cond_value in new_cond_item.items():
                            if isinstance(cond_value, torch.Tensor):
-                                if cond_value.ndim < self.dim and cond_value.size(0) == x_in.size(self.dim):
+                                # subset when the size along self.dim (or along dim 0 for tensors with fewer dims) equals x_in's size along self.dim
+                                if (self.dim < cond_value.ndim and cond_value.size(self.dim) == x_in.size(self.dim)) or \
+                                   (cond_value.ndim < self.dim and cond_value.size(0) == x_in.size(self.dim)):
                                    new_cond_item[cond_key] = window.get_tensor(cond_value, device)
                            # if has cond that is a Tensor, check if needs to be subset
                            elif hasattr(cond_value, "cond") and isinstance(cond_value.cond, torch.Tensor):
-                                if cond_value.cond.ndim < self.dim and cond_value.cond.size(0) == x_in.size(self.dim):
+                                if (self.dim < cond_value.cond.ndim and cond_value.cond.size(self.dim) == x_in.size(self.dim)) or \
+                                   (cond_value.cond.ndim < self.dim and cond_value.cond.size(0) == x_in.size(self.dim)):
                                    new_cond_item[cond_key] = cond_value._copy_with(window.get_tensor(cond_value.cond, device))
                            elif cond_key == "num_video_frames": # for SVD
                                new_cond_item[cond_key] = cond_value._copy_with(cond_value.cond)
--- a/comfy/latent_formats.py
+++ b/comfy/latent_formats.py
@@ -538,6 +538,74 @@ class HunyuanImage21(LatentFormat):
    latent_dimensions = 2
    scale_factor = 0.75289

+    latent_rgb_factors = [
+        [-0.0154, -0.0397, -0.0521],
+        [ 0.0005,  0.0093,  0.0006],
+        [-0.0805, -0.0773, -0.0586],
+        [-0.0494, -0.0487, -0.0498],
+        [-0.0212, -0.0076, -0.0261],
+        [-0.0179, -0.0417, -0.0505],
+        [ 0.0158,  0.0310,  0.0239],
+        [ 0.0409,  0.0516,  0.0201],
+        [ 0.0350,  0.0553,  0.0036],
+        [-0.0447, -0.0327, -0.0479],
+        [-0.0038, -0.0221, -0.0365],
+        [-0.0423, -0.0718, -0.0654],
+        [ 0.0039,  0.0368,  0.0104],
+        [ 0.0655,  0.0217,  0.0122],
+        [ 0.0490,  0.1638,  0.2053],
+        [ 0.0932,  0.0829,  0.0650],
+        [-0.0186, -0.0209, -0.0135],
+        [-0.0080, -0.0076, -0.0148],
+        [-0.0284, -0.0201,  0.0011],
+        [-0.0642, -0.0294, -0.0777],
+        [-0.0035,  0.0076, -0.0140],
+        [ 0.0519,  0.0731,  0.0887],
+        [-0.0102,  0.0095,  0.0704],
+        [ 0.0068,  0.0218, -0.0023],
+        [-0.0726, -0.0486, -0.0519],
+        [ 0.0260,  0.0295,  0.0263],
+        [ 0.0250,  0.0333,  0.0341],
+        [ 0.0168, -0.0120, -0.0174],
+        [ 0.0226,  0.1037,  0.0114],
+        [ 0.2577,  0.1906,  0.1604],
+        [-0.0646, -0.0137, -0.0018],
+        [-0.0112,  0.0309,  0.0358],
+        [-0.0347,  0.0146, -0.0481],
+        [ 0.0234,  0.0179,  0.0201],
+        [ 0.0157,  0.0313,  0.0225],
+        [ 0.0423,  0.0675,  0.0524],
+        [-0.0031,  0.0027, -0.0255],
+        [ 0.0447,  0.0555,  0.0330],
+        [-0.0152,  0.0103,  0.0299],
+        [-0.0755, -0.0489, -0.0635],
+        [ 0.0853,  0.0788,  0.1017],
+        [-0.0272, -0.0294, -0.0471],
+        [ 0.0440,  0.0400, -0.0137],
+        [ 0.0335,  0.0317, -0.0036],
+        [-0.0344, -0.0621, -0.0984],
+        [-0.0127, -0.0630, -0.0620],
+        [-0.0648,  0.0360,  0.0924],
+        [-0.0781, -0.0801, -0.0409],
+        [ 0.0363,  0.0613,  0.0499],
+        [ 0.0238,  0.0034,  0.0041],
+        [-0.0135,  0.0258,  0.0310],
+        [ 0.0614,  0.1086,  0.0589],
+        [ 0.0428,  0.0350,  0.0205],
+        [ 0.0153,  0.0173, -0.0018],
+        [-0.0288, -0.0455, -0.0091],
+        [ 0.0344,  0.0109, -0.0157],
+        [-0.0205, -0.0247, -0.0187],
+        [ 0.0487,  0.0126,  0.0064],
+        [-0.0220, -0.0013,  0.0074],
+        [-0.0203, -0.0094, -0.0048],
+        [-0.0719,  0.0429, -0.0442],
+        [ 0.1042,  0.0497,  0.0356],
+        [-0.0659, -0.0578, -0.0280],
+        [-0.0060, -0.0322, -0.0234]]
+
+    latent_rgb_factors_bias = [0.0007, -0.0256, -0.0206]
+
 class Hunyuan3Dv2(LatentFormat):
    latent_channels = 64
    latent_dimensions = 1
--- a/comfy/ldm/hunyuan_video/model.py
+++ b/comfy/ldm/hunyuan_video/model.py
@@ -41,6 +41,7 @@ class HunyuanVideoParams:
    qkv_bias: bool
    guidance_embed: bool
    byt5: bool
+    meanflow: bool


 class SelfAttentionRef(nn.Module):
@@ -256,6 +257,11 @@ class HunyuanVideo(nn.Module):
        else:
            self.byt5_in = None

+        if params.meanflow:
+            self.time_r_in = MLPEmbedder(in_dim=256, hidden_dim=self.hidden_size, dtype=dtype, device=device, operations=operations)
+        else:
+            self.time_r_in = None
+
        if final_layer:
            self.final_layer = LastLayer(self.hidden_size, self.patch_size[-1], self.out_channels, dtype=dtype, device=device, operations=operations)

@@ -282,6 +288,14 @@ class HunyuanVideo(nn.Module):
        img = self.img_in(img)
        vec = self.time_in(timestep_embedding(timesteps, 256, time_factor=1.0).to(img.dtype))

+        if self.time_r_in is not None:
+            w = torch.where(transformer_options['sigmas'][0] == transformer_options['sample_sigmas'])[0]  # This most likely could be improved
+            if len(w) > 0:
+                timesteps_r = transformer_options['sample_sigmas'][w[0] + 1]
+                timesteps_r = timesteps_r.unsqueeze(0).to(device=timesteps.device, dtype=timesteps.dtype)
+                vec_r = self.time_r_in(timestep_embedding(timesteps_r, 256, time_factor=1000.0).to(img.dtype))
+                vec = (vec + vec_r) / 2
+
        if ref_latent is not None:
            ref_latent_ids = self.img_ids(ref_latent)
            ref_latent = self.img_in(ref_latent)
--- a/comfy/model_detection.py
+++ b/comfy/model_detection.py
@@ -142,12 +142,20 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
        dit_config["in_channels"] = in_w.shape[1] #SkyReels img2video has 32 input channels
        dit_config["patch_size"] = list(in_w.shape[2:])
        dit_config["out_channels"] = out_w.shape[0] // math.prod(dit_config["patch_size"])
-        if '{}vector_in.in_layer.weight'.format(key_prefix) in state_dict:
+        if any(s.startswith('{}vector_in.'.format(key_prefix)) for s in state_dict_keys):
            dit_config["vec_in_dim"] = 768
-            dit_config["axes_dim"] = [16, 56, 56]
        else:
            dit_config["vec_in_dim"] = None
+
+        if len(dit_config["patch_size"]) == 2:
            dit_config["axes_dim"] = [64, 64]
+        else:
+            dit_config["axes_dim"] = [16, 56, 56]
+
+        if any(s.startswith('{}time_r_in.'.format(key_prefix)) for s in state_dict_keys):
+            dit_config["meanflow"] = True
+        else:
+            dit_config["meanflow"] = False

        dit_config["context_in_dim"] = state_dict['{}txt_in.input_embedder.weight'.format(key_prefix)].shape[1]
        dit_config["hidden_size"] = in_w.shape[0]
--- a/comfy_api_nodes/nodes_bytedance.py
+++ b/comfy_api_nodes/nodes_bytedance.py
@@ -77,6 +77,22 @@ class Image2ImageTaskCreationRequest(BaseModel):
    watermark: Optional[bool] = Field(True)


+class Seedream4Options(BaseModel):  # type of Seedream4TaskCreationRequest.sequential_image_generation_options
+    max_images: int = Field(15)  # defaults to 15 when the caller does not set it
+
+
+class Seedream4TaskCreationRequest(BaseModel):  # request body built by ByteDanceSeedreamNode.execute
+    model: str = Field("seedream-4-0-250828")
+    prompt: str = Field(...)  # required
+    response_format: str = Field("url")
+    image: Optional[list[str]] = Field(None, description="Image URLs")
+    size: str = Field(...)  # required; the node formats it as "<width>x<height>"
+    seed: int = Field(..., ge=0, le=2147483647)  # required; upper bound is 2**31 - 1
+    sequential_image_generation: str = Field("disabled")  # the node offers "disabled" or "auto"
+    sequential_image_generation_options: Seedream4Options = Field(Seedream4Options(max_images=15))
+    watermark: bool = Field(True)
+
+
 class ImageTaskCreationResponse(BaseModel):
    model: str = Field(...)
    created: int = Field(..., description="Unix timestamp (in seconds) indicating time when the request was created.")
@@ -143,6 +159,19 @@ RECOMMENDED_PRESETS = [
    ("Custom", None, None),
 ]

+RECOMMENDED_PRESETS_SEEDREAM_4 = [  # (label, width, height) entries; labels feed the node's "size_preset" combo
+    ("2048x2048 (1:1)", 2048, 2048),
+    ("2304x1728 (4:3)", 2304, 1728),
+    ("1728x2304 (3:4)", 1728, 2304),
+    ("2560x1440 (16:9)", 2560, 1440),
+    ("1440x2560 (9:16)", 1440, 2560),
+    ("2496x1664 (3:2)", 2496, 1664),
+    ("1664x2496 (2:3)", 1664, 2496),
+    ("3024x1296 (21:9)", 3024, 1296),
+    ("4096x4096 (1:1)", 4096, 4096),
+    ("Custom", None, None),  # no fixed size: execute() falls back to the width/height inputs
+]
+
 # The time in this dictionary are given for 10 seconds duration.
 VIDEO_TASKS_EXECUTION_TIME = {
    "seedance-1-0-lite-t2v-250428": {
@@ -348,7 +377,7 @@ class ByteDanceImageEditNode(comfy_io.ComfyNode):
        return comfy_io.Schema(
            node_id="ByteDanceImageEditNode",
            display_name="ByteDance Image Edit",
-            category="api node/video/ByteDance",
+            category="api node/image/ByteDance",
            description="Edit images using ByteDance models via api based on prompt",
            inputs=[
                comfy_io.Combo.Input(
@@ -451,6 +480,182 @@ class ByteDanceImageEditNode(comfy_io.ComfyNode):
        return comfy_io.NodeOutput(await download_url_to_image_tensor(get_image_url_from_response(response)))


+class ByteDanceSeedreamNode(comfy_io.ComfyNode):  # API node that posts a Seedream 4 request to BYTEPLUS_IMAGE_ENDPOINT
+
+    @classmethod
+    def define_schema(cls):  # declares the node's inputs, its single image output and the hidden auth fields
+        return comfy_io.Schema(
+            node_id="ByteDanceSeedreamNode",
+            display_name="ByteDance Seedream 4",
+            category="api node/image/ByteDance",
+            description="Unified text-to-image generation and precise single-sentence editing at up to 4K resolution.",
+            inputs=[
+                comfy_io.Combo.Input(
+                    "model",
+                    options=["seedream-4-0-250828"],
+                    tooltip="Model name",
+                ),
+                comfy_io.String.Input(
+                    "prompt",
+                    multiline=True,
+                    default="",
+                    tooltip="Text prompt for creating or editing an image.",
+                ),
+                comfy_io.Image.Input(
+                    "image",
+                    tooltip="Input image(s) for image-to-image generation. "
+                            "List of 1-10 images for single or multi-reference generation.",
+                    optional=True,
+                ),
+                comfy_io.Combo.Input(
+                    "size_preset",
+                    options=[label for label, _, _ in RECOMMENDED_PRESETS_SEEDREAM_4],
+                    tooltip="Pick a recommended size. Select Custom to use the width and height below.",
+                ),
+                comfy_io.Int.Input(
+                    "width",
+                    default=2048,
+                    min=1024,
+                    max=4096,
+                    step=64,
+                    tooltip="Custom width for image. Value is working only if `size_preset` is set to `Custom`",
+                    optional=True,
+                ),
+                comfy_io.Int.Input(
+                    "height",
+                    default=2048,
+                    min=1024,
+                    max=4096,
+                    step=64,
+                    tooltip="Custom height for image. Value is working only if `size_preset` is set to `Custom`",
+                    optional=True,
+                ),
+                comfy_io.Combo.Input(
+                    "sequential_image_generation",
+                    options=["disabled", "auto"],
+                    tooltip="Group image generation mode. "
+                            "'disabled' generates a single image. "
+                            "'auto' lets the model decide whether to generate multiple related images "
+                            "(e.g., story scenes, character variations).",
+                    optional=True,
+                ),
+                comfy_io.Int.Input(
+                    "max_images",
+                    default=1,
+                    min=1,
+                    max=15,
+                    step=1,
+                    display_mode=comfy_io.NumberDisplay.number,
+                    tooltip="Maximum number of images to generate when sequential_image_generation='auto'. "
+                            "Total images (input + generated) cannot exceed 15.",
+                    optional=True,
+                ),
+                comfy_io.Int.Input(
+                    "seed",
+                    default=0,
+                    min=0,
+                    max=2147483647,
+                    step=1,
+                    display_mode=comfy_io.NumberDisplay.number,
+                    control_after_generate=True,
+                    tooltip="Seed to use for generation.",
+                    optional=True,
+                ),
+                comfy_io.Boolean.Input(
+                    "watermark",
+                    default=True,
+                    tooltip="Whether to add an \"AI generated\" watermark to the image.",
+                    optional=True,
+                ),
+            ],
+            outputs=[
+                comfy_io.Image.Output(),
+            ],
+            hidden=[
+                comfy_io.Hidden.auth_token_comfy_org,
+                comfy_io.Hidden.api_key_comfy_org,
+                comfy_io.Hidden.unique_id,
+            ],
+            is_api_node=True,
+        )
+
+    @classmethod
+    async def execute(  # validates inputs, uploads any reference images, posts the request, returns the downloaded image(s)
+        cls,
+        model: str,
+        prompt: str,
+        image: torch.Tensor = None,
+        size_preset: str = RECOMMENDED_PRESETS_SEEDREAM_4[0][0],
+        width: int = 2048,
+        height: int = 2048,
+        sequential_image_generation: str = "disabled",
+        max_images: int = 1,
+        seed: int = 0,
+        watermark: bool = True,
+    ) -> comfy_io.NodeOutput:
+        validate_string(prompt, strip_whitespace=True, min_length=1)  # presumably rejects empty / whitespace-only prompts; confirm against helper
+        w = h = None  # stay None when no preset label matches or the matched preset is "Custom" (label, None, None)
+        for label, tw, th in RECOMMENDED_PRESETS_SEEDREAM_4:
+            if label == size_preset:
+                w, h = tw, th
+                break
+
+        if w is None or h is None:  # no fixed preset size: use the explicit width/height inputs
+            w, h = width, height
+            if not (1024 <= w <= 4096) or not (1024 <= h <= 4096):  # same bounds as the schema's min/max for width and height
+                raise ValueError(
+                    f"Custom size out of range: {w}x{h}. "
+                    "Both width and height must be between 1024 and 4096 pixels."
+                )
+        n_input_images = get_number_of_images(image) if image is not None else 0
+        if n_input_images > 10:
+            raise ValueError(f"Maximum of 10 reference images are supported, but {n_input_images} received.")
+        if sequential_image_generation == "auto" and n_input_images + max_images > 15:  # combined cap is only checked in "auto" mode
+            raise ValueError(
+                "The maximum number of generated images plus the number of reference images cannot exceed 15."
+            )
+        auth_kwargs = {
+            "auth_token": cls.hidden.auth_token_comfy_org,
+            "comfy_api_key": cls.hidden.api_key_comfy_org,
+        }
+        reference_images_urls = []  # stays empty when no reference image is supplied
+        if n_input_images:
+            for i in image:
+                validate_image_aspect_ratio_range(i, (1, 3), (3, 1))  # presumably limits aspect ratio to 1:3 .. 3:1; confirm against helper
+            reference_images_urls = (await upload_images_to_comfyapi(
+                image,
+                max_images=n_input_images,
+                mime_type="image/png",
+                auth_kwargs=auth_kwargs,
+            ))
+        payload = Seedream4TaskCreationRequest(
+            model=model,
+            prompt=prompt,
+            image=reference_images_urls,
+            size=f"{w}x{h}",
+            seed=seed,
+            sequential_image_generation=sequential_image_generation,
+            sequential_image_generation_options=Seedream4Options(max_images=max_images),
+            watermark=watermark,
+        )
+        response = await SynchronousOperation(
+            endpoint=ApiEndpoint(
+                path=BYTEPLUS_IMAGE_ENDPOINT,
+                method=HttpMethod.POST,
+                request_model=Seedream4TaskCreationRequest,
+                response_model=ImageTaskCreationResponse,
+            ),
+            request=payload,
+            auth_kwargs=auth_kwargs,
+        ).execute()
+
+        if len(response.data) == 1:  # single result: go through the shared URL-extraction helper
+            return comfy_io.NodeOutput(await download_url_to_image_tensor(get_image_url_from_response(response)))
+        return comfy_io.NodeOutput(  # otherwise download every entry and concatenate (torch.cat joins along dim 0 by default)
+            torch.cat([await download_url_to_image_tensor(str(i["url"])) for i in response.data])
+        )
+
+
 class ByteDanceTextToVideoNode(comfy_io.ComfyNode):

    @classmethod
@@ -1001,6 +1206,7 @@ class ByteDanceExtension(ComfyExtension):
        return [
            ByteDanceImageNode,
            ByteDanceImageEditNode,
+            ByteDanceSeedreamNode,
            ByteDanceTextToVideoNode,
            ByteDanceImageToVideoNode,
            ByteDanceFirstLastFrameNode,
--- a/comfyui_version.py
+++ b/comfyui_version.py
@@ -1,3 +1,3 @@
 # This file is automatically generated by the build process when version is
 # updated in pyproject.toml.
-__version__ = "0.3.58"
+__version__ = "0.3.59"
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "ComfyUI"
-version = "0.3.58"
+version = "0.3.59"
 readme = "README.md"
 license = { file = "LICENSE" }
 requires-python = ">=3.9"
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 comfyui-frontend-package==1.25.11
-comfyui-workflow-templates==0.1.76
+comfyui-workflow-templates==0.1.81
 comfyui-embedded-docs==0.2.6
 torch
 torchsde
Author	SHA1	Message	Date
Jedrzej Kosinski	3b54b0256d	Merge branch 'master' into fix-context-window-slicing	2025-09-11 20:23:31 -07:00
comfyanonymous	18de0b2830	Fast preview for hunyuan image. (#9814 )	2025-09-11 19:33:02 -04:00
ComfyUI Wiki	df6850fae8	Update template to 0.1.81 (#9811 )	2025-09-11 14:59:26 -04:00
comfyanonymous	e01e99d075	Support hunyuan image distilled model. (#9807 )	2025-09-10 23:17:34 -04:00
comfyanonymous	72212fef66	ComfyUI version 0.3.59	2025-09-10 17:25:41 -04:00
ComfyUI Wiki	df34f1549a	Update template to 0.1.78 (#9806 ) * Update template to 0.1.77 * Update template to 0.1.78	2025-09-10 14:16:41 -07:00
Alexander Piskun	9b0553809c	add new ByteDanceSeedream (4.0) node (#9802 )	2025-09-10 14:13:18 -07:00
Jedrzej Kosinski	2835f7f63e	Apply cond slice fix	2025-09-09 17:45:35 -07:00