Merge branch 'master' into fix/gradient-stops-format

add painter node (#12294 )
* add painter node * use io.Color * code improve --------- Co-authored-by: guill <jacob.e.segal@gmail.com>
2026-03-24 06:27:29 +00:00 · 2026-03-12 09:55:42 -07:00 · 2026-03-12 09:55:29 -07:00 · 2026-03-12 09:45:48 -07:00 · 2026-03-12 11:30:50 -04:00 · 2026-03-12 00:17:31 -04:00
13 changed files with 1093 additions and 183 deletions
--- a/comfy/comfy_types/node_typing.py
+++ b/comfy/comfy_types/node_typing.py
@@ -176,8 +176,8 @@ class InputTypeOptions(TypedDict):
    """COMBO type only. Specifies the configuration for a multi-select widget.
    Available after ComfyUI frontend v1.13.4
    https://github.com/Comfy-Org/ComfyUI_frontend/pull/2987"""
-    gradient_stops: NotRequired[list[list[float]]]
-    """Gradient color stops for gradientslider display mode. Each stop is [offset, r, g, b] (``FLOAT``)."""
+    gradient_stops: NotRequired[list[dict]]
+    """Gradient color stops for gradientslider display mode. Each stop is {"offset": float, "color": [r, g, b]}."""


 class HiddenInputTypeDict(TypedDict):
--- a/comfy/ldm/flux/layers.py
+++ b/comfy/ldm/flux/layers.py
@@ -144,9 +144,9 @@ def apply_mod(tensor, m_mult, m_add=None, modulation_dims=None):
            return tensor * m_mult
    else:
        for d in modulation_dims:
-            tensor[:, d[0]:d[1]] *= m_mult[:, d[2]]
+            tensor[:, d[0]:d[1]] *= m_mult[:, d[2]:d[2] + 1]
            if m_add is not None:
-                tensor[:, d[0]:d[1]] += m_add[:, d[2]]
+                tensor[:, d[0]:d[1]] += m_add[:, d[2]:d[2] + 1]
        return tensor


--- a/comfy/ldm/flux/model.py
+++ b/comfy/ldm/flux/model.py
@@ -44,6 +44,22 @@ class FluxParams:
    txt_norm: bool = False


+def invert_slices(slices, length):
+    """Return the parts of ``[0, length)`` that no entry of ``slices`` covers.
+
+    ``slices`` is an iterable of ``(start, end)`` pairs; they are visited in
+    sorted order and may overlap. The result is a list of ``(start, end)``
+    tuples in ascending order.
+    """
+    gaps = []
+    cursor = 0
+
+    for begin, stop in sorted(slices):
+        if begin > cursor:
+            gaps.append((cursor, begin))
+        # Never move the cursor backwards when a slice is nested in an earlier one.
+        if stop > cursor:
+            cursor = stop
+
+    if length > cursor:
+        gaps.append((cursor, length))
+
+    return gaps
+
+
 class Flux(nn.Module):
    """
    Transformer model for flow matching on sequences.
@@ -138,6 +154,7 @@ class Flux(nn.Module):
        y: Tensor,
        guidance: Tensor = None,
        control = None,
+        timestep_zero_index=None,
        transformer_options={},
        attn_mask: Tensor = None,
    ) -> Tensor:
@@ -164,10 +181,6 @@ class Flux(nn.Module):
            txt = self.txt_norm(txt)
        txt = self.txt_in(txt)

-        vec_orig = vec
-        if self.params.global_modulation:
-            vec = (self.double_stream_modulation_img(vec_orig), self.double_stream_modulation_txt(vec_orig))
-
        if "post_input" in patches:
            for p in patches["post_input"]:
                out = p({"img": img, "txt": txt, "img_ids": img_ids, "txt_ids": txt_ids, "transformer_options": transformer_options})
@@ -182,6 +195,24 @@ class Flux(nn.Module):
        else:
            pe = None

+        vec_orig = vec
+        txt_vec = vec
+        extra_kwargs = {}
+        if timestep_zero_index is not None:
+            modulation_dims = []
+            batch = vec.shape[0] // 2
+            vec_orig = vec_orig.reshape(2, batch, vec.shape[1]).movedim(0, 1)
+            invert = invert_slices(timestep_zero_index, img.shape[1])
+            for s in invert:
+                modulation_dims.append((s[0], s[1], 0))
+            for s in timestep_zero_index:
+                modulation_dims.append((s[0], s[1], 1))
+            extra_kwargs["modulation_dims_img"] = modulation_dims
+            txt_vec = vec[:batch]
+
+        if self.params.global_modulation:
+            vec = (self.double_stream_modulation_img(vec_orig), self.double_stream_modulation_txt(txt_vec))
+
        blocks_replace = patches_replace.get("dit", {})
        transformer_options["total_blocks"] = len(self.double_blocks)
        transformer_options["block_type"] = "double"
@@ -195,7 +226,8 @@ class Flux(nn.Module):
                                                   vec=args["vec"],
                                                   pe=args["pe"],
                                                   attn_mask=args.get("attn_mask"),
-                                                   transformer_options=args.get("transformer_options"))
+                                                   transformer_options=args.get("transformer_options"),
+                                                   **extra_kwargs)
                    return out

                out = blocks_replace[("double_block", i)]({"img": img,
@@ -213,7 +245,8 @@ class Flux(nn.Module):
                                 vec=vec,
                                 pe=pe,
                                 attn_mask=attn_mask,
-                                 transformer_options=transformer_options)
+                                 transformer_options=transformer_options,
+                                 **extra_kwargs)

            if control is not None: # Controlnet
                control_i = control.get("input")
@@ -230,6 +263,12 @@ class Flux(nn.Module):
        if self.params.global_modulation:
            vec, _ = self.single_stream_modulation(vec_orig)

+        extra_kwargs = {}
+        if timestep_zero_index is not None:
+            # At this point img carries the text tokens as a prefix (they are sliced off again
+            # after the single blocks), so every image-token range is shifted by the text length.
+            # A range that starts at image token 0 is widened to start at 0, so it also covers the text tokens.
+            txt_len = txt.shape[1]
+            modulation_dims_combined = [(0 if start == 0 else start + txt_len, end + txt_len, vec_index) for start, end, vec_index in modulation_dims]
+            extra_kwargs["modulation_dims"] = modulation_dims_combined
+
        transformer_options["total_blocks"] = len(self.single_blocks)
        transformer_options["block_type"] = "single"
        transformer_options["img_slice"] = [txt.shape[1], img.shape[1]]
@@ -242,7 +281,8 @@ class Flux(nn.Module):
                                       vec=args["vec"],
                                       pe=args["pe"],
                                       attn_mask=args.get("attn_mask"),
-                                       transformer_options=args.get("transformer_options"))
+                                       transformer_options=args.get("transformer_options"),
+                                       **extra_kwargs)
                    return out

                out = blocks_replace[("single_block", i)]({"img": img,
@@ -253,7 +293,7 @@ class Flux(nn.Module):
                                                          {"original_block": block_wrap})
                img = out["img"]
            else:
-                img = block(img, vec=vec, pe=pe, attn_mask=attn_mask, transformer_options=transformer_options)
+                img = block(img, vec=vec, pe=pe, attn_mask=attn_mask, transformer_options=transformer_options, **extra_kwargs)

            if control is not None: # Controlnet
                control_o = control.get("output")
@@ -264,7 +304,11 @@ class Flux(nn.Module):

        img = img[:, txt.shape[1] :, ...]

-        img = self.final_layer(img, vec_orig)  # (N, T, patch_size ** 2 * out_channels)
+        extra_kwargs = {}
+        if timestep_zero_index is not None:
+            extra_kwargs["modulation_dims"] = modulation_dims
+
+        img = self.final_layer(img, vec_orig, **extra_kwargs)  # (N, T, patch_size ** 2 * out_channels)
        return img

    def process_img(self, x, index=0, h_offset=0, w_offset=0, transformer_options={}):
@@ -312,13 +356,16 @@ class Flux(nn.Module):
        w_len = ((w_orig + (patch_size // 2)) // patch_size)
        img, img_ids = self.process_img(x, transformer_options=transformer_options)
        img_tokens = img.shape[1]
+        timestep_zero_index = None
        if ref_latents is not None:
+            ref_num_tokens = []
            h = 0
            w = 0
            index = 0
            ref_latents_method = kwargs.get("ref_latents_method", self.params.default_ref_method)
+            timestep_zero = ref_latents_method == "index_timestep_zero"
            for ref in ref_latents:
-                if ref_latents_method == "index":
+                if ref_latents_method in ("index", "index_timestep_zero"):
                    index += self.params.ref_index_scale
                    h_offset = 0
                    w_offset = 0
@@ -342,6 +389,13 @@ class Flux(nn.Module):
                kontext, kontext_ids = self.process_img(ref, index=index, h_offset=h_offset, w_offset=w_offset)
                img = torch.cat([img, kontext], dim=1)
                img_ids = torch.cat([img_ids, kontext_ids], dim=1)
+                ref_num_tokens.append(kontext.shape[1])
+            if timestep_zero:
+                if index > 0:
+                    timestep = torch.cat([timestep, timestep * 0], dim=0)
+                    timestep_zero_index = [[img_tokens, img_ids.shape[1]]]
+            transformer_options = transformer_options.copy()
+            transformer_options["reference_image_num_tokens"] = ref_num_tokens

        txt_ids = torch.zeros((bs, context.shape[1], len(self.params.axes_dim)), device=x.device, dtype=torch.float32)

@@ -349,6 +403,6 @@ class Flux(nn.Module):
            for i in self.params.txt_ids_dims:
                txt_ids[:, :, i] = torch.linspace(0, context.shape[1] - 1, steps=context.shape[1], device=x.device, dtype=torch.float32)

-        out = self.forward_orig(img, img_ids, context, txt_ids, timestep, y, guidance, control, transformer_options, attn_mask=kwargs.get("attention_mask", None))
+        out = self.forward_orig(img, img_ids, context, txt_ids, timestep, y, guidance, control, timestep_zero_index=timestep_zero_index, transformer_options=transformer_options, attn_mask=kwargs.get("attention_mask", None))
        out = out[:, :img_tokens]
        return rearrange(out, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=h_len, w=w_len, ph=self.patch_size, pw=self.patch_size)[:,:,:h_orig,:w_orig]
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -270,10 +270,15 @@ try:
 except:
    OOM_EXCEPTION = Exception

+# Resolve the accelerator error class once at module level: if this torch build
+# has no ``torch.AcceleratorError`` attribute, fall back to ``RuntimeError``.
+try:
+    ACCELERATOR_ERROR = torch.AcceleratorError
+except AttributeError:
+    ACCELERATOR_ERROR = RuntimeError
+
 def is_oom(e):
+    """Return True when exception ``e`` is treated as an out-of-memory error.
+
+    ``e`` matches when it is an ``OOM_EXCEPTION`` instance, or when it is an
+    ``ACCELERATOR_ERROR`` instance whose ``error_code`` attribute equals 2 or
+    whose message contains "out of memory" (case-insensitive). In the second
+    case ``discard_cuda_async_error()`` is called before returning.
+    NOTE(review): error_code 2 is presumably the allocation-failure code - confirm.
+    """
    if isinstance(e, OOM_EXCEPTION):
        return True
-    if isinstance(e, torch.AcceleratorError) and getattr(e, 'error_code', None) == 2:
+    if isinstance(e, ACCELERATOR_ERROR) and (getattr(e, 'error_code', None) == 2 or "out of memory" in str(e).lower()):
        discard_cuda_async_error()
        return True
    return False
@@ -1275,7 +1280,7 @@ def discard_cuda_async_error():
        b = torch.tensor([1], dtype=torch.uint8, device=get_torch_device())
        _ = a + b
        synchronize()
-    except torch.AcceleratorError:
+    except RuntimeError:
        #Dump it! We already know about it from the synchronous return
        pass

--- a/comfy_api/latest/_io.py
+++ b/comfy_api/latest/_io.py
@@ -297,7 +297,7 @@ class Float(ComfyTypeIO):
        '''Float input.'''
        def __init__(self, id: str, display_name: str=None, optional=False, tooltip: str=None, lazy: bool=None,
                    default: float=None, min: float=None, max: float=None, step: float=None, round: float=None,
-                    display_mode: NumberDisplay=None, gradient_stops: list[list[float]]=None,
+                    display_mode: NumberDisplay=None, gradient_stops: list[dict]=None,
                    socketless: bool=None, force_input: bool=None, extra_dict=None, raw_link: bool=None, advanced: bool=None):
            super().__init__(id, display_name, optional, tooltip, lazy, default, socketless, None, force_input, extra_dict, raw_link, advanced)
            self.min = min
--- a/comfy_api_nodes/apis/reve.py
+++ b/comfy_api_nodes/apis/reve.py
@@ -0,0 +1,68 @@
+from pydantic import BaseModel, Field
+
+
+# One postprocessing step attached to a Reve request. ``process`` is a required
+# free-form string (the description names "upscale" and "remove_background");
+# ``upscale_factor`` defaults to None and is validated to 2..4 when given.
+class RevePostprocessingOperation(BaseModel):
+    process: str = Field(..., description="The postprocessing operation: upscale or remove_background.")
+    upscale_factor: int | None = Field(
+        None,
+        description="Upscale factor (2, 3, or 4). Only used when process is upscale.",
+        ge=2,
+        le=4,
+    )
+
+
+# Request model for Reve image creation. Every field except ``postprocessing``
+# must be supplied (``Field(...)``); ``aspect_ratio`` must be passed but may be
+# None. ``test_time_scaling`` is a non-optional int validated to 1..15.
+class ReveImageCreateRequest(BaseModel):
+    prompt: str = Field(...)
+    aspect_ratio: str | None = Field(...)
+    version: str = Field(...)
+    test_time_scaling: int = Field(
+        ...,
+        description="If included, the model will spend more effort making better images. Values between 1 and 15.",
+        ge=1,
+        le=15,
+    )
+    postprocessing: list[RevePostprocessingOperation] | None = Field(
+        None, description="Optional postprocessing operations to apply after generation."
+    )
+
+
+# Request model for Reve image editing. ``reference_image`` is a base64 string.
+# ``aspect_ratio`` and ``test_time_scaling`` must be passed explicitly but accept
+# None; a non-None ``test_time_scaling`` is validated to 1..15.
+class ReveImageEditRequest(BaseModel):
+    edit_instruction: str = Field(...)
+    reference_image: str = Field(..., description="A base64 encoded image to use as reference for the edit.")
+    aspect_ratio: str | None = Field(...)
+    version: str = Field(...)
+    test_time_scaling: int | None = Field(
+        ...,
+        description="If included, the model will spend more effort making better images. Values between 1 and 15.",
+        ge=1,
+        le=15,
+    )
+    postprocessing: list[RevePostprocessingOperation] | None = Field(
+        None, description="Optional postprocessing operations to apply after generation."
+    )
+
+
+# Request model for Reve image remixing. ``reference_images`` is a list of base64
+# strings; the 1-6 count in its description is not enforced by this model.
+# ``aspect_ratio`` and ``test_time_scaling`` must be passed explicitly but accept None.
+class ReveImageRemixRequest(BaseModel):
+    prompt: str = Field(...)
+    reference_images: list[str] = Field(..., description="A list of 1-6 base64 encoded reference images.")
+    aspect_ratio: str | None = Field(...)
+    version: str = Field(...)
+    test_time_scaling: int | None = Field(
+        ...,
+        description="If included, the model will spend more effort making better images. Values between 1 and 15.",
+        ge=1,
+        le=15,
+    )
+    postprocessing: list[RevePostprocessingOperation] | None = Field(
+        None, description="Optional postprocessing operations to apply after generation."
+    )
+
+
+# Response model for the Reve image endpoints. Every field is optional and
+# defaults to None.
+class ReveImageResponse(BaseModel):
+    image: str | None = Field(None, description="The base64 encoded image data.")
+    request_id: str | None = Field(None, description="A unique id for the request.")
+    credits_used: float | None = Field(None, description="The number of credits used for this request.")
+    version: str | None = Field(None, description="The specific model version used.")
+    content_violation: bool | None = Field(
+        None, description="Indicates whether the generated image violates the content policy."
+    )
--- a/comfy_api_nodes/nodes_reve.py
+++ b/comfy_api_nodes/nodes_reve.py
@@ -0,0 +1,395 @@
+from io import BytesIO
+
+from typing_extensions import override
+
+from comfy_api.latest import IO, ComfyExtension, Input
+from comfy_api_nodes.apis.reve import (
+    ReveImageCreateRequest,
+    ReveImageEditRequest,
+    ReveImageRemixRequest,
+    RevePostprocessingOperation,
+)
+from comfy_api_nodes.util import (
+    ApiEndpoint,
+    bytesio_to_image_tensor,
+    sync_op_raw,
+    tensor_to_base64_string,
+    validate_string,
+)
+
+
+def _build_postprocessing(upscale: dict, remove_background: bool) -> list[RevePostprocessingOperation] | None:
+    """Translate the node's postprocessing widgets into request operations.
+
+    ``upscale`` is the DynamicCombo value: its ``"upscale"`` key selects
+    ``"enabled"``/``"disabled"`` and ``"upscale_factor"`` is read only when
+    enabled. Returns None instead of an empty list when nothing was requested.
+    """
+    operations = []
+    upscale_enabled = upscale["upscale"] == "enabled"
+    if upscale_enabled:
+        upscale_op = RevePostprocessingOperation(process="upscale", upscale_factor=upscale["upscale_factor"])
+        operations.append(upscale_op)
+    if remove_background:
+        operations.append(RevePostprocessingOperation(process="remove_background"))
+    if not operations:
+        return None
+    return operations
+
+
+def _postprocessing_inputs():
+    """Build the postprocessing inputs: an ``upscale`` combo followed by a ``remove_background`` toggle."""
+    upscale_factor_input = IO.Int.Input(
+        "upscale_factor",
+        default=2,
+        min=2,
+        max=4,
+        step=1,
+        tooltip="Upscale factor (2x, 3x, or 4x).",
+    )
+    # The factor widget is only attached to the "enabled" option.
+    upscale_input = IO.DynamicCombo.Input(
+        "upscale",
+        options=[
+            IO.DynamicCombo.Option("disabled", []),
+            IO.DynamicCombo.Option("enabled", [upscale_factor_input]),
+        ],
+        tooltip="Upscale the generated image. May add additional cost.",
+    )
+    remove_background_input = IO.Boolean.Input(
+        "remove_background",
+        default=False,
+        tooltip="Remove the background from the generated image. May add additional cost.",
+    )
+    return [upscale_input, remove_background_input]
+
+
+def _reve_price_extractor(headers: dict) -> float | None:
+    """Convert the ``x-reve-credits-used`` response header into a price.
+
+    Returns None when the header is absent; otherwise the header value as a
+    float divided by 524.48.
+    NOTE(review): 524.48 is presumably the credits-per-USD rate - confirm.
+    """
+    raw_credits = headers.get("x-reve-credits-used")
+    if raw_credits is None:
+        return None
+    return float(raw_credits) / 524.48
+
+
+def _reve_response_header_validator(headers: dict) -> None:
+    """Raise ValueError when the response headers report a failure.
+
+    A non-empty ``x-reve-error-code`` is reported first; otherwise an
+    ``x-reve-content-violation`` header equal to "true" (any letter case)
+    raises a content-policy error.
+    """
+    if error_code := headers.get("x-reve-error-code"):
+        raise ValueError(f"Reve API error: {error_code}")
+    violation_flag = headers.get("x-reve-content-violation", "")
+    if violation_flag.lower() == "true":
+        raise ValueError("The generated image was flagged for content policy violation.")
+
+
+def _model_inputs(versions: list[str], aspect_ratios: list[str]):
+    """Build one DynamicCombo option per model version.
+
+    Each option carries its own ``aspect_ratio`` combo (choices taken from
+    ``aspect_ratios``) and an advanced ``test_time_scaling`` integer widget.
+    """
+    model_options = []
+    for version in versions:
+        # Fresh input objects are created for every version, as in a comprehension.
+        aspect_ratio_input = IO.Combo.Input(
+            "aspect_ratio",
+            options=aspect_ratios,
+            tooltip="Aspect ratio of the output image.",
+        )
+        scaling_input = IO.Int.Input(
+            "test_time_scaling",
+            default=1,
+            min=1,
+            max=5,
+            step=1,
+            tooltip="Higher values produce better images but cost more credits.",
+            advanced=True,
+        )
+        model_options.append(IO.DynamicCombo.Option(version, [aspect_ratio_input, scaling_input]))
+    return model_options
+
+
+# API node that posts a text prompt to the Reve create endpoint and returns the
+# response bytes decoded into an image tensor.
+class ReveImageCreateNode(IO.ComfyNode):
+
+    @classmethod
+    def define_schema(cls):
+        """Declare the node's inputs (prompt, model options, postprocessing, seed), its image output and price badge."""
+        return IO.Schema(
+            node_id="ReveImageCreateNode",
+            display_name="Reve Image Create",
+            category="api node/image/Reve",
+            description="Generate images from text descriptions using Reve.",
+            inputs=[
+                IO.String.Input(
+                    "prompt",
+                    multiline=True,
+                    default="",
+                    tooltip="Text description of the desired image. Maximum 2560 characters.",
+                ),
+                IO.DynamicCombo.Input(
+                    "model",
+                    options=_model_inputs(
+                        ["reve-create@20250915"],
+                        aspect_ratios=["3:2", "16:9", "9:16", "2:3", "4:3", "3:4", "1:1"],
+                    ),
+                    tooltip="Model version to use for generation.",
+                ),
+                *_postprocessing_inputs(),
+                IO.Int.Input(
+                    "seed",
+                    default=0,
+                    min=0,
+                    max=2147483647,
+                    control_after_generate=True,
+                    tooltip="Seed controls whether the node should re-run; "
+                    "results are non-deterministic regardless of seed.",
+                ),
+            ],
+            outputs=[IO.Image.Output()],
+            hidden=[
+                IO.Hidden.auth_token_comfy_org,
+                IO.Hidden.api_key_comfy_org,
+                IO.Hidden.unique_id,
+            ],
+            is_api_node=True,
+            price_badge=IO.PriceBadge(
+                expr="""{"type":"usd","usd":0.03432,"format":{"approximate":true,"note":"(base)"}}""",
+            ),
+        )
+
+    @classmethod
+    async def execute(
+        cls,
+        prompt: str,
+        model: dict,
+        upscale: dict,
+        remove_background: bool,
+        seed: int,
+    ) -> IO.NodeOutput:
+        """Validate the prompt, call the create endpoint and return the decoded image.
+
+        ``model`` supplies ``"model"`` (version), ``"aspect_ratio"`` and
+        ``"test_time_scaling"``. ``seed`` is not sent to the API. The response
+        is requested as binary with an ``Accept: image/webp`` header.
+        """
+        # Prompt length is enforced to 1..2560 characters before any request is made.
+        validate_string(prompt, min_length=1, max_length=2560)
+        response = await sync_op_raw(
+            cls,
+            ApiEndpoint(
+                path="/proxy/reve/v1/image/create",
+                method="POST",
+                headers={"Accept": "image/webp"},
+            ),
+            as_binary=True,
+            price_extractor=_reve_price_extractor,
+            response_header_validator=_reve_response_header_validator,
+            data=ReveImageCreateRequest(
+                prompt=prompt,
+                aspect_ratio=model["aspect_ratio"],
+                version=model["model"],
+                test_time_scaling=model["test_time_scaling"],
+                postprocessing=_build_postprocessing(upscale, remove_background),
+            ),
+        )
+        return IO.NodeOutput(bytesio_to_image_tensor(BytesIO(response)))
+
+
+# API node that posts an input image plus an edit instruction to the Reve edit
+# endpoint and returns the response bytes decoded into an image tensor.
+class ReveImageEditNode(IO.ComfyNode):
+
+    @classmethod
+    def define_schema(cls):
+        """Declare the node's inputs (image, instruction, model options, postprocessing, seed), output and price badge."""
+        return IO.Schema(
+            node_id="ReveImageEditNode",
+            display_name="Reve Image Edit",
+            category="api node/image/Reve",
+            description="Edit images using natural language instructions with Reve.",
+            inputs=[
+                IO.Image.Input("image", tooltip="The image to edit."),
+                IO.String.Input(
+                    "edit_instruction",
+                    multiline=True,
+                    default="",
+                    tooltip="Text description of how to edit the image. Maximum 2560 characters.",
+                ),
+                IO.DynamicCombo.Input(
+                    "model",
+                    options=_model_inputs(
+                        ["reve-edit@20250915", "reve-edit-fast@20251030"],
+                        aspect_ratios=["auto", "16:9", "9:16", "3:2", "2:3", "4:3", "3:4", "1:1"],
+                    ),
+                    tooltip="Model version to use for editing.",
+                ),
+                *_postprocessing_inputs(),
+                IO.Int.Input(
+                    "seed",
+                    default=0,
+                    min=0,
+                    max=2147483647,
+                    control_after_generate=True,
+                    tooltip="Seed controls whether the node should re-run; "
+                    "results are non-deterministic regardless of seed.",
+                ),
+            ],
+            outputs=[IO.Image.Output()],
+            hidden=[
+                IO.Hidden.auth_token_comfy_org,
+                IO.Hidden.api_key_comfy_org,
+                IO.Hidden.unique_id,
+            ],
+            is_api_node=True,
+            price_badge=IO.PriceBadge(
+                depends_on=IO.PriceBadgeDepends(
+                    widgets=["model"],
+                ),
+                expr="""
+                (
+                    $isFast := $contains(widgets.model, "fast");
+                    $base := $isFast ? 0.01001 : 0.0572;
+                    {"type": "usd", "usd": $base, "format": {"approximate": true, "note": "(base)"}}
+                )
+                """,
+            ),
+        )
+
+    @classmethod
+    async def execute(
+        cls,
+        image: Input.Image,
+        edit_instruction: str,
+        model: dict,
+        upscale: dict,
+        remove_background: bool,
+        seed: int,
+    ) -> IO.NodeOutput:
+        """Validate the instruction, call the edit endpoint and return the decoded image.
+
+        ``model`` supplies ``"model"`` (version), ``"aspect_ratio"`` and
+        ``"test_time_scaling"``. ``seed`` is not sent to the API. The response
+        is requested as binary with an ``Accept: image/webp`` header.
+        """
+        validate_string(edit_instruction, min_length=1, max_length=2560)
+        tts = model["test_time_scaling"]
+        ar = model["aspect_ratio"]
+        response = await sync_op_raw(
+            cls,
+            ApiEndpoint(
+                path="/proxy/reve/v1/image/edit",
+                method="POST",
+                headers={"Accept": "image/webp"},
+            ),
+            as_binary=True,
+            price_extractor=_reve_price_extractor,
+            response_header_validator=_reve_response_header_validator,
+            data=ReveImageEditRequest(
+                edit_instruction=edit_instruction,
+                reference_image=tensor_to_base64_string(image),
+                # "auto" is passed to the request model as None rather than as a literal string.
+                aspect_ratio=ar if ar != "auto" else None,
+                version=model["model"],
+                # A falsy value or a scaling of 1 is likewise passed as None.
+                test_time_scaling=tts if tts and tts > 1 else None,
+                postprocessing=_build_postprocessing(upscale, remove_background),
+            ),
+        )
+        return IO.NodeOutput(bytesio_to_image_tensor(BytesIO(response)))
+
+
+# API node that posts up to six reference images plus a prompt to the Reve remix
+# endpoint and returns the response bytes decoded into an image tensor.
+class ReveImageRemixNode(IO.ComfyNode):
+
+    @classmethod
+    def define_schema(cls):
+        """Declare the node's inputs (reference images, prompt, model options, postprocessing, seed), output and price badge."""
+        return IO.Schema(
+            node_id="ReveImageRemixNode",
+            display_name="Reve Image Remix",
+            category="api node/image/Reve",
+            description="Combine reference images with text prompts to create new images using Reve.",
+            inputs=[
+                IO.Autogrow.Input(
+                    "reference_images",
+                    template=IO.Autogrow.TemplatePrefix(
+                        IO.Image.Input("image"),
+                        prefix="image_",
+                        min=1,
+                        max=6,
+                    ),
+                ),
+                IO.String.Input(
+                    "prompt",
+                    multiline=True,
+                    default="",
+                    tooltip="Text description of the desired image. "
+                    "May include XML img tags to reference specific images by index, "
+                    "e.g. <img>0</img>, <img>1</img>, etc.",
+                ),
+                IO.DynamicCombo.Input(
+                    "model",
+                    options=_model_inputs(
+                        ["reve-remix@20250915", "reve-remix-fast@20251030"],
+                        aspect_ratios=["auto", "16:9", "9:16", "3:2", "2:3", "4:3", "3:4", "1:1"],
+                    ),
+                    tooltip="Model version to use for remixing.",
+                ),
+                *_postprocessing_inputs(),
+                IO.Int.Input(
+                    "seed",
+                    default=0,
+                    min=0,
+                    max=2147483647,
+                    control_after_generate=True,
+                    tooltip="Seed controls whether the node should re-run; "
+                    "results are non-deterministic regardless of seed.",
+                ),
+            ],
+            outputs=[IO.Image.Output()],
+            hidden=[
+                IO.Hidden.auth_token_comfy_org,
+                IO.Hidden.api_key_comfy_org,
+                IO.Hidden.unique_id,
+            ],
+            is_api_node=True,
+            price_badge=IO.PriceBadge(
+                depends_on=IO.PriceBadgeDepends(
+                    widgets=["model"],
+                ),
+                expr="""
+                (
+                    $isFast := $contains(widgets.model, "fast");
+                    $base := $isFast ? 0.01001 : 0.0572;
+                    {"type": "usd", "usd": $base, "format": {"approximate": true, "note": "(base)"}}
+                )
+                """,
+            ),
+        )
+
+    @classmethod
+    async def execute(
+        cls,
+        reference_images: IO.Autogrow.Type,
+        prompt: str,
+        model: dict,
+        upscale: dict,
+        remove_background: bool,
+        seed: int,
+    ) -> IO.NodeOutput:
+        """Validate the inputs, call the remix endpoint and return the decoded image.
+
+        ``reference_images`` is iterated by key and must hold between one and
+        six images. ``model`` supplies ``"model"`` (version), ``"aspect_ratio"``
+        and ``"test_time_scaling"``. ``seed`` is not sent to the API.
+
+        Raises:
+            ValueError: when no reference image, or more than six, are given.
+        """
+        validate_string(prompt, min_length=1, max_length=2560)
+        if not reference_images:
+            raise ValueError("At least one reference image is required.")
+        # Check the count before encoding so an oversized input fails without
+        # paying for the base64 conversion of every image.
+        image_keys = list(reference_images)
+        if len(image_keys) > 6:
+            raise ValueError("Maximum 6 reference images are allowed.")
+        ref_base64_list = [tensor_to_base64_string(reference_images[key]) for key in image_keys]
+        tts = model["test_time_scaling"]
+        ar = model["aspect_ratio"]
+        response = await sync_op_raw(
+            cls,
+            ApiEndpoint(
+                path="/proxy/reve/v1/image/remix",
+                method="POST",
+                headers={"Accept": "image/webp"},
+            ),
+            as_binary=True,
+            price_extractor=_reve_price_extractor,
+            response_header_validator=_reve_response_header_validator,
+            data=ReveImageRemixRequest(
+                prompt=prompt,
+                reference_images=ref_base64_list,
+                # "auto" is passed to the request model as None rather than as a literal string.
+                aspect_ratio=ar if ar != "auto" else None,
+                version=model["model"],
+                # A falsy value or a scaling of 1 is likewise passed as None.
+                test_time_scaling=tts if tts and tts > 1 else None,
+                postprocessing=_build_postprocessing(upscale, remove_background),
+            ),
+        )
+        return IO.NodeOutput(bytesio_to_image_tensor(BytesIO(response)))
+
+
+class ReveExtension(ComfyExtension):
+    """Extension that exposes the three Reve image nodes."""
+
+    @override
+    async def get_node_list(self) -> list[type[IO.ComfyNode]]:
+        """Return the node classes in create, edit, remix order."""
+        node_classes: list[type[IO.ComfyNode]] = [ReveImageCreateNode, ReveImageEditNode, ReveImageRemixNode]
+        return node_classes
+
+
+async def comfy_entrypoint() -> ReveExtension:
+    """Create and return a new ``ReveExtension`` instance."""
+    extension = ReveExtension()
+    return extension
--- a/comfy_api_nodes/util/client.py
+++ b/comfy_api_nodes/util/client.py
@@ -67,6 +67,7 @@ class _RequestConfig:
    progress_origin_ts: float | None = None
    price_extractor: Callable[[dict[str, Any]], float | None] | None = None
    is_rate_limited: Callable[[int, Any], bool] | None = None
+    response_header_validator: Callable[[dict[str, str]], None] | None = None


@dataclass
@@ -202,11 +203,13 @@ async def sync_op_raw(
    monitor_progress: bool = True,
    max_retries_on_rate_limit: int = 16,
    is_rate_limited: Callable[[int, Any], bool] | None = None,
+    response_header_validator: Callable[[dict[str, str]], None] | None = None,
 ) -> dict[str, Any] | bytes:
    """
    Make a single network request.
      - If as_binary=False (default): returns JSON dict (or {'_raw': '<text>'} if non-JSON).
      - If as_binary=True: returns bytes.
+      - response_header_validator: optional callback receiving response headers dict
    """
    if isinstance(data, BaseModel):
        data = data.model_dump(exclude_none=True)
@@ -232,6 +235,7 @@ async def sync_op_raw(
        price_extractor=price_extractor,
        max_retries_on_rate_limit=max_retries_on_rate_limit,
        is_rate_limited=is_rate_limited,
+        response_header_validator=response_header_validator,
    )
    return await _request_base(cfg, expect_binary=as_binary)

@@ -769,6 +773,12 @@ async def _request_base(cfg: _RequestConfig, expect_binary: bool):
                                    cfg.node_cls, cfg.wait_label, int(now - start_time), cfg.estimated_total
                                )
                    bytes_payload = bytes(buff)
+                    resp_headers = {k.lower(): v for k, v in resp.headers.items()}
+                    if cfg.price_extractor:
+                        with contextlib.suppress(Exception):
+                            extracted_price = cfg.price_extractor(resp_headers)
+                    if cfg.response_header_validator:
+                        cfg.response_header_validator(resp_headers)
                    operation_succeeded = True
                    final_elapsed_seconds = int(time.monotonic() - start_time)
                    request_logger.log_request_response(
@@ -776,7 +786,7 @@ async def _request_base(cfg: _RequestConfig, expect_binary: bool):
                        request_method=method,
                        request_url=url,
                        response_status_code=resp.status,
-                        response_headers=dict(resp.headers),
+                        response_headers=resp_headers,
                        response_content=bytes_payload,
                    )
                    return bytes_payload
--- a/comfy_extras/nodes_flux.py
+++ b/comfy_extras/nodes_flux.py
@@ -6,6 +6,7 @@ import comfy.model_management
 import torch
 import math
 import nodes
+import comfy.ldm.flux.math

 class CLIPTextEncodeFlux(io.ComfyNode):
    @classmethod
@@ -231,6 +232,68 @@ class Flux2Scheduler(io.ComfyNode):
        sigmas = get_schedule(steps, round(seq_len))
        return io.NodeOutput(sigmas)

+class KV_Attn_Input:
+    """Callable attention patch that caches reference-image keys/values.
+
+    ``FluxKVCache.execute`` registers an instance via
+    ``set_model_attn1_patch``. The first call for a given block stores the
+    trailing ``k``/``v`` slices; later calls for the same block append the
+    stored slices to the incoming ``k``/``v``.
+    """
+    def __init__(self):
+        # Maps "<block_type>_<block_index>" -> (k_slice, v_slice).
+        # NOTE(review): self.set_cache is only assigned inside __call__, so it
+        # does not exist until the first call that reaches the cache logic.
+        self.cache = {}
+
+    def __call__(self, q, k, v, extra_options, **kwargs):
+        """Return replacement q/k/v for one attention call.
+
+        Returns an empty dict when ``extra_options`` carries no
+        ``reference_image_num_tokens``; otherwise a dict with keys
+        ``"q"``, ``"k"`` and ``"v"``.
+        """
+        reference_image_num_tokens = extra_options.get("reference_image_num_tokens", [])
+        if len(reference_image_num_tokens) == 0:
+            return {}
+
+        # Total number of reference tokens across all reference images.
+        ref_toks = sum(reference_image_num_tokens)
+        cache_key = "{}_{}".format(extra_options["block_type"], extra_options["block_index"])
+        if cache_key in self.cache:
+            # Cache hit: append the stored slices along dim 2
+            # (presumably the token axis - TODO confirm against the caller).
+            kk, vv = self.cache[cache_key]
+            self.set_cache = False
+            return {"q": q, "k": torch.cat((k, kk), dim=2), "v": torch.cat((v, vv), dim=2)}
+
+        # Cache miss: remember the last ref_toks entries along dim 2 and
+        # pass q/k/v through unchanged.
+        self.cache[cache_key] = (k[:, :, -ref_toks:], v[:, :, -ref_toks:])
+        self.set_cache = True
+        return {"q": q, "k": k, "v": v}
+
+    def cleanup(self):
+        """Drop all cached key/value slices."""
+        self.cache = {}
+
+
+class FluxKVCache(io.ComfyNode):
+    """Node that patches a model clone with a ``KV_Attn_Input`` cache."""
+    @classmethod
+    def define_schema(cls) -> io.Schema:
+        """Declare the node: one model input, one patched model output."""
+        return io.Schema(
+            node_id="FluxKVCache",
+            display_name="Flux KV Cache",
+            description="Enables KV Cache optimization for reference images on Flux family models.",
+            category="",
+            is_experimental=True,
+            inputs=[
+                io.Model.Input("model", tooltip="The model to use KV Cache on."),
+            ],
+            outputs=[
+                io.Model.Output(tooltip="The patched model with KV Cache enabled."),
+            ],
+        )
+
+    @classmethod
+    def execute(cls, model: io.Model.Type) -> io.NodeOutput:
+        """Clone ``model``, attach the KV cache patches and return the clone."""
+        m = model.clone()
+        input_patch_obj = KV_Attn_Input()
+
+        def model_input_patch(inputs):
+            # Once the attention patch holds cached entries, drop the last
+            # ref_image_tokens entries along dim 1 of "img"
+            # (presumably the reference-image tokens - TODO confirm).
+            if len(input_patch_obj.cache) > 0:
+                ref_image_tokens = sum(inputs["transformer_options"].get("reference_image_num_tokens", []))
+                if ref_image_tokens > 0:
+                    img = inputs["img"]
+                    inputs["img"] = img[:, :-ref_image_tokens]
+            return inputs
+
+        m.set_model_attn1_patch(input_patch_obj)
+        m.set_model_post_input_patch(model_input_patch)
+        # Override default_ref_method on whichever object exposes it:
+        # diffusion_model.params when present, otherwise diffusion_model.
+        if hasattr(model.model.diffusion_model, "params"):
+            m.add_object_patch("diffusion_model.params.default_ref_method", "index_timestep_zero")
+        else:
+            m.add_object_patch("diffusion_model.default_ref_method", "index_timestep_zero")
+
+        return io.NodeOutput(m)

 class FluxExtension(ComfyExtension):
    @override
@@ -243,6 +306,7 @@ class FluxExtension(ComfyExtension):
            FluxKontextMultiReferenceLatentMethod,
            EmptyFlux2LatentImage,
            Flux2Scheduler,
+            FluxKVCache,
        ]


--- a/comfy_extras/nodes_glsl.py
+++ b/comfy_extras/nodes_glsl.py
@@ -1,67 +1,85 @@
 import os
 import sys
 import re
-import ctypes
 import logging
+import ctypes.util
+import importlib.util
 from typing import TypedDict

 import numpy as np
 import torch

 import nodes
-import comfy_angle
 from comfy_api.latest import ComfyExtension, io, ui
 from typing_extensions import override
+from utils.install_util import get_missing_requirements_message

 logger = logging.getLogger(__name__)


-def _preload_angle():
-    egl_path = comfy_angle.get_egl_path()
-    gles_path = comfy_angle.get_glesv2_path()
+def _check_opengl_availability():
+    """Early check for OpenGL availability. Raises RuntimeError if unlikely to work."""
+    logger.debug("_check_opengl_availability: starting")
+    missing = []

-    if sys.platform == "win32":
-        angle_dir = comfy_angle.get_lib_dir()
-        os.add_dll_directory(angle_dir)
-        os.environ["PATH"] = angle_dir + os.pathsep + os.environ.get("PATH", "")
+    # Check Python packages (using find_spec to avoid importing)
+    logger.debug("_check_opengl_availability: checking for glfw package")
+    if importlib.util.find_spec("glfw") is None:
+        missing.append("glfw")

-    mode = 0 if sys.platform == "win32" else ctypes.RTLD_GLOBAL
-    ctypes.CDLL(str(egl_path), mode=mode)
-    ctypes.CDLL(str(gles_path), mode=mode)
+    logger.debug("_check_opengl_availability: checking for OpenGL package")
+    if importlib.util.find_spec("OpenGL") is None:
+        missing.append("PyOpenGL")
+
+    if missing:
+        raise RuntimeError(
+            f"OpenGL dependencies not available.\n{get_missing_requirements_message()}\n"
+        )
+
+    # On Linux without display, check if headless backends are available
+    logger.debug(f"_check_opengl_availability: platform={sys.platform}")
+    if sys.platform.startswith("linux"):
+        has_display = os.environ.get("DISPLAY") or os.environ.get("WAYLAND_DISPLAY")
+        logger.debug(f"_check_opengl_availability: has_display={bool(has_display)}")
+        if not has_display:
+            # Check for EGL or OSMesa libraries
+            logger.debug("_check_opengl_availability: checking for EGL library")
+            has_egl = ctypes.util.find_library("EGL")
+            logger.debug("_check_opengl_availability: checking for OSMesa library")
+            has_osmesa = ctypes.util.find_library("OSMesa")
+
+            # Error disabled for CI as it fails this check
+            # if not has_egl and not has_osmesa:
+            #     raise RuntimeError(
+            #         "GLSL Shader node: No display and no headless backend (EGL/OSMesa) found.\n"
+            #         "See error below for installation instructions."
+            #     )
+            logger.debug(f"Headless mode: EGL={'yes' if has_egl else 'no'}, OSMesa={'yes' if has_osmesa else 'no'}")
+
+    logger.debug("_check_opengl_availability: completed")


-# Pre-load ANGLE *before* any PyOpenGL import so that the EGL platform
-# plugin picks up ANGLE's libEGL / libGLESv2 instead of system libs.
-_preload_angle()
-os.environ.setdefault("PYOPENGL_PLATFORM", "egl")
+# Run early check at import time
+logger.debug("nodes_glsl: running _check_opengl_availability at import time")
+_check_opengl_availability()

-import OpenGL
-OpenGL.USE_ACCELERATE = False
+# OpenGL modules - initialized lazily when context is created
+gl = None
+glfw = None
+EGL = None


-def _patch_find_library():
-    """On Windows, PyOpenGL's EGL platform looks for 'EGL' and 'GLESv2' by
-    name via ctypes.util.find_library, but ANGLE ships as 'libEGL' and
-    'libGLESv2'.  Patch find_library to return the full ANGLE paths so
-    PyOpenGL loads the same DLLs we pre-loaded (same handle, no duplicates)."""
-    if sys.platform != "win32":
-        return
-    import ctypes.util
-    _orig = ctypes.util.find_library
-    def _patched(name):
-        if name == 'EGL':
-            return comfy_angle.get_egl_path()
-        if name == 'GLESv2':
-            return comfy_angle.get_glesv2_path()
-        return _orig(name)
-    ctypes.util.find_library = _patched
+def _import_opengl():
+    """Return the ``OpenGL.GL`` module, importing it on first use.
+
+    The module is stored in the module-level ``gl`` global so later calls
+    return the same object without importing again.
+    """
+    global gl
+    if gl is not None:
+        return gl
+
+    logger.debug("_import_opengl: importing OpenGL.GL")
+    import OpenGL.GL as _gl
+    gl = _gl
+    logger.debug("_import_opengl: import completed")
+    return gl


-_patch_find_library()
-
-from OpenGL import EGL
-from OpenGL import GLES3 as gl
-
 class SizeModeInput(TypedDict):
    size_mode: str
    width: int
@@ -82,7 +100,7 @@ MAX_OUTPUTS = 4     # fragColor0-3 (MRT)
 # (-1,-1)---(3,-1)
 #
 # v_texCoord is computed from clip space: * 0.5 + 0.5 maps (-1,1) -> (0,1)
-VERTEX_SHADER = """#version 300 es
+VERTEX_SHADER = """#version 330 core
 out vec2 v_texCoord;
 void main() {
    vec2 verts[3] = vec2[](vec2(-1, -1), vec2(3, -1), vec2(-1, 3));
@@ -106,21 +124,14 @@ void main() {
 """


-
-def _egl_attribs(*values):
-    """Build an EGL_NONE-terminated EGLint attribute array."""
-    vals = list(values) + [EGL.EGL_NONE]
-    return (ctypes.c_int32 * len(vals))(*vals)
-
-
-def _gl_str(name):
-    """Get an OpenGL string parameter."""
-    v = gl.glGetString(name)
-    if not v:
-        return "Unknown"
-    if isinstance(v, bytes):
-        return v.decode(errors="replace")
-    return ctypes.string_at(v).decode(errors="replace")
+def _convert_es_to_desktop(source: str) -> str:
+    """Convert GLSL ES (WebGL) shader source to desktop GLSL 330 core.
+
+    Strips any existing ``#version`` directive, including an optional
+    ``es`` / ``core`` / ``compatibility`` profile token, removes default
+    ``precision`` statements, and prepends ``#version 330 core``.
+
+    Args:
+        source: Fragment shader source, normally written for GLSL ES 3.00.
+
+    Returns:
+        The source with a single ``#version 330 core`` header.
+    """
+    # Remove any existing #version directive. The profile token has to be
+    # consumed as well: with only "es" handled, "#version 330 core" would
+    # leave a stray "core" line behind and break compilation.
+    source = re.sub(
+        r"#version\s+\d+(\s+(es|core|compatibility)\b)?\s*\n?",
+        "",
+        source,
+        flags=re.IGNORECASE,
+    )
+    # Remove precision qualifiers (not needed in desktop GLSL)
+    source = re.sub(r"precision\s+(lowp|mediump|highp)\s+\w+\s*;\s*\n?", "", source)
+    # Prepend desktop GLSL version
+    return "#version 330 core\n" + source


 def _detect_output_count(source: str) -> int:
@@ -146,8 +157,163 @@ def _detect_pass_count(source: str) -> int:
    return 1


+def _init_glfw():
+    """Initialize GLFW and create a hidden window with a current GL context.
+
+    Requests an OpenGL 3.3 core profile context on an invisible 64x64 window.
+
+    Returns:
+        ``(window, glfw_module)``.
+
+    Raises:
+        RuntimeError: on macOS, or when ``glfw.init()`` or
+            ``glfw.create_window()`` fails.
+    """
+    logger.debug("_init_glfw: starting")
+    # On macOS, glfw.init() must be called from main thread or it hangs forever
+    if sys.platform == "darwin":
+        logger.debug("_init_glfw: skipping on macOS")
+        raise RuntimeError("GLFW backend not supported on macOS")
+
+    logger.debug("_init_glfw: importing glfw module")
+    # Imported here rather than at module level so the module loads without glfw.
+    import glfw as _glfw
+
+    logger.debug("_init_glfw: calling glfw.init()")
+    if not _glfw.init():
+        raise RuntimeError("glfw.init() failed")
+
+    try:
+        logger.debug("_init_glfw: setting window hints")
+        # Hidden window, OpenGL 3.3 core profile.
+        _glfw.window_hint(_glfw.VISIBLE, _glfw.FALSE)
+        _glfw.window_hint(_glfw.CONTEXT_VERSION_MAJOR, 3)
+        _glfw.window_hint(_glfw.CONTEXT_VERSION_MINOR, 3)
+        _glfw.window_hint(_glfw.OPENGL_PROFILE, _glfw.OPENGL_CORE_PROFILE)
+
+        logger.debug("_init_glfw: calling create_window()")
+        window = _glfw.create_window(64, 64, "ComfyUI GLSL", None, None)
+        if not window:
+            raise RuntimeError("glfw.create_window() failed")
+
+        logger.debug("_init_glfw: calling make_context_current()")
+        _glfw.make_context_current(window)
+        logger.debug("_init_glfw: completed successfully")
+        return window, _glfw
+    except Exception:
+        # glfw.init() succeeded, so undo it before propagating the error.
+        logger.debug("_init_glfw: failed, terminating glfw")
+        _glfw.terminate()
+        raise
+
+
+def _init_egl():
+    """Initialize EGL for headless rendering.
+
+    Creates an OpenGL 3.3 core profile context bound to a 64x64 pbuffer
+    surface on the default EGL display and makes it current.
+
+    Returns:
+        ``(display, context, surface, EGL_module)``.
+
+    Raises:
+        RuntimeError: when any EGL setup step reports failure. Resources
+            created before the failing step are released first.
+    """
+    logger.debug("_init_egl: starting")
+    from OpenGL import EGL as _EGL
+    from OpenGL.EGL import (
+        eglGetDisplay, eglInitialize, eglChooseConfig, eglCreateContext,
+        eglMakeCurrent, eglCreatePbufferSurface, eglBindAPI,
+        eglTerminate, eglDestroyContext, eglDestroySurface,
+        EGL_DEFAULT_DISPLAY, EGL_NO_CONTEXT, EGL_NONE,
+        EGL_SURFACE_TYPE, EGL_PBUFFER_BIT, EGL_RENDERABLE_TYPE, EGL_OPENGL_BIT,
+        EGL_RED_SIZE, EGL_GREEN_SIZE, EGL_BLUE_SIZE, EGL_ALPHA_SIZE, EGL_DEPTH_SIZE,
+        EGL_WIDTH, EGL_HEIGHT, EGL_OPENGL_API,
+    )
+    logger.debug("_init_egl: imports completed")
+
+    # Each handle stays None until it refers to a resource that must be
+    # released; the cleanup path below relies on that invariant.
+    display = None
+    context = None
+    surface = None
+
+    try:
+        logger.debug("_init_egl: calling eglGetDisplay()")
+        display = eglGetDisplay(EGL_DEFAULT_DISPLAY)
+        if display == _EGL.EGL_NO_DISPLAY:
+            display = None  # No display obtained, don't terminate
+            raise RuntimeError("eglGetDisplay() failed")
+
+        logger.debug("_init_egl: calling eglInitialize()")
+        major, minor = _EGL.EGLint(), _EGL.EGLint()
+        if not eglInitialize(display, major, minor):
+            display = None  # Not initialized, don't terminate
+            raise RuntimeError("eglInitialize() failed")
+        logger.debug(f"_init_egl: EGL version {major.value}.{minor.value}")
+
+        config_attribs = [
+            EGL_SURFACE_TYPE, EGL_PBUFFER_BIT,
+            EGL_RENDERABLE_TYPE, EGL_OPENGL_BIT,
+            EGL_RED_SIZE, 8, EGL_GREEN_SIZE, 8, EGL_BLUE_SIZE, 8, EGL_ALPHA_SIZE, 8,
+            EGL_DEPTH_SIZE, 0, EGL_NONE
+        ]
+        configs = (_EGL.EGLConfig * 1)()
+        num_configs = _EGL.EGLint()
+        if not eglChooseConfig(display, config_attribs, configs, 1, num_configs) or num_configs.value == 0:
+            raise RuntimeError("eglChooseConfig() failed")
+        config = configs[0]
+        logger.debug(f"_init_egl: config chosen, num_configs={num_configs.value}")
+
+        if not eglBindAPI(EGL_OPENGL_API):
+            raise RuntimeError("eglBindAPI() failed")
+
+        logger.debug("_init_egl: calling eglCreateContext()")
+        context_attribs = [
+            _EGL.EGL_CONTEXT_MAJOR_VERSION, 3,
+            _EGL.EGL_CONTEXT_MINOR_VERSION, 3,
+            _EGL.EGL_CONTEXT_OPENGL_PROFILE_MASK, _EGL.EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT,
+            EGL_NONE
+        ]
+        context = eglCreateContext(display, config, EGL_NO_CONTEXT, context_attribs)
+        if context == EGL_NO_CONTEXT:
+            context = None  # Nothing was created, don't destroy
+            raise RuntimeError("eglCreateContext() failed")
+
+        logger.debug("_init_egl: calling eglCreatePbufferSurface()")
+        pbuffer_attribs = [EGL_WIDTH, 64, EGL_HEIGHT, 64, EGL_NONE]
+        surface = eglCreatePbufferSurface(display, config, pbuffer_attribs)
+        if surface == _EGL.EGL_NO_SURFACE:
+            surface = None  # Nothing was created, don't destroy
+            raise RuntimeError("eglCreatePbufferSurface() failed")
+
+        logger.debug("_init_egl: calling eglMakeCurrent()")
+        if not eglMakeCurrent(display, surface, surface, context):
+            raise RuntimeError("eglMakeCurrent() failed")
+
+        logger.debug("_init_egl: completed successfully")
+        return display, context, surface, _EGL
+
+    except Exception:
+        logger.debug("_init_egl: failed, cleaning up")
+        # Clean up any resources on failure. Handles are None unless they
+        # refer to a live resource, so no EGL_NO_* sentinel is passed to
+        # the destroy/terminate calls.
+        if surface is not None:
+            eglDestroySurface(display, surface)
+        if context is not None:
+            eglDestroyContext(display, context)
+        if display is not None:
+            eglTerminate(display)
+        raise
+
+
+def _init_osmesa():
+    """Initialize OSMesa for software rendering.
+
+    Creates an RGBA OSMesa context and makes it current on a 64x64
+    unsigned-byte buffer allocated here.
+
+    Returns:
+        ``(context, buffer)``.
+
+    Raises:
+        RuntimeError: when context creation or ``OSMesaMakeCurrent`` fails.
+    """
+    import ctypes
+
+    logger.debug("_init_osmesa: starting")
+    # NOTE(review): presumably PyOpenGL reads this variable when OpenGL is
+    # first imported; if an earlier backend attempt already imported OpenGL
+    # this assignment may have no effect - confirm.
+    os.environ["PYOPENGL_PLATFORM"] = "osmesa"
+
+    logger.debug("_init_osmesa: importing OpenGL.osmesa")
+    from OpenGL import GL as _gl
+    from OpenGL.osmesa import (
+        OSMesaCreateContextExt, OSMesaMakeCurrent, OSMesaDestroyContext,
+        OSMESA_RGBA,
+    )
+    logger.debug("_init_osmesa: imports completed")
+
+    # Arguments after the format are presumably depth bits, stencil bits,
+    # accumulation bits and share list - verify against the OSMesa API.
+    ctx = OSMesaCreateContextExt(OSMESA_RGBA, 24, 0, 0, None)
+    if not ctx:
+        raise RuntimeError("OSMesaCreateContextExt() failed")
+
+    # Backing store: 4 bytes per pixel for a 64x64 target.
+    width, height = 64, 64
+    buffer = (ctypes.c_ubyte * (width * height * 4))()
+
+    logger.debug("_init_osmesa: calling OSMesaMakeCurrent()")
+    if not OSMesaMakeCurrent(ctx, buffer, _gl.GL_UNSIGNED_BYTE, width, height):
+        # Release the context created above before reporting the failure.
+        OSMesaDestroyContext(ctx)
+        raise RuntimeError("OSMesaMakeCurrent() failed")
+
+    logger.debug("_init_osmesa: completed successfully")
+    return ctx, buffer
+
+
 class GLContext:
-    """Manages an OpenGL ES 3.0 context via EGL/ANGLE (singleton)."""
+    """Manages OpenGL context and resources for shader execution.
+
+    Tries backends in order: GLFW (desktop) → EGL (headless GPU) → OSMesa (software).
+    """

    _instance = None
    _initialized = False
@@ -159,111 +325,131 @@ class GLContext:

    def __init__(self):
        if GLContext._initialized:
+            logger.debug("GLContext.__init__: already initialized, skipping")
            return

+        logger.debug("GLContext.__init__: starting initialization")
+
+        global glfw, EGL
+
        import time
        start = time.perf_counter()

-        self._display = None
-        self._surface = None
-        self._context = None
+        self._backend = None
+        self._window = None
+        self._egl_display = None
+        self._egl_context = None
+        self._egl_surface = None
+        self._osmesa_ctx = None
+        self._osmesa_buffer = None
        self._vao = None

+        # Try backends in order: GLFW → EGL → OSMesa
+        errors = []
+
+        logger.debug("GLContext.__init__: trying GLFW backend")
        try:
-            self._display = EGL.eglGetDisplay(EGL.EGL_DEFAULT_DISPLAY)
-            if not self._display:
-                raise RuntimeError("eglGetDisplay() returned no display")
+            self._window, glfw = _init_glfw()
+            self._backend = "glfw"
+            logger.debug("GLContext.__init__: GLFW backend succeeded")
+        except Exception as e:
+            logger.debug(f"GLContext.__init__: GLFW backend failed: {e}")
+            errors.append(("GLFW", e))

-            major, minor = ctypes.c_int32(0), ctypes.c_int32(0)
-            if not EGL.eglInitialize(self._display, ctypes.byref(major), ctypes.byref(minor)):
-                err = EGL.eglGetError()
-                self._display = None
-                raise RuntimeError(f"eglInitialize() failed (EGL error: 0x{err:04X})")
+        if self._backend is None:
+            logger.debug("GLContext.__init__: trying EGL backend")
+            try:
+                self._egl_display, self._egl_context, self._egl_surface, EGL = _init_egl()
+                self._backend = "egl"
+                logger.debug("GLContext.__init__: EGL backend succeeded")
+            except Exception as e:
+                logger.debug(f"GLContext.__init__: EGL backend failed: {e}")
+                errors.append(("EGL", e))

-            if not EGL.eglBindAPI(EGL.EGL_OPENGL_ES_API):
-                raise RuntimeError("eglBindAPI(EGL_OPENGL_ES_API) failed")
+        if self._backend is None:
+            logger.debug("GLContext.__init__: trying OSMesa backend")
+            try:
+                self._osmesa_ctx, self._osmesa_buffer = _init_osmesa()
+                self._backend = "osmesa"
+                logger.debug("GLContext.__init__: OSMesa backend succeeded")
+            except Exception as e:
+                logger.debug(f"GLContext.__init__: OSMesa backend failed: {e}")
+                errors.append(("OSMesa", e))

-            config = EGL.EGLConfig()
-            n_configs = ctypes.c_int32(0)
-            if not EGL.eglChooseConfig(
-                self._display,
-                _egl_attribs(
-                    EGL.EGL_RENDERABLE_TYPE, EGL.EGL_OPENGL_ES3_BIT,
-                    EGL.EGL_SURFACE_TYPE, EGL.EGL_PBUFFER_BIT,
-                    EGL.EGL_RED_SIZE, 8, EGL.EGL_GREEN_SIZE, 8,
-                    EGL.EGL_BLUE_SIZE, 8, EGL.EGL_ALPHA_SIZE, 8,
-                ),
-                ctypes.byref(config), 1, ctypes.byref(n_configs),
-            ) or n_configs.value == 0:
-                raise RuntimeError("eglChooseConfig() failed")
+        if self._backend is None:
+            if sys.platform == "win32":
+                platform_help = (
+                    "Windows: Ensure GPU drivers are installed and display is available.\n"
+                    "         CPU-only/headless mode is not supported on Windows."
+                )
+            elif sys.platform == "darwin":
+                platform_help = (
+                    "macOS: GLFW is not supported.\n"
+                    "  Install OSMesa via Homebrew: brew install mesa\n"
+                    "  Then: pip install PyOpenGL PyOpenGL-accelerate"
+                )
+            else:
+                platform_help = (
+                    "Linux: Install one of these backends:\n"
+                    "  Desktop:           sudo apt install libgl1-mesa-glx libglfw3\n"
+                    "  Headless with GPU: sudo apt install libegl1-mesa libgl1-mesa-dri\n"
+                    "  Headless (CPU):    sudo apt install libosmesa6"
+                )

-            self._surface = EGL.eglCreatePbufferSurface(
-                self._display, config,
-                _egl_attribs(EGL.EGL_WIDTH, 64, EGL.EGL_HEIGHT, 64),
+            error_details = "\n".join(f"  {name}: {err}" for name, err in errors)
+            raise RuntimeError(
+                f"Failed to create OpenGL context.\n\n"
+                f"Backend errors:\n{error_details}\n\n"
+                f"{platform_help}"
            )
-            if not self._surface:
-                raise RuntimeError("eglCreatePbufferSurface() failed")

-            self._context = EGL.eglCreateContext(
-                self._display, config, EGL.EGL_NO_CONTEXT,
-                _egl_attribs(EGL.EGL_CONTEXT_CLIENT_VERSION, 3),
-            )
-            if not self._context:
-                raise RuntimeError("eglCreateContext() failed")
+        # Now import OpenGL.GL (after context is current)
+        logger.debug("GLContext.__init__: importing OpenGL.GL")
+        _import_opengl()

-            if not EGL.eglMakeCurrent(self._display, self._surface, self._surface, self._context):
-                raise RuntimeError("eglMakeCurrent() failed")
-
-            self._vao = gl.glGenVertexArrays(1)
-            gl.glBindVertexArray(self._vao)
-
-        except Exception:
-            self._cleanup()
-            raise
+        # Create VAO (required for core profile, but OSMesa may use compat profile)
+        logger.debug("GLContext.__init__: creating VAO")
+        try:
+            vao = gl.glGenVertexArrays(1)
+            gl.glBindVertexArray(vao)
+            self._vao = vao  # Only store after successful bind
+            logger.debug("GLContext.__init__: VAO created successfully")
+        except Exception as e:
+            logger.debug(f"GLContext.__init__: VAO creation failed (may be expected for OSMesa): {e}")
+            # OSMesa with older Mesa may not support VAOs
+            # Clean up if we created but couldn't bind
+            if vao:
+                try:
+                    gl.glDeleteVertexArrays(1, [vao])
+                except Exception:
+                    pass

        elapsed = (time.perf_counter() - start) * 1000

-        renderer = _gl_str(gl.GL_RENDERER)
-        vendor = _gl_str(gl.GL_VENDOR)
-        version = _gl_str(gl.GL_VERSION)
+        # Log device info
+        renderer = gl.glGetString(gl.GL_RENDERER)
+        vendor = gl.glGetString(gl.GL_VENDOR)
+        version = gl.glGetString(gl.GL_VERSION)
+        renderer = renderer.decode() if renderer else "Unknown"
+        vendor = vendor.decode() if vendor else "Unknown"
+        version = version.decode() if version else "Unknown"

        GLContext._initialized = True
-        logger.info(f"GLSL context initialized in {elapsed:.1f}ms - {renderer} ({vendor}), GL {version}")
+        logger.info(f"GLSL context initialized in {elapsed:.1f}ms ({self._backend}) - {renderer} ({vendor}), GL {version}")

    def make_current(self):
-        EGL.eglMakeCurrent(self._display, self._surface, self._surface, self._context)
+        if self._backend == "glfw":
+            glfw.make_context_current(self._window)
+        elif self._backend == "egl":
+            from OpenGL.EGL import eglMakeCurrent
+            eglMakeCurrent(self._egl_display, self._egl_surface, self._egl_surface, self._egl_context)
+        elif self._backend == "osmesa":
+            from OpenGL.osmesa import OSMesaMakeCurrent
+            OSMesaMakeCurrent(self._osmesa_ctx, self._osmesa_buffer, gl.GL_UNSIGNED_BYTE, 64, 64)
+
        if self._vao is not None:
            gl.glBindVertexArray(self._vao)

-    def _cleanup(self):
-        if not self._display:
-            return
-        try:
-            if self._vao is not None:
-                gl.glDeleteVertexArrays(1, [self._vao])
-                self._vao = None
-        except Exception:
-            pass
-        try:
-            EGL.eglMakeCurrent(self._display, EGL.EGL_NO_SURFACE, EGL.EGL_NO_SURFACE, EGL.EGL_NO_CONTEXT)
-        except Exception:
-            pass
-        try:
-            if self._context:
-                EGL.eglDestroyContext(self._display, self._context)
-        except Exception:
-            pass
-        try:
-            if self._surface:
-                EGL.eglDestroySurface(self._display, self._surface)
-        except Exception:
-            pass
-        try:
-            EGL.eglTerminate(self._display)
-        except Exception:
-            pass
-        self._display = None
-

 def _compile_shader(source: str, shader_type: int) -> int:
    """Compile a shader and return its ID."""
@@ -271,10 +457,8 @@ def _compile_shader(source: str, shader_type: int) -> int:
    gl.glShaderSource(shader, source)
    gl.glCompileShader(shader)

-    if not gl.glGetShaderiv(shader, gl.GL_COMPILE_STATUS):
-        error = gl.glGetShaderInfoLog(shader)
-        if isinstance(error, bytes):
-            error = error.decode(errors="replace")
+    if gl.glGetShaderiv(shader, gl.GL_COMPILE_STATUS) != gl.GL_TRUE:
+        error = gl.glGetShaderInfoLog(shader).decode()
        gl.glDeleteShader(shader)
        raise RuntimeError(f"Shader compilation failed:\n{error}")

@@ -298,10 +482,8 @@ def _create_program(vertex_source: str, fragment_source: str) -> int:
    gl.glDeleteShader(vertex_shader)
    gl.glDeleteShader(fragment_shader)

-    if not gl.glGetProgramiv(program, gl.GL_LINK_STATUS):
-        error = gl.glGetProgramInfoLog(program)
-        if isinstance(error, bytes):
-            error = error.decode(errors="replace")
+    if gl.glGetProgramiv(program, gl.GL_LINK_STATUS) != gl.GL_TRUE:
+        error = gl.glGetProgramInfoLog(program).decode()
        gl.glDeleteProgram(program)
        raise RuntimeError(f"Program linking failed:\n{error}")

@@ -342,6 +524,9 @@ def _render_shader_batch(
    ctx = GLContext()
    ctx.make_current()

+    # Convert from GLSL ES to desktop GLSL 330
+    fragment_source = _convert_es_to_desktop(fragment_code)
+
    # Detect how many outputs the shader actually uses
    num_outputs = _detect_output_count(fragment_code)

@@ -361,9 +546,9 @@ def _render_shader_batch(
    try:
        # Compile shaders (once for all batches)
        try:
-            program = _create_program(VERTEX_SHADER, fragment_code)
+            program = _create_program(VERTEX_SHADER, fragment_source)
        except RuntimeError:
-            logger.error(f"Fragment shader:\n{fragment_code}")
+            logger.error(f"Fragment shader:\n{fragment_source}")
            raise

        gl.glUseProgram(program)
@@ -504,13 +689,13 @@ def _render_shader_batch(
                    gl.glDrawArrays(gl.GL_TRIANGLES, 0, 3)

            # Read back outputs for this batch
-            gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, fbo)
+            # (glGetTexImage is synchronous, implicitly waits for rendering)
            batch_outputs = []
-            for i in range(num_outputs):
-                gl.glReadBuffer(gl.GL_COLOR_ATTACHMENT0 + i)
-                buf = np.empty((height, width, 4), dtype=np.float32)
-                gl.glReadPixels(0, 0, width, height, gl.GL_RGBA, gl.GL_FLOAT, buf)
-                batch_outputs.append(buf[::-1, :, :].copy())
+            for tex in output_textures:
+                gl.glBindTexture(gl.GL_TEXTURE_2D, tex)
+                data = gl.glGetTexImage(gl.GL_TEXTURE_2D, 0, gl.GL_RGBA, gl.GL_FLOAT)
+                img = np.frombuffer(data, dtype=np.float32).reshape(height, width, 4)
+                batch_outputs.append(img[::-1, :, :].copy())

            # Pad with black images for unused outputs
            black_img = np.zeros((height, width, 4), dtype=np.float32)
@@ -531,16 +716,16 @@ def _render_shader_batch(
        gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, 0)
        gl.glUseProgram(0)

-        if input_textures:
-            gl.glDeleteTextures(len(input_textures), input_textures)
-        if output_textures:
-            gl.glDeleteTextures(len(output_textures), output_textures)
-        if ping_pong_textures:
-            gl.glDeleteTextures(len(ping_pong_textures), ping_pong_textures)
+        for tex in input_textures:
+            gl.glDeleteTextures(int(tex))
+        for tex in output_textures:
+            gl.glDeleteTextures(int(tex))
+        for tex in ping_pong_textures:
+            gl.glDeleteTextures(int(tex))
        if fbo is not None:
            gl.glDeleteFramebuffers(1, [fbo])
-        if ping_pong_fbos:
-            gl.glDeleteFramebuffers(len(ping_pong_fbos), ping_pong_fbos)
+        for pp_fbo in ping_pong_fbos:
+            gl.glDeleteFramebuffers(1, [pp_fbo])
        if program is not None:
            gl.glDeleteProgram(program)

--- a/comfy_extras/nodes_painter.py
+++ b/comfy_extras/nodes_painter.py
@@ -0,0 +1,127 @@
+from __future__ import annotations
+
+import hashlib
+import os
+
+import numpy as np
+import torch
+from PIL import Image
+
+import folder_paths
+import node_helpers
+from comfy_api.latest import ComfyExtension, io, UI
+from typing_extensions import override
+
+
+def hex_to_rgb(hex_color: str) -> tuple[float, float, float]:
+    """Convert a hex color string to normalized RGB floats.
+
+    Accepts ``"#rrggbb"`` and the shorthand ``"#rgb"``; the leading ``#`` is
+    optional and surrounding whitespace is ignored. Any other input (wrong
+    length, non-hex characters, non-string value) yields black instead of
+    raising, so a malformed color value cannot abort execution.
+
+    Args:
+        hex_color: Color string such as ``"#1a2b3c"``.
+
+    Returns:
+        ``(r, g, b)`` with each channel in the range 0.0-1.0.
+    """
+    black = (0.0, 0.0, 0.0)
+    if not isinstance(hex_color, str):
+        return black
+    digits = hex_color.strip().lstrip("#")
+    if len(digits) == 3:
+        # Expand shorthand: "f0a" -> "ff00aa".
+        digits = "".join(ch * 2 for ch in digits)
+    if len(digits) != 6:
+        return black
+    # Validate explicitly: int(text, 16) also accepts signs and whitespace,
+    # and raises ValueError on other characters.
+    if any(ch not in "0123456789abcdefABCDEF" for ch in digits):
+        return black
+    r = int(digits[0:2], 16) / 255.0
+    g = int(digits[2:4], 16) / 255.0
+    b = int(digits[4:6], 16) / 255.0
+    return (r, g, b)
+
+
+class PainterNode(io.ComfyNode):
+    """Node that composites a painted RGBA layer over a base image.
+
+    Outputs the composited image and the painted layer's alpha as a mask.
+    """
+    @classmethod
+    def define_schema(cls):
+        """Declare the node's inputs (image, mask, width, height, bg_color) and outputs."""
+        return io.Schema(
+            node_id="Painter",
+            display_name="Painter",
+            category="image",
+            inputs=[
+                io.Image.Input(
+                    "image",
+                    optional=True,
+                    tooltip="Optional base image to paint over",
+                ),
+                # String widget rendered as the painter canvas; execute()
+                # treats its value as an annotated file path to the painted layer.
+                io.String.Input(
+                    "mask",
+                    default="",
+                    socketless=True,
+                    extra_dict={"widgetType": "PAINTER", "image_upload": True},
+                ),
+                io.Int.Input(
+                    "width",
+                    default=512,
+                    min=64,
+                    max=4096,
+                    step=64,
+                    socketless=True,
+                    extra_dict={"hidden": True},
+                ),
+                io.Int.Input(
+                    "height",
+                    default=512,
+                    min=64,
+                    max=4096,
+                    step=64,
+                    socketless=True,
+                    extra_dict={"hidden": True},
+                ),
+                io.Color.Input("bg_color", default="#000000"),
+            ],
+            outputs=[
+                io.Image.Output("IMAGE"),
+                io.Mask.Output("MASK"),
+            ],
+        )
+
+    @classmethod
+    def execute(cls, mask, width, height, bg_color="#000000", image=None) -> io.NodeOutput:
+        """Build the base image, composite the painted layer and return both outputs.
+
+        When ``image`` is given, its first batch entry is the base and its
+        size wins over ``width``/``height``; otherwise the base is a solid
+        ``bg_color`` canvas of ``height`` x ``width``.
+        """
+        if image is not None:
+            # Only the first image of the batch is used.
+            base_image = image[:1]
+            h, w = base_image.shape[1], base_image.shape[2]
+        else:
+            h, w = height, width
+            r, g, b = hex_to_rgb(bg_color)
+            base_image = torch.zeros((1, h, w, 3), dtype=torch.float32)
+            base_image[0, :, :, 0] = r
+            base_image[0, :, :, 1] = g
+            base_image[0, :, :, 2] = b
+
+        if mask and mask.strip():
+            mask_path = folder_paths.get_annotated_filepath(mask)
+            painter_img = node_helpers.pillow(Image.open, mask_path)
+            painter_img = painter_img.convert("RGBA")
+
+            # Bring the painted layer to the base size (PIL sizes are (width, height)).
+            if painter_img.size != (w, h):
+                painter_img = painter_img.resize((w, h), Image.LANCZOS)
+
+            painter_np = np.array(painter_img).astype(np.float32) / 255.0
+            painter_rgb = painter_np[:, :, :3]
+            painter_alpha = painter_np[:, :, 3:4]
+
+            # The mask output is the painted layer's alpha channel.
+            mask_tensor = torch.from_numpy(painter_np[:, :, 3]).unsqueeze(0)
+
+            # NOTE(review): the blend below assumes the base image has 3
+            # channels to match painter_rgb - confirm for RGBA inputs.
+            base_np = base_image[0].cpu().numpy()
+            composited = painter_rgb * painter_alpha + base_np * (1.0 - painter_alpha)
+            out_image = torch.from_numpy(composited).unsqueeze(0)
+        else:
+            # Nothing painted: empty mask, base image passes through.
+            mask_tensor = torch.zeros((1, h, w), dtype=torch.float32)
+            out_image = base_image
+
+        return io.NodeOutput(out_image, mask_tensor, ui=UI.PreviewImage(out_image))
+
+    @classmethod
+    def fingerprint_inputs(cls, mask, width, height, bg_color="#000000", image=None):
+        """Return the SHA-256 hex digest of the painted file, or ``""``.
+
+        The digest is computed only when ``mask`` is non-blank and the
+        resolved file exists. Presumably the framework uses this value to
+        detect changed inputs - verify against the caller.
+        """
+        if mask and mask.strip():
+            mask_path = folder_paths.get_annotated_filepath(mask)
+            if os.path.exists(mask_path):
+                m = hashlib.sha256()
+                with open(mask_path, "rb") as f:
+                    m.update(f.read())
+                return m.digest().hex()
+        return ""
+
+
+
+class PainterExtension(ComfyExtension):
+    """Extension that exposes ``PainterNode``."""
+    @override
+    async def get_node_list(self):
+        """Return the node classes provided by this extension."""
+        return [PainterNode]
+
+
+async def comfy_entrypoint():
+    """Return a new ``PainterExtension`` instance."""
+    return PainterExtension()
--- a/nodes.py
+++ b/nodes.py
@@ -2450,6 +2450,7 @@ async def init_builtin_extra_nodes():
        "nodes_nag.py",
        "nodes_sdpose.py",
        "nodes_math.py",
+        "nodes_painter.py",
    ]

    import_failed = []
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-comfyui-frontend-package==1.39.19
+comfyui-frontend-package==1.41.16
 comfyui-workflow-templates==0.9.18
 comfyui-embedded-docs==0.4.3
 torch
@@ -22,8 +22,8 @@ alembic
 SQLAlchemy
 filelock
 av>=14.2.0
-comfy-kitchen>=0.2.7
-comfy-aimdo>=0.2.9
+comfy-kitchen>=0.2.8
+comfy-aimdo>=0.2.10
 requests
 simpleeval>=1.0.0
 blake3
@@ -33,4 +33,5 @@ kornia>=0.7.1
 spandrel
 pydantic~=2.0
 pydantic-settings~=2.0
-PyOpenGL>=3.1.8
+PyOpenGL
+glfw
Author	SHA1	Message	Date
Jedrzej Kosinski	eb7be530e3	Merge branch 'master' into fix/gradient-stops-format	2026-03-12 09:55:42 -07:00
Terry Jia	73d9599495	add painter node (#12294 ) * add painter node * use io.Color * code improve --------- Co-authored-by: guill <jacob.e.segal@gmail.com>	2026-03-12 09:55:29 -07:00
guill	6c79a3cb68	Merge branch 'master' into fix/gradient-stops-format	2026-03-12 09:45:48 -07:00
comfyanonymous	44f1246c89	Support flux 2 klein kv cache model: Use the FluxKVCache node. (#12905 )	2026-03-12 11:30:50 -04:00
comfyanonymous	8f9ea49571	Bump comfy-kitchen version to 0.2.8 (#12895 )	2026-03-12 00:17:31 -04:00
Comfy Org PR Bot	9ce4c3dd87	Bump comfyui-frontend-package to 1.41.16 (#12894 ) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>	2026-03-11 18:16:30 -07:00
Comfy Org PR Bot	abc87d3669	Bump comfyui-frontend-package to 1.41.15 (#12891 ) --------- Co-authored-by: Alexander Brown <DrJKL0424@gmail.com>	2026-03-11 17:04:51 -04:00
comfyanonymous	f6274c06b4	Fix issue with batch_size > 1 on some models. (#12892 )	2026-03-11 16:37:31 -04:00
Adi Borochov	4f4f8659c2	fix: guard torch.AcceleratorError for compatibility with torch < 2.8.0 (#12874 ) * fix: guard torch.AcceleratorError for compatibility with torch < 2.8.0 torch.AcceleratorError was introduced in PyTorch 2.8.0. Accessing it directly raises AttributeError on older versions. Use a try/except fallback at module load time, consistent with the existing pattern used for OOM_EXCEPTION. * fix: address review feedback for AcceleratorError compat - Fall back to RuntimeError instead of type(None) for ACCELERATOR_ERROR, consistent with OOM_EXCEPTION fallback pattern and valid for except clauses - Add "out of memory" message introspection for RuntimeError fallback case - Use RuntimeError directly in discard_cuda_async_error except clause ---------	2026-03-11 10:04:13 -07:00
Alexander Piskun	3365008dfe	feat(api-nodes): add Reve Image nodes (#12848 )	2026-03-11 09:53:55 -07:00
rattus	980621da83	comfy-aimdo 0.2.10 (#12890 ) Comfy Aimdo 0.2.10 fixes the aimdo allocator hook for legacy cudaMalloc consumers. Some consumers of cudaMalloc assume implicit synchronization built in closed source logic inside cuda. This is preserved by passing through to cuda as-is and accounting after the fact as opposed to integrating these hooks with Aimdo's VMA-based allocator.	2026-03-11 08:49:38 -07:00
Terry Jia	ca597d2182	fix: use frontend-compatible format for Float gradient_stops	2026-03-05 10:29:54 -05:00