fix: prevent --cpu flag from allocating GPU memory

Two root causes are fixed:

1. soft_empty_cache() and synchronize() in model_management.py lacked a cpu_state == CPUState.CPU guard, so they fell through to torch.cuda calls that initialize a CUDA context (150-500MB of VRAM) even in CPU-only mode.
2. comfy_kitchen was imported unconditionally at startup via quant_ops.py. The import chain triggers torch.cuda.is_available() -> cuInit, which initializes the CUDA driver. The import is now gated behind an args.cpu check.

This also adds the missing QuantizedLayout and register_layout_op fallback stubs that were absent from the original ImportError handler.

Amp-Thread-ID: https://ampcode.com/threads/T-019cbd03-433e-7601-93ff-3887227496b4
2026-03-14 01:29:58 +00:00 · 2026-03-05 12:32:46 -08:00
10 changed files with 59 additions and 151 deletions
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -1666,16 +1666,12 @@ def lora_compute_dtype(device):
    return dtype
 def synchronize():
    if cpu_mode():
        return
    if is_intel_xpu():
        torch.xpu.synchronize()
    elif torch.cuda.is_available():
        torch.cuda.synchronize()
 def soft_empty_cache(force=False):
    if cpu_mode():
        return
    global cpu_state
    if cpu_state == CPUState.MPS:
        torch.mps.empty_cache()
--- a/comfy/ops.py
+++ b/comfy/ops.py
@@ -80,21 +80,6 @@ def cast_to_input(weight, input, non_blocking=False, copy=True):
 def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compute_dtype, want_requant):
    #vbar doesn't support CPU weights, but some custom nodes have weird paths
    #that might switch the layer to the CPU and expect it to work. We have to take
    #a clone conservatively as we are mmapped and some SFT files are packed misaligned
    #If you are a custom node author reading this, please move your layer to the GPU
    #or declare your ModelPatcher as CPU in the first place.
    if comfy.model_management.is_device_cpu(device):
        weight = s.weight.to(dtype=dtype, copy=True)
        if isinstance(weight, QuantizedTensor):
            weight = weight.dequantize()
        bias = None
        if s.bias is not None:
            bias = s.bias.to(dtype=bias_dtype, copy=True)
        return weight, bias, (None, None, None)
    offload_stream = None
    xfer_dest = None
@@ -675,29 +660,23 @@ class fp8_ops(manual_cast):
 CUBLAS_IS_AVAILABLE = False
 try:
-    from cublas_ops import CublasLinear, cublas_half_matmul
+    from cublas_ops import CublasLinear
    CUBLAS_IS_AVAILABLE = True
 except ImportError:
    pass
 if CUBLAS_IS_AVAILABLE:
-    class cublas_ops(manual_cast):
+    class cublas_ops(disable_weight_init):
-        class Linear(CublasLinear, manual_cast.Linear):
+        class Linear(CublasLinear, disable_weight_init.Linear):
            def reset_parameters(self):
                return None
            def forward_comfy_cast_weights(self, input):
-                weight, bias, offload_stream = cast_bias_weight(self, input, offloadable=True)
+                return super().forward(input)
                x = cublas_half_matmul(input, weight, bias, self._epilogue_str, self.has_bias)
                uncast_bias_weight(self, weight, bias, offload_stream)
                return x
            def forward(self, *args, **kwargs):
-                run_every_op()
+                return super().forward(*args, **kwargs)
-                if self.comfy_cast_weights or len(self.weight_function) > 0 or len(self.bias_function) > 0:
+
                    return self.forward_comfy_cast_weights(*args, **kwargs)
                else:
                    return super().forward(*args, **kwargs)
 # ==============================================================================
 # Mixed Precision Operations
--- a/comfy/quant_ops.py
+++ b/comfy/quant_ops.py
@@ -1,33 +1,38 @@
 import torch
 import logging
 from comfy.cli_args import args
-try:
+if args.cpu:
    import comfy_kitchen as ck
    from comfy_kitchen.tensor import (
        QuantizedTensor,
        QuantizedLayout,
        TensorCoreFP8Layout as _CKFp8Layout,
        TensorCoreNVFP4Layout as _CKNvfp4Layout,
        register_layout_op,
        register_layout_class,
        get_layout_class,
    )
    _CK_AVAILABLE = True
    if torch.version.cuda is None:
        ck.registry.disable("cuda")
    else:
        cuda_version = tuple(map(int, str(torch.version.cuda).split('.')))
        if cuda_version < (13,):
            ck.registry.disable("cuda")
            logging.warning("WARNING: You need pytorch with cu130 or higher to use optimized CUDA operations.")
    ck.registry.disable("triton")
    for k, v in ck.list_backends().items():
        logging.info(f"Found comfy_kitchen backend {k}: {v}")
 except ImportError as e:
    logging.error(f"Failed to import comfy_kitchen, Error: {e}, fp8 and fp4 support will not be available.")
    _CK_AVAILABLE = False
 else:
    try:
        import comfy_kitchen as ck
        from comfy_kitchen.tensor import (
            QuantizedTensor,
            QuantizedLayout,
            TensorCoreFP8Layout as _CKFp8Layout,
            TensorCoreNVFP4Layout as _CKNvfp4Layout,
            register_layout_op,
            register_layout_class,
            get_layout_class,
        )
        _CK_AVAILABLE = True
        if torch.version.cuda is None:
            ck.registry.disable("cuda")
        else:
            cuda_version = tuple(map(int, str(torch.version.cuda).split('.')))
            if cuda_version < (13,):
                ck.registry.disable("cuda")
                logging.warning("WARNING: You need pytorch with cu130 or higher to use optimized CUDA operations.")
        ck.registry.disable("triton")
        for k, v in ck.list_backends().items():
            logging.info(f"Found comfy_kitchen backend {k}: {v}")
    except ImportError as e:
        logging.error(f"Failed to import comfy_kitchen, Error: {e}, fp8 and fp4 support will not be available.")
        _CK_AVAILABLE = False
 if not _CK_AVAILABLE:
    class QuantizedTensor:
        pass
--- a/comfy_api_nodes/apis/grok.py
+++ b/comfy_api_nodes/apis/grok.py
@@ -7,8 +7,7 @@ class ImageGenerationRequest(BaseModel):
    aspect_ratio: str = Field(...)
    n: int = Field(...)
    seed: int = Field(...)
-    response_format: str = Field("url")
+    response_for: str = Field("url")
    resolution: str = Field(...)
 class InputUrlObject(BaseModel):
@@ -17,13 +16,12 @@ class InputUrlObject(BaseModel):
 class ImageEditRequest(BaseModel):
    model: str = Field(...)
-    images: list[InputUrlObject] = Field(...)
+    image: InputUrlObject = Field(...)
    prompt: str = Field(...)
    resolution: str = Field(...)
    n: int = Field(...)
    seed: int = Field(...)
-    response_format: str = Field("url")
+    response_for: str = Field("url")
    aspect_ratio: str | None = Field(...)
 class VideoGenerationRequest(BaseModel):
@@ -49,13 +47,8 @@ class ImageResponseObject(BaseModel):
    revised_prompt: str | None = Field(None)
 class UsageObject(BaseModel):
    cost_in_usd_ticks: int | None = Field(None)
 class ImageGenerationResponse(BaseModel):
    data: list[ImageResponseObject] = Field(...)
    usage: UsageObject | None = Field(None)
 class VideoGenerationResponse(BaseModel):
@@ -72,4 +65,3 @@ class VideoStatusResponse(BaseModel):
    status: str | None = Field(None)
    video: VideoResponseObject | None = Field(None)
    model: str | None = Field(None)
    usage: UsageObject | None = Field(None)
--- a/comfy_api_nodes/apis/kling.py
+++ b/comfy_api_nodes/apis/kling.py
@@ -148,4 +148,3 @@ class MotionControlRequest(BaseModel):
    keep_original_sound: str = Field(...)
    character_orientation: str = Field(...)
    mode: str = Field(..., description="'pro' or 'std'")
    model_name: str = Field(...)
--- a/comfy_api_nodes/nodes_grok.py
+++ b/comfy_api_nodes/nodes_grok.py
@@ -27,12 +27,6 @@ from comfy_api_nodes.util import (
 )
 def _extract_grok_price(response) -> float | None:
    if response.usage and response.usage.cost_in_usd_ticks is not None:
        return response.usage.cost_in_usd_ticks / 10_000_000_000
    return None
 class GrokImageNode(IO.ComfyNode):
    @classmethod
@@ -43,10 +37,7 @@ class GrokImageNode(IO.ComfyNode):
            category="api node/image/Grok",
            description="Generate images using Grok based on a text prompt",
            inputs=[
-                IO.Combo.Input(
+                IO.Combo.Input("model", options=["grok-imagine-image-beta"]),
                    "model",
                    options=["grok-imagine-image-pro", "grok-imagine-image", "grok-imagine-image-beta"],
                ),
                IO.String.Input(
                    "prompt",
                    multiline=True,
@@ -90,7 +81,6 @@ class GrokImageNode(IO.ComfyNode):
                    tooltip="Seed to determine if node should re-run; "
                    "actual results are nondeterministic regardless of seed.",
                ),
                IO.Combo.Input("resolution", options=["1K", "2K"], optional=True),
            ],
            outputs=[
                IO.Image.Output(),
@@ -102,13 +92,8 @@ class GrokImageNode(IO.ComfyNode):
            ],
            is_api_node=True,
            price_badge=IO.PriceBadge(
-                depends_on=IO.PriceBadgeDepends(widgets=["model", "number_of_images"]),
+                depends_on=IO.PriceBadgeDepends(widgets=["number_of_images"]),
-                expr="""
+                expr="""{"type":"usd","usd":0.033 * widgets.number_of_images}""",
                (
                  $rate := $contains(widgets.model, "pro") ? 0.07 : 0.02;
                  {"type":"usd","usd": $rate * widgets.number_of_images}
                )
                """,
            ),
        )
@@ -120,7 +105,6 @@ class GrokImageNode(IO.ComfyNode):
        aspect_ratio: str,
        number_of_images: int,
        seed: int,
        resolution: str = "1K",
    ) -> IO.NodeOutput:
        validate_string(prompt, strip_whitespace=True, min_length=1)
        response = await sync_op(
@@ -132,10 +116,8 @@ class GrokImageNode(IO.ComfyNode):
                aspect_ratio=aspect_ratio,
                n=number_of_images,
                seed=seed,
                resolution=resolution.lower(),
            ),
            response_model=ImageGenerationResponse,
            price_extractor=_extract_grok_price,
        )
        if len(response.data) == 1:
            return IO.NodeOutput(await download_url_to_image_tensor(response.data[0].url))
@@ -156,17 +138,14 @@ class GrokImageEditNode(IO.ComfyNode):
            category="api node/image/Grok",
            description="Modify an existing image based on a text prompt",
            inputs=[
-                IO.Combo.Input(
+                IO.Combo.Input("model", options=["grok-imagine-image-beta"]),
-                    "model",
+                IO.Image.Input("image"),
                    options=["grok-imagine-image-pro", "grok-imagine-image", "grok-imagine-image-beta"],
                ),
                IO.Image.Input("image", display_name="images"),
                IO.String.Input(
                    "prompt",
                    multiline=True,
                    tooltip="The text prompt used to generate the image",
                ),
-                IO.Combo.Input("resolution", options=["1K", "2K"]),
+                IO.Combo.Input("resolution", options=["1K"]),
                IO.Int.Input(
                    "number_of_images",
                    default=1,
@@ -187,27 +166,6 @@ class GrokImageEditNode(IO.ComfyNode):
                    tooltip="Seed to determine if node should re-run; "
                    "actual results are nondeterministic regardless of seed.",
                ),
                IO.Combo.Input(
                    "aspect_ratio",
                    options=[
                        "auto",
                        "1:1",
                        "2:3",
                        "3:2",
                        "3:4",
                        "4:3",
                        "9:16",
                        "16:9",
                        "9:19.5",
                        "19.5:9",
                        "9:20",
                        "20:9",
                        "1:2",
                        "2:1",
                    ],
                    optional=True,
                    tooltip="Only allowed when multiple images are connected to the image input.",
                ),
            ],
            outputs=[
                IO.Image.Output(),
@@ -219,13 +177,8 @@ class GrokImageEditNode(IO.ComfyNode):
            ],
            is_api_node=True,
            price_badge=IO.PriceBadge(
-                depends_on=IO.PriceBadgeDepends(widgets=["model", "number_of_images"]),
+                depends_on=IO.PriceBadgeDepends(widgets=["number_of_images"]),
-                expr="""
+                expr="""{"type":"usd","usd":0.002 + 0.033 * widgets.number_of_images}""",
                (
                  $rate := $contains(widgets.model, "pro") ? 0.07 : 0.02;
                  {"type":"usd","usd": 0.002 + $rate * widgets.number_of_images}
                )
                """,
            ),
        )
@@ -238,32 +191,22 @@ class GrokImageEditNode(IO.ComfyNode):
        resolution: str,
        number_of_images: int,
        seed: int,
        aspect_ratio: str = "auto",
    ) -> IO.NodeOutput:
        validate_string(prompt, strip_whitespace=True, min_length=1)
-        if model == "grok-imagine-image-pro":
+        if get_number_of_images(image) != 1:
-            if get_number_of_images(image) > 1:
+            raise ValueError("Only one input image is supported.")
                raise ValueError("The pro model supports only 1 input image.")
        elif get_number_of_images(image) > 3:
            raise ValueError("A maximum of 3 input images is supported.")
        if aspect_ratio != "auto" and get_number_of_images(image) == 1:
            raise ValueError(
                "Custom aspect ratio is only allowed when multiple images are connected to the image input."
            )
        response = await sync_op(
            cls,
            ApiEndpoint(path="/proxy/xai/v1/images/edits", method="POST"),
            data=ImageEditRequest(
                model=model,
-                images=[InputUrlObject(url=f"data:image/png;base64,{tensor_to_base64_string(i)}") for i in image],
+                image=InputUrlObject(url=f"data:image/png;base64,{tensor_to_base64_string(image)}"),
                prompt=prompt,
                resolution=resolution.lower(),
                n=number_of_images,
                seed=seed,
                aspect_ratio=None if aspect_ratio == "auto" else aspect_ratio,
            ),
            response_model=ImageGenerationResponse,
            price_extractor=_extract_grok_price,
        )
        if len(response.data) == 1:
            return IO.NodeOutput(await download_url_to_image_tensor(response.data[0].url))
@@ -284,7 +227,7 @@ class GrokVideoNode(IO.ComfyNode):
            category="api node/video/Grok",
            description="Generate video from a prompt or an image",
            inputs=[
-                IO.Combo.Input("model", options=["grok-imagine-video", "grok-imagine-video-beta"]),
+                IO.Combo.Input("model", options=["grok-imagine-video-beta"]),
                IO.String.Input(
                    "prompt",
                    multiline=True,
@@ -332,11 +275,10 @@ class GrokVideoNode(IO.ComfyNode):
            ],
            is_api_node=True,
            price_badge=IO.PriceBadge(
-                depends_on=IO.PriceBadgeDepends(widgets=["duration", "resolution"], inputs=["image"]),
+                depends_on=IO.PriceBadgeDepends(widgets=["duration"], inputs=["image"]),
                expr="""
                (
-                  $rate := widgets.resolution = "720p" ? 0.07 : 0.05;
+                  $base := 0.181 * widgets.duration;
                  $base := $rate * widgets.duration;
                  {"type":"usd","usd": inputs.image.connected ? $base + 0.002 : $base}
                )
                """,
@@ -379,7 +321,6 @@ class GrokVideoNode(IO.ComfyNode):
            ApiEndpoint(path=f"/proxy/xai/v1/videos/{initial_response.request_id}"),
            status_extractor=lambda r: r.status if r.status is not None else "complete",
            response_model=VideoStatusResponse,
            price_extractor=_extract_grok_price,
        )
        return IO.NodeOutput(await download_url_to_video_output(response.video.url))
@@ -394,7 +335,7 @@ class GrokVideoEditNode(IO.ComfyNode):
            category="api node/video/Grok",
            description="Edit an existing video based on a text prompt.",
            inputs=[
-                IO.Combo.Input("model", options=["grok-imagine-video", "grok-imagine-video-beta"]),
+                IO.Combo.Input("model", options=["grok-imagine-video-beta"]),
                IO.String.Input(
                    "prompt",
                    multiline=True,
@@ -423,7 +364,7 @@ class GrokVideoEditNode(IO.ComfyNode):
            ],
            is_api_node=True,
            price_badge=IO.PriceBadge(
-                expr="""{"type":"usd","usd": 0.06, "format": {"suffix": "/sec", "approximate": true}}""",
+                expr="""{"type":"usd","usd": 0.191, "format": {"suffix": "/sec", "approximate": true}}""",
            ),
        )
@@ -457,7 +398,6 @@ class GrokVideoEditNode(IO.ComfyNode):
            ApiEndpoint(path=f"/proxy/xai/v1/videos/{initial_response.request_id}"),
            status_extractor=lambda r: r.status if r.status is not None else "complete",
            response_model=VideoStatusResponse,
            price_extractor=_extract_grok_price,
        )
        return IO.NodeOutput(await download_url_to_video_output(response.video.url))
--- a/comfy_api_nodes/nodes_kling.py
+++ b/comfy_api_nodes/nodes_kling.py
@@ -2747,7 +2747,6 @@ class MotionControl(IO.ComfyNode):
                    "but the character orientation matches the reference image (camera/other details via prompt).",
                ),
                IO.Combo.Input("mode", options=["pro", "std"]),
                IO.Combo.Input("model", options=["kling-v3", "kling-v2-6"], optional=True),
            ],
            outputs=[
                IO.Video.Output(),
@@ -2778,7 +2777,6 @@ class MotionControl(IO.ComfyNode):
        keep_original_sound: bool,
        character_orientation: str,
        mode: str,
        model: str = "kling-v2-6",
    ) -> IO.NodeOutput:
        validate_string(prompt, max_length=2500)
        validate_image_dimensions(reference_image, min_width=340, min_height=340)
@@ -2799,7 +2797,6 @@ class MotionControl(IO.ComfyNode):
                keep_original_sound="yes" if keep_original_sound else "no",
                character_orientation=character_orientation,
                mode=mode,
                model_name=model,
            ),
        )
        if response.code:
--- a/comfyui_version.py
+++ b/comfyui_version.py
@@ -1,3 +1,3 @@
 # This file is automatically generated by the build process when version is
 # updated in pyproject.toml.
-__version__ = "0.16.2"
+__version__ = "0.15.1"
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "ComfyUI"
-version = "0.16.2"
+version = "0.15.1"
 readme = "README.md"
 license = { file = "LICENSE" }
 requires-python = ">=3.10"
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 comfyui-frontend-package==1.39.19
-comfyui-workflow-templates==0.9.10
+comfyui-workflow-templates==0.9.5
 comfyui-embedded-docs==0.4.3
 torch
 torchsde
@@ -22,7 +22,7 @@ alembic
 SQLAlchemy
 av>=14.2.0
 comfy-kitchen>=0.2.7
-comfy-aimdo>=0.2.7
+comfy-aimdo>=0.2.6
 requests
 #non essential dependencies: