Compare commits

..

1 Commit

Author SHA1 Message Date
bymyself
cffeebfa01 fix: prevent --cpu flag from allocating GPU memory
Two root causes fixed:

1. soft_empty_cache() and synchronize() in model_management.py lacked a
   cpu_state == CPUState.CPU guard. They fell through to torch.cuda calls
   that initialize a CUDA context (150-500MB VRAM) even in CPU-only mode.

2. comfy_kitchen is imported unconditionally at startup via quant_ops.py.
   The import chain triggers torch.cuda.is_available() -> cuInit, which
   initializes the CUDA driver. The import is now skipped when args.cpu is set.

Also adds missing QuantizedLayout and register_layout_op fallback stubs
that were absent from the original ImportError handler.

Amp-Thread-ID: https://ampcode.com/threads/T-019cbd03-433e-7601-93ff-3887227496b4
2026-03-05 12:32:46 -08:00
10 changed files with 59 additions and 151 deletions

View File

@@ -1666,16 +1666,12 @@ def lora_compute_dtype(device):
return dtype return dtype
def synchronize(): def synchronize():
if cpu_mode():
return
if is_intel_xpu(): if is_intel_xpu():
torch.xpu.synchronize() torch.xpu.synchronize()
elif torch.cuda.is_available(): elif torch.cuda.is_available():
torch.cuda.synchronize() torch.cuda.synchronize()
def soft_empty_cache(force=False): def soft_empty_cache(force=False):
if cpu_mode():
return
global cpu_state global cpu_state
if cpu_state == CPUState.MPS: if cpu_state == CPUState.MPS:
torch.mps.empty_cache() torch.mps.empty_cache()

View File

@@ -80,21 +80,6 @@ def cast_to_input(weight, input, non_blocking=False, copy=True):
def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compute_dtype, want_requant): def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compute_dtype, want_requant):
#vbar doesn't support CPU weights, but some custom nodes have weird paths
#that might switch the layer to the CPU and expect it to work. We have to take
#a clone conservatively as we are mmapped and some SFT files are packed misaligned
#If you are a custom node author reading this, please move your layer to the GPU
#or declare your ModelPatcher as CPU in the first place.
if comfy.model_management.is_device_cpu(device):
weight = s.weight.to(dtype=dtype, copy=True)
if isinstance(weight, QuantizedTensor):
weight = weight.dequantize()
bias = None
if s.bias is not None:
bias = s.bias.to(dtype=bias_dtype, copy=True)
return weight, bias, (None, None, None)
offload_stream = None offload_stream = None
xfer_dest = None xfer_dest = None
@@ -675,29 +660,23 @@ class fp8_ops(manual_cast):
CUBLAS_IS_AVAILABLE = False CUBLAS_IS_AVAILABLE = False
try: try:
from cublas_ops import CublasLinear, cublas_half_matmul from cublas_ops import CublasLinear
CUBLAS_IS_AVAILABLE = True CUBLAS_IS_AVAILABLE = True
except ImportError: except ImportError:
pass pass
if CUBLAS_IS_AVAILABLE: if CUBLAS_IS_AVAILABLE:
class cublas_ops(manual_cast): class cublas_ops(disable_weight_init):
class Linear(CublasLinear, manual_cast.Linear): class Linear(CublasLinear, disable_weight_init.Linear):
def reset_parameters(self): def reset_parameters(self):
return None return None
def forward_comfy_cast_weights(self, input): def forward_comfy_cast_weights(self, input):
weight, bias, offload_stream = cast_bias_weight(self, input, offloadable=True) return super().forward(input)
x = cublas_half_matmul(input, weight, bias, self._epilogue_str, self.has_bias)
uncast_bias_weight(self, weight, bias, offload_stream)
return x
def forward(self, *args, **kwargs): def forward(self, *args, **kwargs):
run_every_op() return super().forward(*args, **kwargs)
if self.comfy_cast_weights or len(self.weight_function) > 0 or len(self.bias_function) > 0:
return self.forward_comfy_cast_weights(*args, **kwargs)
else:
return super().forward(*args, **kwargs)
# ============================================================================== # ==============================================================================
# Mixed Precision Operations # Mixed Precision Operations

View File

@@ -1,33 +1,38 @@
import torch import torch
import logging import logging
from comfy.cli_args import args
try: if args.cpu:
import comfy_kitchen as ck
from comfy_kitchen.tensor import (
QuantizedTensor,
QuantizedLayout,
TensorCoreFP8Layout as _CKFp8Layout,
TensorCoreNVFP4Layout as _CKNvfp4Layout,
register_layout_op,
register_layout_class,
get_layout_class,
)
_CK_AVAILABLE = True
if torch.version.cuda is None:
ck.registry.disable("cuda")
else:
cuda_version = tuple(map(int, str(torch.version.cuda).split('.')))
if cuda_version < (13,):
ck.registry.disable("cuda")
logging.warning("WARNING: You need pytorch with cu130 or higher to use optimized CUDA operations.")
ck.registry.disable("triton")
for k, v in ck.list_backends().items():
logging.info(f"Found comfy_kitchen backend {k}: {v}")
except ImportError as e:
logging.error(f"Failed to import comfy_kitchen, Error: {e}, fp8 and fp4 support will not be available.")
_CK_AVAILABLE = False _CK_AVAILABLE = False
else:
try:
import comfy_kitchen as ck
from comfy_kitchen.tensor import (
QuantizedTensor,
QuantizedLayout,
TensorCoreFP8Layout as _CKFp8Layout,
TensorCoreNVFP4Layout as _CKNvfp4Layout,
register_layout_op,
register_layout_class,
get_layout_class,
)
_CK_AVAILABLE = True
if torch.version.cuda is None:
ck.registry.disable("cuda")
else:
cuda_version = tuple(map(int, str(torch.version.cuda).split('.')))
if cuda_version < (13,):
ck.registry.disable("cuda")
logging.warning("WARNING: You need pytorch with cu130 or higher to use optimized CUDA operations.")
ck.registry.disable("triton")
for k, v in ck.list_backends().items():
logging.info(f"Found comfy_kitchen backend {k}: {v}")
except ImportError as e:
logging.error(f"Failed to import comfy_kitchen, Error: {e}, fp8 and fp4 support will not be available.")
_CK_AVAILABLE = False
if not _CK_AVAILABLE:
class QuantizedTensor: class QuantizedTensor:
pass pass

View File

@@ -7,8 +7,7 @@ class ImageGenerationRequest(BaseModel):
aspect_ratio: str = Field(...) aspect_ratio: str = Field(...)
n: int = Field(...) n: int = Field(...)
seed: int = Field(...) seed: int = Field(...)
response_format: str = Field("url") response_for: str = Field("url")
resolution: str = Field(...)
class InputUrlObject(BaseModel): class InputUrlObject(BaseModel):
@@ -17,13 +16,12 @@ class InputUrlObject(BaseModel):
class ImageEditRequest(BaseModel): class ImageEditRequest(BaseModel):
model: str = Field(...) model: str = Field(...)
images: list[InputUrlObject] = Field(...) image: InputUrlObject = Field(...)
prompt: str = Field(...) prompt: str = Field(...)
resolution: str = Field(...) resolution: str = Field(...)
n: int = Field(...) n: int = Field(...)
seed: int = Field(...) seed: int = Field(...)
response_format: str = Field("url") response_for: str = Field("url")
aspect_ratio: str | None = Field(...)
class VideoGenerationRequest(BaseModel): class VideoGenerationRequest(BaseModel):
@@ -49,13 +47,8 @@ class ImageResponseObject(BaseModel):
revised_prompt: str | None = Field(None) revised_prompt: str | None = Field(None)
class UsageObject(BaseModel):
cost_in_usd_ticks: int | None = Field(None)
class ImageGenerationResponse(BaseModel): class ImageGenerationResponse(BaseModel):
data: list[ImageResponseObject] = Field(...) data: list[ImageResponseObject] = Field(...)
usage: UsageObject | None = Field(None)
class VideoGenerationResponse(BaseModel): class VideoGenerationResponse(BaseModel):
@@ -72,4 +65,3 @@ class VideoStatusResponse(BaseModel):
status: str | None = Field(None) status: str | None = Field(None)
video: VideoResponseObject | None = Field(None) video: VideoResponseObject | None = Field(None)
model: str | None = Field(None) model: str | None = Field(None)
usage: UsageObject | None = Field(None)

View File

@@ -148,4 +148,3 @@ class MotionControlRequest(BaseModel):
keep_original_sound: str = Field(...) keep_original_sound: str = Field(...)
character_orientation: str = Field(...) character_orientation: str = Field(...)
mode: str = Field(..., description="'pro' or 'std'") mode: str = Field(..., description="'pro' or 'std'")
model_name: str = Field(...)

View File

@@ -27,12 +27,6 @@ from comfy_api_nodes.util import (
) )
def _extract_grok_price(response) -> float | None:
if response.usage and response.usage.cost_in_usd_ticks is not None:
return response.usage.cost_in_usd_ticks / 10_000_000_000
return None
class GrokImageNode(IO.ComfyNode): class GrokImageNode(IO.ComfyNode):
@classmethod @classmethod
@@ -43,10 +37,7 @@ class GrokImageNode(IO.ComfyNode):
category="api node/image/Grok", category="api node/image/Grok",
description="Generate images using Grok based on a text prompt", description="Generate images using Grok based on a text prompt",
inputs=[ inputs=[
IO.Combo.Input( IO.Combo.Input("model", options=["grok-imagine-image-beta"]),
"model",
options=["grok-imagine-image-pro", "grok-imagine-image", "grok-imagine-image-beta"],
),
IO.String.Input( IO.String.Input(
"prompt", "prompt",
multiline=True, multiline=True,
@@ -90,7 +81,6 @@ class GrokImageNode(IO.ComfyNode):
tooltip="Seed to determine if node should re-run; " tooltip="Seed to determine if node should re-run; "
"actual results are nondeterministic regardless of seed.", "actual results are nondeterministic regardless of seed.",
), ),
IO.Combo.Input("resolution", options=["1K", "2K"], optional=True),
], ],
outputs=[ outputs=[
IO.Image.Output(), IO.Image.Output(),
@@ -102,13 +92,8 @@ class GrokImageNode(IO.ComfyNode):
], ],
is_api_node=True, is_api_node=True,
price_badge=IO.PriceBadge( price_badge=IO.PriceBadge(
depends_on=IO.PriceBadgeDepends(widgets=["model", "number_of_images"]), depends_on=IO.PriceBadgeDepends(widgets=["number_of_images"]),
expr=""" expr="""{"type":"usd","usd":0.033 * widgets.number_of_images}""",
(
$rate := $contains(widgets.model, "pro") ? 0.07 : 0.02;
{"type":"usd","usd": $rate * widgets.number_of_images}
)
""",
), ),
) )
@@ -120,7 +105,6 @@ class GrokImageNode(IO.ComfyNode):
aspect_ratio: str, aspect_ratio: str,
number_of_images: int, number_of_images: int,
seed: int, seed: int,
resolution: str = "1K",
) -> IO.NodeOutput: ) -> IO.NodeOutput:
validate_string(prompt, strip_whitespace=True, min_length=1) validate_string(prompt, strip_whitespace=True, min_length=1)
response = await sync_op( response = await sync_op(
@@ -132,10 +116,8 @@ class GrokImageNode(IO.ComfyNode):
aspect_ratio=aspect_ratio, aspect_ratio=aspect_ratio,
n=number_of_images, n=number_of_images,
seed=seed, seed=seed,
resolution=resolution.lower(),
), ),
response_model=ImageGenerationResponse, response_model=ImageGenerationResponse,
price_extractor=_extract_grok_price,
) )
if len(response.data) == 1: if len(response.data) == 1:
return IO.NodeOutput(await download_url_to_image_tensor(response.data[0].url)) return IO.NodeOutput(await download_url_to_image_tensor(response.data[0].url))
@@ -156,17 +138,14 @@ class GrokImageEditNode(IO.ComfyNode):
category="api node/image/Grok", category="api node/image/Grok",
description="Modify an existing image based on a text prompt", description="Modify an existing image based on a text prompt",
inputs=[ inputs=[
IO.Combo.Input( IO.Combo.Input("model", options=["grok-imagine-image-beta"]),
"model", IO.Image.Input("image"),
options=["grok-imagine-image-pro", "grok-imagine-image", "grok-imagine-image-beta"],
),
IO.Image.Input("image", display_name="images"),
IO.String.Input( IO.String.Input(
"prompt", "prompt",
multiline=True, multiline=True,
tooltip="The text prompt used to generate the image", tooltip="The text prompt used to generate the image",
), ),
IO.Combo.Input("resolution", options=["1K", "2K"]), IO.Combo.Input("resolution", options=["1K"]),
IO.Int.Input( IO.Int.Input(
"number_of_images", "number_of_images",
default=1, default=1,
@@ -187,27 +166,6 @@ class GrokImageEditNode(IO.ComfyNode):
tooltip="Seed to determine if node should re-run; " tooltip="Seed to determine if node should re-run; "
"actual results are nondeterministic regardless of seed.", "actual results are nondeterministic regardless of seed.",
), ),
IO.Combo.Input(
"aspect_ratio",
options=[
"auto",
"1:1",
"2:3",
"3:2",
"3:4",
"4:3",
"9:16",
"16:9",
"9:19.5",
"19.5:9",
"9:20",
"20:9",
"1:2",
"2:1",
],
optional=True,
tooltip="Only allowed when multiple images are connected to the image input.",
),
], ],
outputs=[ outputs=[
IO.Image.Output(), IO.Image.Output(),
@@ -219,13 +177,8 @@ class GrokImageEditNode(IO.ComfyNode):
], ],
is_api_node=True, is_api_node=True,
price_badge=IO.PriceBadge( price_badge=IO.PriceBadge(
depends_on=IO.PriceBadgeDepends(widgets=["model", "number_of_images"]), depends_on=IO.PriceBadgeDepends(widgets=["number_of_images"]),
expr=""" expr="""{"type":"usd","usd":0.002 + 0.033 * widgets.number_of_images}""",
(
$rate := $contains(widgets.model, "pro") ? 0.07 : 0.02;
{"type":"usd","usd": 0.002 + $rate * widgets.number_of_images}
)
""",
), ),
) )
@@ -238,32 +191,22 @@ class GrokImageEditNode(IO.ComfyNode):
resolution: str, resolution: str,
number_of_images: int, number_of_images: int,
seed: int, seed: int,
aspect_ratio: str = "auto",
) -> IO.NodeOutput: ) -> IO.NodeOutput:
validate_string(prompt, strip_whitespace=True, min_length=1) validate_string(prompt, strip_whitespace=True, min_length=1)
if model == "grok-imagine-image-pro": if get_number_of_images(image) != 1:
if get_number_of_images(image) > 1: raise ValueError("Only one input image is supported.")
raise ValueError("The pro model supports only 1 input image.")
elif get_number_of_images(image) > 3:
raise ValueError("A maximum of 3 input images is supported.")
if aspect_ratio != "auto" and get_number_of_images(image) == 1:
raise ValueError(
"Custom aspect ratio is only allowed when multiple images are connected to the image input."
)
response = await sync_op( response = await sync_op(
cls, cls,
ApiEndpoint(path="/proxy/xai/v1/images/edits", method="POST"), ApiEndpoint(path="/proxy/xai/v1/images/edits", method="POST"),
data=ImageEditRequest( data=ImageEditRequest(
model=model, model=model,
images=[InputUrlObject(url=f"data:image/png;base64,{tensor_to_base64_string(i)}") for i in image], image=InputUrlObject(url=f"data:image/png;base64,{tensor_to_base64_string(image)}"),
prompt=prompt, prompt=prompt,
resolution=resolution.lower(), resolution=resolution.lower(),
n=number_of_images, n=number_of_images,
seed=seed, seed=seed,
aspect_ratio=None if aspect_ratio == "auto" else aspect_ratio,
), ),
response_model=ImageGenerationResponse, response_model=ImageGenerationResponse,
price_extractor=_extract_grok_price,
) )
if len(response.data) == 1: if len(response.data) == 1:
return IO.NodeOutput(await download_url_to_image_tensor(response.data[0].url)) return IO.NodeOutput(await download_url_to_image_tensor(response.data[0].url))
@@ -284,7 +227,7 @@ class GrokVideoNode(IO.ComfyNode):
category="api node/video/Grok", category="api node/video/Grok",
description="Generate video from a prompt or an image", description="Generate video from a prompt or an image",
inputs=[ inputs=[
IO.Combo.Input("model", options=["grok-imagine-video", "grok-imagine-video-beta"]), IO.Combo.Input("model", options=["grok-imagine-video-beta"]),
IO.String.Input( IO.String.Input(
"prompt", "prompt",
multiline=True, multiline=True,
@@ -332,11 +275,10 @@ class GrokVideoNode(IO.ComfyNode):
], ],
is_api_node=True, is_api_node=True,
price_badge=IO.PriceBadge( price_badge=IO.PriceBadge(
depends_on=IO.PriceBadgeDepends(widgets=["duration", "resolution"], inputs=["image"]), depends_on=IO.PriceBadgeDepends(widgets=["duration"], inputs=["image"]),
expr=""" expr="""
( (
$rate := widgets.resolution = "720p" ? 0.07 : 0.05; $base := 0.181 * widgets.duration;
$base := $rate * widgets.duration;
{"type":"usd","usd": inputs.image.connected ? $base + 0.002 : $base} {"type":"usd","usd": inputs.image.connected ? $base + 0.002 : $base}
) )
""", """,
@@ -379,7 +321,6 @@ class GrokVideoNode(IO.ComfyNode):
ApiEndpoint(path=f"/proxy/xai/v1/videos/{initial_response.request_id}"), ApiEndpoint(path=f"/proxy/xai/v1/videos/{initial_response.request_id}"),
status_extractor=lambda r: r.status if r.status is not None else "complete", status_extractor=lambda r: r.status if r.status is not None else "complete",
response_model=VideoStatusResponse, response_model=VideoStatusResponse,
price_extractor=_extract_grok_price,
) )
return IO.NodeOutput(await download_url_to_video_output(response.video.url)) return IO.NodeOutput(await download_url_to_video_output(response.video.url))
@@ -394,7 +335,7 @@ class GrokVideoEditNode(IO.ComfyNode):
category="api node/video/Grok", category="api node/video/Grok",
description="Edit an existing video based on a text prompt.", description="Edit an existing video based on a text prompt.",
inputs=[ inputs=[
IO.Combo.Input("model", options=["grok-imagine-video", "grok-imagine-video-beta"]), IO.Combo.Input("model", options=["grok-imagine-video-beta"]),
IO.String.Input( IO.String.Input(
"prompt", "prompt",
multiline=True, multiline=True,
@@ -423,7 +364,7 @@ class GrokVideoEditNode(IO.ComfyNode):
], ],
is_api_node=True, is_api_node=True,
price_badge=IO.PriceBadge( price_badge=IO.PriceBadge(
expr="""{"type":"usd","usd": 0.06, "format": {"suffix": "/sec", "approximate": true}}""", expr="""{"type":"usd","usd": 0.191, "format": {"suffix": "/sec", "approximate": true}}""",
), ),
) )
@@ -457,7 +398,6 @@ class GrokVideoEditNode(IO.ComfyNode):
ApiEndpoint(path=f"/proxy/xai/v1/videos/{initial_response.request_id}"), ApiEndpoint(path=f"/proxy/xai/v1/videos/{initial_response.request_id}"),
status_extractor=lambda r: r.status if r.status is not None else "complete", status_extractor=lambda r: r.status if r.status is not None else "complete",
response_model=VideoStatusResponse, response_model=VideoStatusResponse,
price_extractor=_extract_grok_price,
) )
return IO.NodeOutput(await download_url_to_video_output(response.video.url)) return IO.NodeOutput(await download_url_to_video_output(response.video.url))

View File

@@ -2747,7 +2747,6 @@ class MotionControl(IO.ComfyNode):
"but the character orientation matches the reference image (camera/other details via prompt).", "but the character orientation matches the reference image (camera/other details via prompt).",
), ),
IO.Combo.Input("mode", options=["pro", "std"]), IO.Combo.Input("mode", options=["pro", "std"]),
IO.Combo.Input("model", options=["kling-v3", "kling-v2-6"], optional=True),
], ],
outputs=[ outputs=[
IO.Video.Output(), IO.Video.Output(),
@@ -2778,7 +2777,6 @@ class MotionControl(IO.ComfyNode):
keep_original_sound: bool, keep_original_sound: bool,
character_orientation: str, character_orientation: str,
mode: str, mode: str,
model: str = "kling-v2-6",
) -> IO.NodeOutput: ) -> IO.NodeOutput:
validate_string(prompt, max_length=2500) validate_string(prompt, max_length=2500)
validate_image_dimensions(reference_image, min_width=340, min_height=340) validate_image_dimensions(reference_image, min_width=340, min_height=340)
@@ -2799,7 +2797,6 @@ class MotionControl(IO.ComfyNode):
keep_original_sound="yes" if keep_original_sound else "no", keep_original_sound="yes" if keep_original_sound else "no",
character_orientation=character_orientation, character_orientation=character_orientation,
mode=mode, mode=mode,
model_name=model,
), ),
) )
if response.code: if response.code:

View File

@@ -1,3 +1,3 @@
# This file is automatically generated by the build process when version is # This file is automatically generated by the build process when version is
# updated in pyproject.toml. # updated in pyproject.toml.
__version__ = "0.16.2" __version__ = "0.15.1"

View File

@@ -1,6 +1,6 @@
[project] [project]
name = "ComfyUI" name = "ComfyUI"
version = "0.16.2" version = "0.15.1"
readme = "README.md" readme = "README.md"
license = { file = "LICENSE" } license = { file = "LICENSE" }
requires-python = ">=3.10" requires-python = ">=3.10"

View File

@@ -1,5 +1,5 @@
comfyui-frontend-package==1.39.19 comfyui-frontend-package==1.39.19
comfyui-workflow-templates==0.9.10 comfyui-workflow-templates==0.9.5
comfyui-embedded-docs==0.4.3 comfyui-embedded-docs==0.4.3
torch torch
torchsde torchsde
@@ -22,7 +22,7 @@ alembic
SQLAlchemy SQLAlchemy
av>=14.2.0 av>=14.2.0
comfy-kitchen>=0.2.7 comfy-kitchen>=0.2.7
comfy-aimdo>=0.2.7 comfy-aimdo>=0.2.6
requests requests
#non essential dependencies: #non essential dependencies: