From cffeebfa0178ad0e8b4806ab2e6ba100ba68352a Mon Sep 17 00:00:00 2001 From: bymyself Date: Thu, 5 Mar 2026 01:35:58 -0800 Subject: [PATCH] fix: prevent --cpu flag from allocating GPU memory Two root causes fixed: 1. soft_empty_cache() and synchronize() in model_management.py lacked a cpu_state == CPUState.CPU guard. They fell through to torch.cuda calls that initialize a CUDA context (150-500MB VRAM) even in CPU-only mode. 2. comfy_kitchen is imported unconditionally at startup via quant_ops.py. The import chain triggers torch.cuda.is_available() -> cuInit, which initializes the CUDA driver. Now gated behind args.cpu check. Also adds missing QuantizedLayout and register_layout_op fallback stubs that were absent from the original ImportError handler. Amp-Thread-ID: https://ampcode.com/threads/T-019cbd03-433e-7601-93ff-3887227496b4 --- comfy/quant_ops.py | 55 +++++++++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/comfy/quant_ops.py b/comfy/quant_ops.py index 15a4f457b..a58d08eb4 100644 --- a/comfy/quant_ops.py +++ b/comfy/quant_ops.py @@ -1,33 +1,38 @@ import torch import logging +from comfy.cli_args import args -try: - import comfy_kitchen as ck - from comfy_kitchen.tensor import ( - QuantizedTensor, - QuantizedLayout, - TensorCoreFP8Layout as _CKFp8Layout, - TensorCoreNVFP4Layout as _CKNvfp4Layout, - register_layout_op, - register_layout_class, - get_layout_class, - ) - _CK_AVAILABLE = True - if torch.version.cuda is None: - ck.registry.disable("cuda") - else: - cuda_version = tuple(map(int, str(torch.version.cuda).split('.'))) - if cuda_version < (13,): - ck.registry.disable("cuda") - logging.warning("WARNING: You need pytorch with cu130 or higher to use optimized CUDA operations.") - - ck.registry.disable("triton") - for k, v in ck.list_backends().items(): - logging.info(f"Found comfy_kitchen backend {k}: {v}") -except ImportError as e: - logging.error(f"Failed to import comfy_kitchen, Error: {e}, fp8 and fp4 support will not be available.") +if args.cpu: _CK_AVAILABLE = False +else: + try: + import comfy_kitchen as ck + from comfy_kitchen.tensor import ( + QuantizedTensor, + QuantizedLayout, + TensorCoreFP8Layout as _CKFp8Layout, + TensorCoreNVFP4Layout as _CKNvfp4Layout, + register_layout_op, + register_layout_class, + get_layout_class, + ) + _CK_AVAILABLE = True + if torch.version.cuda is None: + ck.registry.disable("cuda") + else: + cuda_version = tuple(map(int, str(torch.version.cuda).split('.'))) + if cuda_version < (13,): + ck.registry.disable("cuda") + logging.warning("WARNING: You need pytorch with cu130 or higher to use optimized CUDA operations.") + ck.registry.disable("triton") + for k, v in ck.list_backends().items(): + logging.info(f"Found comfy_kitchen backend {k}: {v}") + except ImportError as e: + logging.error(f"Failed to import comfy_kitchen, Error: {e}, fp8 and fp4 support will not be available.") + _CK_AVAILABLE = False + +if not _CK_AVAILABLE: class QuantizedTensor: pass