From 313e1c5411c3dd18ac015d5ec142860affe3ef1b Mon Sep 17 00:00:00 2001 From: bymyself Date: Thu, 5 Mar 2026 01:35:58 -0800 Subject: [PATCH] fix: prevent --cpu flag from allocating GPU memory Two root causes fixed: 1. soft_empty_cache() and synchronize() in model_management.py lacked a cpu_state == CPUState.CPU guard. They fell through to torch.cuda calls that initialize a CUDA context (150-500MB VRAM) even in CPU-only mode. 2. comfy_kitchen is imported unconditionally at startup via quant_ops.py. The import chain triggers torch.cuda.is_available() -> cuInit, which initializes the CUDA driver. Now gated behind args.cpu check. Also adds missing QuantizedLayout and register_layout_op fallback stubs that were absent from the original ImportError handler. Amp-Thread-ID: https://ampcode.com/threads/T-019cbd03-433e-7601-93ff-3887227496b4 --- comfy/model_management.py | 4 +++ comfy/quant_ops.py | 61 +++++++++++++++++++++++---------------- 2 files changed, 40 insertions(+), 25 deletions(-) diff --git a/comfy/model_management.py b/comfy/model_management.py index 809600815..fb93c8acc 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -1666,6 +1666,8 @@ def lora_compute_dtype(device): return dtype def synchronize(): + if cpu_state == CPUState.CPU: + return if is_intel_xpu(): torch.xpu.synchronize() elif torch.cuda.is_available(): @@ -1673,6 +1675,8 @@ def synchronize(): def soft_empty_cache(force=False): global cpu_state + if cpu_state == CPUState.CPU: + return if cpu_state == CPUState.MPS: torch.mps.empty_cache() elif is_intel_xpu(): diff --git a/comfy/quant_ops.py b/comfy/quant_ops.py index 15a4f457b..8020794b8 100644 --- a/comfy/quant_ops.py +++ b/comfy/quant_ops.py @@ -1,42 +1,53 @@ import torch import logging +from comfy.cli_args import args -try: - import comfy_kitchen as ck - from comfy_kitchen.tensor import ( - QuantizedTensor, - QuantizedLayout, - TensorCoreFP8Layout as _CKFp8Layout, - TensorCoreNVFP4Layout as _CKNvfp4Layout, - register_layout_op, - register_layout_class, - get_layout_class, - ) - _CK_AVAILABLE = True - if torch.version.cuda is None: - ck.registry.disable("cuda") - else: - cuda_version = tuple(map(int, str(torch.version.cuda).split('.'))) - if cuda_version < (13,): - ck.registry.disable("cuda") - logging.warning("WARNING: You need pytorch with cu130 or higher to use optimized CUDA operations.") - - ck.registry.disable("triton") - for k, v in ck.list_backends().items(): - logging.info(f"Found comfy_kitchen backend {k}: {v}") -except ImportError as e: - logging.error(f"Failed to import comfy_kitchen, Error: {e}, fp8 and fp4 support will not be available.") +if args.cpu: _CK_AVAILABLE = False +else: + try: + import comfy_kitchen as ck + from comfy_kitchen.tensor import ( + QuantizedTensor, + QuantizedLayout, + TensorCoreFP8Layout as _CKFp8Layout, + TensorCoreNVFP4Layout as _CKNvfp4Layout, + register_layout_op, + register_layout_class, + get_layout_class, + ) + _CK_AVAILABLE = True + if torch.version.cuda is None: + ck.registry.disable("cuda") + else: + cuda_version = tuple(map(int, str(torch.version.cuda).split('.'))) + if cuda_version < (13,): + ck.registry.disable("cuda") + logging.warning("WARNING: You need pytorch with cu130 or higher to use optimized CUDA operations.") + ck.registry.disable("triton") + for k, v in ck.list_backends().items(): + logging.info(f"Found comfy_kitchen backend {k}: {v}") + except ImportError as e: + logging.error(f"Failed to import comfy_kitchen, Error: {e}, fp8 and fp4 support will not be available.") + _CK_AVAILABLE = False + +if not _CK_AVAILABLE: class QuantizedTensor: pass + class QuantizedLayout: + pass + class _CKFp8Layout: pass class _CKNvfp4Layout: pass + def register_layout_op(name, func): + pass + def register_layout_class(name, cls): pass