fix: prevent --cpu flag from allocating GPU memory

This commit fixes two root causes:

1. soft_empty_cache() and synchronize() in model_management.py lacked a cpu_state == CPUState.CPU guard, so they fell through to torch.cuda calls that initialize a CUDA context (150-500 MB of VRAM) even in CPU-only mode. Both now return early when running on the CPU.
2. comfy_kitchen was imported unconditionally at startup via quant_ops.py. Its import chain triggers torch.cuda.is_available() -> cuInit, which initializes the CUDA driver. The import is now gated behind an args.cpu check and is skipped in CPU-only mode.

This commit also adds the QuantizedLayout and register_layout_op fallback stubs, which were absent from the original ImportError handler.

Amp-Thread-ID: https://ampcode.com/threads/T-019cbd03-433e-7601-93ff-3887227496b4
2026-03-06 17:57:40 +08:00 · 2026-03-05 01:35:58 -08:00 · 2026-03-05 01:35:58 -08:00 · 313e1c5411
commit 313e1c5411
parent 43c64b6308
2 changed files with 40 additions and 25 deletions
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@ -1666,6 +1666,8 @@ def lora_compute_dtype(device):
    return dtype
 def synchronize():
    if cpu_state == CPUState.CPU:
        return
    if is_intel_xpu():
        torch.xpu.synchronize()
    elif torch.cuda.is_available():
@ -1673,6 +1675,8 @@ def synchronize():
 def soft_empty_cache(force=False):
    global cpu_state
    if cpu_state == CPUState.CPU:
        return
    if cpu_state == CPUState.MPS:
        torch.mps.empty_cache()
    elif is_intel_xpu():
--- a/comfy/quant_ops.py
+++ b/comfy/quant_ops.py
@ -1,42 +1,53 @@
 import torch
 import logging
 from comfy.cli_args import args
# --cpu must never touch the GPU. Importing comfy_kitchen ends up calling
# torch.cuda.is_available(), which initializes the CUDA driver, so the import
# is skipped entirely in CPU-only mode and the fallback stubs defined under
# `if not _CK_AVAILABLE:` are used instead.
if args.cpu:
    _CK_AVAILABLE = False
else:
    try:
        import comfy_kitchen as ck
        from comfy_kitchen.tensor import (
            QuantizedTensor,
            QuantizedLayout,
            TensorCoreFP8Layout as _CKFp8Layout,
            TensorCoreNVFP4Layout as _CKNvfp4Layout,
            register_layout_op,
            register_layout_class,
            get_layout_class,
        )
        _CK_AVAILABLE = True

        # The optimized CUDA backend is only enabled for a CUDA build of
        # pytorch reporting version 13 or newer; otherwise it is disabled.
        if torch.version.cuda is None:
            ck.registry.disable("cuda")
        else:
            cuda_version = tuple(map(int, str(torch.version.cuda).split('.')))
            if cuda_version < (13,):
                ck.registry.disable("cuda")
                logging.warning("WARNING: You need pytorch with cu130 or higher to use optimized CUDA operations.")

        # The triton backend is always disabled here.
        ck.registry.disable("triton")
        for k, v in ck.list_backends().items():
            logging.info(f"Found comfy_kitchen backend {k}: {v}")
    except ImportError as e:
        # A missing comfy_kitchen is not fatal: log it and fall back to stubs.
        logging.error(f"Failed to import comfy_kitchen, Error: {e}, fp8 and fp4 support will not be available.")
        _CK_AVAILABLE = False
 if not _CK_AVAILABLE:
    class QuantizedTensor:
        pass
    class QuantizedLayout:
        pass
    class _CKFp8Layout:
        pass
    class _CKNvfp4Layout:
        pass
    def register_layout_op(name, func):
        pass
    def register_layout_class(name, cls):
        pass