fix: prevent --cpu flag from allocating GPU memory

Two root causes fixed:

1. soft_empty_cache() and synchronize() in model_management.py lacked a
   cpu_state == CPUState.CPU guard. They fell through to torch.cuda calls
   that initialize a CUDA context (150-500 MB of VRAM) even in CPU-only
   mode. Both now return early when cpu_state == CPUState.CPU.

2. comfy_kitchen was imported unconditionally at startup via quant_ops.py.
   The import chain triggers torch.cuda.is_available() -> cuInit, which
   initializes the CUDA driver. The import is now skipped when args.cpu
   is set, and _CK_AVAILABLE is set to False instead.

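Also adds the missing QuantizedLayout and register_layout_op fallback stubs
that were absent from the original ImportError handler. The fallback stubs
are now defined whenever _CK_AVAILABLE is False, which covers both the
--cpu path and a failed comfy_kitchen import.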

Amp-Thread-ID: https://ampcode.com/threads/T-019cbd03-433e-7601-93ff-3887227496b4
This commit is contained in:
bymyself 2026-03-05 01:35:58 -08:00
parent 43c64b6308
commit 313e1c5411
2 changed files with 40 additions and 25 deletions

View File

@ -1666,6 +1666,8 @@ def lora_compute_dtype(device):
return dtype return dtype
def synchronize(): def synchronize():
if cpu_state == CPUState.CPU:
return
if is_intel_xpu(): if is_intel_xpu():
torch.xpu.synchronize() torch.xpu.synchronize()
elif torch.cuda.is_available(): elif torch.cuda.is_available():
@ -1673,6 +1675,8 @@ def synchronize():
def soft_empty_cache(force=False): def soft_empty_cache(force=False):
global cpu_state global cpu_state
if cpu_state == CPUState.CPU:
return
if cpu_state == CPUState.MPS: if cpu_state == CPUState.MPS:
torch.mps.empty_cache() torch.mps.empty_cache()
elif is_intel_xpu(): elif is_intel_xpu():

View File

@ -1,42 +1,53 @@
import torch import torch
import logging import logging
from comfy.cli_args import args
try: if args.cpu:
import comfy_kitchen as ck
from comfy_kitchen.tensor import (
QuantizedTensor,
QuantizedLayout,
TensorCoreFP8Layout as _CKFp8Layout,
TensorCoreNVFP4Layout as _CKNvfp4Layout,
register_layout_op,
register_layout_class,
get_layout_class,
)
_CK_AVAILABLE = True
if torch.version.cuda is None:
ck.registry.disable("cuda")
else:
cuda_version = tuple(map(int, str(torch.version.cuda).split('.')))
if cuda_version < (13,):
ck.registry.disable("cuda")
logging.warning("WARNING: You need pytorch with cu130 or higher to use optimized CUDA operations.")
ck.registry.disable("triton")
for k, v in ck.list_backends().items():
logging.info(f"Found comfy_kitchen backend {k}: {v}")
except ImportError as e:
logging.error(f"Failed to import comfy_kitchen, Error: {e}, fp8 and fp4 support will not be available.")
_CK_AVAILABLE = False _CK_AVAILABLE = False
else:
try:
import comfy_kitchen as ck
from comfy_kitchen.tensor import (
QuantizedTensor,
QuantizedLayout,
TensorCoreFP8Layout as _CKFp8Layout,
TensorCoreNVFP4Layout as _CKNvfp4Layout,
register_layout_op,
register_layout_class,
get_layout_class,
)
_CK_AVAILABLE = True
if torch.version.cuda is None:
ck.registry.disable("cuda")
else:
cuda_version = tuple(map(int, str(torch.version.cuda).split('.')))
if cuda_version < (13,):
ck.registry.disable("cuda")
logging.warning("WARNING: You need pytorch with cu130 or higher to use optimized CUDA operations.")
ck.registry.disable("triton")
for k, v in ck.list_backends().items():
logging.info(f"Found comfy_kitchen backend {k}: {v}")
except ImportError as e:
logging.error(f"Failed to import comfy_kitchen, Error: {e}, fp8 and fp4 support will not be available.")
_CK_AVAILABLE = False
if not _CK_AVAILABLE:
class QuantizedTensor: class QuantizedTensor:
pass pass
class QuantizedLayout:
pass
class _CKFp8Layout: class _CKFp8Layout:
pass pass
class _CKNvfp4Layout: class _CKNvfp4Layout:
pass pass
def register_layout_op(name, func):
pass
def register_layout_class(name, cls): def register_layout_class(name, cls):
pass pass