diff --git a/comfy/model_management.py b/comfy/model_management.py
index 809600815..fb93c8acc 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -1666,6 +1666,8 @@ def lora_compute_dtype(device):
     return dtype
 
 def synchronize():
+    if cpu_state == CPUState.CPU:
+        return
     if is_intel_xpu():
         torch.xpu.synchronize()
     elif torch.cuda.is_available():
@@ -1673,6 +1675,8 @@ def synchronize():
 
 def soft_empty_cache(force=False):
     global cpu_state
+    if cpu_state == CPUState.CPU:
+        return
     if cpu_state == CPUState.MPS:
         torch.mps.empty_cache()
     elif is_intel_xpu():
diff --git a/comfy/quant_ops.py b/comfy/quant_ops.py
index 15a4f457b..8020794b8 100644
--- a/comfy/quant_ops.py
+++ b/comfy/quant_ops.py
@@ -1,37 +1,48 @@
 import torch
 import logging
+from comfy.cli_args import args
 
-try:
-    import comfy_kitchen as ck
-    from comfy_kitchen.tensor import (
-        QuantizedTensor,
-        QuantizedLayout,
-        TensorCoreFP8Layout as _CKFp8Layout,
-        TensorCoreNVFP4Layout as _CKNvfp4Layout,
-        register_layout_op,
-        register_layout_class,
-        get_layout_class,
-    )
-    _CK_AVAILABLE = True
-    if torch.version.cuda is None:
-        ck.registry.disable("cuda")
-    else:
-        cuda_version = tuple(map(int, str(torch.version.cuda).split('.')))
-        if cuda_version < (13,):
-            ck.registry.disable("cuda")
-            logging.warning("WARNING: You need pytorch with cu130 or higher to use optimized CUDA operations.")
-
-    ck.registry.disable("triton")
-    for k, v in ck.list_backends().items():
-        logging.info(f"Found comfy_kitchen backend {k}: {v}")
-except ImportError as e:
-    logging.error(f"Failed to import comfy_kitchen, Error: {e}, fp8 and fp4 support will not be available.")
+if args.cpu:
     _CK_AVAILABLE = False
+else:
+    try:
+        import comfy_kitchen as ck
+        from comfy_kitchen.tensor import (
+            QuantizedTensor,
+            QuantizedLayout,
+            TensorCoreFP8Layout as _CKFp8Layout,
+            TensorCoreNVFP4Layout as _CKNvfp4Layout,
+            register_layout_op,
+            register_layout_class,
+            get_layout_class,
+        )
+        _CK_AVAILABLE = True
+        if torch.version.cuda is None:
+            ck.registry.disable("cuda")
+        else:
+            cuda_version = tuple(map(int, str(torch.version.cuda).split('.')))
+            if cuda_version < (13,):
+                ck.registry.disable("cuda")
+                logging.warning("WARNING: You need pytorch with cu130 or higher to use optimized CUDA operations.")
+        ck.registry.disable("triton")
+        for k, v in ck.list_backends().items():
+            logging.info(f"Found comfy_kitchen backend {k}: {v}")
+    except ImportError as e:
+        logging.error(f"Failed to import comfy_kitchen, Error: {e}, fp8 and fp4 support will not be available.")
+        _CK_AVAILABLE = False
+
+if not _CK_AVAILABLE:
     class QuantizedTensor:
         pass
+    class QuantizedLayout:
+        pass
 
     class _CKFp8Layout:
         pass
 
     class _CKNvfp4Layout:
         pass
+
+    def register_layout_op(name, func):
+        pass
+
+    def register_layout_class(name, cls):
+        pass