From 313e1c5411c3dd18ac015d5ec142860affe3ef1b Mon Sep 17 00:00:00 2001
From: bymyself <cbyrne@comfy.org>
Date: Thu, 5 Mar 2026 01:35:58 -0800
Subject: [PATCH] fix: prevent --cpu flag from allocating GPU memory

Two root causes fixed:

1. soft_empty_cache() and synchronize() in model_management.py lacked a
   cpu_state == CPUState.CPU guard. They fell through to torch.cuda calls
   that initialize a CUDA context (150-500MB VRAM) even in CPU-only mode.

2. comfy_kitchen is imported unconditionally at startup via quant_ops.py.
   The import chain triggers torch.cuda.is_available() -> cuInit, which
   initializes the CUDA driver. The import is now skipped when args.cpu
   is set, and the fallback stubs are used instead.

Also adds the QuantizedLayout and register_layout_op fallback stubs,
which were missing from the original ImportError handler.

Amp-Thread-ID: https://ampcode.com/threads/T-019cbd03-433e-7601-93ff-3887227496b4
---
 comfy/model_management.py |  4 +++
 comfy/quant_ops.py        | 61 +++++++++++++++++++++++----------------
 2 files changed, 40 insertions(+), 25 deletions(-)

diff --git a/comfy/model_management.py b/comfy/model_management.py
index 809600815..fb93c8acc 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -1666,6 +1666,8 @@ def lora_compute_dtype(device):
     return dtype
 
 def synchronize():
+    if cpu_state == CPUState.CPU:
+        return
     if is_intel_xpu():
         torch.xpu.synchronize()
     elif torch.cuda.is_available():
@@ -1673,6 +1675,8 @@ def synchronize():
 
 def soft_empty_cache(force=False):
     global cpu_state
+    if cpu_state == CPUState.CPU:
+        return
     if cpu_state == CPUState.MPS:
         torch.mps.empty_cache()
     elif is_intel_xpu():
diff --git a/comfy/quant_ops.py b/comfy/quant_ops.py
index 15a4f457b..8020794b8 100644
--- a/comfy/quant_ops.py
+++ b/comfy/quant_ops.py
@@ -1,42 +1,53 @@
 import torch
 import logging
+from comfy.cli_args import args
 
-try:
-    import comfy_kitchen as ck
-    from comfy_kitchen.tensor import (
-        QuantizedTensor,
-        QuantizedLayout,
-        TensorCoreFP8Layout as _CKFp8Layout,
-        TensorCoreNVFP4Layout as _CKNvfp4Layout,
-        register_layout_op,
-        register_layout_class,
-        get_layout_class,
-    )
-    _CK_AVAILABLE = True
-    if torch.version.cuda is None:
-        ck.registry.disable("cuda")
-    else:
-        cuda_version = tuple(map(int, str(torch.version.cuda).split('.')))
-        if cuda_version < (13,):
-            ck.registry.disable("cuda")
-            logging.warning("WARNING: You need pytorch with cu130 or higher to use optimized CUDA operations.")
-
-    ck.registry.disable("triton")
-    for k, v in ck.list_backends().items():
-        logging.info(f"Found comfy_kitchen backend {k}: {v}")
-except ImportError as e:
-    logging.error(f"Failed to import comfy_kitchen, Error: {e}, fp8 and fp4 support will not be available.")
+if args.cpu:
     _CK_AVAILABLE = False
+else:
+    try:
+        import comfy_kitchen as ck
+        from comfy_kitchen.tensor import (
+            QuantizedTensor,
+            QuantizedLayout,
+            TensorCoreFP8Layout as _CKFp8Layout,
+            TensorCoreNVFP4Layout as _CKNvfp4Layout,
+            register_layout_op,
+            register_layout_class,
+            get_layout_class,
+        )
+        _CK_AVAILABLE = True
+        if torch.version.cuda is None:
+            ck.registry.disable("cuda")
+        else:
+            cuda_version = tuple(map(int, str(torch.version.cuda).split('.')))
+            if cuda_version < (13,):
+                ck.registry.disable("cuda")
+                logging.warning("WARNING: You need pytorch with cu130 or higher to use optimized CUDA operations.")
 
+        ck.registry.disable("triton")
+        for k, v in ck.list_backends().items():
+            logging.info(f"Found comfy_kitchen backend {k}: {v}")
+    except ImportError as e:
+        logging.error(f"Failed to import comfy_kitchen, Error: {e}, fp8 and fp4 support will not be available.")
+        _CK_AVAILABLE = False
+
+if not _CK_AVAILABLE:
     class QuantizedTensor:
         pass
 
+    class QuantizedLayout:
+        pass
+
     class _CKFp8Layout:
         pass
 
     class _CKNvfp4Layout:
         pass
 
+    def register_layout_op(name, func):
+        pass
+
     def register_layout_class(name, cls):
         pass