From e632e5de281b91dd7199636dd6d82126fbfb07d5 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Sun, 9 Nov 2025 15:06:39 -0800
Subject: [PATCH 1/3] Add logging for model unloading. (#10692)

---
 comfy/model_patcher.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py
index 5a31a8734..17e06a869 100644
--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@@ -909,6 +909,7 @@ class ModelPatcher:
             self.model.model_lowvram = True
             self.model.lowvram_patch_counter += patch_counter
             self.model.model_loaded_weight_memory -= memory_freed
+            logging.info("loaded partially: {:.2f} MB loaded, lowvram patches: {}".format(self.model.model_loaded_weight_memory / (1024 * 1024), self.model.lowvram_patch_counter))
             return memory_freed
 
     def partially_load(self, device_to, extra_memory=0, force_patch_weights=False):

From dea899f22125d38a8b48147d6cce89a2b659fdeb Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Sun, 9 Nov 2025 15:51:33 -0800
Subject: [PATCH 2/3] Unload weights if vram usage goes up between runs.
 (#10690)

---
 comfy/model_management.py | 11 +++++++++--
 comfy/model_patcher.py    | 20 +++++++++++++-------
 2 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/comfy/model_management.py b/comfy/model_management.py
index 7012df858..a4410f2ec 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -503,7 +503,11 @@ class LoadedModel:
         use_more_vram = lowvram_model_memory
         if use_more_vram == 0:
             use_more_vram = 1e32
-        self.model_use_more_vram(use_more_vram, force_patch_weights=force_patch_weights)
+        if use_more_vram > 0:
+            self.model_use_more_vram(use_more_vram, force_patch_weights=force_patch_weights)
+        else:
+            self.model.partially_unload(self.model.offload_device, -use_more_vram, force_patch_weights=force_patch_weights)
+
         real_model = self.model.model
 
         if is_intel_xpu() and not args.disable_ipex_optimize and 'ipex' in globals() and real_model is not None:
@@ -689,7 +693,10 @@ def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimu
             current_free_mem = get_free_memory(torch_dev) + loaded_memory
 
             lowvram_model_memory = max(128 * 1024 * 1024, (current_free_mem - minimum_memory_required), min(current_free_mem * MIN_WEIGHT_MEMORY_RATIO, current_free_mem - minimum_inference_memory()))
-            lowvram_model_memory = max(0.1, lowvram_model_memory - loaded_memory)
+            lowvram_model_memory = lowvram_model_memory - loaded_memory
+
+            if lowvram_model_memory == 0:
+                lowvram_model_memory = 0.1
 
         if vram_set_state == VRAMState.NO_VRAM:
             lowvram_model_memory = 0.1
diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py
index 17e06a869..68b0a9192 100644
--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@@ -843,7 +843,7 @@ class ModelPatcher:
 
         self.object_patches_backup.clear()
 
-    def partially_unload(self, device_to, memory_to_free=0):
+    def partially_unload(self, device_to, memory_to_free=0, force_patch_weights=False):
         with self.use_ejected():
             hooks_unpatched = False
             memory_freed = 0
@@ -887,13 +887,19 @@ class ModelPatcher:
                         module_mem += move_weight_functions(m, device_to)
                         if lowvram_possible:
                             if weight_key in self.patches:
-                                _, set_func, convert_func = get_key_weight(self.model, weight_key)
-                                m.weight_function.append(LowVramPatch(weight_key, self.patches, convert_func, set_func))
-                                patch_counter += 1
+                                if force_patch_weights:
+                                    self.patch_weight_to_device(weight_key)
+                                else:
+                                    _, set_func, convert_func = get_key_weight(self.model, weight_key)
+                                    m.weight_function.append(LowVramPatch(weight_key, self.patches, convert_func, set_func))
+                                    patch_counter += 1
                             if bias_key in self.patches:
-                                _, set_func, convert_func = get_key_weight(self.model, bias_key)
-                                m.bias_function.append(LowVramPatch(bias_key, self.patches, convert_func, set_func))
-                                patch_counter += 1
+                                if force_patch_weights:
+                                    self.patch_weight_to_device(bias_key)
+                                else:
+                                    _, set_func, convert_func = get_key_weight(self.model, bias_key)
+                                    m.bias_function.append(LowVramPatch(bias_key, self.patches, convert_func, set_func))
+                                    patch_counter += 1
                             cast_weight = True
 
                         if cast_weight:

From c350009236e5d172a3050c04043ea70a301378ca Mon Sep 17 00:00:00 2001
From: rattus <46076784+rattus128@users.noreply.github.com>
Date: Mon, 10 Nov 2025 13:52:11 +1000
Subject: [PATCH 3/3] ops: Put weight cast on the offload stream (#10697)

The weight dtype cast needs to be on the offload stream. Without this
change, a black screen was reproduced with low-resolution images on a
slow bus when using FP8.
---
 comfy/ops.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/comfy/ops.py b/comfy/ops.py
index 733bff99d..96dffa85d 100644
--- a/comfy/ops.py
+++ b/comfy/ops.py
@@ -110,9 +110,9 @@ def cast_bias_weight(s, input=None, dtype=None, device=None, bias_dtype=None, of
                 for f in s.bias_function:
                     bias = f(bias)
 
-    weight = weight.to(dtype=dtype)
-    if weight_has_function:
+    if weight_has_function or weight.dtype != dtype:
         with wf_context:
+            weight = weight.to(dtype=dtype)
             for f in s.weight_function:
                 weight = f(weight)