Merge f43e446709 into de9ada6a41

Dynamic VRAM unloading fix (#12227 )
* mp: fix full dynamic unloading This was not unloading dynamic models when requesting a full unload via the unpatch() code path. This was ok, i your workflow was all dynamic models but fails with big VRAM leaks if you need to fully unload something for a regular ModelPatcher It also fices the "unload models" button. * mm: load models outside of Aimdo Mempool In dynamic_vram mode, escape the Aimdo mempool and load into the regular mempool. Use a dummy thread to do it.
2026-02-06 03:22:33 +08:00 · 2026-02-02 23:40:38 +01:00 · 2026-02-02 17:35:20 -05:00 · 2026-02-02 17:34:46 -05:00 · 2025-12-19 15:23:15 +01:00 · 2025-12-13 07:19:39 +01:00
4 changed files with 39 additions and 15 deletions
--- a/comfy/cli_args.py
+++ b/comfy/cli_args.py
@ -44,6 +44,7 @@ parser.add_argument("--max-upload-size", type=float, default=100, help="Set the

 parser.add_argument("--base-directory", type=str, default=None, help="Set the ComfyUI base directory for models, custom_nodes, input, output, temp, and user directories.")
 parser.add_argument("--extra-model-paths-config", type=str, default=None, metavar="PATH", nargs='+', action='append', help="Load one or more extra_model_paths.yaml files.")
+parser.add_argument("--models-directory", type=str, default=None, help="Set the ComfyUI models directory. Overrides --base-directory.")
 parser.add_argument("--output-directory", type=str, default=None, help="Set the ComfyUI output directory. Overrides --base-directory.")
 parser.add_argument("--temp-directory", type=str, default=None, help="Set the ComfyUI temp directory (default is in the ComfyUI directory). Overrides --base-directory.")
 parser.add_argument("--input-directory", type=str, default=None, help="Set the ComfyUI input directory. Overrides --base-directory.")
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@ -19,7 +19,8 @@
 import psutil
 import logging
 from enum import Enum
-from comfy.cli_args import args, PerformanceFeature
+from comfy.cli_args import args, PerformanceFeature, enables_dynamic_vram
+import threading
 import torch
 import sys
 import platform
@ -650,7 +651,7 @@ def free_memory(memory_required, device, keep_loaded=[], for_dynamic=False, ram_
                soft_empty_cache()
    return unloaded_models

-def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimum_memory_required=None, force_full_load=False):
+def load_models_gpu_orig(models, memory_required=0, force_patch_weights=False, minimum_memory_required=None, force_full_load=False):
    cleanup_models_gc()
    global vram_state

@ -746,8 +747,25 @@ def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimu
        current_loaded_models.insert(0, loaded_model)
    return

-def load_model_gpu(model):
-    return load_models_gpu([model])
+def load_models_gpu_thread(models, memory_required, force_patch_weights, minimum_memory_required, force_full_load):
+    with torch.inference_mode():
+        load_models_gpu_orig(models, memory_required, force_patch_weights, minimum_memory_required, force_full_load)
+        soft_empty_cache()
+
+def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimum_memory_required=None, force_full_load=False):
+    #Deliberately load models outside of the Aimdo mempool so they can be retained accross
+    #nodes. Use a dummy thread to do it as pytorch documents that mempool contexts are
+    #thread local. So exploit that to escape context
+    if enables_dynamic_vram():
+        t = threading.Thread(
+            target=load_models_gpu_thread,
+            args=(models, memory_required, force_patch_weights, minimum_memory_required, force_full_load)
+        )
+        t.start()
+        t.join()
+    else:
+        load_models_gpu_orig(models, memory_required=memory_required, force_patch_weights=force_patch_weights,
+                             minimum_memory_required=minimum_memory_required, force_full_load=force_full_load)

 def loaded_models(only_currently_used=False):
    output = []
@ -1112,11 +1130,11 @@ def get_cast_buffer(offload_stream, device, size, ref):
            return None
        if cast_buffer is not None and cast_buffer.numel() > 50 * (1024 ** 2):
            #I want my wrongly sized 50MB+ of VRAM back from the caching allocator right now
-            torch.cuda.synchronize()
+            synchronize()
            del STREAM_CAST_BUFFERS[offload_stream]
            del cast_buffer
            #FIXME: This doesn't work in Aimdo because mempool cant clear cache
-            torch.cuda.empty_cache()
+            soft_empty_cache()
        with wf_context:
            cast_buffer = torch.empty((size), dtype=torch.int8, device=device)
            STREAM_CAST_BUFFERS[offload_stream] = cast_buffer
@ -1132,9 +1150,7 @@ def reset_cast_buffers():
    for offload_stream in STREAM_CAST_BUFFERS:
        offload_stream.synchronize()
    STREAM_CAST_BUFFERS.clear()
-    if comfy.memory_management.aimdo_allocator is None:
-        #Pytorch 2.7 and earlier crashes if you try and empty_cache when mempools exist
-        torch.cuda.empty_cache()
+    soft_empty_cache()

 def get_offload_stream(device):
    stream_counter = stream_counters.get(device, 0)
@ -1284,7 +1300,7 @@ def discard_cuda_async_error():
        a = torch.tensor([1], dtype=torch.uint8, device=get_torch_device())
        b = torch.tensor([1], dtype=torch.uint8, device=get_torch_device())
        _ = a + b
-        torch.cuda.synchronize()
+        synchronize()
    except torch.AcceleratorError:
        #Dump it! We already know about it from the synchronous return
        pass
@ -1688,6 +1704,12 @@ def lora_compute_dtype(device):
    LORA_COMPUTE_DTYPES[device] = dtype
    return dtype

+def synchronize():
+    if is_intel_xpu():
+        torch.xpu.synchronize()
+    elif torch.cuda.is_available():
+        torch.cuda.synchronize()
+
 def soft_empty_cache(force=False):
    global cpu_state
    if cpu_state == CPUState.MPS:
@ -1713,9 +1735,6 @@ def debug_memory_summary():
        return torch.cuda.memory.memory_summary()
    return ""

-#TODO: might be cleaner to put this somewhere else
-import threading
-
 class InterruptProcessingException(Exception):
    pass

--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@ -1597,7 +1597,7 @@ class ModelPatcherDynamic(ModelPatcher):

        if unpatch_weights:
            self.partially_unload_ram(1e32)
-            self.partially_unload(None)
+            self.partially_unload(None, 1e32)

    def partially_load(self, device_to, extra_memory=0, force_patch_weights=False):
        assert not force_patch_weights #See above
--- a/folder_paths.py
+++ b/folder_paths.py
@ -19,7 +19,11 @@ if args.base_directory:
 else:
    base_path = os.path.dirname(os.path.realpath(__file__))

-models_dir = os.path.join(base_path, "models")
+if args.models_directory:
+    models_dir = os.path.abspath(args.models_directory)
+else:
+    models_dir = os.path.join(base_path, "models")
+
 folder_names_and_paths["checkpoints"] = ([os.path.join(models_dir, "checkpoints")], supported_pt_extensions)
 folder_names_and_paths["configs"] = ([os.path.join(models_dir, "configs")], [".yaml"])
Author	SHA1	Message	Date
Silver	2f1196a9c7	Merge `f43e446709` into `de9ada6a41`	2026-02-02 23:40:38 +01:00
rattus	de9ada6a41	Dynamic VRAM unloading fix (#12227 ) * mp: fix full dynamic unloading This was not unloading dynamic models when requesting a full unload via the unpatch() code path. This was ok, i your workflow was all dynamic models but fails with big VRAM leaks if you need to fully unload something for a regular ModelPatcher It also fices the "unload models" button. * mm: load models outside of Aimdo Mempool In dynamic_vram mode, escape the Aimdo mempool and load into the regular mempool. Use a dummy thread to do it.	2026-02-02 17:35:20 -05:00
rattus	37f711d4a1	mm: Fix cast buffers with intel offloading (#12229 ) Intel has offloading support but there were some nvidia calls in the new cast buffer stuff.	2026-02-02 17:34:46 -05:00
Silver	f43e446709	Merge branch 'comfyanonymous:master' into models-dir-arg	2025-12-19 15:23:15 +01:00
Silver	6ee2edde95	Merge branch 'comfyanonymous:master' into models-dir-arg	2025-12-13 07:19:39 +01:00
Silver	be3323cdec	Merge branch 'comfyanonymous:master' into models-dir-arg	2025-12-10 03:01:15 +01:00
Silver	0458e55a7a	Merge branch 'comfyanonymous:master' into models-dir-arg	2025-11-23 19:34:19 +01:00
Silver	9581882209	Merge branch 'comfyanonymous:master' into models-dir-arg	2025-11-18 10:46:36 +01:00
Silver	dc2c517694	Merge branch 'comfyanonymous:master' into models-dir-arg	2025-10-05 21:18:42 +02:00
silveroxides	0b27676fe1	add models-directory launch argument	2025-07-30 13:12:54 +02:00