mm: redefine free memory for Windows

As commented.
ops: don't discard pins
2026-02-05 19:12:41 +08:00 · 2026-01-27 14:19:07 +10:00 · 2026-01-27 14:11:42 +10:00 · 2026-01-27 14:10:54 +10:00 · 2026-01-27 14:07:15 +10:00
3 changed files with 62 additions and 7 deletions
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -27,6 +27,7 @@ import weakref
 import gc
 import os
 from contextlib import nullcontext
+import comfy.memory_management
 import comfy.utils
 import comfy.quant_ops

@@ -584,9 +585,15 @@ WINDOWS = any(platform.win32_ver())

 EXTRA_RESERVED_VRAM = 400 * 1024 * 1024
 if WINDOWS:
+    import comfy.windows
    EXTRA_RESERVED_VRAM = 600 * 1024 * 1024 #Windows is higher because of the shared vram issue
    if total_vram > (15 * 1024):  # more extra reserved vram on 16GB+ cards
        EXTRA_RESERVED_VRAM += 100 * 1024 * 1024
+    def get_free_ram():
+        return comfy.windows.get_free_ram()
+else:
+    def get_free_ram():
+        return psutil.virtual_memory().available

 if args.reserve_vram is not None:
    EXTRA_RESERVED_VRAM = args.reserve_vram * 1024 * 1024 * 1024
@@ -617,7 +624,7 @@ def free_memory(memory_required, device, keep_loaded=[], for_dynamic=False, ram_
        ram_to_free = 1e32
        if not DISABLE_SMART_MEMORY:
            memory_to_free = memory_required - get_free_memory(device)
-            ram_to_free = ram_required - psutil.virtual_memory().available
+            ram_to_free = ram_required - get_free_ram()

        if current_loaded_models[i].model.is_dynamic() and for_dynamic:
            #don't actually unload dynamic models for the sake of other dynamic models
@@ -822,7 +829,7 @@ def unet_inital_load_device(parameters, dtype):

    mem_dev = get_free_memory(torch_dev)
    mem_cpu = get_free_memory(cpu_dev)
-    if mem_dev > mem_cpu and model_size < mem_dev:
+    if mem_dev > mem_cpu and model_size < mem_dev and comfy.memory_management.aimdo_allocator is None:
        return torch_dev
    else:
        return cpu_dev
--- a/comfy/ops.py
+++ b/comfy/ops.py
@@ -131,11 +131,7 @@ def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compu
            xfer_dest = cast_dest

    pin = None
-    if signature is not None:
-        #If we are able to increase our load level (e.g. user reduces resolution or batch number)
-        #reclaim the pin previously used for offload.
-        comfy.pinned_memory.unpin_memory(s)
-    elif not resident:
+    if signature is None and not resident:
        #prepare a new pin
        assert comfy.pinned_memory.get_pin(s) is None
        comfy.pinned_memory.pin_memory(s)
--- a/comfy/windows.py
+++ b/comfy/windows.py
@@ -0,0 +1,52 @@
+import ctypes
+import logging
+import psutil
+from ctypes import wintypes
+
+import comfy_aimdo.control
+
+psapi = ctypes.WinDLL("psapi")
+kernel32 = ctypes.WinDLL("kernel32")
+
+class PERFORMANCE_INFORMATION(ctypes.Structure):
+    _fields_ = [
+        ("cb", wintypes.DWORD),
+        ("CommitTotal", ctypes.c_size_t),
+        ("CommitLimit", ctypes.c_size_t),
+        ("CommitPeak", ctypes.c_size_t),
+        ("PhysicalTotal", ctypes.c_size_t),
+        ("PhysicalAvailable", ctypes.c_size_t),
+        ("SystemCache", ctypes.c_size_t),
+        ("KernelTotal", ctypes.c_size_t),
+        ("KernelPaged", ctypes.c_size_t),
+        ("KernelNonpaged", ctypes.c_size_t),
+        ("PageSize", ctypes.c_size_t),
+        ("HandleCount", wintypes.DWORD),
+        ("ProcessCount", wintypes.DWORD),
+        ("ThreadCount", wintypes.DWORD),
+    ]
+
+def get_free_ram():
+    #Windows is way too conservative and chalks recently used uncommitted model RAM
+    #as "in-use". So, calculate free RAM for the sake of general use as the greater of:
+    #
+    #1: What psutil says
+    #2: Total Memory - (Committed Memory - VRAM in use)
+    #
+    #We have to subtract VRAM in use from the committed memory as WDDM creates a naked
+    #commit charge for all VRAM used just in case it wants to page it all out. This just
+    #isn't realistic so "overcommit" on our calculations by just subtracting it off.
+
+    pi = PERFORMANCE_INFORMATION()
+    pi.cb = ctypes.sizeof(pi)
+
+    if not psapi.GetPerformanceInfo(ctypes.byref(pi), pi.cb):
+        logging.warning("WARNING: Failed to query windows performance info. RAM usage may be sub optimal")
+        return psutil.virtual_memory().available
+
+    committed = pi.CommitTotal * pi.PageSize
+    total = pi.PhysicalTotal * pi.PageSize
+
+    return max(psutil.virtual_memory().available,
+               total - (committed - comfy_aimdo.control.get_total_vram_usage()))
+
Author	SHA1	Message	Date
Rattus	cf76b0447a	mm: redefine free memory for Windows As commented.	2026-01-27 14:19:07 +10:00
Rattus	5bbd5597fd	ops: don't discard pins It's more likely that the user will rerun their workflow and want whatever pins are in place, so remove this. Pins still have to respect RAM pressure per model anyway.	2026-01-27 14:11:42 +10:00
Rattus	26dc3a20c6	mm: Don't GPU load models Aimdo will do this on demand as 0 copy. Remove the special case for vram > ram.	2026-01-27 14:10:54 +10:00
Rattus	db99ab48c2	fix missing import	2026-01-27 14:07:15 +10:00