Compare commits

...

4 Commits

Author SHA1 Message Date
Rattus
cf76b0447a mm: redefine free memory for Windows
As commented.
2026-01-27 14:19:07 +10:00
Rattus
5bbd5597fd ops: dont discard pins
Its more likely that the user will rerun their workflow and want
whatever pins are inplace so remove this. pins still have to respect
RAM pressure per model anyway.
2026-01-27 14:11:42 +10:00
Rattus
26dc3a20c6 mm: Dont GPU load models
Aimdo will do this on demand as 0 copy. Remove the special case for
vram > ram.
2026-01-27 14:10:54 +10:00
Rattus
db99ab48c2 fix missing import 2026-01-27 14:07:15 +10:00
3 changed files with 62 additions and 7 deletions

View File

@ -27,6 +27,7 @@ import weakref
import gc
import os
from contextlib import nullcontext
import comfy.memory_management
import comfy.utils
import comfy.quant_ops
@ -584,9 +585,15 @@ WINDOWS = any(platform.win32_ver())
EXTRA_RESERVED_VRAM = 400 * 1024 * 1024
if WINDOWS:
import comfy.windows
EXTRA_RESERVED_VRAM = 600 * 1024 * 1024 #Windows is higher because of the shared vram issue
if total_vram > (15 * 1024): # more extra reserved vram on 16GB+ cards
EXTRA_RESERVED_VRAM += 100 * 1024 * 1024
def get_free_ram():
return comfy.windows.get_free_ram()
else:
def get_free_ram():
return psutil.virtual_memory().available
if args.reserve_vram is not None:
EXTRA_RESERVED_VRAM = args.reserve_vram * 1024 * 1024 * 1024
@ -617,7 +624,7 @@ def free_memory(memory_required, device, keep_loaded=[], for_dynamic=False, ram_
ram_to_free = 1e32
if not DISABLE_SMART_MEMORY:
memory_to_free = memory_required - get_free_memory(device)
ram_to_free = ram_required - psutil.virtual_memory().available
ram_to_free = ram_required - get_free_ram()
if current_loaded_models[i].model.is_dynamic() and for_dynamic:
#don't actually unload dynamic models for the sake of other dynamic models
@ -822,7 +829,7 @@ def unet_inital_load_device(parameters, dtype):
mem_dev = get_free_memory(torch_dev)
mem_cpu = get_free_memory(cpu_dev)
if mem_dev > mem_cpu and model_size < mem_dev:
if mem_dev > mem_cpu and model_size < mem_dev and comfy.memory_management.aimdo_allocator is None:
return torch_dev
else:
return cpu_dev

View File

@ -131,11 +131,7 @@ def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compu
xfer_dest = cast_dest
pin = None
if signature is not None:
#If we are able to increase our load level (e.g. user reduces resolution or batch number)
#reclaim the pin previously used for offload.
comfy.pinned_memory.unpin_memory(s)
elif not resident:
if signature is None and not resident:
#prepare a new pin
assert comfy.pinned_memory.get_pin(s) is None
comfy.pinned_memory.pin_memory(s)

52
comfy/windows.py Normal file
View File

@ -0,0 +1,52 @@
import ctypes
import logging
import psutil
from ctypes import wintypes
import comfy_aimdo.control
psapi = ctypes.WinDLL("psapi")
kernel32 = ctypes.WinDLL("kernel32")
class PERFORMANCE_INFORMATION(ctypes.Structure):
_fields_ = [
("cb", wintypes.DWORD),
("CommitTotal", ctypes.c_size_t),
("CommitLimit", ctypes.c_size_t),
("CommitPeak", ctypes.c_size_t),
("PhysicalTotal", ctypes.c_size_t),
("PhysicalAvailable", ctypes.c_size_t),
("SystemCache", ctypes.c_size_t),
("KernelTotal", ctypes.c_size_t),
("KernelPaged", ctypes.c_size_t),
("KernelNonpaged", ctypes.c_size_t),
("PageSize", ctypes.c_size_t),
("HandleCount", wintypes.DWORD),
("ProcessCount", wintypes.DWORD),
("ThreadCount", wintypes.DWORD),
]
def get_free_ram():
#Windows is way too conservative and chalks recently used uncommitted model RAM
#as "in-use". So, calculate free RAM for the sake of general use as the greater of:
#
#1: What psutil says
#2: Total Memory - (Committed Memory - VRAM in use)
#
#We have to subtract VRAM in use from the comitted memory as WDDM creates a naked
#commit charge for all VRAM used just incase it wants to page it all out. This just
#isn't realistic so "overcommit" on our calculations by just subtracting it off.
pi = PERFORMANCE_INFORMATION()
pi.cb = ctypes.sizeof(pi)
if not psapi.GetPerformanceInfo(ctypes.byref(pi), pi.cb):
logging.warning("WARNING: Failed to query windows performance info. RAM usage may be sub optimal")
return psutil.virtual_memory().available
committed = pi.CommitTotal * pi.PageSize
total = pi.PhysicalTotal * pi.PageSize
return max(psutil.virtual_memory().available,
total - (committed - comfy_aimdo.control.get_total_vram_usage()))