Factor pin eviction into free_pins() and call it from the RAM_PRESSURE path.

comfy/model_management.py
  * Adds free_pins(size, evict_active=False). It returns immediately when
    size <= 0. Otherwise it walks current_loaded_models from last to first
    and, for every dynamic model whose dynamic_pins entry for its load device
    is not "active" (or for every dynamic model when evict_active is true),
    subtracts the value returned by partially_unload_ram(size) from size,
    stopping as soon as size <= 0.
  * ensure_pin_budget() no longer carries its own eviction loop; it calls
    free_pins(shortfall + PIN_PRESSURE_HYSTERESIS, evict_active=evict_active).

execution.py
  * Imports psutil.
  * In the CacheType.RAM_PRESSURE branch the free_memory(...) call is replaced
    by ram_release_callback(ram_headroom) followed by free_pins() on
    ram_headroom - psutil.virtual_memory().available. A non-positive
    shortfall is a no-op because of the guard at the top of free_pins().
    The existing ram_release_callback(ram_headroom, free_active=True) call
    still runs afterwards.

NOTE(review): this patch was recovered from a copy in which line breaks and
leading whitespace had been collapsed. Hunk line counts agree with the hunk
headers, but indentation (in particular the nesting depth of the execution.py
hunk) was reconstructed from Python block structure. Verify it against the
target files before applying.

diff --git a/comfy/model_management.py b/comfy/model_management.py
index 697359d3a..f358621c9 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -513,6 +513,17 @@ def mark_mmap_dirty(storage):
     if mmap_refs is not None:
         DIRTY_MMAPS.add(mmap_refs[0])
 
+def free_pins(size, evict_active=False):
+    if size <= 0:
+        return
+
+    for loaded_model in reversed(current_loaded_models):
+        model = loaded_model.model
+        if model is not None and model.is_dynamic() and (evict_active or not model.model.dynamic_pins[model.load_device]["active"]):
+            size -= model.partially_unload_ram(size)
+            if size <= 0:
+                break
+
 def ensure_pin_budget(size, evict_active=False):
     if MAX_PINNED_MEMORY <= 0:
         return
@@ -521,13 +532,7 @@ def ensure_pin_budget(size, evict_active=False):
     if shortfall <= 0:
         return
 
-    shortfall += PIN_PRESSURE_HYSTERESIS
-    for loaded_model in reversed(current_loaded_models):
-        model = loaded_model.model
-        if model is not None and model.is_dynamic() and (evict_active or not model.model.dynamic_pins[model.load_device]["active"]):
-            shortfall -= model.partially_unload_ram(shortfall)
-            if shortfall <= 0:
-                break
+    free_pins(shortfall + PIN_PRESSURE_HYSTERESIS, evict_active=evict_active)
 
 class LoadedModel:
     def __init__(self, model):
diff --git a/execution.py b/execution.py
index f37d0360d..5605f09e7 100644
--- a/execution.py
+++ b/execution.py
@@ -2,6 +2,7 @@ import copy
 import heapq
 import inspect
 import logging
+import psutil
 import sys
 import threading
 import time
@@ -780,7 +781,9 @@ class PromptExecutor:
                         execution_list.complete_node_execution()
 
                     if self.cache_type == CacheType.RAM_PRESSURE:
-                        comfy.model_management.free_memory(0, None, pins_required=ram_headroom, ram_required=ram_headroom)
+                        ram_release_callback(ram_headroom)
+                        ram_shortfall = ram_headroom - psutil.virtual_memory().available
+                        comfy.model_management.free_pins(ram_shortfall)
                         ram_release_callback(ram_headroom, free_active=True)
                 else:
                     # Only execute when the while-loop ends without break