execution: implement pin eviction on RAM pressure

Add back proper pin freeing on RAM pressure
This commit is contained in:
Rattus 2026-05-13 21:57:35 +10:00
parent d61026d020
commit 3f717816e1
2 changed files with 16 additions and 8 deletions

View File

@ -513,6 +513,17 @@ def mark_mmap_dirty(storage):
if mmap_refs is not None: if mmap_refs is not None:
DIRTY_MMAPS.add(mmap_refs[0]) DIRTY_MMAPS.add(mmap_refs[0])
def free_pins(size, evict_active=False):
    """Release pinned RAM held by loaded dynamic models.

    Walks ``current_loaded_models`` from last to first and calls
    ``partially_unload_ram`` on each eligible model until the requested
    amount has been accounted for.

    Args:
        size: Amount still to be released. A value of zero or less makes
            this a no-op.
        evict_active: When false, models whose ``dynamic_pins`` entry for
            their load device is flagged ``"active"`` are skipped; when
            true they are eligible too.
    """
    remaining = size
    if remaining <= 0:
        return

    for entry in reversed(current_loaded_models):
        patcher = entry.model
        # Only live, dynamic models take part in pin eviction.
        if patcher is None or not patcher.is_dynamic():
            continue
        # Leave actively used pins alone unless the caller insists.
        if not evict_active and patcher.model.dynamic_pins[patcher.load_device]["active"]:
            continue
        # NOTE(review): partially_unload_ram presumably returns the amount
        # it actually released -- confirm against its definition.
        remaining -= patcher.partially_unload_ram(remaining)
        if remaining <= 0:
            return
def ensure_pin_budget(size, evict_active=False): def ensure_pin_budget(size, evict_active=False):
if MAX_PINNED_MEMORY <= 0: if MAX_PINNED_MEMORY <= 0:
return return
@ -521,13 +532,7 @@ def ensure_pin_budget(size, evict_active=False):
if shortfall <= 0: if shortfall <= 0:
return return
shortfall += PIN_PRESSURE_HYSTERESIS free_pins(shortfall + PIN_PRESSURE_HYSTERESIS, evict_active=evict_active)
for loaded_model in reversed(current_loaded_models):
model = loaded_model.model
if model is not None and model.is_dynamic() and (evict_active or not model.model.dynamic_pins[model.load_device]["active"]):
shortfall -= model.partially_unload_ram(shortfall)
if shortfall <= 0:
break
class LoadedModel: class LoadedModel:
def __init__(self, model): def __init__(self, model):

View File

@ -2,6 +2,7 @@ import copy
import heapq import heapq
import inspect import inspect
import logging import logging
import psutil
import sys import sys
import threading import threading
import time import time
@ -780,7 +781,9 @@ class PromptExecutor:
execution_list.complete_node_execution() execution_list.complete_node_execution()
if self.cache_type == CacheType.RAM_PRESSURE: if self.cache_type == CacheType.RAM_PRESSURE:
comfy.model_management.free_memory(0, None, pins_required=ram_headroom, ram_required=ram_headroom) ram_release_callback(ram_headroom)
ram_shortfall = ram_headroom - psutil.virtual_memory().available
comfy.model_management.free_pins(ram_shortfall)
ram_release_callback(ram_headroom, free_active=True) ram_release_callback(ram_headroom, free_active=True)
else: else:
# Only execute when the while-loop ends without break # Only execute when the while-loop ends without break