execution: implement pin eviction on RAM pressure

Add back proper pin freeing on RAM pressure
This commit is contained in:
Rattus 2026-05-13 21:57:35 +10:00
parent d61026d020
commit 3f717816e1
2 changed files with 16 additions and 8 deletions

View File

@ -513,6 +513,17 @@ def mark_mmap_dirty(storage):
if mmap_refs is not None:
DIRTY_MMAPS.add(mmap_refs[0])
def free_pins(size, evict_active=False):
    """Release pinned RAM by partially unloading dynamic models.

    Walks ``current_loaded_models`` from most- to least-recently loaded and
    asks each eligible dynamic model to give back pinned memory until at
    least ``size`` bytes have been freed.

    Args:
        size: Number of bytes to free; no-op when <= 0.
        evict_active: When True, models whose pins are marked "active" on
            their load device may also be evicted; otherwise they are skipped.
    """
    remaining = size
    if remaining <= 0:
        return
    for loaded in reversed(current_loaded_models):
        candidate = loaded.model
        if candidate is None or not candidate.is_dynamic():
            continue
        # Skip models with active pins unless the caller explicitly allows
        # evicting them.
        pin_state = candidate.model.dynamic_pins[candidate.load_device]
        if pin_state["active"] and not evict_active:
            continue
        remaining -= candidate.partially_unload_ram(remaining)
        if remaining <= 0:
            break
def ensure_pin_budget(size, evict_active=False):
if MAX_PINNED_MEMORY <= 0:
return
@ -521,13 +532,7 @@ def ensure_pin_budget(size, evict_active=False):
if shortfall <= 0:
return
shortfall += PIN_PRESSURE_HYSTERESIS
for loaded_model in reversed(current_loaded_models):
model = loaded_model.model
if model is not None and model.is_dynamic() and (evict_active or not model.model.dynamic_pins[model.load_device]["active"]):
shortfall -= model.partially_unload_ram(shortfall)
if shortfall <= 0:
break
free_pins(shortfall + PIN_PRESSURE_HYSTERESIS, evict_active=evict_active)
class LoadedModel:
def __init__(self, model):

View File

@ -2,6 +2,7 @@ import copy
import heapq
import inspect
import logging
import psutil
import sys
import threading
import time
@ -780,7 +781,9 @@ class PromptExecutor:
execution_list.complete_node_execution()
if self.cache_type == CacheType.RAM_PRESSURE:
comfy.model_management.free_memory(0, None, pins_required=ram_headroom, ram_required=ram_headroom)
ram_release_callback(ram_headroom)
ram_shortfall = ram_headroom - psutil.virtual_memory().available
comfy.model_management.free_pins(ram_shortfall)
ram_release_callback(ram_headroom, free_active=True)
else:
# Only execute when the while-loop ends without break