diff --git a/comfy/model_management.py b/comfy/model_management.py
index 697359d3a..f358621c9 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -513,6 +513,17 @@ def mark_mmap_dirty(storage):
     if mmap_refs is not None:
         DIRTY_MMAPS.add(mmap_refs[0])
 
+def free_pins(size, evict_active=False):
+    """Have dynamic models (reverse list order) unload RAM until `size` is covered."""
+    for loaded_model in reversed(current_loaded_models):
+        if size <= 0:
+            break
+        patcher = loaded_model.model
+        if patcher is None or not patcher.is_dynamic():
+            continue
+        if evict_active or not patcher.model.dynamic_pins[patcher.load_device]["active"]:  # spare active pins by default
+            size -= patcher.partially_unload_ram(size)
+
 def ensure_pin_budget(size, evict_active=False):
     if MAX_PINNED_MEMORY <= 0:
         return
@@ -521,13 +532,7 @@ def ensure_pin_budget(size, evict_active=False):
     if shortfall <= 0:
         return
 
-    shortfall += PIN_PRESSURE_HYSTERESIS
-    for loaded_model in reversed(current_loaded_models):
-        model = loaded_model.model
-        if model is not None and model.is_dynamic() and (evict_active or not model.model.dynamic_pins[model.load_device]["active"]):
-            shortfall -= model.partially_unload_ram(shortfall)
-            if shortfall <= 0:
-                break
+    free_pins(shortfall + PIN_PRESSURE_HYSTERESIS, evict_active=evict_active)
 
 class LoadedModel:
     def __init__(self, model):
diff --git a/execution.py b/execution.py
index f37d0360d..5605f09e7 100644
--- a/execution.py
+++ b/execution.py
@@ -2,6 +2,7 @@ import copy
 import heapq
 import inspect
 import logging
+import psutil
 import sys
 import threading
 import time
@@ -780,7 +781,9 @@ class PromptExecutor:
                         execution_list.complete_node_execution()
 
                     if self.cache_type == CacheType.RAM_PRESSURE:
-                        comfy.model_management.free_memory(0, None, pins_required=ram_headroom, ram_required=ram_headroom)
+                        ram_release_callback(ram_headroom)
+                        ram_shortfall = ram_headroom - psutil.virtual_memory().available
+                        comfy.model_management.free_pins(ram_shortfall)
                         ram_release_callback(ram_headroom, free_active=True)
                 else:
                     # Only execute when the while-loop ends without break