diff --git a/comfy/model_management.py b/comfy/model_management.py
index f358621c9..19a916362 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -498,7 +498,11 @@ current_loaded_models = []
 
 DIRTY_MMAPS = set()
 
-PIN_PRESSURE_HYSTERESIS = 128 * 1024 * 1024
+PIN_PRESSURE_HYSTERESIS = 256 * 1024 * 1024
+
+# Unregistering pins under pressure implies a GPU sync, so use a large
+# hysteresis so that each expensive sync gives us back a good chunk.
+REGISTERABLE_PIN_HYSTERESIS = 768 * 1024 * 1024
 
 def module_size(module):
     module_mem = 0
@@ -525,15 +529,28 @@ def free_pins(size, evict_active=False):
                 break
 
 def ensure_pin_budget(size, evict_active=False):
-    if MAX_PINNED_MEMORY <= 0:
+    if MAX_MODEL_MEMORY <= 0:
         return
 
-    shortfall = TOTAL_PINNED_MEMORY + size - MAX_PINNED_MEMORY
+    shortfall = TOTAL_MODEL_MEMORY + size - MAX_MODEL_MEMORY
     if shortfall <= 0:
         return
 
     free_pins(shortfall + PIN_PRESSURE_HYSTERESIS, evict_active=evict_active)
 
+def ensure_pin_registerable(size, evict_active=False):
+    """Unregister pins of loaded dynamic models so `size` more bytes fit under MAX_PINNED_MEMORY."""
+    if MAX_PINNED_MEMORY <= 0 or TOTAL_PINNED_MEMORY + size <= MAX_PINNED_MEMORY:
+        return
+    # Ask for the shortfall plus the hysteresis so one pass reclaims a sizeable chunk.
+    remaining = TOTAL_PINNED_MEMORY + size - MAX_PINNED_MEMORY + REGISTERABLE_PIN_HYSTERESIS
+    for loaded in reversed(current_loaded_models):
+        patcher = loaded.model
+        if patcher is not None and patcher.is_dynamic() and (evict_active or not patcher.model.dynamic_pins[patcher.load_device]["active"]):
+            remaining -= patcher.unregister_inactive_pins(remaining)
+            if remaining <= 0:
+                return
+
 class LoadedModel:
     def __init__(self, model):
         self._set_model(model)
@@ -1208,22 +1225,24 @@ def get_pin_buffer(offload_stream):
     return pin_buffer
 
 def resize_pin_buffer(pin_buffer, size):
-    global TOTAL_PINNED_MEMORY
+    global TOTAL_MODEL_MEMORY, TOTAL_PINNED_MEMORY
     old_size = pin_buffer.size
     if size <= old_size:
         return True
     growth = size - old_size
     comfy.memory_management.extra_ram_release(comfy.memory_management.RAM_CACHE_HEADROOM, free_active=True)
     ensure_pin_budget(growth, evict_active=True)
+    ensure_pin_registerable(growth, evict_active=True)
     try:
         pin_buffer.extend(size=size, reallocate=True)
     except RuntimeError:
         return False
+    TOTAL_MODEL_MEMORY += pin_buffer.size - old_size
     TOTAL_PINNED_MEMORY += pin_buffer.size - old_size
     return True
 
 def reset_cast_buffers():
-    global TOTAL_PINNED_MEMORY
+    global TOTAL_MODEL_MEMORY, TOTAL_PINNED_MEMORY
     global LARGEST_CASTED_WEIGHT
     global LARGEST_AIMDO_CASTED_WEIGHT
 
@@ -1239,16 +1258,17 @@ def reset_cast_buffers():
     DIRTY_MMAPS.clear()
 
     for pin_buffer in STREAM_PIN_BUFFERS.values():
+        TOTAL_MODEL_MEMORY -= pin_buffer.size
         TOTAL_PINNED_MEMORY -= pin_buffer.size
-    if TOTAL_PINNED_MEMORY < 0:
-        TOTAL_PINNED_MEMORY = 0
+    TOTAL_MODEL_MEMORY = max(0, TOTAL_MODEL_MEMORY)
+    TOTAL_PINNED_MEMORY = max(0, TOTAL_PINNED_MEMORY)
 
     for loaded_model in current_loaded_models:
         model = loaded_model.model
         if model is not None and model.is_dynamic():
             model.model.dynamic_pins[model.load_device]["active"] = False
             model.partially_unload_ram(1e30, subsets=[ "patches" ])
-            model.model.dynamic_pins[model.load_device]["patches"] = (comfy_aimdo.host_buffer.HostBuffer(0, 8 * 1024 * 1024), [])
+            model.model.dynamic_pins[model.load_device]["patches"] = (comfy_aimdo.host_buffer.HostBuffer(0, 8 * 1024 * 1024), [], [-1])
 
     STREAM_CAST_BUFFERS.clear()
     STREAM_AIMDO_CAST_BUFFERS.clear()
@@ -1352,14 +1372,18 @@ def cast_to_device(tensor, device, dtype, copy=False):
 
 
 PINNED_MEMORY = {}
+TOTAL_MODEL_MEMORY = 0
 TOTAL_PINNED_MEMORY = 0
+MAX_MODEL_MEMORY = -1
 MAX_PINNED_MEMORY = -1
 if not args.disable_pinned_memory:
     if is_nvidia() or is_amd():
+        ram = get_total_memory(torch.device("cpu"))
+        MAX_MODEL_MEMORY = min(ram - 4 * 1024 * 1024 * 1024, ram * 0.90)
         if WINDOWS:
-            MAX_PINNED_MEMORY = get_total_memory(torch.device("cpu")) * 0.40  # Windows limit is apparently 50%
+            MAX_PINNED_MEMORY = ram * 0.40  # Windows limit is apparently 50%
         else:
-            MAX_PINNED_MEMORY = get_total_memory(torch.device("cpu")) * 0.90
+            MAX_PINNED_MEMORY = ram * 0.90
         logging.info("Enabled pinned memory {}".format(MAX_PINNED_MEMORY // (1024 * 1024)))
 
 PINNING_ALLOWED_TYPES = set(["Tensor", "Parameter", "QuantizedTensor"])
@@ -1396,7 +1420,7 @@ def pin_memory(tensor):
 
     size = tensor.nbytes
     comfy.memory_management.extra_ram_release(comfy.memory_management.RAM_CACHE_HEADROOM)
-    ensure_pin_budget(size)
+    ensure_pin_registerable(size)
 
     ptr = tensor.data_ptr()
     if ptr == 0:
@@ -1433,7 +1457,8 @@ def unpin_memory(tensor):
         return False
 
     if torch.cuda.cudart().cudaHostUnregister(ptr) == 0:
-        TOTAL_PINNED_MEMORY -= PINNED_MEMORY.pop(ptr)
+        size = PINNED_MEMORY.pop(ptr)
+        TOTAL_PINNED_MEMORY -= size
         return True
     else:
         logging.warning("Unpin error.")
diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py
index f4845bb43..7dc4d7801 100644
--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@@ -1557,8 +1557,8 @@ class ModelPatcherDynamic(ModelPatcher):
             self.model.dynamic_pins = {}
         if self.load_device not in self.model.dynamic_pins:
             self.model.dynamic_pins[self.load_device] = {
-                "weights": (comfy_aimdo.host_buffer.HostBuffer(0, 64 * 1024 * 1024), []),
-                "patches": (comfy_aimdo.host_buffer.HostBuffer(0, 8 * 1024 * 1024), []),
+                "weights": (comfy_aimdo.host_buffer.HostBuffer(0, 64 * 1024 * 1024), [], [-1]),
+                "patches": (comfy_aimdo.host_buffer.HostBuffer(0, 8 * 1024 * 1024), [], [-1]),
                 "failed": False,
                 "active": False,
             }
@@ -1761,19 +1761,44 @@ class ModelPatcherDynamic(ModelPatcher):
         return (self.model.dynamic_pins[self.load_device]["weights"][0].size +
                 self.model.dynamic_pins[self.load_device]["patches"][0].size)
 
+    def unregister_inactive_pins(self, ram_to_unload, subsets=[ "weights", "patches" ]):
+        """Drop the CUDA host registration of this model's pins, walking each subset's stack down
+        from its split index, until ram_to_unload bytes are unregistered. Returns bytes unregistered."""
+        unregistered = 0
+        pin_state = self.model.dynamic_pins[self.load_device]
+        for subset in subsets:
+            _hostbuf, stack, stack_split = pin_state[subset]
+            while stack_split[0] >= 0:
+                module, _offset = stack[stack_split[0]]
+                stack_split[0] -= 1
+                if not module._pin_registered:
+                    continue
+                size = module._pin.numel() * module._pin.element_size()
+                if torch.cuda.cudart().cudaHostUnregister(module._pin.data_ptr()) != 0:
+                    comfy.model_management.discard_cuda_async_error()
+                    continue
+                module._pin_registered = False
+                comfy.model_management.TOTAL_PINNED_MEMORY = max(0, comfy.model_management.TOTAL_PINNED_MEMORY - size)
+                unregistered += size
+                ram_to_unload -= size
+                if ram_to_unload <= 0:
+                    return unregistered
+        return unregistered
+
     def partially_unload_ram(self, ram_to_unload, subsets=[ "weights", "patches" ]):
         freed = 0
         pin_state = self.model.dynamic_pins[self.load_device]
         for subset in subsets:
-            hostbuf, stack = pin_state[subset]
+            hostbuf, stack, stack_split = pin_state[subset]
             while len(stack) > 0:
                 module, offset = stack.pop()
                 size = module._pin.numel() * module._pin.element_size()
                 del module._pin
-                hostbuf.truncate(offset)
-                comfy.model_management.TOTAL_PINNED_MEMORY -= size
-                if comfy.model_management.TOTAL_PINNED_MEMORY < 0:
-                    comfy.model_management.TOTAL_PINNED_MEMORY = 0
+                hostbuf.truncate(offset, do_unregister=module._pin_registered)
+                stack_split[0] = min(stack_split[0], len(stack) - 1)
+                comfy.model_management.TOTAL_MODEL_MEMORY = max(0, comfy.model_management.TOTAL_MODEL_MEMORY - size)
+                if module._pin_registered:
+                    comfy.model_management.TOTAL_PINNED_MEMORY = max(0, comfy.model_management.TOTAL_PINNED_MEMORY - size)
                 freed += size
                 ram_to_unload -= size
                 if ram_to_unload <= 0:
diff --git a/comfy/pinned_memory.py b/comfy/pinned_memory.py
index 35cbbcd9e..8fe69916f 100644
--- a/comfy/pinned_memory.py
+++ b/comfy/pinned_memory.py
@@ -2,6 +2,7 @@ import comfy.model_management
 import comfy.memory_management
 import comfy_aimdo.host_buffer
 import comfy_aimdo.torch
+import torch
 
 from comfy.cli_args import args
 
@@ -10,15 +11,33 @@ def get_pin(module, subset="weights"):
 
 def pin_memory(module, subset="weights", size=None):
     pin_state = module._pin_state
-    if pin_state["failed"] or args.disable_pinned_memory or get_pin(module, subset) is not None:
+    if pin_state["failed"] or args.disable_pinned_memory:
         return
 
-    hostbuf, stack = pin_state[subset]
+    hostbuf, stack, stack_split = pin_state[subset]
+    pin = get_pin(module, subset)
+    if pin is not None:
+        if module._pin_registered:
+            return
+
+        size = module._pin.nbytes
+        comfy.model_management.ensure_pin_registerable(size)
+
+        if torch.cuda.cudart().cudaHostRegister(module._pin.data_ptr(), size, 1) != 0:
+            comfy.model_management.discard_cuda_async_error()
+            return False
+        module._pin_registered = True
+        stack_split[0] = max(stack_split[0], module._pin_stack_index)
+        comfy.model_management.TOTAL_PINNED_MEMORY += size
+        return True
+
     if size is None:
         size = comfy.memory_management.vram_aligned_size([ module.weight, module.bias ])
     offset = hostbuf.size
+
     comfy.memory_management.extra_ram_release(comfy.memory_management.RAM_CACHE_HEADROOM)
     comfy.model_management.ensure_pin_budget(size)
+    comfy.model_management.ensure_pin_registerable(size)
 
     try:
         hostbuf.extend(size=size)
@@ -29,5 +48,9 @@ def pin_memory(module, subset="weights", size=None):
     module._pin = comfy_aimdo.torch.hostbuf_to_tensor(hostbuf)[offset:offset + size]
     module._pin.untyped_storage()._comfy_hostbuf = hostbuf
     stack.append((module, offset))
+    module._pin_registered = True
+    module._pin_stack_index = len(stack) - 1
+    stack_split[0] = max(stack_split[0], module._pin_stack_index)
+    comfy.model_management.TOTAL_MODEL_MEMORY += size
     comfy.model_management.TOTAL_PINNED_MEMORY += size
     return True