diff --git a/comfy/model_management.py b/comfy/model_management.py index b15d08ba1..611fcd5d1 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -1534,7 +1534,14 @@ def pin_memory(tensor): size = tensor.nbytes comfy.memory_management.extra_ram_release(comfy.memory_management.RAM_CACHE_HEADROOM) - ensure_pin_registerable(size) + # Respect the host-RAM budget like the dynamic-VRAM pin path (comfy/pinned_memory.py) + # already does. Without this gate the static load path keeps cudaHostRegister-ing + # offloaded weights toward MAX_PINNED_MEMORY (90% of RAM on Linux) regardless of how + # little RAM is actually free, so unswappable pages fill RAM+swap and large model + # loads stall (issue #13730). When the budget cannot be met, skip pinning and leave + # the weight in pageable RAM (still correct, just not pinned). + if not ensure_pin_budget(size) or not ensure_pin_registerable(size): + return False ptr = tensor.data_ptr() if ptr == 0: