diff --git a/comfy/model_management.py b/comfy/model_management.py index dfd58bf1b..8e786c0a5 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -651,8 +651,7 @@ def ensure_pin_budget(size, evict_active=False): to_free = shortfall + PIN_PRESSURE_HYSTERESIS return free_pins(to_free, evict_active=evict_active) >= shortfall -def ensure_pin_registerable(size, evict_active=True): - shortfall = TOTAL_PINNED_MEMORY + size - MAX_PINNED_MEMORY +def free_registrations(shortfall, evict_active=True): if MAX_PINNED_MEMORY <= 0: return False if shortfall <= 0: @@ -674,6 +673,9 @@ def ensure_pin_registerable(size, evict_active=True): return True return shortfall <= REGISTERABLE_PIN_HYSTERESIS +def ensure_pin_registerable(size, evict_active=True): + return free_registrations(TOTAL_PINNED_MEMORY + size - MAX_PINNED_MEMORY, evict_active=evict_active) + class LoadedModel: def __init__(self, model: ModelPatcher): self._set_model(model) diff --git a/comfy/pinned_memory.py b/comfy/pinned_memory.py index ffe12e0dc..cb77c517a 100644 --- a/comfy/pinned_memory.py +++ b/comfy/pinned_memory.py @@ -89,13 +89,26 @@ def pin_memory(module, subset="weights", size=None): not comfy.model_management.ensure_pin_registerable(registerable_size)): return _steal_pin(module, stack, buckets, size, priority) + extended = False try: - hostbuf.extend(size=size) + hostbuf.extend(size=size, register=False) + extended = True + pin = comfy_aimdo.torch.hostbuf_to_tensor(hostbuf)[offset:offset + size] + pin.untyped_storage()._comfy_hostbuf = hostbuf + if torch.cuda.cudart().cudaHostRegister(pin.data_ptr(), size, 1) != 0: + comfy.model_management.discard_cuda_async_error() + comfy.model_management.free_registrations(size) + if torch.cuda.cudart().cudaHostRegister(pin.data_ptr(), size, 1) != 0: + comfy.model_management.discard_cuda_async_error() + del pin + hostbuf.truncate(offset, do_unregister=False) + return _steal_pin(module, stack, buckets, size, priority) except RuntimeError: + if extended: + hostbuf.truncate(offset, do_unregister=False) return _steal_pin(module, stack, buckets, size, priority) - module._pin = comfy_aimdo.torch.hostbuf_to_tensor(hostbuf)[offset:offset + size] - module._pin.untyped_storage()._comfy_hostbuf = hostbuf + module._pin = pin stack.append((module, offset)) module._pin_registered = True module._pin_stack_index = len(stack) - 1