mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-06-10 00:07:33 +08:00
Fix interoperation with external source of pinned memory pressure (#14252)
* mm: split off registration helper to doer and headroom calc * pinned_memory: implement registration comfy side Move away from Aimdo buffer registrations which seem fraught with danger and do it comfy side. Just start with the basic move. * pinned_memory: do registrations as portable memory * pinned_memory: discard async errors on registration fail Like the good ol days. * pinned_memory: implement abs shortfall retry If pinned registration happens to fail despite the previous budget ensures, consider the allocation shortfall, ensure it again, and try again. This allows comfy pins to interoperate with other software that might be doing substantive pinning.
This commit is contained in:
parent
5aa71b9bc2
commit
410df27253
@ -651,8 +651,7 @@ def ensure_pin_budget(size, evict_active=False):
|
||||
to_free = shortfall + PIN_PRESSURE_HYSTERESIS
|
||||
return free_pins(to_free, evict_active=evict_active) >= shortfall
|
||||
|
||||
def ensure_pin_registerable(size, evict_active=True):
|
||||
shortfall = TOTAL_PINNED_MEMORY + size - MAX_PINNED_MEMORY
|
||||
def free_registrations(shortfall, evict_active=True):
|
||||
if MAX_PINNED_MEMORY <= 0:
|
||||
return False
|
||||
if shortfall <= 0:
|
||||
@ -674,6 +673,9 @@ def ensure_pin_registerable(size, evict_active=True):
|
||||
return True
|
||||
return shortfall <= REGISTERABLE_PIN_HYSTERESIS
|
||||
|
||||
def ensure_pin_registerable(size, evict_active=True):
|
||||
return free_registrations(TOTAL_PINNED_MEMORY + size - MAX_PINNED_MEMORY, evict_active=evict_active)
|
||||
|
||||
class LoadedModel:
|
||||
def __init__(self, model: ModelPatcher):
|
||||
self._set_model(model)
|
||||
|
||||
@ -89,13 +89,26 @@ def pin_memory(module, subset="weights", size=None):
|
||||
not comfy.model_management.ensure_pin_registerable(registerable_size)):
|
||||
return _steal_pin(module, stack, buckets, size, priority)
|
||||
|
||||
extended = False
|
||||
try:
|
||||
hostbuf.extend(size=size)
|
||||
hostbuf.extend(size=size, register=False)
|
||||
extended = True
|
||||
pin = comfy_aimdo.torch.hostbuf_to_tensor(hostbuf)[offset:offset + size]
|
||||
pin.untyped_storage()._comfy_hostbuf = hostbuf
|
||||
if torch.cuda.cudart().cudaHostRegister(pin.data_ptr(), size, 1) != 0:
|
||||
comfy.model_management.discard_cuda_async_error()
|
||||
comfy.model_management.free_registrations(size)
|
||||
if torch.cuda.cudart().cudaHostRegister(pin.data_ptr(), size, 1) != 0:
|
||||
comfy.model_management.discard_cuda_async_error()
|
||||
del pin
|
||||
hostbuf.truncate(offset, do_unregister=False)
|
||||
return _steal_pin(module, stack, buckets, size, priority)
|
||||
except RuntimeError:
|
||||
if extended:
|
||||
hostbuf.truncate(offset, do_unregister=False)
|
||||
return _steal_pin(module, stack, buckets, size, priority)
|
||||
|
||||
module._pin = comfy_aimdo.torch.hostbuf_to_tensor(hostbuf)[offset:offset + size]
|
||||
module._pin.untyped_storage()._comfy_hostbuf = hostbuf
|
||||
module._pin = pin
|
||||
stack.append((module, offset))
|
||||
module._pin_registered = True
|
||||
module._pin_stack_index = len(stack) - 1
|
||||
|
||||
Loading…
Reference in New Issue
Block a user