mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-06-21 15:29:32 +08:00
specify hostbuf max allocation size
There are signs of virtual memory exhaustion on some Linux systems when throwing 128GB at every little piece. Pass the actual size to save aimdo from over-estimating.
This commit is contained in:
parent
c451854f84
commit
09a98a9539
@ -60,7 +60,8 @@ def read_tensor_file_slice_into(tensor, destination, stream=None, destination2=N
|
|||||||
hostbuf.read_file_slice(file_obj, info.offset, info.size,
|
hostbuf.read_file_slice(file_obj, info.offset, info.size,
|
||||||
offset=destination.data_ptr() - hostbuf.get_raw_address(),
|
offset=destination.data_ptr() - hostbuf.get_raw_address(),
|
||||||
stream=stream_ptr,
|
stream=stream_ptr,
|
||||||
device_ptr=device_ptr)
|
device_ptr=device_ptr,
|
||||||
|
device=None if destination2 is None else destination2.device.index)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
buf_type = ctypes.c_ubyte * info.size
|
buf_type = ctypes.c_ubyte * info.size
|
||||||
|
|||||||
@ -1217,7 +1217,7 @@ def get_aimdo_cast_buffer(offload_stream, device):
|
|||||||
def get_pin_buffer(offload_stream):
|
def get_pin_buffer(offload_stream):
|
||||||
pin_buffer = STREAM_PIN_BUFFERS.get(offload_stream, None)
|
pin_buffer = STREAM_PIN_BUFFERS.get(offload_stream, None)
|
||||||
if pin_buffer is None:
|
if pin_buffer is None:
|
||||||
pin_buffer = comfy_aimdo.host_buffer.HostBuffer(0)
|
pin_buffer = comfy_aimdo.host_buffer.HostBuffer(0, 0, pinned_hostbuf_size(8 * 1024**3))
|
||||||
STREAM_PIN_BUFFERS[offload_stream] = pin_buffer
|
STREAM_PIN_BUFFERS[offload_stream] = pin_buffer
|
||||||
elif offload_stream is not None:
|
elif offload_stream is not None:
|
||||||
event = getattr(pin_buffer, "_comfy_event", None)
|
event = getattr(pin_buffer, "_comfy_event", None)
|
||||||
@ -1267,7 +1267,7 @@ def reset_cast_buffers():
|
|||||||
if model is not None and model.is_dynamic():
|
if model is not None and model.is_dynamic():
|
||||||
model.model.dynamic_pins[model.load_device]["active"] = False
|
model.model.dynamic_pins[model.load_device]["active"] = False
|
||||||
model.partially_unload_ram(1e30, subsets=[ "patches" ])
|
model.partially_unload_ram(1e30, subsets=[ "patches" ])
|
||||||
model.model.dynamic_pins[model.load_device]["patches"] = (comfy_aimdo.host_buffer.HostBuffer(0, 8 * 1024 * 1024), [], [-1], [0])
|
model.model.dynamic_pins[model.load_device]["patches"] = (comfy_aimdo.host_buffer.HostBuffer(0, 8 * 1024 * 1024, pinned_hostbuf_size(model.model_size())), [], [-1], [0])
|
||||||
|
|
||||||
STREAM_CAST_BUFFERS.clear()
|
STREAM_CAST_BUFFERS.clear()
|
||||||
STREAM_AIMDO_CAST_BUFFERS.clear()
|
STREAM_AIMDO_CAST_BUFFERS.clear()
|
||||||
@ -1388,6 +1388,9 @@ if not args.disable_pinned_memory:
|
|||||||
|
|
||||||
PINNING_ALLOWED_TYPES = set(["Tensor", "Parameter", "QuantizedTensor"])
|
PINNING_ALLOWED_TYPES = set(["Tensor", "Parameter", "QuantizedTensor"])
|
||||||
|
|
||||||
|
def pinned_hostbuf_size(size):
|
||||||
|
return max(0, int(min(size, MAX_PINNED_MEMORY) * 2))
|
||||||
|
|
||||||
def discard_cuda_async_error():
|
def discard_cuda_async_error():
|
||||||
try:
|
try:
|
||||||
a = torch.tensor([1], dtype=torch.uint8, device=get_torch_device())
|
a = torch.tensor([1], dtype=torch.uint8, device=get_torch_device())
|
||||||
|
|||||||
@ -1561,8 +1561,9 @@ class ModelPatcherDynamic(ModelPatcher):
|
|||||||
self.model.dynamic_pins = {}
|
self.model.dynamic_pins = {}
|
||||||
if self.load_device not in self.model.dynamic_pins:
|
if self.load_device not in self.model.dynamic_pins:
|
||||||
self.model.dynamic_pins[self.load_device] = {
|
self.model.dynamic_pins[self.load_device] = {
|
||||||
"weights": (comfy_aimdo.host_buffer.HostBuffer(0, 64 * 1024 * 1024), [], [-1], [0]),
|
"weights": (comfy_aimdo.host_buffer.HostBuffer(0, 0, 0), [], [-1], [0]),
|
||||||
"patches": (comfy_aimdo.host_buffer.HostBuffer(0, 8 * 1024 * 1024), [], [-1], [0]),
|
"patches": (comfy_aimdo.host_buffer.HostBuffer(0, 0, 0), [], [-1], [0]),
|
||||||
|
"hostbufs_initialized": False,
|
||||||
"failed": False,
|
"failed": False,
|
||||||
"active": False,
|
"active": False,
|
||||||
}
|
}
|
||||||
@ -1628,6 +1629,11 @@ class ModelPatcherDynamic(ModelPatcher):
|
|||||||
|
|
||||||
vbar = self._vbar_get(create=True)
|
vbar = self._vbar_get(create=True)
|
||||||
pin_state = self.model.dynamic_pins[self.load_device]
|
pin_state = self.model.dynamic_pins[self.load_device]
|
||||||
|
if not pin_state["hostbufs_initialized"]:
|
||||||
|
hostbuf_size = comfy.model_management.pinned_hostbuf_size(self.model_size())
|
||||||
|
pin_state["weights"] = (comfy_aimdo.host_buffer.HostBuffer(0, 64 * 1024 * 1024, hostbuf_size), [], [-1], [0])
|
||||||
|
pin_state["patches"] = (comfy_aimdo.host_buffer.HostBuffer(0, 8 * 1024 * 1024, hostbuf_size), [], [-1], [0])
|
||||||
|
pin_state["hostbufs_initialized"] = True
|
||||||
pin_state["failed"] = False
|
pin_state["failed"] = False
|
||||||
pin_state["active"] = True
|
pin_state["active"] = True
|
||||||
if vbar is not None:
|
if vbar is not None:
|
||||||
|
|||||||
@ -23,7 +23,7 @@ SQLAlchemy>=2.0.0
|
|||||||
filelock
|
filelock
|
||||||
av>=14.2.0
|
av>=14.2.0
|
||||||
comfy-kitchen>=0.2.8
|
comfy-kitchen>=0.2.8
|
||||||
comfy-aimdo==0.4.2
|
comfy-aimdo==0.4.3
|
||||||
requests
|
requests
|
||||||
simpleeval>=1.0.0
|
simpleeval>=1.0.0
|
||||||
blake3
|
blake3
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user