From 09a98a953984a83104c470e2f18d3f00d4f61099 Mon Sep 17 00:00:00 2001 From: Rattus Date: Mon, 18 May 2026 17:38:55 +1000 Subject: [PATCH] specify hostbuf max allocation size There are signs of virtual memory exhaustion on some Linux systems when throwing 128GB for every little piece. Pass the actual size to save aimdo from over-estimates. --- comfy/memory_management.py | 3 ++- comfy/model_management.py | 7 +++++-- comfy/model_patcher.py | 10 ++++++++-- requirements.txt | 2 +- 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/comfy/memory_management.py b/comfy/memory_management.py index 21e3cf59b..c43f0c4a2 100644 --- a/comfy/memory_management.py +++ b/comfy/memory_management.py @@ -60,7 +60,8 @@ def read_tensor_file_slice_into(tensor, destination, stream=None, destination2=N hostbuf.read_file_slice(file_obj, info.offset, info.size, offset=destination.data_ptr() - hostbuf.get_raw_address(), stream=stream_ptr, - device_ptr=device_ptr) + device_ptr=device_ptr, + device=None if destination2 is None else destination2.device.index) return True buf_type = ctypes.c_ubyte * info.size diff --git a/comfy/model_management.py b/comfy/model_management.py index 69ba794c2..3894dfa9c 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -1217,7 +1217,7 @@ def get_aimdo_cast_buffer(offload_stream, device): def get_pin_buffer(offload_stream): pin_buffer = STREAM_PIN_BUFFERS.get(offload_stream, None) if pin_buffer is None: - pin_buffer = comfy_aimdo.host_buffer.HostBuffer(0) + pin_buffer = comfy_aimdo.host_buffer.HostBuffer(0, 0, pinned_hostbuf_size(8 * 1024**3)) STREAM_PIN_BUFFERS[offload_stream] = pin_buffer elif offload_stream is not None: event = getattr(pin_buffer, "_comfy_event", None) @@ -1267,7 +1267,7 @@ def reset_cast_buffers(): if model is not None and model.is_dynamic(): model.model.dynamic_pins[model.load_device]["active"] = False model.partially_unload_ram(1e30, subsets=[ "patches" ]) - model.model.dynamic_pins[model.load_device]["patches"] = 
(comfy_aimdo.host_buffer.HostBuffer(0, 8 * 1024 * 1024), [], [-1], [0]) + model.model.dynamic_pins[model.load_device]["patches"] = (comfy_aimdo.host_buffer.HostBuffer(0, 8 * 1024 * 1024, pinned_hostbuf_size(model.model_size())), [], [-1], [0]) STREAM_CAST_BUFFERS.clear() STREAM_AIMDO_CAST_BUFFERS.clear() @@ -1388,6 +1388,9 @@ if not args.disable_pinned_memory: PINNING_ALLOWED_TYPES = set(["Tensor", "Parameter", "QuantizedTensor"]) +def pinned_hostbuf_size(size): + return max(0, int(min(size, MAX_PINNED_MEMORY) * 2)) + def discard_cuda_async_error(): try: a = torch.tensor([1], dtype=torch.uint8, device=get_torch_device()) diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py index c5ffd4d17..ba3036c94 100644 --- a/comfy/model_patcher.py +++ b/comfy/model_patcher.py @@ -1561,8 +1561,9 @@ class ModelPatcherDynamic(ModelPatcher): self.model.dynamic_pins = {} if self.load_device not in self.model.dynamic_pins: self.model.dynamic_pins[self.load_device] = { - "weights": (comfy_aimdo.host_buffer.HostBuffer(0, 64 * 1024 * 1024), [], [-1], [0]), - "patches": (comfy_aimdo.host_buffer.HostBuffer(0, 8 * 1024 * 1024), [], [-1], [0]), + "weights": (comfy_aimdo.host_buffer.HostBuffer(0, 0, 0), [], [-1], [0]), + "patches": (comfy_aimdo.host_buffer.HostBuffer(0, 0, 0), [], [-1], [0]), + "hostbufs_initialized": False, "failed": False, "active": False, } @@ -1628,6 +1629,11 @@ class ModelPatcherDynamic(ModelPatcher): vbar = self._vbar_get(create=True) pin_state = self.model.dynamic_pins[self.load_device] + if not pin_state["hostbufs_initialized"]: + hostbuf_size = comfy.model_management.pinned_hostbuf_size(self.model_size()) + pin_state["weights"] = (comfy_aimdo.host_buffer.HostBuffer(0, 64 * 1024 * 1024, hostbuf_size), [], [-1], [0]) + pin_state["patches"] = (comfy_aimdo.host_buffer.HostBuffer(0, 8 * 1024 * 1024, hostbuf_size), [], [-1], [0]) + pin_state["hostbufs_initialized"] = True pin_state["failed"] = False pin_state["active"] = True if vbar is not None: diff --git 
a/requirements.txt b/requirements.txt index 193d60cf0..1f52fb199 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,7 +23,7 @@ SQLAlchemy>=2.0.0 filelock av>=14.2.0 comfy-kitchen>=0.2.8 -comfy-aimdo==0.4.2 +comfy-aimdo==0.4.3 requests simpleeval>=1.0.0 blake3