From 09a98a953984a83104c470e2f18d3f00d4f61099 Mon Sep 17 00:00:00 2001
From: Rattus <rattus128@gmail.com>
Date: Mon, 18 May 2026 17:38:55 +1000
Subject: [PATCH] specify hostbuf max allocation size

There are signs of virtual memory exhaustion on some Linux systems when
throwing 128GB at every little piece. Pass the actual size to save aimdo
from over-estimates.
---
 comfy/memory_management.py |  3 ++-
 comfy/model_management.py  |  7 +++++--
 comfy/model_patcher.py     | 10 ++++++++--
 requirements.txt           |  2 +-
 4 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/comfy/memory_management.py b/comfy/memory_management.py
index 21e3cf59b..c43f0c4a2 100644
--- a/comfy/memory_management.py
+++ b/comfy/memory_management.py
@@ -60,7 +60,8 @@ def read_tensor_file_slice_into(tensor, destination, stream=None, destination2=N
         hostbuf.read_file_slice(file_obj, info.offset, info.size,
                                 offset=destination.data_ptr() - hostbuf.get_raw_address(),
                                 stream=stream_ptr,
-                                device_ptr=device_ptr)
+                                device_ptr=device_ptr,
+                                device=None if destination2 is None else destination2.device.index)
         return True
 
     buf_type = ctypes.c_ubyte * info.size
diff --git a/comfy/model_management.py b/comfy/model_management.py
index 69ba794c2..3894dfa9c 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -1217,7 +1217,7 @@ def get_aimdo_cast_buffer(offload_stream, device):
 def get_pin_buffer(offload_stream):
     pin_buffer = STREAM_PIN_BUFFERS.get(offload_stream, None)
     if pin_buffer is None:
-        pin_buffer = comfy_aimdo.host_buffer.HostBuffer(0)
+        pin_buffer = comfy_aimdo.host_buffer.HostBuffer(0, 0, pinned_hostbuf_size(8 * 1024**3))
         STREAM_PIN_BUFFERS[offload_stream] = pin_buffer
     elif offload_stream is not None:
         event = getattr(pin_buffer, "_comfy_event", None)
@@ -1267,7 +1267,7 @@ def reset_cast_buffers():
         if model is not None and model.is_dynamic():
             model.model.dynamic_pins[model.load_device]["active"] = False
             model.partially_unload_ram(1e30, subsets=[ "patches" ])
-            model.model.dynamic_pins[model.load_device]["patches"] = (comfy_aimdo.host_buffer.HostBuffer(0, 8 * 1024 * 1024), [], [-1], [0])
+            model.model.dynamic_pins[model.load_device]["patches"] = (comfy_aimdo.host_buffer.HostBuffer(0, 8 * 1024 * 1024, pinned_hostbuf_size(model.model_size())), [], [-1], [0])
 
     STREAM_CAST_BUFFERS.clear()
     STREAM_AIMDO_CAST_BUFFERS.clear()
@@ -1388,6 +1388,9 @@ if not args.disable_pinned_memory:
 
 PINNING_ALLOWED_TYPES = set(["Tensor", "Parameter", "QuantizedTensor"])
 
+def pinned_hostbuf_size(size):
+    return max(0, int(min(size, MAX_PINNED_MEMORY) * 2))
+
 def discard_cuda_async_error():
     try:
         a = torch.tensor([1], dtype=torch.uint8, device=get_torch_device())
diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py
index c5ffd4d17..ba3036c94 100644
--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@@ -1561,8 +1561,9 @@ class ModelPatcherDynamic(ModelPatcher):
             self.model.dynamic_pins = {}
         if self.load_device not in self.model.dynamic_pins:
             self.model.dynamic_pins[self.load_device] = {
-                "weights": (comfy_aimdo.host_buffer.HostBuffer(0, 64 * 1024 * 1024), [], [-1], [0]),
-                "patches": (comfy_aimdo.host_buffer.HostBuffer(0, 8 * 1024 * 1024), [], [-1], [0]),
+                "weights": (comfy_aimdo.host_buffer.HostBuffer(0, 0, 0), [], [-1], [0]),
+                "patches": (comfy_aimdo.host_buffer.HostBuffer(0, 0, 0), [], [-1], [0]),
+                "hostbufs_initialized": False,
                 "failed": False,
                 "active": False,
             }
@@ -1628,6 +1629,11 @@ class ModelPatcherDynamic(ModelPatcher):
 
             vbar = self._vbar_get(create=True)
             pin_state = self.model.dynamic_pins[self.load_device]
+            if not pin_state["hostbufs_initialized"]:
+                hostbuf_size = comfy.model_management.pinned_hostbuf_size(self.model_size())
+                pin_state["weights"] = (comfy_aimdo.host_buffer.HostBuffer(0, 64 * 1024 * 1024, hostbuf_size), [], [-1], [0])
+                pin_state["patches"] = (comfy_aimdo.host_buffer.HostBuffer(0, 8 * 1024 * 1024, hostbuf_size), [], [-1], [0])
+                pin_state["hostbufs_initialized"] = True
             pin_state["failed"] = False
             pin_state["active"] = True
             if vbar is not None:
diff --git a/requirements.txt b/requirements.txt
index 193d60cf0..1f52fb199 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -23,7 +23,7 @@ SQLAlchemy>=2.0.0
 filelock
 av>=14.2.0
 comfy-kitchen>=0.2.8
-comfy-aimdo==0.4.2
+comfy-aimdo==0.4.3
 requests
 simpleeval>=1.0.0
 blake3