From df17b560c5b1666ce0693b3985a51467a6d805b9 Mon Sep 17 00:00:00 2001
From: Rattus <rattus128@gmail.com>
Date: Fri, 22 May 2026 23:30:35 +1000
Subject: [PATCH 1/2] memory_management: replace thread refusal with mutex

This was an attempt to be a fast path by ensuring the file slice was
created by the owning thread and refusing without needing ot mutex
but worksplit-multigpu doesnt work that way. Go mutex.

Shoot me for overthinking next time.
---
 comfy/memory_management.py | 36 ++++++++++++++++++------------------
 comfy/utils.py             |  3 ++-
 2 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/comfy/memory_management.py b/comfy/memory_management.py
index c43f0c4a2..962addb27 100644
--- a/comfy/memory_management.py
+++ b/comfy/memory_management.py
@@ -1,6 +1,5 @@
 import math
 import ctypes
-import threading
 import dataclasses
 import torch
 from typing import NamedTuple
@@ -10,7 +9,7 @@ from comfy.quant_ops import QuantizedTensor
 
 class TensorFileSlice(NamedTuple):
     file_ref: object
-    thread_id: int
+    lock: object
     offset: int
     size: int
 
@@ -43,7 +42,6 @@ def read_tensor_file_slice_into(tensor, destination, stream=None, destination2=N
     file_obj = info.file_ref
     if (destination.device.type != "cpu"
             or file_obj is None
-            or threading.get_ident() != info.thread_id
             or destination.numel() * destination.element_size() < info.size
             or tensor.numel() * tensor.element_size() != info.size
             or tensor.storage_offset() != 0
@@ -57,27 +55,29 @@ def read_tensor_file_slice_into(tensor, destination, stream=None, destination2=N
     if hostbuf is not None:
         stream_ptr = getattr(stream, "cuda_stream", 0) if stream is not None else 0
         device_ptr = destination2.data_ptr() if destination2 is not None else 0
-        hostbuf.read_file_slice(file_obj, info.offset, info.size,
-                                offset=destination.data_ptr() - hostbuf.get_raw_address(),
-                                stream=stream_ptr,
-                                device_ptr=device_ptr,
-                                device=None if destination2 is None else destination2.device.index)
+        with info.lock:
+            hostbuf.read_file_slice(file_obj, info.offset, info.size,
+                                    offset=destination.data_ptr() - hostbuf.get_raw_address(),
+                                    stream=stream_ptr,
+                                    device_ptr=device_ptr,
+                                    device=None if destination2 is None else destination2.device.index)
         return True
 
     buf_type = ctypes.c_ubyte * info.size
     view = memoryview(buf_type.from_address(destination.data_ptr()))
 
     try:
-        file_obj.seek(info.offset)
-        done = 0
-        while done < info.size:
-            try:
-                n = file_obj.readinto(view[done:])
-            except OSError:
-                return False
-            if n <= 0:
-                return False
-            done += n
+        with info.lock:
+            file_obj.seek(info.offset)
+            done = 0
+            while done < info.size:
+                try:
+                    n = file_obj.readinto(view[done:])
+                except OSError:
+                    return False
+                if n <= 0:
+                    return False
+                done += n
         return True
     finally:
         view.release()
diff --git a/comfy/utils.py b/comfy/utils.py
index 31052714a..49ae12b06 100644
--- a/comfy/utils.py
+++ b/comfy/utils.py
@@ -86,6 +86,7 @@ def load_safetensors(ckpt):
     import comfy_aimdo.model_mmap
 
     f = open(ckpt, "rb", buffering=0)
+    file_lock = threading.Lock()
     model_mmap = comfy_aimdo.model_mmap.ModelMMAP(ckpt)
     file_size = os.path.getsize(ckpt)
     mv = memoryview((ctypes.c_uint8 * file_size).from_address(model_mmap.get()))
@@ -111,7 +112,7 @@ def load_safetensors(ckpt):
                 storage = tensor.untyped_storage()
                 setattr(storage,
                         "_comfy_tensor_file_slice",
-                        comfy.memory_management.TensorFileSlice(f, threading.get_ident(), data_base_offset + start, end - start))
+                        comfy.memory_management.TensorFileSlice(f, file_lock, data_base_offset + start, end - start))
                 setattr(storage, "_comfy_tensor_mmap_refs", (model_mmap, mv))
                 sd[name] = tensor
 

From 7a18f9affbe52f6b6560fd40eb5de0dcb520f7c8 Mon Sep 17 00:00:00 2001
From: Rattus <rattus128@gmail.com>
Date: Sat, 23 May 2026 00:47:19 +1000
Subject: [PATCH 2/2] comfy-aimdo 0.4.4

Comfy-aimdo 0.4.4 contains a small bugfix to allow recovery of a hostbuf
after full truncation.

This pattern doesnt happen as a general rule, but does happen in the
upcoming worksplit-multigpu branch.
---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index e20b6e044..381e7d05f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -23,7 +23,7 @@ SQLAlchemy>=2.0.0
 filelock
 av>=14.2.0
 comfy-kitchen>=0.2.8
-comfy-aimdo==0.4.3
+comfy-aimdo==0.4.4
 requests
 simpleeval>=1.0.0
 blake3