Implement seek and read for pins

Source pins from an mmap is pad because its its a CPU->CPU copy that attempts to fully buffer the same data twice. Instead, use seek and read which avoids the mmap buffering while usually being a faster read in the first place (avoiding mmap faulting etc).
2026-06-18 22:09:38 +08:00 · 2026-03-09 22:53:23 +10:00 · 2026-03-09 22:53:23 +10:00 · 6fbbcc4cb7
commit 6fbbcc4cb7
parent 4a8cf359fe
3 changed files with 52 additions and 3 deletions
--- a/comfy/memory_management.py
+++ b/comfy/memory_management.py
@ -1,9 +1,51 @@
 import math
+import ctypes
+import threading
 import torch
 from typing import NamedTuple
+import logging

 from comfy.quant_ops import QuantizedTensor

+
+class TensorFileSlice(NamedTuple):
+    file_ref: object
+    thread_id: int
+    offset: int
+    size: int
+
+
+def read_tensor_file_slice_into(tensor, destination):
+
+    info = getattr(tensor.untyped_storage(), "_comfy_tensor_file_slice", None)
+    if info is None:
+        return False
+
+    file_obj = info.file_ref
+    if (destination.device.type != "cpu"
+            or file_obj is None
+            or threading.get_ident() != info.thread_id
+            or destination.numel() * destination.element_size() < info.size):
+        return False
+
+    if info.size == 0:
+        return True
+
+    buf_type = ctypes.c_ubyte * info.size
+    view = memoryview(buf_type.from_address(destination.data_ptr()))
+
+    try:
+        file_obj.seek(info.offset)
+        done = 0
+        while done < info.size:
+            n = file_obj.readinto(view[done:])
+            if n <= 0:
+                return False
+            done += n
+        return True
+    finally:
+        view.release()
+
 class TensorGeometry(NamedTuple):
    shape: any
    dtype: torch.dtype
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@ -1225,6 +1225,8 @@ def cast_to_gathered(tensors, r, non_blocking=False, stream=None):
            dest_view = dest_views.pop(0)
            if tensor is None:
                continue
+            if comfy.memory_management.read_tensor_file_slice_into(tensor, dest_view):
+                continue
            dest_view.copy_(tensor, non_blocking=non_blocking)


--- a/comfy/utils.py
+++ b/comfy/utils.py
@ -33,6 +33,7 @@ from comfy.cli_args import args
 import json
 import time
 import mmap
+import threading
 import warnings

 MMAP_TORCH_FILES = args.mmap_torch_files
@ -81,14 +82,14 @@ _TYPES = {
 }

 def load_safetensors(ckpt):
-    f = open(ckpt, "rb")
+    f = open(ckpt, "rb", buffering=0)
    mapping = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
    mv = memoryview(mapping)

    header_size = struct.unpack("<Q", mapping[:8])[0]
    header = json.loads(mapping[8:8+header_size].decode("utf-8"))

-    mv = mv[8 + header_size:]
+    mv = mv[(data_base_offset := 8 + header_size):]

    sd = {}
    for name, info in header.items():
@ -102,7 +103,11 @@ def load_safetensors(ckpt):
            with warnings.catch_warnings():
                #We are working with read-only RAM by design
                warnings.filterwarnings("ignore", message="The given buffer is not writable")
-                sd[name] = torch.frombuffer(mv[start:end], dtype=_TYPES[info["dtype"]]).view(info["shape"])
+                tensor = torch.frombuffer(mv[start:end], dtype=_TYPES[info["dtype"]]).view(info["shape"])
+                setattr(tensor.untyped_storage(),
+                        "_comfy_tensor_file_slice",
+                        comfy.memory_management.TensorFileSlice(f, threading.get_ident(), data_base_offset + start, end - start))
+                sd[name] = tensor

    return sd, header.get("__metadata__", {}),