fix: address CodeRabbit review comments on PR #14587

- utils.py: add device param to _load_safetensors_no_mmap, move tensors
  to target device instead of always returning CPU tensors
- utils.py: validate that each tensor read returns the expected number of
  bytes; raise RuntimeError on partial/corrupt reads instead of silently
  creating uninitialized (torch.empty) tensors
- utils.py: scope no-mmap fallback to sys.platform == "win32" to avoid
  unnecessary overhead on Linux/macOS systems; add sys import
- baselines: replace hardcoded C:\Users\LvHHu path with %USERPROFILE% in
  startup commands for portability

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Houde 2026-06-23 14:51:22 +01:00
parent e912b910a2
commit 0df0b0d613
3 changed files with 24 additions and 12 deletions

View File

@ -28,7 +28,7 @@ torchaudio: scottt/rocm-TheRock v6.5.0rc-pytorch/torchaudio-2.6.0a0+1a8f621-cp3
numpy: pinned to <2 (1.26.4) for wheel compatibility numpy: pinned to <2 (1.26.4) for wheel compatibility
--- Startup --- --- Startup ---
cd C:\\Users\\LvHHu\\ComfyUI cd %USERPROFILE%\ComfyUI
.\\venv\\Scripts\\activate .\\venv\\Scripts\\activate
python main.py python main.py

View File

@ -22,7 +22,7 @@ Functions: _load_safetensors_no_mmap(), _LARGE_FILE_MMAP_THRESHOLD = 4_000_000_0
Branch: load_torch_file() elif os.path.getsize > threshold and cuda available Branch: load_torch_file() elif os.path.getsize > threshold and cuda available
--- Startup command --- --- Startup command ---
cd C:\Users\LvHHu\ComfyUI cd %USERPROFILE%\ComfyUI
.\venv\Scripts\activate .\venv\Scripts\activate
python main.py --disable-dynamic-vram --disable-mmap python main.py --disable-dynamic-vram --disable-mmap

View File

@ -22,6 +22,7 @@ import math
import struct import struct
import ctypes import ctypes
import os import os
import sys
import comfy.memory_management import comfy.memory_management
import safetensors.torch import safetensors.torch
import numpy as np import numpy as np
@ -122,9 +123,11 @@ def load_safetensors(ckpt):
_LARGE_FILE_MMAP_THRESHOLD = 4_000_000_000 # 4 GB _LARGE_FILE_MMAP_THRESHOLD = 4_000_000_000 # 4 GB
def _load_safetensors_no_mmap(ckpt): def _load_safetensors_no_mmap(ckpt, device=None):
# Windows + ROCm/CUDA UMA: large mmaps fail after GPU virtual address space is reserved. # Windows + ROCm/CUDA UMA: large mmaps fail after GPU virtual address space is reserved.
# Read tensors sequentially from file instead. # Read tensors sequentially from file instead.
if device is None:
device = torch.device("cpu")
sd = {} sd = {}
with open(ckpt, "rb") as fh: with open(ckpt, "rb") as fh:
header_len = struct.unpack("<Q", fh.read(8))[0] header_len = struct.unpack("<Q", fh.read(8))[0]
@ -136,12 +139,18 @@ def _load_safetensors_no_mmap(ckpt):
start, end = info["data_offsets"] start, end = info["data_offsets"]
dtype = _TYPES[info["dtype"]] dtype = _TYPES[info["dtype"]]
shape = info["shape"] shape = info["shape"]
expected = end - start
if expected == 0:
sd[name] = torch.empty(shape, dtype=dtype, device=device)
continue
fh.seek(data_start + start) fh.seek(data_start + start)
raw = fh.read(end - start) raw = fh.read(expected)
if raw: if len(raw) != expected:
sd[name] = torch.frombuffer(bytearray(raw), dtype=dtype).reshape(shape).clone() raise RuntimeError(
else: f"Safetensors read error: tensor '{name}' expected {expected} bytes, got {len(raw)}. "
sd[name] = torch.empty(shape, dtype=dtype) f"File may be corrupt or truncated."
)
sd[name] = torch.frombuffer(bytearray(raw), dtype=dtype).reshape(shape).clone().to(device=device)
return sd, header.get("__metadata__", {}) return sd, header.get("__metadata__", {})
@ -155,10 +164,13 @@ def load_torch_file(ckpt, safe_load=False, device=None, return_metadata=False):
sd, metadata = load_safetensors(ckpt) sd, metadata = load_safetensors(ckpt)
if not return_metadata: if not return_metadata:
metadata = None metadata = None
elif os.path.getsize(ckpt) > _LARGE_FILE_MMAP_THRESHOLD and torch.cuda.is_available(): elif (os.path.getsize(ckpt) > _LARGE_FILE_MMAP_THRESHOLD
# File > 4 GB with active CUDA/ROCm: mmap would exhaust Windows virtual and sys.platform == "win32"
# address space reserved by UMA GPU init. Use sequential file-read instead. and torch.cuda.is_available()):
sd, metadata = _load_safetensors_no_mmap(ckpt) # Windows ROCm/UMA: GPU init reserves ~14 GB of virtual address space,
# preventing mmap of files >4 GB. Use sequential file-read instead.
# Scoped to Windows only to avoid overhead on Linux/Mac CUDA systems.
sd, metadata = _load_safetensors_no_mmap(ckpt, device=device)
if not return_metadata: if not return_metadata:
metadata = None metadata = None
else: else: