mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-07-03 13:19:23 +08:00
fix: address CodeRabbit review comments on PR #14587
- utils.py: add device param to _load_safetensors_no_mmap; move tensors to the target device instead of always returning CPU tensors
- utils.py: validate that read length == expected bytes; raise RuntimeError on partial/corrupt reads instead of silently creating empty tensors
- utils.py: scope the no-mmap fallback to sys.platform == win32 to avoid unnecessary overhead on Linux/Mac CUDA systems; add sys import
- baselines: replace hardcoded LvHHu username with %USERPROFILE% in startup commands for portability

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
e912b910a2
commit
0df0b0d613
@ -28,7 +28,7 @@ torchaudio: scottt/rocm-TheRock v6.5.0rc-pytorch/torchaudio-2.6.0a0+1a8f621-cp3
|
|||||||
numpy: pinned to <2 (1.26.4) for wheel compatibility
|
numpy: pinned to <2 (1.26.4) for wheel compatibility
|
||||||
|
|
||||||
--- Startup ---
|
--- Startup ---
|
||||||
cd C:\\Users\\LvHHu\\ComfyUI
|
cd %USERPROFILE%\ComfyUI
|
||||||
.\\venv\\Scripts\\activate
|
.\\venv\\Scripts\\activate
|
||||||
python main.py
|
python main.py
|
||||||
|
|
||||||
|
|||||||
@ -22,7 +22,7 @@ Functions: _load_safetensors_no_mmap(), _LARGE_FILE_MMAP_THRESHOLD = 4_000_000_0
|
|||||||
Branch: load_torch_file() elif os.path.getsize > threshold and cuda available
|
Branch: load_torch_file() elif os.path.getsize > threshold and cuda available
|
||||||
|
|
||||||
--- Startup command ---
|
--- Startup command ---
|
||||||
cd C:\Users\LvHHu\ComfyUI
|
cd %USERPROFILE%\ComfyUI
|
||||||
.\venv\Scripts\activate
|
.\venv\Scripts\activate
|
||||||
python main.py --disable-dynamic-vram --disable-mmap
|
python main.py --disable-dynamic-vram --disable-mmap
|
||||||
|
|
||||||
|
|||||||
@ -22,6 +22,7 @@ import math
|
|||||||
import struct
|
import struct
|
||||||
import ctypes
|
import ctypes
|
||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
import comfy.memory_management
|
import comfy.memory_management
|
||||||
import safetensors.torch
|
import safetensors.torch
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@ -122,9 +123,11 @@ def load_safetensors(ckpt):
|
|||||||
_LARGE_FILE_MMAP_THRESHOLD = 4_000_000_000 # 4 GB
|
_LARGE_FILE_MMAP_THRESHOLD = 4_000_000_000 # 4 GB
|
||||||
|
|
||||||
|
|
||||||
def _load_safetensors_no_mmap(ckpt):
|
def _load_safetensors_no_mmap(ckpt, device=None):
|
||||||
# Windows + ROCm/CUDA UMA: large mmaps fail after GPU virtual address space is reserved.
|
# Windows + ROCm/CUDA UMA: large mmaps fail after GPU virtual address space is reserved.
|
||||||
# Read tensors sequentially from file instead.
|
# Read tensors sequentially from file instead.
|
||||||
|
if device is None:
|
||||||
|
device = torch.device("cpu")
|
||||||
sd = {}
|
sd = {}
|
||||||
with open(ckpt, "rb") as fh:
|
with open(ckpt, "rb") as fh:
|
||||||
header_len = struct.unpack("<Q", fh.read(8))[0]
|
header_len = struct.unpack("<Q", fh.read(8))[0]
|
||||||
@ -136,12 +139,18 @@ def _load_safetensors_no_mmap(ckpt):
|
|||||||
start, end = info["data_offsets"]
|
start, end = info["data_offsets"]
|
||||||
dtype = _TYPES[info["dtype"]]
|
dtype = _TYPES[info["dtype"]]
|
||||||
shape = info["shape"]
|
shape = info["shape"]
|
||||||
|
expected = end - start
|
||||||
|
if expected == 0:
|
||||||
|
sd[name] = torch.empty(shape, dtype=dtype, device=device)
|
||||||
|
continue
|
||||||
fh.seek(data_start + start)
|
fh.seek(data_start + start)
|
||||||
raw = fh.read(end - start)
|
raw = fh.read(expected)
|
||||||
if raw:
|
if len(raw) != expected:
|
||||||
sd[name] = torch.frombuffer(bytearray(raw), dtype=dtype).reshape(shape).clone()
|
raise RuntimeError(
|
||||||
else:
|
f"Safetensors read error: tensor '{name}' expected {expected} bytes, got {len(raw)}. "
|
||||||
sd[name] = torch.empty(shape, dtype=dtype)
|
f"File may be corrupt or truncated."
|
||||||
|
)
|
||||||
|
sd[name] = torch.frombuffer(bytearray(raw), dtype=dtype).reshape(shape).clone().to(device=device)
|
||||||
return sd, header.get("__metadata__", {})
|
return sd, header.get("__metadata__", {})
|
||||||
|
|
||||||
|
|
||||||
@ -155,10 +164,13 @@ def load_torch_file(ckpt, safe_load=False, device=None, return_metadata=False):
|
|||||||
sd, metadata = load_safetensors(ckpt)
|
sd, metadata = load_safetensors(ckpt)
|
||||||
if not return_metadata:
|
if not return_metadata:
|
||||||
metadata = None
|
metadata = None
|
||||||
elif os.path.getsize(ckpt) > _LARGE_FILE_MMAP_THRESHOLD and torch.cuda.is_available():
|
elif (os.path.getsize(ckpt) > _LARGE_FILE_MMAP_THRESHOLD
|
||||||
# File > 4 GB with active CUDA/ROCm: mmap would exhaust Windows virtual
|
and sys.platform == "win32"
|
||||||
# address space reserved by UMA GPU init. Use sequential file-read instead.
|
and torch.cuda.is_available()):
|
||||||
sd, metadata = _load_safetensors_no_mmap(ckpt)
|
# Windows ROCm/UMA: GPU init reserves ~14 GB of virtual address space,
|
||||||
|
# preventing mmap of files >4 GB. Use sequential file-read instead.
|
||||||
|
# Scoped to Windows only to avoid overhead on Linux/Mac CUDA systems.
|
||||||
|
sd, metadata = _load_safetensors_no_mmap(ckpt, device=device)
|
||||||
if not return_metadata:
|
if not return_metadata:
|
||||||
metadata = None
|
metadata = None
|
||||||
else:
|
else:
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user