From e912b910a2beaaa7cfeb6c991ea327e586892083 Mon Sep 17 00:00:00 2001 From: Houde Date: Sat, 20 Jun 2026 19:03:20 +0100 Subject: [PATCH] fix: add no-mmap safetensors loader for >4GB files on Windows ROCm/UMA Root cause: Strix Halo UMA ROCm init reserves ~14 GB of Windows virtual address space for GPU. This prevents safetensors from mmap-ing files larger than ~4 GB (SDXL fp16 ~6.5 GB), causing access violations. SD1.5 (3.97 GB) is below the threshold and unaffected. Fix in comfy/utils.py: - Add _LARGE_FILE_MMAP_THRESHOLD = 4_000_000_000 - Add _load_safetensors_no_mmap(): reads tensors via open()+seek()+read() instead of mmap, then clones each tensor for independent ownership - In load_torch_file(): route files >4 GB with CUDA active through _load_safetensors_no_mmap() automatically Tested: RealVisXL_V4.0.safetensors (6.46 GB) loads and generates 768x1024 portrait images at ~5 it/s on AMD Radeon 8050S (gfx1151). SD1.5 baseline unaffected (still uses original mmap path). --- baselines/system_info_realvisxl.txt | 55 +++++++++++++++++++++++++++++ comfy/utils.py | 32 +++++++++++++++++ 2 files changed, 87 insertions(+) create mode 100644 baselines/system_info_realvisxl.txt diff --git a/baselines/system_info_realvisxl.txt b/baselines/system_info_realvisxl.txt new file mode 100644 index 000000000..8a6d3b839 --- /dev/null +++ b/baselines/system_info_realvisxl.txt @@ -0,0 +1,55 @@ +=== ComfyUI RealVisXL no-mmap Stable Baseline === +Date: 2026-06-20 + +--- Previous baseline --- +Tag: rocm-sd15-working-baseline (preserved, not modified) + +--- This baseline adds --- +Fix: comfy/utils.py: _load_safetensors_no_mmap() for files >4 GB +Model: RealVisXL_V4.0.safetensors (6.46 GB) - path only, not in git +Test: 768x1024, 25 steps, cfg=6, dpmpp_2m, karras -> OK + +--- Root cause of crash (diagnosed & fixed) --- +Strix Halo UMA: ROCm init reserves ~14 GB GPU virtual address space. +safetensors mmap of files >~4 GB then fails (Windows VA space exhausted). 
+SD1.5 (3.97 GB) < threshold -> mmap OK. +SDXL fp16 (~6.5 GB) > threshold -> access violation in safe_open(). +Fix: sequential file-read (open+seek+read) bypasses mmap entirely. + +--- Patch location --- +File: comfy/utils.py +Functions: _load_safetensors_no_mmap(), _LARGE_FILE_MMAP_THRESHOLD = 4_000_000_000 +Branch: load_torch_file() elif os.path.getsize > threshold and cuda available + +--- Startup command --- +cd C:\Users\LvHHu\ComfyUI +.\venv\Scripts\activate +python main.py --disable-dynamic-vram --disable-mmap + +--- GPU / ROCm --- +torch: 2.7.0a0+git3f903c3 +Device: AMD Radeon(TM) 8050S Graphics +VRAM GB: 14.37 +ROCm: 6.5 / gfx1151 (Strix Halo) + +--- Models in checkpoints (not in git) --- +v1-5-pruned-emaonly.safetensors 3.97 GB SD1.5 baseline +RealVisXL_V4.0.safetensors 6.46 GB SDXL realistic portrait + +--- Working parameters (RealVisXL) --- +Resolution: 768x1024 +Steps: 25 +CFG: 6 +Sampler: dpmpp_2m +Scheduler: karras +Batch size: 1 + +--- Recovery commands --- +# 1. Return to this code state: +git checkout rocm-realvisxl-nommap-working + +# 2. Re-download RealVisXL if needed (not in git): +# python -c "from huggingface_hub import hf_hub_download; hf_hub_download(repo_id='SG161222/RealVisXL_V4.0', filename='RealVisXL_V4.0.safetensors', local_dir='models/checkpoints')" + +# 3. Start ComfyUI: +# python main.py --disable-dynamic-vram --disable-mmap diff --git a/comfy/utils.py b/comfy/utils.py index 09d783fff..ceddf7af1 100644 --- a/comfy/utils.py +++ b/comfy/utils.py @@ -119,6 +119,32 @@ def load_safetensors(ckpt): return sd, header.get("__metadata__", {}), +_LARGE_FILE_MMAP_THRESHOLD = 4_000_000_000 # 4 GB + + +def _load_safetensors_no_mmap(ckpt): + # Windows + ROCm/CUDA UMA: large mmaps fail after GPU virtual address space is reserved. + # Read tensors sequentially from file instead. 
+ sd = {} + with open(ckpt, "rb") as fh: + header_len = struct.unpack(" _LARGE_FILE_MMAP_THRESHOLD and torch.cuda.is_available(): + # File > 4 GB with active CUDA/ROCm: UMA GPU init has already reserved much of the + # Windows virtual address space, so mmap-ing the file would fail. Read it sequentially instead. + sd, metadata = _load_safetensors_no_mmap(ckpt) + if not return_metadata: + metadata = None else: with safetensors.safe_open(ckpt, framework="pt", device=device.type) as f: sd = {}