This commit is contained in:
Houde Li 2026-07-03 03:05:59 +08:00 committed by GitHub
commit 5770141fb9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 325 additions and 0 deletions

View File

@ -0,0 +1,83 @@
aiohappyeyeballs==2.6.2
aiohttp==3.14.1
aiosignal==1.4.0
alembic==1.18.4
annotated-doc==0.0.4
annotated-types==0.7.0
anyio==4.13.0
attrs==26.1.0
av==17.1.0
blake3==1.0.8
certifi==2026.5.20
charset-normalizer==3.4.7
click==8.4.1
colorama==0.4.6
comfy-aimdo==0.4.9
comfy-kitchen==0.2.10
comfyui-embedded-docs==0.5.3
comfyui-workflow-templates-core==0.3.252
comfyui-workflow-templates-media-api==0.3.80
comfyui-workflow-templates-media-image==0.3.150
comfyui-workflow-templates-media-other==0.3.217
comfyui-workflow-templates-media-video==0.3.91
comfyui_frontend_package==1.45.15
comfyui_workflow_templates==0.9.98
einops==0.8.2
filelock==3.29.4
frozenlist==1.8.0
fsspec==2026.4.0
glfw==2.10.0
greenlet==3.5.1
h11==0.16.0
hf-xet==1.5.1
httpcore==1.0.9
httpx==0.28.1
huggingface_hub==1.19.0
idna==3.18
Jinja2==3.1.6
kornia==0.8.3
kornia_rs==0.1.14
Mako==1.3.12
markdown-it-py==4.2.0
MarkupSafe==3.0.3
mdurl==0.1.2
mpmath==1.3.0
multidict==6.7.1
networkx==3.6.1
numpy==1.26.4
packaging==26.2
pillow==12.2.0
propcache==0.5.2
psutil==7.2.2
pydantic==2.13.4
pydantic-settings==2.14.1
pydantic_core==2.46.4
Pygments==2.20.0
PyOpenGL==3.1.10
python-dotenv==1.2.2
PyYAML==6.0.3
regex==2026.5.9
requests==2.34.2
rich==15.0.0
safetensors==0.8.0
scipy==1.17.1
sentencepiece==0.2.1
setuptools==82.0.1
shellingham==1.5.4
simpleeval==1.0.7
spandrel==0.4.2
SQLAlchemy==2.0.50
sympy==1.14.0
tokenizers==0.22.2
torch @ https://github.com/scottt/rocm-TheRock/releases/download/v6.5.0rc-pytorch/torch-2.7.0a0+git3f903c3-cp312-cp312-win_amd64.whl#sha256=ab308d20b8568354781ceaad1c9a1637b6dff16ab42e589fa87b19fa87f3c839
torchaudio @ https://github.com/scottt/rocm-TheRock/releases/download/v6.5.0rc-pytorch/torchaudio-2.6.0a0+1a8f621-cp312-cp312-win_amd64.whl#sha256=caa1291b5040325d67ac2d6bddb9c3ec9478337dfc70a4d08bda8a557c834698
torchsde==0.2.6
torchvision @ https://github.com/scottt/rocm-TheRock/releases/download/v6.5.0rc-pytorch/torchvision-0.22.0+9eb57cd-cp312-cp312-win_amd64.whl#sha256=47fbcdc9b5e80ee7ab40c27bbf5cd36f7a7091eae3d43a09eebd833a391de1ec
tqdm==4.68.2
trampoline==0.1.2
transformers==5.12.0
typer==0.25.1
typing-inspection==0.4.2
typing_extensions==4.15.0
urllib3==2.7.0
yarl==1.24.2

36
baselines/system_info.txt Normal file
View File

@ -0,0 +1,36 @@
=== ComfyUI ROCm Stable Baseline ===
Date: 2026-06-20
--- Python ---
Python: 3.12.10
--- PyTorch / ROCm ---
torch: 2.7.0a0+git3f903c3
CUDA avail: True
Device: AMD Radeon(TM) 8050S Graphics
VRAM (GB): 14.37
ROCm/HIP: 6.5.25205-c1c2abe52
--- torch packages ---
torch: 2.7.0a0+git3f903c3
torchvision: 0.22.0+9eb57cd
torchaudio: 2.6.0a0+1a8f621
--- ComfyUI ---
Version: 0.24.0
Backend: ROCm 6.5 (scottt/rocm-TheRock gfx1151 wheel)
Tested: SD1.5 512x512 20steps ~5 it/s, stable
--- Wheel sources (gfx1151 / Strix Halo) ---
torch: scottt/rocm-TheRock v6.5.0rc-pytorch/torch-2.7.0a0+git3f903c3-cp312-cp312-win_amd64.whl
torchvision: scottt/rocm-TheRock v6.5.0rc-pytorch/torchvision-0.22.0+9eb57cd-cp312-cp312-win_amd64.whl
torchaudio: scottt/rocm-TheRock v6.5.0rc-pytorch/torchaudio-2.6.0a0+1a8f621-cp312-cp312-win_amd64.whl
numpy: pinned to <2 (1.26.4) for wheel compatibility
--- Startup ---
cd %USERPROFILE%\ComfyUI
.\venv\Scripts\activate
python main.py
--- Saved workflow ---
baselines/workflows/sd15_test_rocm_workflow.json

View File

@ -0,0 +1,55 @@
=== ComfyUI RealVisXL no-mmap Stable Baseline ===
Date: 2026-06-20
--- Previous baseline ---
Tag: rocm-sd15-working-baseline (preserved, not modified)
--- This baseline adds ---
Fix: comfy/utils.py: _load_safetensors_no_mmap() for files >4 GB
Model: RealVisXL_V4.0.safetensors (6.46 GB) - path only, not in git
Test: 768x1024, 25 steps, cfg=6, dpmpp_2m, karras -> OK
--- Root cause of crash (diagnosed & fixed) ---
Strix Halo UMA: ROCm init reserves ~14 GB GPU virtual address space.
safetensors mmap of files >~4 GB then fails (Windows VA space exhausted).
SD1.5 (3.97 GB) < threshold -> mmap OK.
SDXL fp16 (~6.5 GB) > threshold -> access violation in safe_open().
Fix: sequential file-read (open+seek+read) bypasses mmap entirely.
--- Patch location ---
File: comfy/utils.py
Functions: _load_safetensors_no_mmap(), _LARGE_FILE_MMAP_THRESHOLD = 4_000_000_000
Branch: load_torch_file() elif os.path.getsize(ckpt) > threshold and sys.platform == "win32" and torch.cuda.is_available()
--- Startup command ---
cd %USERPROFILE%\ComfyUI
.\venv\Scripts\activate
python main.py --disable-dynamic-vram --disable-mmap
--- GPU / ROCm ---
torch: 2.7.0a0+git3f903c3
Device: AMD Radeon(TM) 8050S Graphics
VRAM GB: 14.37
ROCm: 6.5 / gfx1151 (Strix Halo)
--- Models in checkpoints (not in git) ---
v1-5-pruned-emaonly.safetensors 3.97 GB SD1.5 baseline
RealVisXL_V4.0.safetensors 6.46 GB SDXL realistic portrait
--- Working parameters (RealVisXL) ---
Resolution: 768x1024
Steps: 25
CFG: 6
Sampler: dpmpp_2m
Scheduler: karras
Batch size: 1
--- Recovery commands ---
# 1. Return to this code state:
git checkout rocm-realvisxl-nommap-working
# 2. Re-download RealVisXL if needed (not in git):
# python -c "from huggingface_hub import hf_hub_download; hf_hub_download(repo_id='SG161222/RealVisXL_V4.0', filename='RealVisXL_V4.0.safetensors', local_dir='models/checkpoints')"
# 3. Start ComfyUI:
# python main.py --disable-dynamic-vram --disable-mmap

View File

@ -0,0 +1,107 @@
{
"2": {
"inputs": {
"ckpt_name": "v1-5-pruned-emaonly.safetensors"
},
"class_type": "CheckpointLoaderSimple",
"_meta": {
"title": "Checkpoint加载器简易"
}
},
"3": {
"inputs": {
"text": "aa cute fluffy kitten, big round eyes, detailed fur, soft natural window light, cozy indoor background, shallow depth of field, photorealistic, high quality, 50mm lens",
"clip": [
"2",
1
]
},
"class_type": "CLIPTextEncode",
"_meta": {
"title": "CLIP文本编码"
}
},
"4": {
"inputs": {
"text": "low quality, blurry, deformed, ugly, bad anatomy, distorted face, extra limbs, bad eyes, oversaturated",
"clip": [
"2",
1
]
},
"class_type": "CLIPTextEncode",
"_meta": {
"title": "CLIP文本编码"
}
},
"5": {
"inputs": {
"width": 512,
"height": 512,
"batch_size": 1
},
"class_type": "EmptyLatentImage",
"_meta": {
"title": "空Latent图像"
}
},
"6": {
"inputs": {
"seed": 826325619577598,
"steps": 30,
"cfg": 7,
"sampler_name": "dpmpp_2m",
"scheduler": "normal",
"denoise": 1,
"model": [
"2",
0
],
"positive": [
"3",
0
],
"negative": [
"4",
0
],
"latent_image": [
"5",
0
]
},
"class_type": "KSampler",
"_meta": {
"title": "K采样器"
}
},
"7": {
"inputs": {
"samples": [
"6",
0
],
"vae": [
"2",
2
]
},
"class_type": "VAEDecode",
"_meta": {
"title": "VAE解码"
}
},
"8": {
"inputs": {
"filename_prefix": "ComfyUI",
"images": [
"7",
0
]
},
"class_type": "SaveImage",
"_meta": {
"title": "保存图像"
}
}
}

View File

@ -22,6 +22,7 @@ import math
import struct
import ctypes
import os
import sys
import comfy.memory_management
import safetensors.torch
import numpy as np
@ -119,6 +120,40 @@ def load_safetensors(ckpt):
return sd, header.get("__metadata__", {}),
_LARGE_FILE_MMAP_THRESHOLD = 4_000_000_000  # 4 GB


def _load_safetensors_no_mmap(ckpt, device=None):
    """Load a safetensors checkpoint with plain file reads instead of mmap.

    Windows + ROCm/CUDA UMA: large mmaps fail after GPU virtual address
    space is reserved, so every tensor is read sequentially from the file
    (open + seek + read) and copied into torch-owned memory.

    Args:
        ckpt: Path of the ``.safetensors`` file to read.
        device: Target ``torch.device`` for the loaded tensors. Defaults to
            the CPU when ``None``.

    Returns:
        A ``(state_dict, metadata)`` tuple. ``state_dict`` maps each tensor
        name in the file header to a tensor on ``device``; ``metadata`` is
        the header's ``"__metadata__"`` entry, or ``{}`` if it has none.

    Raises:
        RuntimeError: If a tensor's ``data_offsets`` describe a negative
            span, or if fewer bytes than expected could be read for a
            tensor (corrupt or truncated file).
    """
    if device is None:
        device = torch.device("cpu")

    sd = {}
    with open(ckpt, "rb") as fh:
        # File layout: 8-byte little-endian header length, UTF-8 JSON header,
        # then the raw tensor data that "data_offsets" are relative to.
        header_len = struct.unpack("<Q", fh.read(8))[0]
        header = json.loads(fh.read(header_len).decode("utf-8"))
        data_start = 8 + header_len

        for name, info in header.items():
            if name == "__metadata__":
                continue

            start, end = info["data_offsets"]
            dtype = _TYPES[info["dtype"]]
            shape = info["shape"]
            expected = end - start

            if expected < 0:
                # A negative size would make a plain read() slurp the rest of
                # the file before failing; reject the bad header up front.
                raise RuntimeError(
                    f"Safetensors read error: tensor '{name}' has invalid data_offsets "
                    f"[{start}, {end}]. File may be corrupt."
                )
            if expected == 0:
                sd[name] = torch.empty(shape, dtype=dtype, device=device)
                continue

            fh.seek(data_start + start)
            # Read straight into a writable buffer: torch.frombuffer() needs a
            # writable object, and this avoids the extra bytes -> bytearray
            # copy, which matters for multi-GB checkpoints.
            buf = bytearray(expected)
            got = fh.readinto(buf)
            if got != expected:
                raise RuntimeError(
                    f"Safetensors read error: tensor '{name}' expected {expected} bytes, got {got}. "
                    f"File may be corrupt or truncated."
                )

            # copy=True performs exactly one copy into torch-owned memory on
            # the target device, so the result never aliases the temporary
            # Python buffer (even when the target device is the CPU).
            sd[name] = torch.frombuffer(buf, dtype=dtype).reshape(shape).to(device=device, copy=True)

    return sd, header.get("__metadata__", {})
def load_torch_file(ckpt, safe_load=False, device=None, return_metadata=False):
if device is None:
device = torch.device("cpu")
@ -129,6 +164,15 @@ def load_torch_file(ckpt, safe_load=False, device=None, return_metadata=False):
sd, metadata = load_safetensors(ckpt)
if not return_metadata:
metadata = None
elif (os.path.getsize(ckpt) > _LARGE_FILE_MMAP_THRESHOLD
and sys.platform == "win32"
and torch.cuda.is_available()):
# Windows ROCm/UMA: GPU init reserves ~14 GB of virtual address space,
# preventing mmap of files >4 GB. Use sequential file-read instead.
# Scoped to Windows only to avoid overhead on Linux/Mac CUDA systems.
sd, metadata = _load_safetensors_no_mmap(ckpt, device=device)
if not return_metadata:
metadata = None
else:
with safetensors.safe_open(ckpt, framework="pt", device=device.type) as f:
sd = {}