mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-07-03 13:19:23 +08:00
Merge 0df0b0d613 into 35c1470935
This commit is contained in:
commit
5770141fb9
83
baselines/environment_rocm_working.txt
Normal file
83
baselines/environment_rocm_working.txt
Normal file
@ -0,0 +1,83 @@
|
|||||||
|
aiohappyeyeballs==2.6.2
|
||||||
|
aiohttp==3.14.1
|
||||||
|
aiosignal==1.4.0
|
||||||
|
alembic==1.18.4
|
||||||
|
annotated-doc==0.0.4
|
||||||
|
annotated-types==0.7.0
|
||||||
|
anyio==4.13.0
|
||||||
|
attrs==26.1.0
|
||||||
|
av==17.1.0
|
||||||
|
blake3==1.0.8
|
||||||
|
certifi==2026.5.20
|
||||||
|
charset-normalizer==3.4.7
|
||||||
|
click==8.4.1
|
||||||
|
colorama==0.4.6
|
||||||
|
comfy-aimdo==0.4.9
|
||||||
|
comfy-kitchen==0.2.10
|
||||||
|
comfyui-embedded-docs==0.5.3
|
||||||
|
comfyui-workflow-templates-core==0.3.252
|
||||||
|
comfyui-workflow-templates-media-api==0.3.80
|
||||||
|
comfyui-workflow-templates-media-image==0.3.150
|
||||||
|
comfyui-workflow-templates-media-other==0.3.217
|
||||||
|
comfyui-workflow-templates-media-video==0.3.91
|
||||||
|
comfyui_frontend_package==1.45.15
|
||||||
|
comfyui_workflow_templates==0.9.98
|
||||||
|
einops==0.8.2
|
||||||
|
filelock==3.29.4
|
||||||
|
frozenlist==1.8.0
|
||||||
|
fsspec==2026.4.0
|
||||||
|
glfw==2.10.0
|
||||||
|
greenlet==3.5.1
|
||||||
|
h11==0.16.0
|
||||||
|
hf-xet==1.5.1
|
||||||
|
httpcore==1.0.9
|
||||||
|
httpx==0.28.1
|
||||||
|
huggingface_hub==1.19.0
|
||||||
|
idna==3.18
|
||||||
|
Jinja2==3.1.6
|
||||||
|
kornia==0.8.3
|
||||||
|
kornia_rs==0.1.14
|
||||||
|
Mako==1.3.12
|
||||||
|
markdown-it-py==4.2.0
|
||||||
|
MarkupSafe==3.0.3
|
||||||
|
mdurl==0.1.2
|
||||||
|
mpmath==1.3.0
|
||||||
|
multidict==6.7.1
|
||||||
|
networkx==3.6.1
|
||||||
|
numpy==1.26.4
|
||||||
|
packaging==26.2
|
||||||
|
pillow==12.2.0
|
||||||
|
propcache==0.5.2
|
||||||
|
psutil==7.2.2
|
||||||
|
pydantic==2.13.4
|
||||||
|
pydantic-settings==2.14.1
|
||||||
|
pydantic_core==2.46.4
|
||||||
|
Pygments==2.20.0
|
||||||
|
PyOpenGL==3.1.10
|
||||||
|
python-dotenv==1.2.2
|
||||||
|
PyYAML==6.0.3
|
||||||
|
regex==2026.5.9
|
||||||
|
requests==2.34.2
|
||||||
|
rich==15.0.0
|
||||||
|
safetensors==0.8.0
|
||||||
|
scipy==1.17.1
|
||||||
|
sentencepiece==0.2.1
|
||||||
|
setuptools==82.0.1
|
||||||
|
shellingham==1.5.4
|
||||||
|
simpleeval==1.0.7
|
||||||
|
spandrel==0.4.2
|
||||||
|
SQLAlchemy==2.0.50
|
||||||
|
sympy==1.14.0
|
||||||
|
tokenizers==0.22.2
|
||||||
|
torch @ https://github.com/scottt/rocm-TheRock/releases/download/v6.5.0rc-pytorch/torch-2.7.0a0+git3f903c3-cp312-cp312-win_amd64.whl#sha256=ab308d20b8568354781ceaad1c9a1637b6dff16ab42e589fa87b19fa87f3c839
|
||||||
|
torchaudio @ https://github.com/scottt/rocm-TheRock/releases/download/v6.5.0rc-pytorch/torchaudio-2.6.0a0+1a8f621-cp312-cp312-win_amd64.whl#sha256=caa1291b5040325d67ac2d6bddb9c3ec9478337dfc70a4d08bda8a557c834698
|
||||||
|
torchsde==0.2.6
|
||||||
|
torchvision @ https://github.com/scottt/rocm-TheRock/releases/download/v6.5.0rc-pytorch/torchvision-0.22.0+9eb57cd-cp312-cp312-win_amd64.whl#sha256=47fbcdc9b5e80ee7ab40c27bbf5cd36f7a7091eae3d43a09eebd833a391de1ec
|
||||||
|
tqdm==4.68.2
|
||||||
|
trampoline==0.1.2
|
||||||
|
transformers==5.12.0
|
||||||
|
typer==0.25.1
|
||||||
|
typing-inspection==0.4.2
|
||||||
|
typing_extensions==4.15.0
|
||||||
|
urllib3==2.7.0
|
||||||
|
yarl==1.24.2
|
||||||
36
baselines/system_info.txt
Normal file
36
baselines/system_info.txt
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
=== ComfyUI ROCm Stable Baseline ===
|
||||||
|
Date: 2026-06-20
|
||||||
|
|
||||||
|
--- Python ---
|
||||||
|
Python: 3.12.10
|
||||||
|
|
||||||
|
--- PyTorch / ROCm ---
|
||||||
|
torch: 2.7.0a0+git3f903c3
|
||||||
|
CUDA avail: True
|
||||||
|
Device: AMD Radeon(TM) 8050S Graphics
|
||||||
|
VRAM (GB): 14.37
|
||||||
|
ROCm/HIP: 6.5.25205-c1c2abe52
|
||||||
|
|
||||||
|
--- torch packages ---
|
||||||
|
torch: 2.7.0a0+git3f903c3
|
||||||
|
torchvision: 0.22.0+9eb57cd
|
||||||
|
torchaudio: 2.6.0a0+1a8f621
|
||||||
|
|
||||||
|
--- ComfyUI ---
|
||||||
|
Version: 0.24.0
|
||||||
|
Backend: ROCm 6.5 (scottt/rocm-TheRock gfx1151 wheel)
|
||||||
|
Tested: SD1.5 512x512 20steps ~5 it/s, stable
|
||||||
|
|
||||||
|
--- Wheel sources (gfx1151 / Strix Halo) ---
|
||||||
|
torch: scottt/rocm-TheRock v6.5.0rc-pytorch/torch-2.7.0a0+git3f903c3-cp312-cp312-win_amd64.whl
|
||||||
|
torchvision: scottt/rocm-TheRock v6.5.0rc-pytorch/torchvision-0.22.0+9eb57cd-cp312-cp312-win_amd64.whl
|
||||||
|
torchaudio: scottt/rocm-TheRock v6.5.0rc-pytorch/torchaudio-2.6.0a0+1a8f621-cp312-cp312-win_amd64.whl
|
||||||
|
numpy: pinned to <2 (1.26.4) for wheel compatibility
|
||||||
|
|
||||||
|
--- Startup ---
|
||||||
|
cd %USERPROFILE%\ComfyUI
|
||||||
|
.\venv\Scripts\activate
|
||||||
|
python main.py
|
||||||
|
|
||||||
|
--- Saved workflow ---
|
||||||
|
baselines/workflows/sd15_test_rocm_workflow.json
|
||||||
55
baselines/system_info_realvisxl.txt
Normal file
55
baselines/system_info_realvisxl.txt
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
=== ComfyUI RealVisXL no-mmap Stable Baseline ===
|
||||||
|
Date: 2026-06-20
|
||||||
|
|
||||||
|
--- Previous baseline ---
|
||||||
|
Tag: rocm-sd15-working-baseline (preserved, not modified)
|
||||||
|
|
||||||
|
--- This baseline adds ---
|
||||||
|
Fix: comfy/utils.py: _load_safetensors_no_mmap() for files >4 GB
|
||||||
|
Model: RealVisXL_V4.0.safetensors (6.46 GB) - path only, not in git
|
||||||
|
Test: 768x1024, 25 steps, cfg=6, dpmpp_2m, karras -> OK
|
||||||
|
|
||||||
|
--- Root cause of crash (diagnosed & fixed) ---
|
||||||
|
Strix Halo UMA: ROCm init reserves ~14 GB GPU virtual address space.
|
||||||
|
safetensors mmap of files >~4 GB then fails (Windows VA space exhausted).
|
||||||
|
SD1.5 (3.97 GB) < threshold -> mmap OK.
|
||||||
|
SDXL fp16 (~6.5 GB) > threshold -> access violation in safe_open().
|
||||||
|
Fix: sequential file-read (open+seek+read) bypasses mmap entirely.
|
||||||
|
|
||||||
|
--- Patch location ---
|
||||||
|
File: comfy/utils.py
|
||||||
|
Functions: _load_safetensors_no_mmap(), _LARGE_FILE_MMAP_THRESHOLD = 4_000_000_000
|
||||||
|
Branch: load_torch_file() elif os.path.getsize(ckpt) > threshold and sys.platform == "win32" and torch.cuda.is_available()
|
||||||
|
|
||||||
|
--- Startup command ---
|
||||||
|
cd %USERPROFILE%\ComfyUI
|
||||||
|
.\venv\Scripts\activate
|
||||||
|
python main.py --disable-dynamic-vram --disable-mmap
|
||||||
|
|
||||||
|
--- GPU / ROCm ---
|
||||||
|
torch: 2.7.0a0+git3f903c3
|
||||||
|
Device: AMD Radeon(TM) 8050S Graphics
|
||||||
|
VRAM GB: 14.37
|
||||||
|
ROCm: 6.5 / gfx1151 (Strix Halo)
|
||||||
|
|
||||||
|
--- Models in checkpoints (not in git) ---
|
||||||
|
v1-5-pruned-emaonly.safetensors 3.97 GB SD1.5 baseline
|
||||||
|
RealVisXL_V4.0.safetensors 6.46 GB SDXL realistic portrait
|
||||||
|
|
||||||
|
--- Working parameters (RealVisXL) ---
|
||||||
|
Resolution: 768x1024
|
||||||
|
Steps: 25
|
||||||
|
CFG: 6
|
||||||
|
Sampler: dpmpp_2m
|
||||||
|
Scheduler: karras
|
||||||
|
Batch size: 1
|
||||||
|
|
||||||
|
--- Recovery commands ---
|
||||||
|
# 1. Return to this code state:
|
||||||
|
git checkout rocm-realvisxl-nommap-working
|
||||||
|
|
||||||
|
# 2. Re-download RealVisXL if needed (not in git):
|
||||||
|
# python -c "from huggingface_hub import hf_hub_download; hf_hub_download(repo_id='SG161222/RealVisXL_V4.0', filename='RealVisXL_V4.0.safetensors', local_dir='models/checkpoints')"
|
||||||
|
|
||||||
|
# 3. Start ComfyUI:
|
||||||
|
# python main.py --disable-dynamic-vram --disable-mmap
|
||||||
107
baselines/workflows/sd15_test_rocm_workflow.json
Normal file
107
baselines/workflows/sd15_test_rocm_workflow.json
Normal file
@ -0,0 +1,107 @@
|
|||||||
|
{
|
||||||
|
"2": {
|
||||||
|
"inputs": {
|
||||||
|
"ckpt_name": "v1-5-pruned-emaonly.safetensors"
|
||||||
|
},
|
||||||
|
"class_type": "CheckpointLoaderSimple",
|
||||||
|
"_meta": {
|
||||||
|
"title": "Checkpoint加载器(简易)"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"3": {
|
||||||
|
"inputs": {
|
||||||
|
"text": "aa cute fluffy kitten, big round eyes, detailed fur, soft natural window light, cozy indoor background, shallow depth of field, photorealistic, high quality, 50mm lens",
|
||||||
|
"clip": [
|
||||||
|
"2",
|
||||||
|
1
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"class_type": "CLIPTextEncode",
|
||||||
|
"_meta": {
|
||||||
|
"title": "CLIP文本编码"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"4": {
|
||||||
|
"inputs": {
|
||||||
|
"text": "low quality, blurry, deformed, ugly, bad anatomy, distorted face, extra limbs, bad eyes, oversaturated",
|
||||||
|
"clip": [
|
||||||
|
"2",
|
||||||
|
1
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"class_type": "CLIPTextEncode",
|
||||||
|
"_meta": {
|
||||||
|
"title": "CLIP文本编码"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"5": {
|
||||||
|
"inputs": {
|
||||||
|
"width": 512,
|
||||||
|
"height": 512,
|
||||||
|
"batch_size": 1
|
||||||
|
},
|
||||||
|
"class_type": "EmptyLatentImage",
|
||||||
|
"_meta": {
|
||||||
|
"title": "空Latent图像"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"6": {
|
||||||
|
"inputs": {
|
||||||
|
"seed": 826325619577598,
|
||||||
|
"steps": 30,
|
||||||
|
"cfg": 7,
|
||||||
|
"sampler_name": "dpmpp_2m",
|
||||||
|
"scheduler": "normal",
|
||||||
|
"denoise": 1,
|
||||||
|
"model": [
|
||||||
|
"2",
|
||||||
|
0
|
||||||
|
],
|
||||||
|
"positive": [
|
||||||
|
"3",
|
||||||
|
0
|
||||||
|
],
|
||||||
|
"negative": [
|
||||||
|
"4",
|
||||||
|
0
|
||||||
|
],
|
||||||
|
"latent_image": [
|
||||||
|
"5",
|
||||||
|
0
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"class_type": "KSampler",
|
||||||
|
"_meta": {
|
||||||
|
"title": "K采样器"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"7": {
|
||||||
|
"inputs": {
|
||||||
|
"samples": [
|
||||||
|
"6",
|
||||||
|
0
|
||||||
|
],
|
||||||
|
"vae": [
|
||||||
|
"2",
|
||||||
|
2
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"class_type": "VAEDecode",
|
||||||
|
"_meta": {
|
||||||
|
"title": "VAE解码"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"8": {
|
||||||
|
"inputs": {
|
||||||
|
"filename_prefix": "ComfyUI",
|
||||||
|
"images": [
|
||||||
|
"7",
|
||||||
|
0
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"class_type": "SaveImage",
|
||||||
|
"_meta": {
|
||||||
|
"title": "保存图像"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -22,6 +22,7 @@ import math
|
|||||||
import struct
|
import struct
|
||||||
import ctypes
|
import ctypes
|
||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
import comfy.memory_management
|
import comfy.memory_management
|
||||||
import safetensors.torch
|
import safetensors.torch
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@ -119,6 +120,40 @@ def load_safetensors(ckpt):
|
|||||||
return sd, header.get("__metadata__", {}),
|
return sd, header.get("__metadata__", {}),
|
||||||
|
|
||||||
|
|
||||||
|
# File-size cutoff in bytes (decimal 4 GB). load_torch_file() compares
# os.path.getsize(ckpt) against this and, on win32 with torch.cuda available,
# routes larger files to _load_safetensors_no_mmap() instead of safe_open().
_LARGE_FILE_MMAP_THRESHOLD = 4_000_000_000 # 4 GB
|
||||||
|
|
||||||
|
|
||||||
|
def _load_safetensors_no_mmap(ckpt, device=None):
    """Load a safetensors file with plain file reads instead of mmap.

    The file is parsed by hand: an 8-byte little-endian unsigned header
    length, a UTF-8 JSON header of that length, then the tensor payload.
    Each header entry's ``data_offsets`` are relative to the first byte
    after the header.

    Args:
        ckpt: Path of the safetensors file to read.
        device: Target ``torch.device`` for the loaded tensors. Defaults to
            the CPU when ``None``.

    Returns:
        A ``(state_dict, metadata)`` tuple. ``state_dict`` maps tensor names
        to tensors on ``device``; ``metadata`` is the header's
        ``__metadata__`` entry, or ``{}`` when the header has none.

    Raises:
        RuntimeError: If fewer bytes than a tensor's declared size could be
            read (file corrupt or truncated).
    """
    # Windows + ROCm/CUDA UMA: large mmaps fail after GPU virtual address space is reserved.
    # Read tensors sequentially from file instead.
    if device is None:
        device = torch.device("cpu")
    sd = {}
    with open(ckpt, "rb") as fh:
        header_len = struct.unpack("<Q", fh.read(8))[0]
        header = json.loads(fh.read(header_len).decode("utf-8"))
        data_start = 8 + header_len
        for name, info in header.items():
            if name == "__metadata__":
                continue
            start, end = info["data_offsets"]
            # NOTE(review): _TYPES is defined elsewhere in this module; it is
            # presumably the safetensors-dtype-name -> torch.dtype table.
            dtype = _TYPES[info["dtype"]]
            shape = info["shape"]
            expected = end - start
            if expected == 0:
                # Zero-byte payload: nothing to read, and frombuffer would
                # reject an empty buffer.
                sd[name] = torch.empty(shape, dtype=dtype, device=device)
                continue
            fh.seek(data_start + start)
            # Read straight into the single writable buffer that will back
            # the tensor. The previous read() -> bytearray() -> clone() chain
            # held up to three copies of every tensor at once.
            buf = bytearray(expected)
            got = fh.readinto(buf)
            if got != expected:
                raise RuntimeError(
                    f"Safetensors read error: tensor '{name}' expected {expected} bytes, got {got}. "
                    "File may be corrupt or truncated."
                )
            # frombuffer shares memory with buf and keeps it alive; buf is
            # private to this tensor, so no defensive clone is needed.
            sd[name] = torch.frombuffer(buf, dtype=dtype).reshape(shape).to(device=device)
    return sd, header.get("__metadata__", {})
|
||||||
|
|
||||||
|
|
||||||
def load_torch_file(ckpt, safe_load=False, device=None, return_metadata=False):
|
def load_torch_file(ckpt, safe_load=False, device=None, return_metadata=False):
|
||||||
if device is None:
|
if device is None:
|
||||||
device = torch.device("cpu")
|
device = torch.device("cpu")
|
||||||
@ -129,6 +164,15 @@ def load_torch_file(ckpt, safe_load=False, device=None, return_metadata=False):
|
|||||||
sd, metadata = load_safetensors(ckpt)
|
sd, metadata = load_safetensors(ckpt)
|
||||||
if not return_metadata:
|
if not return_metadata:
|
||||||
metadata = None
|
metadata = None
|
||||||
|
elif (os.path.getsize(ckpt) > _LARGE_FILE_MMAP_THRESHOLD
|
||||||
|
and sys.platform == "win32"
|
||||||
|
and torch.cuda.is_available()):
|
||||||
|
# Windows ROCm/UMA: GPU init reserves ~14 GB of virtual address space,
|
||||||
|
# preventing mmap of files >4 GB. Use sequential file-read instead.
|
||||||
|
# Scoped to Windows only to avoid overhead on Linux/Mac CUDA systems.
|
||||||
|
sd, metadata = _load_safetensors_no_mmap(ckpt, device=device)
|
||||||
|
if not return_metadata:
|
||||||
|
metadata = None
|
||||||
else:
|
else:
|
||||||
with safetensors.safe_open(ckpt, framework="pt", device=device.type) as f:
|
with safetensors.safe_open(ckpt, framework="pt", device=device.type) as f:
|
||||||
sd = {}
|
sd = {}
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user