mirror of https://github.com/comfyanonymous/ComfyUI.git
Merge 875bdc4015 into 25757a53c9
commit 6a9a8a4f63

benchmarks/README.md — 127 lines (new file)
@@ -0,0 +1,127 @@
# ComfyUI Serving Benchmarks

Measures latency and throughput of a running ComfyUI server by submitting
concurrent prompt requests and collecting results from the history API.

## Dependencies

```bash
pip install aiohttp tqdm gdown
```

## Supported models / tasks

| Model | Task | Description |
|-------|------|-------------|
| `wan22` | `i2v` | Wan 2.2 Image-to-Video — LightX2V 4-step, 720×720, 81 frames |

To add a new model/task: drop a workflow JSON in `workflows/` (with
`__INPUT_IMAGE__` as the image placeholder) and add an entry to
`_MODEL_REGISTRY` in `benchmark_comfyui_serving.py`.
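
For illustration, a hypothetical entry might look like the sketch below (the
`sdxl`/`t2i` names, the workflow filename, and the empty model list are
placeholders, not shipped assets):

```python
# Hypothetical registry entry; every name here is illustrative only.
_MODEL_REGISTRY[("sdxl", "t2i")] = {
    "workflow_file": "sdxl_t2i.json",  # placed in benchmarks/workflows/
    "model_files": [],                 # (relative_path, url) pairs for --download-models
    "image_source": "synthetic",       # or "vbench_i2v"
}
```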
## How it works

On each run the script:

1. Downloads model weights into the ComfyUI `models/` directory (only if
   `--download-models` is passed).
2. Downloads the [VBench I2V](https://github.com/Vchitect/VBench) image
   dataset via `gdown` into ComfyUI's `input/` folder.
3. Generates one prompt JSON per input image under
   `benchmarks/prompts/<model>_<task>/`.
4. Submits `--num-requests` prompts to the server, cycling through the
   generated prompt files in round-robin order.
5. Polls `/history/{prompt_id}` for completion and prints a latency /
   throughput summary.

Per-node execution times are available when the server is started with
`--benchmark-server-only`.
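
For reference, steps 4–5 reduce to the following minimal synchronous sketch
(the script itself uses `aiohttp`; the prompt file path below is illustrative,
and the `benchmark` key is only present when the server runs with
`--benchmark-server-only`):

```python
import json
import time
import urllib.request

BASE = "http://127.0.0.1:8188"
payload = json.loads(
    open("benchmarks/prompts/wan22_i2v/wan22_i2v_prompt_0000.json").read()
)  # generated prompt files are already shaped as {"prompt": <graph>}

# Submit the prompt; the server replies with a prompt_id.
req = urllib.request.Request(
    f"{BASE}/prompt",
    data=json.dumps(payload).encode(),
    headers={"Content-Type": "application/json"},
)
prompt_id = json.loads(urllib.request.urlopen(req).read())["prompt_id"]

# Poll the history API until the prompt reaches a terminal state.
while True:
    history = json.loads(urllib.request.urlopen(f"{BASE}/history/{prompt_id}").read())
    item = history.get(prompt_id)
    if item and item.get("status", {}).get("status_str") in ("success", "error"):
        break
    time.sleep(0.2)

print(item.get("benchmark", {}).get("execution_ms"))  # total execution time in ms
```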
## Usage

### Start the server

```bash
python main.py --listen 127.0.0.1 --port 8188 --benchmark-server-only
```

### Run the benchmark

```bash
# From the ComfyUI root directory:
python3 benchmarks/benchmark_comfyui_serving.py \
    --model wan22 --task i2v \
    --num-requests 50 --max-concurrency 4 \
    --host http://127.0.0.1:8188
```

Include model weight download on first run:

```bash
python3 benchmarks/benchmark_comfyui_serving.py \
    --model wan22 --task i2v \
    --download-models --comfyui-base-dir /path/to/ComfyUI \
    --num-requests 50 --max-concurrency 4 \
    --host http://127.0.0.1:8188
```

### All flags

| Flag | Default | Description |
|------|---------|-------------|
| `--model` | *(required)* | Model name (e.g. `wan22`) |
| `--task` | *(required)* | Task type (e.g. `i2v`) |
| `--host` | `http://127.0.0.1:8188` | ComfyUI base URL |
| `--endpoint` | `/prompt` | Submission endpoint (`/prompt` or `/bench/prompt`) |
| `--num-requests` | `50` | Total requests to submit |
| `--max-concurrency` | `8` | Max in-flight requests |
| `--request-rate` | `0` | Requests/sec; `0` = fire immediately |
| `--poisson` | off | Poisson inter-arrival when `--request-rate > 0` |
| `--num-images` | `20` | Synthetic images if VBench download unavailable |
| `--prompts-dir` | `benchmarks/prompts/<model>_<task>/` | Prompt JSON output directory |
| `--download-models` | off | Download model weights before benchmarking |
| `--comfyui-base-dir` | — | ComfyUI root (required with `--download-models`) |
| `--base-seed` | `1234` | Per-request seed is `base_seed + request_index` |
| `--seed-path` | — | Seed field to patch, as `<node_id>.<input_name>` (e.g. `3.seed`) |
| `--client-id` | `bench-<random>` | Client ID attached to each submitted prompt |
| `--request-timeout-s` | `600` | Per-request submit/poll timeout (seconds) |
| `--poll-interval-s` | `0.2` | History polling interval (seconds) |
| `--seed` | `0` | RNG seed for arrival-schedule generation |
| `--output-json` | — | Write full per-request results to a JSON file |
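
The file written by `--output-json` contains the run config, the wall time,
and one record per request whose fields mirror the script's `RequestResult`
dataclass. A minimal reader, assuming the results were saved as `results.json`:

```python
import json
from pathlib import Path

data = json.loads(Path("results.json").read_text())  # whatever was passed to --output-json
print(f"wall_time_s = {data['wall_time_s']:.1f}")
for r in data["results"]:
    status = "ok" if r["ok"] else f"failed: {r['error']}"
    print(f"request {r['request_index']}: {r['end_to_end_s']:.1f}s ({status})")
```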
## Output

```
benchmark: 100%|█████████████| 5/5 [02:58<00:00, 35.73s/req, succeeded=5]

=== ComfyUI Serving Benchmark Summary ===
requests_total: 5
requests_success: 5
requests_failed: 0
wall_time_s: 178.652
throughput_req_s: 0.028
latency_p50_s: 109.594
latency_p90_s: 164.840
latency_p95_s: 171.744
latency_p99_s: 177.266
latency_mean_s: 109.781
latency_max_s: 178.647
execution_mean_ms: 35465.21
execution_p95_ms: 39685.06

--- Per-node execution time (mean ms across successful requests) ---
  KSamplerAdvanced (130:110): mean=12827.5 p95=14264.0 n=5
  KSamplerAdvanced (130:111): mean=12726.4 p95=13822.2 n=5
  VAEDecode (130:129): mean=3439.0 p95=3467.6 n=5
  SaveVideo (108): mean=2844.7 p95=3280.0 n=5
  WanImageToVideo (130:128): mean=2367.7 p95=2595.9 n=5
  CLIPTextEncode (130:125): mean=1785.0 p95=1785.0 n=1
  CLIPLoader (130:105): mean=700.7 p95=700.7 n=1
  LoadImage (97): mean=518.4 p95=970.0 n=5
  VAELoader (130:106): mean=507.7 p95=507.7 n=1
  CLIPTextEncode (130:107): mean=223.4 p95=223.4 n=1
  UNETLoader (130:122): mean=122.2 p95=122.2 n=1
  LoraLoaderModelOnly (130:126): mean=68.1 p95=68.1 n=1
  UNETLoader (130:123): mean=65.9 p95=65.9 n=1
  LoraLoaderModelOnly (130:127): mean=36.2 p95=36.2 n=1
  ModelSamplingSD3 (130:109): mean=1.0 p95=1.0 n=1
  ModelSamplingSD3 (130:124): mean=0.9 p95=0.9 n=1
  CreateVideo (130:117): mean=0.7 p95=1.1 n=5
```

> **Note:** Nodes with `n=1` (e.g. model loaders) are cached by ComfyUI after
> the first request and skipped in subsequent executions, so they only appear
> once across the benchmark run.
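
Each per-request record also carries the raw `node_timing_ms` map, so the
per-node table above can be recomputed offline from an `--output-json` file.
A rough sketch, again assuming the illustrative `results.json` filename:

```python
import json
import statistics
from collections import defaultdict
from pathlib import Path

per_node = defaultdict(list)
for r in json.loads(Path("results.json").read_text())["results"]:
    for node_id, info in (r["node_timing_ms"] or {}).items():
        per_node[f"{info['class_type']} ({node_id})"].append(info["execution_ms"])

# Sort by descending mean, mirroring the benchmark's summary table.
for key, times in sorted(per_node.items(), key=lambda kv: -statistics.mean(kv[1])):
    print(f"{key}: mean={statistics.mean(times):.1f} n={len(times)}")
```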
benchmarks/benchmark_comfyui_serving.py — 685 lines (new file)

@@ -0,0 +1,685 @@
#!/usr/bin/env python3
"""
ComfyUI model serving benchmark.

Submits prompts concurrently to a running ComfyUI server and reports
latency/throughput metrics. Input images and prompt files are prepared
automatically (and cached for reuse) before the benchmark starts.

On first run the script will:
1. Download model weights (if --download-models is set).
2. Download the VBench I2V image dataset (requires: pip install gdown),
   or generate synthetic placeholder images as a fallback.
3. Write one prompt JSON per input image under benchmarks/prompts/<model>_<task>/.

On subsequent runs all three steps are skipped if the files already exist.
Requests are distributed across prompt files in round-robin order.

Supported models / tasks
------------------------
wan22 / i2v — Wan 2.2 Image-to-Video (LightX2V 4-step, 720×720, 81 frames)

Usage
-----
python3 benchmarks/benchmark_comfyui_serving.py \\
    --model wan22 --task i2v \\
    --num-requests 50 --max-concurrency 4 \\
    --host http://127.0.0.1:8188

# Also download model weights (run from ComfyUI root):
python3 benchmarks/benchmark_comfyui_serving.py \\
    --model wan22 --task i2v \\
    --download-models --comfyui-base-dir /path/to/ComfyUI \\
    --num-requests 50 --max-concurrency 4 \\
    --host http://127.0.0.1:8188
"""

from __future__ import annotations

import argparse
import asyncio
import json
import math
import random
import statistics
import subprocess
import time
import urllib.request
import uuid
from dataclasses import dataclass, asdict
from pathlib import Path
from typing import Any

import aiohttp
from tqdm import tqdm


# ──────────────────────────────────────────────────────────────────────────────
# Benchmark setup helpers
# ──────────────────────────────────────────────────────────────────────────────

# Workflow JSON files live in benchmarks/workflows/<model>_<task>.json.
_WORKFLOWS_DIR = Path(__file__).parent / "workflows"

# Placeholder in workflow JSON files that is replaced with the actual image filename.
_IMAGE_PLACEHOLDER = "__INPUT_IMAGE__"

# Model weight downloads for wan22/i2v.
_WAN22_I2V_MODELS: list[tuple[str, str]] = [
    (
        "models/diffusion_models/wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors",
        "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/diffusion_models/wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors",
    ),
    (
        "models/diffusion_models/wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors",
        "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/diffusion_models/wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors",
    ),
    (
        "models/loras/wan2.2_i2v_lightx2v_4steps_lora_v1_high_noise.safetensors",
        "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/loras/wan2.2_i2v_lightx2v_4steps_lora_v1_high_noise.safetensors",
    ),
    (
        "models/loras/wan2.2_i2v_lightx2v_4steps_lora_v1_low_noise.safetensors",
        "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/loras/wan2.2_i2v_lightx2v_4steps_lora_v1_low_noise.safetensors",
    ),
    (
        "models/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors",
        "https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors",
    ),
    (
        "models/vae/wan_2.1_vae.safetensors",
        "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/vae/wan_2.1_vae.safetensors",
    ),
]

# Google Drive file IDs from VBench's vbench2_beta_i2v/download_data.sh
_VBENCH_ORIGIN_ZIP_GDRIVE_ID = "1qhkLCSBkzll0dkKpwlDTwLL0nxdQ4nrY"

# Registry mapping (model, task) → benchmark configuration.
# To add a new model/task: drop a workflow JSON in benchmarks/workflows/ and
# add an entry here.
_MODEL_REGISTRY: dict[tuple[str, str], dict[str, Any]] = {
    ("wan22", "i2v"): {
        "workflow_file": "wan22_i2v.json",
        "model_files": _WAN22_I2V_MODELS,
        "image_source": "vbench_i2v",
    },
}

_VALID_MODELS = sorted({m for m, _ in _MODEL_REGISTRY})
_VALID_TASKS = sorted({t for _, t in _MODEL_REGISTRY})


def _replace_in_graph(obj: Any, placeholder: str, value: str) -> None:
    """Recursively replace every occurrence of *placeholder* with *value* in-place."""
    if isinstance(obj, dict):
        for k, v in obj.items():
            if v == placeholder:
                obj[k] = value
            else:
                _replace_in_graph(v, placeholder, value)
    elif isinstance(obj, list):
        for i, item in enumerate(obj):
            if item == placeholder:
                obj[i] = value
            else:
                _replace_in_graph(item, placeholder, value)


def download_models(base_dir: Path, model: str, task: str) -> None:
    """Download model weights for *model*/*task* into *base_dir* using wget."""
    key = (model, task)
    if key not in _MODEL_REGISTRY:
        raise ValueError(f"No model files registered for {model}/{task}")
    for rel_path, url in _MODEL_REGISTRY[key]["model_files"]:
        dest = base_dir / rel_path
        if dest.exists():
            print(f"[setup] already exists, skipping: {dest}")
            continue
        dest.parent.mkdir(parents=True, exist_ok=True)
        print(f"[setup] downloading {dest.name} ...")
        subprocess.run(["wget", "-O", str(dest), url], check=True)


def _try_download_vbench_i2v(input_dir: Path) -> list[str]:
    """
    Download VBench I2V origin images from Google Drive via gdown (pip install gdown).
    Raises on any failure.
    """
    import gdown  # type: ignore  # raises ImportError if gdown is not installed

    import zipfile

    zip_path = input_dir / "origin.zip"
    try:
        if not zip_path.exists():
            print("[setup] downloading VBench I2V origin images from Google Drive ...")
            gdown.download(id=_VBENCH_ORIGIN_ZIP_GDRIVE_ID, output=str(zip_path), quiet=False)
        print("[setup] extracting origin.zip ...")
        with zipfile.ZipFile(zip_path, "r") as zf:
            zf.extractall(str(input_dir))
        zip_path.unlink()
    except Exception:
        # Remove a partial download so the next run starts clean.
        if zip_path.exists():
            zip_path.unlink()
        raise

    image_exts = {".png", ".jpg", ".jpeg", ".webp"}
    filenames = sorted(
        p.relative_to(input_dir).as_posix()
        for p in input_dir.rglob("*")
        if p.suffix.lower() in image_exts
    )
    print(f"[setup] prepared {len(filenames)} VBench I2V images in {input_dir}")
    return filenames


def _generate_synthetic_images(input_dir: Path, num_images: int) -> list[str]:
    """Generate synthetic 720×720 white PNG placeholders; returns filenames."""
    try:
        from PIL import Image as PILImage  # type: ignore
    except ImportError:
        raise RuntimeError(
            "Pillow is required for synthetic image generation. "
            "Install it with: pip install Pillow"
        )

    filenames: list[str] = []
    for i in range(num_images):
        fname = f"benchmark_input_{i:04d}.png"
        dest = input_dir / fname
        if not dest.exists():
            PILImage.new("RGB", (720, 720), color=(255, 255, 255)).save(str(dest))
        filenames.append(fname)
    return filenames


def prepare_input_images(
    input_dir: Path,
    num_images: int = 20,
    image_source: str = "vbench_i2v",
) -> list[str]:
    """
    Prepare benchmark input images in *input_dir*.
    For "vbench_i2v", downloads from Google Drive and raises on failure.
    Falls back to synthetic images only when image_source is not "vbench_i2v".
    Returns a list of image paths relative to *input_dir*.
    """
    input_dir.mkdir(parents=True, exist_ok=True)

    if image_source == "vbench_i2v":
        return _try_download_vbench_i2v(input_dir)

    print(f"[setup] generating {num_images} synthetic 720×720 placeholder images ...")
    return _generate_synthetic_images(input_dir, num_images)


def generate_prompt_file(
    output_path: Path,
    workflow_path: Path,
    image_filename: str,
) -> None:
    """
    Write a single ComfyUI prompt JSON to *output_path* from *workflow_path*.

    Replaces every occurrence of the sentinel string "__INPUT_IMAGE__" in the
    workflow graph with *image_filename*.
    """
    graph: dict[str, Any] = json.loads(workflow_path.read_text())
    _replace_in_graph(graph, _IMAGE_PLACEHOLDER, image_filename)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(json.dumps({"prompt": graph}, indent=2))


def generate_prompt_files(
    model: str,
    task: str,
    output_dir: Path,
    input_dir: Path,
    num_images: int = 20,
    download_model_weights: bool = False,
    comfyui_base_dir: Path | None = None,
) -> list[Path]:
    """
    Full benchmark setup for a given *model*/*task*:

    1. Optionally download model weights into *comfyui_base_dir*.
    2. Prepare input images in *input_dir* (skipped if images already exist).
    3. Generate one prompt JSON per input image in *output_dir*
       (skipped if prompt files already exist).

    Returns the list of prompt file paths.
    """
    key = (model, task)
    if key not in _MODEL_REGISTRY:
        available = ", ".join(f"{m}/{t}" for m, t in _MODEL_REGISTRY)
        raise ValueError(f"Unknown --model {model!r} --task {task!r}. Available: {available}")

    cfg = _MODEL_REGISTRY[key]

    if download_model_weights:
        if comfyui_base_dir is None:
            raise ValueError("--comfyui-base-dir is required when --download-models is set")
        download_models(comfyui_base_dir, model, task)

    image_filenames = prepare_input_images(
        input_dir,
        num_images=num_images,
        image_source=cfg.get("image_source", "synthetic"),
    )
    if not image_filenames:
        raise RuntimeError(f"No input images available in {input_dir}")

    workflow_path = _WORKFLOWS_DIR / cfg["workflow_file"]
    if not workflow_path.exists():
        raise FileNotFoundError(f"Workflow file not found: {workflow_path}")

    output_dir.mkdir(parents=True, exist_ok=True)
    generated: list[Path] = []
    for i, image_name in enumerate(image_filenames):
        prompt_path = output_dir / f"{model}_{task}_prompt_{i:04d}.json"
        generate_prompt_file(prompt_path, workflow_path, image_name)
        generated.append(prompt_path)

    print(f"[setup] generated {len(generated)} prompt files in {output_dir}")
    return generated


# ──────────────────────────────────────────────────────────────────────────────


@dataclass
class RequestResult:
    request_index: int
    prompt_id: str | None
    ok: bool
    error: str | None
    queued_at: float
    started_at: float
    finished_at: float
    end_to_end_s: float
    execution_ms: float | None
    node_timing_ms: dict[str, dict] | None


def percentile(values: list[float], pct: float) -> float:
    """Linear-interpolation percentile; e.g. percentile([1, 2, 3, 4], 90) == 3.7."""
    if not values:
        return float("nan")
    if len(values) == 1:
        return values[0]
    values = sorted(values)
    rank = (len(values) - 1) * (pct / 100.0)
    lower = math.floor(rank)
    upper = math.ceil(rank)
    if lower == upper:
        return values[lower]
    weight = rank - lower
    return values[lower] * (1.0 - weight) + values[upper] * weight


def patch_seed_in_prompt(prompt: dict[str, Any], seed: int, seed_path: str | None) -> dict[str, Any]:
    """
    Patch prompt seed in-place for common sampler nodes.
    seed_path format: "<node_id>.<input_name>".
    """
    if seed_path:
        try:
            node_id, input_name = seed_path.split(".", 1)
            prompt[node_id]["inputs"][input_name] = seed
            return prompt
        except Exception as exc:
            raise ValueError(f"Invalid --seed-path '{seed_path}': {exc}") from exc

    # Best-effort fallback: update any input key named 'seed' or 'noise_seed'
    for node in prompt.values():
        if not isinstance(node, dict):
            continue
        inputs = node.get("inputs")
        if not isinstance(inputs, dict):
            continue
        if "seed" in inputs:
            inputs["seed"] = seed
        if "noise_seed" in inputs:
            inputs["noise_seed"] = seed
    return prompt


def load_prompt_template(path: Path) -> dict[str, Any]:
    """Load a prompt JSON file; accepts a bare graph or a {"prompt": graph} wrapper."""
    data = json.loads(path.read_text())
    if "prompt" in data and isinstance(data["prompt"], dict):
        return data
    if isinstance(data, dict):
        return {"prompt": data}
    raise ValueError("Prompt file must be a JSON object (prompt graph or wrapper with 'prompt').")


async def submit_prompt(
    session: aiohttp.ClientSession,
    base_url: str,
    endpoint: str,
    payload: dict[str, Any],
    timeout_s: float,
) -> str:
    """POST *payload* to the submission endpoint and return the server-assigned prompt_id."""
    url = f"{base_url}{endpoint}"
    async with session.post(url, json=payload, timeout=timeout_s) as resp:
        text = await resp.text()
        if resp.status != 200:
            raise RuntimeError(f"submit failed [{resp.status}] {text}")
        body = json.loads(text)
        prompt_id = body.get("prompt_id")
        if not prompt_id:
            raise RuntimeError(f"missing prompt_id in response: {body}")
        return prompt_id


async def wait_for_prompt_done(
    session: aiohttp.ClientSession,
    base_url: str,
    prompt_id: str,
    poll_interval_s: float,
    timeout_s: float,
) -> tuple[float | None, dict | None]:
    """
    Returns (execution_ms, node_timing_ms) from history_item["benchmark"].
    Falls back to (None, None) if unavailable.
    """
    deadline = time.perf_counter() + timeout_s
    history_url = f"{base_url}/history/{prompt_id}"

    while time.perf_counter() < deadline:
        async with session.get(history_url, timeout=timeout_s) as resp:
            if resp.status != 200:
                text = await resp.text()
                raise RuntimeError(f"history failed [{resp.status}] {text}")

            payload = await resp.json()
            if not payload:
                await asyncio.sleep(poll_interval_s)
                continue

            history_item = payload.get(prompt_id)
            if history_item is None:
                await asyncio.sleep(poll_interval_s)
                continue

            status = history_item.get("status", {})
            if status.get("status_str") not in ("success", "error"):
                await asyncio.sleep(poll_interval_s)
                continue

            benchmark = history_item.get("benchmark", {})
            return (
                benchmark.get("execution_ms"),
                benchmark.get("nodes"),
            )

        await asyncio.sleep(poll_interval_s)

    raise TimeoutError(f"timed out waiting for prompt_id={prompt_id}")


def build_arrival_schedule(num_requests: int, request_rate: float, poisson: bool, seed: int) -> list[float]:
    """
    Returns absolute offsets (seconds from benchmark start) for each request.
    At a fixed rate the gap between requests is 1/rate; with --poisson the
    gaps are drawn from an exponential distribution with that rate.
    """
    if request_rate <= 0:
        return [0.0] * num_requests

    rnd = random.Random(seed)
    offsets: list[float] = []
    t = 0.0
    for _ in range(num_requests):
        if poisson:
            delta = rnd.expovariate(request_rate)
        else:
            delta = 1.0 / request_rate
        t += delta
        offsets.append(t)
    return offsets


async def run_request(
    idx: int,
    start_time: float,
    scheduled_offset_s: float,
    semaphore: asyncio.Semaphore,
    session: aiohttp.ClientSession,
    args: argparse.Namespace,
    prompt_templates: list[dict[str, Any]],
) -> RequestResult:
    await asyncio.sleep(max(0.0, (start_time + scheduled_offset_s) - time.perf_counter()))
    queued_at = time.perf_counter()

    async with semaphore:
        started_at = time.perf_counter()
        prompt_id = None
        try:
            payload = json.loads(json.dumps(prompt_templates[idx % len(prompt_templates)]))
            payload.setdefault("extra_data", {})
            payload["client_id"] = args.client_id

            seed = args.base_seed + idx
            payload["prompt"] = patch_seed_in_prompt(payload["prompt"], seed, args.seed_path)

            prompt_id = await submit_prompt(
                session=session,
                base_url=args.host,
                endpoint=args.endpoint,
                payload=payload,
                timeout_s=args.request_timeout_s,
            )

            execution_ms, node_timing_ms = await wait_for_prompt_done(
                session=session,
                base_url=args.host,
                prompt_id=prompt_id,
                poll_interval_s=args.poll_interval_s,
                timeout_s=args.request_timeout_s,
            )
            finished_at = time.perf_counter()
            return RequestResult(
                request_index=idx,
                prompt_id=prompt_id,
                ok=True,
                error=None,
                queued_at=queued_at,
                started_at=started_at,
                finished_at=finished_at,
                end_to_end_s=finished_at - queued_at,
                execution_ms=execution_ms,
                node_timing_ms=node_timing_ms,
            )
        except Exception as exc:
            finished_at = time.perf_counter()
            return RequestResult(
                request_index=idx,
                prompt_id=prompt_id,
                ok=False,
                error=repr(exc),
                queued_at=queued_at,
                started_at=started_at,
                finished_at=finished_at,
                end_to_end_s=finished_at - queued_at,
                execution_ms=None,
                node_timing_ms=None,
            )


def print_summary(results: list[RequestResult], wall_s: float) -> None:
    success = [r for r in results if r.ok]
    fail = [r for r in results if not r.ok]
    lat_s = [r.end_to_end_s for r in success]
    exec_ms = [r.execution_ms for r in success if r.execution_ms is not None]

    throughput = (len(success) / wall_s) if wall_s > 0 else 0.0
    print("\n=== ComfyUI Serving Benchmark Summary ===")
    print(f"requests_total: {len(results)}")
    print(f"requests_success: {len(success)}")
    print(f"requests_failed: {len(fail)}")
    print(f"wall_time_s: {wall_s:.3f}")
    print(f"throughput_req_s: {throughput:.3f}")

    if lat_s:
        print(f"latency_p50_s: {percentile(lat_s, 50):.3f}")
        print(f"latency_p90_s: {percentile(lat_s, 90):.3f}")
        print(f"latency_p95_s: {percentile(lat_s, 95):.3f}")
        print(f"latency_p99_s: {percentile(lat_s, 99):.3f}")
        print(f"latency_mean_s: {statistics.mean(lat_s):.3f}")
        print(f"latency_max_s: {max(lat_s):.3f}")

    if exec_ms:
        print(f"execution_mean_ms: {statistics.mean(exec_ms):.2f}")
        print(f"execution_p95_ms: {percentile(exec_ms, 95):.2f}")

    # Per-node timing: aggregate execution_ms across all successful results.
    node_totals: dict[str, list[float]] = {}
    for r in success:
        if not r.node_timing_ms:
            continue
        for node_id, info in r.node_timing_ms.items():
            key = f"{info.get('class_type', 'unknown')} ({node_id})"
            node_totals.setdefault(key, []).append(info.get("execution_ms", 0.0))
    if node_totals:
        print("\n--- Per-node execution time (mean ms across successful requests) ---")
        for key, times in sorted(node_totals.items(), key=lambda x: -statistics.mean(x[1])):
            print(f"  {key}: mean={statistics.mean(times):.1f} p95={percentile(times, 95):.1f} n={len(times)}")

    if fail:
        print("\nSample failures:")
        for r in fail[:5]:
            print(f"  idx={r.request_index} prompt_id={r.prompt_id} error={r.error}")


def parse_args() -> argparse.Namespace:
    p = argparse.ArgumentParser(description="Benchmark ComfyUI request serving.")
    p.add_argument("--host", type=str, default="http://127.0.0.1:8188", help="ComfyUI base URL.")
    p.add_argument(
        "--endpoint",
        type=str,
        default="/prompt",
        choices=("/prompt", "/bench/prompt"),
        help="Submission endpoint.",
    )
    p.add_argument(
        "--model",
        choices=_VALID_MODELS,
        required=True,
        help=f"Model to benchmark. Choices: {_VALID_MODELS}.",
    )
    p.add_argument(
        "--task",
        choices=_VALID_TASKS,
        required=True,
        help=f"Task type. Choices: {_VALID_TASKS}.",
    )
    p.add_argument(
        "--prompts-dir",
        type=Path,
        default=None,
        help="Directory where generated prompt JSON files are written (default: benchmarks/prompts/<model>_<task>/).",
    )
    p.add_argument(
        "--num-images",
        type=int,
        default=20,
        help="Number of synthetic images to generate when dataset download is unavailable (default: 20).",
    )
    p.add_argument(
        "--download-models",
        action="store_true",
        help="Download model weights before generating prompts (requires --comfyui-base-dir).",
    )
    p.add_argument(
        "--comfyui-base-dir",
        type=Path,
        default=None,
        help="ComfyUI root directory used as the base for model downloads.",
    )
    p.add_argument("--num-requests", type=int, default=50)
    p.add_argument("--max-concurrency", type=int, default=8)
    p.add_argument("--request-rate", type=float, default=0.0, help="Requests/sec. 0 = fire immediately.")
    p.add_argument("--poisson", action="store_true", help="Use Poisson inter-arrival when request-rate > 0.")
    p.add_argument("--base-seed", type=int, default=1234)
    p.add_argument(
        "--seed-path",
        type=str,
        default=None,
        help="Optional path to seed field in prompt: <node_id>.<input_name> (e.g. 3.seed).",
    )
    p.add_argument("--client-id", type=str, default=f"bench-{uuid.uuid4().hex[:12]}")
    p.add_argument("--request-timeout-s", type=float, default=600.0)
    p.add_argument("--poll-interval-s", type=float, default=0.2)
    p.add_argument("--output-json", type=Path, default=None, help="Write detailed result JSON.")
    p.add_argument("--seed", type=int, default=0, help="RNG seed for schedule generation.")
    return p.parse_args()


async def async_main(args: argparse.Namespace) -> None:
    prompts_dir = args.prompts_dir or Path("benchmarks/prompts") / f"{args.model}_{args.task}"
    prompt_paths = generate_prompt_files(
        model=args.model,
        task=args.task,
        output_dir=prompts_dir,
        input_dir=Path("input"),
        num_images=args.num_images,
        download_model_weights=args.download_models,
        comfyui_base_dir=args.comfyui_base_dir,
    )
    prompt_templates = [load_prompt_template(p) for p in prompt_paths]
    print(f"[bench] loaded {len(prompt_templates)} prompt templates, round-robining over {args.num_requests} requests")

    schedule = build_arrival_schedule(
        num_requests=args.num_requests,
        request_rate=args.request_rate,
        poisson=args.poisson,
        seed=args.seed,
    )
    semaphore = asyncio.Semaphore(args.max_concurrency)
    connector = aiohttp.TCPConnector(limit=max(args.max_concurrency * 2, 32))

    started = time.perf_counter()
    async with aiohttp.ClientSession(connector=connector) as session:
        tasks = [
            asyncio.create_task(
                run_request(
                    idx=i,
                    start_time=started,
                    scheduled_offset_s=schedule[i],
                    semaphore=semaphore,
                    session=session,
                    args=args,
                    prompt_templates=prompt_templates,
                )
            )
            for i in range(args.num_requests)
        ]
        results = []
        with tqdm(total=args.num_requests, unit="req", desc="benchmark") as pbar:
            for coro in asyncio.as_completed(tasks):
                result = await coro
                results.append(result)
                pbar.update(1)
                if result.ok:
                    pbar.set_postfix(succeeded=sum(r.ok for r in results))
    wall_s = time.perf_counter() - started

    print_summary(results, wall_s)

    if args.output_json is not None:
        out = {
            "config": vars(args),
            "wall_time_s": wall_s,
            "results": [asdict(r) for r in sorted(results, key=lambda x: x.request_index)],
        }
        args.output_json.parent.mkdir(parents=True, exist_ok=True)
        # default=str makes Path values in the config JSON-serializable.
        args.output_json.write_text(json.dumps(out, indent=2, default=str))
        print(f"\nWrote results to: {args.output_json}")


def main() -> None:
    args = parse_args()
    asyncio.run(async_main(args))


if __name__ == "__main__":
    main()
benchmarks/workflows/wan22_i2v.json — 154 lines (new file)

@@ -0,0 +1,154 @@
{
  "97": {
    "inputs": {"image": "__INPUT_IMAGE__"},
    "class_type": "LoadImage",
    "_meta": {"title": "Start Frame Image"}
  },
  "108": {
    "inputs": {
      "filename_prefix": "video/Wan2.2_image_to_video",
      "format": "auto",
      "codec": "auto",
      "video-preview": "",
      "video": ["130:117", 0]
    },
    "class_type": "SaveVideo",
    "_meta": {"title": "Save Video"}
  },
  "130:105": {
    "inputs": {
      "clip_name": "umt5_xxl_fp8_e4m3fn_scaled.safetensors",
      "type": "wan",
      "device": "default"
    },
    "class_type": "CLIPLoader",
    "_meta": {"title": "Load CLIP"}
  },
  "130:106": {
    "inputs": {"vae_name": "wan_2.1_vae.safetensors"},
    "class_type": "VAELoader",
    "_meta": {"title": "Load VAE"}
  },
  "130:107": {
    "inputs": {
      "text": "A felt-style little eagle cashier greeting, waving, and smiling at the camera.",
      "clip": ["130:105", 0]
    },
    "class_type": "CLIPTextEncode",
    "_meta": {"title": "CLIP Text Encode (Positive Prompt)"}
  },
  "130:109": {
    "inputs": {"shift": 5.000000000000001, "model": ["130:126", 0]},
    "class_type": "ModelSamplingSD3",
    "_meta": {"title": "ModelSamplingSD3"}
  },
  "130:110": {
    "inputs": {
      "add_noise": "enable",
      "noise_seed": 636787045983965,
      "steps": 4,
      "cfg": 1,
      "sampler_name": "euler",
      "scheduler": "simple",
      "start_at_step": 0,
      "end_at_step": 2,
      "return_with_leftover_noise": "enable",
      "model": ["130:109", 0],
      "positive": ["130:128", 0],
      "negative": ["130:128", 1],
      "latent_image": ["130:128", 2]
    },
    "class_type": "KSamplerAdvanced",
    "_meta": {"title": "KSampler (Advanced)"}
  },
  "130:111": {
    "inputs": {
      "add_noise": "disable",
      "noise_seed": 0,
      "steps": 4,
      "cfg": 1,
      "sampler_name": "euler",
      "scheduler": "simple",
      "start_at_step": 2,
      "end_at_step": 4,
      "return_with_leftover_noise": "disable",
      "model": ["130:124", 0],
      "positive": ["130:128", 0],
      "negative": ["130:128", 1],
      "latent_image": ["130:110", 0]
    },
    "class_type": "KSamplerAdvanced",
    "_meta": {"title": "KSampler (Advanced)"}
  },
  "130:117": {
    "inputs": {"fps": 16, "images": ["130:129", 0]},
    "class_type": "CreateVideo",
    "_meta": {"title": "Create Video"}
  },
  "130:122": {
    "inputs": {
      "unet_name": "wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors",
      "weight_dtype": "default"
    },
    "class_type": "UNETLoader",
    "_meta": {"title": "Load Diffusion Model"}
  },
  "130:123": {
    "inputs": {
      "unet_name": "wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors",
      "weight_dtype": "default"
    },
    "class_type": "UNETLoader",
    "_meta": {"title": "Load Diffusion Model"}
  },
  "130:124": {
    "inputs": {"shift": 5.000000000000001, "model": ["130:127", 0]},
    "class_type": "ModelSamplingSD3",
    "_meta": {"title": "ModelSamplingSD3"}
  },
  "130:125": {
    "inputs": {
      "text": "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走",
      "clip": ["130:105", 0]
    },
    "class_type": "CLIPTextEncode",
    "_meta": {"title": "CLIP Text Encode (Negative Prompt)"}
  },
  "130:126": {
    "inputs": {
      "lora_name": "wan2.2_i2v_lightx2v_4steps_lora_v1_high_noise.safetensors",
      "strength_model": 1.0000000000000002,
      "model": ["130:122", 0]
    },
    "class_type": "LoraLoaderModelOnly",
    "_meta": {"title": "Load LoRA"}
  },
  "130:127": {
    "inputs": {
      "lora_name": "wan2.2_i2v_lightx2v_4steps_lora_v1_low_noise.safetensors",
      "strength_model": 1.0000000000000002,
      "model": ["130:123", 0]
    },
    "class_type": "LoraLoaderModelOnly",
    "_meta": {"title": "Load LoRA"}
  },
  "130:128": {
    "inputs": {
      "width": 720,
      "height": 720,
      "length": 81,
      "batch_size": 1,
      "positive": ["130:107", 0],
      "negative": ["130:125", 0],
      "vae": ["130:106", 0],
      "start_image": ["97", 0]
    },
    "class_type": "WanImageToVideo",
    "_meta": {"title": "WanImageToVideo"}
  },
  "130:129": {
    "inputs": {"samples": ["130:111", 0], "vae": ["130:106", 0]},
    "class_type": "VAEDecode",
    "_meta": {"title": "VAE Decode"}
  }
}
@@ -225,6 +225,7 @@ parser.add_argument(
 parser.add_argument("--user-directory", type=is_valid_directory, default=None, help="Set the ComfyUI user directory with an absolute path. Overrides --base-directory.")

 parser.add_argument("--enable-compress-response-body", action="store_true", help="Enable compressing response body.")
+parser.add_argument("--benchmark-server-only", action="store_true", help="Enable lightweight benchmark routes and worker fast-paths focused on model serving throughput/latency.")

 parser.add_argument(
     "--comfy-api-base",
@@ -723,6 +723,7 @@ class PromptExecutor:
         self.server.client_id = None

         self.status_messages = []
+        self.node_timing_ms: dict[str, dict] = {}
         self.add_message("execution_start", { "prompt_id": prompt_id}, broadcast=False)

         self._notify_prompt_lifecycle("start", prompt_id)
@@ -769,6 +770,7 @@
                     break

             assert node_id is not None, "Node ID should not be None at this point"
+            node_start_s = time.perf_counter() if args.benchmark_server_only else None
             result, error, ex = await execute(self.server, dynamic_prompt, self.caches, node_id, extra_data, executed, prompt_id, execution_list, pending_subgraph_results, pending_async_nodes, ui_node_outputs)
             self.success = result != ExecutionResult.FAILURE
             if result == ExecutionResult.FAILURE:
@@ -778,6 +780,12 @@
                 execution_list.unstage_node_execution()
             else: # result == ExecutionResult.SUCCESS:
                 execution_list.complete_node_execution()
+                if node_start_s is not None:
+                    class_type = dynamic_prompt.get_node(node_id).get("class_type", "unknown")
+                    self.node_timing_ms[node_id] = {
+                        "class_type": class_type,
+                        "execution_ms": (time.perf_counter() - node_start_s) * 1000.0,
+                    }

             if self.cache_type == CacheType.RAM_PRESSURE:
                 comfy.model_management.free_memory(0, None, pins_required=ram_headroom, ram_required=ram_headroom)
main.py — 11 lines changed

@@ -316,12 +316,19 @@ def prompt_worker(q, server_instance):
             extra_data = item[3].copy()
             for k in sensitive:
                 extra_data[k] = sensitive[k]
+            benchmark_mode = args.benchmark_server_only

             asset_seeder.pause()
             e.execute(item[2], prompt_id, extra_data, item[4])

             need_gc = True

+            if benchmark_mode:
+                e.history_result["benchmark"] = {
+                    "execution_ms": (time.perf_counter() - execution_start_time) * 1000.0,
+                    "nodes": e.node_timing_ms,
+                }
+
             remove_sensitive = lambda prompt: prompt[:5] + prompt[6:]
             q.task_done(item_id,
                         e.history_result,
@@ -337,8 +344,8 @@

             # Log Time in a more readable way after 10 minutes
             if execution_time > 600:
-                execution_time = time.strftime("%H:%M:%S", time.gmtime(execution_time))
-                logging.info(f"Prompt executed in {execution_time}")
+                execution_time_formatted = time.strftime("%H:%M:%S", time.gmtime(execution_time))
+                logging.info(f"Prompt executed in {execution_time_formatted}")
             else:
                 logging.info("Prompt executed in {:.2f} seconds".format(execution_time))