From c02b5d4c1ece453d57529cb306fd0f0c755d69cd Mon Sep 17 00:00:00 2001
From: Tara Ding <38710454+windtara0619@users.noreply.github.com>
Date: Mon, 27 Apr 2026 11:50:09 -0700
Subject: [PATCH] Generate prompt file automatically.

---
 benchmarks/benchmark_comfyui_serving.py | 458 +++++++++++++++++++++++-
 1 file changed, 457 insertions(+), 1 deletion(-)

diff --git a/benchmarks/benchmark_comfyui_serving.py b/benchmarks/benchmark_comfyui_serving.py
index 07d498c21..6431be2bc 100644
--- a/benchmarks/benchmark_comfyui_serving.py
+++ b/benchmarks/benchmark_comfyui_serving.py
@@ -7,6 +7,41 @@ This script is inspired by diffusion serving benchmarks and is designed to:
   - optionally shape request arrivals (fixed rate or Poisson),
   - poll completion via /history/{prompt_id},
   - report latency/throughput/error metrics.
+
+Usage — Wan 2.2 I2V benchmark
+==============================
+
+Step 1 — Generate prompt files (downloads images, writes JSONs, then exits):
+
+  # Minimal: reuses images already in inputs/, else tries VBench I2V, else synthetic; writes to prompts/wan22_i2v/
+  python3 benchmarks/benchmark_comfyui_serving.py \\
+    --generate-wan22-prompts \\
+    --num-requests 50
+
+  # With model download (needs ComfyUI root):
+  python3 benchmarks/benchmark_comfyui_serving.py \\
+    --generate-wan22-prompts \\
+    --download-models \\
+    --comfyui-base-dir /path/to/ComfyUI \\
+    --num-requests 50
+
+  # Custom image/output dirs:
+  python3 benchmarks/benchmark_comfyui_serving.py \\
+    --generate-wan22-prompts \\
+    --wan22-input-dir /data/images \\
+    --wan22-output-dir /data/prompts/wan22 \\
+    --wan22-num-images 30 \\
+    --num-requests 50
+
+Step 2 — Run the benchmark (point at any one of the generated prompt files):
+
+  python3 benchmarks/benchmark_comfyui_serving.py \\
+    --prompt-file prompts/wan22_i2v/wan22_i2v_prompt_0000.json \\
+    --num-requests 50 \\
+    --max-concurrency 4 \\
+    --host http://127.0.0.1:8188
+
+The setup step also prints an example run command at the end (adjust the script path, --host and --max-concurrency as needed).
 """
 
 from __future__ import annotations
@@ -17,7 +52,9 @@ import json
 import math
 import random
 import statistics
+import subprocess
 import time
+import urllib.request
 import uuid
 from dataclasses import dataclass, asdict
 from pathlib import Path
@@ -26,6 +63,374 @@ from typing import Any
 import aiohttp
 
 
+# ──────────────────────────────────────────────────────────────────────────────
+# Wan 2.2 I2V benchmark setup helpers
+# ──────────────────────────────────────────────────────────────────────────────
+
+_WAN22_MODELS: list[tuple[str, str]] = [  # (destination path relative to the download base dir, source URL)
+    (
+        "models/diffusion_models/wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors",
+        "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/diffusion_models/wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors",
+    ),
+    (
+        "models/diffusion_models/wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors",
+        "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/diffusion_models/wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors",
+    ),
+    (
+        "models/loras/wan2.2_i2v_lightx2v_4steps_lora_v1_high_noise.safetensors",
+        "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/loras/wan2.2_i2v_lightx2v_4steps_lora_v1_high_noise.safetensors",
+    ),
+    (
+        "models/loras/wan2.2_i2v_lightx2v_4steps_lora_v1_low_noise.safetensors",
+        "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/loras/wan2.2_i2v_lightx2v_4steps_lora_v1_low_noise.safetensors",
+    ),
+    (
+        "models/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors",
+        "https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors",
+    ),
+    (
+        "models/vae/wan_2.1_vae.safetensors",
+        "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/vae/wan_2.1_vae.safetensors",
+    ),
+]
+
+# Stand-in for the LoadImage "image" input (node "97"); generate_prompt_file overwrites it with a real filename.
+_IMAGE_PLACEHOLDER = "__INPUT_IMAGE__"
+
+_WAN22_I2V_GRAPH: dict[str, Any] = {  # node id -> {"inputs", "class_type", "_meta"}; ["<id>", n] inputs name other node ids (n presumably the output slot)
+    "97": {
+        "inputs": {"image": _IMAGE_PLACEHOLDER},  # overwritten per prompt file by generate_prompt_file
+        "class_type": "LoadImage",
+        "_meta": {"title": "Start Frame Image"},
+    },
+    "108": {
+        "inputs": {
+            "filename_prefix": "video/Wan2.2_image_to_video",
+            "format": "auto",
+            "codec": "auto",
+            "video-preview": "",
+            "video": ["130:117", 0],
+        },
+        "class_type": "SaveVideo",
+        "_meta": {"title": "Save Video"},
+    },
+    "130:105": {
+        "inputs": {
+            "clip_name": "umt5_xxl_fp8_e4m3fn_scaled.safetensors",
+            "type": "wan",
+            "device": "default",
+        },
+        "class_type": "CLIPLoader",
+        "_meta": {"title": "Load CLIP"},
+    },
+    "130:106": {
+        "inputs": {"vae_name": "wan_2.1_vae.safetensors"},
+        "class_type": "VAELoader",
+        "_meta": {"title": "Load VAE"},
+    },
+    "130:107": {
+        "inputs": {
+            "text": "A felt-style little eagle cashier greeting, waving, and smiling at the camera.",
+            "clip": ["130:105", 0],
+        },
+        "class_type": "CLIPTextEncode",
+        "_meta": {"title": "CLIP Text Encode (Positive Prompt)"},
+    },
+    "130:109": {
+        "inputs": {"shift": 5.000000000000001, "model": ["130:126", 0]},
+        "class_type": "ModelSamplingSD3",
+        "_meta": {"title": "ModelSamplingSD3"},
+    },
+    "130:110": {
+        "inputs": {
+            "add_noise": "enable",
+            "noise_seed": 636787045983965,
+            "steps": 4,
+            "cfg": 1,
+            "sampler_name": "euler",
+            "scheduler": "simple",
+            "start_at_step": 0,
+            "end_at_step": 2,  # node "130:111" takes this node's output and starts at step 2
+            "return_with_leftover_noise": "enable",
+            "model": ["130:109", 0],
+            "positive": ["130:128", 0],
+            "negative": ["130:128", 1],
+            "latent_image": ["130:128", 2],
+        },
+        "class_type": "KSamplerAdvanced",
+        "_meta": {"title": "KSampler (Advanced)"},
+    },
+    "130:111": {
+        "inputs": {
+            "add_noise": "disable",
+            "noise_seed": 0,
+            "steps": 4,
+            "cfg": 1,
+            "sampler_name": "euler",
+            "scheduler": "simple",
+            "start_at_step": 2,
+            "end_at_step": 4,
+            "return_with_leftover_noise": "disable",
+            "model": ["130:124", 0],
+            "positive": ["130:128", 0],
+            "negative": ["130:128", 1],
+            "latent_image": ["130:110", 0],
+        },
+        "class_type": "KSamplerAdvanced",
+        "_meta": {"title": "KSampler (Advanced)"},
+    },
+    "130:117": {
+        "inputs": {"fps": 16, "images": ["130:129", 0]},
+        "class_type": "CreateVideo",
+        "_meta": {"title": "Create Video"},
+    },
+    "130:122": {
+        "inputs": {
+            "unet_name": "wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors",
+            "weight_dtype": "default",
+        },
+        "class_type": "UNETLoader",
+        "_meta": {"title": "Load Diffusion Model"},
+    },
+    "130:123": {
+        "inputs": {
+            "unet_name": "wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors",
+            "weight_dtype": "default",
+        },
+        "class_type": "UNETLoader",
+        "_meta": {"title": "Load Diffusion Model"},
+    },
+    "130:124": {
+        "inputs": {"shift": 5.000000000000001, "model": ["130:127", 0]},
+        "class_type": "ModelSamplingSD3",
+        "_meta": {"title": "ModelSamplingSD3"},
+    },
+    "130:125": {
+        "inputs": {
+            "text": (
+                "色调艳丽，过曝，静态，细节模糊不清，字幕，风格，作品，画作，画面，静止，整体发灰，最差质量，低质量，"
+                "JPEG压缩残留，丑陋的，残缺的，多余的手指，画得不好的手部，画得不好的脸部，畸形的，毁容的，"
+                "形态畸形的肢体，手指融合，静止不动的画面，杂乱的背景，三条腿，背景人很多，倒着走"
+            ),
+            "clip": ["130:105", 0],
+        },
+        "class_type": "CLIPTextEncode",
+        "_meta": {"title": "CLIP Text Encode (Negative Prompt)"},
+    },
+    "130:126": {
+        "inputs": {
+            "lora_name": "wan2.2_i2v_lightx2v_4steps_lora_v1_high_noise.safetensors",
+            "strength_model": 1.0000000000000002,
+            "model": ["130:122", 0],
+        },
+        "class_type": "LoraLoaderModelOnly",
+        "_meta": {"title": "Load LoRA"},
+    },
+    "130:127": {
+        "inputs": {
+            "lora_name": "wan2.2_i2v_lightx2v_4steps_lora_v1_low_noise.safetensors",
+            "strength_model": 1.0000000000000002,
+            "model": ["130:123", 0],
+        },
+        "class_type": "LoraLoaderModelOnly",
+        "_meta": {"title": "Load LoRA"},
+    },
+    "130:128": {
+        "inputs": {
+            "width": 720,
+            "height": 720,
+            "length": 81,
+            "batch_size": 1,
+            "positive": ["130:107", 0],
+            "negative": ["130:125", 0],
+            "vae": ["130:106", 0],
+            "start_image": ["97", 0],
+        },
+        "class_type": "WanImageToVideo",
+        "_meta": {"title": "WanImageToVideo"},
+    },
+    "130:129": {
+        "inputs": {"samples": ["130:111", 0], "vae": ["130:106", 0]},
+        "class_type": "VAEDecode",
+        "_meta": {"title": "VAE Decode"},
+    },
+}
+
+_VBENCH_I2V_JSON_URL = (  # NOTE(review): not referenced by any code visible in this patch — confirm it is needed
+    "https://raw.githubusercontent.com/Vchitect/VBench/master/vbench2_beta_i2v/i2v-bench-info.json"
+)
+
+
+def download_wan22_models(base_dir: Path) -> None:
+    """Fetch missing Wan 2.2 I2V weights under *base_dir* via wget; CalledProcessError propagates if wget fails."""
+    for dest, url in [(base_dir / rel_path, url) for rel_path, url in _WAN22_MODELS]:
+        if dest.exists():
+            print(f"[setup] already exists, skipping: {dest}")
+            continue
+        dest.parent.mkdir(parents=True, exist_ok=True)
+        print(f"[setup] downloading {dest.name} ...")
+        subprocess.run(["wget", "-O", f"{dest}.part", url], check=True)  # stage: a failed wget leaves only *.part
+        Path(f"{dest}.part").replace(dest)  # rename only after success so exists() never sees a partial file
+
+
+def _try_download_vbench_i2v(input_dir: Path) -> list[str]:
+    """
+    Best-effort fetch of the VBench I2V images via huggingface_hub, flattened into *input_dir*.
+    Returns the unique image basenames found, or [] if the package or the download is unavailable.
+    """
+    try:
+        from huggingface_hub import snapshot_download  # type: ignore
+    except ImportError:
+        print("[setup] huggingface_hub not available; skipping VBench download.")
+        return []
+
+    try:
+        print("[setup] downloading Vchitect/VBench_I2V dataset from HuggingFace ...")
+        cache_dir = input_dir / "_vbench_cache"
+        local = snapshot_download(
+            repo_id="Vchitect/VBench_I2V",
+            repo_type="dataset",
+            local_dir=str(cache_dir),
+        )
+    except Exception as exc:  # deliberate catch-all: any download failure is reported and yields []
+        print(f"[setup] VBench I2V download failed: {exc}")
+        return []
+
+    image_exts = {".png", ".jpg", ".jpeg", ".webp"}
+    found = sorted(p for p in Path(local).rglob("*") if p.suffix.lower() in image_exts)
+    if not found:
+        return []
+
+    import shutil
+
+    # Files are flattened by basename: the first path per name is copied, later same-named files are skipped.
+    for src in found:
+        dest = input_dir / src.name
+        if not dest.exists():
+            shutil.copy2(str(src), str(dest))
+    filenames = list(dict.fromkeys(src.name for src in found))  # list each basename once, first-seen order
+
+    print(f"[setup] prepared {len(filenames)} VBench I2V images in {input_dir}")
+    return filenames
+
+
+def _generate_synthetic_images(input_dir: Path, num_images: int) -> list[str]:
+    """Ensure *num_images* 720×720 white PNGs exist in *input_dir*; return their names (RuntimeError without Pillow)."""
+    try:
+        from PIL import Image as PILImage  # type: ignore
+    except ImportError as exc:
+        raise RuntimeError(
+            "Pillow is required for synthetic image generation. "
+            "Install it with: pip install Pillow"
+        ) from exc
+
+    input_dir.mkdir(parents=True, exist_ok=True)  # usable on its own, not only after the caller created the dir
+    filenames = [f"benchmark_input_{i:04d}.png" for i in range(num_images)]
+    for fname in filenames:
+        dest = input_dir / fname
+        if not dest.exists():
+            # Plain white frame; 720×720 matches the width/height set on the WanImageToVideo node.
+            PILImage.new("RGB", (720, 720), color=(255, 255, 255)).save(str(dest))
+    return filenames
+
+
+def prepare_input_images(input_dir: Path, num_images: int = 20) -> list[str]:
+    """
+    Make sure *input_dir* holds benchmark input images and return their basenames.
+
+    Sources are tried in order, stopping at the first one that yields images:
+      1. Image files already in the directory (.png/.jpg/.jpeg/.webp, top level only).
+      2. The Vchitect/VBench_I2V dataset fetched through huggingface_hub.
+      3. *num_images* synthetic 720×720 white PNG placeholders drawn with Pillow.
+
+    The directory is created if missing. Only basenames are returned, never full paths.
+    """
+    input_dir.mkdir(parents=True, exist_ok=True)
+    image_exts = {".png", ".jpg", ".jpeg", ".webp"}
+
+    existing = sorted(  # is_file() keeps sub-directories named like images out of the result
+        p.name for p in input_dir.iterdir() if p.is_file() and p.suffix.lower() in image_exts
+    )
+    if existing:
+        print(f"[setup] found {len(existing)} existing images in {input_dir}")
+        return existing
+
+    filenames = _try_download_vbench_i2v(input_dir)
+    if filenames:
+        return filenames
+
+    print(f"[setup] generating {num_images} synthetic 720×720 placeholder images ...")
+    return _generate_synthetic_images(input_dir, num_images)
+
+
+def generate_prompt_file(
+    output_path: Path,
+    image_filename: str,
+    positive_prompt: str | None = None,
+) -> None:
+    """
+    Render the Wan 2.2 I2V graph for one image and save it as JSON at *output_path*.
+
+    Node "97" (LoadImage) receives *image_filename*; node "130:107" keeps its default
+    positive text unless *positive_prompt* is given. Missing parent dirs are created.
+    """
+    payload = {"prompt": json.loads(json.dumps(_WAN22_I2V_GRAPH))}  # JSON round-trip = private deep copy
+    payload["prompt"]["97"]["inputs"]["image"] = image_filename
+    if positive_prompt is not None:
+        payload["prompt"]["130:107"]["inputs"]["text"] = positive_prompt
+
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    output_path.write_text(json.dumps(payload, indent=2))
+
+
+def generate_prompt_files(
+    output_dir: Path,
+    input_dir: Path,
+    num_prompts: int = 50,
+    num_images: int = 20,
+    download_models: bool = False,
+    comfyui_base_dir: Path | None = None,
+) -> list[Path]:
+    """
+    Run the full Wan 2.2 I2V benchmark setup and return the generated prompt paths.
+
+      1. Optionally download model weights into *comfyui_base_dir*.
+      2. Prepare input images in *input_dir* (existing, VBench I2V or synthetic).
+      3. Write *num_prompts* prompt JSON files to *output_dir*, cycling through
+         the available images; *num_prompts* <= 0 writes nothing and returns [].
+
+    Raises ValueError when *download_models* is set without *comfyui_base_dir*,
+    and RuntimeError when no input image could be prepared.
+    """
+    if download_models:
+        if comfyui_base_dir is None:
+            raise ValueError("--comfyui-base-dir is required when --download-models is set")
+        download_wan22_models(comfyui_base_dir)
+
+    image_filenames = prepare_input_images(input_dir, num_images=num_images)
+    if not image_filenames:
+        raise RuntimeError(f"No input images available in {input_dir}")
+
+    output_dir.mkdir(parents=True, exist_ok=True)
+    generated = [output_dir / f"wan22_i2v_prompt_{i:04d}.json" for i in range(num_prompts)]
+    # Prompt i uses image i % len(image_filenames), so a short image list is reused round-robin.
+    for i, prompt_path in enumerate(generated):
+        generate_prompt_file(prompt_path, image_filenames[i % len(image_filenames)])
+
+    print(f"[setup] generated {len(generated)} prompt files in {output_dir}")
+    if generated:  # with num_prompts <= 0 there is no first file to show (generated[0] would raise IndexError)
+        print("[setup] example run:")
+        print(
+            "  python benchmark_comfyui_serving.py"
+            f" --prompt-file {generated[0]}"
+            f" --num-requests {num_prompts}"
+        )
+    return generated
+
+
+# ──────────────────────────────────────────────────────────────────────────────
+
+
 @dataclass
 class RequestResult:
     request_index: int
@@ -302,7 +707,46 @@ def parse_args() -> argparse.Namespace:
         choices=("/prompt", "/bench/prompt"),
         help="Submission endpoint.",
     )
-    p.add_argument("--prompt-file", type=Path, required=True, help="Path to prompt JSON.")
+    p.add_argument(
+        "--prompt-file",
+        type=Path,
+        default=None,
+        help="Path to prompt JSON. Required unless --generate-wan22-prompts is set.",
+    )
+    p.add_argument(
+        "--generate-wan22-prompts",
+        action="store_true",
+        help="Generate Wan 2.2 I2V prompt files (steps: prepare images, write JSONs) then exit.",
+    )
+    p.add_argument(
+        "--wan22-input-dir",
+        type=Path,
+        default=Path("inputs"),
+        help="Directory for benchmark input images (default: inputs/).",
+    )
+    p.add_argument(
+        "--wan22-output-dir",
+        type=Path,
+        default=Path("prompts/wan22_i2v"),
+        help="Directory where generated prompt JSON files are written (default: prompts/wan22_i2v/).",
+    )
+    p.add_argument(
+        "--wan22-num-images",
+        type=int,
+        default=20,
+        help="Number of synthetic images to generate when VBench download is unavailable (default: 20).",
+    )
+    p.add_argument(
+        "--download-models",
+        action="store_true",
+        help="Download Wan 2.2 model weights before generating prompts (requires --comfyui-base-dir).",
+    )
+    p.add_argument(
+        "--comfyui-base-dir",
+        type=Path,
+        default=None,
+        help="ComfyUI root directory used as the base for model downloads.",
+    )
     p.add_argument("--num-requests", type=int, default=50)
     p.add_argument("--max-concurrency", type=int, default=8)
     p.add_argument("--request-rate", type=float, default=0.0, help="Requests/sec. 0 = fire immediately.")
@@ -323,6 +767,8 @@ def parse_args() -> argparse.Namespace:
 
 
 async def async_main(args: argparse.Namespace) -> None:
+    if args.prompt_file is None:
+        raise SystemExit("error: --prompt-file is required (or use --generate-wan22-prompts to create one)")
     prompt_template = load_prompt_template(args.prompt_file)
     schedule = build_arrival_schedule(
         num_requests=args.num_requests,
@@ -367,6 +813,16 @@ async def async_main(args: argparse.Namespace) -> None:
 
 def main() -> None:
     args = parse_args()
+    if args.generate_wan22_prompts:
+        generate_prompt_files(
+            output_dir=args.wan22_output_dir,
+            input_dir=args.wan22_input_dir,
+            num_prompts=args.num_requests,
+            num_images=args.wan22_num_images,
+            download_models=args.download_models,
+            comfyui_base_dir=args.comfyui_base_dir,
+        )
+        return
     asyncio.run(async_main(args))