From ac85d7887f23372b3ed4fb3f2234546685af0e0b Mon Sep 17 00:00:00 2001 From: Tara Ding <38710454+windtara0619@users.noreply.github.com> Date: Fri, 24 Apr 2026 16:24:28 -0700 Subject: [PATCH 01/27] Add benchmark for model serving --- benchmarks/benchmarking_model_serving.py | 444 +++++++++++++++++++++++ 1 file changed, 444 insertions(+) create mode 100644 benchmarks/benchmarking_model_serving.py diff --git a/benchmarks/benchmarking_model_serving.py b/benchmarks/benchmarking_model_serving.py new file mode 100644 index 000000000..493d0a574 --- /dev/null +++ b/benchmarks/benchmarking_model_serving.py @@ -0,0 +1,444 @@ +from __future__ import annotations + +import argparse +import asyncio +import dataclasses +import json +import math +import os +import random +import statistics +import sys +import time +from pathlib import Path +from typing import Any, Dict, List, Optional + +import torch + +# Make the repo root importable when running directly from the benchmarks/ dir. +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import comfy.model_management +import comfy.sd + + +# ----------------------------- +# Data models +# ----------------------------- + +@dataclasses.dataclass +class RequestSpec: + profile_name: str + batch_size: int + width: int + height: int + num_frames: int + steps: int + cfg_scale: float + seed: int + timeout_s: float = 180.0 + extra: Dict[str, Any] = dataclasses.field(default_factory=dict) + + +@dataclasses.dataclass +class RequestResult: + request_id: int + profile_name: str + ok: bool + error: Optional[str] + latency_ms: float + queue_wait_ms: float + step_latencies_ms: List[float] + ttfs_ms: float # time to first (denoising) step + peak_vram_mb: float + est_mem_mb: Optional[float] + started_at: float + ended_at: float + + +@dataclasses.dataclass +class RunSummary: + total_requests: int + success: int + failed: int + throughput_req_s: float + p50_ms: float + p90_ms: float + p95_ms: float + p99_ms: float + mean_ms: float + ttfs_p50_ms: float + ttfs_p99_ms: float + step_mean_ms: float + step_p99_ms: float + max_vram_mb: float + + +# ----------------------------- +# Helpers +# ----------------------------- + +def percentile(values: List[float], p: float) -> float: + if not values: + return float("nan") + values = sorted(values) + k = (len(values) - 1) * (p / 100.0) + f = math.floor(k) + c = math.ceil(k) + if f == c: + return values[int(k)] + return values[f] * (c - k) + values[c] * (k - f) + + +def now() -> float: + return time.perf_counter() + + +def gpu_peak_mb() -> float: + if not torch.cuda.is_available(): + return 0.0 + return torch.cuda.max_memory_allocated() / (1024 ** 2) + + +def reset_gpu_peak() -> None: + if torch.cuda.is_available(): + torch.cuda.reset_peak_memory_stats() + + +def sync_cuda() -> None: + if torch.cuda.is_available(): + torch.cuda.synchronize() + + +def build_request_stream( + num_requests: int, + base_seed: int, + profiles: List[RequestSpec], + weighted: Optional[List[float]] = None, +) -> List[RequestSpec]: + rnd = random.Random(base_seed) + out: List[RequestSpec] = [] + for i in range(num_requests): + p = rnd.choices(profiles, weights=weighted, k=1)[0] + out.append(dataclasses.replace(p, seed=base_seed + i)) + return out + + +# ----------------------------- +# Model adapter +# ----------------------------- + +class WanRunner: + """ + Thin adapter around ComfyUI model loading + the BaseModel.apply_model call path. 
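+
+    A minimal usage sketch (hypothetical checkpoint path and profile values,
+    shown only to make the call pattern concrete):
+
+        runner = WanRunner("wan2.1_t2v_14B.safetensors", device="cuda", dtype_str="fp16")
+        req = RequestSpec("demo", batch_size=1, width=1280, height=720,
+                          num_frames=16, steps=30, cfg_scale=6.0, seed=0)
+        result = runner.run_one(req)
+        print(result.latency_ms, result.peak_vram_mb)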
+ + Only the DiT denoiser is timed — no VAE encode/decode, no CLIP, no scheduler + overhead — so measurements reflect true model inference cost. + + Latent shape convention (WAN): [B, 16, T, H//8, W//8] + Text conditioning shape (UMT5): [B, text_seq_len, text_dim] (zeros for benchmarking) + Sigma schedule (flow-matching): linspace(1.0 → 1/steps, steps) + """ + + def __init__( + self, + checkpoint: str, + device: str, + dtype_str: str, + text_seq_len: int = 512, + text_dim: int = 4096, + ): + self.checkpoint = checkpoint + self.device_str = device + self.dtype_str = dtype_str + self.text_seq_len = text_seq_len + self.text_dim = text_dim + self.patcher, self.model = self._load_model() + + # ------------------------------------------------------------------ + # Internals + # ------------------------------------------------------------------ + + def _load_model(self): + dtype_map = { + "fp16": torch.float16, + "bf16": torch.bfloat16, + "fp32": torch.float32, + } + dtype = dtype_map.get(self.dtype_str) + model_opts = {"dtype": dtype} if dtype is not None else {} + + patcher = comfy.sd.load_diffusion_model(self.checkpoint, model_options=model_opts) + # force_full_load=True keeps the whole model resident on GPU rather than + # streaming weights on demand (important for latency benchmarking). + comfy.model_management.load_models_gpu([patcher], force_full_load=True) + return patcher, patcher.model + + def _estimate_mem_mb(self, latent_shape: tuple, text_seq_len: int) -> Optional[float]: + cond_shapes = { + "c_crossattn": [(latent_shape[0], text_seq_len, self.text_dim)], + } + try: + return self.model.memory_required(latent_shape, cond_shapes) / (1024 ** 2) + except Exception: + return None + + # ------------------------------------------------------------------ + # Single-request execution + # ------------------------------------------------------------------ + + @torch.inference_mode() + def run_one(self, req: RequestSpec) -> RequestResult: + start = now() + reset_gpu_peak() + + step_latencies: List[float] = [] + ttfs_ms = float("nan") + est_mem_mb: Optional[float] = None + ok = True + err = None + + try: + device = comfy.model_management.get_torch_device() + dtype = self.model.get_dtype_inference() + + # Latent noise tensor: [B, 16 channels, T frames, H/8, W/8] + latent_shape = ( + req.batch_size, 16, + req.num_frames, + req.height // 8, + req.width // 8, + ) + x = torch.randn(latent_shape, dtype=dtype, device=device) + est_mem_mb = self._estimate_mem_mb(latent_shape, self.text_seq_len) + + # Fake text conditioning — zeros have the right shape, non-zero + # values are not needed for throughput/latency benchmarking. 
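+            # (Dense attention/MLP cost depends on tensor shapes, not values,
+            # so zero-valued embeddings time the same as real ones on GPU.)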
+ cross_attn = torch.zeros( + req.batch_size, self.text_seq_len, self.text_dim, + dtype=dtype, device=device, + ) + + # Linear sigma schedule: 1.0 → 1/steps (flow-matching, noise→clean) + sigmas = torch.linspace(1.0, 1.0 / req.steps, req.steps, device=device) + + for step_i, sigma_val in enumerate(sigmas): + sigma_t = sigma_val.expand(req.batch_size) + t0 = now() + x = self.model.apply_model(x, sigma_t, c_crossattn=cross_attn) + sync_cuda() + elapsed_ms = (now() - t0) * 1000.0 + step_latencies.append(elapsed_ms) + if step_i == 0: + ttfs_ms = elapsed_ms + + except Exception as e: + ok = False + err = repr(e) + + end = now() + return RequestResult( + request_id=-1, + profile_name=req.profile_name, + ok=ok, + error=err, + latency_ms=(end - start) * 1000.0, + queue_wait_ms=0.0, # filled in by the scheduler + step_latencies_ms=step_latencies, + ttfs_ms=ttfs_ms, + peak_vram_mb=gpu_peak_mb(), + est_mem_mb=est_mem_mb, + started_at=start, + ended_at=end, + ) + + +# ----------------------------- +# Serving-style scheduler +# ----------------------------- + +async def run_closed_loop( + runner: WanRunner, + requests: List[RequestSpec], + concurrency: int, + request_rate: float = float("inf"), +) -> List[RequestResult]: + """ + Closed-loop scheduler (default) or Poisson open-loop when request_rate is finite. + + Each request is dispatched to a thread so the asyncio event loop stays + free to issue the next request while the GPU is busy. + """ + sem = asyncio.Semaphore(concurrency) + results: List[Optional[RequestResult]] = [None] * len(requests) + + async def worker(i: int, req: RequestSpec) -> None: + async with sem: + t_enq = now() + res = await asyncio.to_thread(runner.run_one, req) + res.request_id = i + res.queue_wait_ms = max(0.0, (res.started_at - t_enq) * 1000.0) + results[i] = res + + if request_rate == float("inf") or request_rate <= 0: + await asyncio.gather(*(worker(i, r) for i, r in enumerate(requests))) + else: + tasks: List[asyncio.Task] = [] + for i, req in enumerate(requests): + if i > 0: + await asyncio.sleep(random.expovariate(request_rate)) + tasks.append(asyncio.create_task(worker(i, req))) + await asyncio.gather(*tasks) + + return [r for r in results if r is not None] + + +def summarize(results: List[RequestResult], wall_s: float) -> RunSummary: + lat = [r.latency_ms for r in results if r.ok] + ttfs = [r.ttfs_ms for r in results if r.ok and math.isfinite(r.ttfs_ms)] + all_steps = [s for r in results if r.ok for s in r.step_latencies_ms] + succ = sum(1 for r in results if r.ok) + fail = len(results) - succ + return RunSummary( + total_requests=len(results), + success=succ, + failed=fail, + throughput_req_s=(succ / wall_s) if wall_s > 0 else 0.0, + p50_ms=percentile(lat, 50), + p90_ms=percentile(lat, 90), + p95_ms=percentile(lat, 95), + p99_ms=percentile(lat, 99), + mean_ms=(statistics.mean(lat) if lat else float("nan")), + ttfs_p50_ms=percentile(ttfs, 50), + ttfs_p99_ms=percentile(ttfs, 99), + step_mean_ms=(statistics.mean(all_steps) if all_steps else float("nan")), + step_p99_ms=percentile(all_steps, 99), + max_vram_mb=max((r.peak_vram_mb for r in results), default=0.0), + ) + + +def print_summary( + args: argparse.Namespace, + summ: RunSummary, + total_requests: int, + wall_s: float, +) -> None: + w = 60 + sep = "-" * w + print("\n" + "=" * w) + print("{s:^{n}}".format(s=" WAN Benchmark Result ", n=w)) + print("=" * w) + print("{:<40} {:<}".format("Checkpoint:", Path(args.checkpoint).name)) + print("{:<40} {:<}".format("Device / dtype:", f"{args.device}/{args.dtype}")) + 
print("{:<40} {:<}".format("Concurrency:", args.concurrency)) + rate_str = f"{args.request_rate:.1f} req/s" if args.request_rate != float("inf") else "inf (closed-loop)" + print("{:<40} {:<}".format("Request rate:", rate_str)) + print(sep) + print("{:<40} {:<.2f}".format("Benchmark duration (s):", wall_s)) + print("{:<40} {}/{}".format("Successful requests:", summ.success, total_requests)) + if summ.failed: + print("{:<40} {:<}".format("Failed requests:", summ.failed)) + print(sep) + print("{:<40} {:<.3f}".format("Throughput (req/s):", summ.throughput_req_s)) + print("{:<40} {:<.1f}".format("Latency mean (ms):", summ.mean_ms)) + print("{:<40} {:<.1f}".format("Latency p50 (ms):", summ.p50_ms)) + print("{:<40} {:<.1f}".format("Latency p90 (ms):", summ.p90_ms)) + print("{:<40} {:<.1f}".format("Latency p95 (ms):", summ.p95_ms)) + print("{:<40} {:<.1f}".format("Latency p99 (ms):", summ.p99_ms)) + print(sep) + print("{:<40} {:<.1f}".format("TTFS p50 (ms):", summ.ttfs_p50_ms)) + print("{:<40} {:<.1f}".format("TTFS p99 (ms):", summ.ttfs_p99_ms)) + print("{:<40} {:<.1f}".format("Step latency mean (ms):", summ.step_mean_ms)) + print("{:<40} {:<.1f}".format("Step latency p99 (ms):", summ.step_p99_ms)) + print(sep) + print("{:<40} {:<.1f}".format("Peak VRAM (MB):", summ.max_vram_mb)) + print("=" * w) + + +# ----------------------------- +# CLI +# ----------------------------- + +def parse_args() -> argparse.Namespace: + p = argparse.ArgumentParser( + description="Benchmark ComfyUI WAN diffusion model denoising throughput and latency." + ) + p.add_argument( + "--checkpoint", required=True, + help="Path to the WAN diffusion-model checkpoint (.safetensors / .pt).", + ) + p.add_argument("--device", default="cuda") + p.add_argument("--dtype", default="fp16", choices=["fp16", "bf16", "fp32"]) + p.add_argument("--num-requests", type=int, default=100) + p.add_argument("--concurrency", type=int, default=4, + help="Max number of in-flight requests (semaphore width).") + p.add_argument( + "--request-rate", type=float, default=float("inf"), + help="Poisson arrival rate in req/s. 
inf = closed-loop (default).", + ) + p.add_argument("--warmup-requests", type=int, default=2, + help="Warmup iterations excluded from metrics.") + p.add_argument("--seed", type=int, default=1234) + p.add_argument("--text-seq-len", type=int, default=512, + help="Cross-attention sequence length (UMT5 default: 512).") + p.add_argument("--text-dim", type=int, default=4096, + help="Text embedding width (UMT5-XXL: 4096).") + p.add_argument("--out-dir", type=Path, default=Path("benchmarks/out")) + p.add_argument("--output-file", type=Path, default=None, + help="Override path for the summary JSON output.") + return p.parse_args() + + +def default_profiles() -> List[RequestSpec]: + return [ + RequestSpec("wan21_t2v_720p_16f_30s", 1, 1280, 720, 16, 30, 6.0, 0), + RequestSpec("wan21_t2v_720p_32f_30s", 1, 1280, 720, 32, 30, 6.0, 0), + RequestSpec("wan21_t2v_480p_32f_20s", 1, 854, 480, 32, 20, 6.0, 0), + ] + + +async def main_async() -> None: + args = parse_args() + args.out_dir.mkdir(parents=True, exist_ok=True) + + runner = WanRunner( + checkpoint=args.checkpoint, + device=args.device, + dtype_str=args.dtype, + text_seq_len=args.text_seq_len, + text_dim=args.text_dim, + ) + + all_reqs = build_request_stream( + args.num_requests + args.warmup_requests, + args.seed, + default_profiles(), + ) + warmup_reqs = all_reqs[: args.warmup_requests] + bench_reqs = all_reqs[args.warmup_requests :] + + if warmup_reqs: + print(f"Running {len(warmup_reqs)} warmup request(s)...") + for req in warmup_reqs: + runner.run_one(req) + print("Warmup complete.") + + print(f"Benchmarking {len(bench_reqs)} requests (concurrency={args.concurrency})...") + t0 = now() + results = await run_closed_loop(runner, bench_reqs, args.concurrency, args.request_rate) + wall_s = now() - t0 + + summ = summarize(results, wall_s) + print_summary(args, summ, len(bench_reqs), wall_s) + + out_file = args.output_file or (args.out_dir / "summary.json") + with open(args.out_dir / "requests.jsonl", "w") as f: + for r in results: + f.write(json.dumps(dataclasses.asdict(r)) + "\n") + with open(out_file, "w") as f: + json.dump(dataclasses.asdict(summ), f, indent=2) + print(f"\nResults written to {args.out_dir}/") + + +if __name__ == "__main__": + asyncio.run(main_async()) From 96363fa74a48612ee855611eeb656be650885daa Mon Sep 17 00:00:00 2001 From: Tara Ding <38710454+windtara0619@users.noreply.github.com> Date: Sun, 26 Apr 2026 16:48:58 -0700 Subject: [PATCH 02/27] Revert "Add benchmark for model serving" This reverts commit ac85d7887f23372b3ed4fb3f2234546685af0e0b. --- benchmarks/benchmarking_model_serving.py | 444 ----------------------- 1 file changed, 444 deletions(-) delete mode 100644 benchmarks/benchmarking_model_serving.py diff --git a/benchmarks/benchmarking_model_serving.py b/benchmarks/benchmarking_model_serving.py deleted file mode 100644 index 493d0a574..000000000 --- a/benchmarks/benchmarking_model_serving.py +++ /dev/null @@ -1,444 +0,0 @@ -from __future__ import annotations - -import argparse -import asyncio -import dataclasses -import json -import math -import os -import random -import statistics -import sys -import time -from pathlib import Path -from typing import Any, Dict, List, Optional - -import torch - -# Make the repo root importable when running directly from the benchmarks/ dir. 
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - -import comfy.model_management -import comfy.sd - - -# ----------------------------- -# Data models -# ----------------------------- - -@dataclasses.dataclass -class RequestSpec: - profile_name: str - batch_size: int - width: int - height: int - num_frames: int - steps: int - cfg_scale: float - seed: int - timeout_s: float = 180.0 - extra: Dict[str, Any] = dataclasses.field(default_factory=dict) - - -@dataclasses.dataclass -class RequestResult: - request_id: int - profile_name: str - ok: bool - error: Optional[str] - latency_ms: float - queue_wait_ms: float - step_latencies_ms: List[float] - ttfs_ms: float # time to first (denoising) step - peak_vram_mb: float - est_mem_mb: Optional[float] - started_at: float - ended_at: float - - -@dataclasses.dataclass -class RunSummary: - total_requests: int - success: int - failed: int - throughput_req_s: float - p50_ms: float - p90_ms: float - p95_ms: float - p99_ms: float - mean_ms: float - ttfs_p50_ms: float - ttfs_p99_ms: float - step_mean_ms: float - step_p99_ms: float - max_vram_mb: float - - -# ----------------------------- -# Helpers -# ----------------------------- - -def percentile(values: List[float], p: float) -> float: - if not values: - return float("nan") - values = sorted(values) - k = (len(values) - 1) * (p / 100.0) - f = math.floor(k) - c = math.ceil(k) - if f == c: - return values[int(k)] - return values[f] * (c - k) + values[c] * (k - f) - - -def now() -> float: - return time.perf_counter() - - -def gpu_peak_mb() -> float: - if not torch.cuda.is_available(): - return 0.0 - return torch.cuda.max_memory_allocated() / (1024 ** 2) - - -def reset_gpu_peak() -> None: - if torch.cuda.is_available(): - torch.cuda.reset_peak_memory_stats() - - -def sync_cuda() -> None: - if torch.cuda.is_available(): - torch.cuda.synchronize() - - -def build_request_stream( - num_requests: int, - base_seed: int, - profiles: List[RequestSpec], - weighted: Optional[List[float]] = None, -) -> List[RequestSpec]: - rnd = random.Random(base_seed) - out: List[RequestSpec] = [] - for i in range(num_requests): - p = rnd.choices(profiles, weights=weighted, k=1)[0] - out.append(dataclasses.replace(p, seed=base_seed + i)) - return out - - -# ----------------------------- -# Model adapter -# ----------------------------- - -class WanRunner: - """ - Thin adapter around ComfyUI model loading + the BaseModel.apply_model call path. - - Only the DiT denoiser is timed — no VAE encode/decode, no CLIP, no scheduler - overhead — so measurements reflect true model inference cost. 
- - Latent shape convention (WAN): [B, 16, T, H//8, W//8] - Text conditioning shape (UMT5): [B, text_seq_len, text_dim] (zeros for benchmarking) - Sigma schedule (flow-matching): linspace(1.0 → 1/steps, steps) - """ - - def __init__( - self, - checkpoint: str, - device: str, - dtype_str: str, - text_seq_len: int = 512, - text_dim: int = 4096, - ): - self.checkpoint = checkpoint - self.device_str = device - self.dtype_str = dtype_str - self.text_seq_len = text_seq_len - self.text_dim = text_dim - self.patcher, self.model = self._load_model() - - # ------------------------------------------------------------------ - # Internals - # ------------------------------------------------------------------ - - def _load_model(self): - dtype_map = { - "fp16": torch.float16, - "bf16": torch.bfloat16, - "fp32": torch.float32, - } - dtype = dtype_map.get(self.dtype_str) - model_opts = {"dtype": dtype} if dtype is not None else {} - - patcher = comfy.sd.load_diffusion_model(self.checkpoint, model_options=model_opts) - # force_full_load=True keeps the whole model resident on GPU rather than - # streaming weights on demand (important for latency benchmarking). - comfy.model_management.load_models_gpu([patcher], force_full_load=True) - return patcher, patcher.model - - def _estimate_mem_mb(self, latent_shape: tuple, text_seq_len: int) -> Optional[float]: - cond_shapes = { - "c_crossattn": [(latent_shape[0], text_seq_len, self.text_dim)], - } - try: - return self.model.memory_required(latent_shape, cond_shapes) / (1024 ** 2) - except Exception: - return None - - # ------------------------------------------------------------------ - # Single-request execution - # ------------------------------------------------------------------ - - @torch.inference_mode() - def run_one(self, req: RequestSpec) -> RequestResult: - start = now() - reset_gpu_peak() - - step_latencies: List[float] = [] - ttfs_ms = float("nan") - est_mem_mb: Optional[float] = None - ok = True - err = None - - try: - device = comfy.model_management.get_torch_device() - dtype = self.model.get_dtype_inference() - - # Latent noise tensor: [B, 16 channels, T frames, H/8, W/8] - latent_shape = ( - req.batch_size, 16, - req.num_frames, - req.height // 8, - req.width // 8, - ) - x = torch.randn(latent_shape, dtype=dtype, device=device) - est_mem_mb = self._estimate_mem_mb(latent_shape, self.text_seq_len) - - # Fake text conditioning — zeros have the right shape, non-zero - # values are not needed for throughput/latency benchmarking. 
- cross_attn = torch.zeros( - req.batch_size, self.text_seq_len, self.text_dim, - dtype=dtype, device=device, - ) - - # Linear sigma schedule: 1.0 → 1/steps (flow-matching, noise→clean) - sigmas = torch.linspace(1.0, 1.0 / req.steps, req.steps, device=device) - - for step_i, sigma_val in enumerate(sigmas): - sigma_t = sigma_val.expand(req.batch_size) - t0 = now() - x = self.model.apply_model(x, sigma_t, c_crossattn=cross_attn) - sync_cuda() - elapsed_ms = (now() - t0) * 1000.0 - step_latencies.append(elapsed_ms) - if step_i == 0: - ttfs_ms = elapsed_ms - - except Exception as e: - ok = False - err = repr(e) - - end = now() - return RequestResult( - request_id=-1, - profile_name=req.profile_name, - ok=ok, - error=err, - latency_ms=(end - start) * 1000.0, - queue_wait_ms=0.0, # filled in by the scheduler - step_latencies_ms=step_latencies, - ttfs_ms=ttfs_ms, - peak_vram_mb=gpu_peak_mb(), - est_mem_mb=est_mem_mb, - started_at=start, - ended_at=end, - ) - - -# ----------------------------- -# Serving-style scheduler -# ----------------------------- - -async def run_closed_loop( - runner: WanRunner, - requests: List[RequestSpec], - concurrency: int, - request_rate: float = float("inf"), -) -> List[RequestResult]: - """ - Closed-loop scheduler (default) or Poisson open-loop when request_rate is finite. - - Each request is dispatched to a thread so the asyncio event loop stays - free to issue the next request while the GPU is busy. - """ - sem = asyncio.Semaphore(concurrency) - results: List[Optional[RequestResult]] = [None] * len(requests) - - async def worker(i: int, req: RequestSpec) -> None: - async with sem: - t_enq = now() - res = await asyncio.to_thread(runner.run_one, req) - res.request_id = i - res.queue_wait_ms = max(0.0, (res.started_at - t_enq) * 1000.0) - results[i] = res - - if request_rate == float("inf") or request_rate <= 0: - await asyncio.gather(*(worker(i, r) for i, r in enumerate(requests))) - else: - tasks: List[asyncio.Task] = [] - for i, req in enumerate(requests): - if i > 0: - await asyncio.sleep(random.expovariate(request_rate)) - tasks.append(asyncio.create_task(worker(i, req))) - await asyncio.gather(*tasks) - - return [r for r in results if r is not None] - - -def summarize(results: List[RequestResult], wall_s: float) -> RunSummary: - lat = [r.latency_ms for r in results if r.ok] - ttfs = [r.ttfs_ms for r in results if r.ok and math.isfinite(r.ttfs_ms)] - all_steps = [s for r in results if r.ok for s in r.step_latencies_ms] - succ = sum(1 for r in results if r.ok) - fail = len(results) - succ - return RunSummary( - total_requests=len(results), - success=succ, - failed=fail, - throughput_req_s=(succ / wall_s) if wall_s > 0 else 0.0, - p50_ms=percentile(lat, 50), - p90_ms=percentile(lat, 90), - p95_ms=percentile(lat, 95), - p99_ms=percentile(lat, 99), - mean_ms=(statistics.mean(lat) if lat else float("nan")), - ttfs_p50_ms=percentile(ttfs, 50), - ttfs_p99_ms=percentile(ttfs, 99), - step_mean_ms=(statistics.mean(all_steps) if all_steps else float("nan")), - step_p99_ms=percentile(all_steps, 99), - max_vram_mb=max((r.peak_vram_mb for r in results), default=0.0), - ) - - -def print_summary( - args: argparse.Namespace, - summ: RunSummary, - total_requests: int, - wall_s: float, -) -> None: - w = 60 - sep = "-" * w - print("\n" + "=" * w) - print("{s:^{n}}".format(s=" WAN Benchmark Result ", n=w)) - print("=" * w) - print("{:<40} {:<}".format("Checkpoint:", Path(args.checkpoint).name)) - print("{:<40} {:<}".format("Device / dtype:", f"{args.device}/{args.dtype}")) - 
print("{:<40} {:<}".format("Concurrency:", args.concurrency)) - rate_str = f"{args.request_rate:.1f} req/s" if args.request_rate != float("inf") else "inf (closed-loop)" - print("{:<40} {:<}".format("Request rate:", rate_str)) - print(sep) - print("{:<40} {:<.2f}".format("Benchmark duration (s):", wall_s)) - print("{:<40} {}/{}".format("Successful requests:", summ.success, total_requests)) - if summ.failed: - print("{:<40} {:<}".format("Failed requests:", summ.failed)) - print(sep) - print("{:<40} {:<.3f}".format("Throughput (req/s):", summ.throughput_req_s)) - print("{:<40} {:<.1f}".format("Latency mean (ms):", summ.mean_ms)) - print("{:<40} {:<.1f}".format("Latency p50 (ms):", summ.p50_ms)) - print("{:<40} {:<.1f}".format("Latency p90 (ms):", summ.p90_ms)) - print("{:<40} {:<.1f}".format("Latency p95 (ms):", summ.p95_ms)) - print("{:<40} {:<.1f}".format("Latency p99 (ms):", summ.p99_ms)) - print(sep) - print("{:<40} {:<.1f}".format("TTFS p50 (ms):", summ.ttfs_p50_ms)) - print("{:<40} {:<.1f}".format("TTFS p99 (ms):", summ.ttfs_p99_ms)) - print("{:<40} {:<.1f}".format("Step latency mean (ms):", summ.step_mean_ms)) - print("{:<40} {:<.1f}".format("Step latency p99 (ms):", summ.step_p99_ms)) - print(sep) - print("{:<40} {:<.1f}".format("Peak VRAM (MB):", summ.max_vram_mb)) - print("=" * w) - - -# ----------------------------- -# CLI -# ----------------------------- - -def parse_args() -> argparse.Namespace: - p = argparse.ArgumentParser( - description="Benchmark ComfyUI WAN diffusion model denoising throughput and latency." - ) - p.add_argument( - "--checkpoint", required=True, - help="Path to the WAN diffusion-model checkpoint (.safetensors / .pt).", - ) - p.add_argument("--device", default="cuda") - p.add_argument("--dtype", default="fp16", choices=["fp16", "bf16", "fp32"]) - p.add_argument("--num-requests", type=int, default=100) - p.add_argument("--concurrency", type=int, default=4, - help="Max number of in-flight requests (semaphore width).") - p.add_argument( - "--request-rate", type=float, default=float("inf"), - help="Poisson arrival rate in req/s. 
inf = closed-loop (default).", - ) - p.add_argument("--warmup-requests", type=int, default=2, - help="Warmup iterations excluded from metrics.") - p.add_argument("--seed", type=int, default=1234) - p.add_argument("--text-seq-len", type=int, default=512, - help="Cross-attention sequence length (UMT5 default: 512).") - p.add_argument("--text-dim", type=int, default=4096, - help="Text embedding width (UMT5-XXL: 4096).") - p.add_argument("--out-dir", type=Path, default=Path("benchmarks/out")) - p.add_argument("--output-file", type=Path, default=None, - help="Override path for the summary JSON output.") - return p.parse_args() - - -def default_profiles() -> List[RequestSpec]: - return [ - RequestSpec("wan21_t2v_720p_16f_30s", 1, 1280, 720, 16, 30, 6.0, 0), - RequestSpec("wan21_t2v_720p_32f_30s", 1, 1280, 720, 32, 30, 6.0, 0), - RequestSpec("wan21_t2v_480p_32f_20s", 1, 854, 480, 32, 20, 6.0, 0), - ] - - -async def main_async() -> None: - args = parse_args() - args.out_dir.mkdir(parents=True, exist_ok=True) - - runner = WanRunner( - checkpoint=args.checkpoint, - device=args.device, - dtype_str=args.dtype, - text_seq_len=args.text_seq_len, - text_dim=args.text_dim, - ) - - all_reqs = build_request_stream( - args.num_requests + args.warmup_requests, - args.seed, - default_profiles(), - ) - warmup_reqs = all_reqs[: args.warmup_requests] - bench_reqs = all_reqs[args.warmup_requests :] - - if warmup_reqs: - print(f"Running {len(warmup_reqs)} warmup request(s)...") - for req in warmup_reqs: - runner.run_one(req) - print("Warmup complete.") - - print(f"Benchmarking {len(bench_reqs)} requests (concurrency={args.concurrency})...") - t0 = now() - results = await run_closed_loop(runner, bench_reqs, args.concurrency, args.request_rate) - wall_s = now() - t0 - - summ = summarize(results, wall_s) - print_summary(args, summ, len(bench_reqs), wall_s) - - out_file = args.output_file or (args.out_dir / "summary.json") - with open(args.out_dir / "requests.jsonl", "w") as f: - for r in results: - f.write(json.dumps(dataclasses.asdict(r)) + "\n") - with open(out_file, "w") as f: - json.dump(dataclasses.asdict(summ), f, indent=2) - print(f"\nResults written to {args.out_dir}/") - - -if __name__ == "__main__": - asyncio.run(main_async()) From 00379b4acf8c65822812b4ad5c7dd68ec6d3043d Mon Sep 17 00:00:00 2001 From: Tara Ding <38710454+windtara0619@users.noreply.github.com> Date: Sun, 26 Apr 2026 19:41:55 -0700 Subject: [PATCH 03/27] Move benchmark serving client into benchmarks folder --- benchmarks/benchmark_comfyui_serving.py | 374 ++++++++++++++++++++++++ comfy/cli_args.py | 1 + main.py | 37 ++- server.py | 78 ++++- 4 files changed, 475 insertions(+), 15 deletions(-) create mode 100644 benchmarks/benchmark_comfyui_serving.py diff --git a/benchmarks/benchmark_comfyui_serving.py b/benchmarks/benchmark_comfyui_serving.py new file mode 100644 index 000000000..07d498c21 --- /dev/null +++ b/benchmarks/benchmark_comfyui_serving.py @@ -0,0 +1,374 @@ +#!/usr/bin/env python3 +""" +Simple serving benchmark client for ComfyUI's HTTP API. + +This script is inspired by diffusion serving benchmarks and is designed to: + - submit prompts to ComfyUI (/prompt or /bench/prompt), + - optionally shape request arrivals (fixed rate or Poisson), + - poll completion via /history/{prompt_id}, + - report latency/throughput/error metrics. 
+""" + +from __future__ import annotations + +import argparse +import asyncio +import json +import math +import random +import statistics +import time +import uuid +from dataclasses import dataclass, asdict +from pathlib import Path +from typing import Any + +import aiohttp + + +@dataclass +class RequestResult: + request_index: int + prompt_id: str | None + ok: bool + error: str | None + queued_at: float + started_at: float + finished_at: float + end_to_end_s: float + queue_wait_ms: float | None + execution_ms: float | None + + +def percentile(values: list[float], pct: float) -> float: + if not values: + return float("nan") + if len(values) == 1: + return values[0] + values = sorted(values) + rank = (len(values) - 1) * (pct / 100.0) + lower = math.floor(rank) + upper = math.ceil(rank) + if lower == upper: + return values[lower] + weight = rank - lower + return values[lower] * (1.0 - weight) + values[upper] * weight + + +def patch_seed_in_prompt(prompt: dict[str, Any], seed: int, seed_path: str | None) -> dict[str, Any]: + """ + Patch prompt seed in-place for common sampler nodes. + seed_path format: ".". + """ + if seed_path: + try: + node_id, input_name = seed_path.split(".", 1) + prompt[node_id]["inputs"][input_name] = seed + return prompt + except Exception as exc: + raise ValueError(f"Invalid --seed-path '{seed_path}': {exc}") from exc + + # Best-effort fallback: update any input key named 'seed' or 'noise_seed' + for node in prompt.values(): + if not isinstance(node, dict): + continue + inputs = node.get("inputs") + if not isinstance(inputs, dict): + continue + if "seed" in inputs: + inputs["seed"] = seed + if "noise_seed" in inputs: + inputs["noise_seed"] = seed + return prompt + + +def load_prompt_template(path: Path) -> dict[str, Any]: + data = json.loads(path.read_text()) + if "prompt" in data and isinstance(data["prompt"], dict): + return data + if isinstance(data, dict): + return {"prompt": data} + raise ValueError("Prompt file must be a JSON object (prompt graph or wrapper with 'prompt').") + + +async def submit_prompt( + session: aiohttp.ClientSession, + base_url: str, + endpoint: str, + payload: dict[str, Any], + timeout_s: float, +) -> str: + url = f"{base_url}{endpoint}" + async with session.post(url, json=payload, timeout=timeout_s) as resp: + text = await resp.text() + if resp.status != 200: + raise RuntimeError(f"submit failed [{resp.status}] {text}") + body = json.loads(text) + prompt_id = body.get("prompt_id") + if not prompt_id: + raise RuntimeError(f"missing prompt_id in response: {body}") + return prompt_id + + +async def wait_for_prompt_done( + session: aiohttp.ClientSession, + base_url: str, + prompt_id: str, + poll_interval_s: float, + timeout_s: float, +) -> tuple[float | None, float | None]: + """ + Returns (queue_wait_ms, execution_ms) when available from history status messages. + Falls back to (None, None) if unavailable. 
+ """ + deadline = time.perf_counter() + timeout_s + history_url = f"{base_url}/history/{prompt_id}" + + while time.perf_counter() < deadline: + async with session.get(history_url, timeout=timeout_s) as resp: + if resp.status != 200: + text = await resp.text() + raise RuntimeError(f"history failed [{resp.status}] {text}") + + payload = await resp.json() + if not payload: + await asyncio.sleep(poll_interval_s) + continue + + history_item = payload.get(prompt_id) + if history_item is None: + await asyncio.sleep(poll_interval_s) + continue + + status = history_item.get("status", {}) + status_str = status.get("status_str") + messages = status.get("messages", []) + if status_str not in ("success", "error"): + await asyncio.sleep(poll_interval_s) + continue + + queue_wait_ms = None + execution_ms = None + try: + timestamp_map: dict[str, int] = {} + for event, msg in messages: + if isinstance(msg, dict) and "timestamp" in msg: + timestamp_map[event] = int(msg["timestamp"]) + start_ts = timestamp_map.get("execution_start") + end_ts = timestamp_map.get("execution_success") or timestamp_map.get("execution_error") + if start_ts is not None and end_ts is not None: + execution_ms = max(0.0, end_ts - start_ts) + except Exception: + execution_ms = None + + return queue_wait_ms, execution_ms + + await asyncio.sleep(poll_interval_s) + + raise TimeoutError(f"timed out waiting for prompt_id={prompt_id}") + + +def build_arrival_schedule(num_requests: int, request_rate: float, poisson: bool, seed: int) -> list[float]: + """ + Returns absolute offsets (seconds from benchmark start) for each request. + """ + if request_rate <= 0: + return [0.0] * num_requests + + rnd = random.Random(seed) + offsets: list[float] = [] + t = 0.0 + for _ in range(num_requests): + if poisson: + delta = rnd.expovariate(request_rate) + else: + delta = 1.0 / request_rate + t += delta + offsets.append(t) + return offsets + + +async def run_request( + idx: int, + start_time: float, + scheduled_offset_s: float, + semaphore: asyncio.Semaphore, + session: aiohttp.ClientSession, + args: argparse.Namespace, + prompt_wrapper_template: dict[str, Any], +) -> RequestResult: + await asyncio.sleep(max(0.0, (start_time + scheduled_offset_s) - time.perf_counter())) + queued_at = time.perf_counter() + + async with semaphore: + started_at = time.perf_counter() + prompt_id = None + try: + payload = json.loads(json.dumps(prompt_wrapper_template)) + payload.setdefault("extra_data", {}) + payload["client_id"] = args.client_id + + seed = args.base_seed + idx + payload["prompt"] = patch_seed_in_prompt(payload["prompt"], seed, args.seed_path) + + prompt_id = await submit_prompt( + session=session, + base_url=args.host, + endpoint=args.endpoint, + payload=payload, + timeout_s=args.request_timeout_s, + ) + + queue_wait_ms, execution_ms = await wait_for_prompt_done( + session=session, + base_url=args.host, + prompt_id=prompt_id, + poll_interval_s=args.poll_interval_s, + timeout_s=args.request_timeout_s, + ) + finished_at = time.perf_counter() + return RequestResult( + request_index=idx, + prompt_id=prompt_id, + ok=True, + error=None, + queued_at=queued_at, + started_at=started_at, + finished_at=finished_at, + end_to_end_s=finished_at - queued_at, + queue_wait_ms=queue_wait_ms, + execution_ms=execution_ms, + ) + except Exception as exc: + finished_at = time.perf_counter() + return RequestResult( + request_index=idx, + prompt_id=prompt_id, + ok=False, + error=repr(exc), + queued_at=queued_at, + started_at=started_at, + finished_at=finished_at, + 
end_to_end_s=finished_at - queued_at, + queue_wait_ms=None, + execution_ms=None, + ) + + +def print_summary(results: list[RequestResult], wall_s: float) -> None: + success = [r for r in results if r.ok] + fail = [r for r in results if not r.ok] + lat_s = [r.end_to_end_s for r in success] + queue_wait_ms = [r.queue_wait_ms for r in success if r.queue_wait_ms is not None] + exec_ms = [r.execution_ms for r in success if r.execution_ms is not None] + + throughput = (len(success) / wall_s) if wall_s > 0 else 0.0 + print("\n=== ComfyUI Serving Benchmark Summary ===") + print(f"requests_total: {len(results)}") + print(f"requests_success: {len(success)}") + print(f"requests_failed: {len(fail)}") + print(f"wall_time_s: {wall_s:.3f}") + print(f"throughput_req_s: {throughput:.3f}") + + if lat_s: + print(f"latency_p50_s: {percentile(lat_s, 50):.3f}") + print(f"latency_p90_s: {percentile(lat_s, 90):.3f}") + print(f"latency_p95_s: {percentile(lat_s, 95):.3f}") + print(f"latency_p99_s: {percentile(lat_s, 99):.3f}") + print(f"latency_mean_s: {statistics.mean(lat_s):.3f}") + print(f"latency_max_s: {max(lat_s):.3f}") + + if queue_wait_ms: + print(f"queue_wait_mean_ms: {statistics.mean(queue_wait_ms):.2f}") + print(f"queue_wait_p95_ms: {percentile(queue_wait_ms, 95):.2f}") + + if exec_ms: + print(f"execution_mean_ms: {statistics.mean(exec_ms):.2f}") + print(f"execution_p95_ms: {percentile(exec_ms, 95):.2f}") + + if fail: + print("\nSample failures:") + for r in fail[:5]: + print(f" idx={r.request_index} prompt_id={r.prompt_id} error={r.error}") + + +def parse_args() -> argparse.Namespace: + p = argparse.ArgumentParser(description="Benchmark ComfyUI request serving.") + p.add_argument("--host", type=str, default="http://127.0.0.1:8188", help="ComfyUI base URL.") + p.add_argument( + "--endpoint", + type=str, + default="/prompt", + choices=("/prompt", "/bench/prompt"), + help="Submission endpoint.", + ) + p.add_argument("--prompt-file", type=Path, required=True, help="Path to prompt JSON.") + p.add_argument("--num-requests", type=int, default=50) + p.add_argument("--max-concurrency", type=int, default=8) + p.add_argument("--request-rate", type=float, default=0.0, help="Requests/sec. 0 = fire immediately.") + p.add_argument("--poisson", action="store_true", help="Use Poisson inter-arrival when request-rate > 0.") + p.add_argument("--base-seed", type=int, default=1234) + p.add_argument( + "--seed-path", + type=str, + default=None, + help="Optional path to seed field in prompt: . (e.g. 
3.seed).", + ) + p.add_argument("--client-id", type=str, default=f"bench-{uuid.uuid4().hex[:12]}") + p.add_argument("--request-timeout-s", type=float, default=600.0) + p.add_argument("--poll-interval-s", type=float, default=0.2) + p.add_argument("--output-json", type=Path, default=None, help="Write detailed result JSON.") + p.add_argument("--seed", type=int, default=0, help="RNG seed for schedule generation.") + return p.parse_args() + + +async def async_main(args: argparse.Namespace) -> None: + prompt_template = load_prompt_template(args.prompt_file) + schedule = build_arrival_schedule( + num_requests=args.num_requests, + request_rate=args.request_rate, + poisson=args.poisson, + seed=args.seed, + ) + semaphore = asyncio.Semaphore(args.max_concurrency) + connector = aiohttp.TCPConnector(limit=max(args.max_concurrency * 2, 32)) + + started = time.perf_counter() + async with aiohttp.ClientSession(connector=connector) as session: + tasks = [ + asyncio.create_task( + run_request( + idx=i, + start_time=started, + scheduled_offset_s=schedule[i], + semaphore=semaphore, + session=session, + args=args, + prompt_wrapper_template=prompt_template, + ) + ) + for i in range(args.num_requests) + ] + results = await asyncio.gather(*tasks) + wall_s = time.perf_counter() - started + + print_summary(results, wall_s) + + if args.output_json is not None: + out = { + "config": vars(args), + "wall_time_s": wall_s, + "results": [asdict(r) for r in sorted(results, key=lambda x: x.request_index)], + } + args.output_json.parent.mkdir(parents=True, exist_ok=True) + args.output_json.write_text(json.dumps(out, indent=2)) + print(f"\nWrote results to: {args.output_json}") + + +def main() -> None: + args = parse_args() + asyncio.run(async_main(args)) + + +if __name__ == "__main__": + main() diff --git a/comfy/cli_args.py b/comfy/cli_args.py index dbaadf723..e9828a5db 100644 --- a/comfy/cli_args.py +++ b/comfy/cli_args.py @@ -225,6 +225,7 @@ parser.add_argument( parser.add_argument("--user-directory", type=is_valid_directory, default=None, help="Set the ComfyUI user directory with an absolute path. 
Overrides --base-directory.") parser.add_argument("--enable-compress-response-body", action="store_true", help="Enable compressing response body.") +parser.add_argument("--benchmark-server-only", action="store_true", help="Enable lightweight benchmark routes and worker fast-paths focused on model serving throughput/latency.") parser.add_argument( "--comfy-api-base", diff --git a/main.py b/main.py index dbaf2745c..5013bac42 100644 --- a/main.py +++ b/main.py @@ -293,6 +293,7 @@ def prompt_worker(q, server_instance): gc_collect_interval = 10.0 while True: + benchmark_mode = args.benchmark_server_only timeout = 1000.0 if need_gc: timeout = max(gc_collect_interval - (current_time - last_gc_collect), 0.0) @@ -301,6 +302,7 @@ def prompt_worker(q, server_instance): if queue_item is not None: item, item_id = queue_item execution_start_time = time.perf_counter() + execution_start_wall_ms = int(time.time() * 1000) prompt_id = item[1] server_instance.last_prompt_id = prompt_id @@ -308,15 +310,21 @@ def prompt_worker(q, server_instance): extra_data = item[3].copy() for k in sensitive: extra_data[k] = sensitive[k] + benchmark_mode = args.benchmark_server_only or extra_data.get("benchmark_server_only", False) - asset_seeder.pause() + if not benchmark_mode: + asset_seeder.pause() e.execute(item[2], prompt_id, extra_data, item[4]) need_gc = True remove_sensitive = lambda prompt: prompt[:5] + prompt[6:] + history_result = e.history_result + if benchmark_mode: + history_result = {"outputs": {}, "meta": {}} + q.task_done(item_id, - e.history_result, + history_result, status=execution.PromptQueue.ExecutionStatus( status_str='success' if e.success else 'error', completed=e.success, @@ -325,16 +333,24 @@ def prompt_worker(q, server_instance): server_instance.send_sync("executing", {"node": None, "prompt_id": prompt_id}, server_instance.client_id) current_time = time.perf_counter() - execution_time = current_time - execution_start_time + execution_time_s = current_time - execution_start_time # Log Time in a more readable way after 10 minutes - if execution_time > 600: - execution_time = time.strftime("%H:%M:%S", time.gmtime(execution_time)) - logging.info(f"Prompt executed in {execution_time}") + if execution_time_s > 600: + execution_time_formatted = time.strftime("%H:%M:%S", time.gmtime(execution_time_s)) + logging.info(f"Prompt executed in {execution_time_formatted}") else: - logging.info("Prompt executed in {:.2f} seconds".format(execution_time)) + logging.info("Prompt executed in {:.2f} seconds".format(execution_time_s)) - if not asset_seeder.is_disabled(): + queue_wait_ms = 0.0 + created_at = extra_data.get("create_time") + if isinstance(created_at, int): + queue_wait_ms = max(0.0, execution_start_wall_ms - created_at) + + if benchmark_mode: + server_instance.record_benchmark_result(prompt_id, e.success, execution_time_s * 1000.0, queue_wait_ms) + + if not benchmark_mode and not asset_seeder.is_disabled(): paths = _collect_output_absolute_paths(e.history_result) register_output_files(paths, job_id=prompt_id) @@ -360,9 +376,10 @@ def prompt_worker(q, server_instance): need_gc = False hook_breaker_ac10a0.restore_functions() - if not asset_seeder.is_disabled(): + if not benchmark_mode and not asset_seeder.is_disabled(): asset_seeder.enqueue_enrich(roots=("output",), compute_hashes=True) - asset_seeder.resume() + if not benchmark_mode: + asset_seeder.resume() async def run(server_instance, address='', port=8188, verbose=True, call_on_start=None): diff --git a/server.py b/server.py index 881da8e66..5db448b7f 
100644 --- a/server.py +++ b/server.py @@ -16,6 +16,7 @@ import struct import ssl import socket import ipaddress +import threading from PIL import Image, ImageOps from PIL.PngImagePlugin import PngInfo from io import BytesIO @@ -252,6 +253,17 @@ class PromptServer(): self.client_id = None self.on_prompt_handlers = [] + self._benchmark_lock = threading.Lock() + self._benchmark_stats = { + "requests_total": 0, + "requests_success": 0, + "requests_error": 0, + "latency_ms_total": 0.0, + "latency_ms_max": 0.0, + "queue_wait_ms_total": 0.0, + "queue_wait_ms_max": 0.0, + "last_prompt_id": None, + } @routes.get('/ws') async def websocket_handler(request): @@ -912,12 +924,17 @@ class PromptServer(): queue_info['queue_pending'] = _remove_sensitive_from_queue(current_queue[1]) return web.json_response(queue_info) - @routes.post("/prompt") - async def post_prompt(request): - logging.info("got prompt") - json_data = await request.json() - json_data = self.trigger_on_prompt(json_data) + @routes.get("/bench/stats") + async def get_bench_stats(request): + stats = self.get_benchmark_stats() + return web.json_response(stats) + @routes.post("/bench/reset") + async def reset_bench_stats(request): + self.reset_benchmark_stats() + return web.json_response({"status": "ok"}) + + async def enqueue_prompt(json_data): if "number" in json_data: number = float(json_data['number']) else: @@ -967,6 +984,22 @@ class PromptServer(): } return web.json_response({"error": error, "node_errors": {}}, status=400) + @routes.post("/bench/prompt") + async def post_bench_prompt(request): + json_data = await request.json() + json_data = self.trigger_on_prompt(json_data) + extra_data = json_data.setdefault("extra_data", {}) + extra_data["benchmark_server_only"] = True + extra_data.setdefault("preview_method", "none") + return await enqueue_prompt(json_data) + + @routes.post("/prompt") + async def post_prompt(request): + logging.info("got prompt") + json_data = await request.json() + json_data = self.trigger_on_prompt(json_data) + return await enqueue_prompt(json_data) + @routes.post("/queue") async def post_queue(request): json_data = await request.json() @@ -1111,6 +1144,41 @@ class PromptServer(): prompt_info['exec_info'] = exec_info return prompt_info + def reset_benchmark_stats(self): + with self._benchmark_lock: + self._benchmark_stats = { + "requests_total": 0, + "requests_success": 0, + "requests_error": 0, + "latency_ms_total": 0.0, + "latency_ms_max": 0.0, + "queue_wait_ms_total": 0.0, + "queue_wait_ms_max": 0.0, + "last_prompt_id": None, + } + + def record_benchmark_result(self, prompt_id, success, latency_ms, queue_wait_ms=0.0): + with self._benchmark_lock: + self._benchmark_stats["requests_total"] += 1 + if success: + self._benchmark_stats["requests_success"] += 1 + else: + self._benchmark_stats["requests_error"] += 1 + self._benchmark_stats["latency_ms_total"] += max(0.0, latency_ms) + self._benchmark_stats["queue_wait_ms_total"] += max(0.0, queue_wait_ms) + self._benchmark_stats["latency_ms_max"] = max(self._benchmark_stats["latency_ms_max"], max(0.0, latency_ms)) + self._benchmark_stats["queue_wait_ms_max"] = max(self._benchmark_stats["queue_wait_ms_max"], max(0.0, queue_wait_ms)) + self._benchmark_stats["last_prompt_id"] = prompt_id + + def get_benchmark_stats(self): + with self._benchmark_lock: + stats = dict(self._benchmark_stats) + + total = stats["requests_total"] + stats["latency_ms_avg"] = (stats["latency_ms_total"] / total) if total > 0 else 0.0 + stats["queue_wait_ms_avg"] = (stats["queue_wait_ms_total"] / 
total) if total > 0 else 0.0 + return stats + async def send(self, event, data, sid=None): if event == BinaryEventTypes.UNENCODED_PREVIEW_IMAGE: await self.send_image(data, sid=sid) From c02b5d4c1ece453d57529cb306fd0f0c755d69cd Mon Sep 17 00:00:00 2001 From: Tara Ding <38710454+windtara0619@users.noreply.github.com> Date: Mon, 27 Apr 2026 11:50:09 -0700 Subject: [PATCH 04/27] Generate prompt file automatically. --- benchmarks/benchmark_comfyui_serving.py | 458 +++++++++++++++++++++++- 1 file changed, 457 insertions(+), 1 deletion(-) diff --git a/benchmarks/benchmark_comfyui_serving.py b/benchmarks/benchmark_comfyui_serving.py index 07d498c21..6431be2bc 100644 --- a/benchmarks/benchmark_comfyui_serving.py +++ b/benchmarks/benchmark_comfyui_serving.py @@ -7,6 +7,41 @@ This script is inspired by diffusion serving benchmarks and is designed to: - optionally shape request arrivals (fixed rate or Poisson), - poll completion via /history/{prompt_id}, - report latency/throughput/error metrics. + +Usage — Wan 2.2 I2V benchmark +============================== + +Step 1 — Generate prompt files (downloads images, writes JSONs, then exits): + + # Minimal: uses synthetic images, writes to prompts/wan22_i2v/ + python3 benchmarks/benchmark_comfyui_serving.py \\ + --generate-wan22-prompts \\ + --num-requests 50 + + # With model download (needs ComfyUI root): + python3 benchmarks/benchmark_comfyui_serving.py \\ + --generate-wan22-prompts \\ + --download-models \\ + --comfyui-base-dir /path/to/ComfyUI \\ + --num-requests 50 + + # Custom image/output dirs: + python3 benchmarks/benchmark_comfyui_serving.py \\ + --generate-wan22-prompts \\ + --wan22-input-dir /data/images \\ + --wan22-output-dir /data/prompts/wan22 \\ + --wan22-num-images 30 \\ + --num-requests 50 + +Step 2 — Run the benchmark (point at any one of the generated prompt files): + + python3 benchmarks/benchmark_comfyui_serving.py \\ + --prompt-file prompts/wan22_i2v/wan22_i2v_prompt_0000.json \\ + --num-requests 50 \\ + --max-concurrency 4 \\ + --host http://127.0.0.1:8188 + +The setup step also prints the exact run command at the end, so you can copy it directly. 
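+
+Optional server-side counters: when requests go through /bench/prompt (or the
+server runs with --benchmark-server-only), aggregate stats can be read back and
+reset over plain HTTP, e.g.:
+
+    curl http://127.0.0.1:8188/bench/stats
+    curl -X POST http://127.0.0.1:8188/bench/reset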
""" from __future__ import annotations @@ -17,7 +52,9 @@ import json import math import random import statistics +import subprocess import time +import urllib.request import uuid from dataclasses import dataclass, asdict from pathlib import Path @@ -26,6 +63,374 @@ from typing import Any import aiohttp +# ────────────────────────────────────────────────────────────────────────────── +# Wan 2.2 I2V benchmark setup helpers +# ────────────────────────────────────────────────────────────────────────────── + +_WAN22_MODELS: list[tuple[str, str]] = [ + ( + "models/diffusion_models/wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors", + "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/diffusion_models/wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors", + ), + ( + "models/diffusion_models/wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors", + "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/diffusion_models/wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors", + ), + ( + "models/loras/wan2.2_i2v_lightx2v_4steps_lora_v1_high_noise.safetensors", + "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/loras/wan2.2_i2v_lightx2v_4steps_lora_v1_high_noise.safetensors", + ), + ( + "models/loras/wan2.2_i2v_lightx2v_4steps_lora_v1_low_noise.safetensors", + "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/loras/wan2.2_i2v_lightx2v_4steps_lora_v1_low_noise.safetensors", + ), + ( + "models/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors", + ), + ( + "models/vae/wan_2.1_vae.safetensors", + "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/vae/wan_2.1_vae.safetensors", + ), +] + +# Placeholder sentinel replaced by generate_prompt_file. 
+_IMAGE_PLACEHOLDER = "__INPUT_IMAGE__" + +_WAN22_I2V_GRAPH: dict[str, Any] = { + "97": { + "inputs": {"image": _IMAGE_PLACEHOLDER}, + "class_type": "LoadImage", + "_meta": {"title": "Start Frame Image"}, + }, + "108": { + "inputs": { + "filename_prefix": "video/Wan2.2_image_to_video", + "format": "auto", + "codec": "auto", + "video-preview": "", + "video": ["130:117", 0], + }, + "class_type": "SaveVideo", + "_meta": {"title": "Save Video"}, + }, + "130:105": { + "inputs": { + "clip_name": "umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "type": "wan", + "device": "default", + }, + "class_type": "CLIPLoader", + "_meta": {"title": "Load CLIP"}, + }, + "130:106": { + "inputs": {"vae_name": "wan_2.1_vae.safetensors"}, + "class_type": "VAELoader", + "_meta": {"title": "Load VAE"}, + }, + "130:107": { + "inputs": { + "text": "A felt-style little eagle cashier greeting, waving, and smiling at the camera.", + "clip": ["130:105", 0], + }, + "class_type": "CLIPTextEncode", + "_meta": {"title": "CLIP Text Encode (Positive Prompt)"}, + }, + "130:109": { + "inputs": {"shift": 5.000000000000001, "model": ["130:126", 0]}, + "class_type": "ModelSamplingSD3", + "_meta": {"title": "ModelSamplingSD3"}, + }, + "130:110": { + "inputs": { + "add_noise": "enable", + "noise_seed": 636787045983965, + "steps": 4, + "cfg": 1, + "sampler_name": "euler", + "scheduler": "simple", + "start_at_step": 0, + "end_at_step": 2, + "return_with_leftover_noise": "enable", + "model": ["130:109", 0], + "positive": ["130:128", 0], + "negative": ["130:128", 1], + "latent_image": ["130:128", 2], + }, + "class_type": "KSamplerAdvanced", + "_meta": {"title": "KSampler (Advanced)"}, + }, + "130:111": { + "inputs": { + "add_noise": "disable", + "noise_seed": 0, + "steps": 4, + "cfg": 1, + "sampler_name": "euler", + "scheduler": "simple", + "start_at_step": 2, + "end_at_step": 4, + "return_with_leftover_noise": "disable", + "model": ["130:124", 0], + "positive": ["130:128", 0], + "negative": ["130:128", 1], + "latent_image": ["130:110", 0], + }, + "class_type": "KSamplerAdvanced", + "_meta": {"title": "KSampler (Advanced)"}, + }, + "130:117": { + "inputs": {"fps": 16, "images": ["130:129", 0]}, + "class_type": "CreateVideo", + "_meta": {"title": "Create Video"}, + }, + "130:122": { + "inputs": { + "unet_name": "wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors", + "weight_dtype": "default", + }, + "class_type": "UNETLoader", + "_meta": {"title": "Load Diffusion Model"}, + }, + "130:123": { + "inputs": { + "unet_name": "wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors", + "weight_dtype": "default", + }, + "class_type": "UNETLoader", + "_meta": {"title": "Load Diffusion Model"}, + }, + "130:124": { + "inputs": {"shift": 5.000000000000001, "model": ["130:127", 0]}, + "class_type": "ModelSamplingSD3", + "_meta": {"title": "ModelSamplingSD3"}, + }, + "130:125": { + "inputs": { + "text": ( + "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量," + "JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的," + "形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" + ), + "clip": ["130:105", 0], + }, + "class_type": "CLIPTextEncode", + "_meta": {"title": "CLIP Text Encode (Negative Prompt)"}, + }, + "130:126": { + "inputs": { + "lora_name": "wan2.2_i2v_lightx2v_4steps_lora_v1_high_noise.safetensors", + "strength_model": 1.0000000000000002, + "model": ["130:122", 0], + }, + "class_type": "LoraLoaderModelOnly", + "_meta": {"title": "Load LoRA"}, + }, + "130:127": { + "inputs": { + "lora_name": "wan2.2_i2v_lightx2v_4steps_lora_v1_low_noise.safetensors", + "strength_model": 
1.0000000000000002, + "model": ["130:123", 0], + }, + "class_type": "LoraLoaderModelOnly", + "_meta": {"title": "Load LoRA"}, + }, + "130:128": { + "inputs": { + "width": 720, + "height": 720, + "length": 81, + "batch_size": 1, + "positive": ["130:107", 0], + "negative": ["130:125", 0], + "vae": ["130:106", 0], + "start_image": ["97", 0], + }, + "class_type": "WanImageToVideo", + "_meta": {"title": "WanImageToVideo"}, + }, + "130:129": { + "inputs": {"samples": ["130:111", 0], "vae": ["130:106", 0]}, + "class_type": "VAEDecode", + "_meta": {"title": "VAE Decode"}, + }, +} + +_VBENCH_I2V_JSON_URL = ( + "https://raw.githubusercontent.com/Vchitect/VBench/master/vbench2_beta_i2v/i2v-bench-info.json" +) + + +def download_wan22_models(base_dir: Path) -> None: + """Download Wan 2.2 I2V model files into *base_dir* using wget.""" + for rel_path, url in _WAN22_MODELS: + dest = base_dir / rel_path + if dest.exists(): + print(f"[setup] already exists, skipping: {dest}") + continue + dest.parent.mkdir(parents=True, exist_ok=True) + print(f"[setup] downloading {dest.name} ...") + subprocess.run(["wget", "-O", str(dest), url], check=True) + + +def _try_download_vbench_i2v(input_dir: Path) -> list[str]: + """ + Attempt to fetch VBench I2V images via huggingface_hub. + Returns image basenames placed in *input_dir*, or [] on failure. + """ + try: + from huggingface_hub import snapshot_download # type: ignore + except ImportError: + print("[setup] huggingface_hub not available; skipping VBench download.") + return [] + + try: + print("[setup] downloading Vchitect/VBench_I2V dataset from HuggingFace ...") + cache_dir = input_dir / "_vbench_cache" + local = snapshot_download( + repo_id="Vchitect/VBench_I2V", + repo_type="dataset", + local_dir=str(cache_dir), + ) + except Exception as exc: + print(f"[setup] VBench I2V download failed: {exc}") + return [] + + image_exts = {".png", ".jpg", ".jpeg", ".webp"} + found = sorted(p for p in Path(local).rglob("*") if p.suffix.lower() in image_exts) + if not found: + return [] + + import shutil + + filenames: list[str] = [] + for src in found: + dest = input_dir / src.name + if not dest.exists(): + shutil.copy2(str(src), str(dest)) + filenames.append(src.name) + + print(f"[setup] prepared {len(filenames)} VBench I2V images in {input_dir}") + return filenames + + +def _generate_synthetic_images(input_dir: Path, num_images: int) -> list[str]: + """Generate synthetic 720×720 white PNG placeholders; returns filenames.""" + try: + from PIL import Image as PILImage # type: ignore + except ImportError: + raise RuntimeError( + "Pillow is required for synthetic image generation. " + "Install it with: pip install Pillow" + ) + + filenames: list[str] = [] + for i in range(num_images): + fname = f"benchmark_input_{i:04d}.png" + dest = input_dir / fname + if not dest.exists(): + PILImage.new("RGB", (720, 720), color=(255, 255, 255)).save(str(dest)) + filenames.append(fname) + return filenames + + +def prepare_input_images(input_dir: Path, num_images: int = 20) -> list[str]: + """ + Prepare benchmark input images in *input_dir*. + + Priority: + 1. Reuse any images already present in the directory. + 2. Download Vchitect/VBench_I2V dataset via huggingface_hub. + 3. Generate synthetic 720×720 white PNG placeholders with Pillow. + + Returns a list of image basenames (not full paths). 
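+
+    Illustrative call (directory name and count are arbitrary):
+
+        names = prepare_input_images(Path("inputs"), num_images=8)
+        # e.g. ["benchmark_input_0000.png", ...] on the synthetic fallback path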
+ """ + input_dir.mkdir(parents=True, exist_ok=True) + image_exts = {".png", ".jpg", ".jpeg", ".webp"} + + existing = sorted( + p.name for p in input_dir.iterdir() if p.suffix.lower() in image_exts + ) + if existing: + print(f"[setup] found {len(existing)} existing images in {input_dir}") + return existing + + filenames = _try_download_vbench_i2v(input_dir) + if filenames: + return filenames + + print(f"[setup] generating {num_images} synthetic 720×720 placeholder images ...") + return _generate_synthetic_images(input_dir, num_images) + + +def generate_prompt_file( + output_path: Path, + image_filename: str, + positive_prompt: str | None = None, +) -> None: + """ + Write a single Wan 2.2 I2V ComfyUI prompt JSON to *output_path*. + + *image_filename* is substituted into the LoadImage node (node "97"). + *positive_prompt* overrides the default positive text if provided. + """ + graph: dict[str, Any] = json.loads(json.dumps(_WAN22_I2V_GRAPH)) + graph["97"]["inputs"]["image"] = image_filename + if positive_prompt is not None: + graph["130:107"]["inputs"]["text"] = positive_prompt + + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_text(json.dumps({"prompt": graph}, indent=2)) + + +def generate_prompt_files( + output_dir: Path, + input_dir: Path, + num_prompts: int = 50, + num_images: int = 20, + download_models: bool = False, + comfyui_base_dir: Path | None = None, +) -> list[Path]: + """ + Full Wan 2.2 I2V benchmark setup: + + 1. Optionally download model weights into *comfyui_base_dir*. + 2. Prepare input images in *input_dir* (VBench I2V or synthetic). + 3. Generate *num_prompts* prompt JSON files in *output_dir*, cycling + through the available images. + + Returns the list of generated prompt file paths. + """ + if download_models: + if comfyui_base_dir is None: + raise ValueError("--comfyui-base-dir is required when --download-models is set") + download_wan22_models(comfyui_base_dir) + + image_filenames = prepare_input_images(input_dir, num_images=num_images) + if not image_filenames: + raise RuntimeError(f"No input images available in {input_dir}") + + output_dir.mkdir(parents=True, exist_ok=True) + generated: list[Path] = [] + for i in range(num_prompts): + image_name = image_filenames[i % len(image_filenames)] + prompt_path = output_dir / f"wan22_i2v_prompt_{i:04d}.json" + generate_prompt_file(prompt_path, image_name) + generated.append(prompt_path) + + print(f"[setup] generated {len(generated)} prompt files in {output_dir}") + print(f"[setup] example run:") + print( + f" python benchmark_comfyui_serving.py" + f" --prompt-file {generated[0]}" + f" --num-requests {num_prompts}" + ) + return generated + + +# ────────────────────────────────────────────────────────────────────────────── + + @dataclass class RequestResult: request_index: int @@ -302,7 +707,46 @@ def parse_args() -> argparse.Namespace: choices=("/prompt", "/bench/prompt"), help="Submission endpoint.", ) - p.add_argument("--prompt-file", type=Path, required=True, help="Path to prompt JSON.") + p.add_argument( + "--prompt-file", + type=Path, + default=None, + help="Path to prompt JSON. 
Required unless --generate-wan22-prompts is set.", + ) + p.add_argument( + "--generate-wan22-prompts", + action="store_true", + help="Generate Wan 2.2 I2V prompt files (steps: prepare images, write JSONs) then exit.", + ) + p.add_argument( + "--wan22-input-dir", + type=Path, + default=Path("inputs"), + help="Directory for benchmark input images (default: inputs/).", + ) + p.add_argument( + "--wan22-output-dir", + type=Path, + default=Path("prompts/wan22_i2v"), + help="Directory where generated prompt JSON files are written (default: prompts/wan22_i2v/).", + ) + p.add_argument( + "--wan22-num-images", + type=int, + default=20, + help="Number of synthetic images to generate when VBench download is unavailable (default: 20).", + ) + p.add_argument( + "--download-models", + action="store_true", + help="Download Wan 2.2 model weights before generating prompts (requires --comfyui-base-dir).", + ) + p.add_argument( + "--comfyui-base-dir", + type=Path, + default=None, + help="ComfyUI root directory used as the base for model downloads.", + ) p.add_argument("--num-requests", type=int, default=50) p.add_argument("--max-concurrency", type=int, default=8) p.add_argument("--request-rate", type=float, default=0.0, help="Requests/sec. 0 = fire immediately.") @@ -323,6 +767,8 @@ def parse_args() -> argparse.Namespace: async def async_main(args: argparse.Namespace) -> None: + if args.prompt_file is None: + raise SystemExit("error: --prompt-file is required (or use --generate-wan22-prompts to create one)") prompt_template = load_prompt_template(args.prompt_file) schedule = build_arrival_schedule( num_requests=args.num_requests, @@ -367,6 +813,16 @@ async def async_main(args: argparse.Namespace) -> None: def main() -> None: args = parse_args() + if args.generate_wan22_prompts: + generate_prompt_files( + output_dir=args.wan22_output_dir, + input_dir=args.wan22_input_dir, + num_prompts=args.num_requests, + num_images=args.wan22_num_images, + download_models=args.download_models, + comfyui_base_dir=args.comfyui_base_dir, + ) + return asyncio.run(async_main(args)) From 28bbdb00317b004598281913165c598f617c69c8 Mon Sep 17 00:00:00 2001 From: Tara Ding <38710454+windtara0619@users.noreply.github.com> Date: Mon, 27 Apr 2026 11:53:35 -0700 Subject: [PATCH 05/27] Fix vbench download --- benchmarks/benchmark_comfyui_serving.py | 46 +++++++++++-------------- 1 file changed, 20 insertions(+), 26 deletions(-) diff --git a/benchmarks/benchmark_comfyui_serving.py b/benchmarks/benchmark_comfyui_serving.py index 6431be2bc..6eb7050ec 100644 --- a/benchmarks/benchmark_comfyui_serving.py +++ b/benchmarks/benchmark_comfyui_serving.py @@ -256,9 +256,8 @@ _WAN22_I2V_GRAPH: dict[str, Any] = { }, } -_VBENCH_I2V_JSON_URL = ( - "https://raw.githubusercontent.com/Vchitect/VBench/master/vbench2_beta_i2v/i2v-bench-info.json" -) +# Google Drive file IDs from VBench's vbench2_beta_i2v/download_data.sh +_VBENCH_ORIGIN_ZIP_GDRIVE_ID = "1qhkLCSBkzll0dkKpwlDTwLL0nxdQ4nrY" def download_wan22_models(base_dir: Path) -> None: @@ -275,41 +274,36 @@ def download_wan22_models(base_dir: Path) -> None: def _try_download_vbench_i2v(input_dir: Path) -> list[str]: """ - Attempt to fetch VBench I2V images via huggingface_hub. + Download VBench I2V origin images from Google Drive via gdown (pip install gdown). Returns image basenames placed in *input_dir*, or [] on failure. 
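+    The zip is assumed to hold the origin images (nesting is fine; the
+    recursive scan below picks up any .png/.jpg/.jpeg/.webp it extracts).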
""" try: - from huggingface_hub import snapshot_download # type: ignore + import gdown # type: ignore except ImportError: - print("[setup] huggingface_hub not available; skipping VBench download.") + print("[setup] gdown not available; skipping VBench download. Install with: pip install gdown") return [] + import zipfile + + zip_path = input_dir / "origin.zip" try: - print("[setup] downloading Vchitect/VBench_I2V dataset from HuggingFace ...") - cache_dir = input_dir / "_vbench_cache" - local = snapshot_download( - repo_id="Vchitect/VBench_I2V", - repo_type="dataset", - local_dir=str(cache_dir), - ) + if not zip_path.exists(): + print("[setup] downloading VBench I2V origin images from Google Drive ...") + gdown.download(id=_VBENCH_ORIGIN_ZIP_GDRIVE_ID, output=str(zip_path), quiet=False) + print("[setup] extracting origin.zip ...") + with zipfile.ZipFile(zip_path, "r") as zf: + zf.extractall(str(input_dir)) + zip_path.unlink() except Exception as exc: print(f"[setup] VBench I2V download failed: {exc}") + if zip_path.exists(): + zip_path.unlink() return [] image_exts = {".png", ".jpg", ".jpeg", ".webp"} - found = sorted(p for p in Path(local).rglob("*") if p.suffix.lower() in image_exts) - if not found: - return [] - - import shutil - - filenames: list[str] = [] - for src in found: - dest = input_dir / src.name - if not dest.exists(): - shutil.copy2(str(src), str(dest)) - filenames.append(src.name) - + filenames = sorted( + p.name for p in input_dir.rglob("*") if p.suffix.lower() in image_exts + ) print(f"[setup] prepared {len(filenames)} VBench I2V images in {input_dir}") return filenames From 8136fbbb4a78fa6b6409309ac21644e42b43007b Mon Sep 17 00:00:00 2001 From: Tara Ding <38710454+windtara0619@users.noreply.github.com> Date: Mon, 27 Apr 2026 12:23:24 -0700 Subject: [PATCH 06/27] Fix input --- benchmarks/benchmark_comfyui_serving.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/benchmarks/benchmark_comfyui_serving.py b/benchmarks/benchmark_comfyui_serving.py index 6eb7050ec..c9dfeae49 100644 --- a/benchmarks/benchmark_comfyui_serving.py +++ b/benchmarks/benchmark_comfyui_serving.py @@ -25,10 +25,10 @@ Step 1 — Generate prompt files (downloads images, writes JSONs, then exits): --comfyui-base-dir /path/to/ComfyUI \\ --num-requests 50 - # Custom image/output dirs: + # Custom image/output dirs (input dir must be ComfyUI's input/ folder): python3 benchmarks/benchmark_comfyui_serving.py \\ --generate-wan22-prompts \\ - --wan22-input-dir /data/images \\ + --wan22-input-dir /home/ubuntu/ComfyUI/input \\ --wan22-output-dir /data/prompts/wan22 \\ --wan22-num-images 30 \\ --num-requests 50 @@ -715,8 +715,8 @@ def parse_args() -> argparse.Namespace: p.add_argument( "--wan22-input-dir", type=Path, - default=Path("inputs"), - help="Directory for benchmark input images (default: inputs/).", + default=Path("input"), + help="Directory for benchmark input images. 
Must be ComfyUI's input/ folder so LoadImage can find them (default: input/).", ) p.add_argument( "--wan22-output-dir", From 978b962300a914afcb2a76a4a2158c5c35e257d0 Mon Sep 17 00:00:00 2001 From: Tara Ding <38710454+windtara0619@users.noreply.github.com> Date: Mon, 27 Apr 2026 13:42:53 -0700 Subject: [PATCH 07/27] fix scripts --- benchmarks/benchmark_comfyui_serving.py | 348 +++++++++--------------- 1 file changed, 130 insertions(+), 218 deletions(-) diff --git a/benchmarks/benchmark_comfyui_serving.py b/benchmarks/benchmark_comfyui_serving.py index c9dfeae49..2f9ca1c6e 100644 --- a/benchmarks/benchmark_comfyui_serving.py +++ b/benchmarks/benchmark_comfyui_serving.py @@ -2,7 +2,7 @@ """ Simple serving benchmark client for ComfyUI's HTTP API. -This script is inspired by diffusion serving benchmarks and is designed to: +This script is designed to: - submit prompts to ComfyUI (/prompt or /bench/prompt), - optionally shape request arrivals (fixed rate or Poisson), - poll completion via /history/{prompt_id}, @@ -15,28 +15,26 @@ Step 1 — Generate prompt files (downloads images, writes JSONs, then exits): # Minimal: uses synthetic images, writes to prompts/wan22_i2v/ python3 benchmarks/benchmark_comfyui_serving.py \\ - --generate-wan22-prompts \\ + --generate-prompts --model wan22 --task i2v \\ --num-requests 50 # With model download (needs ComfyUI root): python3 benchmarks/benchmark_comfyui_serving.py \\ - --generate-wan22-prompts \\ - --download-models \\ - --comfyui-base-dir /path/to/ComfyUI \\ + --generate-prompts --model wan22 --task i2v \\ + --download-models --comfyui-base-dir /path/to/ComfyUI \\ --num-requests 50 - # Custom image/output dirs (input dir must be ComfyUI's input/ folder): + # Custom image/output dirs (input-dir must be ComfyUI's input/ folder): python3 benchmarks/benchmark_comfyui_serving.py \\ - --generate-wan22-prompts \\ - --wan22-input-dir /home/ubuntu/ComfyUI/input \\ - --wan22-output-dir /data/prompts/wan22 \\ - --wan22-num-images 30 \\ - --num-requests 50 + --generate-prompts --model wan22 --task i2v \\ + --input-dir /home/ubuntu/ComfyUI/input \\ + --prompts-dir /home/ubuntu/ComfyUI/benchmarks/prompts/wan22_i2v \\ + --num-images 30 --num-requests 50 Step 2 — Run the benchmark (point at any one of the generated prompt files): python3 benchmarks/benchmark_comfyui_serving.py \\ - --prompt-file prompts/wan22_i2v/wan22_i2v_prompt_0000.json \\ + --prompt-file benchmarks/prompts/wan22_i2v/wan22_i2v_prompt_0000.json \\ --num-requests 50 \\ --max-concurrency 4 \\ --host http://127.0.0.1:8188 @@ -64,10 +62,17 @@ import aiohttp # ────────────────────────────────────────────────────────────────────────────── -# Wan 2.2 I2V benchmark setup helpers +# Benchmark setup helpers # ────────────────────────────────────────────────────────────────────────────── -_WAN22_MODELS: list[tuple[str, str]] = [ +# Workflow JSON files live in benchmarks/workflows/_.json. +_WORKFLOWS_DIR = Path(__file__).parent / "workflows" + +# Placeholder in workflow JSON files that is replaced with the actual image filename. +_IMAGE_PLACEHOLDER = "__INPUT_IMAGE__" + +# Model weight downloads for wan22/i2v. +_WAN22_I2V_MODELS: list[tuple[str, str]] = [ ( "models/diffusion_models/wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors", "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/diffusion_models/wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors", @@ -94,175 +99,46 @@ _WAN22_MODELS: list[tuple[str, str]] = [ ), ] -# Placeholder sentinel replaced by generate_prompt_file. 
-_IMAGE_PLACEHOLDER = "__INPUT_IMAGE__" - -_WAN22_I2V_GRAPH: dict[str, Any] = { - "97": { - "inputs": {"image": _IMAGE_PLACEHOLDER}, - "class_type": "LoadImage", - "_meta": {"title": "Start Frame Image"}, - }, - "108": { - "inputs": { - "filename_prefix": "video/Wan2.2_image_to_video", - "format": "auto", - "codec": "auto", - "video-preview": "", - "video": ["130:117", 0], - }, - "class_type": "SaveVideo", - "_meta": {"title": "Save Video"}, - }, - "130:105": { - "inputs": { - "clip_name": "umt5_xxl_fp8_e4m3fn_scaled.safetensors", - "type": "wan", - "device": "default", - }, - "class_type": "CLIPLoader", - "_meta": {"title": "Load CLIP"}, - }, - "130:106": { - "inputs": {"vae_name": "wan_2.1_vae.safetensors"}, - "class_type": "VAELoader", - "_meta": {"title": "Load VAE"}, - }, - "130:107": { - "inputs": { - "text": "A felt-style little eagle cashier greeting, waving, and smiling at the camera.", - "clip": ["130:105", 0], - }, - "class_type": "CLIPTextEncode", - "_meta": {"title": "CLIP Text Encode (Positive Prompt)"}, - }, - "130:109": { - "inputs": {"shift": 5.000000000000001, "model": ["130:126", 0]}, - "class_type": "ModelSamplingSD3", - "_meta": {"title": "ModelSamplingSD3"}, - }, - "130:110": { - "inputs": { - "add_noise": "enable", - "noise_seed": 636787045983965, - "steps": 4, - "cfg": 1, - "sampler_name": "euler", - "scheduler": "simple", - "start_at_step": 0, - "end_at_step": 2, - "return_with_leftover_noise": "enable", - "model": ["130:109", 0], - "positive": ["130:128", 0], - "negative": ["130:128", 1], - "latent_image": ["130:128", 2], - }, - "class_type": "KSamplerAdvanced", - "_meta": {"title": "KSampler (Advanced)"}, - }, - "130:111": { - "inputs": { - "add_noise": "disable", - "noise_seed": 0, - "steps": 4, - "cfg": 1, - "sampler_name": "euler", - "scheduler": "simple", - "start_at_step": 2, - "end_at_step": 4, - "return_with_leftover_noise": "disable", - "model": ["130:124", 0], - "positive": ["130:128", 0], - "negative": ["130:128", 1], - "latent_image": ["130:110", 0], - }, - "class_type": "KSamplerAdvanced", - "_meta": {"title": "KSampler (Advanced)"}, - }, - "130:117": { - "inputs": {"fps": 16, "images": ["130:129", 0]}, - "class_type": "CreateVideo", - "_meta": {"title": "Create Video"}, - }, - "130:122": { - "inputs": { - "unet_name": "wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors", - "weight_dtype": "default", - }, - "class_type": "UNETLoader", - "_meta": {"title": "Load Diffusion Model"}, - }, - "130:123": { - "inputs": { - "unet_name": "wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors", - "weight_dtype": "default", - }, - "class_type": "UNETLoader", - "_meta": {"title": "Load Diffusion Model"}, - }, - "130:124": { - "inputs": {"shift": 5.000000000000001, "model": ["130:127", 0]}, - "class_type": "ModelSamplingSD3", - "_meta": {"title": "ModelSamplingSD3"}, - }, - "130:125": { - "inputs": { - "text": ( - "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量," - "JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的," - "形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" - ), - "clip": ["130:105", 0], - }, - "class_type": "CLIPTextEncode", - "_meta": {"title": "CLIP Text Encode (Negative Prompt)"}, - }, - "130:126": { - "inputs": { - "lora_name": "wan2.2_i2v_lightx2v_4steps_lora_v1_high_noise.safetensors", - "strength_model": 1.0000000000000002, - "model": ["130:122", 0], - }, - "class_type": "LoraLoaderModelOnly", - "_meta": {"title": "Load LoRA"}, - }, - "130:127": { - "inputs": { - "lora_name": "wan2.2_i2v_lightx2v_4steps_lora_v1_low_noise.safetensors", - "strength_model": 
1.0000000000000002, - "model": ["130:123", 0], - }, - "class_type": "LoraLoaderModelOnly", - "_meta": {"title": "Load LoRA"}, - }, - "130:128": { - "inputs": { - "width": 720, - "height": 720, - "length": 81, - "batch_size": 1, - "positive": ["130:107", 0], - "negative": ["130:125", 0], - "vae": ["130:106", 0], - "start_image": ["97", 0], - }, - "class_type": "WanImageToVideo", - "_meta": {"title": "WanImageToVideo"}, - }, - "130:129": { - "inputs": {"samples": ["130:111", 0], "vae": ["130:106", 0]}, - "class_type": "VAEDecode", - "_meta": {"title": "VAE Decode"}, - }, -} - # Google Drive file IDs from VBench's vbench2_beta_i2v/download_data.sh _VBENCH_ORIGIN_ZIP_GDRIVE_ID = "1qhkLCSBkzll0dkKpwlDTwLL0nxdQ4nrY" +# Registry mapping (model, task) → benchmark configuration. +# To add a new model/task: drop a workflow JSON in benchmarks/workflows/ and +# add an entry here. +_MODEL_REGISTRY: dict[tuple[str, str], dict[str, Any]] = { + ("wan22", "i2v"): { + "workflow_file": "wan22_i2v.json", + "model_files": _WAN22_I2V_MODELS, + "image_source": "vbench_i2v", + }, +} -def download_wan22_models(base_dir: Path) -> None: - """Download Wan 2.2 I2V model files into *base_dir* using wget.""" - for rel_path, url in _WAN22_MODELS: +_VALID_MODELS = sorted({m for m, _ in _MODEL_REGISTRY}) +_VALID_TASKS = sorted({t for _, t in _MODEL_REGISTRY}) + + +def _replace_in_graph(obj: Any, placeholder: str, value: str) -> None: + """Recursively replace every occurrence of *placeholder* with *value* in-place.""" + if isinstance(obj, dict): + for k, v in obj.items(): + if v == placeholder: + obj[k] = value + else: + _replace_in_graph(v, placeholder, value) + elif isinstance(obj, list): + for i, item in enumerate(obj): + if item == placeholder: + obj[i] = value + else: + _replace_in_graph(item, placeholder, value) + + +def download_models(base_dir: Path, model: str, task: str) -> None: + """Download model weights for *model*/*task* into *base_dir* using wget.""" + key = (model, task) + if key not in _MODEL_REGISTRY: + raise ValueError(f"No model files registered for {model}/{task}") + for rel_path, url in _MODEL_REGISTRY[key]["model_files"]: dest = base_dir / rel_path if dest.exists(): print(f"[setup] already exists, skipping: {dest}") @@ -328,13 +204,17 @@ def _generate_synthetic_images(input_dir: Path, num_images: int) -> list[str]: return filenames -def prepare_input_images(input_dir: Path, num_images: int = 20) -> list[str]: +def prepare_input_images( + input_dir: Path, + num_images: int = 20, + image_source: str = "vbench_i2v", +) -> list[str]: """ Prepare benchmark input images in *input_dir*. Priority: 1. Reuse any images already present in the directory. - 2. Download Vchitect/VBench_I2V dataset via huggingface_hub. + 2. Fetch from the source specified by *image_source* (e.g. "vbench_i2v"). 3. Generate synthetic 720×720 white PNG placeholders with Pillow. Returns a list of image basenames (not full paths). 
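+
+    For example, image_source="vbench_i2v" tries the Google Drive download
+    first; any other value falls through to synthetic placeholder generation.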
@@ -349,9 +229,10 @@ def prepare_input_images(input_dir: Path, num_images: int = 20) -> list[str]: print(f"[setup] found {len(existing)} existing images in {input_dir}") return existing - filenames = _try_download_vbench_i2v(input_dir) - if filenames: - return filenames + if image_source == "vbench_i2v": + filenames = _try_download_vbench_i2v(input_dir) + if filenames: + return filenames print(f"[setup] generating {num_images} synthetic 720×720 placeholder images ...") return _generate_synthetic_images(input_dir, num_images) @@ -359,57 +240,71 @@ def prepare_input_images(input_dir: Path, num_images: int = 20) -> list[str]: def generate_prompt_file( output_path: Path, + workflow_path: Path, image_filename: str, - positive_prompt: str | None = None, ) -> None: """ - Write a single Wan 2.2 I2V ComfyUI prompt JSON to *output_path*. + Write a single ComfyUI prompt JSON to *output_path* from *workflow_path*. - *image_filename* is substituted into the LoadImage node (node "97"). - *positive_prompt* overrides the default positive text if provided. + Replaces every occurrence of the sentinel string "__INPUT_IMAGE__" in the + workflow graph with *image_filename*. """ - graph: dict[str, Any] = json.loads(json.dumps(_WAN22_I2V_GRAPH)) - graph["97"]["inputs"]["image"] = image_filename - if positive_prompt is not None: - graph["130:107"]["inputs"]["text"] = positive_prompt - + graph: dict[str, Any] = json.loads(workflow_path.read_text()) + _replace_in_graph(graph, _IMAGE_PLACEHOLDER, image_filename) output_path.parent.mkdir(parents=True, exist_ok=True) output_path.write_text(json.dumps({"prompt": graph}, indent=2)) def generate_prompt_files( + model: str, + task: str, output_dir: Path, input_dir: Path, num_prompts: int = 50, num_images: int = 20, - download_models: bool = False, + download_model_weights: bool = False, comfyui_base_dir: Path | None = None, ) -> list[Path]: """ - Full Wan 2.2 I2V benchmark setup: + Full benchmark setup for a given *model*/*task*: 1. Optionally download model weights into *comfyui_base_dir*. - 2. Prepare input images in *input_dir* (VBench I2V or synthetic). + 2. Prepare input images in *input_dir*. 3. Generate *num_prompts* prompt JSON files in *output_dir*, cycling through the available images. Returns the list of generated prompt file paths. """ - if download_models: + key = (model, task) + if key not in _MODEL_REGISTRY: + available = ", ".join(f"{m}/{t}" for m, t in _MODEL_REGISTRY) + raise ValueError(f"Unknown --model {model!r} --task {task!r}. 
Available: {available}") + + cfg = _MODEL_REGISTRY[key] + + if download_model_weights: if comfyui_base_dir is None: raise ValueError("--comfyui-base-dir is required when --download-models is set") - download_wan22_models(comfyui_base_dir) + download_models(comfyui_base_dir, model, task) - image_filenames = prepare_input_images(input_dir, num_images=num_images) + image_filenames = prepare_input_images( + input_dir, + num_images=num_images, + image_source=cfg.get("image_source", "synthetic"), + ) if not image_filenames: raise RuntimeError(f"No input images available in {input_dir}") + workflow_path = _WORKFLOWS_DIR / cfg["workflow_file"] + if not workflow_path.exists(): + raise FileNotFoundError(f"Workflow file not found: {workflow_path}") + output_dir.mkdir(parents=True, exist_ok=True) generated: list[Path] = [] for i in range(num_prompts): image_name = image_filenames[i % len(image_filenames)] - prompt_path = output_dir / f"wan22_i2v_prompt_{i:04d}.json" - generate_prompt_file(prompt_path, image_name) + prompt_path = output_dir / f"{model}_{task}_prompt_{i:04d}.json" + generate_prompt_file(prompt_path, workflow_path, image_name) generated.append(prompt_path) print(f"[setup] generated {len(generated)} prompt files in {output_dir}") @@ -705,35 +600,47 @@ def parse_args() -> argparse.Namespace: "--prompt-file", type=Path, default=None, - help="Path to prompt JSON. Required unless --generate-wan22-prompts is set.", + help="Path to prompt JSON. Required unless --generate-prompts is set.", ) p.add_argument( - "--generate-wan22-prompts", + "--generate-prompts", action="store_true", - help="Generate Wan 2.2 I2V prompt files (steps: prepare images, write JSONs) then exit.", + help="Prepare input images and generate prompt JSON files, then exit.", ) p.add_argument( - "--wan22-input-dir", + "--model", + choices=_VALID_MODELS, + default=None, + help=f"Model to benchmark. Required with --generate-prompts. Choices: {_VALID_MODELS}.", + ) + p.add_argument( + "--task", + choices=_VALID_TASKS, + default=None, + help=f"Task type. Required with --generate-prompts. Choices: {_VALID_TASKS}.", + ) + p.add_argument( + "--input-dir", type=Path, default=Path("input"), - help="Directory for benchmark input images. Must be ComfyUI's input/ folder so LoadImage can find them (default: input/).", + help="ComfyUI input image directory (default: input/). 
LoadImage resolves files from this folder.", ) p.add_argument( - "--wan22-output-dir", + "--prompts-dir", type=Path, - default=Path("prompts/wan22_i2v"), - help="Directory where generated prompt JSON files are written (default: prompts/wan22_i2v/).", + default=None, + help="Directory where generated prompt JSON files are written (default: benchmarks/prompts/_/).", ) p.add_argument( - "--wan22-num-images", + "--num-images", type=int, default=20, - help="Number of synthetic images to generate when VBench download is unavailable (default: 20).", + help="Number of synthetic images to generate when dataset download is unavailable (default: 20).", ) p.add_argument( "--download-models", action="store_true", - help="Download Wan 2.2 model weights before generating prompts (requires --comfyui-base-dir).", + help="Download model weights before generating prompts (requires --comfyui-base-dir).", ) p.add_argument( "--comfyui-base-dir", @@ -762,7 +669,7 @@ def parse_args() -> argparse.Namespace: async def async_main(args: argparse.Namespace) -> None: if args.prompt_file is None: - raise SystemExit("error: --prompt-file is required (or use --generate-wan22-prompts to create one)") + raise SystemExit("error: --prompt-file is required (or use --generate-prompts to create one)") prompt_template = load_prompt_template(args.prompt_file) schedule = build_arrival_schedule( num_requests=args.num_requests, @@ -807,13 +714,18 @@ async def async_main(args: argparse.Namespace) -> None: def main() -> None: args = parse_args() - if args.generate_wan22_prompts: + if args.generate_prompts: + if not args.model or not args.task: + raise SystemExit("error: --model and --task are required with --generate-prompts") + prompts_dir = args.prompts_dir or Path("benchmarks/prompts") / f"{args.model}_{args.task}" generate_prompt_files( - output_dir=args.wan22_output_dir, - input_dir=args.wan22_input_dir, + model=args.model, + task=args.task, + output_dir=prompts_dir, + input_dir=args.input_dir, num_prompts=args.num_requests, - num_images=args.wan22_num_images, - download_models=args.download_models, + num_images=args.num_images, + download_model_weights=args.download_models, comfyui_base_dir=args.comfyui_base_dir, ) return From 52da6933b45a231af439c81828449aa3149bd7e1 Mon Sep 17 00:00:00 2001 From: Tara Ding <38710454+windtara0619@users.noreply.github.com> Date: Mon, 27 Apr 2026 14:01:06 -0700 Subject: [PATCH 08/27] Add workflow --- benchmarks/workflows/wan22_i2v.json | 154 ++++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) create mode 100644 benchmarks/workflows/wan22_i2v.json diff --git a/benchmarks/workflows/wan22_i2v.json b/benchmarks/workflows/wan22_i2v.json new file mode 100644 index 000000000..85a40956f --- /dev/null +++ b/benchmarks/workflows/wan22_i2v.json @@ -0,0 +1,154 @@ +{ + "97": { + "inputs": {"image": "__INPUT_IMAGE__"}, + "class_type": "LoadImage", + "_meta": {"title": "Start Frame Image"} + }, + "108": { + "inputs": { + "filename_prefix": "video/Wan2.2_image_to_video", + "format": "auto", + "codec": "auto", + "video-preview": "", + "video": ["130:117", 0] + }, + "class_type": "SaveVideo", + "_meta": {"title": "Save Video"} + }, + "130:105": { + "inputs": { + "clip_name": "umt5_xxl_fp8_e4m3fn_scaled.safetensors", + "type": "wan", + "device": "default" + }, + "class_type": "CLIPLoader", + "_meta": {"title": "Load CLIP"} + }, + "130:106": { + "inputs": {"vae_name": "wan_2.1_vae.safetensors"}, + "class_type": "VAELoader", + "_meta": {"title": "Load VAE"} + }, + "130:107": { + "inputs": { + "text": "A 
felt-style little eagle cashier greeting, waving, and smiling at the camera.", + "clip": ["130:105", 0] + }, + "class_type": "CLIPTextEncode", + "_meta": {"title": "CLIP Text Encode (Positive Prompt)"} + }, + "130:109": { + "inputs": {"shift": 5.000000000000001, "model": ["130:126", 0]}, + "class_type": "ModelSamplingSD3", + "_meta": {"title": "ModelSamplingSD3"} + }, + "130:110": { + "inputs": { + "add_noise": "enable", + "noise_seed": 636787045983965, + "steps": 4, + "cfg": 1, + "sampler_name": "euler", + "scheduler": "simple", + "start_at_step": 0, + "end_at_step": 2, + "return_with_leftover_noise": "enable", + "model": ["130:109", 0], + "positive": ["130:128", 0], + "negative": ["130:128", 1], + "latent_image": ["130:128", 2] + }, + "class_type": "KSamplerAdvanced", + "_meta": {"title": "KSampler (Advanced)"} + }, + "130:111": { + "inputs": { + "add_noise": "disable", + "noise_seed": 0, + "steps": 4, + "cfg": 1, + "sampler_name": "euler", + "scheduler": "simple", + "start_at_step": 2, + "end_at_step": 4, + "return_with_leftover_noise": "disable", + "model": ["130:124", 0], + "positive": ["130:128", 0], + "negative": ["130:128", 1], + "latent_image": ["130:110", 0] + }, + "class_type": "KSamplerAdvanced", + "_meta": {"title": "KSampler (Advanced)"} + }, + "130:117": { + "inputs": {"fps": 16, "images": ["130:129", 0]}, + "class_type": "CreateVideo", + "_meta": {"title": "Create Video"} + }, + "130:122": { + "inputs": { + "unet_name": "wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors", + "weight_dtype": "default" + }, + "class_type": "UNETLoader", + "_meta": {"title": "Load Diffusion Model"} + }, + "130:123": { + "inputs": { + "unet_name": "wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors", + "weight_dtype": "default" + }, + "class_type": "UNETLoader", + "_meta": {"title": "Load Diffusion Model"} + }, + "130:124": { + "inputs": {"shift": 5.000000000000001, "model": ["130:127", 0]}, + "class_type": "ModelSamplingSD3", + "_meta": {"title": "ModelSamplingSD3"} + }, + "130:125": { + "inputs": { + "text": "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走", + "clip": ["130:105", 0] + }, + "class_type": "CLIPTextEncode", + "_meta": {"title": "CLIP Text Encode (Negative Prompt)"} + }, + "130:126": { + "inputs": { + "lora_name": "wan2.2_i2v_lightx2v_4steps_lora_v1_high_noise.safetensors", + "strength_model": 1.0000000000000002, + "model": ["130:122", 0] + }, + "class_type": "LoraLoaderModelOnly", + "_meta": {"title": "Load LoRA"} + }, + "130:127": { + "inputs": { + "lora_name": "wan2.2_i2v_lightx2v_4steps_lora_v1_low_noise.safetensors", + "strength_model": 1.0000000000000002, + "model": ["130:123", 0] + }, + "class_type": "LoraLoaderModelOnly", + "_meta": {"title": "Load LoRA"} + }, + "130:128": { + "inputs": { + "width": 720, + "height": 720, + "length": 81, + "batch_size": 1, + "positive": ["130:107", 0], + "negative": ["130:125", 0], + "vae": ["130:106", 0], + "start_image": ["97", 0] + }, + "class_type": "WanImageToVideo", + "_meta": {"title": "WanImageToVideo"} + }, + "130:129": { + "inputs": {"samples": ["130:111", 0], "vae": ["130:106", 0]}, + "class_type": "VAEDecode", + "_meta": {"title": "VAE Decode"} + } +} From ff5e379cc27852965f41ea817cb61f326aada5f5 Mon Sep 17 00:00:00 2001 From: Tara Ding <38710454+windtara0619@users.noreply.github.com> Date: Mon, 27 Apr 2026 14:13:01 -0700 Subject: [PATCH 09/27] convert these two steps commands into one command: 1. 
check if downloading image or model is already there, if it is, skip. 2. remove prompt-file arg, when generating a new request, roundrobin the generated prompts --- benchmarks/benchmark_comfyui_serving.py | 108 +++++++++--------------- 1 file changed, 41 insertions(+), 67 deletions(-) diff --git a/benchmarks/benchmark_comfyui_serving.py b/benchmarks/benchmark_comfyui_serving.py index 2f9ca1c6e..1855912da 100644 --- a/benchmarks/benchmark_comfyui_serving.py +++ b/benchmarks/benchmark_comfyui_serving.py @@ -11,35 +11,27 @@ This script is designed to: Usage — Wan 2.2 I2V benchmark ============================== -Step 1 — Generate prompt files (downloads images, writes JSONs, then exits): +Images and prompt files are prepared automatically on first run and reused on +subsequent runs. Just specify --model and --task: - # Minimal: uses synthetic images, writes to prompts/wan22_i2v/ + # Minimal (synthetic images, default paths): python3 benchmarks/benchmark_comfyui_serving.py \\ - --generate-prompts --model wan22 --task i2v \\ - --num-requests 50 + --model wan22 --task i2v \\ + --num-requests 50 --max-concurrency 4 # With model download (needs ComfyUI root): python3 benchmarks/benchmark_comfyui_serving.py \\ - --generate-prompts --model wan22 --task i2v \\ + --model wan22 --task i2v \\ --download-models --comfyui-base-dir /path/to/ComfyUI \\ - --num-requests 50 + --num-requests 50 --max-concurrency 4 - # Custom image/output dirs (input-dir must be ComfyUI's input/ folder): + # Custom paths: python3 benchmarks/benchmark_comfyui_serving.py \\ - --generate-prompts --model wan22 --task i2v \\ + --model wan22 --task i2v \\ --input-dir /home/ubuntu/ComfyUI/input \\ --prompts-dir /home/ubuntu/ComfyUI/benchmarks/prompts/wan22_i2v \\ - --num-images 30 --num-requests 50 - -Step 2 — Run the benchmark (point at any one of the generated prompt files): - - python3 benchmarks/benchmark_comfyui_serving.py \\ - --prompt-file benchmarks/prompts/wan22_i2v/wan22_i2v_prompt_0000.json \\ - --num-requests 50 \\ - --max-concurrency 4 \\ + --num-images 30 --num-requests 50 --max-concurrency 4 \\ --host http://127.0.0.1:8188 - -The setup step also prints the exact run command at the end, so you can copy it directly. """ from __future__ import annotations @@ -260,7 +252,6 @@ def generate_prompt_files( task: str, output_dir: Path, input_dir: Path, - num_prompts: int = 50, num_images: int = 20, download_model_weights: bool = False, comfyui_base_dir: Path | None = None, @@ -269,11 +260,11 @@ def generate_prompt_files( Full benchmark setup for a given *model*/*task*: 1. Optionally download model weights into *comfyui_base_dir*. - 2. Prepare input images in *input_dir*. - 3. Generate *num_prompts* prompt JSON files in *output_dir*, cycling - through the available images. + 2. Prepare input images in *input_dir* (skipped if images already exist). + 3. Generate one prompt JSON per input image in *output_dir* + (skipped if prompt files already exist). - Returns the list of generated prompt file paths. + Returns the list of prompt file paths. """ key = (model, task) if key not in _MODEL_REGISTRY: @@ -287,6 +278,12 @@ def generate_prompt_files( raise ValueError("--comfyui-base-dir is required when --download-models is set") download_models(comfyui_base_dir, model, task) + # Skip prompt generation if files already exist. 
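+    # (Assumption: existing files still match the current workflow JSON;
+    # delete the prompts directory to force regeneration after editing it.)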
+ existing = sorted(output_dir.glob(f"{model}_{task}_prompt_*.json")) + if existing: + print(f"[setup] found {len(existing)} existing prompt files in {output_dir}, skipping generation") + return existing + image_filenames = prepare_input_images( input_dir, num_images=num_images, @@ -301,19 +298,12 @@ def generate_prompt_files( output_dir.mkdir(parents=True, exist_ok=True) generated: list[Path] = [] - for i in range(num_prompts): - image_name = image_filenames[i % len(image_filenames)] + for i, image_name in enumerate(image_filenames): prompt_path = output_dir / f"{model}_{task}_prompt_{i:04d}.json" generate_prompt_file(prompt_path, workflow_path, image_name) generated.append(prompt_path) print(f"[setup] generated {len(generated)} prompt files in {output_dir}") - print(f"[setup] example run:") - print( - f" python benchmark_comfyui_serving.py" - f" --prompt-file {generated[0]}" - f" --num-requests {num_prompts}" - ) return generated @@ -489,7 +479,7 @@ async def run_request( semaphore: asyncio.Semaphore, session: aiohttp.ClientSession, args: argparse.Namespace, - prompt_wrapper_template: dict[str, Any], + prompt_templates: list[dict[str, Any]], ) -> RequestResult: await asyncio.sleep(max(0.0, (start_time + scheduled_offset_s) - time.perf_counter())) queued_at = time.perf_counter() @@ -498,7 +488,7 @@ async def run_request( started_at = time.perf_counter() prompt_id = None try: - payload = json.loads(json.dumps(prompt_wrapper_template)) + payload = json.loads(json.dumps(prompt_templates[idx % len(prompt_templates)])) payload.setdefault("extra_data", {}) payload["client_id"] = args.client_id @@ -596,28 +586,17 @@ def parse_args() -> argparse.Namespace: choices=("/prompt", "/bench/prompt"), help="Submission endpoint.", ) - p.add_argument( - "--prompt-file", - type=Path, - default=None, - help="Path to prompt JSON. Required unless --generate-prompts is set.", - ) - p.add_argument( - "--generate-prompts", - action="store_true", - help="Prepare input images and generate prompt JSON files, then exit.", - ) p.add_argument( "--model", choices=_VALID_MODELS, - default=None, - help=f"Model to benchmark. Required with --generate-prompts. Choices: {_VALID_MODELS}.", + required=True, + help=f"Model to benchmark. Choices: {_VALID_MODELS}.", ) p.add_argument( "--task", choices=_VALID_TASKS, - default=None, - help=f"Task type. Required with --generate-prompts. Choices: {_VALID_TASKS}.", + required=True, + help=f"Task type. 
Choices: {_VALID_TASKS}.", ) p.add_argument( "--input-dir", @@ -668,9 +647,19 @@ def parse_args() -> argparse.Namespace: async def async_main(args: argparse.Namespace) -> None: - if args.prompt_file is None: - raise SystemExit("error: --prompt-file is required (or use --generate-prompts to create one)") - prompt_template = load_prompt_template(args.prompt_file) + prompts_dir = args.prompts_dir or Path("benchmarks/prompts") / f"{args.model}_{args.task}" + prompt_paths = generate_prompt_files( + model=args.model, + task=args.task, + output_dir=prompts_dir, + input_dir=args.input_dir, + num_images=args.num_images, + download_model_weights=args.download_models, + comfyui_base_dir=args.comfyui_base_dir, + ) + prompt_templates = [load_prompt_template(p) for p in prompt_paths] + print(f"[bench] loaded {len(prompt_templates)} prompt templates, round-robining over {args.num_requests} requests") + schedule = build_arrival_schedule( num_requests=args.num_requests, request_rate=args.request_rate, @@ -691,7 +680,7 @@ async def async_main(args: argparse.Namespace) -> None: semaphore=semaphore, session=session, args=args, - prompt_wrapper_template=prompt_template, + prompt_templates=prompt_templates, ) ) for i in range(args.num_requests) @@ -714,21 +703,6 @@ async def async_main(args: argparse.Namespace) -> None: def main() -> None: args = parse_args() - if args.generate_prompts: - if not args.model or not args.task: - raise SystemExit("error: --model and --task are required with --generate-prompts") - prompts_dir = args.prompts_dir or Path("benchmarks/prompts") / f"{args.model}_{args.task}" - generate_prompt_files( - model=args.model, - task=args.task, - output_dir=prompts_dir, - input_dir=args.input_dir, - num_prompts=args.num_requests, - num_images=args.num_images, - download_model_weights=args.download_models, - comfyui_base_dir=args.comfyui_base_dir, - ) - return asyncio.run(async_main(args)) From d407d823500b327d56a8d59c499ae4fb5e9166ab Mon Sep 17 00:00:00 2001 From: Tara Ding <38710454+windtara0619@users.noreply.github.com> Date: Mon, 27 Apr 2026 14:20:32 -0700 Subject: [PATCH 10/27] Fix comments --- benchmarks/benchmark_comfyui_serving.py | 35 +++++++++---------------- 1 file changed, 13 insertions(+), 22 deletions(-) diff --git a/benchmarks/benchmark_comfyui_serving.py b/benchmarks/benchmark_comfyui_serving.py index 1855912da..2629833e2 100644 --- a/benchmarks/benchmark_comfyui_serving.py +++ b/benchmarks/benchmark_comfyui_serving.py @@ -1,36 +1,27 @@ #!/usr/bin/env python3 """ -Simple serving benchmark client for ComfyUI's HTTP API. +ComfyUI model serving benchmark. -This script is designed to: - - submit prompts to ComfyUI (/prompt or /bench/prompt), - - optionally shape request arrivals (fixed rate or Poisson), - - poll completion via /history/{prompt_id}, - - report latency/throughput/error metrics. +Submits prompts concurrently to a running ComfyUI server and reports +latency/throughput metrics. Input images and prompt files are prepared +automatically (and cached for reuse) before the benchmark starts. -Usage — Wan 2.2 I2V benchmark -============================== +Supported models / tasks +------------------------ + wan22 / i2v — Wan 2.2 Image-to-Video (LightX2V 4-step, 720×720, 81 frames) -Images and prompt files are prepared automatically on first run and reused on -subsequent runs. 
Just specify --model and --task: - - # Minimal (synthetic images, default paths): +Usage +----- python3 benchmarks/benchmark_comfyui_serving.py \\ --model wan22 --task i2v \\ - --num-requests 50 --max-concurrency 4 + --num-requests 50 --max-concurrency 4 \\ + --host http://127.0.0.1:8188 - # With model download (needs ComfyUI root): + # Also download model weights (run from ComfyUI root): python3 benchmarks/benchmark_comfyui_serving.py \\ --model wan22 --task i2v \\ --download-models --comfyui-base-dir /path/to/ComfyUI \\ - --num-requests 50 --max-concurrency 4 - - # Custom paths: - python3 benchmarks/benchmark_comfyui_serving.py \\ - --model wan22 --task i2v \\ - --input-dir /home/ubuntu/ComfyUI/input \\ - --prompts-dir /home/ubuntu/ComfyUI/benchmarks/prompts/wan22_i2v \\ - --num-images 30 --num-requests 50 --max-concurrency 4 \\ + --num-requests 50 --max-concurrency 4 \\ --host http://127.0.0.1:8188 """ From 125ed0be4b97df39bfcdc7c267baa9ad31f82a27 Mon Sep 17 00:00:00 2001 From: Tara Ding <38710454+windtara0619@users.noreply.github.com> Date: Mon, 27 Apr 2026 14:22:27 -0700 Subject: [PATCH 11/27] Fix comments --- benchmarks/benchmark_comfyui_serving.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/benchmarks/benchmark_comfyui_serving.py b/benchmarks/benchmark_comfyui_serving.py index 2629833e2..2236230bd 100644 --- a/benchmarks/benchmark_comfyui_serving.py +++ b/benchmarks/benchmark_comfyui_serving.py @@ -6,6 +6,15 @@ Submits prompts concurrently to a running ComfyUI server and reports latency/throughput metrics. Input images and prompt files are prepared automatically (and cached for reuse) before the benchmark starts. +On first run the script will: + 1. Download model weights (if --download-models is set). + 2. Download the VBench I2V image dataset (requires: pip install gdown), + or generate synthetic placeholder images as a fallback. + 3. Write one prompt JSON per input image under benchmarks/prompts/_/. + +On subsequent runs all three steps are skipped if the files already exist. +Requests are distributed across prompt files in round-robin order. + Supported models / tasks ------------------------ wan22 / i2v — Wan 2.2 Image-to-Video (LightX2V 4-step, 720×720, 81 frames) From 08411a1d65a6e42db5611984089c71d8c654c83e Mon Sep 17 00:00:00 2001 From: Tara Ding <38710454+windtara0619@users.noreply.github.com> Date: Mon, 27 Apr 2026 14:28:46 -0700 Subject: [PATCH 12/27] Fix input dir --- benchmarks/benchmark_comfyui_serving.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/benchmarks/benchmark_comfyui_serving.py b/benchmarks/benchmark_comfyui_serving.py index 2236230bd..91c81bcf3 100644 --- a/benchmarks/benchmark_comfyui_serving.py +++ b/benchmarks/benchmark_comfyui_serving.py @@ -598,12 +598,6 @@ def parse_args() -> argparse.Namespace: required=True, help=f"Task type. Choices: {_VALID_TASKS}.", ) - p.add_argument( - "--input-dir", - type=Path, - default=Path("input"), - help="ComfyUI input image directory (default: input/). 
LoadImage resolves files from this folder.", - ) p.add_argument( "--prompts-dir", type=Path, @@ -652,7 +646,7 @@ async def async_main(args: argparse.Namespace) -> None: model=args.model, task=args.task, output_dir=prompts_dir, - input_dir=args.input_dir, + input_dir=Path("input"), num_images=args.num_images, download_model_weights=args.download_models, comfyui_base_dir=args.comfyui_base_dir, From 09f03107c2740250181f66ca329e37ecce887c44 Mon Sep 17 00:00:00 2001 From: Tara Ding <38710454+windtara0619@users.noreply.github.com> Date: Mon, 27 Apr 2026 14:34:31 -0700 Subject: [PATCH 13/27] remove checking existing png under input since example.png is always under input folder --- benchmarks/benchmark_comfyui_serving.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/benchmarks/benchmark_comfyui_serving.py b/benchmarks/benchmark_comfyui_serving.py index 91c81bcf3..119527cd1 100644 --- a/benchmarks/benchmark_comfyui_serving.py +++ b/benchmarks/benchmark_comfyui_serving.py @@ -212,14 +212,6 @@ def prepare_input_images( Returns a list of image basenames (not full paths). """ input_dir.mkdir(parents=True, exist_ok=True) - image_exts = {".png", ".jpg", ".jpeg", ".webp"} - - existing = sorted( - p.name for p in input_dir.iterdir() if p.suffix.lower() in image_exts - ) - if existing: - print(f"[setup] found {len(existing)} existing images in {input_dir}") - return existing if image_source == "vbench_i2v": filenames = _try_download_vbench_i2v(input_dir) From 512deb3cd6d6e914d7deb08ee493f35fd1788165 Mon Sep 17 00:00:00 2001 From: Tara Ding <38710454+windtara0619@users.noreply.github.com> Date: Mon, 27 Apr 2026 15:08:49 -0700 Subject: [PATCH 14/27] Fix returned vbench image filenames --- benchmarks/benchmark_comfyui_serving.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/benchmarks/benchmark_comfyui_serving.py b/benchmarks/benchmark_comfyui_serving.py index 119527cd1..418371ffe 100644 --- a/benchmarks/benchmark_comfyui_serving.py +++ b/benchmarks/benchmark_comfyui_serving.py @@ -170,7 +170,9 @@ def _try_download_vbench_i2v(input_dir: Path) -> list[str]: image_exts = {".png", ".jpg", ".jpeg", ".webp"} filenames = sorted( - p.name for p in input_dir.rglob("*") if p.suffix.lower() in image_exts + p.relative_to(input_dir).as_posix() + for p in input_dir.rglob("*") + if p.suffix.lower() in image_exts ) print(f"[setup] prepared {len(filenames)} VBench I2V images in {input_dir}") return filenames From ca56e224a035d1b4934b9507a31642b201bce201 Mon Sep 17 00:00:00 2001 From: Tara Ding <38710454+windtara0619@users.noreply.github.com> Date: Mon, 27 Apr 2026 16:50:40 -0700 Subject: [PATCH 15/27] Moving collecting summary to benchmark_comfyui_serving --- benchmarks/benchmark_comfyui_serving.py | 23 ++------- main.py | 22 ++++----- server.py | 66 ------------------------- 3 files changed, 16 insertions(+), 95 deletions(-) diff --git a/benchmarks/benchmark_comfyui_serving.py b/benchmarks/benchmark_comfyui_serving.py index 418371ffe..2859d2e96 100644 --- a/benchmarks/benchmark_comfyui_serving.py +++ b/benchmarks/benchmark_comfyui_serving.py @@ -396,7 +396,7 @@ async def wait_for_prompt_done( timeout_s: float, ) -> tuple[float | None, float | None]: """ - Returns (queue_wait_ms, execution_ms) when available from history status messages. + Returns (queue_wait_ms, execution_ms) from history_item["benchmark"] written by the server. Falls back to (None, None) if unavailable. 
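+
+    Expected payload, as written by prompt_worker in main.py:
+        history_item["benchmark"] == {"execution_ms": float, "queue_wait_ms": float}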
""" deadline = time.perf_counter() + timeout_s @@ -419,26 +419,13 @@ async def wait_for_prompt_done( continue status = history_item.get("status", {}) - status_str = status.get("status_str") - messages = status.get("messages", []) - if status_str not in ("success", "error"): + if status.get("status_str") not in ("success", "error"): await asyncio.sleep(poll_interval_s) continue - queue_wait_ms = None - execution_ms = None - try: - timestamp_map: dict[str, int] = {} - for event, msg in messages: - if isinstance(msg, dict) and "timestamp" in msg: - timestamp_map[event] = int(msg["timestamp"]) - start_ts = timestamp_map.get("execution_start") - end_ts = timestamp_map.get("execution_success") or timestamp_map.get("execution_error") - if start_ts is not None and end_ts is not None: - execution_ms = max(0.0, end_ts - start_ts) - except Exception: - execution_ms = None - + benchmark = history_item.get("benchmark", {}) + queue_wait_ms = benchmark.get("queue_wait_ms") + execution_ms = benchmark.get("execution_ms") return queue_wait_ms, execution_ms await asyncio.sleep(poll_interval_s) diff --git a/main.py b/main.py index 5013bac42..77b0058a5 100644 --- a/main.py +++ b/main.py @@ -293,7 +293,6 @@ def prompt_worker(q, server_instance): gc_collect_interval = 10.0 while True: - benchmark_mode = args.benchmark_server_only timeout = 1000.0 if need_gc: timeout = max(gc_collect_interval - (current_time - last_gc_collect), 0.0) @@ -310,18 +309,28 @@ def prompt_worker(q, server_instance): extra_data = item[3].copy() for k in sensitive: extra_data[k] = sensitive[k] - benchmark_mode = args.benchmark_server_only or extra_data.get("benchmark_server_only", False) + benchmark_mode = args.benchmark_server_only if not benchmark_mode: asset_seeder.pause() e.execute(item[2], prompt_id, extra_data, item[4]) + execution_time_s = time.perf_counter() - execution_start_time need_gc = True + queue_wait_ms = 0.0 + created_at = extra_data.get("create_time") + if isinstance(created_at, int): + queue_wait_ms = max(0.0, execution_start_wall_ms - created_at) + remove_sensitive = lambda prompt: prompt[:5] + prompt[6:] history_result = e.history_result if benchmark_mode: history_result = {"outputs": {}, "meta": {}} + history_result["benchmark"] = { + "execution_ms": execution_time_s * 1000.0, + "queue_wait_ms": queue_wait_ms, + } q.task_done(item_id, history_result, @@ -333,7 +342,6 @@ def prompt_worker(q, server_instance): server_instance.send_sync("executing", {"node": None, "prompt_id": prompt_id}, server_instance.client_id) current_time = time.perf_counter() - execution_time_s = current_time - execution_start_time # Log Time in a more readable way after 10 minutes if execution_time_s > 600: @@ -342,14 +350,6 @@ def prompt_worker(q, server_instance): else: logging.info("Prompt executed in {:.2f} seconds".format(execution_time_s)) - queue_wait_ms = 0.0 - created_at = extra_data.get("create_time") - if isinstance(created_at, int): - queue_wait_ms = max(0.0, execution_start_wall_ms - created_at) - - if benchmark_mode: - server_instance.record_benchmark_result(prompt_id, e.success, execution_time_s * 1000.0, queue_wait_ms) - if not benchmark_mode and not asset_seeder.is_disabled(): paths = _collect_output_absolute_paths(e.history_result) register_output_files(paths, job_id=prompt_id) diff --git a/server.py b/server.py index 5db448b7f..ff15904f6 100644 --- a/server.py +++ b/server.py @@ -16,7 +16,6 @@ import struct import ssl import socket import ipaddress -import threading from PIL import Image, ImageOps from PIL.PngImagePlugin 
import PngInfo from io import BytesIO @@ -253,17 +252,6 @@ class PromptServer(): self.client_id = None self.on_prompt_handlers = [] - self._benchmark_lock = threading.Lock() - self._benchmark_stats = { - "requests_total": 0, - "requests_success": 0, - "requests_error": 0, - "latency_ms_total": 0.0, - "latency_ms_max": 0.0, - "queue_wait_ms_total": 0.0, - "queue_wait_ms_max": 0.0, - "last_prompt_id": None, - } @routes.get('/ws') async def websocket_handler(request): @@ -924,16 +912,6 @@ class PromptServer(): queue_info['queue_pending'] = _remove_sensitive_from_queue(current_queue[1]) return web.json_response(queue_info) - @routes.get("/bench/stats") - async def get_bench_stats(request): - stats = self.get_benchmark_stats() - return web.json_response(stats) - - @routes.post("/bench/reset") - async def reset_bench_stats(request): - self.reset_benchmark_stats() - return web.json_response({"status": "ok"}) - async def enqueue_prompt(json_data): if "number" in json_data: number = float(json_data['number']) @@ -984,15 +962,6 @@ class PromptServer(): } return web.json_response({"error": error, "node_errors": {}}, status=400) - @routes.post("/bench/prompt") - async def post_bench_prompt(request): - json_data = await request.json() - json_data = self.trigger_on_prompt(json_data) - extra_data = json_data.setdefault("extra_data", {}) - extra_data["benchmark_server_only"] = True - extra_data.setdefault("preview_method", "none") - return await enqueue_prompt(json_data) - @routes.post("/prompt") async def post_prompt(request): logging.info("got prompt") @@ -1144,41 +1113,6 @@ class PromptServer(): prompt_info['exec_info'] = exec_info return prompt_info - def reset_benchmark_stats(self): - with self._benchmark_lock: - self._benchmark_stats = { - "requests_total": 0, - "requests_success": 0, - "requests_error": 0, - "latency_ms_total": 0.0, - "latency_ms_max": 0.0, - "queue_wait_ms_total": 0.0, - "queue_wait_ms_max": 0.0, - "last_prompt_id": None, - } - - def record_benchmark_result(self, prompt_id, success, latency_ms, queue_wait_ms=0.0): - with self._benchmark_lock: - self._benchmark_stats["requests_total"] += 1 - if success: - self._benchmark_stats["requests_success"] += 1 - else: - self._benchmark_stats["requests_error"] += 1 - self._benchmark_stats["latency_ms_total"] += max(0.0, latency_ms) - self._benchmark_stats["queue_wait_ms_total"] += max(0.0, queue_wait_ms) - self._benchmark_stats["latency_ms_max"] = max(self._benchmark_stats["latency_ms_max"], max(0.0, latency_ms)) - self._benchmark_stats["queue_wait_ms_max"] = max(self._benchmark_stats["queue_wait_ms_max"], max(0.0, queue_wait_ms)) - self._benchmark_stats["last_prompt_id"] = prompt_id - - def get_benchmark_stats(self): - with self._benchmark_lock: - stats = dict(self._benchmark_stats) - - total = stats["requests_total"] - stats["latency_ms_avg"] = (stats["latency_ms_total"] / total) if total > 0 else 0.0 - stats["queue_wait_ms_avg"] = (stats["queue_wait_ms_total"] / total) if total > 0 else 0.0 - return stats - async def send(self, event, data, sid=None): if event == BinaryEventTypes.UNENCODED_PREVIEW_IMAGE: await self.send_image(data, sid=sid) From 059b346966c6905cf3f59c8a606eca41086ded6f Mon Sep 17 00:00:00 2001 From: Tara Ding <38710454+windtara0619@users.noreply.github.com> Date: Mon, 27 Apr 2026 16:57:16 -0700 Subject: [PATCH 16/27] fix server.py --- server.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/server.py b/server.py index ff15904f6..881da8e66 100644 --- a/server.py +++ b/server.py @@ -912,7 
+912,12 @@ class PromptServer(): queue_info['queue_pending'] = _remove_sensitive_from_queue(current_queue[1]) return web.json_response(queue_info) - async def enqueue_prompt(json_data): + @routes.post("/prompt") + async def post_prompt(request): + logging.info("got prompt") + json_data = await request.json() + json_data = self.trigger_on_prompt(json_data) + if "number" in json_data: number = float(json_data['number']) else: @@ -962,13 +967,6 @@ class PromptServer(): } return web.json_response({"error": error, "node_errors": {}}, status=400) - @routes.post("/prompt") - async def post_prompt(request): - logging.info("got prompt") - json_data = await request.json() - json_data = self.trigger_on_prompt(json_data) - return await enqueue_prompt(json_data) - @routes.post("/queue") async def post_queue(request): json_data = await request.json() From 69f6272edcf0aafc34f04f9e84dac9e63ef88ab7 Mon Sep 17 00:00:00 2001 From: Tara Ding <38710454+windtara0619@users.noreply.github.com> Date: Mon, 27 Apr 2026 21:39:57 -0700 Subject: [PATCH 17/27] Add benchmark for each node. --- benchmarks/benchmark_comfyui_serving.py | 32 +++++++++++++++++++------ execution.py | 8 +++++++ main.py | 14 +++++++---- 3 files changed, 42 insertions(+), 12 deletions(-) diff --git a/benchmarks/benchmark_comfyui_serving.py b/benchmarks/benchmark_comfyui_serving.py index 2859d2e96..cccc85df7 100644 --- a/benchmarks/benchmark_comfyui_serving.py +++ b/benchmarks/benchmark_comfyui_serving.py @@ -316,6 +316,7 @@ class RequestResult: end_to_end_s: float queue_wait_ms: float | None execution_ms: float | None + node_timing_ms: dict[str, dict] | None def percentile(values: list[float], pct: float) -> float: @@ -394,10 +395,10 @@ async def wait_for_prompt_done( prompt_id: str, poll_interval_s: float, timeout_s: float, -) -> tuple[float | None, float | None]: +) -> tuple[float | None, float | None, dict | None]: """ - Returns (queue_wait_ms, execution_ms) from history_item["benchmark"] written by the server. - Falls back to (None, None) if unavailable. + Returns (queue_wait_ms, execution_ms, node_timing_ms) from history_item["benchmark"]. + Falls back to (None, None, None) if unavailable. 
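+
+    node_timing_ms maps node_id -> {"class_type": str, "execution_ms": float},
+    as recorded in PromptExecutor.node_timing_ms (see execution.py).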
""" deadline = time.perf_counter() + timeout_s history_url = f"{base_url}/history/{prompt_id}" @@ -424,9 +425,11 @@ async def wait_for_prompt_done( continue benchmark = history_item.get("benchmark", {}) - queue_wait_ms = benchmark.get("queue_wait_ms") - execution_ms = benchmark.get("execution_ms") - return queue_wait_ms, execution_ms + return ( + benchmark.get("queue_wait_ms"), + benchmark.get("execution_ms"), + benchmark.get("nodes"), + ) await asyncio.sleep(poll_interval_s) @@ -484,7 +487,7 @@ async def run_request( timeout_s=args.request_timeout_s, ) - queue_wait_ms, execution_ms = await wait_for_prompt_done( + queue_wait_ms, execution_ms, node_timing_ms = await wait_for_prompt_done( session=session, base_url=args.host, prompt_id=prompt_id, @@ -503,6 +506,7 @@ async def run_request( end_to_end_s=finished_at - queued_at, queue_wait_ms=queue_wait_ms, execution_ms=execution_ms, + node_timing_ms=node_timing_ms, ) except Exception as exc: finished_at = time.perf_counter() @@ -517,6 +521,7 @@ async def run_request( end_to_end_s=finished_at - queued_at, queue_wait_ms=None, execution_ms=None, + node_timing_ms=None, ) @@ -551,6 +556,19 @@ def print_summary(results: list[RequestResult], wall_s: float) -> None: print(f"execution_mean_ms: {statistics.mean(exec_ms):.2f}") print(f"execution_p95_ms: {percentile(exec_ms, 95):.2f}") + # Per-node timing: aggregate execution_ms across all successful results. + node_totals: dict[str, list[float]] = {} + for r in success: + if not r.node_timing_ms: + continue + for node_id, info in r.node_timing_ms.items(): + key = f"{info.get('class_type', 'unknown')} ({node_id})" + node_totals.setdefault(key, []).append(info.get("execution_ms", 0.0)) + if node_totals: + print("\n--- Per-node execution time (mean ms across successful requests) ---") + for key, times in sorted(node_totals.items(), key=lambda x: -statistics.mean(x[1])): + print(f" {key}: mean={statistics.mean(times):.1f} p95={percentile(times, 95):.1f} n={len(times)}") + if fail: print("\nSample failures:") for r in fail[:5]: diff --git a/execution.py b/execution.py index e15eb4bda..777ecda77 100644 --- a/execution.py +++ b/execution.py @@ -721,6 +721,7 @@ class PromptExecutor: self.server.client_id = None self.status_messages = [] + self.node_timing_ms: dict[str, dict] = {} self.add_message("execution_start", { "prompt_id": prompt_id}, broadcast=False) self._notify_prompt_lifecycle("start", prompt_id) @@ -767,6 +768,7 @@ class PromptExecutor: break assert node_id is not None, "Node ID should not be None at this point" + node_start_s = time.perf_counter() if args.benchmark_server_only else None result, error, ex = await execute(self.server, dynamic_prompt, self.caches, node_id, extra_data, executed, prompt_id, execution_list, pending_subgraph_results, pending_async_nodes, ui_node_outputs) self.success = result != ExecutionResult.FAILURE if result == ExecutionResult.FAILURE: @@ -776,6 +778,12 @@ class PromptExecutor: execution_list.unstage_node_execution() else: # result == ExecutionResult.SUCCESS: execution_list.complete_node_execution() + if node_start_s is not None: + class_type = dynamic_prompt.get_node(node_id).get("class_type", "unknown") + self.node_timing_ms[node_id] = { + "class_type": class_type, + "execution_ms": (time.perf_counter() - node_start_s) * 1000.0, + } if self.cache_type == CacheType.RAM_PRESSURE: comfy.model_management.free_memory(0, None, pins_required=ram_headroom, ram_required=ram_headroom) diff --git a/main.py b/main.py index 77b0058a5..ac15ad30a 100644 --- a/main.py +++ b/main.py 
@@ -326,11 +326,15 @@ def prompt_worker(q, server_instance): remove_sensitive = lambda prompt: prompt[:5] + prompt[6:] history_result = e.history_result if benchmark_mode: - history_result = {"outputs": {}, "meta": {}} - history_result["benchmark"] = { - "execution_ms": execution_time_s * 1000.0, - "queue_wait_ms": queue_wait_ms, - } + history_result = { + "outputs": {}, + "meta": {}, + "benchmark": { + "execution_ms": execution_time_s * 1000.0, + "queue_wait_ms": queue_wait_ms, + "nodes": e.node_timing_ms, + }, + } q.task_done(item_id, history_result, From 6251350cf4b709eb984f0f8e292e11a10f9445f7 Mon Sep 17 00:00:00 2001 From: Tara Ding <38710454+windtara0619@users.noreply.github.com> Date: Mon, 27 Apr 2026 22:03:52 -0700 Subject: [PATCH 18/27] Remove queue_time --- benchmarks/benchmark_comfyui_serving.py | 17 ++++------------- main.py | 7 ------- 2 files changed, 4 insertions(+), 20 deletions(-) diff --git a/benchmarks/benchmark_comfyui_serving.py b/benchmarks/benchmark_comfyui_serving.py index cccc85df7..06979cdc5 100644 --- a/benchmarks/benchmark_comfyui_serving.py +++ b/benchmarks/benchmark_comfyui_serving.py @@ -314,7 +314,6 @@ class RequestResult: started_at: float finished_at: float end_to_end_s: float - queue_wait_ms: float | None execution_ms: float | None node_timing_ms: dict[str, dict] | None @@ -395,10 +394,10 @@ async def wait_for_prompt_done( prompt_id: str, poll_interval_s: float, timeout_s: float, -) -> tuple[float | None, float | None, dict | None]: +) -> tuple[float | None, dict | None]: """ - Returns (queue_wait_ms, execution_ms, node_timing_ms) from history_item["benchmark"]. - Falls back to (None, None, None) if unavailable. + Returns (execution_ms, node_timing_ms) from history_item["benchmark"]. + Falls back to (None, None) if unavailable. 
""" deadline = time.perf_counter() + timeout_s history_url = f"{base_url}/history/{prompt_id}" @@ -426,7 +425,6 @@ async def wait_for_prompt_done( benchmark = history_item.get("benchmark", {}) return ( - benchmark.get("queue_wait_ms"), benchmark.get("execution_ms"), benchmark.get("nodes"), ) @@ -487,7 +485,7 @@ async def run_request( timeout_s=args.request_timeout_s, ) - queue_wait_ms, execution_ms, node_timing_ms = await wait_for_prompt_done( + execution_ms, node_timing_ms = await wait_for_prompt_done( session=session, base_url=args.host, prompt_id=prompt_id, @@ -504,7 +502,6 @@ async def run_request( started_at=started_at, finished_at=finished_at, end_to_end_s=finished_at - queued_at, - queue_wait_ms=queue_wait_ms, execution_ms=execution_ms, node_timing_ms=node_timing_ms, ) @@ -519,7 +516,6 @@ async def run_request( started_at=started_at, finished_at=finished_at, end_to_end_s=finished_at - queued_at, - queue_wait_ms=None, execution_ms=None, node_timing_ms=None, ) @@ -529,7 +525,6 @@ def print_summary(results: list[RequestResult], wall_s: float) -> None: success = [r for r in results if r.ok] fail = [r for r in results if not r.ok] lat_s = [r.end_to_end_s for r in success] - queue_wait_ms = [r.queue_wait_ms for r in success if r.queue_wait_ms is not None] exec_ms = [r.execution_ms for r in success if r.execution_ms is not None] throughput = (len(success) / wall_s) if wall_s > 0 else 0.0 @@ -548,10 +543,6 @@ def print_summary(results: list[RequestResult], wall_s: float) -> None: print(f"latency_mean_s: {statistics.mean(lat_s):.3f}") print(f"latency_max_s: {max(lat_s):.3f}") - if queue_wait_ms: - print(f"queue_wait_mean_ms: {statistics.mean(queue_wait_ms):.2f}") - print(f"queue_wait_p95_ms: {percentile(queue_wait_ms, 95):.2f}") - if exec_ms: print(f"execution_mean_ms: {statistics.mean(exec_ms):.2f}") print(f"execution_p95_ms: {percentile(exec_ms, 95):.2f}") diff --git a/main.py b/main.py index ac15ad30a..110dc34bf 100644 --- a/main.py +++ b/main.py @@ -301,7 +301,6 @@ def prompt_worker(q, server_instance): if queue_item is not None: item, item_id = queue_item execution_start_time = time.perf_counter() - execution_start_wall_ms = int(time.time() * 1000) prompt_id = item[1] server_instance.last_prompt_id = prompt_id @@ -318,11 +317,6 @@ def prompt_worker(q, server_instance): need_gc = True - queue_wait_ms = 0.0 - created_at = extra_data.get("create_time") - if isinstance(created_at, int): - queue_wait_ms = max(0.0, execution_start_wall_ms - created_at) - remove_sensitive = lambda prompt: prompt[:5] + prompt[6:] history_result = e.history_result if benchmark_mode: @@ -331,7 +325,6 @@ def prompt_worker(q, server_instance): "meta": {}, "benchmark": { "execution_ms": execution_time_s * 1000.0, - "queue_wait_ms": queue_wait_ms, "nodes": e.node_timing_ms, }, } From 139d4a7e862c786c4af235c6e9291a3bc4ecfb74 Mon Sep 17 00:00:00 2001 From: Tara Ding <38710454+windtara0619@users.noreply.github.com> Date: Mon, 27 Apr 2026 22:07:53 -0700 Subject: [PATCH 19/27] fix format --- main.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/main.py b/main.py index 110dc34bf..a8652ad6b 100644 --- a/main.py +++ b/main.py @@ -313,7 +313,6 @@ def prompt_worker(q, server_instance): if not benchmark_mode: asset_seeder.pause() e.execute(item[2], prompt_id, extra_data, item[4]) - execution_time_s = time.perf_counter() - execution_start_time need_gc = True @@ -323,10 +322,6 @@ def prompt_worker(q, server_instance): history_result = { "outputs": {}, "meta": {}, - "benchmark": { - 
"execution_ms": execution_time_s * 1000.0, - "nodes": e.node_timing_ms, - }, } q.task_done(item_id, @@ -339,13 +334,20 @@ def prompt_worker(q, server_instance): server_instance.send_sync("executing", {"node": None, "prompt_id": prompt_id}, server_instance.client_id) current_time = time.perf_counter() + execution_time = current_time - execution_start_time # Log Time in a more readable way after 10 minutes - if execution_time_s > 600: - execution_time_formatted = time.strftime("%H:%M:%S", time.gmtime(execution_time_s)) + if execution_time > 600: + execution_time_formatted = time.strftime("%H:%M:%S", time.gmtime(execution_time)) logging.info(f"Prompt executed in {execution_time_formatted}") else: - logging.info("Prompt executed in {:.2f} seconds".format(execution_time_s)) + logging.info("Prompt executed in {:.2f} seconds".format(execution_time)) + + if benchmark_mode: + history_result["benchmark"] = { + "execution_ms": execution_time * 1000.0, + "nodes": e.node_timing_ms, + } if not benchmark_mode and not asset_seeder.is_disabled(): paths = _collect_output_absolute_paths(e.history_result) From 79825dbd320d61fa8f19435753f884f0266ae918 Mon Sep 17 00:00:00 2001 From: Tara Ding <38710454+windtara0619@users.noreply.github.com> Date: Mon, 27 Apr 2026 22:11:47 -0700 Subject: [PATCH 20/27] Fix format --- main.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/main.py b/main.py index a8652ad6b..07c0dd359 100644 --- a/main.py +++ b/main.py @@ -317,15 +317,8 @@ def prompt_worker(q, server_instance): need_gc = True remove_sensitive = lambda prompt: prompt[:5] + prompt[6:] - history_result = e.history_result - if benchmark_mode: - history_result = { - "outputs": {}, - "meta": {}, - } - q.task_done(item_id, - history_result, + e.history_result, status=execution.PromptQueue.ExecutionStatus( status_str='success' if e.success else 'error', completed=e.success, @@ -344,7 +337,7 @@ def prompt_worker(q, server_instance): logging.info("Prompt executed in {:.2f} seconds".format(execution_time)) if benchmark_mode: - history_result["benchmark"] = { + e.history_result["benchmark"] = { "execution_ms": execution_time * 1000.0, "nodes": e.node_timing_ms, } From ba978bc0e20a2e66ddc2dc23c09b3a137734a56a Mon Sep 17 00:00:00 2001 From: Tara Ding <38710454+windtara0619@users.noreply.github.com> Date: Mon, 27 Apr 2026 22:13:55 -0700 Subject: [PATCH 21/27] Fix format --- main.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/main.py b/main.py index 07c0dd359..8be3036ab 100644 --- a/main.py +++ b/main.py @@ -310,8 +310,7 @@ def prompt_worker(q, server_instance): extra_data[k] = sensitive[k] benchmark_mode = args.benchmark_server_only - if not benchmark_mode: - asset_seeder.pause() + asset_seeder.pause() e.execute(item[2], prompt_id, extra_data, item[4]) need_gc = True @@ -342,7 +341,7 @@ def prompt_worker(q, server_instance): "nodes": e.node_timing_ms, } - if not benchmark_mode and not asset_seeder.is_disabled(): + if not asset_seeder.is_disabled(): paths = _collect_output_absolute_paths(e.history_result) register_output_files(paths, job_id=prompt_id) @@ -368,10 +367,9 @@ def prompt_worker(q, server_instance): need_gc = False hook_breaker_ac10a0.restore_functions() - if not benchmark_mode and not asset_seeder.is_disabled(): + if not asset_seeder.is_disabled(): asset_seeder.enqueue_enrich(roots=("output",), compute_hashes=True) - if not benchmark_mode: - asset_seeder.resume() + asset_seeder.resume() async def run(server_instance, address='', port=8188, verbose=True, 
call_on_start=None):

From c39f7ea76c551b1890084c6aea3b89b97c10a943 Mon Sep 17 00:00:00 2001
From: Tara Ding <38710454+windtara0619@users.noreply.github.com>
Date: Mon, 27 Apr 2026 22:23:42 -0700
Subject: [PATCH 22/27] Add tqdm progress bar to the benchmark

---
 benchmarks/benchmark_comfyui_serving.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/benchmarks/benchmark_comfyui_serving.py b/benchmarks/benchmark_comfyui_serving.py
index 06979cdc5..2e961660c 100644
--- a/benchmarks/benchmark_comfyui_serving.py
+++ b/benchmarks/benchmark_comfyui_serving.py
@@ -51,6 +51,7 @@ from pathlib import Path
 from typing import Any
 
 import aiohttp
+from tqdm import tqdm
 
 
 # ──────────────────────────────────────────────────────────────────────────────
@@ -669,7 +670,14 @@ async def async_main(args: argparse.Namespace) -> None:
         )
         for i in range(args.num_requests)
     ]
-    results = await asyncio.gather(*tasks)
+    results = []
+    with tqdm(total=args.num_requests, unit="req", desc="benchmark") as pbar:
+        for coro in asyncio.as_completed(tasks):
+            result = await coro
+            results.append(result)
+            pbar.update(1)
+            if result.ok:
+                pbar.set_postfix(succeeded=sum(r.ok for r in results))
 
     wall_s = time.perf_counter() - started
     print_summary(results, wall_s)

From 54ced2923bfff2f3950ba92f90196ab4b215ab84 Mon Sep 17 00:00:00 2001
From: Tara Ding <38710454+windtara0619@users.noreply.github.com>
Date: Mon, 27 Apr 2026 22:28:42 -0700
Subject: [PATCH 23/27] Don't generate synthetic images when VBench download fails

---
 benchmarks/benchmark_comfyui_serving.py | 27 ++++++++-----------------
 1 file changed, 8 insertions(+), 19 deletions(-)

diff --git a/benchmarks/benchmark_comfyui_serving.py b/benchmarks/benchmark_comfyui_serving.py
index 2e961660c..f68c22054 100644
--- a/benchmarks/benchmark_comfyui_serving.py
+++ b/benchmarks/benchmark_comfyui_serving.py
@@ -144,13 +144,9 @@ def download_models(base_dir: Path, model: str, task: str) -> None:
 def _try_download_vbench_i2v(input_dir: Path) -> list[str]:
     """
     Download VBench I2V origin images from Google Drive via gdown (pip install gdown).
-    Returns image basenames placed in *input_dir*, or [] on failure.
+    Raises on any failure.
     """
-    try:
-        import gdown  # type: ignore
-    except ImportError:
-        print("[setup] gdown not available; skipping VBench download. Install with: pip install gdown")
-        return []
+    import gdown  # type: ignore  # raises ImportError if not installed
 
     import zipfile
 
@@ -163,11 +159,10 @@ def _try_download_vbench_i2v(input_dir: Path) -> list[str]:
         with zipfile.ZipFile(zip_path, "r") as zf:
             zf.extractall(str(input_dir))
         zip_path.unlink()
-    except Exception as exc:
-        print(f"[setup] VBench I2V download failed: {exc}")
+    except Exception:
         if zip_path.exists():
             zip_path.unlink()
-        return []
+        raise
 
     image_exts = {".png", ".jpg", ".jpeg", ".webp"}
     filenames = sorted(
@@ -206,20 +201,14 @@ def prepare_input_images(
 ) -> list[str]:
     """
     Prepare benchmark input images in *input_dir*.
-
-    Priority:
-    1. Reuse any images already present in the directory.
-    2. Fetch from the source specified by *image_source* (e.g. "vbench_i2v").
-    3. Generate synthetic 720×720 white PNG placeholders with Pillow.
-
-    Returns a list of image basenames (not full paths).
+    For "vbench_i2v", downloads from Google Drive and raises on failure.
+    Falls back to synthetic images only when image_source is not "vbench_i2v".
+    Returns a list of image paths relative to *input_dir*.
""" input_dir.mkdir(parents=True, exist_ok=True) if image_source == "vbench_i2v": - filenames = _try_download_vbench_i2v(input_dir) - if filenames: - return filenames + return _try_download_vbench_i2v(input_dir) print(f"[setup] generating {num_images} synthetic 720×720 placeholder images ...") return _generate_synthetic_images(input_dir, num_images) From a2204ec976719aeab1bc6ce5f2978a4ddf1d0eb7 Mon Sep 17 00:00:00 2001 From: Tara Ding <38710454+windtara0619@users.noreply.github.com> Date: Mon, 27 Apr 2026 22:34:38 -0700 Subject: [PATCH 24/27] force to regenerate prompts everytime --- benchmarks/benchmark_comfyui_serving.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/benchmarks/benchmark_comfyui_serving.py b/benchmarks/benchmark_comfyui_serving.py index f68c22054..bd884baca 100644 --- a/benchmarks/benchmark_comfyui_serving.py +++ b/benchmarks/benchmark_comfyui_serving.py @@ -262,12 +262,6 @@ def generate_prompt_files( raise ValueError("--comfyui-base-dir is required when --download-models is set") download_models(comfyui_base_dir, model, task) - # Skip prompt generation if files already exist. - existing = sorted(output_dir.glob(f"{model}_{task}_prompt_*.json")) - if existing: - print(f"[setup] found {len(existing)} existing prompt files in {output_dir}, skipping generation") - return existing - image_filenames = prepare_input_images( input_dir, num_images=num_images, From 9ea25780c66dc91a30d358dbaad3d16965702d87 Mon Sep 17 00:00:00 2001 From: Tara Ding <38710454+windtara0619@users.noreply.github.com> Date: Mon, 27 Apr 2026 22:44:00 -0700 Subject: [PATCH 25/27] fix benchmark --- main.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/main.py b/main.py index 8be3036ab..bab80379a 100644 --- a/main.py +++ b/main.py @@ -315,6 +315,12 @@ def prompt_worker(q, server_instance): need_gc = True + if benchmark_mode: + e.history_result["benchmark"] = { + "execution_ms": (time.perf_counter() - execution_start_time) * 1000.0, + "nodes": e.node_timing_ms, + } + remove_sensitive = lambda prompt: prompt[:5] + prompt[6:] q.task_done(item_id, e.history_result, @@ -335,12 +341,6 @@ def prompt_worker(q, server_instance): else: logging.info("Prompt executed in {:.2f} seconds".format(execution_time)) - if benchmark_mode: - e.history_result["benchmark"] = { - "execution_ms": execution_time * 1000.0, - "nodes": e.node_timing_ms, - } - if not asset_seeder.is_disabled(): paths = _collect_output_absolute_paths(e.history_result) register_output_files(paths, job_id=prompt_id) From 1d64200d2e77901d0db252eeaf454bada3209984 Mon Sep 17 00:00:00 2001 From: Tara Ding <38710454+windtara0619@users.noreply.github.com> Date: Mon, 27 Apr 2026 23:01:37 -0700 Subject: [PATCH 26/27] Add benchmark README --- benchmarks/README.md | 110 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100644 benchmarks/README.md diff --git a/benchmarks/README.md b/benchmarks/README.md new file mode 100644 index 000000000..8d785fd9d --- /dev/null +++ b/benchmarks/README.md @@ -0,0 +1,110 @@ +# ComfyUI Serving Benchmarks + +Measures latency and throughput of a running ComfyUI server by submitting +concurrent prompt requests and collecting results from the history API. 
+
+## Dependencies
+
+```bash
+pip install aiohttp tqdm gdown
+```
+
+## Supported models / tasks
+
+| Model | Task | Description |
+|-------|------|-------------|
+| `wan22` | `i2v` | Wan 2.2 Image-to-Video — LightX2V 4-step, 720×720, 81 frames |
+
+To add a new model/task: drop a workflow JSON in `workflows/` (with
+`__INPUT_IMAGE__` as the image placeholder) and add an entry to
+`_MODEL_REGISTRY` in `benchmark_comfyui_serving.py`.
+
+## How it works
+
+On each run the script:
+
+1. Downloads model weights into the ComfyUI `models/` directory (only if
+   `--download-models` is passed).
+2. Downloads the [VBench I2V](https://github.com/Vchitect/VBench) image
+   dataset via `gdown` into ComfyUI's `input/` folder.
+3. Generates one prompt JSON per input image under
+   `benchmarks/prompts/<model>_<task>/`.
+4. Submits `--num-requests` prompts to the server, cycling through the
+   generated prompt files in round-robin order.
+5. Polls `/history/{prompt_id}` for completion and prints a latency /
+   throughput summary.
+
+Per-node execution times are available when the server is started with
+`--benchmark-server-only`.
+
+## Usage
+
+### Start the server
+
+```bash
+python main.py --listen 127.0.0.1 --port 8188 --benchmark-server-only
+```
+
+### Run the benchmark
+
+```bash
+# From the ComfyUI root directory:
+python3 benchmarks/benchmark_comfyui_serving.py \
+    --model wan22 --task i2v \
+    --num-requests 50 --max-concurrency 4 \
+    --host http://127.0.0.1:8188
+```
+
+Include model weight download on first run:
+
+```bash
+python3 benchmarks/benchmark_comfyui_serving.py \
+    --model wan22 --task i2v \
+    --download-models --comfyui-base-dir /path/to/ComfyUI \
+    --num-requests 50 --max-concurrency 4 \
+    --host http://127.0.0.1:8188
+```
+
+### All flags
+
+| Flag | Default | Description |
+|------|---------|-------------|
+| `--model` | *(required)* | Model name (e.g. `wan22`) |
+| `--task` | *(required)* | Task type (e.g. `i2v`) |
+| `--host` | `http://127.0.0.1:8188` | ComfyUI base URL |
+| `--num-requests` | `50` | Total requests to submit |
+| `--max-concurrency` | `8` | Max in-flight requests |
+| `--request-rate` | `0` | Requests/sec; `0` = fire immediately |
+| `--poisson` | off | Poisson inter-arrival when `--request-rate > 0` |
+| `--num-images` | `20` | Number of synthetic placeholder images (when not using the VBench source) |
+| `--prompts-dir` | `benchmarks/prompts/<model>_<task>/` | Prompt JSON output directory |
+| `--download-models` | off | Download model weights before benchmarking |
+| `--comfyui-base-dir` | — | ComfyUI root (required with `--download-models`) |
+| `--output-json` | — | Write full per-request results to a JSON file |
+
+## Output
+
+```
+benchmark: 100%|████████████| 50/50 [req, succeeded=50]
+
+=== ComfyUI Serving Benchmark Summary ===
+requests_total: 50
+requests_success: 50
+requests_failed: 0
+wall_time_s: 412.341
+throughput_req_s: 0.121
+latency_p50_s: 38.201
+latency_p90_s: 52.110
+latency_p95_s: 55.837
+latency_p99_s: 60.012
+latency_mean_s: 39.445
+latency_max_s: 61.203
+execution_mean_ms: 35210.44
+execution_p95_ms: 51200.11
+
+--- Per-node execution time (mean ms across successful requests) ---
+  KSampler (Advanced) (130:110): mean=18200.1 p95=22100.3 n=50
+  KSampler (Advanced) (130:111): mean=16900.4 p95=20800.7 n=50
+  VAEDecode (130:129): mean=420.2 p95=510.1 n=50
+  ...
+``` From 875bdc4015e50541463e8587c604a663ef6d4e3a Mon Sep 17 00:00:00 2001 From: Tara Ding <38710454+windtara0619@users.noreply.github.com> Date: Mon, 27 Apr 2026 23:05:42 -0700 Subject: [PATCH 27/27] Update README --- benchmarks/README.md | 51 +++++++++++++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 17 deletions(-) diff --git a/benchmarks/README.md b/benchmarks/README.md index 8d785fd9d..4dbc42a01 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -85,26 +85,43 @@ python3 benchmarks/benchmark_comfyui_serving.py \ ## Output ``` -benchmark: 100%|████████████| 50/50 [req, succeeded=50] +benchmark: 100%|█████████████| 5/5 [02:58<00:00, 35.73s/req, succeeded=5] === ComfyUI Serving Benchmark Summary === -requests_total: 50 -requests_success: 50 +requests_total: 5 +requests_success: 5 requests_failed: 0 -wall_time_s: 412.341 -throughput_req_s: 0.121 -latency_p50_s: 38.201 -latency_p90_s: 52.110 -latency_p95_s: 55.837 -latency_p99_s: 60.012 -latency_mean_s: 39.445 -latency_max_s: 61.203 -execution_mean_ms: 35210.44 -execution_p95_ms: 51200.11 +wall_time_s: 178.652 +throughput_req_s: 0.028 +latency_p50_s: 109.594 +latency_p90_s: 164.840 +latency_p95_s: 171.744 +latency_p99_s: 177.266 +latency_mean_s: 109.781 +latency_max_s: 178.647 +execution_mean_ms: 35465.21 +execution_p95_ms: 39685.06 --- Per-node execution time (mean ms across successful requests) --- - KSampler (Advanced) (130:110): mean=18200.1 p95=22100.3 n=50 - KSampler (Advanced) (130:111): mean=16900.4 p95=20800.7 n=50 - VAEDecode (130:129): mean=420.2 p95=510.1 n=50 - ... + KSamplerAdvanced (130:110): mean=12827.5 p95=14264.0 n=5 + KSamplerAdvanced (130:111): mean=12726.4 p95=13822.2 n=5 + VAEDecode (130:129): mean=3439.0 p95=3467.6 n=5 + SaveVideo (108): mean=2844.7 p95=3280.0 n=5 + WanImageToVideo (130:128): mean=2367.7 p95=2595.9 n=5 + CLIPTextEncode (130:125): mean=1785.0 p95=1785.0 n=1 + CLIPLoader (130:105): mean=700.7 p95=700.7 n=1 + LoadImage (97): mean=518.4 p95=970.0 n=5 + VAELoader (130:106): mean=507.7 p95=507.7 n=1 + CLIPTextEncode (130:107): mean=223.4 p95=223.4 n=1 + UNETLoader (130:122): mean=122.2 p95=122.2 n=1 + LoraLoaderModelOnly (130:126): mean=68.1 p95=68.1 n=1 + UNETLoader (130:123): mean=65.9 p95=65.9 n=1 + LoraLoaderModelOnly (130:127): mean=36.2 p95=36.2 n=1 + ModelSamplingSD3 (130:109): mean=1.0 p95=1.0 n=1 + ModelSamplingSD3 (130:124): mean=0.9 p95=0.9 n=1 + CreateVideo (130:117): mean=0.7 p95=1.1 n=5 ``` + +> **Note:** Nodes with `n=1` (e.g. model loaders) are cached by ComfyUI after +> the first request and skipped in subsequent executions, so they only appear +> once across the benchmark run.
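
As the README notes, adding a new model/task pair only requires a workflow
JSON plus a `_MODEL_REGISTRY` entry. The registry's exact schema does not
appear in this series, so the sketch below is hypothetical: the field names
and checkpoint filename are illustrative assumptions, not the script's
actual API.

```python
# Hypothetical shape of a _MODEL_REGISTRY entry in
# benchmark_comfyui_serving.py; the field names below are illustrative
# assumptions, not the script's actual schema.
_MODEL_REGISTRY: dict[tuple[str, str], dict] = {
    ("wan22", "i2v"): {
        # Workflow template; __INPUT_IMAGE__ is substituted per input
        # image when the per-image prompt JSONs are generated.
        "workflow_file": "workflows/wan22_i2v.json",
        # Weights for download_models() to fetch when --download-models
        # is passed (placeholder filename, not a real checkpoint).
        "model_files": ["diffusion_models/wan22_i2v_placeholder.safetensors"],
    },
}
```

A new pair such as `("wan22", "t2v")` would follow the same pattern, with
its own workflow JSON dropped into `workflows/`.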