From 6251350cf4b709eb984f0f8e292e11a10f9445f7 Mon Sep 17 00:00:00 2001 From: Tara Ding <38710454+windtara0619@users.noreply.github.com> Date: Mon, 27 Apr 2026 22:03:52 -0700 Subject: [PATCH] Remove queue_time --- benchmarks/benchmark_comfyui_serving.py | 17 ++++------------- main.py | 7 ------- 2 files changed, 4 insertions(+), 20 deletions(-) diff --git a/benchmarks/benchmark_comfyui_serving.py b/benchmarks/benchmark_comfyui_serving.py index cccc85df7..06979cdc5 100644 --- a/benchmarks/benchmark_comfyui_serving.py +++ b/benchmarks/benchmark_comfyui_serving.py @@ -314,7 +314,6 @@ class RequestResult: started_at: float finished_at: float end_to_end_s: float - queue_wait_ms: float | None execution_ms: float | None node_timing_ms: dict[str, dict] | None @@ -395,10 +394,10 @@ async def wait_for_prompt_done( prompt_id: str, poll_interval_s: float, timeout_s: float, -) -> tuple[float | None, float | None, dict | None]: +) -> tuple[float | None, dict | None]: """ - Returns (queue_wait_ms, execution_ms, node_timing_ms) from history_item["benchmark"]. - Falls back to (None, None, None) if unavailable. + Returns (execution_ms, node_timing_ms) from history_item["benchmark"]. + Falls back to (None, None) if unavailable. """ deadline = time.perf_counter() + timeout_s history_url = f"{base_url}/history/{prompt_id}" @@ -426,7 +425,6 @@ async def wait_for_prompt_done( benchmark = history_item.get("benchmark", {}) return ( - benchmark.get("queue_wait_ms"), benchmark.get("execution_ms"), benchmark.get("nodes"), ) @@ -487,7 +485,7 @@ async def run_request( timeout_s=args.request_timeout_s, ) - queue_wait_ms, execution_ms, node_timing_ms = await wait_for_prompt_done( + execution_ms, node_timing_ms = await wait_for_prompt_done( session=session, base_url=args.host, prompt_id=prompt_id, @@ -504,7 +502,6 @@ async def run_request( started_at=started_at, finished_at=finished_at, end_to_end_s=finished_at - queued_at, - queue_wait_ms=queue_wait_ms, execution_ms=execution_ms, node_timing_ms=node_timing_ms, ) @@ -519,7 +516,6 @@ async def run_request( started_at=started_at, finished_at=finished_at, end_to_end_s=finished_at - queued_at, - queue_wait_ms=None, execution_ms=None, node_timing_ms=None, ) @@ -529,7 +525,6 @@ def print_summary(results: list[RequestResult], wall_s: float) -> None: success = [r for r in results if r.ok] fail = [r for r in results if not r.ok] lat_s = [r.end_to_end_s for r in success] - queue_wait_ms = [r.queue_wait_ms for r in success if r.queue_wait_ms is not None] exec_ms = [r.execution_ms for r in success if r.execution_ms is not None] throughput = (len(success) / wall_s) if wall_s > 0 else 0.0 @@ -548,10 +543,6 @@ def print_summary(results: list[RequestResult], wall_s: float) -> None: print(f"latency_mean_s: {statistics.mean(lat_s):.3f}") print(f"latency_max_s: {max(lat_s):.3f}") - if queue_wait_ms: - print(f"queue_wait_mean_ms: {statistics.mean(queue_wait_ms):.2f}") - print(f"queue_wait_p95_ms: {percentile(queue_wait_ms, 95):.2f}") - if exec_ms: print(f"execution_mean_ms: {statistics.mean(exec_ms):.2f}") print(f"execution_p95_ms: {percentile(exec_ms, 95):.2f}") diff --git a/main.py b/main.py index ac15ad30a..110dc34bf 100644 --- a/main.py +++ b/main.py @@ -301,7 +301,6 @@ def prompt_worker(q, server_instance): if queue_item is not None: item, item_id = queue_item execution_start_time = time.perf_counter() - execution_start_wall_ms = int(time.time() * 1000) prompt_id = item[1] server_instance.last_prompt_id = prompt_id @@ -318,11 +317,6 @@ def prompt_worker(q, server_instance): need_gc = True - queue_wait_ms = 0.0 - created_at = extra_data.get("create_time") - if isinstance(created_at, int): - queue_wait_ms = max(0.0, execution_start_wall_ms - created_at) - remove_sensitive = lambda prompt: prompt[:5] + prompt[6:] history_result = e.history_result if benchmark_mode: @@ -331,7 +325,6 @@ def prompt_worker(q, server_instance): "meta": {}, "benchmark": { "execution_ms": execution_time_s * 1000.0, - "queue_wait_ms": queue_wait_ms, "nodes": e.node_timing_ms, }, }