Remove queue_time

This commit is contained in:
Tara Ding 2026-04-27 22:03:52 -07:00
parent 69f6272edc
commit 6251350cf4
2 changed files with 4 additions and 20 deletions

View File

@@ -314,7 +314,6 @@ class RequestResult:
started_at: float
finished_at: float
end_to_end_s: float
queue_wait_ms: float | None
execution_ms: float | None
node_timing_ms: dict[str, dict] | None
@@ -395,10 +394,10 @@ async def wait_for_prompt_done(
prompt_id: str,
poll_interval_s: float,
timeout_s: float,
) -> tuple[float | None, float | None, dict | None]:
) -> tuple[float | None, dict | None]:
"""
Returns (queue_wait_ms, execution_ms, node_timing_ms) from history_item["benchmark"].
Falls back to (None, None, None) if unavailable.
Returns (execution_ms, node_timing_ms) from history_item["benchmark"].
Falls back to (None, None) if unavailable.
"""
deadline = time.perf_counter() + timeout_s
history_url = f"{base_url}/history/{prompt_id}"
@@ -426,7 +425,6 @@ async def wait_for_prompt_done(
benchmark = history_item.get("benchmark", {})
return (
benchmark.get("queue_wait_ms"),
benchmark.get("execution_ms"),
benchmark.get("nodes"),
)
@@ -487,7 +485,7 @@ async def run_request(
timeout_s=args.request_timeout_s,
)
queue_wait_ms, execution_ms, node_timing_ms = await wait_for_prompt_done(
execution_ms, node_timing_ms = await wait_for_prompt_done(
session=session,
base_url=args.host,
prompt_id=prompt_id,
@@ -504,7 +502,6 @@ async def run_request(
started_at=started_at,
finished_at=finished_at,
end_to_end_s=finished_at - queued_at,
queue_wait_ms=queue_wait_ms,
execution_ms=execution_ms,
node_timing_ms=node_timing_ms,
)
@@ -519,7 +516,6 @@ async def run_request(
started_at=started_at,
finished_at=finished_at,
end_to_end_s=finished_at - queued_at,
queue_wait_ms=None,
execution_ms=None,
node_timing_ms=None,
)
@@ -529,7 +525,6 @@ def print_summary(results: list[RequestResult], wall_s: float) -> None:
success = [r for r in results if r.ok]
fail = [r for r in results if not r.ok]
lat_s = [r.end_to_end_s for r in success]
queue_wait_ms = [r.queue_wait_ms for r in success if r.queue_wait_ms is not None]
exec_ms = [r.execution_ms for r in success if r.execution_ms is not None]
throughput = (len(success) / wall_s) if wall_s > 0 else 0.0
@@ -548,10 +543,6 @@ def print_summary(results: list[RequestResult], wall_s: float) -> None:
print(f"latency_mean_s: {statistics.mean(lat_s):.3f}")
print(f"latency_max_s: {max(lat_s):.3f}")
if queue_wait_ms:
print(f"queue_wait_mean_ms: {statistics.mean(queue_wait_ms):.2f}")
print(f"queue_wait_p95_ms: {percentile(queue_wait_ms, 95):.2f}")
if exec_ms:
print(f"execution_mean_ms: {statistics.mean(exec_ms):.2f}")
print(f"execution_p95_ms: {percentile(exec_ms, 95):.2f}")

View File

@@ -301,7 +301,6 @@ def prompt_worker(q, server_instance):
if queue_item is not None:
item, item_id = queue_item
execution_start_time = time.perf_counter()
execution_start_wall_ms = int(time.time() * 1000)
prompt_id = item[1]
server_instance.last_prompt_id = prompt_id
@@ -318,11 +317,6 @@ def prompt_worker(q, server_instance):
need_gc = True
queue_wait_ms = 0.0
created_at = extra_data.get("create_time")
if isinstance(created_at, int):
queue_wait_ms = max(0.0, execution_start_wall_ms - created_at)
remove_sensitive = lambda prompt: prompt[:5] + prompt[6:]
history_result = e.history_result
if benchmark_mode:
@@ -331,7 +325,6 @@ def prompt_worker(q, server_instance):
"meta": {},
"benchmark": {
"execution_ms": execution_time_s * 1000.0,
"queue_wait_ms": queue_wait_ms,
"nodes": e.node_timing_ms,
},
}