Remove queue_time

This commit is contained in:
Tara Ding 2026-04-27 22:03:52 -07:00
parent 69f6272edc
commit 6251350cf4
2 changed files with 4 additions and 20 deletions

View File

@@ -314,7 +314,6 @@ class RequestResult:
started_at: float
finished_at: float
end_to_end_s: float
queue_wait_ms: float | None
execution_ms: float | None
node_timing_ms: dict[str, dict] | None
@@ -395,10 +394,10 @@ async def wait_for_prompt_done(
prompt_id: str,
poll_interval_s: float,
timeout_s: float,
) -> tuple[float | None, float | None, dict | None]:
) -> tuple[float | None, dict | None]:
"""
Returns (queue_wait_ms, execution_ms, node_timing_ms) from history_item["benchmark"].
Falls back to (None, None, None) if unavailable.
Returns (execution_ms, node_timing_ms) from history_item["benchmark"].
Falls back to (None, None) if unavailable.
"""
deadline = time.perf_counter() + timeout_s
history_url = f"{base_url}/history/{prompt_id}"
@@ -426,7 +425,6 @@ async def wait_for_prompt_done(
benchmark = history_item.get("benchmark", {})
return (
benchmark.get("queue_wait_ms"),
benchmark.get("execution_ms"),
benchmark.get("nodes"),
)
@@ -487,7 +485,7 @@ async def run_request(
timeout_s=args.request_timeout_s,
)
queue_wait_ms, execution_ms, node_timing_ms = await wait_for_prompt_done(
execution_ms, node_timing_ms = await wait_for_prompt_done(
session=session,
base_url=args.host,
prompt_id=prompt_id,
@@ -504,7 +502,6 @@ async def run_request(
started_at=started_at,
finished_at=finished_at,
end_to_end_s=finished_at - queued_at,
queue_wait_ms=queue_wait_ms,
execution_ms=execution_ms,
node_timing_ms=node_timing_ms,
)
@@ -519,7 +516,6 @@ async def run_request(
started_at=started_at,
finished_at=finished_at,
end_to_end_s=finished_at - queued_at,
queue_wait_ms=None,
execution_ms=None,
node_timing_ms=None,
)
@@ -529,7 +525,6 @@ def print_summary(results: list[RequestResult], wall_s: float) -> None:
success = [r for r in results if r.ok]
fail = [r for r in results if not r.ok]
lat_s = [r.end_to_end_s for r in success]
queue_wait_ms = [r.queue_wait_ms for r in success if r.queue_wait_ms is not None]
exec_ms = [r.execution_ms for r in success if r.execution_ms is not None]
throughput = (len(success) / wall_s) if wall_s > 0 else 0.0
@@ -548,10 +543,6 @@ def print_summary(results: list[RequestResult], wall_s: float) -> None:
print(f"latency_mean_s: {statistics.mean(lat_s):.3f}")
print(f"latency_max_s: {max(lat_s):.3f}")
if queue_wait_ms:
print(f"queue_wait_mean_ms: {statistics.mean(queue_wait_ms):.2f}")
print(f"queue_wait_p95_ms: {percentile(queue_wait_ms, 95):.2f}")
if exec_ms:
print(f"execution_mean_ms: {statistics.mean(exec_ms):.2f}")
print(f"execution_p95_ms: {percentile(exec_ms, 95):.2f}")

View File

@@ -301,7 +301,6 @@ def prompt_worker(q, server_instance):
if queue_item is not None:
item, item_id = queue_item
execution_start_time = time.perf_counter()
execution_start_wall_ms = int(time.time() * 1000)
prompt_id = item[1]
server_instance.last_prompt_id = prompt_id
@@ -318,11 +317,6 @@ def prompt_worker(q, server_instance):
need_gc = True
queue_wait_ms = 0.0
created_at = extra_data.get("create_time")
if isinstance(created_at, int):
queue_wait_ms = max(0.0, execution_start_wall_ms - created_at)
remove_sensitive = lambda prompt: prompt[:5] + prompt[6:]
history_result = e.history_result
if benchmark_mode:
@@ -331,7 +325,6 @@ def prompt_worker(q, server_instance):
"meta": {},
"benchmark": {
"execution_ms": execution_time_s * 1000.0,
"queue_wait_ms": queue_wait_ms,
"nodes": e.node_timing_ms,
},
}