docs(api): 同步 /api/v1/chat/completions 的 OpenAPI 与同步响应

补充 Chat Completions 的兼容响应模型与路由注释，确保 /api/v1/chat/completions 按同步兼容格式返回并更新对应测试与 Swagger 文档。
2026-05-16 00:19:39 +08:00 · 2026-05-16 00:19:39 +08:00 · ae197a742f
commit ae197a742f
parent 34c3251c6d
7 changed files with 491 additions and 154 deletions
--- a/apps/api/docs/swagger.json
+++ b/apps/api/docs/swagger.json
@ -4000,26 +4000,27 @@
                        "BearerAuth": []
                    }
                ],
-                "description": "网关任务接口按 model 选择平台模型；/api/v1 路径返回任务受理结果，OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
+                "description": "/api/v1/chat/completions 同步执行：stream=true 返回 text/event-stream SSE；stream=false 或未传返回兼容 JSON；该接口忽略 X-Async。",
                "consumes": [
                    "application/json"
                ],
                "produces": [
-                    "application/json"
+                    "application/json",
+                    "text/event-stream"
                ],
                "tags": [
                    "tasks"
                ],
-                "summary": "创建或执行 AI 任务",
+                "summary": "创建 Chat Completions",
                "parameters": [
                    {
                        "type": "boolean",
-                        "description": "true 时异步创建任务并返回 202",
+                        "description": "该接口忽略此参数",
                        "name": "X-Async",
                        "in": "header"
                    },
                    {
-                        "description": "AI 任务请求，字段随任务类型变化",
+                        "description": "Chat Completions 请求",
                        "name": "input",
                        "in": "body",
                        "required": true,
@ -4032,13 +4033,7 @@
                    "200": {
                        "description": "OK",
                        "schema": {
-                            "$ref": "#/definitions/httpapi.CompatibleResponse"
-                        }
-                    },
-                    "202": {
-                        "description": "Accepted",
-                        "schema": {
-                            "$ref": "#/definitions/httpapi.TaskAcceptedResponse"
+                            "$ref": "#/definitions/httpapi.ChatCompletionCompatibleResponse"
                        }
                    },
                    "400": {
@ -4161,7 +4156,7 @@
                        "BearerAuth": []
                    }
                ],
-                "description": "网关任务接口按 model 选择平台模型；/api/v1 路径返回任务受理结果，OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
+                "description": "网关任务接口按 model 选择平台模型；除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果，OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
                "consumes": [
                    "application/json"
                ],
@ -4254,7 +4249,7 @@
                        "BearerAuth": []
                    }
                ],
-                "description": "网关任务接口按 model 选择平台模型；/api/v1 路径返回任务受理结果，OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
+                "description": "网关任务接口按 model 选择平台模型；除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果，OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
                "consumes": [
                    "application/json"
                ],
@ -4653,7 +4648,7 @@
                        "BearerAuth": []
                    }
                ],
-                "description": "网关任务接口按 model 选择平台模型；/api/v1 路径返回任务受理结果，OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
+                "description": "网关任务接口按 model 选择平台模型；除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果，OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
                "consumes": [
                    "application/json"
                ],
@ -4985,7 +4980,7 @@
                        "BearerAuth": []
                    }
                ],
-                "description": "网关任务接口按 model 选择平台模型；/api/v1 路径返回任务受理结果，OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
+                "description": "网关任务接口按 model 选择平台模型；除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果，OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
                "consumes": [
                    "application/json"
                ],
@ -5452,7 +5447,7 @@
                        "BearerAuth": []
                    }
                ],
-                "description": "网关任务接口按 model 选择平台模型；/api/v1 路径返回任务受理结果，OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
+                "description": "网关任务接口按 model 选择平台模型；除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果，OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
                "consumes": [
                    "application/json"
                ],
@ -5565,7 +5560,7 @@
                        "BearerAuth": []
                    }
                ],
-                "description": "网关任务接口按 model 选择平台模型；/api/v1 路径返回任务受理结果，OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
+                "description": "网关任务接口按 model 选择平台模型；除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果，OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
                "consumes": [
                    "application/json"
                ],
@ -5658,7 +5653,7 @@
                        "BearerAuth": []
                    }
                ],
-                "description": "网关任务接口按 model 选择平台模型；/api/v1 路径返回任务受理结果，OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
+                "description": "网关任务接口按 model 选择平台模型；除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果，OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
                "consumes": [
                    "application/json"
                ],
@ -5777,7 +5772,7 @@
                        "BearerAuth": []
                    }
                ],
-                "description": "网关任务接口按 model 选择平台模型；/api/v1 路径返回任务受理结果，OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
+                "description": "网关任务接口按 model 选择平台模型；除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果，OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
                "consumes": [
                    "application/json"
                ],
@ -5976,7 +5971,7 @@
                        "BearerAuth": []
                    }
                ],
-                "description": "网关任务接口按 model 选择平台模型；/api/v1 路径返回任务受理结果，OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
+                "description": "网关任务接口按 model 选择平台模型；除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果，OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
                "consumes": [
                    "application/json"
                ],
@ -6137,7 +6132,7 @@
                        "BearerAuth": []
                    }
                ],
-                "description": "网关任务接口按 model 选择平台模型；/api/v1 路径返回任务受理结果，OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
+                "description": "网关任务接口按 model 选择平台模型；除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果，OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
                "consumes": [
                    "application/json"
                ],
@ -6230,7 +6225,7 @@
                        "BearerAuth": []
                    }
                ],
-                "description": "网关任务接口按 model 选择平台模型；/api/v1 路径返回任务受理结果，OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
+                "description": "网关任务接口按 model 选择平台模型；除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果，OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
                "consumes": [
                    "application/json"
                ],
@ -6323,7 +6318,7 @@
                        "BearerAuth": []
                    }
                ],
-                "description": "网关任务接口按 model 选择平台模型；/api/v1 路径返回任务受理结果，OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
+                "description": "网关任务接口按 model 选择平台模型；除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果，OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
                "consumes": [
                    "application/json"
                ],
@ -6551,6 +6546,82 @@
                }
            }
        },
+        "httpapi.ChatCompletionChoice": {
+            "type": "object",
+            "properties": {
+                "finish_reason": {
+                    "type": "string",
+                    "example": "stop"
+                },
+                "index": {
+                    "type": "integer",
+                    "example": 0
+                },
+                "message": {
+                    "$ref": "#/definitions/httpapi.ChatCompletionChoiceMessage"
+                }
+            }
+        },
+        "httpapi.ChatCompletionChoiceMessage": {
+            "type": "object",
+            "properties": {
+                "content": {
+                    "type": "string",
+                    "example": "Hello"
+                },
+                "role": {
+                    "type": "string",
+                    "example": "assistant"
+                }
+            }
+        },
+        "httpapi.ChatCompletionCompatibleResponse": {
+            "type": "object",
+            "properties": {
+                "choices": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/httpapi.ChatCompletionChoice"
+                    }
+                },
+                "created": {
+                    "type": "integer",
+                    "example": 1710000000
+                },
+                "id": {
+                    "type": "string",
+                    "example": "chatcmpl-123"
+                },
+                "model": {
+                    "type": "string",
+                    "example": "gpt-4o-mini"
+                },
+                "object": {
+                    "type": "string",
+                    "example": "chat.completion"
+                },
+                "usage": {
+                    "$ref": "#/definitions/httpapi.ChatCompletionUsage"
+                }
+            }
+        },
+        "httpapi.ChatCompletionUsage": {
+            "type": "object",
+            "properties": {
+                "completion_tokens": {
+                    "type": "integer",
+                    "example": 8
+                },
+                "prompt_tokens": {
+                    "type": "integer",
+                    "example": 12
+                },
+                "total_tokens": {
+                    "type": "integer",
+                    "example": 20
+                }
+            }
+        },
        "httpapi.ChatMessage": {
            "type": "object",
            "properties": {
--- a/apps/api/docs/swagger.yaml
+++ b/apps/api/docs/swagger.yaml
@ -92,6 +92,59 @@ definitions:
          $ref: '#/definitions/store.CatalogProvider'
        type: array
    type: object
+  httpapi.ChatCompletionChoice:
+    properties:
+      finish_reason:
+        example: stop
+        type: string
+      index:
+        example: 0
+        type: integer
+      message:
+        $ref: '#/definitions/httpapi.ChatCompletionChoiceMessage'
+    type: object
+  httpapi.ChatCompletionChoiceMessage:
+    properties:
+      content:
+        example: Hello
+        type: string
+      role:
+        example: assistant
+        type: string
+    type: object
+  httpapi.ChatCompletionCompatibleResponse:
+    properties:
+      choices:
+        items:
+          $ref: '#/definitions/httpapi.ChatCompletionChoice'
+        type: array
+      created:
+        example: 1710000000
+        type: integer
+      id:
+        example: chatcmpl-123
+        type: string
+      model:
+        example: gpt-4o-mini
+        type: string
+      object:
+        example: chat.completion
+        type: string
+      usage:
+        $ref: '#/definitions/httpapi.ChatCompletionUsage'
+    type: object
+  httpapi.ChatCompletionUsage:
+    properties:
+      completion_tokens:
+        example: 8
+        type: integer
+      prompt_tokens:
+        example: 12
+        type: integer
+      total_tokens:
+        example: 20
+        type: integer
+    type: object
  httpapi.ChatMessage:
    properties:
      content:
@ -4800,14 +4853,14 @@ paths:
    post:
      consumes:
      - application/json
-      description: 网关任务接口按 model 选择平台模型；/api/v1 路径返回任务受理结果，OpenAI-compatible 路径同步返回兼容响应或
-        SSE 流。
+      description: /api/v1/chat/completions 同步执行：stream=true 返回 text/event-stream
+        SSE；stream=false 或未传返回兼容 JSON；该接口忽略 X-Async。
      parameters:
-      - description: true 时异步创建任务并返回 202
+      - description: 该接口忽略此参数
        in: header
        name: X-Async
        type: boolean
-      - description: AI 任务请求，字段随任务类型变化
+      - description: Chat Completions 请求
        in: body
        name: input
        required: true
@ -4815,15 +4868,12 @@ paths:
          $ref: '#/definitions/httpapi.TaskRequest'
      produces:
      - application/json
+      - text/event-stream
      responses:
        "200":
          description: OK
          schema:
-            $ref: '#/definitions/httpapi.CompatibleResponse'
-        "202":
-          description: Accepted
-          schema:
-            $ref: '#/definitions/httpapi.TaskAcceptedResponse'
+            $ref: '#/definitions/httpapi.ChatCompletionCompatibleResponse'
        "400":
          description: Bad Request
          schema:
@ -4854,7 +4904,7 @@ paths:
            $ref: '#/definitions/httpapi.ErrorEnvelope'
      security:
      - BearerAuth: []
-      summary: 创建或执行 AI 任务
+      summary: 创建 Chat Completions
      tags:
      - tasks
  /api/v1/files/upload:
@ -4905,8 +4955,8 @@ paths:
    post:
      consumes:
      - application/json
-      description: 网关任务接口按 model 选择平台模型；/api/v1 路径返回任务受理结果，OpenAI-compatible 路径同步返回兼容响应或
-        SSE 流。
+      description: 网关任务接口按 model 选择平台模型；除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果，OpenAI-compatible
+        路径同步返回兼容响应或 SSE 流。
      parameters:
      - description: true 时异步创建任务并返回 202
        in: header
@ -4966,8 +5016,8 @@ paths:
    post:
      consumes:
      - application/json
-      description: 网关任务接口按 model 选择平台模型；/api/v1 路径返回任务受理结果，OpenAI-compatible 路径同步返回兼容响应或
-        SSE 流。
+      description: 网关任务接口按 model 选择平台模型；除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果，OpenAI-compatible
+        路径同步返回兼容响应或 SSE 流。
      parameters:
      - description: true 时异步创建任务并返回 202
        in: header
@ -5220,8 +5270,8 @@ paths:
    post:
      consumes:
      - application/json
-      description: 网关任务接口按 model 选择平台模型；/api/v1 路径返回任务受理结果，OpenAI-compatible 路径同步返回兼容响应或
-        SSE 流。
+      description: 网关任务接口按 model 选择平台模型；除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果，OpenAI-compatible
+        路径同步返回兼容响应或 SSE 流。
      parameters:
      - description: true 时异步创建任务并返回 202
        in: header
@ -5434,8 +5484,8 @@ paths:
    post:
      consumes:
      - application/json
-      description: 网关任务接口按 model 选择平台模型；/api/v1 路径返回任务受理结果，OpenAI-compatible 路径同步返回兼容响应或
-        SSE 流。
+      description: 网关任务接口按 model 选择平台模型；除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果，OpenAI-compatible
+        路径同步返回兼容响应或 SSE 流。
      parameters:
      - description: true 时异步创建任务并返回 202
        in: header
@ -5735,8 +5785,8 @@ paths:
    post:
      consumes:
      - application/json
-      description: 网关任务接口按 model 选择平台模型；/api/v1 路径返回任务受理结果，OpenAI-compatible 路径同步返回兼容响应或
-        SSE 流。
+      description: 网关任务接口按 model 选择平台模型；除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果，OpenAI-compatible
+        路径同步返回兼容响应或 SSE 流。
      parameters:
      - description: true 时异步创建任务并返回 202
        in: header
@ -5809,8 +5859,8 @@ paths:
    post:
      consumes:
      - application/json
-      description: 网关任务接口按 model 选择平台模型；/api/v1 路径返回任务受理结果，OpenAI-compatible 路径同步返回兼容响应或
-        SSE 流。
+      description: 网关任务接口按 model 选择平台模型；除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果，OpenAI-compatible
+        路径同步返回兼容响应或 SSE 流。
      parameters:
      - description: true 时异步创建任务并返回 202
        in: header
@ -5870,8 +5920,8 @@ paths:
    post:
      consumes:
      - application/json
-      description: 网关任务接口按 model 选择平台模型；/api/v1 路径返回任务受理结果，OpenAI-compatible 路径同步返回兼容响应或
-        SSE 流。
+      description: 网关任务接口按 model 选择平台模型；除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果，OpenAI-compatible
+        路径同步返回兼容响应或 SSE 流。
      parameters:
      - description: true 时异步创建任务并返回 202
        in: header
@ -5948,8 +5998,8 @@ paths:
    post:
      consumes:
      - application/json
-      description: 网关任务接口按 model 选择平台模型；/api/v1 路径返回任务受理结果，OpenAI-compatible 路径同步返回兼容响应或
-        SSE 流。
+      description: 网关任务接口按 model 选择平台模型；除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果，OpenAI-compatible
+        路径同步返回兼容响应或 SSE 流。
      parameters:
      - description: true 时异步创建任务并返回 202
        in: header
@ -6079,8 +6129,8 @@ paths:
    post:
      consumes:
      - application/json
-      description: 网关任务接口按 model 选择平台模型；/api/v1 路径返回任务受理结果，OpenAI-compatible 路径同步返回兼容响应或
-        SSE 流。
+      description: 网关任务接口按 model 选择平台模型；除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果，OpenAI-compatible
+        路径同步返回兼容响应或 SSE 流。
      parameters:
      - description: true 时异步创建任务并返回 202
        in: header
@ -6184,8 +6234,8 @@ paths:
    post:
      consumes:
      - application/json
-      description: 网关任务接口按 model 选择平台模型；/api/v1 路径返回任务受理结果，OpenAI-compatible 路径同步返回兼容响应或
-        SSE 流。
+      description: 网关任务接口按 model 选择平台模型；除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果，OpenAI-compatible
+        路径同步返回兼容响应或 SSE 流。
      parameters:
      - description: true 时异步创建任务并返回 202
        in: header
@ -6245,8 +6295,8 @@ paths:
    post:
      consumes:
      - application/json
-      description: 网关任务接口按 model 选择平台模型；/api/v1 路径返回任务受理结果，OpenAI-compatible 路径同步返回兼容响应或
-        SSE 流。
+      description: 网关任务接口按 model 选择平台模型；除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果，OpenAI-compatible
+        路径同步返回兼容响应或 SSE 流。
      parameters:
      - description: true 时异步创建任务并返回 202
        in: header
@ -6306,8 +6356,8 @@ paths:
    post:
      consumes:
      - application/json
-      description: 网关任务接口按 model 选择平台模型；/api/v1 路径返回任务受理结果，OpenAI-compatible 路径同步返回兼容响应或
-        SSE 流。
+      description: 网关任务接口按 model 选择平台模型；除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果，OpenAI-compatible
+        路径同步返回兼容响应或 SSE 流。
      parameters:
      - description: true 时异步创建任务并返回 202
        in: header
--- a/apps/api/internal/httpapi/chat_completions_mode_test.go
+++ b/apps/api/internal/httpapi/chat_completions_mode_test.go
@ -0,0 +1,114 @@
+package httpapi
+
+import (
+	"context"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+
+	"github.com/easyai/easyai-ai-gateway/apps/api/internal/auth"
+	"github.com/easyai/easyai-ai-gateway/apps/api/internal/clients"
+	"github.com/easyai/easyai-ai-gateway/apps/api/internal/runner"
+	"github.com/easyai/easyai-ai-gateway/apps/api/internal/store"
+)
+
+func TestPlanTaskResponseTreatsAPIV1ChatCompletionsAsSynchronousCompatibleResponse(t *testing.T) {
+	req := httptest.NewRequest(http.MethodPost, "/api/v1/chat/completions", nil)
+	req.Header.Set("X-Async", "true")
+
+	plan := planTaskResponse("chat.completions", false, map[string]any{"stream": true}, req)
+
+	if plan.asyncMode {
+		t.Fatal("/api/v1/chat/completions must not enter async task mode")
+	}
+	if !plan.compatibleMode {
+		t.Fatal("/api/v1/chat/completions should return OpenAI-compatible response payloads")
+	}
+	if !plan.streamMode {
+		t.Fatal("stream=true should select SSE streaming mode")
+	}
+}
+
+func TestPlanTaskResponseKeepsAsyncTaskModeForOtherAPIV1Tasks(t *testing.T) {
+	req := httptest.NewRequest(http.MethodPost, "/api/v1/images/generations", nil)
+	req.Header.Set("X-Async", "true")
+
+	plan := planTaskResponse("images.generations", false, map[string]any{"stream": true}, req)
+
+	if !plan.asyncMode {
+		t.Fatal("non-chat /api/v1 task endpoints should keep X-Async task mode")
+	}
+	if plan.compatibleMode {
+		t.Fatal("non-compatible /api/v1 task endpoints should not return OpenAI-compatible payloads")
+	}
+}
+
+func TestWriteCompatibleTaskResponseReturnsJSONWhenStreamIsFalse(t *testing.T) {
+	executor := &fakeTaskExecutor{output: map[string]any{"id": "chatcmpl-test", "object": "chat.completion"}}
+	req := httptest.NewRequest(http.MethodPost, "/api/v1/chat/completions", nil)
+	recorder := httptest.NewRecorder()
+
+	writeCompatibleTaskResponse(context.Background(), recorder, req, executor, "chat.completions", "gpt-test", store.GatewayTask{ID: "task-test"}, &auth.User{}, false)
+
+	if recorder.Code != http.StatusOK {
+		t.Fatalf("status=%d want=%d body=%s", recorder.Code, http.StatusOK, recorder.Body.String())
+	}
+	if executor.executeCalls != 1 || executor.streamCalls != 0 {
+		t.Fatalf("expected non-stream execute only, got execute=%d stream=%d", executor.executeCalls, executor.streamCalls)
+	}
+	var body map[string]any
+	if err := json.Unmarshal(recorder.Body.Bytes(), &body); err != nil {
+		t.Fatalf("decode response body: %v body=%s", err, recorder.Body.String())
+	}
+	if body["object"] != "chat.completion" {
+		t.Fatalf("unexpected compatible JSON response: %+v", body)
+	}
+}
+
+func TestWriteCompatibleTaskResponseReturnsSSEWhenStreamIsTrue(t *testing.T) {
+	executor := &fakeTaskExecutor{
+		deltas: []string{"hel", "lo"},
+		output: map[string]any{"id": "chatcmpl-test", "object": "chat.completion"},
+	}
+	req := httptest.NewRequest(http.MethodPost, "/api/v1/chat/completions", nil)
+	recorder := httptest.NewRecorder()
+
+	writeCompatibleTaskResponse(context.Background(), recorder, req, executor, "chat.completions", "gpt-test", store.GatewayTask{ID: "task-test"}, &auth.User{}, true)
+
+	if executor.executeCalls != 0 || executor.streamCalls != 1 {
+		t.Fatalf("expected stream execute only, got execute=%d stream=%d", executor.executeCalls, executor.streamCalls)
+	}
+	if contentType := recorder.Header().Get("Content-Type"); contentType != "text/event-stream" {
+		t.Fatalf("Content-Type=%q want text/event-stream", contentType)
+	}
+	body := recorder.Body.String()
+	for _, want := range []string{"event: message", `"content":"hel"`, `"content":"lo"`, `"finish_reason":"stop"`} {
+		if !strings.Contains(body, want) {
+			t.Fatalf("SSE body missing %s: %s", want, body)
+		}
+	}
+}
+
+type fakeTaskExecutor struct {
+	executeCalls int
+	streamCalls  int
+	deltas       []string
+	output       map[string]any
+}
+
+func (f *fakeTaskExecutor) Execute(context.Context, store.GatewayTask, *auth.User) (runner.Result, error) {
+	f.executeCalls++
+	return runner.Result{Output: f.output}, nil
+}
+
+func (f *fakeTaskExecutor) ExecuteStream(_ context.Context, _ store.GatewayTask, _ *auth.User, onDelta clients.StreamDelta) (runner.Result, error) {
+	f.streamCalls++
+	for _, delta := range f.deltas {
+		if err := onDelta(delta); err != nil {
+			return runner.Result{}, err
+		}
+	}
+	return runner.Result{Output: f.output}, nil
+}
--- a/apps/api/internal/httpapi/core_flow_integration_test.go
+++ b/apps/api/internal/httpapi/core_flow_integration_test.go
@ -316,13 +316,17 @@ VALUES ($1, 5, '{"purpose":"core-flow"}'::jsonb)`, inviteCode); err != nil {
 		} `json:"task"`
 	}
 	defaultTextModel := "openai:gpt-4o-mini"
-	doJSON(t, server.URL, http.MethodPost, "/api/v1/chat/completions", apiKeyResponse.Secret, map[string]any{
+	var apiV1Chat map[string]any
+	doAPIV1ChatCompletionAndLoadTask(t, ctx, testPool, server.URL, apiKeyResponse.Secret, map[string]any{
 		"model":                defaultTextModel,
 		"runMode":              "simulation",
 		"simulation":           true,
 		"simulationDurationMs": 5,
 		"messages":             []map[string]any{{"role": "user", "content": "ping"}},
-	}, http.StatusAccepted, &taskResponse)
+	}, "default-chat-"+suffixText, http.StatusOK, &apiV1Chat, &taskResponse.Task)
+	if apiV1Chat["object"] != "chat.completion" {
+		t.Fatalf("unexpected api v1 chat response: %+v", apiV1Chat)
+	}
 	if taskResponse.Task.ID == "" || taskResponse.Task.Status != "succeeded" || taskResponse.Task.RunMode != "simulation" {
 		t.Fatalf("unexpected task response: %+v", taskResponse.Task)
 	}
@ -513,13 +517,13 @@ LIMIT 1`).Scan(&gptImageModelTypesRaw); err != nil {
 			ErrorCode string `json:"errorCode"`
 		} `json:"task"`
 	}
-	doJSON(t, server.URL, http.MethodPost, "/api/v1/chat/completions", chatOnlyAPIKeyResponse.Secret, map[string]any{
+	doAPIV1ChatCompletionAndLoadTask(t, ctx, testPool, server.URL, chatOnlyAPIKeyResponse.Secret, map[string]any{
 		"model":                deniedModel,
 		"runMode":              "simulation",
 		"simulation":           true,
 		"simulationDurationMs": 5,
 		"messages":             []map[string]any{{"role": "user", "content": "permission deny"}},
-	}, http.StatusAccepted, &deniedTask)
+	}, "permission-deny-"+suffixText, http.StatusNotFound, nil, &deniedTask.Task)
 	if deniedTask.Task.Status != "failed" || deniedTask.Task.ErrorCode != "no_model_candidate" {
 		t.Fatalf("deny access rule should hide denied model from runtime candidates: %+v", deniedTask.Task)
 	}
@ -561,13 +565,13 @@ LIMIT 1`).Scan(&gptImageModelTypesRaw); err != nil {
 			ErrorCode string `json:"errorCode"`
 		} `json:"task"`
 	}
-	doJSON(t, server.URL, http.MethodPost, "/api/v1/chat/completions", chatOnlyAPIKeyResponse.Secret, map[string]any{
+	doAPIV1ChatCompletionAndLoadTask(t, ctx, testPool, server.URL, chatOnlyAPIKeyResponse.Secret, map[string]any{
 		"model":                controlledModel,
 		"runMode":              "simulation",
 		"simulation":           true,
 		"simulationDurationMs": 5,
 		"messages":             []map[string]any{{"role": "user", "content": "allow should block other keys"}},
-	}, http.StatusAccepted, &blockedControlledTask)
+	}, "permission-allow-block-"+suffixText, http.StatusNotFound, nil, &blockedControlledTask.Task)
 	if blockedControlledTask.Task.Status != "failed" || blockedControlledTask.Task.ErrorCode != "no_model_candidate" {
 		t.Fatalf("allow access rule should make the resource unavailable to unmatched subjects: %+v", blockedControlledTask.Task)
 	}
@ -586,13 +590,13 @@ LIMIT 1`).Scan(&gptImageModelTypesRaw); err != nil {
 			Status string `json:"status"`
 		} `json:"task"`
 	}
-	doJSON(t, server.URL, http.MethodPost, "/api/v1/chat/completions", chatOnlyAPIKeyResponse.Secret, map[string]any{
+	doAPIV1ChatCompletionAndLoadTask(t, ctx, testPool, server.URL, chatOnlyAPIKeyResponse.Secret, map[string]any{
 		"model":                controlledModel,
 		"runMode":              "simulation",
 		"simulation":           true,
 		"simulationDurationMs": 5,
 		"messages":             []map[string]any{{"role": "user", "content": "allow should pass"}},
-	}, http.StatusAccepted, &allowedControlledTask)
+	}, "permission-allow-pass-"+suffixText, http.StatusOK, nil, &allowedControlledTask.Task)
 	if allowedControlledTask.Task.Status != "succeeded" {
 		t.Fatalf("matching allow access rule should make the controlled model usable: %+v", allowedControlledTask.Task)
 	}
@ -645,13 +649,13 @@ WHERE gateway_user_id = $1::uuid
 			FinalChargeAmount float64        `json:"finalChargeAmount"`
 		} `json:"task"`
 	}
-	doJSON(t, server.URL, http.MethodPost, "/api/v1/chat/completions", apiKeyResponse.Secret, map[string]any{
+	doAPIV1ChatCompletionAndLoadTask(t, ctx, testPool, server.URL, apiKeyResponse.Secret, map[string]any{
 		"model":                pricingModel,
 		"runMode":              "simulation",
 		"simulation":           true,
 		"simulationDurationMs": 5,
 		"messages":             []map[string]any{{"role": "user", "content": "priced ping"}},
-	}, http.StatusAccepted, &pricingTask)
+	}, "pricing-chat-"+suffixText, http.StatusOK, nil, &pricingTask.Task)
 	if pricingTask.Task.Status != "succeeded" || !floatNear(pricingTask.Task.FinalChargeAmount, 0.028) {
 		t.Fatalf("custom pricing rule set should drive text billing, got task=%+v", pricingTask.Task)
 	}
@ -757,14 +761,14 @@ WHERE reference_type = 'gateway_task'
 			ErrorCode string `json:"errorCode"`
 		} `json:"task"`
 	}
-	doJSON(t, server.URL, http.MethodPost, "/api/v1/chat/completions", apiKeyResponse.Secret, map[string]any{
+	doAPIV1ChatCompletionAndLoadTask(t, ctx, testPool, server.URL, apiKeyResponse.Secret, map[string]any{
 		"model":                rateLimitedModel,
 		"runMode":              "simulation",
 		"simulation":           true,
 		"simulationDurationMs": 5,
 		"simulationProfile":    "non_retryable_failure",
 		"messages":             []map[string]any{{"role": "user", "content": "failed first"}},
-	}, http.StatusAccepted, &rateLimitFailedTask)
+	}, "rate-limit-failed-first-"+suffixText, http.StatusBadGateway, nil, &rateLimitFailedTask.Task)
 	if rateLimitFailedTask.Task.Status != "failed" || rateLimitFailedTask.Task.ErrorCode != "bad_request" {
 		t.Fatalf("failed rate-limited task should fail before consuming rpm: %+v", rateLimitFailedTask.Task)
 	}
@ -774,13 +778,13 @@ WHERE reference_type = 'gateway_task'
 			Status string `json:"status"`
 		} `json:"task"`
 	}
-	doJSON(t, server.URL, http.MethodPost, "/api/v1/chat/completions", apiKeyResponse.Secret, map[string]any{
+	doAPIV1ChatCompletionAndLoadTask(t, ctx, testPool, server.URL, apiKeyResponse.Secret, map[string]any{
 		"model":                rateLimitedModel,
 		"runMode":              "simulation",
 		"simulation":           true,
 		"simulationDurationMs": 5,
 		"messages":             []map[string]any{{"role": "user", "content": "first"}},
-	}, http.StatusAccepted, &rateLimitTaskOne)
+	}, "rate-limit-first-"+suffixText, http.StatusOK, nil, &rateLimitTaskOne.Task)
 	if rateLimitTaskOne.Task.Status != "succeeded" {
 		t.Fatalf("first rate-limited task should succeed: %+v", rateLimitTaskOne.Task)
 	}
@ -790,13 +794,13 @@ WHERE reference_type = 'gateway_task'
 			ErrorCode string `json:"errorCode"`
 		} `json:"task"`
 	}
-	doJSON(t, server.URL, http.MethodPost, "/api/v1/chat/completions", apiKeyResponse.Secret, map[string]any{
+	doAPIV1ChatCompletionAndLoadTask(t, ctx, testPool, server.URL, apiKeyResponse.Secret, map[string]any{
 		"model":                rateLimitedModel,
 		"runMode":              "simulation",
 		"simulation":           true,
 		"simulationDurationMs": 5,
 		"messages":             []map[string]any{{"role": "user", "content": "second"}},
-	}, http.StatusAccepted, &rateLimitTaskTwo)
+	}, "rate-limit-second-"+suffixText, http.StatusTooManyRequests, nil, &rateLimitTaskTwo.Task)
 	if rateLimitTaskTwo.Task.Status != "failed" || rateLimitTaskTwo.Task.ErrorCode != "rate_limit" {
 		t.Fatalf("runtime policy rate limit should fail second task with rate_limit: %+v", rateLimitTaskTwo.Task)
 	}
@ -808,12 +812,12 @@ WHERE reference_type = 'gateway_task'
 			AsyncMode bool   `json:"asyncMode"`
 		} `json:"task"`
 	}
-	doJSONWithHeaders(t, server.URL, http.MethodPost, "/api/v1/chat/completions", apiKeyResponse.Secret, map[string]any{
+	doJSONWithHeaders(t, server.URL, http.MethodPost, "/api/v1/responses", apiKeyResponse.Secret, map[string]any{
 		"model":                rateLimitedModel,
 		"runMode":              "simulation",
 		"simulation":           true,
 		"simulationDurationMs": 5,
-		"messages":             []map[string]any{{"role": "user", "content": "async queued"}},
+		"input":                "async queued",
 	}, map[string]string{"X-Async": "true"}, http.StatusAccepted, &asyncRateLimitTask)
 	if asyncRateLimitTask.TaskID == "" || asyncRateLimitTask.Task.ID != asyncRateLimitTask.TaskID || !asyncRateLimitTask.Task.AsyncMode {
 		t.Fatalf("async task response should expose task id and async mode: %+v", asyncRateLimitTask)
@ -984,11 +988,11 @@ WHERE reference_type = 'gateway_task'
 			Status string `json:"status"`
 		} `json:"task"`
 	}
-	doJSON(t, server.URL, http.MethodPost, "/api/v1/chat/completions", apiKeyResponse.Secret, map[string]any{
+	doAPIV1ChatCompletionAndLoadTask(t, ctx, testPool, server.URL, apiKeyResponse.Secret, map[string]any{
 		"model":    failoverModel,
 		"runMode":  "simulation",
 		"messages": []map[string]any{{"role": "user", "content": "retry please"}},
-	}, http.StatusAccepted, &failoverTask)
+	}, "failover-chat-"+suffixText, http.StatusOK, nil, &failoverTask.Task)
 	if failoverTask.Task.Status != "succeeded" {
 		t.Fatalf("failover task should succeed through second client: %+v", failoverTask.Task)
 	}
@ -1103,13 +1107,13 @@ WHERE failed.id = $1::uuid`, failedPlatform.ID, successPlatform.ID, unrelatedPri
 			Status string `json:"status"`
 		} `json:"task"`
 	}
-	doJSON(t, server.URL, http.MethodPost, "/api/v1/chat/completions", apiKeyResponse.Secret, map[string]any{
+	doAPIV1ChatCompletionAndLoadTask(t, ctx, testPool, server.URL, apiKeyResponse.Secret, map[string]any{
 		"model":                degradeModel,
 		"runMode":              "simulation",
 		"simulation":           true,
 		"simulationDurationMs": 5,
 		"messages":             []map[string]any{{"role": "user", "content": "degrade please"}},
-	}, http.StatusAccepted, &degradeTask)
+	}, "degrade-chat-"+suffixText, http.StatusOK, nil, &degradeTask.Task)
 	if degradeTask.Task.Status != "succeeded" {
 		t.Fatalf("degrade task should fail over after cooling down failed model: %+v", degradeTask.Task)
 	}
@ -1170,13 +1174,13 @@ WHERE m.platform_id = $1::uuid
 			ErrorCode string `json:"errorCode"`
 		} `json:"task"`
 	}
-	doJSON(t, server.URL, http.MethodPost, "/api/v1/chat/completions", apiKeyResponse.Secret, map[string]any{
+	doAPIV1ChatCompletionAndLoadTask(t, ctx, testPool, server.URL, apiKeyResponse.Secret, map[string]any{
 		"model":                autoDisableModel,
 		"runMode":              "simulation",
 		"simulation":           true,
 		"simulationDurationMs": 5,
 		"messages":             []map[string]any{{"role": "user", "content": "disable please"}},
-	}, http.StatusAccepted, &autoDisableTask)
+	}, "auto-disable-chat-"+suffixText, http.StatusBadGateway, nil, &autoDisableTask.Task)
 	if autoDisableTask.Task.Status != "failed" || autoDisableTask.Task.ErrorCode != "invalid_api_key" {
 		t.Fatalf("auto disable task should fail with invalid_api_key: %+v", autoDisableTask.Task)
 	}
@ -1293,12 +1297,12 @@ WHERE m.platform_id = $1::uuid
 			AsyncMode bool   `json:"asyncMode"`
 		} `json:"task"`
 	}
-	doJSONWithHeaders(t, server.URL, http.MethodPost, "/api/v1/chat/completions", apiKeyResponse.Secret, map[string]any{
+	doJSONWithHeaders(t, server.URL, http.MethodPost, "/api/v1/responses", apiKeyResponse.Secret, map[string]any{
 		"model":                defaultTextModel,
 		"runMode":              "simulation",
 		"simulation":           true,
 		"simulationDurationMs": 2000,
-		"messages":             []map[string]any{{"role": "user", "content": "river worker restart"}},
+		"input":                "river worker restart",
 	}, map[string]string{"X-Async": "true"}, http.StatusAccepted, &restartAsyncTask)
 	if restartAsyncTask.TaskID == "" || !restartAsyncTask.Task.AsyncMode {
 		t.Fatalf("restart async task should be accepted as async: %+v", restartAsyncTask)
@ -1453,6 +1457,20 @@ func doJSONWithHeaders(t *testing.T, baseURL string, method string, path string,
 	}
 }

+func doAPIV1ChatCompletionAndLoadTask(t *testing.T, ctx context.Context, pool *pgxpool.Pool, baseURL string, token string, payload map[string]any, marker string, expectedStatus int, responseOut any, taskDetailOut any) string {
+	t.Helper()
+	payload["integrationTestMarker"] = marker
+	if responseOut == nil {
+		responseOut = &map[string]any{}
+	}
+	doJSON(t, baseURL, http.MethodPost, "/api/v1/chat/completions", token, payload, expectedStatus, responseOut)
+	taskID := waitForTaskIDByRequestField(t, ctx, pool, "integrationTestMarker", marker, 2*time.Second)
+	if taskDetailOut != nil {
+		doJSON(t, baseURL, http.MethodGet, "/api/v1/tasks/"+taskID, token, nil, http.StatusOK, taskDetailOut)
+	}
+	return taskID
+}
+
 type taskWaitDetail struct {
 	ID       string `json:"id"`
 	Status   string `json:"status"`
@ -1481,6 +1499,11 @@ func waitForTaskStatus(t *testing.T, baseURL string, token string, taskID string
 }

 func waitForTaskIDByRequestMarker(t *testing.T, ctx context.Context, pool *pgxpool.Pool, marker string, timeout time.Duration) string {
+	t.Helper()
+	return waitForTaskIDByRequestField(t, ctx, pool, "cancelTestId", marker, timeout)
+}
+
+func waitForTaskIDByRequestField(t *testing.T, ctx context.Context, pool *pgxpool.Pool, key string, value string, timeout time.Duration) string {
 	t.Helper()
 	deadline := time.Now().Add(timeout)
 	for time.Now().Before(deadline) {
@ -1488,15 +1511,15 @@ func waitForTaskIDByRequestMarker(t *testing.T, ctx context.Context, pool *pgxpo
 		err := pool.QueryRow(ctx, `
 SELECT id::text
 FROM gateway_tasks
-WHERE request->>'cancelTestId' = $1
+WHERE request->>$1 = $2
 ORDER BY created_at DESC
-LIMIT 1`, marker).Scan(&taskID)
+LIMIT 1`, key, value).Scan(&taskID)
 		if err == nil && taskID != "" {
 			return taskID
 		}
 		time.Sleep(50 * time.Millisecond)
 	}
-	t.Fatalf("task with request marker %s was not created within %s", marker, timeout)
+	t.Fatalf("task with request %s=%s was not created within %s", key, value, timeout)
 	return ""
 }

@ -1577,13 +1600,13 @@ func assertLoadAvoidanceSimulatedRetryChain(t *testing.T, ctx context.Context, t
 			ErrorCode string `json:"errorCode"`
 		} `json:"task"`
 	}
-	doJSON(t, baseURL, http.MethodPost, "/api/v1/chat/completions", runtimeToken, map[string]any{
+	doAPIV1ChatCompletionAndLoadTask(t, ctx, testPool, baseURL, runtimeToken, map[string]any{
 		"model":                model,
 		"runMode":              "simulation",
 		"simulation":           true,
 		"simulationDurationMs": 5,
 		"messages":             []map[string]any{{"role": "user", "content": "load avoidance retry chain"}},
-	}, http.StatusAccepted, &taskResponse)
+	}, "load-avoidance-"+suffixText, http.StatusBadGateway, nil, &taskResponse.Task)
 	if taskResponse.Task.ID == "" || taskResponse.Task.Status != "failed" || taskResponse.Task.ErrorCode != "bad_request" {
 		t.Fatalf("load avoidance task should only fail after avoided clients are retried, got %+v", taskResponse.Task)
 	}
--- a/apps/api/internal/httpapi/handlers.go
+++ b/apps/api/internal/httpapi/handlers.go
@ -13,6 +13,7 @@ import (
 	"github.com/easyai/easyai-ai-gateway/apps/api/internal/auth"
 	"github.com/easyai/easyai-ai-gateway/apps/api/internal/clients"
 	"github.com/easyai/easyai-ai-gateway/apps/api/internal/netproxy"
+	"github.com/easyai/easyai-ai-gateway/apps/api/internal/runner"
 	"github.com/easyai/easyai-ai-gateway/apps/api/internal/store"
 )

@ -858,7 +859,7 @@ func (s *Server) listModelRateLimitStatuses(w http.ResponseWriter, r *http.Reque

 // createTask godoc
 // @Summary 创建或执行 AI 任务
-// @Description 网关任务接口按 model 选择平台模型；/api/v1 路径返回任务受理结果，OpenAI-compatible 路径同步返回兼容响应或 SSE 流。
+// @Description 网关任务接口按 model 选择平台模型；除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果，OpenAI-compatible 路径同步返回兼容响应或 SSE 流。
 // @Tags tasks
 // @Accept json
 // @Produce json
@ -874,7 +875,6 @@ func (s *Server) listModelRateLimitStatuses(w http.ResponseWriter, r *http.Reque
 // @Failure 404 {object} ErrorEnvelope
 // @Failure 429 {object} ErrorEnvelope
 // @Failure 502 {object} ErrorEnvelope
-// @Router /api/v1/chat/completions [post]
 // @Router /api/v1/responses [post]
 // @Router /api/v1/images/generations [post]
 // @Router /api/v1/images/edits [post]
@ -909,13 +909,13 @@ func (s *Server) createTask(kind string, compatible bool) http.Handler {
 			writeError(w, http.StatusForbidden, "api key scope does not allow this capability")
 			return
 		}
-		asyncMode := asyncRequest(r)
+		responsePlan := planTaskResponse(kind, compatible, body, r)

 		task, err := s.store.CreateTask(r.Context(), store.CreateTaskInput{
 			Kind:    kind,
 			Model:   model,
 			RunMode: runModeFromRequest(body),
-			Async:   asyncMode,
+			Async:   responsePlan.asyncMode,
 			Request: body,
 		}, user)
 		if err != nil {
@ -923,7 +923,7 @@ func (s *Server) createTask(kind string, compatible bool) http.Handler {
 			writeError(w, http.StatusInternalServerError, "create task failed")
 			return
 		}
-		if asyncMode {
+		if responsePlan.asyncMode {
 			if err := s.runner.EnqueueAsyncTask(r.Context(), task); err != nil {
 				writeError(w, http.StatusInternalServerError, err.Error(), "enqueue_failed")
 				return
@ -933,65 +933,8 @@ func (s *Server) createTask(kind string, compatible bool) http.Handler {
 		}
 		runCtx, cancelRun := s.requestExecutionContext(r)
 		defer cancelRun()
-		if compatible {
-			if boolValue(body, "stream") {
-				flusher := prepareCompatibleStream(w)
-				result, runErr := s.runner.ExecuteStream(runCtx, task, user, func(delta string) error {
-					if !requestStillConnected(r) {
-						return nil
-					}
-					writeCompatibleDelta(w, kind, model, delta)
-					if flusher != nil {
-						flusher.Flush()
-					}
-					return nil
-				})
-				if runErr != nil {
-					if !requestStillConnected(r) {
-						return
-					}
-					status := statusFromRunError(runErr)
-					errorPayload := map[string]any{
-						"code":    runErrorCode(runErr),
-						"message": runErrorMessage(runErr),
-						"status":  status,
-					}
-					if result.Task.ID != "" {
-						errorPayload["taskId"] = result.Task.ID
-					}
-					if result.Task.RequestID != "" {
-						errorPayload["requestId"] = result.Task.RequestID
-					}
-					for key, value := range runErrorDetails(runErr) {
-						errorPayload[key] = value
-					}
-					sendSSE(w, "error", map[string]any{"error": errorPayload})
-					if flusher != nil {
-						flusher.Flush()
-					}
-					return
-				}
-				if !requestStillConnected(r) {
-					return
-				}
-				writeCompatibleDone(w, kind, model, result.Output)
-				if flusher != nil {
-					flusher.Flush()
-				}
-				return
-			}
-			result, runErr := s.runner.Execute(runCtx, task, user)
-			if runErr != nil {
-				if !requestStillConnected(r) {
-					return
-				}
-				writeErrorWithDetails(w, statusFromRunError(runErr), runErrorMessage(runErr), runErrorDetails(runErr), runErrorCode(runErr))
-				return
-			}
-			if !requestStillConnected(r) {
-				return
-			}
-			writeJSON(w, http.StatusOK, result.Output)
+		if responsePlan.compatibleMode {
+			writeCompatibleTaskResponse(runCtx, w, r, s.runner, kind, model, task, user, responsePlan.streamMode)
 			return
 		}
 		result, runErr := s.runner.Execute(runCtx, task, user)
@ -1006,6 +949,29 @@ func (s *Server) createTask(kind string, compatible bool) http.Handler {
 	})
 }

+// createAPIV1ChatCompletions godoc
+// @Summary 创建 Chat Completions
+// @Description /api/v1/chat/completions 同步执行：stream=true 返回 text/event-stream SSE；stream=false 或未传返回兼容 JSON；该接口忽略 X-Async。
+// @Tags tasks
+// @Accept json
+// @Produce json
+// @Produce text/event-stream
+// @Security BearerAuth
+// @Param X-Async header bool false "该接口忽略此参数"
+// @Param input body TaskRequest true "Chat Completions 请求"
+// @Success 200 {object} ChatCompletionCompatibleResponse
+// @Failure 400 {object} ErrorEnvelope
+// @Failure 401 {object} ErrorEnvelope
+// @Failure 402 {object} ErrorEnvelope
+// @Failure 403 {object} ErrorEnvelope
+// @Failure 404 {object} ErrorEnvelope
+// @Failure 429 {object} ErrorEnvelope
+// @Failure 502 {object} ErrorEnvelope
+// @Router /api/v1/chat/completions [post]
+func (s *Server) createAPIV1ChatCompletions() http.Handler {
+	return s.createTask("chat.completions", false)
+}
+
 func (s *Server) requestExecutionContext(r *http.Request) (context.Context, context.CancelFunc) {
 	base := context.WithoutCancel(r.Context())
 	if s.ctx == nil {
@ -1031,11 +997,98 @@ func requestStillConnected(r *http.Request) bool {
 	}
 }

+type taskExecutor interface {
+	Execute(context.Context, store.GatewayTask, *auth.User) (runner.Result, error)
+	ExecuteStream(context.Context, store.GatewayTask, *auth.User, clients.StreamDelta) (runner.Result, error)
+}
+
+func writeCompatibleTaskResponse(runCtx context.Context, w http.ResponseWriter, r *http.Request, executor taskExecutor, kind string, model string, task store.GatewayTask, user *auth.User, streamMode bool) {
+	if streamMode {
+		flusher := prepareCompatibleStream(w)
+		result, runErr := executor.ExecuteStream(runCtx, task, user, func(delta string) error {
+			if !requestStillConnected(r) {
+				return nil
+			}
+			writeCompatibleDelta(w, kind, model, delta)
+			if flusher != nil {
+				flusher.Flush()
+			}
+			return nil
+		})
+		if runErr != nil {
+			if !requestStillConnected(r) {
+				return
+			}
+			status := statusFromRunError(runErr)
+			errorPayload := map[string]any{
+				"code":    runErrorCode(runErr),
+				"message": runErrorMessage(runErr),
+				"status":  status,
+			}
+			if result.Task.ID != "" {
+				errorPayload["taskId"] = result.Task.ID
+			}
+			if result.Task.RequestID != "" {
+				errorPayload["requestId"] = result.Task.RequestID
+			}
+			for key, value := range runErrorDetails(runErr) {
+				errorPayload[key] = value
+			}
+			sendSSE(w, "error", map[string]any{"error": errorPayload})
+			if flusher != nil {
+				flusher.Flush()
+			}
+			return
+		}
+		if !requestStillConnected(r) {
+			return
+		}
+		writeCompatibleDone(w, kind, model, result.Output)
+		if flusher != nil {
+			flusher.Flush()
+		}
+		return
+	}
+
+	result, runErr := executor.Execute(runCtx, task, user)
+	if runErr != nil {
+		if !requestStillConnected(r) {
+			return
+		}
+		writeErrorWithDetails(w, statusFromRunError(runErr), runErrorMessage(runErr), runErrorDetails(runErr), runErrorCode(runErr))
+		return
+	}
+	if !requestStillConnected(r) {
+		return
+	}
+	writeJSON(w, http.StatusOK, result.Output)
+}
+
 func asyncRequest(r *http.Request) bool {
 	value := strings.TrimSpace(strings.ToLower(r.Header.Get("x-async")))
 	return value == "1" || value == "true" || value == "yes" || value == "on"
 }

+type taskResponsePlan struct {
+	asyncMode      bool
+	compatibleMode bool
+	streamMode     bool
+}
+
+func planTaskResponse(kind string, compatible bool, body map[string]any, r *http.Request) taskResponsePlan {
+	asyncMode := asyncRequest(r)
+	compatibleMode := compatible
+	if kind == "chat.completions" && !compatible {
+		asyncMode = false
+		compatibleMode = true
+	}
+	return taskResponsePlan{
+		asyncMode:      asyncMode,
+		compatibleMode: compatibleMode,
+		streamMode:     boolValue(body, "stream"),
+	}
+}
+
 func writeTaskAccepted(w http.ResponseWriter, task store.GatewayTask) {
 	writeJSON(w, http.StatusAccepted, map[string]any{
 		"taskId": task.ID,
--- a/apps/api/internal/httpapi/openapi_models.go
+++ b/apps/api/internal/httpapi/openapi_models.go
@ -242,6 +242,32 @@ type CompatibleResponse struct {
 	Usage   map[string]interface{}   `json:"usage,omitempty"`
 }

+type ChatCompletionCompatibleResponse struct {
+	ID      string                 `json:"id" example:"chatcmpl-123"`
+	Object  string                 `json:"object" example:"chat.completion"`
+	Created int64                  `json:"created,omitempty" example:"1710000000"`
+	Model   string                 `json:"model" example:"gpt-4o-mini"`
+	Choices []ChatCompletionChoice `json:"choices"`
+	Usage   *ChatCompletionUsage   `json:"usage,omitempty"`
+}
+
+type ChatCompletionChoice struct {
+	Index        int                         `json:"index" example:"0"`
+	Message      ChatCompletionChoiceMessage `json:"message"`
+	FinishReason string                      `json:"finish_reason,omitempty" example:"stop"`
+}
+
+type ChatCompletionChoiceMessage struct {
+	Role    string `json:"role" example:"assistant"`
+	Content string `json:"content" example:"Hello"`
+}
+
+type ChatCompletionUsage struct {
+	PromptTokens     int `json:"prompt_tokens,omitempty" example:"12"`
+	CompletionTokens int `json:"completion_tokens,omitempty" example:"8"`
+	TotalTokens      int `json:"total_tokens,omitempty" example:"20"`
+}
+
 type NetworkProxyConfigResponse struct {
 	GlobalHTTPProxy       string `json:"globalHttpProxy" example:"http://127.0.0.1:7890"`
 	GlobalHTTPProxySet    bool   `json:"globalHttpProxySet" example:"true"`
--- a/apps/api/internal/httpapi/server.go
+++ b/apps/api/internal/httpapi/server.go
@ -126,7 +126,7 @@ func NewServerWithContext(ctx context.Context, cfg config.Config, db *store.Stor
 	mux.Handle("GET /api/v1/playground/models", server.auth.Require(auth.PermissionBasic, http.HandlerFunc(server.listPlayableModels)))
 	mux.Handle("GET /api/admin/runtime/rate-limit-windows", server.requireAdmin(auth.PermissionPower, http.HandlerFunc(server.listRateLimitWindows)))
 	mux.Handle("GET /api/admin/runtime/model-rate-limits", server.requireAdmin(auth.PermissionPower, http.HandlerFunc(server.listModelRateLimitStatuses)))
-	mux.Handle("POST /api/v1/chat/completions", server.auth.Require(auth.PermissionBasic, server.createTask("chat.completions", false)))
+	mux.Handle("POST /api/v1/chat/completions", server.auth.Require(auth.PermissionBasic, server.createAPIV1ChatCompletions()))
 	mux.Handle("POST /api/v1/responses", server.auth.Require(auth.PermissionBasic, server.createTask("responses", false)))
 	mux.Handle("POST /api/v1/images/generations", server.auth.Require(auth.PermissionBasic, server.createTask("images.generations", false)))
 	mux.Handle("POST /api/v1/images/edits", server.auth.Require(auth.PermissionBasic, server.createTask("images.edits", false)))