From ae197a742fce6f3ee2656f14739531127b5b643f Mon Sep 17 00:00:00 2001 From: chensipeng Date: Sat, 16 May 2026 00:19:39 +0800 Subject: [PATCH] =?UTF-8?q?docs(api):=20=E5=90=8C=E6=AD=A5=20/api/v1/chat/?= =?UTF-8?q?completions=20=E7=9A=84=20OpenAPI=20=E4=B8=8E=E5=90=8C=E6=AD=A5?= =?UTF-8?q?=E5=93=8D=E5=BA=94?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 补充 Chat Completions 的兼容响应模型与路由注释,确保 /api/v1/chat/completions 按同步兼容格式返回并更新对应测试与 Swagger 文档。 --- apps/api/docs/swagger.json | 119 +++++++++--- apps/api/docs/swagger.yaml | 118 ++++++++---- .../httpapi/chat_completions_mode_test.go | 114 +++++++++++ .../httpapi/core_flow_integration_test.go | 85 +++++--- apps/api/internal/httpapi/handlers.go | 181 +++++++++++------- apps/api/internal/httpapi/openapi_models.go | 26 +++ apps/api/internal/httpapi/server.go | 2 +- 7 files changed, 491 insertions(+), 154 deletions(-) create mode 100644 apps/api/internal/httpapi/chat_completions_mode_test.go diff --git a/apps/api/docs/swagger.json b/apps/api/docs/swagger.json index 653288e..805b611 100644 --- a/apps/api/docs/swagger.json +++ b/apps/api/docs/swagger.json @@ -4000,26 +4000,27 @@ "BearerAuth": [] } ], - "description": "网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。", + "description": "/api/v1/chat/completions 同步执行:stream=true 返回 text/event-stream SSE;stream=false 或未传返回兼容 JSON;该接口忽略 X-Async。", "consumes": [ "application/json" ], "produces": [ - "application/json" + "application/json", + "text/event-stream" ], "tags": [ "tasks" ], - "summary": "创建或执行 AI 任务", + "summary": "创建 Chat Completions", "parameters": [ { "type": "boolean", - "description": "true 时异步创建任务并返回 202", + "description": "该接口忽略此参数", "name": "X-Async", "in": "header" }, { - "description": "AI 任务请求,字段随任务类型变化", + "description": "Chat Completions 请求", "name": "input", "in": "body", "required": true, @@ -4032,13 +4033,7 @@ "200": { "description": "OK", "schema": { - "$ref": "#/definitions/httpapi.CompatibleResponse" - } - }, - "202": { - "description": "Accepted", - "schema": { - "$ref": "#/definitions/httpapi.TaskAcceptedResponse" + "$ref": "#/definitions/httpapi.ChatCompletionCompatibleResponse" } }, "400": { @@ -4161,7 +4156,7 @@ "BearerAuth": [] } ], - "description": "网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。", + "description": "网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。", "consumes": [ "application/json" ], @@ -4254,7 +4249,7 @@ "BearerAuth": [] } ], - "description": "网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。", + "description": "网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。", "consumes": [ "application/json" ], @@ -4653,7 +4648,7 @@ "BearerAuth": [] } ], - "description": "网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。", + "description": "网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。", "consumes": [ "application/json" ], @@ -4985,7 +4980,7 @@ "BearerAuth": [] } ], - "description": "网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。", + "description": "网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。", "consumes": [ "application/json" ], @@ -5452,7 +5447,7 @@ "BearerAuth": [] } ], - "description": "网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。", + "description": "网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。", "consumes": [ "application/json" ], @@ -5565,7 +5560,7 @@ "BearerAuth": [] } ], - "description": "网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。", + "description": "网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。", "consumes": [ "application/json" ], @@ -5658,7 +5653,7 @@ "BearerAuth": [] } ], - "description": "网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。", + "description": "网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。", "consumes": [ "application/json" ], @@ -5777,7 +5772,7 @@ "BearerAuth": [] } ], - "description": "网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。", + "description": "网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。", "consumes": [ "application/json" ], @@ -5976,7 +5971,7 @@ "BearerAuth": [] } ], - "description": "网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。", + "description": "网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。", "consumes": [ "application/json" ], @@ -6137,7 +6132,7 @@ "BearerAuth": [] } ], - "description": "网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。", + "description": "网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。", "consumes": [ "application/json" ], @@ -6230,7 +6225,7 @@ "BearerAuth": [] } ], - "description": "网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。", + "description": "网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。", "consumes": [ "application/json" ], @@ -6323,7 +6318,7 @@ "BearerAuth": [] } ], - "description": "网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。", + "description": "网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。", "consumes": [ "application/json" ], @@ -6551,6 +6546,82 @@ } } }, + "httpapi.ChatCompletionChoice": { + "type": "object", + "properties": { + "finish_reason": { + "type": "string", + "example": "stop" + }, + "index": { + "type": "integer", + "example": 0 + }, + "message": { + "$ref": "#/definitions/httpapi.ChatCompletionChoiceMessage" + } + } + }, + "httpapi.ChatCompletionChoiceMessage": { + "type": "object", + "properties": { + "content": { + "type": "string", + "example": "Hello" + }, + "role": { + "type": "string", + "example": "assistant" + } + } + }, + "httpapi.ChatCompletionCompatibleResponse": { + "type": "object", + "properties": { + "choices": { + "type": "array", + "items": { + "$ref": "#/definitions/httpapi.ChatCompletionChoice" + } + }, + "created": { + "type": "integer", + "example": 1710000000 + }, + "id": { + "type": "string", + "example": "chatcmpl-123" + }, + "model": { + "type": "string", + "example": "gpt-4o-mini" + }, + "object": { + "type": "string", + "example": "chat.completion" + }, + "usage": { + "$ref": "#/definitions/httpapi.ChatCompletionUsage" + } + } + }, + "httpapi.ChatCompletionUsage": { + "type": "object", + "properties": { + "completion_tokens": { + "type": "integer", + "example": 8 + }, + "prompt_tokens": { + "type": "integer", + "example": 12 + }, + "total_tokens": { + "type": "integer", + "example": 20 + } + } + }, "httpapi.ChatMessage": { "type": "object", "properties": { diff --git a/apps/api/docs/swagger.yaml b/apps/api/docs/swagger.yaml index f60925b..16a7825 100644 --- a/apps/api/docs/swagger.yaml +++ b/apps/api/docs/swagger.yaml @@ -92,6 +92,59 @@ definitions: $ref: '#/definitions/store.CatalogProvider' type: array type: object + httpapi.ChatCompletionChoice: + properties: + finish_reason: + example: stop + type: string + index: + example: 0 + type: integer + message: + $ref: '#/definitions/httpapi.ChatCompletionChoiceMessage' + type: object + httpapi.ChatCompletionChoiceMessage: + properties: + content: + example: Hello + type: string + role: + example: assistant + type: string + type: object + httpapi.ChatCompletionCompatibleResponse: + properties: + choices: + items: + $ref: '#/definitions/httpapi.ChatCompletionChoice' + type: array + created: + example: 1710000000 + type: integer + id: + example: chatcmpl-123 + type: string + model: + example: gpt-4o-mini + type: string + object: + example: chat.completion + type: string + usage: + $ref: '#/definitions/httpapi.ChatCompletionUsage' + type: object + httpapi.ChatCompletionUsage: + properties: + completion_tokens: + example: 8 + type: integer + prompt_tokens: + example: 12 + type: integer + total_tokens: + example: 20 + type: integer + type: object httpapi.ChatMessage: properties: content: @@ -4800,14 +4853,14 @@ paths: post: consumes: - application/json - description: 网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 - SSE 流。 + description: /api/v1/chat/completions 同步执行:stream=true 返回 text/event-stream + SSE;stream=false 或未传返回兼容 JSON;该接口忽略 X-Async。 parameters: - - description: true 时异步创建任务并返回 202 + - description: 该接口忽略此参数 in: header name: X-Async type: boolean - - description: AI 任务请求,字段随任务类型变化 + - description: Chat Completions 请求 in: body name: input required: true @@ -4815,15 +4868,12 @@ paths: $ref: '#/definitions/httpapi.TaskRequest' produces: - application/json + - text/event-stream responses: "200": description: OK schema: - $ref: '#/definitions/httpapi.CompatibleResponse' - "202": - description: Accepted - schema: - $ref: '#/definitions/httpapi.TaskAcceptedResponse' + $ref: '#/definitions/httpapi.ChatCompletionCompatibleResponse' "400": description: Bad Request schema: @@ -4854,7 +4904,7 @@ paths: $ref: '#/definitions/httpapi.ErrorEnvelope' security: - BearerAuth: [] - summary: 创建或执行 AI 任务 + summary: 创建 Chat Completions tags: - tasks /api/v1/files/upload: @@ -4905,8 +4955,8 @@ paths: post: consumes: - application/json - description: 网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 - SSE 流。 + description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible + 路径同步返回兼容响应或 SSE 流。 parameters: - description: true 时异步创建任务并返回 202 in: header @@ -4966,8 +5016,8 @@ paths: post: consumes: - application/json - description: 网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 - SSE 流。 + description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible + 路径同步返回兼容响应或 SSE 流。 parameters: - description: true 时异步创建任务并返回 202 in: header @@ -5220,8 +5270,8 @@ paths: post: consumes: - application/json - description: 网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 - SSE 流。 + description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible + 路径同步返回兼容响应或 SSE 流。 parameters: - description: true 时异步创建任务并返回 202 in: header @@ -5434,8 +5484,8 @@ paths: post: consumes: - application/json - description: 网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 - SSE 流。 + description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible + 路径同步返回兼容响应或 SSE 流。 parameters: - description: true 时异步创建任务并返回 202 in: header @@ -5735,8 +5785,8 @@ paths: post: consumes: - application/json - description: 网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 - SSE 流。 + description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible + 路径同步返回兼容响应或 SSE 流。 parameters: - description: true 时异步创建任务并返回 202 in: header @@ -5809,8 +5859,8 @@ paths: post: consumes: - application/json - description: 网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 - SSE 流。 + description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible + 路径同步返回兼容响应或 SSE 流。 parameters: - description: true 时异步创建任务并返回 202 in: header @@ -5870,8 +5920,8 @@ paths: post: consumes: - application/json - description: 网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 - SSE 流。 + description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible + 路径同步返回兼容响应或 SSE 流。 parameters: - description: true 时异步创建任务并返回 202 in: header @@ -5948,8 +5998,8 @@ paths: post: consumes: - application/json - description: 网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 - SSE 流。 + description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible + 路径同步返回兼容响应或 SSE 流。 parameters: - description: true 时异步创建任务并返回 202 in: header @@ -6079,8 +6129,8 @@ paths: post: consumes: - application/json - description: 网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 - SSE 流。 + description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible + 路径同步返回兼容响应或 SSE 流。 parameters: - description: true 时异步创建任务并返回 202 in: header @@ -6184,8 +6234,8 @@ paths: post: consumes: - application/json - description: 网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 - SSE 流。 + description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible + 路径同步返回兼容响应或 SSE 流。 parameters: - description: true 时异步创建任务并返回 202 in: header @@ -6245,8 +6295,8 @@ paths: post: consumes: - application/json - description: 网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 - SSE 流。 + description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible + 路径同步返回兼容响应或 SSE 流。 parameters: - description: true 时异步创建任务并返回 202 in: header @@ -6306,8 +6356,8 @@ paths: post: consumes: - application/json - description: 网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 - SSE 流。 + description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible + 路径同步返回兼容响应或 SSE 流。 parameters: - description: true 时异步创建任务并返回 202 in: header diff --git a/apps/api/internal/httpapi/chat_completions_mode_test.go b/apps/api/internal/httpapi/chat_completions_mode_test.go new file mode 100644 index 0000000..599046c --- /dev/null +++ b/apps/api/internal/httpapi/chat_completions_mode_test.go @@ -0,0 +1,114 @@ +package httpapi + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "strings" + "testing" + + "github.com/easyai/easyai-ai-gateway/apps/api/internal/auth" + "github.com/easyai/easyai-ai-gateway/apps/api/internal/clients" + "github.com/easyai/easyai-ai-gateway/apps/api/internal/runner" + "github.com/easyai/easyai-ai-gateway/apps/api/internal/store" +) + +func TestPlanTaskResponseTreatsAPIV1ChatCompletionsAsSynchronousCompatibleResponse(t *testing.T) { + req := httptest.NewRequest(http.MethodPost, "/api/v1/chat/completions", nil) + req.Header.Set("X-Async", "true") + + plan := planTaskResponse("chat.completions", false, map[string]any{"stream": true}, req) + + if plan.asyncMode { + t.Fatal("/api/v1/chat/completions must not enter async task mode") + } + if !plan.compatibleMode { + t.Fatal("/api/v1/chat/completions should return OpenAI-compatible response payloads") + } + if !plan.streamMode { + t.Fatal("stream=true should select SSE streaming mode") + } +} + +func TestPlanTaskResponseKeepsAsyncTaskModeForOtherAPIV1Tasks(t *testing.T) { + req := httptest.NewRequest(http.MethodPost, "/api/v1/images/generations", nil) + req.Header.Set("X-Async", "true") + + plan := planTaskResponse("images.generations", false, map[string]any{"stream": true}, req) + + if !plan.asyncMode { + t.Fatal("non-chat /api/v1 task endpoints should keep X-Async task mode") + } + if plan.compatibleMode { + t.Fatal("non-compatible /api/v1 task endpoints should not return OpenAI-compatible payloads") + } +} + +func TestWriteCompatibleTaskResponseReturnsJSONWhenStreamIsFalse(t *testing.T) { + executor := &fakeTaskExecutor{output: map[string]any{"id": "chatcmpl-test", "object": "chat.completion"}} + req := httptest.NewRequest(http.MethodPost, "/api/v1/chat/completions", nil) + recorder := httptest.NewRecorder() + + writeCompatibleTaskResponse(context.Background(), recorder, req, executor, "chat.completions", "gpt-test", store.GatewayTask{ID: "task-test"}, &auth.User{}, false) + + if recorder.Code != http.StatusOK { + t.Fatalf("status=%d want=%d body=%s", recorder.Code, http.StatusOK, recorder.Body.String()) + } + if executor.executeCalls != 1 || executor.streamCalls != 0 { + t.Fatalf("expected non-stream execute only, got execute=%d stream=%d", executor.executeCalls, executor.streamCalls) + } + var body map[string]any + if err := json.Unmarshal(recorder.Body.Bytes(), &body); err != nil { + t.Fatalf("decode response body: %v body=%s", err, recorder.Body.String()) + } + if body["object"] != "chat.completion" { + t.Fatalf("unexpected compatible JSON response: %+v", body) + } +} + +func TestWriteCompatibleTaskResponseReturnsSSEWhenStreamIsTrue(t *testing.T) { + executor := &fakeTaskExecutor{ + deltas: []string{"hel", "lo"}, + output: map[string]any{"id": "chatcmpl-test", "object": "chat.completion"}, + } + req := httptest.NewRequest(http.MethodPost, "/api/v1/chat/completions", nil) + recorder := httptest.NewRecorder() + + writeCompatibleTaskResponse(context.Background(), recorder, req, executor, "chat.completions", "gpt-test", store.GatewayTask{ID: "task-test"}, &auth.User{}, true) + + if executor.executeCalls != 0 || executor.streamCalls != 1 { + t.Fatalf("expected stream execute only, got execute=%d stream=%d", executor.executeCalls, executor.streamCalls) + } + if contentType := recorder.Header().Get("Content-Type"); contentType != "text/event-stream" { + t.Fatalf("Content-Type=%q want text/event-stream", contentType) + } + body := recorder.Body.String() + for _, want := range []string{"event: message", `"content":"hel"`, `"content":"lo"`, `"finish_reason":"stop"`} { + if !strings.Contains(body, want) { + t.Fatalf("SSE body missing %s: %s", want, body) + } + } +} + +type fakeTaskExecutor struct { + executeCalls int + streamCalls int + deltas []string + output map[string]any +} + +func (f *fakeTaskExecutor) Execute(context.Context, store.GatewayTask, *auth.User) (runner.Result, error) { + f.executeCalls++ + return runner.Result{Output: f.output}, nil +} + +func (f *fakeTaskExecutor) ExecuteStream(_ context.Context, _ store.GatewayTask, _ *auth.User, onDelta clients.StreamDelta) (runner.Result, error) { + f.streamCalls++ + for _, delta := range f.deltas { + if err := onDelta(delta); err != nil { + return runner.Result{}, err + } + } + return runner.Result{Output: f.output}, nil +} diff --git a/apps/api/internal/httpapi/core_flow_integration_test.go b/apps/api/internal/httpapi/core_flow_integration_test.go index 5ee452c..e3a9995 100644 --- a/apps/api/internal/httpapi/core_flow_integration_test.go +++ b/apps/api/internal/httpapi/core_flow_integration_test.go @@ -316,13 +316,17 @@ VALUES ($1, 5, '{"purpose":"core-flow"}'::jsonb)`, inviteCode); err != nil { } `json:"task"` } defaultTextModel := "openai:gpt-4o-mini" - doJSON(t, server.URL, http.MethodPost, "/api/v1/chat/completions", apiKeyResponse.Secret, map[string]any{ + var apiV1Chat map[string]any + doAPIV1ChatCompletionAndLoadTask(t, ctx, testPool, server.URL, apiKeyResponse.Secret, map[string]any{ "model": defaultTextModel, "runMode": "simulation", "simulation": true, "simulationDurationMs": 5, "messages": []map[string]any{{"role": "user", "content": "ping"}}, - }, http.StatusAccepted, &taskResponse) + }, "default-chat-"+suffixText, http.StatusOK, &apiV1Chat, &taskResponse.Task) + if apiV1Chat["object"] != "chat.completion" { + t.Fatalf("unexpected api v1 chat response: %+v", apiV1Chat) + } if taskResponse.Task.ID == "" || taskResponse.Task.Status != "succeeded" || taskResponse.Task.RunMode != "simulation" { t.Fatalf("unexpected task response: %+v", taskResponse.Task) } @@ -513,13 +517,13 @@ LIMIT 1`).Scan(&gptImageModelTypesRaw); err != nil { ErrorCode string `json:"errorCode"` } `json:"task"` } - doJSON(t, server.URL, http.MethodPost, "/api/v1/chat/completions", chatOnlyAPIKeyResponse.Secret, map[string]any{ + doAPIV1ChatCompletionAndLoadTask(t, ctx, testPool, server.URL, chatOnlyAPIKeyResponse.Secret, map[string]any{ "model": deniedModel, "runMode": "simulation", "simulation": true, "simulationDurationMs": 5, "messages": []map[string]any{{"role": "user", "content": "permission deny"}}, - }, http.StatusAccepted, &deniedTask) + }, "permission-deny-"+suffixText, http.StatusNotFound, nil, &deniedTask.Task) if deniedTask.Task.Status != "failed" || deniedTask.Task.ErrorCode != "no_model_candidate" { t.Fatalf("deny access rule should hide denied model from runtime candidates: %+v", deniedTask.Task) } @@ -561,13 +565,13 @@ LIMIT 1`).Scan(&gptImageModelTypesRaw); err != nil { ErrorCode string `json:"errorCode"` } `json:"task"` } - doJSON(t, server.URL, http.MethodPost, "/api/v1/chat/completions", chatOnlyAPIKeyResponse.Secret, map[string]any{ + doAPIV1ChatCompletionAndLoadTask(t, ctx, testPool, server.URL, chatOnlyAPIKeyResponse.Secret, map[string]any{ "model": controlledModel, "runMode": "simulation", "simulation": true, "simulationDurationMs": 5, "messages": []map[string]any{{"role": "user", "content": "allow should block other keys"}}, - }, http.StatusAccepted, &blockedControlledTask) + }, "permission-allow-block-"+suffixText, http.StatusNotFound, nil, &blockedControlledTask.Task) if blockedControlledTask.Task.Status != "failed" || blockedControlledTask.Task.ErrorCode != "no_model_candidate" { t.Fatalf("allow access rule should make the resource unavailable to unmatched subjects: %+v", blockedControlledTask.Task) } @@ -586,13 +590,13 @@ LIMIT 1`).Scan(&gptImageModelTypesRaw); err != nil { Status string `json:"status"` } `json:"task"` } - doJSON(t, server.URL, http.MethodPost, "/api/v1/chat/completions", chatOnlyAPIKeyResponse.Secret, map[string]any{ + doAPIV1ChatCompletionAndLoadTask(t, ctx, testPool, server.URL, chatOnlyAPIKeyResponse.Secret, map[string]any{ "model": controlledModel, "runMode": "simulation", "simulation": true, "simulationDurationMs": 5, "messages": []map[string]any{{"role": "user", "content": "allow should pass"}}, - }, http.StatusAccepted, &allowedControlledTask) + }, "permission-allow-pass-"+suffixText, http.StatusOK, nil, &allowedControlledTask.Task) if allowedControlledTask.Task.Status != "succeeded" { t.Fatalf("matching allow access rule should make the controlled model usable: %+v", allowedControlledTask.Task) } @@ -645,13 +649,13 @@ WHERE gateway_user_id = $1::uuid FinalChargeAmount float64 `json:"finalChargeAmount"` } `json:"task"` } - doJSON(t, server.URL, http.MethodPost, "/api/v1/chat/completions", apiKeyResponse.Secret, map[string]any{ + doAPIV1ChatCompletionAndLoadTask(t, ctx, testPool, server.URL, apiKeyResponse.Secret, map[string]any{ "model": pricingModel, "runMode": "simulation", "simulation": true, "simulationDurationMs": 5, "messages": []map[string]any{{"role": "user", "content": "priced ping"}}, - }, http.StatusAccepted, &pricingTask) + }, "pricing-chat-"+suffixText, http.StatusOK, nil, &pricingTask.Task) if pricingTask.Task.Status != "succeeded" || !floatNear(pricingTask.Task.FinalChargeAmount, 0.028) { t.Fatalf("custom pricing rule set should drive text billing, got task=%+v", pricingTask.Task) } @@ -757,14 +761,14 @@ WHERE reference_type = 'gateway_task' ErrorCode string `json:"errorCode"` } `json:"task"` } - doJSON(t, server.URL, http.MethodPost, "/api/v1/chat/completions", apiKeyResponse.Secret, map[string]any{ + doAPIV1ChatCompletionAndLoadTask(t, ctx, testPool, server.URL, apiKeyResponse.Secret, map[string]any{ "model": rateLimitedModel, "runMode": "simulation", "simulation": true, "simulationDurationMs": 5, "simulationProfile": "non_retryable_failure", "messages": []map[string]any{{"role": "user", "content": "failed first"}}, - }, http.StatusAccepted, &rateLimitFailedTask) + }, "rate-limit-failed-first-"+suffixText, http.StatusBadGateway, nil, &rateLimitFailedTask.Task) if rateLimitFailedTask.Task.Status != "failed" || rateLimitFailedTask.Task.ErrorCode != "bad_request" { t.Fatalf("failed rate-limited task should fail before consuming rpm: %+v", rateLimitFailedTask.Task) } @@ -774,13 +778,13 @@ WHERE reference_type = 'gateway_task' Status string `json:"status"` } `json:"task"` } - doJSON(t, server.URL, http.MethodPost, "/api/v1/chat/completions", apiKeyResponse.Secret, map[string]any{ + doAPIV1ChatCompletionAndLoadTask(t, ctx, testPool, server.URL, apiKeyResponse.Secret, map[string]any{ "model": rateLimitedModel, "runMode": "simulation", "simulation": true, "simulationDurationMs": 5, "messages": []map[string]any{{"role": "user", "content": "first"}}, - }, http.StatusAccepted, &rateLimitTaskOne) + }, "rate-limit-first-"+suffixText, http.StatusOK, nil, &rateLimitTaskOne.Task) if rateLimitTaskOne.Task.Status != "succeeded" { t.Fatalf("first rate-limited task should succeed: %+v", rateLimitTaskOne.Task) } @@ -790,13 +794,13 @@ WHERE reference_type = 'gateway_task' ErrorCode string `json:"errorCode"` } `json:"task"` } - doJSON(t, server.URL, http.MethodPost, "/api/v1/chat/completions", apiKeyResponse.Secret, map[string]any{ + doAPIV1ChatCompletionAndLoadTask(t, ctx, testPool, server.URL, apiKeyResponse.Secret, map[string]any{ "model": rateLimitedModel, "runMode": "simulation", "simulation": true, "simulationDurationMs": 5, "messages": []map[string]any{{"role": "user", "content": "second"}}, - }, http.StatusAccepted, &rateLimitTaskTwo) + }, "rate-limit-second-"+suffixText, http.StatusTooManyRequests, nil, &rateLimitTaskTwo.Task) if rateLimitTaskTwo.Task.Status != "failed" || rateLimitTaskTwo.Task.ErrorCode != "rate_limit" { t.Fatalf("runtime policy rate limit should fail second task with rate_limit: %+v", rateLimitTaskTwo.Task) } @@ -808,12 +812,12 @@ WHERE reference_type = 'gateway_task' AsyncMode bool `json:"asyncMode"` } `json:"task"` } - doJSONWithHeaders(t, server.URL, http.MethodPost, "/api/v1/chat/completions", apiKeyResponse.Secret, map[string]any{ + doJSONWithHeaders(t, server.URL, http.MethodPost, "/api/v1/responses", apiKeyResponse.Secret, map[string]any{ "model": rateLimitedModel, "runMode": "simulation", "simulation": true, "simulationDurationMs": 5, - "messages": []map[string]any{{"role": "user", "content": "async queued"}}, + "input": "async queued", }, map[string]string{"X-Async": "true"}, http.StatusAccepted, &asyncRateLimitTask) if asyncRateLimitTask.TaskID == "" || asyncRateLimitTask.Task.ID != asyncRateLimitTask.TaskID || !asyncRateLimitTask.Task.AsyncMode { t.Fatalf("async task response should expose task id and async mode: %+v", asyncRateLimitTask) @@ -984,11 +988,11 @@ WHERE reference_type = 'gateway_task' Status string `json:"status"` } `json:"task"` } - doJSON(t, server.URL, http.MethodPost, "/api/v1/chat/completions", apiKeyResponse.Secret, map[string]any{ + doAPIV1ChatCompletionAndLoadTask(t, ctx, testPool, server.URL, apiKeyResponse.Secret, map[string]any{ "model": failoverModel, "runMode": "simulation", "messages": []map[string]any{{"role": "user", "content": "retry please"}}, - }, http.StatusAccepted, &failoverTask) + }, "failover-chat-"+suffixText, http.StatusOK, nil, &failoverTask.Task) if failoverTask.Task.Status != "succeeded" { t.Fatalf("failover task should succeed through second client: %+v", failoverTask.Task) } @@ -1103,13 +1107,13 @@ WHERE failed.id = $1::uuid`, failedPlatform.ID, successPlatform.ID, unrelatedPri Status string `json:"status"` } `json:"task"` } - doJSON(t, server.URL, http.MethodPost, "/api/v1/chat/completions", apiKeyResponse.Secret, map[string]any{ + doAPIV1ChatCompletionAndLoadTask(t, ctx, testPool, server.URL, apiKeyResponse.Secret, map[string]any{ "model": degradeModel, "runMode": "simulation", "simulation": true, "simulationDurationMs": 5, "messages": []map[string]any{{"role": "user", "content": "degrade please"}}, - }, http.StatusAccepted, °radeTask) + }, "degrade-chat-"+suffixText, http.StatusOK, nil, °radeTask.Task) if degradeTask.Task.Status != "succeeded" { t.Fatalf("degrade task should fail over after cooling down failed model: %+v", degradeTask.Task) } @@ -1170,13 +1174,13 @@ WHERE m.platform_id = $1::uuid ErrorCode string `json:"errorCode"` } `json:"task"` } - doJSON(t, server.URL, http.MethodPost, "/api/v1/chat/completions", apiKeyResponse.Secret, map[string]any{ + doAPIV1ChatCompletionAndLoadTask(t, ctx, testPool, server.URL, apiKeyResponse.Secret, map[string]any{ "model": autoDisableModel, "runMode": "simulation", "simulation": true, "simulationDurationMs": 5, "messages": []map[string]any{{"role": "user", "content": "disable please"}}, - }, http.StatusAccepted, &autoDisableTask) + }, "auto-disable-chat-"+suffixText, http.StatusBadGateway, nil, &autoDisableTask.Task) if autoDisableTask.Task.Status != "failed" || autoDisableTask.Task.ErrorCode != "invalid_api_key" { t.Fatalf("auto disable task should fail with invalid_api_key: %+v", autoDisableTask.Task) } @@ -1293,12 +1297,12 @@ WHERE m.platform_id = $1::uuid AsyncMode bool `json:"asyncMode"` } `json:"task"` } - doJSONWithHeaders(t, server.URL, http.MethodPost, "/api/v1/chat/completions", apiKeyResponse.Secret, map[string]any{ + doJSONWithHeaders(t, server.URL, http.MethodPost, "/api/v1/responses", apiKeyResponse.Secret, map[string]any{ "model": defaultTextModel, "runMode": "simulation", "simulation": true, "simulationDurationMs": 2000, - "messages": []map[string]any{{"role": "user", "content": "river worker restart"}}, + "input": "river worker restart", }, map[string]string{"X-Async": "true"}, http.StatusAccepted, &restartAsyncTask) if restartAsyncTask.TaskID == "" || !restartAsyncTask.Task.AsyncMode { t.Fatalf("restart async task should be accepted as async: %+v", restartAsyncTask) @@ -1453,6 +1457,20 @@ func doJSONWithHeaders(t *testing.T, baseURL string, method string, path string, } } +func doAPIV1ChatCompletionAndLoadTask(t *testing.T, ctx context.Context, pool *pgxpool.Pool, baseURL string, token string, payload map[string]any, marker string, expectedStatus int, responseOut any, taskDetailOut any) string { + t.Helper() + payload["integrationTestMarker"] = marker + if responseOut == nil { + responseOut = &map[string]any{} + } + doJSON(t, baseURL, http.MethodPost, "/api/v1/chat/completions", token, payload, expectedStatus, responseOut) + taskID := waitForTaskIDByRequestField(t, ctx, pool, "integrationTestMarker", marker, 2*time.Second) + if taskDetailOut != nil { + doJSON(t, baseURL, http.MethodGet, "/api/v1/tasks/"+taskID, token, nil, http.StatusOK, taskDetailOut) + } + return taskID +} + type taskWaitDetail struct { ID string `json:"id"` Status string `json:"status"` @@ -1481,6 +1499,11 @@ func waitForTaskStatus(t *testing.T, baseURL string, token string, taskID string } func waitForTaskIDByRequestMarker(t *testing.T, ctx context.Context, pool *pgxpool.Pool, marker string, timeout time.Duration) string { + t.Helper() + return waitForTaskIDByRequestField(t, ctx, pool, "cancelTestId", marker, timeout) +} + +func waitForTaskIDByRequestField(t *testing.T, ctx context.Context, pool *pgxpool.Pool, key string, value string, timeout time.Duration) string { t.Helper() deadline := time.Now().Add(timeout) for time.Now().Before(deadline) { @@ -1488,15 +1511,15 @@ func waitForTaskIDByRequestMarker(t *testing.T, ctx context.Context, pool *pgxpo err := pool.QueryRow(ctx, ` SELECT id::text FROM gateway_tasks -WHERE request->>'cancelTestId' = $1 +WHERE request->>$1 = $2 ORDER BY created_at DESC -LIMIT 1`, marker).Scan(&taskID) +LIMIT 1`, key, value).Scan(&taskID) if err == nil && taskID != "" { return taskID } time.Sleep(50 * time.Millisecond) } - t.Fatalf("task with request marker %s was not created within %s", marker, timeout) + t.Fatalf("task with request %s=%s was not created within %s", key, value, timeout) return "" } @@ -1577,13 +1600,13 @@ func assertLoadAvoidanceSimulatedRetryChain(t *testing.T, ctx context.Context, t ErrorCode string `json:"errorCode"` } `json:"task"` } - doJSON(t, baseURL, http.MethodPost, "/api/v1/chat/completions", runtimeToken, map[string]any{ + doAPIV1ChatCompletionAndLoadTask(t, ctx, testPool, baseURL, runtimeToken, map[string]any{ "model": model, "runMode": "simulation", "simulation": true, "simulationDurationMs": 5, "messages": []map[string]any{{"role": "user", "content": "load avoidance retry chain"}}, - }, http.StatusAccepted, &taskResponse) + }, "load-avoidance-"+suffixText, http.StatusBadGateway, nil, &taskResponse.Task) if taskResponse.Task.ID == "" || taskResponse.Task.Status != "failed" || taskResponse.Task.ErrorCode != "bad_request" { t.Fatalf("load avoidance task should only fail after avoided clients are retried, got %+v", taskResponse.Task) } diff --git a/apps/api/internal/httpapi/handlers.go b/apps/api/internal/httpapi/handlers.go index 21f3916..714a555 100644 --- a/apps/api/internal/httpapi/handlers.go +++ b/apps/api/internal/httpapi/handlers.go @@ -13,6 +13,7 @@ import ( "github.com/easyai/easyai-ai-gateway/apps/api/internal/auth" "github.com/easyai/easyai-ai-gateway/apps/api/internal/clients" "github.com/easyai/easyai-ai-gateway/apps/api/internal/netproxy" + "github.com/easyai/easyai-ai-gateway/apps/api/internal/runner" "github.com/easyai/easyai-ai-gateway/apps/api/internal/store" ) @@ -858,7 +859,7 @@ func (s *Server) listModelRateLimitStatuses(w http.ResponseWriter, r *http.Reque // createTask godoc // @Summary 创建或执行 AI 任务 -// @Description 网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。 +// @Description 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。 // @Tags tasks // @Accept json // @Produce json @@ -874,7 +875,6 @@ func (s *Server) listModelRateLimitStatuses(w http.ResponseWriter, r *http.Reque // @Failure 404 {object} ErrorEnvelope // @Failure 429 {object} ErrorEnvelope // @Failure 502 {object} ErrorEnvelope -// @Router /api/v1/chat/completions [post] // @Router /api/v1/responses [post] // @Router /api/v1/images/generations [post] // @Router /api/v1/images/edits [post] @@ -909,13 +909,13 @@ func (s *Server) createTask(kind string, compatible bool) http.Handler { writeError(w, http.StatusForbidden, "api key scope does not allow this capability") return } - asyncMode := asyncRequest(r) + responsePlan := planTaskResponse(kind, compatible, body, r) task, err := s.store.CreateTask(r.Context(), store.CreateTaskInput{ Kind: kind, Model: model, RunMode: runModeFromRequest(body), - Async: asyncMode, + Async: responsePlan.asyncMode, Request: body, }, user) if err != nil { @@ -923,7 +923,7 @@ func (s *Server) createTask(kind string, compatible bool) http.Handler { writeError(w, http.StatusInternalServerError, "create task failed") return } - if asyncMode { + if responsePlan.asyncMode { if err := s.runner.EnqueueAsyncTask(r.Context(), task); err != nil { writeError(w, http.StatusInternalServerError, err.Error(), "enqueue_failed") return @@ -933,65 +933,8 @@ func (s *Server) createTask(kind string, compatible bool) http.Handler { } runCtx, cancelRun := s.requestExecutionContext(r) defer cancelRun() - if compatible { - if boolValue(body, "stream") { - flusher := prepareCompatibleStream(w) - result, runErr := s.runner.ExecuteStream(runCtx, task, user, func(delta string) error { - if !requestStillConnected(r) { - return nil - } - writeCompatibleDelta(w, kind, model, delta) - if flusher != nil { - flusher.Flush() - } - return nil - }) - if runErr != nil { - if !requestStillConnected(r) { - return - } - status := statusFromRunError(runErr) - errorPayload := map[string]any{ - "code": runErrorCode(runErr), - "message": runErrorMessage(runErr), - "status": status, - } - if result.Task.ID != "" { - errorPayload["taskId"] = result.Task.ID - } - if result.Task.RequestID != "" { - errorPayload["requestId"] = result.Task.RequestID - } - for key, value := range runErrorDetails(runErr) { - errorPayload[key] = value - } - sendSSE(w, "error", map[string]any{"error": errorPayload}) - if flusher != nil { - flusher.Flush() - } - return - } - if !requestStillConnected(r) { - return - } - writeCompatibleDone(w, kind, model, result.Output) - if flusher != nil { - flusher.Flush() - } - return - } - result, runErr := s.runner.Execute(runCtx, task, user) - if runErr != nil { - if !requestStillConnected(r) { - return - } - writeErrorWithDetails(w, statusFromRunError(runErr), runErrorMessage(runErr), runErrorDetails(runErr), runErrorCode(runErr)) - return - } - if !requestStillConnected(r) { - return - } - writeJSON(w, http.StatusOK, result.Output) + if responsePlan.compatibleMode { + writeCompatibleTaskResponse(runCtx, w, r, s.runner, kind, model, task, user, responsePlan.streamMode) return } result, runErr := s.runner.Execute(runCtx, task, user) @@ -1006,6 +949,29 @@ func (s *Server) createTask(kind string, compatible bool) http.Handler { }) } +// createAPIV1ChatCompletions godoc +// @Summary 创建 Chat Completions +// @Description /api/v1/chat/completions 同步执行:stream=true 返回 text/event-stream SSE;stream=false 或未传返回兼容 JSON;该接口忽略 X-Async。 +// @Tags tasks +// @Accept json +// @Produce json +// @Produce text/event-stream +// @Security BearerAuth +// @Param X-Async header bool false "该接口忽略此参数" +// @Param input body TaskRequest true "Chat Completions 请求" +// @Success 200 {object} ChatCompletionCompatibleResponse +// @Failure 400 {object} ErrorEnvelope +// @Failure 401 {object} ErrorEnvelope +// @Failure 402 {object} ErrorEnvelope +// @Failure 403 {object} ErrorEnvelope +// @Failure 404 {object} ErrorEnvelope +// @Failure 429 {object} ErrorEnvelope +// @Failure 502 {object} ErrorEnvelope +// @Router /api/v1/chat/completions [post] +func (s *Server) createAPIV1ChatCompletions() http.Handler { + return s.createTask("chat.completions", false) +} + func (s *Server) requestExecutionContext(r *http.Request) (context.Context, context.CancelFunc) { base := context.WithoutCancel(r.Context()) if s.ctx == nil { @@ -1031,11 +997,98 @@ func requestStillConnected(r *http.Request) bool { } } +type taskExecutor interface { + Execute(context.Context, store.GatewayTask, *auth.User) (runner.Result, error) + ExecuteStream(context.Context, store.GatewayTask, *auth.User, clients.StreamDelta) (runner.Result, error) +} + +func writeCompatibleTaskResponse(runCtx context.Context, w http.ResponseWriter, r *http.Request, executor taskExecutor, kind string, model string, task store.GatewayTask, user *auth.User, streamMode bool) { + if streamMode { + flusher := prepareCompatibleStream(w) + result, runErr := executor.ExecuteStream(runCtx, task, user, func(delta string) error { + if !requestStillConnected(r) { + return nil + } + writeCompatibleDelta(w, kind, model, delta) + if flusher != nil { + flusher.Flush() + } + return nil + }) + if runErr != nil { + if !requestStillConnected(r) { + return + } + status := statusFromRunError(runErr) + errorPayload := map[string]any{ + "code": runErrorCode(runErr), + "message": runErrorMessage(runErr), + "status": status, + } + if result.Task.ID != "" { + errorPayload["taskId"] = result.Task.ID + } + if result.Task.RequestID != "" { + errorPayload["requestId"] = result.Task.RequestID + } + for key, value := range runErrorDetails(runErr) { + errorPayload[key] = value + } + sendSSE(w, "error", map[string]any{"error": errorPayload}) + if flusher != nil { + flusher.Flush() + } + return + } + if !requestStillConnected(r) { + return + } + writeCompatibleDone(w, kind, model, result.Output) + if flusher != nil { + flusher.Flush() + } + return + } + + result, runErr := executor.Execute(runCtx, task, user) + if runErr != nil { + if !requestStillConnected(r) { + return + } + writeErrorWithDetails(w, statusFromRunError(runErr), runErrorMessage(runErr), runErrorDetails(runErr), runErrorCode(runErr)) + return + } + if !requestStillConnected(r) { + return + } + writeJSON(w, http.StatusOK, result.Output) +} + func asyncRequest(r *http.Request) bool { value := strings.TrimSpace(strings.ToLower(r.Header.Get("x-async"))) return value == "1" || value == "true" || value == "yes" || value == "on" } +type taskResponsePlan struct { + asyncMode bool + compatibleMode bool + streamMode bool +} + +func planTaskResponse(kind string, compatible bool, body map[string]any, r *http.Request) taskResponsePlan { + asyncMode := asyncRequest(r) + compatibleMode := compatible + if kind == "chat.completions" && !compatible { + asyncMode = false + compatibleMode = true + } + return taskResponsePlan{ + asyncMode: asyncMode, + compatibleMode: compatibleMode, + streamMode: boolValue(body, "stream"), + } +} + func writeTaskAccepted(w http.ResponseWriter, task store.GatewayTask) { writeJSON(w, http.StatusAccepted, map[string]any{ "taskId": task.ID, diff --git a/apps/api/internal/httpapi/openapi_models.go b/apps/api/internal/httpapi/openapi_models.go index 614d22b..f80c30b 100644 --- a/apps/api/internal/httpapi/openapi_models.go +++ b/apps/api/internal/httpapi/openapi_models.go @@ -242,6 +242,32 @@ type CompatibleResponse struct { Usage map[string]interface{} `json:"usage,omitempty"` } +type ChatCompletionCompatibleResponse struct { + ID string `json:"id" example:"chatcmpl-123"` + Object string `json:"object" example:"chat.completion"` + Created int64 `json:"created,omitempty" example:"1710000000"` + Model string `json:"model" example:"gpt-4o-mini"` + Choices []ChatCompletionChoice `json:"choices"` + Usage *ChatCompletionUsage `json:"usage,omitempty"` +} + +type ChatCompletionChoice struct { + Index int `json:"index" example:"0"` + Message ChatCompletionChoiceMessage `json:"message"` + FinishReason string `json:"finish_reason,omitempty" example:"stop"` +} + +type ChatCompletionChoiceMessage struct { + Role string `json:"role" example:"assistant"` + Content string `json:"content" example:"Hello"` +} + +type ChatCompletionUsage struct { + PromptTokens int `json:"prompt_tokens,omitempty" example:"12"` + CompletionTokens int `json:"completion_tokens,omitempty" example:"8"` + TotalTokens int `json:"total_tokens,omitempty" example:"20"` +} + type NetworkProxyConfigResponse struct { GlobalHTTPProxy string `json:"globalHttpProxy" example:"http://127.0.0.1:7890"` GlobalHTTPProxySet bool `json:"globalHttpProxySet" example:"true"` diff --git a/apps/api/internal/httpapi/server.go b/apps/api/internal/httpapi/server.go index 85256ca..b9ab6f5 100644 --- a/apps/api/internal/httpapi/server.go +++ b/apps/api/internal/httpapi/server.go @@ -126,7 +126,7 @@ func NewServerWithContext(ctx context.Context, cfg config.Config, db *store.Stor mux.Handle("GET /api/v1/playground/models", server.auth.Require(auth.PermissionBasic, http.HandlerFunc(server.listPlayableModels))) mux.Handle("GET /api/admin/runtime/rate-limit-windows", server.requireAdmin(auth.PermissionPower, http.HandlerFunc(server.listRateLimitWindows))) mux.Handle("GET /api/admin/runtime/model-rate-limits", server.requireAdmin(auth.PermissionPower, http.HandlerFunc(server.listModelRateLimitStatuses))) - mux.Handle("POST /api/v1/chat/completions", server.auth.Require(auth.PermissionBasic, server.createTask("chat.completions", false))) + mux.Handle("POST /api/v1/chat/completions", server.auth.Require(auth.PermissionBasic, server.createAPIV1ChatCompletions())) mux.Handle("POST /api/v1/responses", server.auth.Require(auth.PermissionBasic, server.createTask("responses", false))) mux.Handle("POST /api/v1/images/generations", server.auth.Require(auth.PermissionBasic, server.createTask("images.generations", false))) mux.Handle("POST /api/v1/images/edits", server.auth.Require(auth.PermissionBasic, server.createTask("images.edits", false)))