docs(api): 同步 /api/v1/chat/completions 的 OpenAPI 与同步响应
补充 Chat Completions 的兼容响应模型与路由注释,确保 /api/v1/chat/completions 按同步兼容格式返回并更新对应测试与 Swagger 文档。
This commit is contained in:
parent
34c3251c6d
commit
ae197a742f
@ -4000,26 +4000,27 @@
|
||||
"BearerAuth": []
|
||||
}
|
||||
],
|
||||
"description": "网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
|
||||
"description": "/api/v1/chat/completions 同步执行:stream=true 返回 text/event-stream SSE;stream=false 或未传返回兼容 JSON;该接口忽略 X-Async。",
|
||||
"consumes": [
|
||||
"application/json"
|
||||
],
|
||||
"produces": [
|
||||
"application/json"
|
||||
"application/json",
|
||||
"text/event-stream"
|
||||
],
|
||||
"tags": [
|
||||
"tasks"
|
||||
],
|
||||
"summary": "创建或执行 AI 任务",
|
||||
"summary": "创建 Chat Completions",
|
||||
"parameters": [
|
||||
{
|
||||
"type": "boolean",
|
||||
"description": "true 时异步创建任务并返回 202",
|
||||
"description": "该接口忽略此参数",
|
||||
"name": "X-Async",
|
||||
"in": "header"
|
||||
},
|
||||
{
|
||||
"description": "AI 任务请求,字段随任务类型变化",
|
||||
"description": "Chat Completions 请求",
|
||||
"name": "input",
|
||||
"in": "body",
|
||||
"required": true,
|
||||
@ -4032,13 +4033,7 @@
|
||||
"200": {
|
||||
"description": "OK",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/httpapi.CompatibleResponse"
|
||||
}
|
||||
},
|
||||
"202": {
|
||||
"description": "Accepted",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/httpapi.TaskAcceptedResponse"
|
||||
"$ref": "#/definitions/httpapi.ChatCompletionCompatibleResponse"
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
@ -4161,7 +4156,7 @@
|
||||
"BearerAuth": []
|
||||
}
|
||||
],
|
||||
"description": "网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
|
||||
"description": "网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
|
||||
"consumes": [
|
||||
"application/json"
|
||||
],
|
||||
@ -4254,7 +4249,7 @@
|
||||
"BearerAuth": []
|
||||
}
|
||||
],
|
||||
"description": "网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
|
||||
"description": "网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
|
||||
"consumes": [
|
||||
"application/json"
|
||||
],
|
||||
@ -4653,7 +4648,7 @@
|
||||
"BearerAuth": []
|
||||
}
|
||||
],
|
||||
"description": "网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
|
||||
"description": "网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
|
||||
"consumes": [
|
||||
"application/json"
|
||||
],
|
||||
@ -4985,7 +4980,7 @@
|
||||
"BearerAuth": []
|
||||
}
|
||||
],
|
||||
"description": "网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
|
||||
"description": "网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
|
||||
"consumes": [
|
||||
"application/json"
|
||||
],
|
||||
@ -5452,7 +5447,7 @@
|
||||
"BearerAuth": []
|
||||
}
|
||||
],
|
||||
"description": "网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
|
||||
"description": "网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
|
||||
"consumes": [
|
||||
"application/json"
|
||||
],
|
||||
@ -5565,7 +5560,7 @@
|
||||
"BearerAuth": []
|
||||
}
|
||||
],
|
||||
"description": "网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
|
||||
"description": "网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
|
||||
"consumes": [
|
||||
"application/json"
|
||||
],
|
||||
@ -5658,7 +5653,7 @@
|
||||
"BearerAuth": []
|
||||
}
|
||||
],
|
||||
"description": "网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
|
||||
"description": "网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
|
||||
"consumes": [
|
||||
"application/json"
|
||||
],
|
||||
@ -5777,7 +5772,7 @@
|
||||
"BearerAuth": []
|
||||
}
|
||||
],
|
||||
"description": "网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
|
||||
"description": "网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
|
||||
"consumes": [
|
||||
"application/json"
|
||||
],
|
||||
@ -5976,7 +5971,7 @@
|
||||
"BearerAuth": []
|
||||
}
|
||||
],
|
||||
"description": "网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
|
||||
"description": "网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
|
||||
"consumes": [
|
||||
"application/json"
|
||||
],
|
||||
@ -6137,7 +6132,7 @@
|
||||
"BearerAuth": []
|
||||
}
|
||||
],
|
||||
"description": "网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
|
||||
"description": "网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
|
||||
"consumes": [
|
||||
"application/json"
|
||||
],
|
||||
@ -6230,7 +6225,7 @@
|
||||
"BearerAuth": []
|
||||
}
|
||||
],
|
||||
"description": "网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
|
||||
"description": "网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
|
||||
"consumes": [
|
||||
"application/json"
|
||||
],
|
||||
@ -6323,7 +6318,7 @@
|
||||
"BearerAuth": []
|
||||
}
|
||||
],
|
||||
"description": "网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
|
||||
"description": "网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。",
|
||||
"consumes": [
|
||||
"application/json"
|
||||
],
|
||||
@ -6551,6 +6546,82 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"httpapi.ChatCompletionChoice": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"finish_reason": {
|
||||
"type": "string",
|
||||
"example": "stop"
|
||||
},
|
||||
"index": {
|
||||
"type": "integer",
|
||||
"example": 0
|
||||
},
|
||||
"message": {
|
||||
"$ref": "#/definitions/httpapi.ChatCompletionChoiceMessage"
|
||||
}
|
||||
}
|
||||
},
|
||||
"httpapi.ChatCompletionChoiceMessage": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"content": {
|
||||
"type": "string",
|
||||
"example": "Hello"
|
||||
},
|
||||
"role": {
|
||||
"type": "string",
|
||||
"example": "assistant"
|
||||
}
|
||||
}
|
||||
},
|
||||
"httpapi.ChatCompletionCompatibleResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"choices": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/definitions/httpapi.ChatCompletionChoice"
|
||||
}
|
||||
},
|
||||
"created": {
|
||||
"type": "integer",
|
||||
"example": 1710000000
|
||||
},
|
||||
"id": {
|
||||
"type": "string",
|
||||
"example": "chatcmpl-123"
|
||||
},
|
||||
"model": {
|
||||
"type": "string",
|
||||
"example": "gpt-4o-mini"
|
||||
},
|
||||
"object": {
|
||||
"type": "string",
|
||||
"example": "chat.completion"
|
||||
},
|
||||
"usage": {
|
||||
"$ref": "#/definitions/httpapi.ChatCompletionUsage"
|
||||
}
|
||||
}
|
||||
},
|
||||
"httpapi.ChatCompletionUsage": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"completion_tokens": {
|
||||
"type": "integer",
|
||||
"example": 8
|
||||
},
|
||||
"prompt_tokens": {
|
||||
"type": "integer",
|
||||
"example": 12
|
||||
},
|
||||
"total_tokens": {
|
||||
"type": "integer",
|
||||
"example": 20
|
||||
}
|
||||
}
|
||||
},
|
||||
"httpapi.ChatMessage": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
||||
@ -92,6 +92,59 @@ definitions:
|
||||
$ref: '#/definitions/store.CatalogProvider'
|
||||
type: array
|
||||
type: object
|
||||
httpapi.ChatCompletionChoice:
|
||||
properties:
|
||||
finish_reason:
|
||||
example: stop
|
||||
type: string
|
||||
index:
|
||||
example: 0
|
||||
type: integer
|
||||
message:
|
||||
$ref: '#/definitions/httpapi.ChatCompletionChoiceMessage'
|
||||
type: object
|
||||
httpapi.ChatCompletionChoiceMessage:
|
||||
properties:
|
||||
content:
|
||||
example: Hello
|
||||
type: string
|
||||
role:
|
||||
example: assistant
|
||||
type: string
|
||||
type: object
|
||||
httpapi.ChatCompletionCompatibleResponse:
|
||||
properties:
|
||||
choices:
|
||||
items:
|
||||
$ref: '#/definitions/httpapi.ChatCompletionChoice'
|
||||
type: array
|
||||
created:
|
||||
example: 1710000000
|
||||
type: integer
|
||||
id:
|
||||
example: chatcmpl-123
|
||||
type: string
|
||||
model:
|
||||
example: gpt-4o-mini
|
||||
type: string
|
||||
object:
|
||||
example: chat.completion
|
||||
type: string
|
||||
usage:
|
||||
$ref: '#/definitions/httpapi.ChatCompletionUsage'
|
||||
type: object
|
||||
httpapi.ChatCompletionUsage:
|
||||
properties:
|
||||
completion_tokens:
|
||||
example: 8
|
||||
type: integer
|
||||
prompt_tokens:
|
||||
example: 12
|
||||
type: integer
|
||||
total_tokens:
|
||||
example: 20
|
||||
type: integer
|
||||
type: object
|
||||
httpapi.ChatMessage:
|
||||
properties:
|
||||
content:
|
||||
@ -4800,14 +4853,14 @@ paths:
|
||||
post:
|
||||
consumes:
|
||||
- application/json
|
||||
description: 网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或
|
||||
SSE 流。
|
||||
description: /api/v1/chat/completions 同步执行:stream=true 返回 text/event-stream
|
||||
SSE;stream=false 或未传返回兼容 JSON;该接口忽略 X-Async。
|
||||
parameters:
|
||||
- description: true 时异步创建任务并返回 202
|
||||
- description: 该接口忽略此参数
|
||||
in: header
|
||||
name: X-Async
|
||||
type: boolean
|
||||
- description: AI 任务请求,字段随任务类型变化
|
||||
- description: Chat Completions 请求
|
||||
in: body
|
||||
name: input
|
||||
required: true
|
||||
@ -4815,15 +4868,12 @@ paths:
|
||||
$ref: '#/definitions/httpapi.TaskRequest'
|
||||
produces:
|
||||
- application/json
|
||||
- text/event-stream
|
||||
responses:
|
||||
"200":
|
||||
description: OK
|
||||
schema:
|
||||
$ref: '#/definitions/httpapi.CompatibleResponse'
|
||||
"202":
|
||||
description: Accepted
|
||||
schema:
|
||||
$ref: '#/definitions/httpapi.TaskAcceptedResponse'
|
||||
$ref: '#/definitions/httpapi.ChatCompletionCompatibleResponse'
|
||||
"400":
|
||||
description: Bad Request
|
||||
schema:
|
||||
@ -4854,7 +4904,7 @@ paths:
|
||||
$ref: '#/definitions/httpapi.ErrorEnvelope'
|
||||
security:
|
||||
- BearerAuth: []
|
||||
summary: 创建或执行 AI 任务
|
||||
summary: 创建 Chat Completions
|
||||
tags:
|
||||
- tasks
|
||||
/api/v1/files/upload:
|
||||
@ -4905,8 +4955,8 @@ paths:
|
||||
post:
|
||||
consumes:
|
||||
- application/json
|
||||
description: 网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或
|
||||
SSE 流。
|
||||
description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible
|
||||
路径同步返回兼容响应或 SSE 流。
|
||||
parameters:
|
||||
- description: true 时异步创建任务并返回 202
|
||||
in: header
|
||||
@ -4966,8 +5016,8 @@ paths:
|
||||
post:
|
||||
consumes:
|
||||
- application/json
|
||||
description: 网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或
|
||||
SSE 流。
|
||||
description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible
|
||||
路径同步返回兼容响应或 SSE 流。
|
||||
parameters:
|
||||
- description: true 时异步创建任务并返回 202
|
||||
in: header
|
||||
@ -5220,8 +5270,8 @@ paths:
|
||||
post:
|
||||
consumes:
|
||||
- application/json
|
||||
description: 网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或
|
||||
SSE 流。
|
||||
description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible
|
||||
路径同步返回兼容响应或 SSE 流。
|
||||
parameters:
|
||||
- description: true 时异步创建任务并返回 202
|
||||
in: header
|
||||
@ -5434,8 +5484,8 @@ paths:
|
||||
post:
|
||||
consumes:
|
||||
- application/json
|
||||
description: 网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或
|
||||
SSE 流。
|
||||
description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible
|
||||
路径同步返回兼容响应或 SSE 流。
|
||||
parameters:
|
||||
- description: true 时异步创建任务并返回 202
|
||||
in: header
|
||||
@ -5735,8 +5785,8 @@ paths:
|
||||
post:
|
||||
consumes:
|
||||
- application/json
|
||||
description: 网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或
|
||||
SSE 流。
|
||||
description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible
|
||||
路径同步返回兼容响应或 SSE 流。
|
||||
parameters:
|
||||
- description: true 时异步创建任务并返回 202
|
||||
in: header
|
||||
@ -5809,8 +5859,8 @@ paths:
|
||||
post:
|
||||
consumes:
|
||||
- application/json
|
||||
description: 网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或
|
||||
SSE 流。
|
||||
description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible
|
||||
路径同步返回兼容响应或 SSE 流。
|
||||
parameters:
|
||||
- description: true 时异步创建任务并返回 202
|
||||
in: header
|
||||
@ -5870,8 +5920,8 @@ paths:
|
||||
post:
|
||||
consumes:
|
||||
- application/json
|
||||
description: 网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或
|
||||
SSE 流。
|
||||
description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible
|
||||
路径同步返回兼容响应或 SSE 流。
|
||||
parameters:
|
||||
- description: true 时异步创建任务并返回 202
|
||||
in: header
|
||||
@ -5948,8 +5998,8 @@ paths:
|
||||
post:
|
||||
consumes:
|
||||
- application/json
|
||||
description: 网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或
|
||||
SSE 流。
|
||||
description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible
|
||||
路径同步返回兼容响应或 SSE 流。
|
||||
parameters:
|
||||
- description: true 时异步创建任务并返回 202
|
||||
in: header
|
||||
@ -6079,8 +6129,8 @@ paths:
|
||||
post:
|
||||
consumes:
|
||||
- application/json
|
||||
description: 网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或
|
||||
SSE 流。
|
||||
description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible
|
||||
路径同步返回兼容响应或 SSE 流。
|
||||
parameters:
|
||||
- description: true 时异步创建任务并返回 202
|
||||
in: header
|
||||
@ -6184,8 +6234,8 @@ paths:
|
||||
post:
|
||||
consumes:
|
||||
- application/json
|
||||
description: 网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或
|
||||
SSE 流。
|
||||
description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible
|
||||
路径同步返回兼容响应或 SSE 流。
|
||||
parameters:
|
||||
- description: true 时异步创建任务并返回 202
|
||||
in: header
|
||||
@ -6245,8 +6295,8 @@ paths:
|
||||
post:
|
||||
consumes:
|
||||
- application/json
|
||||
description: 网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或
|
||||
SSE 流。
|
||||
description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible
|
||||
路径同步返回兼容响应或 SSE 流。
|
||||
parameters:
|
||||
- description: true 时异步创建任务并返回 202
|
||||
in: header
|
||||
@ -6306,8 +6356,8 @@ paths:
|
||||
post:
|
||||
consumes:
|
||||
- application/json
|
||||
description: 网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或
|
||||
SSE 流。
|
||||
description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible
|
||||
路径同步返回兼容响应或 SSE 流。
|
||||
parameters:
|
||||
- description: true 时异步创建任务并返回 202
|
||||
in: header
|
||||
|
||||
114
apps/api/internal/httpapi/chat_completions_mode_test.go
Normal file
114
apps/api/internal/httpapi/chat_completions_mode_test.go
Normal file
@ -0,0 +1,114 @@
|
||||
package httpapi
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/easyai/easyai-ai-gateway/apps/api/internal/auth"
|
||||
"github.com/easyai/easyai-ai-gateway/apps/api/internal/clients"
|
||||
"github.com/easyai/easyai-ai-gateway/apps/api/internal/runner"
|
||||
"github.com/easyai/easyai-ai-gateway/apps/api/internal/store"
|
||||
)
|
||||
|
||||
func TestPlanTaskResponseTreatsAPIV1ChatCompletionsAsSynchronousCompatibleResponse(t *testing.T) {
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/v1/chat/completions", nil)
|
||||
req.Header.Set("X-Async", "true")
|
||||
|
||||
plan := planTaskResponse("chat.completions", false, map[string]any{"stream": true}, req)
|
||||
|
||||
if plan.asyncMode {
|
||||
t.Fatal("/api/v1/chat/completions must not enter async task mode")
|
||||
}
|
||||
if !plan.compatibleMode {
|
||||
t.Fatal("/api/v1/chat/completions should return OpenAI-compatible response payloads")
|
||||
}
|
||||
if !plan.streamMode {
|
||||
t.Fatal("stream=true should select SSE streaming mode")
|
||||
}
|
||||
}
|
||||
|
||||
func TestPlanTaskResponseKeepsAsyncTaskModeForOtherAPIV1Tasks(t *testing.T) {
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/v1/images/generations", nil)
|
||||
req.Header.Set("X-Async", "true")
|
||||
|
||||
plan := planTaskResponse("images.generations", false, map[string]any{"stream": true}, req)
|
||||
|
||||
if !plan.asyncMode {
|
||||
t.Fatal("non-chat /api/v1 task endpoints should keep X-Async task mode")
|
||||
}
|
||||
if plan.compatibleMode {
|
||||
t.Fatal("non-compatible /api/v1 task endpoints should not return OpenAI-compatible payloads")
|
||||
}
|
||||
}
|
||||
|
||||
func TestWriteCompatibleTaskResponseReturnsJSONWhenStreamIsFalse(t *testing.T) {
|
||||
executor := &fakeTaskExecutor{output: map[string]any{"id": "chatcmpl-test", "object": "chat.completion"}}
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/v1/chat/completions", nil)
|
||||
recorder := httptest.NewRecorder()
|
||||
|
||||
writeCompatibleTaskResponse(context.Background(), recorder, req, executor, "chat.completions", "gpt-test", store.GatewayTask{ID: "task-test"}, &auth.User{}, false)
|
||||
|
||||
if recorder.Code != http.StatusOK {
|
||||
t.Fatalf("status=%d want=%d body=%s", recorder.Code, http.StatusOK, recorder.Body.String())
|
||||
}
|
||||
if executor.executeCalls != 1 || executor.streamCalls != 0 {
|
||||
t.Fatalf("expected non-stream execute only, got execute=%d stream=%d", executor.executeCalls, executor.streamCalls)
|
||||
}
|
||||
var body map[string]any
|
||||
if err := json.Unmarshal(recorder.Body.Bytes(), &body); err != nil {
|
||||
t.Fatalf("decode response body: %v body=%s", err, recorder.Body.String())
|
||||
}
|
||||
if body["object"] != "chat.completion" {
|
||||
t.Fatalf("unexpected compatible JSON response: %+v", body)
|
||||
}
|
||||
}
|
||||
|
||||
func TestWriteCompatibleTaskResponseReturnsSSEWhenStreamIsTrue(t *testing.T) {
|
||||
executor := &fakeTaskExecutor{
|
||||
deltas: []string{"hel", "lo"},
|
||||
output: map[string]any{"id": "chatcmpl-test", "object": "chat.completion"},
|
||||
}
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/v1/chat/completions", nil)
|
||||
recorder := httptest.NewRecorder()
|
||||
|
||||
writeCompatibleTaskResponse(context.Background(), recorder, req, executor, "chat.completions", "gpt-test", store.GatewayTask{ID: "task-test"}, &auth.User{}, true)
|
||||
|
||||
if executor.executeCalls != 0 || executor.streamCalls != 1 {
|
||||
t.Fatalf("expected stream execute only, got execute=%d stream=%d", executor.executeCalls, executor.streamCalls)
|
||||
}
|
||||
if contentType := recorder.Header().Get("Content-Type"); contentType != "text/event-stream" {
|
||||
t.Fatalf("Content-Type=%q want text/event-stream", contentType)
|
||||
}
|
||||
body := recorder.Body.String()
|
||||
for _, want := range []string{"event: message", `"content":"hel"`, `"content":"lo"`, `"finish_reason":"stop"`} {
|
||||
if !strings.Contains(body, want) {
|
||||
t.Fatalf("SSE body missing %s: %s", want, body)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type fakeTaskExecutor struct {
|
||||
executeCalls int
|
||||
streamCalls int
|
||||
deltas []string
|
||||
output map[string]any
|
||||
}
|
||||
|
||||
func (f *fakeTaskExecutor) Execute(context.Context, store.GatewayTask, *auth.User) (runner.Result, error) {
|
||||
f.executeCalls++
|
||||
return runner.Result{Output: f.output}, nil
|
||||
}
|
||||
|
||||
func (f *fakeTaskExecutor) ExecuteStream(_ context.Context, _ store.GatewayTask, _ *auth.User, onDelta clients.StreamDelta) (runner.Result, error) {
|
||||
f.streamCalls++
|
||||
for _, delta := range f.deltas {
|
||||
if err := onDelta(delta); err != nil {
|
||||
return runner.Result{}, err
|
||||
}
|
||||
}
|
||||
return runner.Result{Output: f.output}, nil
|
||||
}
|
||||
@ -316,13 +316,17 @@ VALUES ($1, 5, '{"purpose":"core-flow"}'::jsonb)`, inviteCode); err != nil {
|
||||
} `json:"task"`
|
||||
}
|
||||
defaultTextModel := "openai:gpt-4o-mini"
|
||||
doJSON(t, server.URL, http.MethodPost, "/api/v1/chat/completions", apiKeyResponse.Secret, map[string]any{
|
||||
var apiV1Chat map[string]any
|
||||
doAPIV1ChatCompletionAndLoadTask(t, ctx, testPool, server.URL, apiKeyResponse.Secret, map[string]any{
|
||||
"model": defaultTextModel,
|
||||
"runMode": "simulation",
|
||||
"simulation": true,
|
||||
"simulationDurationMs": 5,
|
||||
"messages": []map[string]any{{"role": "user", "content": "ping"}},
|
||||
}, http.StatusAccepted, &taskResponse)
|
||||
}, "default-chat-"+suffixText, http.StatusOK, &apiV1Chat, &taskResponse.Task)
|
||||
if apiV1Chat["object"] != "chat.completion" {
|
||||
t.Fatalf("unexpected api v1 chat response: %+v", apiV1Chat)
|
||||
}
|
||||
if taskResponse.Task.ID == "" || taskResponse.Task.Status != "succeeded" || taskResponse.Task.RunMode != "simulation" {
|
||||
t.Fatalf("unexpected task response: %+v", taskResponse.Task)
|
||||
}
|
||||
@ -513,13 +517,13 @@ LIMIT 1`).Scan(&gptImageModelTypesRaw); err != nil {
|
||||
ErrorCode string `json:"errorCode"`
|
||||
} `json:"task"`
|
||||
}
|
||||
doJSON(t, server.URL, http.MethodPost, "/api/v1/chat/completions", chatOnlyAPIKeyResponse.Secret, map[string]any{
|
||||
doAPIV1ChatCompletionAndLoadTask(t, ctx, testPool, server.URL, chatOnlyAPIKeyResponse.Secret, map[string]any{
|
||||
"model": deniedModel,
|
||||
"runMode": "simulation",
|
||||
"simulation": true,
|
||||
"simulationDurationMs": 5,
|
||||
"messages": []map[string]any{{"role": "user", "content": "permission deny"}},
|
||||
}, http.StatusAccepted, &deniedTask)
|
||||
}, "permission-deny-"+suffixText, http.StatusNotFound, nil, &deniedTask.Task)
|
||||
if deniedTask.Task.Status != "failed" || deniedTask.Task.ErrorCode != "no_model_candidate" {
|
||||
t.Fatalf("deny access rule should hide denied model from runtime candidates: %+v", deniedTask.Task)
|
||||
}
|
||||
@ -561,13 +565,13 @@ LIMIT 1`).Scan(&gptImageModelTypesRaw); err != nil {
|
||||
ErrorCode string `json:"errorCode"`
|
||||
} `json:"task"`
|
||||
}
|
||||
doJSON(t, server.URL, http.MethodPost, "/api/v1/chat/completions", chatOnlyAPIKeyResponse.Secret, map[string]any{
|
||||
doAPIV1ChatCompletionAndLoadTask(t, ctx, testPool, server.URL, chatOnlyAPIKeyResponse.Secret, map[string]any{
|
||||
"model": controlledModel,
|
||||
"runMode": "simulation",
|
||||
"simulation": true,
|
||||
"simulationDurationMs": 5,
|
||||
"messages": []map[string]any{{"role": "user", "content": "allow should block other keys"}},
|
||||
}, http.StatusAccepted, &blockedControlledTask)
|
||||
}, "permission-allow-block-"+suffixText, http.StatusNotFound, nil, &blockedControlledTask.Task)
|
||||
if blockedControlledTask.Task.Status != "failed" || blockedControlledTask.Task.ErrorCode != "no_model_candidate" {
|
||||
t.Fatalf("allow access rule should make the resource unavailable to unmatched subjects: %+v", blockedControlledTask.Task)
|
||||
}
|
||||
@ -586,13 +590,13 @@ LIMIT 1`).Scan(&gptImageModelTypesRaw); err != nil {
|
||||
Status string `json:"status"`
|
||||
} `json:"task"`
|
||||
}
|
||||
doJSON(t, server.URL, http.MethodPost, "/api/v1/chat/completions", chatOnlyAPIKeyResponse.Secret, map[string]any{
|
||||
doAPIV1ChatCompletionAndLoadTask(t, ctx, testPool, server.URL, chatOnlyAPIKeyResponse.Secret, map[string]any{
|
||||
"model": controlledModel,
|
||||
"runMode": "simulation",
|
||||
"simulation": true,
|
||||
"simulationDurationMs": 5,
|
||||
"messages": []map[string]any{{"role": "user", "content": "allow should pass"}},
|
||||
}, http.StatusAccepted, &allowedControlledTask)
|
||||
}, "permission-allow-pass-"+suffixText, http.StatusOK, nil, &allowedControlledTask.Task)
|
||||
if allowedControlledTask.Task.Status != "succeeded" {
|
||||
t.Fatalf("matching allow access rule should make the controlled model usable: %+v", allowedControlledTask.Task)
|
||||
}
|
||||
@ -645,13 +649,13 @@ WHERE gateway_user_id = $1::uuid
|
||||
FinalChargeAmount float64 `json:"finalChargeAmount"`
|
||||
} `json:"task"`
|
||||
}
|
||||
doJSON(t, server.URL, http.MethodPost, "/api/v1/chat/completions", apiKeyResponse.Secret, map[string]any{
|
||||
doAPIV1ChatCompletionAndLoadTask(t, ctx, testPool, server.URL, apiKeyResponse.Secret, map[string]any{
|
||||
"model": pricingModel,
|
||||
"runMode": "simulation",
|
||||
"simulation": true,
|
||||
"simulationDurationMs": 5,
|
||||
"messages": []map[string]any{{"role": "user", "content": "priced ping"}},
|
||||
}, http.StatusAccepted, &pricingTask)
|
||||
}, "pricing-chat-"+suffixText, http.StatusOK, nil, &pricingTask.Task)
|
||||
if pricingTask.Task.Status != "succeeded" || !floatNear(pricingTask.Task.FinalChargeAmount, 0.028) {
|
||||
t.Fatalf("custom pricing rule set should drive text billing, got task=%+v", pricingTask.Task)
|
||||
}
|
||||
@ -757,14 +761,14 @@ WHERE reference_type = 'gateway_task'
|
||||
ErrorCode string `json:"errorCode"`
|
||||
} `json:"task"`
|
||||
}
|
||||
doJSON(t, server.URL, http.MethodPost, "/api/v1/chat/completions", apiKeyResponse.Secret, map[string]any{
|
||||
doAPIV1ChatCompletionAndLoadTask(t, ctx, testPool, server.URL, apiKeyResponse.Secret, map[string]any{
|
||||
"model": rateLimitedModel,
|
||||
"runMode": "simulation",
|
||||
"simulation": true,
|
||||
"simulationDurationMs": 5,
|
||||
"simulationProfile": "non_retryable_failure",
|
||||
"messages": []map[string]any{{"role": "user", "content": "failed first"}},
|
||||
}, http.StatusAccepted, &rateLimitFailedTask)
|
||||
}, "rate-limit-failed-first-"+suffixText, http.StatusBadGateway, nil, &rateLimitFailedTask.Task)
|
||||
if rateLimitFailedTask.Task.Status != "failed" || rateLimitFailedTask.Task.ErrorCode != "bad_request" {
|
||||
t.Fatalf("failed rate-limited task should fail before consuming rpm: %+v", rateLimitFailedTask.Task)
|
||||
}
|
||||
@ -774,13 +778,13 @@ WHERE reference_type = 'gateway_task'
|
||||
Status string `json:"status"`
|
||||
} `json:"task"`
|
||||
}
|
||||
doJSON(t, server.URL, http.MethodPost, "/api/v1/chat/completions", apiKeyResponse.Secret, map[string]any{
|
||||
doAPIV1ChatCompletionAndLoadTask(t, ctx, testPool, server.URL, apiKeyResponse.Secret, map[string]any{
|
||||
"model": rateLimitedModel,
|
||||
"runMode": "simulation",
|
||||
"simulation": true,
|
||||
"simulationDurationMs": 5,
|
||||
"messages": []map[string]any{{"role": "user", "content": "first"}},
|
||||
}, http.StatusAccepted, &rateLimitTaskOne)
|
||||
}, "rate-limit-first-"+suffixText, http.StatusOK, nil, &rateLimitTaskOne.Task)
|
||||
if rateLimitTaskOne.Task.Status != "succeeded" {
|
||||
t.Fatalf("first rate-limited task should succeed: %+v", rateLimitTaskOne.Task)
|
||||
}
|
||||
@ -790,13 +794,13 @@ WHERE reference_type = 'gateway_task'
|
||||
ErrorCode string `json:"errorCode"`
|
||||
} `json:"task"`
|
||||
}
|
||||
doJSON(t, server.URL, http.MethodPost, "/api/v1/chat/completions", apiKeyResponse.Secret, map[string]any{
|
||||
doAPIV1ChatCompletionAndLoadTask(t, ctx, testPool, server.URL, apiKeyResponse.Secret, map[string]any{
|
||||
"model": rateLimitedModel,
|
||||
"runMode": "simulation",
|
||||
"simulation": true,
|
||||
"simulationDurationMs": 5,
|
||||
"messages": []map[string]any{{"role": "user", "content": "second"}},
|
||||
}, http.StatusAccepted, &rateLimitTaskTwo)
|
||||
}, "rate-limit-second-"+suffixText, http.StatusTooManyRequests, nil, &rateLimitTaskTwo.Task)
|
||||
if rateLimitTaskTwo.Task.Status != "failed" || rateLimitTaskTwo.Task.ErrorCode != "rate_limit" {
|
||||
t.Fatalf("runtime policy rate limit should fail second task with rate_limit: %+v", rateLimitTaskTwo.Task)
|
||||
}
|
||||
@ -808,12 +812,12 @@ WHERE reference_type = 'gateway_task'
|
||||
AsyncMode bool `json:"asyncMode"`
|
||||
} `json:"task"`
|
||||
}
|
||||
doJSONWithHeaders(t, server.URL, http.MethodPost, "/api/v1/chat/completions", apiKeyResponse.Secret, map[string]any{
|
||||
doJSONWithHeaders(t, server.URL, http.MethodPost, "/api/v1/responses", apiKeyResponse.Secret, map[string]any{
|
||||
"model": rateLimitedModel,
|
||||
"runMode": "simulation",
|
||||
"simulation": true,
|
||||
"simulationDurationMs": 5,
|
||||
"messages": []map[string]any{{"role": "user", "content": "async queued"}},
|
||||
"input": "async queued",
|
||||
}, map[string]string{"X-Async": "true"}, http.StatusAccepted, &asyncRateLimitTask)
|
||||
if asyncRateLimitTask.TaskID == "" || asyncRateLimitTask.Task.ID != asyncRateLimitTask.TaskID || !asyncRateLimitTask.Task.AsyncMode {
|
||||
t.Fatalf("async task response should expose task id and async mode: %+v", asyncRateLimitTask)
|
||||
@ -984,11 +988,11 @@ WHERE reference_type = 'gateway_task'
|
||||
Status string `json:"status"`
|
||||
} `json:"task"`
|
||||
}
|
||||
doJSON(t, server.URL, http.MethodPost, "/api/v1/chat/completions", apiKeyResponse.Secret, map[string]any{
|
||||
doAPIV1ChatCompletionAndLoadTask(t, ctx, testPool, server.URL, apiKeyResponse.Secret, map[string]any{
|
||||
"model": failoverModel,
|
||||
"runMode": "simulation",
|
||||
"messages": []map[string]any{{"role": "user", "content": "retry please"}},
|
||||
}, http.StatusAccepted, &failoverTask)
|
||||
}, "failover-chat-"+suffixText, http.StatusOK, nil, &failoverTask.Task)
|
||||
if failoverTask.Task.Status != "succeeded" {
|
||||
t.Fatalf("failover task should succeed through second client: %+v", failoverTask.Task)
|
||||
}
|
||||
@ -1103,13 +1107,13 @@ WHERE failed.id = $1::uuid`, failedPlatform.ID, successPlatform.ID, unrelatedPri
|
||||
Status string `json:"status"`
|
||||
} `json:"task"`
|
||||
}
|
||||
doJSON(t, server.URL, http.MethodPost, "/api/v1/chat/completions", apiKeyResponse.Secret, map[string]any{
|
||||
doAPIV1ChatCompletionAndLoadTask(t, ctx, testPool, server.URL, apiKeyResponse.Secret, map[string]any{
|
||||
"model": degradeModel,
|
||||
"runMode": "simulation",
|
||||
"simulation": true,
|
||||
"simulationDurationMs": 5,
|
||||
"messages": []map[string]any{{"role": "user", "content": "degrade please"}},
|
||||
}, http.StatusAccepted, °radeTask)
|
||||
}, "degrade-chat-"+suffixText, http.StatusOK, nil, °radeTask.Task)
|
||||
if degradeTask.Task.Status != "succeeded" {
|
||||
t.Fatalf("degrade task should fail over after cooling down failed model: %+v", degradeTask.Task)
|
||||
}
|
||||
@ -1170,13 +1174,13 @@ WHERE m.platform_id = $1::uuid
|
||||
ErrorCode string `json:"errorCode"`
|
||||
} `json:"task"`
|
||||
}
|
||||
doJSON(t, server.URL, http.MethodPost, "/api/v1/chat/completions", apiKeyResponse.Secret, map[string]any{
|
||||
doAPIV1ChatCompletionAndLoadTask(t, ctx, testPool, server.URL, apiKeyResponse.Secret, map[string]any{
|
||||
"model": autoDisableModel,
|
||||
"runMode": "simulation",
|
||||
"simulation": true,
|
||||
"simulationDurationMs": 5,
|
||||
"messages": []map[string]any{{"role": "user", "content": "disable please"}},
|
||||
}, http.StatusAccepted, &autoDisableTask)
|
||||
}, "auto-disable-chat-"+suffixText, http.StatusBadGateway, nil, &autoDisableTask.Task)
|
||||
if autoDisableTask.Task.Status != "failed" || autoDisableTask.Task.ErrorCode != "invalid_api_key" {
|
||||
t.Fatalf("auto disable task should fail with invalid_api_key: %+v", autoDisableTask.Task)
|
||||
}
|
||||
@ -1293,12 +1297,12 @@ WHERE m.platform_id = $1::uuid
|
||||
AsyncMode bool `json:"asyncMode"`
|
||||
} `json:"task"`
|
||||
}
|
||||
doJSONWithHeaders(t, server.URL, http.MethodPost, "/api/v1/chat/completions", apiKeyResponse.Secret, map[string]any{
|
||||
doJSONWithHeaders(t, server.URL, http.MethodPost, "/api/v1/responses", apiKeyResponse.Secret, map[string]any{
|
||||
"model": defaultTextModel,
|
||||
"runMode": "simulation",
|
||||
"simulation": true,
|
||||
"simulationDurationMs": 2000,
|
||||
"messages": []map[string]any{{"role": "user", "content": "river worker restart"}},
|
||||
"input": "river worker restart",
|
||||
}, map[string]string{"X-Async": "true"}, http.StatusAccepted, &restartAsyncTask)
|
||||
if restartAsyncTask.TaskID == "" || !restartAsyncTask.Task.AsyncMode {
|
||||
t.Fatalf("restart async task should be accepted as async: %+v", restartAsyncTask)
|
||||
@ -1453,6 +1457,20 @@ func doJSONWithHeaders(t *testing.T, baseURL string, method string, path string,
|
||||
}
|
||||
}
|
||||
|
||||
func doAPIV1ChatCompletionAndLoadTask(t *testing.T, ctx context.Context, pool *pgxpool.Pool, baseURL string, token string, payload map[string]any, marker string, expectedStatus int, responseOut any, taskDetailOut any) string {
|
||||
t.Helper()
|
||||
payload["integrationTestMarker"] = marker
|
||||
if responseOut == nil {
|
||||
responseOut = &map[string]any{}
|
||||
}
|
||||
doJSON(t, baseURL, http.MethodPost, "/api/v1/chat/completions", token, payload, expectedStatus, responseOut)
|
||||
taskID := waitForTaskIDByRequestField(t, ctx, pool, "integrationTestMarker", marker, 2*time.Second)
|
||||
if taskDetailOut != nil {
|
||||
doJSON(t, baseURL, http.MethodGet, "/api/v1/tasks/"+taskID, token, nil, http.StatusOK, taskDetailOut)
|
||||
}
|
||||
return taskID
|
||||
}
|
||||
|
||||
type taskWaitDetail struct {
|
||||
ID string `json:"id"`
|
||||
Status string `json:"status"`
|
||||
@ -1481,6 +1499,11 @@ func waitForTaskStatus(t *testing.T, baseURL string, token string, taskID string
|
||||
}
|
||||
|
||||
func waitForTaskIDByRequestMarker(t *testing.T, ctx context.Context, pool *pgxpool.Pool, marker string, timeout time.Duration) string {
|
||||
t.Helper()
|
||||
return waitForTaskIDByRequestField(t, ctx, pool, "cancelTestId", marker, timeout)
|
||||
}
|
||||
|
||||
func waitForTaskIDByRequestField(t *testing.T, ctx context.Context, pool *pgxpool.Pool, key string, value string, timeout time.Duration) string {
|
||||
t.Helper()
|
||||
deadline := time.Now().Add(timeout)
|
||||
for time.Now().Before(deadline) {
|
||||
@ -1488,15 +1511,15 @@ func waitForTaskIDByRequestMarker(t *testing.T, ctx context.Context, pool *pgxpo
|
||||
err := pool.QueryRow(ctx, `
|
||||
SELECT id::text
|
||||
FROM gateway_tasks
|
||||
WHERE request->>'cancelTestId' = $1
|
||||
WHERE request->>$1 = $2
|
||||
ORDER BY created_at DESC
|
||||
LIMIT 1`, marker).Scan(&taskID)
|
||||
LIMIT 1`, key, value).Scan(&taskID)
|
||||
if err == nil && taskID != "" {
|
||||
return taskID
|
||||
}
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
}
|
||||
t.Fatalf("task with request marker %s was not created within %s", marker, timeout)
|
||||
t.Fatalf("task with request %s=%s was not created within %s", key, value, timeout)
|
||||
return ""
|
||||
}
|
||||
|
||||
@ -1577,13 +1600,13 @@ func assertLoadAvoidanceSimulatedRetryChain(t *testing.T, ctx context.Context, t
|
||||
ErrorCode string `json:"errorCode"`
|
||||
} `json:"task"`
|
||||
}
|
||||
doJSON(t, baseURL, http.MethodPost, "/api/v1/chat/completions", runtimeToken, map[string]any{
|
||||
doAPIV1ChatCompletionAndLoadTask(t, ctx, testPool, baseURL, runtimeToken, map[string]any{
|
||||
"model": model,
|
||||
"runMode": "simulation",
|
||||
"simulation": true,
|
||||
"simulationDurationMs": 5,
|
||||
"messages": []map[string]any{{"role": "user", "content": "load avoidance retry chain"}},
|
||||
}, http.StatusAccepted, &taskResponse)
|
||||
}, "load-avoidance-"+suffixText, http.StatusBadGateway, nil, &taskResponse.Task)
|
||||
if taskResponse.Task.ID == "" || taskResponse.Task.Status != "failed" || taskResponse.Task.ErrorCode != "bad_request" {
|
||||
t.Fatalf("load avoidance task should only fail after avoided clients are retried, got %+v", taskResponse.Task)
|
||||
}
|
||||
|
||||
@ -13,6 +13,7 @@ import (
|
||||
"github.com/easyai/easyai-ai-gateway/apps/api/internal/auth"
|
||||
"github.com/easyai/easyai-ai-gateway/apps/api/internal/clients"
|
||||
"github.com/easyai/easyai-ai-gateway/apps/api/internal/netproxy"
|
||||
"github.com/easyai/easyai-ai-gateway/apps/api/internal/runner"
|
||||
"github.com/easyai/easyai-ai-gateway/apps/api/internal/store"
|
||||
)
|
||||
|
||||
@ -858,7 +859,7 @@ func (s *Server) listModelRateLimitStatuses(w http.ResponseWriter, r *http.Reque
|
||||
|
||||
// createTask godoc
|
||||
// @Summary 创建或执行 AI 任务
|
||||
// @Description 网关任务接口按 model 选择平台模型;/api/v1 路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。
|
||||
// @Description 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。
|
||||
// @Tags tasks
|
||||
// @Accept json
|
||||
// @Produce json
|
||||
@ -874,7 +875,6 @@ func (s *Server) listModelRateLimitStatuses(w http.ResponseWriter, r *http.Reque
|
||||
// @Failure 404 {object} ErrorEnvelope
|
||||
// @Failure 429 {object} ErrorEnvelope
|
||||
// @Failure 502 {object} ErrorEnvelope
|
||||
// @Router /api/v1/chat/completions [post]
|
||||
// @Router /api/v1/responses [post]
|
||||
// @Router /api/v1/images/generations [post]
|
||||
// @Router /api/v1/images/edits [post]
|
||||
@ -909,13 +909,13 @@ func (s *Server) createTask(kind string, compatible bool) http.Handler {
|
||||
writeError(w, http.StatusForbidden, "api key scope does not allow this capability")
|
||||
return
|
||||
}
|
||||
asyncMode := asyncRequest(r)
|
||||
responsePlan := planTaskResponse(kind, compatible, body, r)
|
||||
|
||||
task, err := s.store.CreateTask(r.Context(), store.CreateTaskInput{
|
||||
Kind: kind,
|
||||
Model: model,
|
||||
RunMode: runModeFromRequest(body),
|
||||
Async: asyncMode,
|
||||
Async: responsePlan.asyncMode,
|
||||
Request: body,
|
||||
}, user)
|
||||
if err != nil {
|
||||
@ -923,7 +923,7 @@ func (s *Server) createTask(kind string, compatible bool) http.Handler {
|
||||
writeError(w, http.StatusInternalServerError, "create task failed")
|
||||
return
|
||||
}
|
||||
if asyncMode {
|
||||
if responsePlan.asyncMode {
|
||||
if err := s.runner.EnqueueAsyncTask(r.Context(), task); err != nil {
|
||||
writeError(w, http.StatusInternalServerError, err.Error(), "enqueue_failed")
|
||||
return
|
||||
@ -933,65 +933,8 @@ func (s *Server) createTask(kind string, compatible bool) http.Handler {
|
||||
}
|
||||
runCtx, cancelRun := s.requestExecutionContext(r)
|
||||
defer cancelRun()
|
||||
if compatible {
|
||||
if boolValue(body, "stream") {
|
||||
flusher := prepareCompatibleStream(w)
|
||||
result, runErr := s.runner.ExecuteStream(runCtx, task, user, func(delta string) error {
|
||||
if !requestStillConnected(r) {
|
||||
return nil
|
||||
}
|
||||
writeCompatibleDelta(w, kind, model, delta)
|
||||
if flusher != nil {
|
||||
flusher.Flush()
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if runErr != nil {
|
||||
if !requestStillConnected(r) {
|
||||
return
|
||||
}
|
||||
status := statusFromRunError(runErr)
|
||||
errorPayload := map[string]any{
|
||||
"code": runErrorCode(runErr),
|
||||
"message": runErrorMessage(runErr),
|
||||
"status": status,
|
||||
}
|
||||
if result.Task.ID != "" {
|
||||
errorPayload["taskId"] = result.Task.ID
|
||||
}
|
||||
if result.Task.RequestID != "" {
|
||||
errorPayload["requestId"] = result.Task.RequestID
|
||||
}
|
||||
for key, value := range runErrorDetails(runErr) {
|
||||
errorPayload[key] = value
|
||||
}
|
||||
sendSSE(w, "error", map[string]any{"error": errorPayload})
|
||||
if flusher != nil {
|
||||
flusher.Flush()
|
||||
}
|
||||
return
|
||||
}
|
||||
if !requestStillConnected(r) {
|
||||
return
|
||||
}
|
||||
writeCompatibleDone(w, kind, model, result.Output)
|
||||
if flusher != nil {
|
||||
flusher.Flush()
|
||||
}
|
||||
return
|
||||
}
|
||||
result, runErr := s.runner.Execute(runCtx, task, user)
|
||||
if runErr != nil {
|
||||
if !requestStillConnected(r) {
|
||||
return
|
||||
}
|
||||
writeErrorWithDetails(w, statusFromRunError(runErr), runErrorMessage(runErr), runErrorDetails(runErr), runErrorCode(runErr))
|
||||
return
|
||||
}
|
||||
if !requestStillConnected(r) {
|
||||
return
|
||||
}
|
||||
writeJSON(w, http.StatusOK, result.Output)
|
||||
if responsePlan.compatibleMode {
|
||||
writeCompatibleTaskResponse(runCtx, w, r, s.runner, kind, model, task, user, responsePlan.streamMode)
|
||||
return
|
||||
}
|
||||
result, runErr := s.runner.Execute(runCtx, task, user)
|
||||
@ -1006,6 +949,29 @@ func (s *Server) createTask(kind string, compatible bool) http.Handler {
|
||||
})
|
||||
}
|
||||
|
||||
// createAPIV1ChatCompletions godoc
|
||||
// @Summary 创建 Chat Completions
|
||||
// @Description /api/v1/chat/completions 同步执行:stream=true 返回 text/event-stream SSE;stream=false 或未传返回兼容 JSON;该接口忽略 X-Async。
|
||||
// @Tags tasks
|
||||
// @Accept json
|
||||
// @Produce json
|
||||
// @Produce text/event-stream
|
||||
// @Security BearerAuth
|
||||
// @Param X-Async header bool false "该接口忽略此参数"
|
||||
// @Param input body TaskRequest true "Chat Completions 请求"
|
||||
// @Success 200 {object} ChatCompletionCompatibleResponse
|
||||
// @Failure 400 {object} ErrorEnvelope
|
||||
// @Failure 401 {object} ErrorEnvelope
|
||||
// @Failure 402 {object} ErrorEnvelope
|
||||
// @Failure 403 {object} ErrorEnvelope
|
||||
// @Failure 404 {object} ErrorEnvelope
|
||||
// @Failure 429 {object} ErrorEnvelope
|
||||
// @Failure 502 {object} ErrorEnvelope
|
||||
// @Router /api/v1/chat/completions [post]
|
||||
func (s *Server) createAPIV1ChatCompletions() http.Handler {
|
||||
return s.createTask("chat.completions", false)
|
||||
}
|
||||
|
||||
func (s *Server) requestExecutionContext(r *http.Request) (context.Context, context.CancelFunc) {
|
||||
base := context.WithoutCancel(r.Context())
|
||||
if s.ctx == nil {
|
||||
@ -1031,11 +997,98 @@ func requestStillConnected(r *http.Request) bool {
|
||||
}
|
||||
}
|
||||
|
||||
type taskExecutor interface {
|
||||
Execute(context.Context, store.GatewayTask, *auth.User) (runner.Result, error)
|
||||
ExecuteStream(context.Context, store.GatewayTask, *auth.User, clients.StreamDelta) (runner.Result, error)
|
||||
}
|
||||
|
||||
func writeCompatibleTaskResponse(runCtx context.Context, w http.ResponseWriter, r *http.Request, executor taskExecutor, kind string, model string, task store.GatewayTask, user *auth.User, streamMode bool) {
|
||||
if streamMode {
|
||||
flusher := prepareCompatibleStream(w)
|
||||
result, runErr := executor.ExecuteStream(runCtx, task, user, func(delta string) error {
|
||||
if !requestStillConnected(r) {
|
||||
return nil
|
||||
}
|
||||
writeCompatibleDelta(w, kind, model, delta)
|
||||
if flusher != nil {
|
||||
flusher.Flush()
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if runErr != nil {
|
||||
if !requestStillConnected(r) {
|
||||
return
|
||||
}
|
||||
status := statusFromRunError(runErr)
|
||||
errorPayload := map[string]any{
|
||||
"code": runErrorCode(runErr),
|
||||
"message": runErrorMessage(runErr),
|
||||
"status": status,
|
||||
}
|
||||
if result.Task.ID != "" {
|
||||
errorPayload["taskId"] = result.Task.ID
|
||||
}
|
||||
if result.Task.RequestID != "" {
|
||||
errorPayload["requestId"] = result.Task.RequestID
|
||||
}
|
||||
for key, value := range runErrorDetails(runErr) {
|
||||
errorPayload[key] = value
|
||||
}
|
||||
sendSSE(w, "error", map[string]any{"error": errorPayload})
|
||||
if flusher != nil {
|
||||
flusher.Flush()
|
||||
}
|
||||
return
|
||||
}
|
||||
if !requestStillConnected(r) {
|
||||
return
|
||||
}
|
||||
writeCompatibleDone(w, kind, model, result.Output)
|
||||
if flusher != nil {
|
||||
flusher.Flush()
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
result, runErr := executor.Execute(runCtx, task, user)
|
||||
if runErr != nil {
|
||||
if !requestStillConnected(r) {
|
||||
return
|
||||
}
|
||||
writeErrorWithDetails(w, statusFromRunError(runErr), runErrorMessage(runErr), runErrorDetails(runErr), runErrorCode(runErr))
|
||||
return
|
||||
}
|
||||
if !requestStillConnected(r) {
|
||||
return
|
||||
}
|
||||
writeJSON(w, http.StatusOK, result.Output)
|
||||
}
|
||||
|
||||
func asyncRequest(r *http.Request) bool {
|
||||
value := strings.TrimSpace(strings.ToLower(r.Header.Get("x-async")))
|
||||
return value == "1" || value == "true" || value == "yes" || value == "on"
|
||||
}
|
||||
|
||||
type taskResponsePlan struct {
|
||||
asyncMode bool
|
||||
compatibleMode bool
|
||||
streamMode bool
|
||||
}
|
||||
|
||||
func planTaskResponse(kind string, compatible bool, body map[string]any, r *http.Request) taskResponsePlan {
|
||||
asyncMode := asyncRequest(r)
|
||||
compatibleMode := compatible
|
||||
if kind == "chat.completions" && !compatible {
|
||||
asyncMode = false
|
||||
compatibleMode = true
|
||||
}
|
||||
return taskResponsePlan{
|
||||
asyncMode: asyncMode,
|
||||
compatibleMode: compatibleMode,
|
||||
streamMode: boolValue(body, "stream"),
|
||||
}
|
||||
}
|
||||
|
||||
func writeTaskAccepted(w http.ResponseWriter, task store.GatewayTask) {
|
||||
writeJSON(w, http.StatusAccepted, map[string]any{
|
||||
"taskId": task.ID,
|
||||
|
||||
@ -242,6 +242,32 @@ type CompatibleResponse struct {
|
||||
Usage map[string]interface{} `json:"usage,omitempty"`
|
||||
}
|
||||
|
||||
type ChatCompletionCompatibleResponse struct {
|
||||
ID string `json:"id" example:"chatcmpl-123"`
|
||||
Object string `json:"object" example:"chat.completion"`
|
||||
Created int64 `json:"created,omitempty" example:"1710000000"`
|
||||
Model string `json:"model" example:"gpt-4o-mini"`
|
||||
Choices []ChatCompletionChoice `json:"choices"`
|
||||
Usage *ChatCompletionUsage `json:"usage,omitempty"`
|
||||
}
|
||||
|
||||
type ChatCompletionChoice struct {
|
||||
Index int `json:"index" example:"0"`
|
||||
Message ChatCompletionChoiceMessage `json:"message"`
|
||||
FinishReason string `json:"finish_reason,omitempty" example:"stop"`
|
||||
}
|
||||
|
||||
type ChatCompletionChoiceMessage struct {
|
||||
Role string `json:"role" example:"assistant"`
|
||||
Content string `json:"content" example:"Hello"`
|
||||
}
|
||||
|
||||
type ChatCompletionUsage struct {
|
||||
PromptTokens int `json:"prompt_tokens,omitempty" example:"12"`
|
||||
CompletionTokens int `json:"completion_tokens,omitempty" example:"8"`
|
||||
TotalTokens int `json:"total_tokens,omitempty" example:"20"`
|
||||
}
|
||||
|
||||
type NetworkProxyConfigResponse struct {
|
||||
GlobalHTTPProxy string `json:"globalHttpProxy" example:"http://127.0.0.1:7890"`
|
||||
GlobalHTTPProxySet bool `json:"globalHttpProxySet" example:"true"`
|
||||
|
||||
@ -126,7 +126,7 @@ func NewServerWithContext(ctx context.Context, cfg config.Config, db *store.Stor
|
||||
mux.Handle("GET /api/v1/playground/models", server.auth.Require(auth.PermissionBasic, http.HandlerFunc(server.listPlayableModels)))
|
||||
mux.Handle("GET /api/admin/runtime/rate-limit-windows", server.requireAdmin(auth.PermissionPower, http.HandlerFunc(server.listRateLimitWindows)))
|
||||
mux.Handle("GET /api/admin/runtime/model-rate-limits", server.requireAdmin(auth.PermissionPower, http.HandlerFunc(server.listModelRateLimitStatuses)))
|
||||
mux.Handle("POST /api/v1/chat/completions", server.auth.Require(auth.PermissionBasic, server.createTask("chat.completions", false)))
|
||||
mux.Handle("POST /api/v1/chat/completions", server.auth.Require(auth.PermissionBasic, server.createAPIV1ChatCompletions()))
|
||||
mux.Handle("POST /api/v1/responses", server.auth.Require(auth.PermissionBasic, server.createTask("responses", false)))
|
||||
mux.Handle("POST /api/v1/images/generations", server.auth.Require(auth.PermissionBasic, server.createTask("images.generations", false)))
|
||||
mux.Handle("POST /api/v1/images/edits", server.auth.Require(auth.PermissionBasic, server.createTask("images.edits", false)))
|
||||
|
||||
Loading…
Reference in New Issue
Block a user