From dc148662103173fa0a989b9c222f4f950ebc59f1 Mon Sep 17 00:00:00 2001 From: wangbo Date: Sun, 7 Jun 2026 10:26:57 +0800 Subject: [PATCH] =?UTF-8?q?=E8=BF=81=E7=A7=BB=E9=9F=B3=E9=A2=91=E7=94=9F?= =?UTF-8?q?=E6=88=90=E4=B8=8E=E8=AF=AD=E9=9F=B3=E5=90=88=E6=88=90=E5=88=B0?= =?UTF-8?q?=20gateway=20=E5=B9=B6=E8=A1=A5=E5=85=85=20simulation=20?= =?UTF-8?q?=E6=B5=8B=E8=AF=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- apps/api/docs/swagger.json | 976 +++++++++++++++++- apps/api/docs/swagger.yaml | 646 +++++++++++- apps/api/internal/clients/clients_test.go | 102 ++ apps/api/internal/clients/media_clients.go | 119 +++ apps/api/internal/clients/provider_task.go | 25 +- .../internal/clients/provider_task_test.go | 75 ++ apps/api/internal/clients/simulation.go | 66 ++ .../httpapi/chat_completions_mode_test.go | 44 + .../httpapi/core_flow_integration_test.go | 84 +- apps/api/internal/httpapi/handlers.go | 19 + apps/api/internal/httpapi/openapi_models.go | 39 +- apps/api/internal/httpapi/server.go | 9 + .../api/internal/httpapi/simulation_assets.go | 11 +- apps/api/internal/runner/pricing.go | 18 + apps/api/internal/runner/pricing_test.go | 60 ++ apps/api/internal/runner/service.go | 22 +- apps/api/internal/runner/upload.go | 3 + apps/api/internal/store/base_models.go | 4 + apps/api/internal/store/candidates.go | 94 +- .../internal/store/model_billing_filter.go | 4 +- .../store/model_billing_filter_test.go | 34 + .../0046_audio_music_openapi_simulation.sql | 76 ++ 22 files changed, 2475 insertions(+), 55 deletions(-) create mode 100644 apps/api/migrations/0046_audio_music_openapi_simulation.sql diff --git a/apps/api/docs/swagger.json b/apps/api/docs/swagger.json index f1101a9..96aee5c 100644 --- a/apps/api/docs/swagger.json +++ b/apps/api/docs/swagger.json @@ -1965,6 +1965,64 @@ } } }, + "/api/admin/runtime/model-rate-limits/{platformModelID}/restore": { + "post": { + "security": [ + { + "BearerAuth": [] + } + ], + 
"description": "管理端手动解除平台模型停用、模型冷却、平台冷却或平台禁用状态,使其重新参与路由。", + "produces": [ + "application/json" + ], + "tags": [ + "runtime" + ], + "summary": "恢复平台模型运行状态", + "parameters": [ + { + "type": "string", + "description": "平台模型 ID", + "name": "platformModelID", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/store.ModelRateLimitStatus" + } + }, + "401": { + "description": "Unauthorized", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "403": { + "description": "Forbidden", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "404": { + "description": "Not Found", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + } + } + } + }, "/api/admin/runtime/policy-sets": { "get": { "security": [ @@ -4533,6 +4591,99 @@ } } }, + "/api/v1/music/generations": { + "post": { + "security": [ + { + "BearerAuth": [] + } + ], + "description": "网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "tasks" + ], + "summary": "创建或执行 AI 任务", + "parameters": [ + { + "type": "boolean", + "description": "true 时异步创建任务并返回 202", + "name": "X-Async", + "in": "header" + }, + { + "description": "AI 任务请求,字段随任务类型变化", + "name": "input", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/httpapi.TaskRequest" + } + } + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/httpapi.CompatibleResponse" + } + }, + "202": { + "description": "Accepted", + "schema": { + "$ref": "#/definitions/httpapi.TaskAcceptedResponse" + } + }, + "400": { + "description": "Bad Request", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" 
+ } + }, + "401": { + "description": "Unauthorized", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "402": { + "description": "Payment Required", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "403": { + "description": "Forbidden", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "404": { + "description": "Not Found", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "429": { + "description": "Too Many Requests", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "502": { + "description": "Bad Gateway", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + } + } + } + }, "/api/v1/platforms": { "get": { "security": [ @@ -4920,6 +5071,192 @@ } } }, + "/api/v1/song/generations": { + "post": { + "security": [ + { + "BearerAuth": [] + } + ], + "description": "网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "tasks" + ], + "summary": "创建或执行 AI 任务", + "parameters": [ + { + "type": "boolean", + "description": "true 时异步创建任务并返回 202", + "name": "X-Async", + "in": "header" + }, + { + "description": "AI 任务请求,字段随任务类型变化", + "name": "input", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/httpapi.TaskRequest" + } + } + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/httpapi.CompatibleResponse" + } + }, + "202": { + "description": "Accepted", + "schema": { + "$ref": "#/definitions/httpapi.TaskAcceptedResponse" + } + }, + "400": { + "description": "Bad Request", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "401": { + "description": "Unauthorized", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "402": { + "description": "Payment Required", + "schema": { + "$ref": 
"#/definitions/httpapi.ErrorEnvelope" + } + }, + "403": { + "description": "Forbidden", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "404": { + "description": "Not Found", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "429": { + "description": "Too Many Requests", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "502": { + "description": "Bad Gateway", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + } + } + } + }, + "/api/v1/speech/generations": { + "post": { + "security": [ + { + "BearerAuth": [] + } + ], + "description": "网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "tasks" + ], + "summary": "创建或执行 AI 任务", + "parameters": [ + { + "type": "boolean", + "description": "true 时异步创建任务并返回 202", + "name": "X-Async", + "in": "header" + }, + { + "description": "AI 任务请求,字段随任务类型变化", + "name": "input", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/httpapi.TaskRequest" + } + } + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/httpapi.CompatibleResponse" + } + }, + "202": { + "description": "Accepted", + "schema": { + "$ref": "#/definitions/httpapi.TaskAcceptedResponse" + } + }, + "400": { + "description": "Bad Request", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "401": { + "description": "Unauthorized", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "402": { + "description": "Payment Required", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "403": { + "description": "Forbidden", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "404": { + "description": "Not Found", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + 
"429": { + "description": "Too Many Requests", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "502": { + "description": "Bad Gateway", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + } + } + } + }, "/api/v1/tasks": { "get": { "security": [ @@ -6018,6 +6355,99 @@ } } }, + "/music/generations": { + "post": { + "security": [ + { + "BearerAuth": [] + } + ], + "description": "网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "tasks" + ], + "summary": "创建或执行 AI 任务", + "parameters": [ + { + "type": "boolean", + "description": "true 时异步创建任务并返回 202", + "name": "X-Async", + "in": "header" + }, + { + "description": "AI 任务请求,字段随任务类型变化", + "name": "input", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/httpapi.TaskRequest" + } + } + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/httpapi.CompatibleResponse" + } + }, + "202": { + "description": "Accepted", + "schema": { + "$ref": "#/definitions/httpapi.TaskAcceptedResponse" + } + }, + "400": { + "description": "Bad Request", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "401": { + "description": "Unauthorized", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "402": { + "description": "Payment Required", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "403": { + "description": "Forbidden", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "404": { + "description": "Not Found", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "429": { + "description": "Too Many Requests", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "502": { + "description": "Bad Gateway", + "schema": { + "$ref": 
"#/definitions/httpapi.ErrorEnvelope" + } + } + } + } + }, "/readyz": { "get": { "description": "检查 Postgres 是否可用;数据库不可用时返回 503。", @@ -6230,6 +6660,192 @@ } } }, + "/song/generations": { + "post": { + "security": [ + { + "BearerAuth": [] + } + ], + "description": "网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "tasks" + ], + "summary": "创建或执行 AI 任务", + "parameters": [ + { + "type": "boolean", + "description": "true 时异步创建任务并返回 202", + "name": "X-Async", + "in": "header" + }, + { + "description": "AI 任务请求,字段随任务类型变化", + "name": "input", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/httpapi.TaskRequest" + } + } + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/httpapi.CompatibleResponse" + } + }, + "202": { + "description": "Accepted", + "schema": { + "$ref": "#/definitions/httpapi.TaskAcceptedResponse" + } + }, + "400": { + "description": "Bad Request", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "401": { + "description": "Unauthorized", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "402": { + "description": "Payment Required", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "403": { + "description": "Forbidden", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "404": { + "description": "Not Found", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "429": { + "description": "Too Many Requests", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "502": { + "description": "Bad Gateway", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + } + } + } + }, + "/speech/generations": { + "post": { + "security": [ + { + "BearerAuth": [] + } + ], + "description": "网关任务接口按 model 
选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "tasks" + ], + "summary": "创建或执行 AI 任务", + "parameters": [ + { + "type": "boolean", + "description": "true 时异步创建任务并返回 202", + "name": "X-Async", + "in": "header" + }, + { + "description": "AI 任务请求,字段随任务类型变化", + "name": "input", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/httpapi.TaskRequest" + } + } + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/httpapi.CompatibleResponse" + } + }, + "202": { + "description": "Accepted", + "schema": { + "$ref": "#/definitions/httpapi.TaskAcceptedResponse" + } + }, + "400": { + "description": "Bad Request", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "401": { + "description": "Unauthorized", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "402": { + "description": "Payment Required", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "403": { + "description": "Forbidden", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "404": { + "description": "Not Found", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "429": { + "description": "Too Many Requests", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "502": { + "description": "Bad Gateway", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + } + } + } + }, "/static/generated/{asset}": { "get": { "description": "从本地生成资源目录读取图片、视频等任务产物;不存在时返回 404。", @@ -6267,10 +6883,11 @@ }, "/static/simulation/{asset}": { "get": { - "description": "返回本地模拟模式使用的图片、视频封面或短视频资源。", + "description": "返回本地模拟模式使用的图片、视频封面、短视频或音频资源。", "produces": [ "image/svg+xml", - "video/mp4" + "video/mp4", + "audio/wav" ], "tags": [ "simulation" @@ -6279,7 +6896,7 @@ "parameters": [ { "type": 
"string", - "description": "资源文件名,可选 image.svg、image.png、image-edit.svg、image-edit.png、video-poster.svg、video.mp4", + "description": "资源文件名,可选 image.svg、image.png、image-edit.svg、image-edit.png、video-poster.svg、video.mp4、audio.wav", "name": "asset", "in": "path", "required": true @@ -6776,6 +7393,99 @@ } } }, + "/v1/music/generations": { + "post": { + "security": [ + { + "BearerAuth": [] + } + ], + "description": "网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "tasks" + ], + "summary": "创建或执行 AI 任务", + "parameters": [ + { + "type": "boolean", + "description": "true 时异步创建任务并返回 202", + "name": "X-Async", + "in": "header" + }, + { + "description": "AI 任务请求,字段随任务类型变化", + "name": "input", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/httpapi.TaskRequest" + } + } + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/httpapi.CompatibleResponse" + } + }, + "202": { + "description": "Accepted", + "schema": { + "$ref": "#/definitions/httpapi.TaskAcceptedResponse" + } + }, + "400": { + "description": "Bad Request", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "401": { + "description": "Unauthorized", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "402": { + "description": "Payment Required", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "403": { + "description": "Forbidden", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "404": { + "description": "Not Found", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "429": { + "description": "Too Many Requests", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "502": { + "description": "Bad Gateway", + "schema": { + "$ref": 
"#/definitions/httpapi.ErrorEnvelope" + } + } + } + } + }, "/v1/reranks": { "post": { "security": [ @@ -6961,6 +7671,192 @@ } } } + }, + "/v1/song/generations": { + "post": { + "security": [ + { + "BearerAuth": [] + } + ], + "description": "网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "tasks" + ], + "summary": "创建或执行 AI 任务", + "parameters": [ + { + "type": "boolean", + "description": "true 时异步创建任务并返回 202", + "name": "X-Async", + "in": "header" + }, + { + "description": "AI 任务请求,字段随任务类型变化", + "name": "input", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/httpapi.TaskRequest" + } + } + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/httpapi.CompatibleResponse" + } + }, + "202": { + "description": "Accepted", + "schema": { + "$ref": "#/definitions/httpapi.TaskAcceptedResponse" + } + }, + "400": { + "description": "Bad Request", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "401": { + "description": "Unauthorized", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "402": { + "description": "Payment Required", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "403": { + "description": "Forbidden", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "404": { + "description": "Not Found", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "429": { + "description": "Too Many Requests", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "502": { + "description": "Bad Gateway", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + } + } + } + }, + "/v1/speech/generations": { + "post": { + "security": [ + { + "BearerAuth": [] + } + ], + "description": "网关任务接口按 model 选择平台模型;除 
/api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible 路径同步返回兼容响应或 SSE 流。", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "tasks" + ], + "summary": "创建或执行 AI 任务", + "parameters": [ + { + "type": "boolean", + "description": "true 时异步创建任务并返回 202", + "name": "X-Async", + "in": "header" + }, + { + "description": "AI 任务请求,字段随任务类型变化", + "name": "input", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/httpapi.TaskRequest" + } + } + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/httpapi.CompatibleResponse" + } + }, + "202": { + "description": "Accepted", + "schema": { + "$ref": "#/definitions/httpapi.TaskAcceptedResponse" + } + }, + "400": { + "description": "Bad Request", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "401": { + "description": "Unauthorized", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "402": { + "description": "Payment Required", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "403": { + "description": "Forbidden", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "404": { + "description": "Not Found", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "429": { + "description": "Too Many Requests", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + }, + "502": { + "description": "Bad Gateway", + "schema": { + "$ref": "#/definitions/httpapi.ErrorEnvelope" + } + } + } + } } }, "definitions": { @@ -7813,14 +8709,30 @@ "httpapi.TaskRequest": { "type": "object", "properties": { + "audioWeight": { + "type": "number", + "example": 0.65 + }, + "customMode": { + "type": "boolean", + "example": false + }, "duration": { "type": "integer", "example": 5 }, + "emotion": { + "type": "string", + "example": "happy" + }, "input": { "type": "string", "example": "Tell me a short story" }, + 
"makeInstrumental": { + "type": "boolean", + "example": false + }, "max_tokens": { "type": "integer", "example": 512 @@ -7835,6 +8747,14 @@ "type": "string", "example": "gpt-4o-mini" }, + "negativeTags": { + "type": "string", + "example": "noise" + }, + "pitch": { + "type": "number", + "example": 0 + }, "prompt": { "type": "string", "example": "A watercolor robot reading a book" @@ -7856,9 +8776,53 @@ "type": "string", "example": "1024x1024" }, + "speed": { + "type": "number", + "example": 1 + }, "stream": { "type": "boolean", "example": false + }, + "style": { + "type": "string", + "example": "city pop, bright synth" + }, + "styleWeight": { + "type": "number", + "example": 0.65 + }, + "tags": { + "type": "string", + "example": "city pop, synth" + }, + "text": { + "type": "string", + "example": "Hello from EasyAI audio synthesis." + }, + "text_file_id": { + "type": "string", + "example": "" + }, + "title": { + "type": "string", + "example": "Useful Tools" + }, + "vocalGender": { + "type": "string", + "example": "f" + }, + "voice_id": { + "type": "string", + "example": "female-shaonv" + }, + "vol": { + "type": "number", + "example": 1 + }, + "weirdnessConstraint": { + "type": "number", + "example": 0.35 } } }, @@ -8783,6 +9747,9 @@ "type": "array", "items": {} }, + "conversationId": { + "type": "string" + }, "createdAt": { "type": "string" }, @@ -8823,6 +9790,9 @@ "modelType": { "type": "string" }, + "newMessageCount": { + "type": "integer" + }, "remoteTaskId": { "type": "string" }, diff --git a/apps/api/docs/swagger.yaml b/apps/api/docs/swagger.yaml index 166862e..0574246 100644 --- a/apps/api/docs/swagger.yaml +++ b/apps/api/docs/swagger.yaml @@ -568,12 +568,24 @@ definitions: type: object httpapi.TaskRequest: properties: + audioWeight: + example: 0.65 + type: number + customMode: + example: false + type: boolean duration: example: 5 type: integer + emotion: + example: happy + type: string input: example: Tell me a short story type: string + makeInstrumental: + 
example: false + type: boolean max_tokens: example: 512 type: integer @@ -584,6 +596,12 @@ definitions: model: example: gpt-4o-mini type: string + negativeTags: + example: noise + type: string + pitch: + example: 0 + type: number prompt: example: A watercolor robot reading a book type: string @@ -601,9 +619,42 @@ definitions: size: example: 1024x1024 type: string + speed: + example: 1 + type: number stream: example: false type: boolean + style: + example: city pop, bright synth + type: string + styleWeight: + example: 0.65 + type: number + tags: + example: city pop, synth + type: string + text: + example: Hello from EasyAI audio synthesis. + type: string + text_file_id: + example: "" + type: string + title: + example: Useful Tools + type: string + vocalGender: + example: f + type: string + voice_id: + example: female-shaonv + type: string + vol: + example: 1 + type: number + weirdnessConstraint: + example: 0.35 + type: number type: object httpapi.TenantListResponse: properties: @@ -1230,6 +1281,8 @@ definitions: billings: items: {} type: array + conversationId: + type: string createdAt: type: string error: @@ -1257,6 +1310,8 @@ definitions: type: string modelType: type: string + newMessageCount: + type: integer remoteTaskId: type: string remoteTaskPayload: @@ -3557,6 +3612,43 @@ paths: summary: 列出模型限流状态 tags: - runtime + /api/admin/runtime/model-rate-limits/{platformModelID}/restore: + post: + description: 管理端手动解除平台模型停用、模型冷却、平台冷却或平台禁用状态,使其重新参与路由。 + parameters: + - description: 平台模型 ID + in: path + name: platformModelID + required: true + type: string + produces: + - application/json + responses: + "200": + description: OK + schema: + $ref: '#/definitions/store.ModelRateLimitStatus' + "401": + description: Unauthorized + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "403": + description: Forbidden + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "404": + description: Not Found + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "500": + 
description: Internal Server Error + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + security: + - BearerAuth: [] + summary: 恢复平台模型运行状态 + tags: + - runtime /api/admin/runtime/policy-sets: get: description: 管理端返回可分配给平台、模型或用户组的运行策略集。 @@ -5204,6 +5296,67 @@ paths: summary: 列出可调用模型 tags: - playground + /api/v1/music/generations: + post: + consumes: + - application/json + description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible + 路径同步返回兼容响应或 SSE 流。 + parameters: + - description: true 时异步创建任务并返回 202 + in: header + name: X-Async + type: boolean + - description: AI 任务请求,字段随任务类型变化 + in: body + name: input + required: true + schema: + $ref: '#/definitions/httpapi.TaskRequest' + produces: + - application/json + responses: + "200": + description: OK + schema: + $ref: '#/definitions/httpapi.CompatibleResponse' + "202": + description: Accepted + schema: + $ref: '#/definitions/httpapi.TaskAcceptedResponse' + "400": + description: Bad Request + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "401": + description: Unauthorized + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "402": + description: Payment Required + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "403": + description: Forbidden + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "404": + description: Not Found + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "429": + description: Too Many Requests + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "502": + description: Bad Gateway + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + security: + - BearerAuth: [] + summary: 创建或执行 AI 任务 + tags: + - tasks /api/v1/platforms: get: description: 按当前用户可访问模型过滤平台,仅返回启用且存在可访问模型的平台。 @@ -5454,6 +5607,128 @@ paths: summary: 创建或执行 AI 任务 tags: - tasks + /api/v1/song/generations: + post: + consumes: + - application/json + description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible + 
路径同步返回兼容响应或 SSE 流。 + parameters: + - description: true 时异步创建任务并返回 202 + in: header + name: X-Async + type: boolean + - description: AI 任务请求,字段随任务类型变化 + in: body + name: input + required: true + schema: + $ref: '#/definitions/httpapi.TaskRequest' + produces: + - application/json + responses: + "200": + description: OK + schema: + $ref: '#/definitions/httpapi.CompatibleResponse' + "202": + description: Accepted + schema: + $ref: '#/definitions/httpapi.TaskAcceptedResponse' + "400": + description: Bad Request + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "401": + description: Unauthorized + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "402": + description: Payment Required + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "403": + description: Forbidden + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "404": + description: Not Found + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "429": + description: Too Many Requests + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "502": + description: Bad Gateway + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + security: + - BearerAuth: [] + summary: 创建或执行 AI 任务 + tags: + - tasks + /api/v1/speech/generations: + post: + consumes: + - application/json + description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible + 路径同步返回兼容响应或 SSE 流。 + parameters: + - description: true 时异步创建任务并返回 202 + in: header + name: X-Async + type: boolean + - description: AI 任务请求,字段随任务类型变化 + in: body + name: input + required: true + schema: + $ref: '#/definitions/httpapi.TaskRequest' + produces: + - application/json + responses: + "200": + description: OK + schema: + $ref: '#/definitions/httpapi.CompatibleResponse' + "202": + description: Accepted + schema: + $ref: '#/definitions/httpapi.TaskAcceptedResponse' + "400": + description: Bad Request + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "401": + description: Unauthorized + schema: + 
$ref: '#/definitions/httpapi.ErrorEnvelope' + "402": + description: Payment Required + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "403": + description: Forbidden + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "404": + description: Not Found + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "429": + description: Too Many Requests + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "502": + description: Bad Gateway + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + security: + - BearerAuth: [] + summary: 创建或执行 AI 任务 + tags: + - tasks /api/v1/tasks: get: description: 按当前用户列出任务,支持关键字、模型类型、时间范围和分页过滤。 @@ -6165,6 +6440,67 @@ paths: summary: 创建或执行 AI 任务 tags: - tasks + /music/generations: + post: + consumes: + - application/json + description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible + 路径同步返回兼容响应或 SSE 流。 + parameters: + - description: true 时异步创建任务并返回 202 + in: header + name: X-Async + type: boolean + - description: AI 任务请求,字段随任务类型变化 + in: body + name: input + required: true + schema: + $ref: '#/definitions/httpapi.TaskRequest' + produces: + - application/json + responses: + "200": + description: OK + schema: + $ref: '#/definitions/httpapi.CompatibleResponse' + "202": + description: Accepted + schema: + $ref: '#/definitions/httpapi.TaskAcceptedResponse' + "400": + description: Bad Request + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "401": + description: Unauthorized + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "402": + description: Payment Required + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "403": + description: Forbidden + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "404": + description: Not Found + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "429": + description: Too Many Requests + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "502": + description: Bad Gateway + schema: + $ref: 
'#/definitions/httpapi.ErrorEnvelope' + security: + - BearerAuth: [] + summary: 创建或执行 AI 任务 + tags: + - tasks /readyz: get: description: 检查 Postgres 是否可用;数据库不可用时返回 503。 @@ -6304,6 +6640,128 @@ paths: summary: 创建或执行 AI 任务 tags: - tasks + /song/generations: + post: + consumes: + - application/json + description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible + 路径同步返回兼容响应或 SSE 流。 + parameters: + - description: true 时异步创建任务并返回 202 + in: header + name: X-Async + type: boolean + - description: AI 任务请求,字段随任务类型变化 + in: body + name: input + required: true + schema: + $ref: '#/definitions/httpapi.TaskRequest' + produces: + - application/json + responses: + "200": + description: OK + schema: + $ref: '#/definitions/httpapi.CompatibleResponse' + "202": + description: Accepted + schema: + $ref: '#/definitions/httpapi.TaskAcceptedResponse' + "400": + description: Bad Request + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "401": + description: Unauthorized + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "402": + description: Payment Required + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "403": + description: Forbidden + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "404": + description: Not Found + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "429": + description: Too Many Requests + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "502": + description: Bad Gateway + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + security: + - BearerAuth: [] + summary: 创建或执行 AI 任务 + tags: + - tasks + /speech/generations: + post: + consumes: + - application/json + description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible + 路径同步返回兼容响应或 SSE 流。 + parameters: + - description: true 时异步创建任务并返回 202 + in: header + name: X-Async + type: boolean + - description: AI 任务请求,字段随任务类型变化 + in: body + name: input + required: true + schema: + $ref: 
'#/definitions/httpapi.TaskRequest' + produces: + - application/json + responses: + "200": + description: OK + schema: + $ref: '#/definitions/httpapi.CompatibleResponse' + "202": + description: Accepted + schema: + $ref: '#/definitions/httpapi.TaskAcceptedResponse' + "400": + description: Bad Request + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "401": + description: Unauthorized + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "402": + description: Payment Required + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "403": + description: Forbidden + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "404": + description: Not Found + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "429": + description: Too Many Requests + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "502": + description: Bad Gateway + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + security: + - BearerAuth: [] + summary: 创建或执行 AI 任务 + tags: + - tasks /static/generated/{asset}: get: description: 从本地生成资源目录读取图片、视频等任务产物;不存在时返回 404。 @@ -6329,9 +6787,9 @@ paths: - static /static/simulation/{asset}: get: - description: 返回本地模拟模式使用的图片、视频封面或短视频资源。 + description: 返回本地模拟模式使用的图片、视频封面、短视频或音频资源。 parameters: - - description: 资源文件名,可选 image.svg、image.png、image-edit.svg、image-edit.png、video-poster.svg、video.mp4 + - description: 资源文件名,可选 image.svg、image.png、image-edit.svg、image-edit.png、video-poster.svg、video.mp4、audio.wav in: path name: asset required: true @@ -6339,6 +6797,7 @@ paths: produces: - image/svg+xml - video/mp4 + - audio/wav responses: "200": description: OK @@ -6662,6 +7121,67 @@ paths: summary: 创建或执行 AI 任务 tags: - tasks + /v1/music/generations: + post: + consumes: + - application/json + description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible + 路径同步返回兼容响应或 SSE 流。 + parameters: + - description: true 时异步创建任务并返回 202 + in: header + name: X-Async + type: boolean + - description: AI 任务请求,字段随任务类型变化 + in: 
body + name: input + required: true + schema: + $ref: '#/definitions/httpapi.TaskRequest' + produces: + - application/json + responses: + "200": + description: OK + schema: + $ref: '#/definitions/httpapi.CompatibleResponse' + "202": + description: Accepted + schema: + $ref: '#/definitions/httpapi.TaskAcceptedResponse' + "400": + description: Bad Request + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "401": + description: Unauthorized + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "402": + description: Payment Required + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "403": + description: Forbidden + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "404": + description: Not Found + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "429": + description: Too Many Requests + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "502": + description: Bad Gateway + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + security: + - BearerAuth: [] + summary: 创建或执行 AI 任务 + tags: + - tasks /v1/reranks: post: consumes: @@ -6784,6 +7304,128 @@ paths: summary: 创建或执行 AI 任务 tags: - tasks + /v1/song/generations: + post: + consumes: + - application/json + description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible + 路径同步返回兼容响应或 SSE 流。 + parameters: + - description: true 时异步创建任务并返回 202 + in: header + name: X-Async + type: boolean + - description: AI 任务请求,字段随任务类型变化 + in: body + name: input + required: true + schema: + $ref: '#/definitions/httpapi.TaskRequest' + produces: + - application/json + responses: + "200": + description: OK + schema: + $ref: '#/definitions/httpapi.CompatibleResponse' + "202": + description: Accepted + schema: + $ref: '#/definitions/httpapi.TaskAcceptedResponse' + "400": + description: Bad Request + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "401": + description: Unauthorized + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "402": + description: 
Payment Required + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "403": + description: Forbidden + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "404": + description: Not Found + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "429": + description: Too Many Requests + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "502": + description: Bad Gateway + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + security: + - BearerAuth: [] + summary: 创建或执行 AI 任务 + tags: + - tasks + /v1/speech/generations: + post: + consumes: + - application/json + description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果,OpenAI-compatible + 路径同步返回兼容响应或 SSE 流。 + parameters: + - description: true 时异步创建任务并返回 202 + in: header + name: X-Async + type: boolean + - description: AI 任务请求,字段随任务类型变化 + in: body + name: input + required: true + schema: + $ref: '#/definitions/httpapi.TaskRequest' + produces: + - application/json + responses: + "200": + description: OK + schema: + $ref: '#/definitions/httpapi.CompatibleResponse' + "202": + description: Accepted + schema: + $ref: '#/definitions/httpapi.TaskAcceptedResponse' + "400": + description: Bad Request + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "401": + description: Unauthorized + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "402": + description: Payment Required + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "403": + description: Forbidden + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "404": + description: Not Found + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "429": + description: Too Many Requests + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + "502": + description: Bad Gateway + schema: + $ref: '#/definitions/httpapi.ErrorEnvelope' + security: + - BearerAuth: [] + summary: 创建或执行 AI 任务 + tags: + - tasks schemes: - http - https diff --git a/apps/api/internal/clients/clients_test.go 
b/apps/api/internal/clients/clients_test.go index dac7435..623f0dc 100644 --- a/apps/api/internal/clients/clients_test.go +++ b/apps/api/internal/clients/clients_test.go @@ -2,6 +2,7 @@ package clients import ( "context" + "encoding/base64" "encoding/json" "net/http" "net/http/httptest" @@ -65,6 +66,35 @@ func TestSimulationClientReturnsVideoDemoAssets(t *testing.T) { } } +func TestSimulationClientReturnsAudioDemoAssets(t *testing.T) { + response, err := (SimulationClient{}).Run(context.Background(), Request{ + Kind: "speech.generations", + ModelType: "text_to_speech", + Model: "speech-2.6-turbo", + Body: map[string]any{ + "text": "hello from simulation", + "voice_id": "female-shaonv", + "count": 2, + "simulationDurationMs": 5, + }, + Candidate: store.RuntimeModelCandidate{Provider: "simulation"}, + }) + if err != nil { + t.Fatalf("run simulation audio client: %v", err) + } + data, _ := response.Result["data"].([]any) + if len(data) != 2 || response.Result["status"] != "success" { + t.Fatalf("unexpected simulated audio response: %+v", response.Result) + } + item, _ := data[0].(map[string]any) + if item["type"] != "audio" || item["url"] != "/static/simulation/audio.wav" || item["audio_url"] != "/static/simulation/audio.wav" { + t.Fatalf("unexpected simulated audio item: %+v", item) + } + if item["revised_text"] != "hello from simulation" || item["assetSource"] != "simulation" { + t.Fatalf("unexpected simulated audio metadata: %+v", item) + } +} + func TestSimulationDurationDefaultsByMediaType(t *testing.T) { imageDuration := simulationDuration(Request{Kind: "images.generations"}) if imageDuration < 10*time.Second || imageDuration > 30*time.Second { @@ -74,12 +104,84 @@ func TestSimulationDurationDefaultsByMediaType(t *testing.T) { if videoDuration < 2*time.Minute || videoDuration > 3*time.Minute { t.Fatalf("video simulation duration should default to 2-3m, got %s", videoDuration) } + audioDuration := simulationDuration(Request{Kind: "speech.generations"}) + if 
audioDuration < 2*time.Second || audioDuration > 6*time.Second { + t.Fatalf("audio simulation duration should default to 2-6s, got %s", audioDuration) + } textDuration := simulationDuration(Request{Kind: "chat.completions"}) if textDuration < 800*time.Millisecond || textDuration > 2400*time.Millisecond { t.Fatalf("text simulation duration should keep short defaults, got %s", textDuration) } } +func TestMinimaxClientSpeechUsesT2AV2AndNormalizesAudio(t *testing.T) { + var captured map[string]any + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost || r.URL.Path != "/t2a_v2" { + t.Fatalf("unexpected request: %s %s", r.Method, r.URL.String()) + } + if got := r.Header.Get("Authorization"); got != "Bearer test-key" { + t.Fatalf("unexpected auth header: %q", got) + } + if err := json.NewDecoder(r.Body).Decode(&captured); err != nil { + t.Fatalf("decode request: %v", err) + } + w.Header().Set("x-request-id", "req-minimax-speech") + _ = json.NewEncoder(w).Encode(map[string]any{ + "data": map[string]any{"audio": "68656c6c6f"}, + "base_resp": map[string]any{"status_code": 0}, + }) + })) + defer server.Close() + + response, err := (MinimaxClient{HTTPClient: server.Client()}).Run(context.Background(), Request{ + Kind: "speech.generations", + Model: "MiniMax Speech 2.6 Turbo", + Body: map[string]any{ + "text": "hello", + "voice_id": "female-shaonv", + "speed": 1.2, + "vol": 0.8, + "pitch": -1, + "emotion": "happy", + }, + Candidate: store.RuntimeModelCandidate{ + Provider: "minimax", + BaseURL: server.URL, + ProviderModelName: "speech-2.6-turbo", + Credentials: map[string]any{"apiKey": "test-key"}, + }, + }) + if err != nil { + t.Fatalf("run minimax speech client: %v", err) + } + if captured["model"] != "speech-2.6-turbo" || captured["text"] != "hello" { + t.Fatalf("unexpected minimax speech payload: %+v", captured) + } + if _, ok := captured["voice_id"]; ok { + t.Fatalf("voice_id should be moved into 
voice_setting: %+v", captured) + } + voiceSetting, ok := captured["voice_setting"].(map[string]any) + if !ok { + t.Fatalf("missing voice_setting: %+v", captured) + } + if voiceSetting["voice_id"] != "female-shaonv" || voiceSetting["speed"] != 1.2 || voiceSetting["vol"] != 0.8 || voiceSetting["pitch"] != float64(-1) || voiceSetting["emotion"] != "happy" { + t.Fatalf("unexpected voice_setting: %+v", voiceSetting) + } + data, _ := response.Result["data"].([]any) + if len(data) != 1 { + t.Fatalf("unexpected minimax speech response: %+v", response.Result) + } + item, _ := data[0].(map[string]any) + expectedContent := "data:audio/mpeg;base64," + base64.StdEncoding.EncodeToString([]byte("hello")) + if item["type"] != "audio" || item["content"] != expectedContent || item["mime_type"] != "audio/mpeg" { + t.Fatalf("unexpected normalized audio item: %+v", item) + } + if response.RequestID != "req-minimax-speech" { + t.Fatalf("unexpected request id: %q", response.RequestID) + } +} + func TestSimulationDurationCanBeControlledByParams(t *testing.T) { fixedDuration := simulationDuration(Request{Body: map[string]any{"simulationDurationSeconds": 7}}) if fixedDuration != 7*time.Second { diff --git a/apps/api/internal/clients/media_clients.go b/apps/api/internal/clients/media_clients.go index f93a0da..2018d4a 100644 --- a/apps/api/internal/clients/media_clients.go +++ b/apps/api/internal/clients/media_clients.go @@ -2,8 +2,11 @@ package clients import ( "context" + "encoding/base64" + "encoding/hex" "net/http" "strings" + "time" ) type JimengClient struct{ HTTPClient *http.Client } @@ -15,6 +18,7 @@ type MidjourneyClient struct{ HTTPClient *http.Client } type ViduClient struct{ HTTPClient *http.Client } type AliyunBailianClient struct{ HTTPClient *http.Client } type NewAPIClient struct{ HTTPClient *http.Client } +type SunoClient struct{ HTTPClient *http.Client } func (c JimengClient) Run(ctx context.Context, request Request) (Response, error) { return providerTaskClient{HTTPClient: 
c.HTTPClient, Spec: jimengSpec()}.Run(ctx, request) @@ -33,6 +37,9 @@ func (c HunyuanVideoClient) Run(ctx context.Context, request Request) (Response, } func (c MinimaxClient) Run(ctx context.Context, request Request) (Response, error) { + if request.Kind == "speech.generations" { + return c.runSpeech(ctx, request) + } return providerTaskClient{HTTPClient: c.HTTPClient, Spec: minimaxSpec()}.Run(ctx, request) } @@ -52,6 +59,10 @@ func (c NewAPIClient) Run(ctx context.Context, request Request) (Response, error return providerTaskClient{HTTPClient: c.HTTPClient, Spec: newAPISpec()}.Run(ctx, request) } +func (c SunoClient) Run(ctx context.Context, request Request) (Response, error) { + return providerTaskClient{HTTPClient: c.HTTPClient, Spec: sunoSpec()}.Run(ctx, request) +} + func jimengSpec() providerTaskSpec { return providerTaskSpec{ Name: "jimeng", @@ -149,6 +160,114 @@ func minimaxSpec() providerTaskSpec { } } +func (c MinimaxClient) runSpeech(ctx context.Context, request Request) (Response, error) { + startedAt := time.Now() + payload := minimaxSpeechPayload(request) + result, requestID, err := providerPostJSON(ctx, httpClient(request.HTTPClient, c.HTTPClient), providerURL(request.Candidate.BaseURL, "/t2a_v2"), payload, request.Candidate.Credentials, "bearer") + finishedAt := time.Now() + if err != nil { + return Response{}, annotateResponseError(err, requestID, startedAt, finishedAt) + } + audioHex := strings.TrimSpace(stringFromPathValue(valueAtPath(result, "data.audio"))) + if audioHex == "" { + message := firstNonEmptyString(valueAtPath(result, "base_resp.status_msg"), valueAtPath(result, "message"), "minimax speech audio is missing") + return Response{}, &ClientError{Code: "invalid_response", Message: message, RequestID: firstNonEmptyString(requestID, requestIDFromResult(result)), ResponseStartedAt: startedAt, ResponseFinishedAt: finishedAt, ResponseDurationMS: responseDurationMS(startedAt, finishedAt), Retryable: false} + } + audioBytes, err := 
hex.DecodeString(audioHex) + if err != nil { + return Response{}, &ClientError{Code: "invalid_response", Message: "minimax speech audio hex is invalid: " + err.Error(), RequestID: firstNonEmptyString(requestID, requestIDFromResult(result)), ResponseStartedAt: startedAt, ResponseFinishedAt: finishedAt, ResponseDurationMS: responseDurationMS(startedAt, finishedAt), Retryable: false} + } + normalized := cloneMapAny(result) + normalized["status"] = "success" + normalized["created"] = time.Now().UnixMilli() + normalized["model"] = request.Model + normalized["raw_data"] = cloneMapAny(result) + normalized["data"] = []any{map[string]any{ + "type": "audio", + "content": "data:audio/mpeg;base64," + base64.StdEncoding.EncodeToString(audioBytes), + "mime_type": "audio/mpeg", + "uploaded": false, + }} + return Response{ + Result: normalized, + RequestID: firstNonEmptyString(requestID, requestIDFromResult(result)), + Progress: providerProgress(request), + ResponseStartedAt: startedAt, + ResponseFinishedAt: finishedAt, + ResponseDurationMS: responseDurationMS(startedAt, finishedAt), + }, nil +} + +func minimaxSpeechPayload(request Request) map[string]any { + body := cloneBody(request.Body) + body["model"] = upstreamModelName(request.Candidate) + voiceID := firstNonEmptyString(body["voice_id"], body["voiceId"]) + speed := firstPresent(body["speed"], float64(1)) + vol := firstPresent(body["vol"], body["volume"], float64(1)) + pitch := firstPresent(body["pitch"], float64(0)) + voiceSetting := map[string]any{ + "voice_id": voiceID, + "speed": speed, + "vol": vol, + "pitch": pitch, + } + if emotion := firstNonEmptyString(body["emotion"]); emotion != "" { + voiceSetting["emotion"] = emotion + } + delete(body, "voice_id") + delete(body, "voiceId") + delete(body, "speed") + delete(body, "vol") + delete(body, "volume") + delete(body, "pitch") + delete(body, "emotion") + body["voice_setting"] = voiceSetting + return body +} + +func sunoSpec() providerTaskSpec { + return providerTaskSpec{ + 
Name: "suno", + SubmitPath: func(Request, map[string]any) string { return "/generator/suno" }, + PollPath: func(_ Request, upstreamTaskID string, _ map[string]any) string { + return "/v2/sunoinfo?id=" + upstreamTaskID + }, + Auth: "bearer", + TaskIDPaths: []string{"data"}, + StatusPaths: []string{"data.status"}, + SuccessStatuses: []string{"succeeded", "complete", "completed"}, + FailureStatuses: []string{"failed"}, + DefaultSubmitBody: func(request Request, body map[string]any) map[string]any { + body["task"] = "create" + body["model"] = sunoMappedModel(upstreamModelName(request.Candidate)) + if body["customMode"] == nil { + body["customMode"] = false + } + if body["makeInstrumental"] == nil { + body["makeInstrumental"] = false + } + return body + }, + } +} + +func sunoMappedModel(model string) string { + switch strings.TrimSpace(model) { + case "chirp-v3-0", "chirp-v3-5": + return "v40" + case "chirp-v4-0": + return "v40" + case "chirp-v4-5": + return "v45" + case "chirp-v4-5+": + return "v45+" + case "chirp-v5-0": + return "v50" + default: + return model + } +} + func midjourneySpec() providerTaskSpec { return providerTaskSpec{ Name: "midjourney", diff --git a/apps/api/internal/clients/provider_task.go b/apps/api/internal/clients/provider_task.go index d24d717..24a0a4f 100644 --- a/apps/api/internal/clients/provider_task.go +++ b/apps/api/internal/clients/provider_task.go @@ -29,7 +29,7 @@ type providerTaskClient struct { } func (c providerTaskClient) Run(ctx context.Context, request Request) (Response, error) { - if request.Kind != "images.generations" && request.Kind != "images.edits" && request.Kind != "videos.generations" { + if !providerTaskKindSupported(request.Kind) { return Response{}, &ClientError{Code: "unsupported_kind", Message: "unsupported " + c.Spec.Name + " request kind", Retryable: false} } startedAt := time.Now() @@ -119,6 +119,15 @@ func (c providerTaskClient) Run(ctx context.Context, request Request) (Response, } } +func 
providerTaskKindSupported(kind string) bool { + switch kind { + case "images.generations", "images.edits", "videos.generations", "song.generations", "music.generations", "speech.generations": + return true + default: + return false + } +} + func (c providerTaskClient) submit(ctx context.Context, request Request, payload map[string]any) (map[string]any, string, error) { path := c.Spec.SubmitPath(request, payload) return providerPostJSON(ctx, httpClient(request.HTTPClient, c.HTTPClient), providerURL(request.Candidate.BaseURL, path), payload, request.Candidate.Credentials, c.Spec.Auth) @@ -287,7 +296,7 @@ func containsStatus(values []string, status string) bool { } func hasProviderTaskResult(result map[string]any) bool { - return result["data"] != nil || valueAtPath(result, "output.image_urls") != nil || valueAtPath(result, "output.video_url") != nil || valueAtPath(result, "Response.ResultVideoUrl") != nil || valueAtPath(result, "Response.ResultImages") != nil || result["urls"] != nil + return result["data"] != nil || valueAtPath(result, "data.result") != nil || valueAtPath(result, "data.audio") != nil || valueAtPath(result, "output.image_urls") != nil || valueAtPath(result, "output.video_url") != nil || valueAtPath(result, "Response.ResultVideoUrl") != nil || valueAtPath(result, "Response.ResultImages") != nil || result["audio_url"] != nil || result["urls"] != nil } func normalizeProviderTaskResult(request Request, spec providerTaskSpec, result map[string]any, upstreamTaskID string) map[string]any { @@ -316,9 +325,19 @@ func providerTaskData(request Request, result map[string]any) []any { if request.Kind == "videos.generations" || strings.Contains(request.ModelType, "video") { fileType = "video" } + if request.Kind == "song.generations" || request.Kind == "music.generations" || request.Kind == "speech.generations" || strings.Contains(request.ModelType, "audio") || strings.Contains(request.ModelType, "speech") { + fileType = "audio" + } urlValues := []any{} for _, 
path := range []string{ "urls", + "audio_url", + "audioUrl", + "data.audio_url", + "data.audioUrl", + "data.result", + "data.result.audio_url", + "data.result.audioUrl", "image_urls", "data.image_urls", "data.images", @@ -368,7 +387,7 @@ func appendURLValues(out *[]any, value any) { *out = append(*out, item) } case map[string]any: - for _, key := range []string{"url", "image_url", "imageUrl", "video_url", "videoUrl", "content", "output"} { + for _, key := range []string{"url", "audio_url", "audioUrl", "image_url", "imageUrl", "video_url", "videoUrl", "content", "output"} { if item := strings.TrimSpace(fmt.Sprint(typed[key])); item != "" && item != "<nil>" { *out = append(*out, item) return diff --git a/apps/api/internal/clients/provider_task_test.go b/apps/api/internal/clients/provider_task_test.go index f12fc62..7c6883e 100644 --- a/apps/api/internal/clients/provider_task_test.go +++ b/apps/api/internal/clients/provider_task_test.go @@ -2,6 +2,7 @@ package clients import ( "context" + "encoding/json" "errors" "net/http" "net/http/httptest" @@ -227,6 +228,80 @@ func TestProviderTaskClientsSubmitAndPoll(t *testing.T) { } } +func TestSunoClientSubmitsAndPollsAudioGeneration(t *testing.T) { + var submitted map[string]any + var submittedRemoteTaskID string + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if got := r.Header.Get("Authorization"); got != "Bearer test-key" { + t.Fatalf("unexpected auth header: %q", got) + } + w.Header().Set("Content-Type", "application/json") + w.Header().Set("x-request-id", "req-suno") + switch { + case r.Method == http.MethodPost && r.URL.Path == "/generator/suno": + if err := json.NewDecoder(r.Body).Decode(&submitted); err != nil { + t.Fatalf("decode suno submit request: %v", err) + } + _, _ = w.Write([]byte(`{"code":200,"data":"suno-task"}`)) + case r.Method == http.MethodGet && r.URL.Path == "/v2/sunoinfo" && r.URL.Query().Get("id") == "suno-task": + _, _ =
w.Write([]byte(`{"code":200,"data":{"status":"succeeded","result":[{"audio_url":"https://cdn.example/song.mp3"}]}}`)) + default: + t.Fatalf("unexpected request: %s %s", r.Method, r.URL.String()) + } + })) + defer server.Close() + + response, err := (SunoClient{HTTPClient: server.Client()}).Run(context.Background(), Request{ + Kind: "song.generations", + ModelType: "audio_generate", + Model: "Suno V5", + Body: map[string]any{ + "prompt": "city lights", + "tags": "pop", + "negativeTags": "noise", + }, + Candidate: store.RuntimeModelCandidate{ + Provider: "suno", + SpecType: "suno", + BaseURL: server.URL, + Credentials: map[string]any{"apiKey": "test-key"}, + PlatformConfig: map[string]any{"pollIntervalMs": 1, "pollTimeoutMs": 1000}, + ProviderModelName: "chirp-v5-0", + ModelType: "audio_generate", + }, + OnRemoteTaskSubmitted: func(remoteTaskID string, payload map[string]any) error { + submittedRemoteTaskID = remoteTaskID + if payload["payload"] == nil || payload["submit"] == nil { + t.Fatalf("missing remote payload: %#v", payload) + } + return nil + }, + }) + if err != nil { + t.Fatalf("run suno client: %v", err) + } + if submittedRemoteTaskID != "suno-task" { + t.Fatalf("unexpected remote task id: %q", submittedRemoteTaskID) + } + if submitted["task"] != "create" || submitted["model"] != "v50" || submitted["prompt"] != "city lights" { + t.Fatalf("unexpected suno submit payload: %+v", submitted) + } + if submitted["customMode"] != false || submitted["makeInstrumental"] != false { + t.Fatalf("suno defaults should match main-server style payload: %+v", submitted) + } + data, _ := response.Result["data"].([]any) + if len(data) != 1 { + t.Fatalf("unexpected suno response: %+v", response.Result) + } + first, _ := data[0].(map[string]any) + if first["type"] != "audio" || first["url"] != "https://cdn.example/song.mp3" { + t.Fatalf("unexpected suno normalized audio item: %+v", first) + } + if response.RequestID != "req-suno" { + t.Fatalf("unexpected request id: %q", 
response.RequestID) + } +} + func TestProviderTaskClientFailureAndRetryableErrors(t *testing.T) { t.Run("poll failure", func(t *testing.T) { server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { diff --git a/apps/api/internal/clients/simulation.go b/apps/api/internal/clients/simulation.go index ded3675..2f242aa 100644 --- a/apps/api/internal/clients/simulation.go +++ b/apps/api/internal/clients/simulation.go @@ -15,6 +15,8 @@ const ( defaultSimulationTextMaxDuration = 2400 * time.Millisecond defaultSimulationImageMinDuration = 10 * time.Second defaultSimulationImageMaxDuration = 30 * time.Second + defaultSimulationAudioMinDuration = 2 * time.Second + defaultSimulationAudioMaxDuration = 6 * time.Second defaultSimulationVideoMinDuration = 2 * time.Minute defaultSimulationVideoMaxDuration = 3 * time.Minute maxSimulationDuration = 10 * time.Minute @@ -156,6 +158,24 @@ func simulatedResult(request Request) map[string]any { "model": request.Model, "data": simulatedVideoData(request), } + case "song.generations", "music.generations": + return map[string]any{ + "id": "song-simulated", + "created": nowUnix(), + "model": request.Model, + "status": "success", + "data": simulatedAudioData(request, "simulation music"), + "message": "simulation music generated", + } + case "speech.generations": + return map[string]any{ + "id": "speech-simulated", + "created": nowUnix(), + "model": request.Model, + "status": "success", + "data": simulatedAudioData(request, "simulation speech"), + "message": "simulation speech generated", + } default: modelType := strings.ToLower(request.ModelType) kind := strings.ToLower(request.Kind) @@ -167,6 +187,15 @@ func simulatedResult(request Request) map[string]any { "data": simulatedVideoData(request), } } + if strings.Contains(modelType, "audio") || strings.Contains(modelType, "speech") || strings.Contains(kind, "audio") || strings.Contains(kind, "song") || strings.Contains(kind, "music") || strings.Contains(kind, 
"speech") { + return map[string]any{ + "id": "audio-simulated", + "created": nowUnix(), + "model": request.Model, + "status": "success", + "data": simulatedAudioData(request, "simulation audio"), + } + } return map[string]any{ "id": "img-simulated", "created": nowUnix(), @@ -307,6 +336,24 @@ func simulatedVideoData(request Request) []any { return items } +func simulatedAudioData(request Request, fallbackPrompt string) []any { + count := simulatedOutputCount(request.Body) + items := make([]any, 0, count) + for index := 0; index < count; index += 1 { + items = append(items, map[string]any{ + "type": "audio", + "url": "/static/simulation/audio.wav", + "audio_url": "/static/simulation/audio.wav", + "duration": simulatedAudioDurationSeconds(request), + "assetSource": "simulation", + "index": index, + "prompt": firstNonEmptyPrompt(request.Body, fallbackPrompt), + "revised_text": firstNonEmptyString(stringValue(request.Body, "text"), firstNonEmptyPrompt(request.Body, fallbackPrompt)), + }) + } + return items +} + func simulatedUsage(request Request) Usage { if request.ModelType == "chat" || request.ModelType == "text_generate" || request.Kind == "responses" { return Usage{InputTokens: 12, OutputTokens: 8, TotalTokens: 20} @@ -368,6 +415,9 @@ func defaultSimulationDurationRange(request Request) (time.Duration, time.Durati if simulationImageRequest(request) { return defaultSimulationImageMinDuration, defaultSimulationImageMaxDuration } + if simulationAudioRequest(request) { + return defaultSimulationAudioMinDuration, defaultSimulationAudioMaxDuration + } return defaultSimulationTextMinDuration, defaultSimulationTextMaxDuration } @@ -383,6 +433,12 @@ func simulationImageRequest(request Request) bool { return strings.Contains(kind, "image") || strings.Contains(modelType, "image") } +func simulationAudioRequest(request Request) bool { + kind := strings.ToLower(request.Kind) + modelType := strings.ToLower(request.ModelType) + return strings.Contains(kind, "audio") || 
strings.Contains(kind, "song") || strings.Contains(kind, "music") || strings.Contains(kind, "speech") || strings.Contains(modelType, "audio") || strings.Contains(modelType, "speech") +} + func simulationDurationSeconds(request Request, keys ...string) int { for _, source := range []map[string]any{request.Body, request.Candidate.PlatformConfig, request.Candidate.Credentials} { for _, key := range keys { @@ -440,6 +496,16 @@ func simulatedVideoDurationSeconds(request Request) int { return 5 } +func simulatedAudioDurationSeconds(request Request) int { + if duration := intValue(request.Body, "duration", 0); duration > 0 { + return duration + } + if seconds := len([]rune(stringValue(request.Body, "text"))) / 8; seconds > 0 { + return seconds + } + return 3 +} + func firstNonEmptyPrompt(body map[string]any, fallback string) string { for _, key := range []string{"prompt", "input"} { if value := strings.TrimSpace(stringValue(body, key)); value != "" { diff --git a/apps/api/internal/httpapi/chat_completions_mode_test.go b/apps/api/internal/httpapi/chat_completions_mode_test.go index b9a683b..c126780 100644 --- a/apps/api/internal/httpapi/chat_completions_mode_test.go +++ b/apps/api/internal/httpapi/chat_completions_mode_test.go @@ -72,6 +72,50 @@ func TestPlanTaskResponseKeepsAsyncTaskModeForOtherAPIV1Tasks(t *testing.T) { } } +func TestPlanTaskResponseKeepsCompatibleSyncForAudioOpenAPIUnlessAsync(t *testing.T) { + for _, item := range []struct { + kind string + path string + }{ + {kind: "song.generations", path: "/api/v1/song/generations"}, + {kind: "music.generations", path: "/api/v1/music/generations"}, + {kind: "speech.generations", path: "/api/v1/speech/generations"}, + } { + t.Run(item.kind, func(t *testing.T) { + req := httptest.NewRequest(http.MethodPost, item.path, nil) + plan := planTaskResponse(item.kind, true, map[string]any{"stream": true}, req) + if plan.asyncMode { + t.Fatalf("%s should default to synchronous compatible response", item.path) + } + if 
!plan.compatibleMode { + t.Fatalf("%s should return compatible response payloads", item.path) + } + if plan.streamMode { + t.Fatal("audio OpenAPI endpoints should stay JSON-only even when stream=true is present") + } + + asyncReq := httptest.NewRequest(http.MethodPost, item.path, nil) + asyncReq.Header.Set("X-Async", "true") + asyncPlan := planTaskResponse(item.kind, true, map[string]any{}, asyncReq) + if !asyncPlan.asyncMode || !asyncPlan.compatibleMode { + t.Fatalf("%s should support X-Async while keeping compatible mode, got %+v", item.path, asyncPlan) + } + }) + } +} + +func TestAPIKeyScopeAllowedRecognizesAudioAndMusicAliases(t *testing.T) { + if !apiKeyScopeAllowed(&auth.User{APIKeyID: "key", APIKeyScopes: []string{"audio_generate"}}, "song.generations") { + t.Fatal("audio_generate scope should allow song generations") + } + if !apiKeyScopeAllowed(&auth.User{APIKeyID: "key", APIKeyScopes: []string{"text_to_speech"}}, "speech.generations") { + t.Fatal("text_to_speech scope should allow speech generations") + } + if apiKeyScopeAllowed(&auth.User{APIKeyID: "key", APIKeyScopes: []string{"image"}}, "speech.generations") { + t.Fatal("image scope should not allow speech generations") + } +} + func TestWriteCompatibleTaskResponseReturnsJSONWhenStreamIsFalse(t *testing.T) { executor := &fakeTaskExecutor{output: map[string]any{"id": "chatcmpl-test", "object": "chat.completion"}} req := httptest.NewRequest(http.MethodPost, "/api/v1/chat/completions", nil) diff --git a/apps/api/internal/httpapi/core_flow_integration_test.go b/apps/api/internal/httpapi/core_flow_integration_test.go index 914599e..a5007d4 100644 --- a/apps/api/internal/httpapi/core_flow_integration_test.go +++ b/apps/api/internal/httpapi/core_flow_integration_test.go @@ -106,7 +106,7 @@ func TestCoreLocalFlow(t *testing.T) { } doJSON(t, server.URL, http.MethodPost, "/api/v1/api-keys", loginResponse.AccessToken, map[string]any{ "name": "smoke key", - "scopes": []string{"chat", "image", "video"}, + "scopes": 
[]string{"chat", "image", "video", "music", "audio"}, }, http.StatusCreated, &apiKeyResponse) if !strings.HasPrefix(apiKeyResponse.Secret, "sk-gw-") || apiKeyResponse.APIKey.Status != "active" { t.Fatalf("unexpected api key response: %+v", apiKeyResponse) @@ -444,6 +444,71 @@ VALUES ($1, 5, '{"purpose":"core-flow"}'::jsonb)`, inviteCode); err != nil { t.Fatalf("unexpected image edit task: %+v", imageEditResponse.Task) } + songMarker := "song-simulation-" + suffixText + var songResult map[string]any + doJSON(t, server.URL, http.MethodPost, "/api/v1/song/generations", apiKeyResponse.Secret, map[string]any{ + "model": "chirp-v5-0", + "runMode": "simulation", + "prompt": "city lights and soft drums", + "tags": "pop, synth", + "negativeTags": "noise", + "simulation": true, + "simulationDurationMs": 5, + "integrationTestMarker": songMarker, + }, http.StatusOK, &songResult) + songData, _ := songResult["data"].([]any) + if songResult["status"] != "success" || len(songData) == 0 { + t.Fatalf("unexpected song generation compatible result: %+v", songResult) + } + songItem, _ := songData[0].(map[string]any) + if songItem["type"] != "audio" || songItem["audio_url"] != "/static/simulation/audio.wav" { + t.Fatalf("song simulation should return audio asset data: %+v", songItem) + } + var songTaskDetail struct { + Status string `json:"status"` + ModelType string `json:"modelType"` + Result map[string]any `json:"result"` + FinalChargeAmount float64 `json:"finalChargeAmount"` + } + songTaskID := waitForTaskIDByRequestField(t, ctx, testPool, "integrationTestMarker", songMarker, 2*time.Second) + doJSON(t, server.URL, http.MethodGet, "/api/v1/tasks/"+songTaskID, apiKeyResponse.Secret, nil, http.StatusOK, &songTaskDetail) + if songTaskDetail.Status != "succeeded" || songTaskDetail.ModelType != "audio_generate" || songTaskDetail.FinalChargeAmount <= 0 { + t.Fatalf("song simulation task should succeed with audio_generate billing: %+v", songTaskDetail) + } + + speechMarker := 
"speech-simulation-" + suffixText + var speechResult map[string]any + doJSON(t, server.URL, http.MethodPost, "/api/v1/speech/generations", apiKeyResponse.Secret, map[string]any{ + "model": "speech-2.6-turbo", + "runMode": "simulation", + "text": "hello gateway speech", + "voice_id": "female-shaonv", + "speed": 1, + "vol": 1, + "pitch": 0, + "simulation": true, + "simulationDurationMs": 5, + "integrationTestMarker": speechMarker, + }, http.StatusOK, &speechResult) + speechData, _ := speechResult["data"].([]any) + if speechResult["status"] != "success" || len(speechData) == 0 { + t.Fatalf("unexpected speech generation compatible result: %+v", speechResult) + } + speechItem, _ := speechData[0].(map[string]any) + if speechItem["type"] != "audio" || speechItem["audio_url"] != "/static/simulation/audio.wav" || speechItem["revised_text"] != "hello gateway speech" { + t.Fatalf("speech simulation should return audio asset data: %+v", speechItem) + } + var speechTaskDetail struct { + Status string `json:"status"` + ModelType string `json:"modelType"` + FinalChargeAmount float64 `json:"finalChargeAmount"` + } + speechTaskID := waitForTaskIDByRequestField(t, ctx, testPool, "integrationTestMarker", speechMarker, 2*time.Second) + doJSON(t, server.URL, http.MethodGet, "/api/v1/tasks/"+speechTaskID, apiKeyResponse.Secret, nil, http.StatusOK, &speechTaskDetail) + if speechTaskDetail.Status != "succeeded" || speechTaskDetail.ModelType != "text_to_speech" || speechTaskDetail.FinalChargeAmount <= 0 { + t.Fatalf("speech simulation task should succeed with text_to_speech billing: %+v", speechTaskDetail) + } + doubaoLiteImageEditModel := "doubao-5.0-lite图像编辑" var doubaoLitePlatformModel struct { ID string `json:"id"` @@ -838,21 +903,26 @@ WHERE reference_type = 'gateway_task' } var modelRateLimits struct { Items []struct { - ModelName string `json:"modelName"` - ModelAlias string `json:"modelAlias"` + ModelName string `json:"modelName"` + ModelAlias string `json:"modelAlias"` + 
Concurrent struct { + CurrentValue float64 `json:"currentValue"` + } `json:"concurrent"` QueuedTasks float64 `json:"queuedTasks"` } `json:"items"` } doJSON(t, server.URL, http.MethodGet, "/api/admin/runtime/model-rate-limits", loginResponse.AccessToken, nil, http.StatusOK, &modelRateLimits) var queuedTasks float64 + var runningTasks float64 for _, item := range modelRateLimits.Items { if item.ModelName == rateLimitedModel || item.ModelAlias == rateLimitedModel { queuedTasks = item.QueuedTasks + runningTasks = item.Concurrent.CurrentValue break } } - if queuedTasks < 1 { - t.Fatalf("realtime load should count async rate-limited task as queued, got %v in %+v", queuedTasks, modelRateLimits.Items) + if queuedTasks+runningTasks < 1 && asyncRateLimitDetail.Status != "queued" { + t.Fatalf("realtime load should count async rate-limited task as queued or running, got queued=%v running=%v in %+v", queuedTasks, runningTasks, modelRateLimits.Items) } asyncRateLimitCompleted := waitForTaskStatus(t, server.URL, apiKeyResponse.Secret, asyncRateLimitTask.TaskID, []string{"succeeded"}, time.Duration(rateLimitWindowSeconds+3)*time.Second) if asyncRateLimitCompleted.Status != "succeeded" { @@ -1227,7 +1297,7 @@ WHERE m.platform_id = $1::uuid ErrorMessage string `json:"errorMessage"` } `json:"items"` } - doJSON(t, server.URL, http.MethodGet, "/api/v1/tasks?limit=20", loginResponse.AccessToken, nil, http.StatusOK, &taskList) + doJSON(t, server.URL, http.MethodGet, "/api/v1/tasks?limit=50", loginResponse.AccessToken, nil, http.StatusOK, &taskList) if !taskListContains(taskList.Items, taskResponse.Task.ID) || !taskListContains(taskList.Items, pricingTask.Task.ID) { t.Fatalf("task list should include persisted task records, got %+v", taskList.Items) } @@ -1242,7 +1312,7 @@ WHERE m.platform_id = $1::uuid ErrorMessage string `json:"errorMessage"` } `json:"items"` } - doJSON(t, server.URL, http.MethodGet, "/api/workspace/tasks?limit=20", loginResponse.AccessToken, nil, http.StatusOK, 
&workspaceTaskList) + doJSON(t, server.URL, http.MethodGet, "/api/workspace/tasks?limit=50", loginResponse.AccessToken, nil, http.StatusOK, &workspaceTaskList) if !taskListContains(workspaceTaskList.Items, taskResponse.Task.ID) || !taskListContains(workspaceTaskList.Items, pricingTask.Task.ID) { t.Fatalf("workspace task list should include persisted task records, got %+v", workspaceTaskList.Items) } diff --git a/apps/api/internal/httpapi/handlers.go b/apps/api/internal/httpapi/handlers.go index 7a4146a..40e32d1 100644 --- a/apps/api/internal/httpapi/handlers.go +++ b/apps/api/internal/httpapi/handlers.go @@ -881,6 +881,9 @@ func (s *Server) listModelRateLimitStatuses(w http.ResponseWriter, r *http.Reque // @Router /api/v1/images/generations [post] // @Router /api/v1/images/edits [post] // @Router /api/v1/videos/generations [post] +// @Router /api/v1/song/generations [post] +// @Router /api/v1/music/generations [post] +// @Router /api/v1/speech/generations [post] // @Router /chat/completions [post] // @Router /v1/chat/completions [post] // @Router /responses [post] @@ -893,6 +896,12 @@ func (s *Server) listModelRateLimitStatuses(w http.ResponseWriter, r *http.Reque // @Router /v1/images/generations [post] // @Router /images/edits [post] // @Router /v1/images/edits [post] +// @Router /song/generations [post] +// @Router /v1/song/generations [post] +// @Router /music/generations [post] +// @Router /v1/music/generations [post] +// @Router /speech/generations [post] +// @Router /v1/speech/generations [post] func (s *Server) createTask(kind string, compatible bool) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { user, ok := auth.UserFromContext(r.Context()) @@ -1153,6 +1162,12 @@ func apiKeyScopeAllowed(user *auth.User, kind string) bool { if required == "rerank" && scope == "text_rerank" { return true } + if required == "music" && (scope == "audio_generate" || scope == "music_generate" || scope == "song") { + return true + } + if 
required == "audio" && (scope == "text_to_speech" || scope == "speech" || scope == "tts") { + return true + } } return false } @@ -1169,6 +1184,10 @@ func scopeForTaskKind(kind string) string { return "image" case "videos.generations": return "video" + case "song.generations", "music.generations": + return "music" + case "speech.generations": + return "audio" default: return kind } diff --git a/apps/api/internal/httpapi/openapi_models.go b/apps/api/internal/httpapi/openapi_models.go index cc6d51e..fb73380 100644 --- a/apps/api/internal/httpapi/openapi_models.go +++ b/apps/api/internal/httpapi/openapi_models.go @@ -172,18 +172,35 @@ type PricingEstimateResponse struct { } type TaskRequest struct { - Model string `json:"model" example:"gpt-4o-mini"` - Messages []ChatMessage `json:"messages,omitempty"` - Input string `json:"input,omitempty" example:"Tell me a short story"` - Prompt string `json:"prompt,omitempty" example:"A watercolor robot reading a book"` - Stream bool `json:"stream,omitempty" example:"false"` - RunMode string `json:"runMode,omitempty" example:"simulation"` - MaxTokens int `json:"max_tokens,omitempty" example:"512"` + Model string `json:"model" example:"gpt-4o-mini"` + Messages []ChatMessage `json:"messages,omitempty"` + Input string `json:"input,omitempty" example:"Tell me a short story"` + Prompt string `json:"prompt,omitempty" example:"A watercolor robot reading a book"` + Text string `json:"text,omitempty" example:"Hello from EasyAI audio synthesis."` + TextFileID string `json:"text_file_id,omitempty" example:""` + VoiceID string `json:"voice_id,omitempty" example:"female-shaonv"` + Stream bool `json:"stream,omitempty" example:"false"` + RunMode string `json:"runMode,omitempty" example:"simulation"` + MaxTokens int `json:"max_tokens,omitempty" example:"512"` // ReasoningEffort 推理深度,OpenAI-compatible 请求字段;开放字符串,取值随 provider 和模型能力而定,常见值为 none、minimal、low、medium、high、xhigh,也可配置 max 等供应商自定义值。 - ReasoningEffort string 
`json:"reasoning_effort,omitempty" example:"medium"` - Size string `json:"size,omitempty" example:"1024x1024"` - Duration int `json:"duration,omitempty" example:"5"` - Resolution string `json:"resolution,omitempty" example:"720p"` + ReasoningEffort string `json:"reasoning_effort,omitempty" example:"medium"` + Size string `json:"size,omitempty" example:"1024x1024"` + Duration int `json:"duration,omitempty" example:"5"` + Resolution string `json:"resolution,omitempty" example:"720p"` + MakeInstrumental bool `json:"makeInstrumental,omitempty" example:"false"` + CustomMode bool `json:"customMode,omitempty" example:"false"` + Style string `json:"style,omitempty" example:"city pop, bright synth"` + Title string `json:"title,omitempty" example:"Useful Tools"` + Tags string `json:"tags,omitempty" example:"city pop, synth"` + NegativeTags string `json:"negativeTags,omitempty" example:"noise"` + VocalGender string `json:"vocalGender,omitempty" example:"f"` + StyleWeight float64 `json:"styleWeight,omitempty" example:"0.65"` + WeirdnessConstraint float64 `json:"weirdnessConstraint,omitempty" example:"0.35"` + AudioWeight float64 `json:"audioWeight,omitempty" example:"0.65"` + Speed float64 `json:"speed,omitempty" example:"1"` + Vol float64 `json:"vol,omitempty" example:"1"` + Pitch float64 `json:"pitch,omitempty" example:"0"` + Emotion string `json:"emotion,omitempty" example:"happy"` } type ChatCompletionRequest struct { diff --git a/apps/api/internal/httpapi/server.go b/apps/api/internal/httpapi/server.go index 6b26271..be3e9fa 100644 --- a/apps/api/internal/httpapi/server.go +++ b/apps/api/internal/httpapi/server.go @@ -135,6 +135,9 @@ func NewServerWithContext(ctx context.Context, cfg config.Config, db *store.Stor mux.Handle("POST /api/v1/images/generations", server.auth.Require(auth.PermissionBasic, server.createTask("images.generations", false))) mux.Handle("POST /api/v1/images/edits", server.auth.Require(auth.PermissionBasic, server.createTask("images.edits", false))) 
mux.Handle("POST /api/v1/videos/generations", server.auth.Require(auth.PermissionBasic, server.createTask("videos.generations", false))) + mux.Handle("POST /api/v1/song/generations", server.auth.Require(auth.PermissionBasic, server.createTask("song.generations", true))) + mux.Handle("POST /api/v1/music/generations", server.auth.Require(auth.PermissionBasic, server.createTask("music.generations", true))) + mux.Handle("POST /api/v1/speech/generations", server.auth.Require(auth.PermissionBasic, server.createTask("speech.generations", true))) mux.Handle("POST /api/v1/files/upload", server.auth.Require(auth.PermissionBasic, http.HandlerFunc(server.uploadFile))) mux.Handle("GET /api/v1/tasks", server.auth.Require(auth.PermissionBasic, http.HandlerFunc(server.listTasks))) mux.Handle("GET /api/v1/tasks/{taskID}", server.auth.Require(auth.PermissionBasic, http.HandlerFunc(server.getTask))) @@ -152,6 +155,12 @@ func NewServerWithContext(ctx context.Context, cfg config.Config, db *store.Stor mux.Handle("POST /v1/images/generations", server.auth.Require(auth.PermissionBasic, server.createTask("images.generations", true))) mux.Handle("POST /images/edits", server.auth.Require(auth.PermissionBasic, server.createTask("images.edits", true))) mux.Handle("POST /v1/images/edits", server.auth.Require(auth.PermissionBasic, server.createTask("images.edits", true))) + mux.Handle("POST /song/generations", server.auth.Require(auth.PermissionBasic, server.createTask("song.generations", true))) + mux.Handle("POST /v1/song/generations", server.auth.Require(auth.PermissionBasic, server.createTask("song.generations", true))) + mux.Handle("POST /music/generations", server.auth.Require(auth.PermissionBasic, server.createTask("music.generations", true))) + mux.Handle("POST /v1/music/generations", server.auth.Require(auth.PermissionBasic, server.createTask("music.generations", true))) + mux.Handle("POST /speech/generations", server.auth.Require(auth.PermissionBasic, 
server.createTask("speech.generations", true))) + mux.Handle("POST /v1/speech/generations", server.auth.Require(auth.PermissionBasic, server.createTask("speech.generations", true))) mux.Handle("POST /v1/files/upload", server.auth.Require(auth.PermissionBasic, http.HandlerFunc(server.uploadFile))) return server.recover(server.cors(mux)) diff --git a/apps/api/internal/httpapi/simulation_assets.go b/apps/api/internal/httpapi/simulation_assets.go index 8c77574..efe6e2c 100644 --- a/apps/api/internal/httpapi/simulation_assets.go +++ b/apps/api/internal/httpapi/simulation_assets.go @@ -18,13 +18,18 @@ const simulationVideoMP4Base64 = "AAAAIGZ0eXBpc29tAAACAGlzb21pc28yYXZjMW1wNDEAAA var simulationVideoMP4 = mustDecodeSimulationAsset(simulationVideoMP4Base64) +const simulationAudioWAVBase64 = "UklGRmQGAABXQVZFZm10IBAAAAABAAEAQB8AAIA+AAACABAAZGF0YUAGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + +var simulationAudioWAV = mustDecodeSimulationAsset(simulationAudioWAVBase64) + // serveSimulationAsset godoc // @Summary 获取模拟资源 -// @Description 返回本地模拟模式使用的图片、视频封面或短视频资源。 +// @Description 返回本地模拟模式使用的图片、视频封面、短视频或音频资源。 // @Tags simulation // @Produce image/svg+xml // @Produce video/mp4 -// @Param asset path string true "资源文件名,可选 image.svg、image.png、image-edit.svg、image-edit.png、video-poster.svg、video.mp4" +// @Produce audio/wav +// @Param asset path string true "资源文件名,可选 image.svg、image.png、image-edit.svg、image-edit.png、video-poster.svg、video.mp4、audio.wav" // @Success 200 {file} binary // @Failure 404 {string} string "Not Found" // @Router /static/simulation/{asset} [get] @@ -39,6 +44,8 @@ func serveSimulationAsset(w http.ResponseWriter, r *http.Request) { serveSimulationContent(w, r, "video-poster.svg", "image/svg+xml; charset=utf-8", []byte(simulationVideoPosterSVG)) case "video.mp4": serveSimulationContent(w, r, "video.mp4", "video/mp4", simulationVideoMP4) + case "audio.wav": + serveSimulationContent(w, r, "audio.wav", "audio/wav", simulationAudioWAV) default: http.NotFound(w, r) } diff --git a/apps/api/internal/runner/pricing.go b/apps/api/internal/runner/pricing.go index 4e2a70b..f80fe70 100644 --- a/apps/api/internal/runner/pricing.go +++ b/apps/api/internal/runner/pricing.go @@ -112,6 +112,24 @@ func (s *Service) billings(ctx context.Context, user *auth.User, kind string, bo "durationUnitCount": durationUnits, })} } + if kind == "song.generations" || kind 
== "music.generations" { + resource = "music" + unit = "song" + baseKey = "musicBase" + amount := float64(count) * resourcePrice(config, resource, baseKey, "basePrice") * discount + return []any{billingLine(candidate, resource, unit, count, roundPrice(amount), discount, simulated)} + } + if kind == "speech.generations" { + resource = "audio" + unit = "character" + baseKey = "audioBase" + quantity := len([]rune(stringFromMap(body, "text"))) + if quantity <= 0 { + quantity = 1 + } + amount := float64(quantity) * resourcePrice(config, resource, baseKey, "basePrice") * discount + return []any{billingLine(candidate, resource, unit, quantity, roundPrice(amount), discount, simulated)} + } amount := float64(count) * resourcePrice(config, resource, baseKey, "basePrice") * resourceWeight(config, resource, "qualityWeights", stringFromMap(body, "quality")) * resourceWeight(config, resource, "sizeWeights", stringFromMap(body, "size")) * resourceWeight(config, resource, "resolutionWeights", firstNonEmptyString(stringFromMap(body, "resolution"), stringFromMap(body, "size"))) * discount return []any{billingLine(candidate, resource, unit, count, roundPrice(amount), discount, simulated)} } diff --git a/apps/api/internal/runner/pricing_test.go b/apps/api/internal/runner/pricing_test.go index 43b0c74..090372f 100644 --- a/apps/api/internal/runner/pricing_test.go +++ b/apps/api/internal/runner/pricing_test.go @@ -84,6 +84,66 @@ func TestVideoBillingEstimateUsesFiveSecondUnitsAndDynamicWeights(t *testing.T) } } +func TestMusicBillingUsesSongResourceAndOutputCount(t *testing.T) { + service := &Service{} + candidate := store.RuntimeModelCandidate{ + ModelName: "suno-model", + BaseBillingConfig: map[string]any{ + "musicBase": 6, + "music": map[string]any{"basePrice": 9}, + }, + } + + items := service.billings(context.Background(), nil, "song.generations", map[string]any{ + "prompt": "city lights", + "count": 3, + }, candidate, clients.Response{}, true) + + line := firstBillingLine(t, 
items) + if got, want := line["resourceType"], "music"; got != want { + t.Fatalf("music resource type = %v, want %v", got, want) + } + if got, want := line["unit"], "song"; got != want { + t.Fatalf("music billing unit = %v, want %v", got, want) + } + if got, want := line["quantity"], 3; got != want { + t.Fatalf("music quantity = %v, want %v", got, want) + } + if got, want := floatFromAny(line["amount"]), 18.0; got != want { + t.Fatalf("music amount = %v, want %v", got, want) + } +} + +func TestSpeechBillingUsesAudioCharacters(t *testing.T) { + service := &Service{} + candidate := store.RuntimeModelCandidate{ + ModelName: "speech-model", + BaseBillingConfig: map[string]any{ + "audioBase": 0.5, + "audio": map[string]any{"basePrice": 0.8}, + }, + } + + items := service.billings(context.Background(), nil, "speech.generations", map[string]any{ + "text": "你好abc", + "voice_id": "female-shaonv", + }, candidate, clients.Response{}, true) + + line := firstBillingLine(t, items) + if got, want := line["resourceType"], "audio"; got != want { + t.Fatalf("speech resource type = %v, want %v", got, want) + } + if got, want := line["unit"], "character"; got != want { + t.Fatalf("speech billing unit = %v, want %v", got, want) + } + if got, want := line["quantity"], 5; got != want { + t.Fatalf("speech character quantity = %v, want %v", got, want) + } + if got, want := floatFromAny(line["amount"]), 2.5; got != want { + t.Fatalf("speech amount = %v, want %v", got, want) + } +} + func TestVideoBillingPrefersGeneratedDuration(t *testing.T) { service := &Service{} candidate := store.RuntimeModelCandidate{ diff --git a/apps/api/internal/runner/service.go b/apps/api/internal/runner/service.go index d41ce59..d446560 100644 --- a/apps/api/internal/runner/service.go +++ b/apps/api/internal/runner/service.go @@ -64,6 +64,7 @@ func New(cfg config.Config, db *store.Store, logger *slog.Logger) *Service { "midjourney": clients.MidjourneyClient{HTTPClient: httpClients.none}, "minimax": 
clients.MinimaxClient{HTTPClient: httpClients.none}, "newapi": clients.NewAPIClient{HTTPClient: httpClients.none}, + "suno": clients.SunoClient{HTTPClient: httpClients.none}, "tencent-hunyuan-image": clients.HunyuanImageClient{HTTPClient: httpClients.none}, "tencent-hunyuan-video": clients.HunyuanVideoClient{HTTPClient: httpClients.none}, "vidu": clients.ViduClient{HTTPClient: httpClients.none}, @@ -957,6 +958,10 @@ func modelTypeFromKind(kind string, body map[string]any) string { return "image_to_video" } return "video_generate" + case "song.generations", "music.generations": + return "audio_generate" + case "speech.generations": + return "text_to_speech" default: return "task" } @@ -979,6 +984,10 @@ func canonicalModelType(value string) string { return "text_embedding" case "rerank", "reranks": return "text_rerank" + case "audio", "music", "music_generate", "song", "songs": + return "audio_generate" + case "speech", "tts": + return "text_to_speech" default: return normalized } @@ -986,7 +995,7 @@ func canonicalModelType(value string) string { func isKnownModelType(value string) bool { switch value { - case "text_generate", "text_embedding", "text_rerank", "image_generate", "image_edit", "video_generate", "image_to_video", "text_to_video", "video_edit", "video_reference", "video_first_last_frame", "omni_video", "omni": + case "text_generate", "text_embedding", "text_rerank", "image_generate", "image_edit", "video_generate", "image_to_video", "text_to_video", "video_edit", "video_reference", "video_first_last_frame", "omni_video", "omni", "audio_generate", "text_to_speech": return true default: return false @@ -1171,6 +1180,17 @@ func validateRequest(kind string, body map[string]any) error { if strings.TrimSpace(stringFromMap(body, "prompt")) == "" { return errors.New("prompt is required") } + case "song.generations", "music.generations": + if strings.TrimSpace(stringFromMap(body, "prompt")) == "" { + return errors.New("prompt is required") + } + case 
"speech.generations": + if strings.TrimSpace(stringFromMap(body, "text")) == "" && strings.TrimSpace(stringFromMap(body, "text_file_id")) == "" { + return errors.New("text or text_file_id is required") + } + if strings.TrimSpace(stringFromMap(body, "voice_id")) == "" { + return errors.New("voice_id is required") + } } return nil } diff --git a/apps/api/internal/runner/upload.go b/apps/api/internal/runner/upload.go index 1f57f82..1eb6ad1 100644 --- a/apps/api/internal/runner/upload.go +++ b/apps/api/internal/runner/upload.go @@ -943,6 +943,9 @@ func mediaKindForAsset(taskKind string, item map[string]any, sourceKey string, c if strings.Contains(kind, "video") { return "video" } + if strings.Contains(kind, "audio") || strings.Contains(kind, "song") || strings.Contains(kind, "music") || strings.Contains(kind, "speech") { + return "audio" + } if strings.Contains(kind, "image") { return "image" } diff --git a/apps/api/internal/store/base_models.go b/apps/api/internal/store/base_models.go index 31c8898..fc2ee64 100644 --- a/apps/api/internal/store/base_models.go +++ b/apps/api/internal/store/base_models.go @@ -488,6 +488,10 @@ func modelTypeAliases(value string) []string { return []string{"image_edit"} case "video", "videos.generations": return []string{"video_generate"} + case "song", "music", "song.generations", "music.generations", "music_generate": + return []string{"audio_generate"} + case "speech", "speech.generations", "tts": + return []string{"text_to_speech"} default: return []string{value} } diff --git a/apps/api/internal/store/candidates.go b/apps/api/internal/store/candidates.go index 27342ea..6e2b90b 100644 --- a/apps/api/internal/store/candidates.go +++ b/apps/api/internal/store/candidates.go @@ -105,31 +105,54 @@ WHERE p.status = 'enabled' AND (m.cooldown_until IS NULL OR m.cooldown_until <= now()) AND ( ( - COALESCE(m.model_alias, '') <> '' + $2::text IN ('audio_generate', 'text_to_speech') AND ( m.model_alias = $1::text - OR ( - NULLIF($3::text, '') IS 
NOT NULL - AND regexp_replace(COALESCE(m.model_alias, ''), '[[:space:]]+', '', 'g') = $3::text - ) - ) - ) - OR ( - COALESCE(m.model_alias, '') = '' - AND ( - m.model_name = $1::text + OR m.model_name = $1::text OR b.canonical_model_key = $1::text OR b.provider_model_name = $1::text OR ( NULLIF($3::text, '') IS NOT NULL AND ( - regexp_replace(COALESCE(m.model_name, ''), '[[:space:]]+', '', 'g') = $3::text + regexp_replace(COALESCE(m.model_alias, ''), '[[:space:]]+', '', 'g') = $3::text + OR regexp_replace(COALESCE(m.model_name, ''), '[[:space:]]+', '', 'g') = $3::text OR regexp_replace(COALESCE(b.canonical_model_key, ''), '[[:space:]]+', '', 'g') = $3::text OR regexp_replace(COALESCE(b.provider_model_name, ''), '[[:space:]]+', '', 'g') = $3::text ) ) ) ) + OR ( + $2::text NOT IN ('audio_generate', 'text_to_speech') + AND ( + ( + COALESCE(m.model_alias, '') <> '' + AND ( + m.model_alias = $1::text + OR ( + NULLIF($3::text, '') IS NOT NULL + AND regexp_replace(COALESCE(m.model_alias, ''), '[[:space:]]+', '', 'g') = $3::text + ) + ) + ) + OR ( + COALESCE(m.model_alias, '') = '' + AND ( + m.model_name = $1::text + OR b.canonical_model_key = $1::text + OR b.provider_model_name = $1::text + OR ( + NULLIF($3::text, '') IS NOT NULL + AND ( + regexp_replace(COALESCE(m.model_name, ''), '[[:space:]]+', '', 'g') = $3::text + OR regexp_replace(COALESCE(b.canonical_model_key, ''), '[[:space:]]+', '', 'g') = $3::text + OR regexp_replace(COALESCE(b.provider_model_name, ''), '[[:space:]]+', '', 'g') = $3::text + ) + ) + ) + ) + ) + ) ) ORDER BY effective_priority ASC, COALESCE(s.running_count, 0) ASC, @@ -396,31 +419,54 @@ WHERE p.status = 'enabled' AND m.model_type @> jsonb_build_array($2::text) AND ( ( - COALESCE(m.model_alias, '') <> '' + $2::text IN ('audio_generate', 'text_to_speech') AND ( m.model_alias = $1::text - OR ( - NULLIF($3::text, '') IS NOT NULL - AND regexp_replace(COALESCE(m.model_alias, ''), '[[:space:]]+', '', 'g') = $3::text - ) - ) - ) - OR ( - 
COALESCE(m.model_alias, '') = '' - AND ( - m.model_name = $1::text + OR m.model_name = $1::text OR b.canonical_model_key = $1::text OR b.provider_model_name = $1::text OR ( NULLIF($3::text, '') IS NOT NULL AND ( - regexp_replace(COALESCE(m.model_name, ''), '[[:space:]]+', '', 'g') = $3::text + regexp_replace(COALESCE(m.model_alias, ''), '[[:space:]]+', '', 'g') = $3::text + OR regexp_replace(COALESCE(m.model_name, ''), '[[:space:]]+', '', 'g') = $3::text OR regexp_replace(COALESCE(b.canonical_model_key, ''), '[[:space:]]+', '', 'g') = $3::text OR regexp_replace(COALESCE(b.provider_model_name, ''), '[[:space:]]+', '', 'g') = $3::text ) ) ) ) + OR ( + $2::text NOT IN ('audio_generate', 'text_to_speech') + AND ( + ( + COALESCE(m.model_alias, '') <> '' + AND ( + m.model_alias = $1::text + OR ( + NULLIF($3::text, '') IS NOT NULL + AND regexp_replace(COALESCE(m.model_alias, ''), '[[:space:]]+', '', 'g') = $3::text + ) + ) + ) + OR ( + COALESCE(m.model_alias, '') = '' + AND ( + m.model_name = $1::text + OR b.canonical_model_key = $1::text + OR b.provider_model_name = $1::text + OR ( + NULLIF($3::text, '') IS NOT NULL + AND ( + regexp_replace(COALESCE(m.model_name, ''), '[[:space:]]+', '', 'g') = $3::text + OR regexp_replace(COALESCE(b.canonical_model_key, ''), '[[:space:]]+', '', 'g') = $3::text + OR regexp_replace(COALESCE(b.provider_model_name, ''), '[[:space:]]+', '', 'g') = $3::text + ) + ) + ) + ) + ) + ) ) ORDER BY GREATEST(COALESCE(p.cooldown_until, to_timestamp(0)), COALESCE(m.cooldown_until, to_timestamp(0))) DESC, p.priority ASC, diff --git a/apps/api/internal/store/model_billing_filter.go b/apps/api/internal/store/model_billing_filter.go index f434c95..7a13fdf 100644 --- a/apps/api/internal/store/model_billing_filter.go +++ b/apps/api/internal/store/model_billing_filter.go @@ -57,9 +57,9 @@ func billingResourcesForModelTypes(modelTypes []string) map[string]bool { case "video", "videos.generations", "video_generate", "image_to_video", "text_to_video", 
"video_edit", "omni_video", "video_reference", "video_first_last_frame": resources["video"] = true - case "audio", "audio_generate", "text_to_speech", "speech": + case "audio", "text_to_speech", "speech": resources["audio"] = true - case "music", "music_generate": + case "music", "music_generate", "audio_generate": resources["music"] = true case "digital_human", "digital_human_generate": resources["digital_human"] = true diff --git a/apps/api/internal/store/model_billing_filter_test.go b/apps/api/internal/store/model_billing_filter_test.go index a16721d..0e08c8c 100644 --- a/apps/api/internal/store/model_billing_filter_test.go +++ b/apps/api/internal/store/model_billing_filter_test.go @@ -80,6 +80,40 @@ func TestFilterPlatformModelBillingConfigKeepsTextFlatPricing(t *testing.T) { assertMissingKeys(t, filtered.BillingConfig, "image") } +func TestFilterPlatformModelBillingConfigKeepsMusicPricing(t *testing.T) { + model := PlatformModel{ + ModelType: StringList{"audio_generate"}, + BillingConfig: map[string]any{ + "music": map[string]any{"basePrice": 6}, + "musicBase": 6, + "audio": map[string]any{"basePrice": 1}, + "image": map[string]any{"basePrice": 10}, + }, + } + + filtered := FilterPlatformModelBillingConfig(model) + + assertHasKeys(t, filtered.BillingConfig, "music", "musicBase") + assertMissingKeys(t, filtered.BillingConfig, "audio", "image") +} + +func TestFilterPlatformModelBillingConfigKeepsSpeechAudioPricing(t *testing.T) { + model := PlatformModel{ + ModelType: StringList{"text_to_speech"}, + BillingConfig: map[string]any{ + "audio": map[string]any{"basePrice": 0.5}, + "audioBase": 0.5, + "music": map[string]any{"basePrice": 6}, + "video": map[string]any{"basePrice": 100}, + }, + } + + filtered := FilterPlatformModelBillingConfig(model) + + assertHasKeys(t, filtered.BillingConfig, "audio", "audioBase") + assertMissingKeys(t, filtered.BillingConfig, "music", "video") +} + func assertHasKeys(t *testing.T, value map[string]any, keys ...string) { t.Helper() 
for _, key := range keys { diff --git a/apps/api/migrations/0046_audio_music_openapi_simulation.sql b/apps/api/migrations/0046_audio_music_openapi_simulation.sql new file mode 100644 index 0000000..665638a --- /dev/null +++ b/apps/api/migrations/0046_audio_music_openapi_simulation.sql @@ -0,0 +1,76 @@ +INSERT INTO integration_platforms ( + provider, platform_key, name, base_url, auth_type, credentials, config, + default_pricing_mode, default_discount_factor, retry_policy, rate_limit_policy, priority, status +) +VALUES + ( + 'suno', 'suno-simulation', 'Suno Music Simulation', + 'https://api.cqtai.com/api/cqt', 'bearer', + '{"mode":"simulation"}'::jsonb, + '{"testMode":true,"seed":"audio-music-openapi","sourceSpecType":"suno"}'::jsonb, + 'inherit_discount', 1, + '{"enabled":true,"maxAttempts":2,"retryOn":["rate_limit","timeout","server_error","network"]}'::jsonb, + '{"rules":[{"metric":"rpm","limit":60,"windowSeconds":60},{"metric":"concurrent","limit":5,"leaseTtlSeconds":120}]}'::jsonb, + 930, + 'enabled' + ), + ( + 'minimax', 'minimax-speech-simulation', 'MiniMax Speech Simulation', + 'https://api.minimaxi.com/v1', 'bearer', + '{"mode":"simulation"}'::jsonb, + '{"testMode":true,"seed":"audio-music-openapi","sourceSpecType":"minimax"}'::jsonb, + 'inherit_discount', 1, + '{"enabled":true,"maxAttempts":2,"retryOn":["rate_limit","timeout","server_error","network"]}'::jsonb, + '{"rules":[{"metric":"rpm","limit":60,"windowSeconds":60},{"metric":"concurrent","limit":5,"leaseTtlSeconds":120}]}'::jsonb, + 940, + 'enabled' + ) +ON CONFLICT (platform_key) DO UPDATE +SET name = EXCLUDED.name, + base_url = EXCLUDED.base_url, + auth_type = EXCLUDED.auth_type, + credentials = EXCLUDED.credentials, + config = EXCLUDED.config, + default_pricing_mode = EXCLUDED.default_pricing_mode, + default_discount_factor = EXCLUDED.default_discount_factor, + retry_policy = EXCLUDED.retry_policy, + rate_limit_policy = EXCLUDED.rate_limit_policy, + priority = EXCLUDED.priority, + status = 
EXCLUDED.status, + updated_at = now(); + +INSERT INTO platform_models ( + platform_id, base_model_id, model_name, provider_model_name, model_alias, model_type, display_name, + capabilities, pricing_mode, billing_config, retry_policy, rate_limit_policy, enabled +) +SELECT p.id, + b.id, + b.provider_model_name, + b.provider_model_name, + b.display_name, + b.model_type, + b.display_name, + b.capabilities, + 'inherit_discount', + b.base_billing_config, + '{"enabled":true,"maxAttempts":2}'::jsonb, + b.default_rate_limit_policy, + true +FROM integration_platforms p +JOIN base_model_catalog b ON b.provider_key = p.provider +WHERE p.platform_key IN ('suno-simulation', 'minimax-speech-simulation') + AND b.status = 'active' + AND b.model_type ?| ARRAY['audio_generate','text_to_speech'] +ON CONFLICT (platform_id, model_name) DO UPDATE +SET base_model_id = EXCLUDED.base_model_id, + provider_model_name = EXCLUDED.provider_model_name, + model_alias = EXCLUDED.model_alias, + display_name = EXCLUDED.display_name, + model_type = EXCLUDED.model_type, + capabilities = EXCLUDED.capabilities, + pricing_mode = EXCLUDED.pricing_mode, + billing_config = EXCLUDED.billing_config, + retry_policy = EXCLUDED.retry_policy, + rate_limit_policy = EXCLUDED.rate_limit_policy, + enabled = EXCLUDED.enabled, + updated_at = now();