迁移音频生成与语音合成到 gateway 并补充 simulation 测试

This commit is contained in:
wangbo 2026-06-07 10:26:57 +08:00
parent 78ab867a9f
commit dc14866210
22 changed files with 2475 additions and 55 deletions

File diff suppressed because it is too large Load Diff

View File

@ -568,12 +568,24 @@ definitions:
type: object
httpapi.TaskRequest:
properties:
audioWeight:
example: 0.65
type: number
customMode:
example: false
type: boolean
duration:
example: 5
type: integer
emotion:
example: happy
type: string
input:
example: Tell me a short story
type: string
makeInstrumental:
example: false
type: boolean
max_tokens:
example: 512
type: integer
@ -584,6 +596,12 @@ definitions:
model:
example: gpt-4o-mini
type: string
negativeTags:
example: noise
type: string
pitch:
example: 0
type: number
prompt:
example: A watercolor robot reading a book
type: string
@ -601,9 +619,42 @@ definitions:
size:
example: 1024x1024
type: string
speed:
example: 1
type: number
stream:
example: false
type: boolean
style:
example: city pop, bright synth
type: string
styleWeight:
example: 0.65
type: number
tags:
example: city pop, synth
type: string
text:
example: Hello from EasyAI audio synthesis.
type: string
text_file_id:
example: ""
type: string
title:
example: Useful Tools
type: string
vocalGender:
example: f
type: string
voice_id:
example: female-shaonv
type: string
vol:
example: 1
type: number
weirdnessConstraint:
example: 0.35
type: number
type: object
httpapi.TenantListResponse:
properties:
@ -1230,6 +1281,8 @@ definitions:
billings:
items: {}
type: array
conversationId:
type: string
createdAt:
type: string
error:
@ -1257,6 +1310,8 @@ definitions:
type: string
modelType:
type: string
newMessageCount:
type: integer
remoteTaskId:
type: string
remoteTaskPayload:
@ -3557,6 +3612,43 @@ paths:
summary: 列出模型限流状态
tags:
- runtime
/api/admin/runtime/model-rate-limits/{platformModelID}/restore:
post:
description: 管理端手动解除平台模型停用、模型冷却、平台冷却或平台禁用状态,使其重新参与路由。
parameters:
- description: 平台模型 ID
in: path
name: platformModelID
required: true
type: string
produces:
- application/json
responses:
"200":
description: OK
schema:
$ref: '#/definitions/store.ModelRateLimitStatus'
"401":
description: Unauthorized
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"403":
description: Forbidden
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"404":
description: Not Found
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"500":
description: Internal Server Error
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
security:
- BearerAuth: []
summary: 恢复平台模型运行状态
tags:
- runtime
/api/admin/runtime/policy-sets:
get:
description: 管理端返回可分配给平台、模型或用户组的运行策略集。
@ -5204,6 +5296,67 @@ paths:
summary: 列出可调用模型
tags:
- playground
/api/v1/music/generations:
post:
consumes:
- application/json
description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果;OpenAI-compatible
路径同步返回兼容响应或 SSE 流。
parameters:
- description: true 时异步创建任务并返回 202
in: header
name: X-Async
type: boolean
- description: AI 任务请求,字段随任务类型变化
in: body
name: input
required: true
schema:
$ref: '#/definitions/httpapi.TaskRequest'
produces:
- application/json
responses:
"200":
description: OK
schema:
$ref: '#/definitions/httpapi.CompatibleResponse'
"202":
description: Accepted
schema:
$ref: '#/definitions/httpapi.TaskAcceptedResponse'
"400":
description: Bad Request
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"401":
description: Unauthorized
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"402":
description: Payment Required
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"403":
description: Forbidden
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"404":
description: Not Found
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"429":
description: Too Many Requests
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"502":
description: Bad Gateway
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
security:
- BearerAuth: []
summary: 创建或执行 AI 任务
tags:
- tasks
/api/v1/platforms:
get:
description: 按当前用户可访问模型过滤平台,仅返回启用且存在可访问模型的平台。
@ -5454,6 +5607,128 @@ paths:
summary: 创建或执行 AI 任务
tags:
- tasks
/api/v1/song/generations:
post:
consumes:
- application/json
description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果;OpenAI-compatible
路径同步返回兼容响应或 SSE 流。
parameters:
- description: true 时异步创建任务并返回 202
in: header
name: X-Async
type: boolean
- description: AI 任务请求,字段随任务类型变化
in: body
name: input
required: true
schema:
$ref: '#/definitions/httpapi.TaskRequest'
produces:
- application/json
responses:
"200":
description: OK
schema:
$ref: '#/definitions/httpapi.CompatibleResponse'
"202":
description: Accepted
schema:
$ref: '#/definitions/httpapi.TaskAcceptedResponse'
"400":
description: Bad Request
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"401":
description: Unauthorized
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"402":
description: Payment Required
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"403":
description: Forbidden
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"404":
description: Not Found
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"429":
description: Too Many Requests
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"502":
description: Bad Gateway
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
security:
- BearerAuth: []
summary: 创建或执行 AI 任务
tags:
- tasks
/api/v1/speech/generations:
post:
consumes:
- application/json
description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果;OpenAI-compatible
路径同步返回兼容响应或 SSE 流。
parameters:
- description: true 时异步创建任务并返回 202
in: header
name: X-Async
type: boolean
- description: AI 任务请求,字段随任务类型变化
in: body
name: input
required: true
schema:
$ref: '#/definitions/httpapi.TaskRequest'
produces:
- application/json
responses:
"200":
description: OK
schema:
$ref: '#/definitions/httpapi.CompatibleResponse'
"202":
description: Accepted
schema:
$ref: '#/definitions/httpapi.TaskAcceptedResponse'
"400":
description: Bad Request
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"401":
description: Unauthorized
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"402":
description: Payment Required
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"403":
description: Forbidden
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"404":
description: Not Found
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"429":
description: Too Many Requests
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"502":
description: Bad Gateway
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
security:
- BearerAuth: []
summary: 创建或执行 AI 任务
tags:
- tasks
/api/v1/tasks:
get:
description: 按当前用户列出任务,支持关键字、模型类型、时间范围和分页过滤。
@ -6165,6 +6440,67 @@ paths:
summary: 创建或执行 AI 任务
tags:
- tasks
/music/generations:
post:
consumes:
- application/json
description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果;OpenAI-compatible
路径同步返回兼容响应或 SSE 流。
parameters:
- description: true 时异步创建任务并返回 202
in: header
name: X-Async
type: boolean
- description: AI 任务请求,字段随任务类型变化
in: body
name: input
required: true
schema:
$ref: '#/definitions/httpapi.TaskRequest'
produces:
- application/json
responses:
"200":
description: OK
schema:
$ref: '#/definitions/httpapi.CompatibleResponse'
"202":
description: Accepted
schema:
$ref: '#/definitions/httpapi.TaskAcceptedResponse'
"400":
description: Bad Request
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"401":
description: Unauthorized
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"402":
description: Payment Required
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"403":
description: Forbidden
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"404":
description: Not Found
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"429":
description: Too Many Requests
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"502":
description: Bad Gateway
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
security:
- BearerAuth: []
summary: 创建或执行 AI 任务
tags:
- tasks
/readyz:
get:
description: 检查 Postgres 是否可用;数据库不可用时返回 503。
@ -6304,6 +6640,128 @@ paths:
summary: 创建或执行 AI 任务
tags:
- tasks
/song/generations:
post:
consumes:
- application/json
description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果;OpenAI-compatible
路径同步返回兼容响应或 SSE 流。
parameters:
- description: true 时异步创建任务并返回 202
in: header
name: X-Async
type: boolean
- description: AI 任务请求,字段随任务类型变化
in: body
name: input
required: true
schema:
$ref: '#/definitions/httpapi.TaskRequest'
produces:
- application/json
responses:
"200":
description: OK
schema:
$ref: '#/definitions/httpapi.CompatibleResponse'
"202":
description: Accepted
schema:
$ref: '#/definitions/httpapi.TaskAcceptedResponse'
"400":
description: Bad Request
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"401":
description: Unauthorized
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"402":
description: Payment Required
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"403":
description: Forbidden
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"404":
description: Not Found
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"429":
description: Too Many Requests
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"502":
description: Bad Gateway
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
security:
- BearerAuth: []
summary: 创建或执行 AI 任务
tags:
- tasks
/speech/generations:
post:
consumes:
- application/json
description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果;OpenAI-compatible
路径同步返回兼容响应或 SSE 流。
parameters:
- description: true 时异步创建任务并返回 202
in: header
name: X-Async
type: boolean
- description: AI 任务请求,字段随任务类型变化
in: body
name: input
required: true
schema:
$ref: '#/definitions/httpapi.TaskRequest'
produces:
- application/json
responses:
"200":
description: OK
schema:
$ref: '#/definitions/httpapi.CompatibleResponse'
"202":
description: Accepted
schema:
$ref: '#/definitions/httpapi.TaskAcceptedResponse'
"400":
description: Bad Request
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"401":
description: Unauthorized
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"402":
description: Payment Required
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"403":
description: Forbidden
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"404":
description: Not Found
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"429":
description: Too Many Requests
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"502":
description: Bad Gateway
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
security:
- BearerAuth: []
summary: 创建或执行 AI 任务
tags:
- tasks
/static/generated/{asset}:
get:
description: 从本地生成资源目录读取图片、视频等任务产物;不存在时返回 404。
@ -6329,9 +6787,9 @@ paths:
- static
/static/simulation/{asset}:
get:
description: 返回本地模拟模式使用的图片、视频封面或短视频资源。
description: 返回本地模拟模式使用的图片、视频封面、短视频或音频资源。
parameters:
- description: 资源文件名,可选 image.svg、image.png、image-edit.svg、image-edit.png、video-poster.svg、video.mp4
- description: 资源文件名,可选 image.svg、image.png、image-edit.svg、image-edit.png、video-poster.svg、video.mp4、audio.wav
in: path
name: asset
required: true
@ -6339,6 +6797,7 @@ paths:
produces:
- image/svg+xml
- video/mp4
- audio/wav
responses:
"200":
description: OK
@ -6662,6 +7121,67 @@ paths:
summary: 创建或执行 AI 任务
tags:
- tasks
/v1/music/generations:
post:
consumes:
- application/json
description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果;OpenAI-compatible
路径同步返回兼容响应或 SSE 流。
parameters:
- description: true 时异步创建任务并返回 202
in: header
name: X-Async
type: boolean
- description: AI 任务请求,字段随任务类型变化
in: body
name: input
required: true
schema:
$ref: '#/definitions/httpapi.TaskRequest'
produces:
- application/json
responses:
"200":
description: OK
schema:
$ref: '#/definitions/httpapi.CompatibleResponse'
"202":
description: Accepted
schema:
$ref: '#/definitions/httpapi.TaskAcceptedResponse'
"400":
description: Bad Request
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"401":
description: Unauthorized
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"402":
description: Payment Required
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"403":
description: Forbidden
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"404":
description: Not Found
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"429":
description: Too Many Requests
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"502":
description: Bad Gateway
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
security:
- BearerAuth: []
summary: 创建或执行 AI 任务
tags:
- tasks
/v1/reranks:
post:
consumes:
@ -6784,6 +7304,128 @@ paths:
summary: 创建或执行 AI 任务
tags:
- tasks
/v1/song/generations:
post:
consumes:
- application/json
description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果;OpenAI-compatible
路径同步返回兼容响应或 SSE 流。
parameters:
- description: true 时异步创建任务并返回 202
in: header
name: X-Async
type: boolean
- description: AI 任务请求,字段随任务类型变化
in: body
name: input
required: true
schema:
$ref: '#/definitions/httpapi.TaskRequest'
produces:
- application/json
responses:
"200":
description: OK
schema:
$ref: '#/definitions/httpapi.CompatibleResponse'
"202":
description: Accepted
schema:
$ref: '#/definitions/httpapi.TaskAcceptedResponse'
"400":
description: Bad Request
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"401":
description: Unauthorized
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"402":
description: Payment Required
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"403":
description: Forbidden
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"404":
description: Not Found
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"429":
description: Too Many Requests
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"502":
description: Bad Gateway
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
security:
- BearerAuth: []
summary: 创建或执行 AI 任务
tags:
- tasks
/v1/speech/generations:
post:
consumes:
- application/json
description: 网关任务接口按 model 选择平台模型;除 /api/v1/chat/completions 以外的 /api/v1 任务路径返回任务受理结果;OpenAI-compatible
路径同步返回兼容响应或 SSE 流。
parameters:
- description: true 时异步创建任务并返回 202
in: header
name: X-Async
type: boolean
- description: AI 任务请求,字段随任务类型变化
in: body
name: input
required: true
schema:
$ref: '#/definitions/httpapi.TaskRequest'
produces:
- application/json
responses:
"200":
description: OK
schema:
$ref: '#/definitions/httpapi.CompatibleResponse'
"202":
description: Accepted
schema:
$ref: '#/definitions/httpapi.TaskAcceptedResponse'
"400":
description: Bad Request
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"401":
description: Unauthorized
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"402":
description: Payment Required
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"403":
description: Forbidden
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"404":
description: Not Found
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"429":
description: Too Many Requests
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
"502":
description: Bad Gateway
schema:
$ref: '#/definitions/httpapi.ErrorEnvelope'
security:
- BearerAuth: []
summary: 创建或执行 AI 任务
tags:
- tasks
schemes:
- http
- https

View File

@ -2,6 +2,7 @@ package clients
import (
"context"
"encoding/base64"
"encoding/json"
"net/http"
"net/http/httptest"
@ -65,6 +66,35 @@ func TestSimulationClientReturnsVideoDemoAssets(t *testing.T) {
}
}
func TestSimulationClientReturnsAudioDemoAssets(t *testing.T) {
response, err := (SimulationClient{}).Run(context.Background(), Request{
Kind: "speech.generations",
ModelType: "text_to_speech",
Model: "speech-2.6-turbo",
Body: map[string]any{
"text": "hello from simulation",
"voice_id": "female-shaonv",
"count": 2,
"simulationDurationMs": 5,
},
Candidate: store.RuntimeModelCandidate{Provider: "simulation"},
})
if err != nil {
t.Fatalf("run simulation audio client: %v", err)
}
data, _ := response.Result["data"].([]any)
if len(data) != 2 || response.Result["status"] != "success" {
t.Fatalf("unexpected simulated audio response: %+v", response.Result)
}
item, _ := data[0].(map[string]any)
if item["type"] != "audio" || item["url"] != "/static/simulation/audio.wav" || item["audio_url"] != "/static/simulation/audio.wav" {
t.Fatalf("unexpected simulated audio item: %+v", item)
}
if item["revised_text"] != "hello from simulation" || item["assetSource"] != "simulation" {
t.Fatalf("unexpected simulated audio metadata: %+v", item)
}
}
func TestSimulationDurationDefaultsByMediaType(t *testing.T) {
imageDuration := simulationDuration(Request{Kind: "images.generations"})
if imageDuration < 10*time.Second || imageDuration > 30*time.Second {
@ -74,12 +104,84 @@ func TestSimulationDurationDefaultsByMediaType(t *testing.T) {
if videoDuration < 2*time.Minute || videoDuration > 3*time.Minute {
t.Fatalf("video simulation duration should default to 2-3m, got %s", videoDuration)
}
audioDuration := simulationDuration(Request{Kind: "speech.generations"})
if audioDuration < 2*time.Second || audioDuration > 6*time.Second {
t.Fatalf("audio simulation duration should default to 2-6s, got %s", audioDuration)
}
textDuration := simulationDuration(Request{Kind: "chat.completions"})
if textDuration < 800*time.Millisecond || textDuration > 2400*time.Millisecond {
t.Fatalf("text simulation duration should keep short defaults, got %s", textDuration)
}
}
// TestMinimaxClientSpeechUsesT2AV2AndNormalizesAudio verifies that a
// speech.generations request is posted to /t2a_v2 with bearer auth, that the
// voice parameters are moved under voice_setting, and that the hex audio in
// the upstream response is normalized into a base64 audio/mpeg data URI.
func TestMinimaxClientSpeechUsesT2AV2AndNormalizesAudio(t *testing.T) {
	var captured map[string]any
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// The handler runs on the server's goroutine, where t.Fatalf/FailNow must
		// not be called; record the failure with t.Errorf and answer with an
		// HTTP error so the client call in the test goroutine fails fast.
		if r.Method != http.MethodPost || r.URL.Path != "/t2a_v2" {
			t.Errorf("unexpected request: %s %s", r.Method, r.URL.String())
			http.Error(w, "unexpected request", http.StatusBadRequest)
			return
		}
		if got := r.Header.Get("Authorization"); got != "Bearer test-key" {
			t.Errorf("unexpected auth header: %q", got)
			http.Error(w, "unexpected auth header", http.StatusBadRequest)
			return
		}
		if err := json.NewDecoder(r.Body).Decode(&captured); err != nil {
			t.Errorf("decode request: %v", err)
			http.Error(w, "invalid request body", http.StatusBadRequest)
			return
		}
		w.Header().Set("x-request-id", "req-minimax-speech")
		// "68656c6c6f" is the hex encoding of "hello".
		_ = json.NewEncoder(w).Encode(map[string]any{
			"data":      map[string]any{"audio": "68656c6c6f"},
			"base_resp": map[string]any{"status_code": 0},
		})
	}))
	defer server.Close()

	response, err := (MinimaxClient{HTTPClient: server.Client()}).Run(context.Background(), Request{
		Kind:  "speech.generations",
		Model: "MiniMax Speech 2.6 Turbo",
		Body: map[string]any{
			"text":     "hello",
			"voice_id": "female-shaonv",
			"speed":    1.2,
			"vol":      0.8,
			"pitch":    -1,
			"emotion":  "happy",
		},
		Candidate: store.RuntimeModelCandidate{
			Provider:          "minimax",
			BaseURL:           server.URL,
			ProviderModelName: "speech-2.6-turbo",
			Credentials:       map[string]any{"apiKey": "test-key"},
		},
	})
	if err != nil {
		t.Fatalf("run minimax speech client: %v", err)
	}

	// The upstream payload must carry the provider model name, not the display name.
	if captured["model"] != "speech-2.6-turbo" || captured["text"] != "hello" {
		t.Fatalf("unexpected minimax speech payload: %+v", captured)
	}
	if _, ok := captured["voice_id"]; ok {
		t.Fatalf("voice_id should be moved into voice_setting: %+v", captured)
	}
	voiceSetting, ok := captured["voice_setting"].(map[string]any)
	if !ok {
		t.Fatalf("missing voice_setting: %+v", captured)
	}
	// JSON decoding turns every number into float64, hence the float64(-1) comparison.
	if voiceSetting["voice_id"] != "female-shaonv" || voiceSetting["speed"] != 1.2 || voiceSetting["vol"] != 0.8 || voiceSetting["pitch"] != float64(-1) || voiceSetting["emotion"] != "happy" {
		t.Fatalf("unexpected voice_setting: %+v", voiceSetting)
	}

	data, _ := response.Result["data"].([]any)
	if len(data) != 1 {
		t.Fatalf("unexpected minimax speech response: %+v", response.Result)
	}
	item, _ := data[0].(map[string]any)
	expectedContent := "data:audio/mpeg;base64," + base64.StdEncoding.EncodeToString([]byte("hello"))
	if item["type"] != "audio" || item["content"] != expectedContent || item["mime_type"] != "audio/mpeg" {
		t.Fatalf("unexpected normalized audio item: %+v", item)
	}
	if response.RequestID != "req-minimax-speech" {
		t.Fatalf("unexpected request id: %q", response.RequestID)
	}
}
func TestSimulationDurationCanBeControlledByParams(t *testing.T) {
fixedDuration := simulationDuration(Request{Body: map[string]any{"simulationDurationSeconds": 7}})
if fixedDuration != 7*time.Second {

View File

@ -2,8 +2,11 @@ package clients
import (
"context"
"encoding/base64"
"encoding/hex"
"net/http"
"strings"
"time"
)
type JimengClient struct{ HTTPClient *http.Client }
@ -15,6 +18,7 @@ type MidjourneyClient struct{ HTTPClient *http.Client }
type ViduClient struct{ HTTPClient *http.Client }
type AliyunBailianClient struct{ HTTPClient *http.Client }
type NewAPIClient struct{ HTTPClient *http.Client }
// SunoClient is the provider client for Suno; its Run method delegates to
// providerTaskClient configured with sunoSpec() and forwards HTTPClient to it.
type SunoClient struct{ HTTPClient *http.Client }
func (c JimengClient) Run(ctx context.Context, request Request) (Response, error) {
return providerTaskClient{HTTPClient: c.HTTPClient, Spec: jimengSpec()}.Run(ctx, request)
@ -33,6 +37,9 @@ func (c HunyuanVideoClient) Run(ctx context.Context, request Request) (Response,
}
// Run dispatches a MiniMax request: speech.generations goes through the
// dedicated runSpeech path, every other kind is handled by the generic
// provider task client configured with minimaxSpec().
func (c MinimaxClient) Run(ctx context.Context, request Request) (Response, error) {
	switch request.Kind {
	case "speech.generations":
		return c.runSpeech(ctx, request)
	default:
		taskClient := providerTaskClient{HTTPClient: c.HTTPClient, Spec: minimaxSpec()}
		return taskClient.Run(ctx, request)
	}
}
@ -52,6 +59,10 @@ func (c NewAPIClient) Run(ctx context.Context, request Request) (Response, error
return providerTaskClient{HTTPClient: c.HTTPClient, Spec: newAPISpec()}.Run(ctx, request)
}
// Run executes the request through the generic provider task client using the
// Suno task specification.
func (c SunoClient) Run(ctx context.Context, request Request) (Response, error) {
	taskClient := providerTaskClient{
		HTTPClient: c.HTTPClient,
		Spec:       sunoSpec(),
	}
	return taskClient.Run(ctx, request)
}
func jimengSpec() providerTaskSpec {
return providerTaskSpec{
Name: "jimeng",
@ -149,6 +160,114 @@ func minimaxSpec() providerTaskSpec {
}
}
// runSpeech posts a text-to-speech request to the MiniMax /t2a_v2 endpoint and
// converts the hex-encoded audio found at data.audio into a single audio item
// whose content is a base64 audio/mpeg data URI.
//
// Errors from providerPostJSON are annotated with the request id and timing.
// A response without audio, or with audio that is not valid hex, yields a
// non-retryable ClientError with code "invalid_response".
func (c MinimaxClient) runSpeech(ctx context.Context, request Request) (Response, error) {
	startedAt := time.Now()
	payload := minimaxSpeechPayload(request)
	client := httpClient(request.HTTPClient, c.HTTPClient)
	endpoint := providerURL(request.Candidate.BaseURL, "/t2a_v2")
	result, requestID, err := providerPostJSON(ctx, client, endpoint, payload, request.Candidate.Credentials, "bearer")
	finishedAt := time.Now()
	if err != nil {
		return Response{}, annotateResponseError(err, requestID, startedAt, finishedAt)
	}

	// Both failure branches below share the same error envelope; only the
	// message differs.
	invalidResponse := func(message string) error {
		return &ClientError{
			Code:               "invalid_response",
			Message:            message,
			RequestID:          firstNonEmptyString(requestID, requestIDFromResult(result)),
			ResponseStartedAt:  startedAt,
			ResponseFinishedAt: finishedAt,
			ResponseDurationMS: responseDurationMS(startedAt, finishedAt),
			Retryable:          false,
		}
	}

	encodedAudio := strings.TrimSpace(stringFromPathValue(valueAtPath(result, "data.audio")))
	if encodedAudio == "" {
		// Prefer the upstream explanation when one is present.
		return Response{}, invalidResponse(firstNonEmptyString(
			valueAtPath(result, "base_resp.status_msg"),
			valueAtPath(result, "message"),
			"minimax speech audio is missing",
		))
	}
	rawAudio, decodeErr := hex.DecodeString(encodedAudio)
	if decodeErr != nil {
		return Response{}, invalidResponse("minimax speech audio hex is invalid: " + decodeErr.Error())
	}

	audioItem := map[string]any{
		"type":      "audio",
		"content":   "data:audio/mpeg;base64," + base64.StdEncoding.EncodeToString(rawAudio),
		"mime_type": "audio/mpeg",
		"uploaded":  false,
	}

	// Start from a copy of the upstream body, then overlay the normalized
	// fields; the untouched upstream body is kept under raw_data.
	normalized := cloneMapAny(result)
	normalized["status"] = "success"
	normalized["created"] = time.Now().UnixMilli()
	normalized["model"] = request.Model
	normalized["raw_data"] = cloneMapAny(result)
	normalized["data"] = []any{audioItem}

	return Response{
		Result:             normalized,
		RequestID:          firstNonEmptyString(requestID, requestIDFromResult(result)),
		Progress:           providerProgress(request),
		ResponseStartedAt:  startedAt,
		ResponseFinishedAt: finishedAt,
		ResponseDurationMS: responseDurationMS(startedAt, finishedAt),
	}, nil
}
// minimaxSpeechPayload builds the upstream request body for a MiniMax speech
// call from a copy of the request body.
//
// The model is replaced with the upstream model name, and the flat voice
// parameters (voice_id/voiceId, speed, vol/volume, pitch, emotion) are removed
// from the top level and regrouped under "voice_setting". Missing speed, vol
// and pitch fall back to 1, 1 and 0; emotion is only included when non-empty.
func minimaxSpeechPayload(request Request) map[string]any {
	payload := cloneBody(request.Body)
	payload["model"] = upstreamModelName(request.Candidate)

	// Read every voice parameter before the flat keys are deleted below.
	settings := map[string]any{
		"voice_id": firstNonEmptyString(payload["voice_id"], payload["voiceId"]),
		"speed":    firstPresent(payload["speed"], float64(1)),
		"vol":      firstPresent(payload["vol"], payload["volume"], float64(1)),
		"pitch":    firstPresent(payload["pitch"], float64(0)),
	}
	emotion := firstNonEmptyString(payload["emotion"])
	if emotion != "" {
		settings["emotion"] = emotion
	}

	flatVoiceKeys := []string{"voice_id", "voiceId", "speed", "vol", "volume", "pitch", "emotion"}
	for _, key := range flatVoiceKeys {
		delete(payload, key)
	}

	payload["voice_setting"] = settings
	return payload
}
// sunoSpec describes how the generic provider task client talks to Suno:
// tasks are submitted to /generator/suno and polled at /v2/sunoinfo?id=<task>,
// authenticated with a bearer token. The task id is read from "data" and the
// status from "data.status".
func sunoSpec() providerTaskSpec {
	submitPath := func(Request, map[string]any) string {
		return "/generator/suno"
	}
	pollPath := func(_ Request, upstreamTaskID string, _ map[string]any) string {
		return "/v2/sunoinfo?id=" + upstreamTaskID
	}
	// Every submission is a "create" task with the Suno-specific model alias;
	// the two boolean flags default to false when the caller left them unset.
	submitBody := func(request Request, body map[string]any) map[string]any {
		body["task"] = "create"
		body["model"] = sunoMappedModel(upstreamModelName(request.Candidate))
		for _, flag := range []string{"customMode", "makeInstrumental"} {
			if body[flag] == nil {
				body[flag] = false
			}
		}
		return body
	}

	return providerTaskSpec{
		Name:              "suno",
		SubmitPath:        submitPath,
		PollPath:          pollPath,
		Auth:              "bearer",
		TaskIDPaths:       []string{"data"},
		StatusPaths:       []string{"data.status"},
		SuccessStatuses:   []string{"succeeded", "complete", "completed"},
		FailureStatuses:   []string{"failed"},
		DefaultSubmitBody: submitBody,
	}
}
// sunoMappedModel translates a provider model name into the model alias sent
// to the Suno endpoint. Surrounding whitespace is ignored. Names without a
// known alias are passed through, trimmed, so the value sent upstream is
// normalized the same way as the value that was matched.
//
// Note that the chirp-v3-x names are mapped to "v40", the same alias as
// chirp-v4-0.
func sunoMappedModel(model string) string {
	name := strings.TrimSpace(model)
	switch name {
	case "chirp-v3-0", "chirp-v3-5", "chirp-v4-0":
		return "v40"
	case "chirp-v4-5":
		return "v45"
	case "chirp-v4-5+":
		return "v45+"
	case "chirp-v5-0":
		return "v50"
	default:
		return name
	}
}
func midjourneySpec() providerTaskSpec {
return providerTaskSpec{
Name: "midjourney",

View File

@ -29,7 +29,7 @@ type providerTaskClient struct {
}
func (c providerTaskClient) Run(ctx context.Context, request Request) (Response, error) {
if request.Kind != "images.generations" && request.Kind != "images.edits" && request.Kind != "videos.generations" {
if !providerTaskKindSupported(request.Kind) {
return Response{}, &ClientError{Code: "unsupported_kind", Message: "unsupported " + c.Spec.Name + " request kind", Retryable: false}
}
startedAt := time.Now()
@ -119,6 +119,15 @@ func (c providerTaskClient) Run(ctx context.Context, request Request) (Response,
}
}
// providerTaskKindSupported reports whether kind is one of the request kinds
// the generic provider task client accepts (image, video, song, music and
// speech generation, plus image edits). The comparison is exact.
func providerTaskKindSupported(kind string) bool {
	supportedKinds := [...]string{
		"images.generations",
		"images.edits",
		"videos.generations",
		"song.generations",
		"music.generations",
		"speech.generations",
	}
	for _, supported := range supportedKinds {
		if kind == supported {
			return true
		}
	}
	return false
}
func (c providerTaskClient) submit(ctx context.Context, request Request, payload map[string]any) (map[string]any, string, error) {
path := c.Spec.SubmitPath(request, payload)
return providerPostJSON(ctx, httpClient(request.HTTPClient, c.HTTPClient), providerURL(request.Candidate.BaseURL, path), payload, request.Candidate.Credentials, c.Spec.Auth)
@ -287,7 +296,7 @@ func containsStatus(values []string, status string) bool {
}
func hasProviderTaskResult(result map[string]any) bool {
return result["data"] != nil || valueAtPath(result, "output.image_urls") != nil || valueAtPath(result, "output.video_url") != nil || valueAtPath(result, "Response.ResultVideoUrl") != nil || valueAtPath(result, "Response.ResultImages") != nil || result["urls"] != nil
return result["data"] != nil || valueAtPath(result, "data.result") != nil || valueAtPath(result, "data.audio") != nil || valueAtPath(result, "output.image_urls") != nil || valueAtPath(result, "output.video_url") != nil || valueAtPath(result, "Response.ResultVideoUrl") != nil || valueAtPath(result, "Response.ResultImages") != nil || result["audio_url"] != nil || result["urls"] != nil
}
func normalizeProviderTaskResult(request Request, spec providerTaskSpec, result map[string]any, upstreamTaskID string) map[string]any {
@ -316,9 +325,19 @@ func providerTaskData(request Request, result map[string]any) []any {
if request.Kind == "videos.generations" || strings.Contains(request.ModelType, "video") {
fileType = "video"
}
if request.Kind == "song.generations" || request.Kind == "music.generations" || request.Kind == "speech.generations" || strings.Contains(request.ModelType, "audio") || strings.Contains(request.ModelType, "speech") {
fileType = "audio"
}
urlValues := []any{}
for _, path := range []string{
"urls",
"audio_url",
"audioUrl",
"data.audio_url",
"data.audioUrl",
"data.result",
"data.result.audio_url",
"data.result.audioUrl",
"image_urls",
"data.image_urls",
"data.images",
@ -368,7 +387,7 @@ func appendURLValues(out *[]any, value any) {
*out = append(*out, item)
}
case map[string]any:
for _, key := range []string{"url", "image_url", "imageUrl", "video_url", "videoUrl", "content", "output"} {
for _, key := range []string{"url", "audio_url", "audioUrl", "image_url", "imageUrl", "video_url", "videoUrl", "content", "output"} {
if item := strings.TrimSpace(fmt.Sprint(typed[key])); item != "" && item != "<nil>" {
*out = append(*out, item)
return

View File

@ -2,6 +2,7 @@ package clients
import (
"context"
"encoding/json"
"errors"
"net/http"
"net/http/httptest"
@ -227,6 +228,80 @@ func TestProviderTaskClientsSubmitAndPoll(t *testing.T) {
}
}
// TestSunoClientSubmitsAndPollsAudioGeneration drives SunoClient against a fake
// upstream: it checks the submit payload (task, mapped model, defaulted
// flags), the remote task id reported through OnRemoteTaskSubmitted, and that
// the polled result is normalized into a single audio item.
func TestSunoClientSubmitsAndPollsAudioGeneration(t *testing.T) {
	var submitted map[string]any
	var submittedRemoteTaskID string
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// The handler runs on the server's goroutine, where t.Fatalf/FailNow must
		// not be called; record the failure with t.Errorf and answer with an
		// HTTP error so the client call in the test goroutine fails fast.
		if got := r.Header.Get("Authorization"); got != "Bearer test-key" {
			t.Errorf("unexpected auth header: %q", got)
			http.Error(w, "unexpected auth header", http.StatusBadRequest)
			return
		}
		w.Header().Set("Content-Type", "application/json")
		w.Header().Set("x-request-id", "req-suno")
		switch {
		case r.Method == http.MethodPost && r.URL.Path == "/generator/suno":
			if err := json.NewDecoder(r.Body).Decode(&submitted); err != nil {
				t.Errorf("decode suno submit request: %v", err)
				http.Error(w, "invalid request body", http.StatusBadRequest)
				return
			}
			_, _ = w.Write([]byte(`{"code":200,"data":"suno-task"}`))
		case r.Method == http.MethodGet && r.URL.Path == "/v2/sunoinfo" && r.URL.Query().Get("id") == "suno-task":
			_, _ = w.Write([]byte(`{"code":200,"data":{"status":"succeeded","result":[{"audio_url":"https://cdn.example/song.mp3"}]}}`))
		default:
			t.Errorf("unexpected request: %s %s", r.Method, r.URL.String())
			http.Error(w, "unexpected request", http.StatusBadRequest)
		}
	}))
	defer server.Close()

	response, err := (SunoClient{HTTPClient: server.Client()}).Run(context.Background(), Request{
		Kind:      "song.generations",
		ModelType: "audio_generate",
		Model:     "Suno V5",
		Body: map[string]any{
			"prompt":       "city lights",
			"tags":         "pop",
			"negativeTags": "noise",
		},
		Candidate: store.RuntimeModelCandidate{
			Provider:          "suno",
			SpecType:          "suno",
			BaseURL:           server.URL,
			Credentials:       map[string]any{"apiKey": "test-key"},
			PlatformConfig:    map[string]any{"pollIntervalMs": 1, "pollTimeoutMs": 1000},
			ProviderModelName: "chirp-v5-0",
			ModelType:         "audio_generate",
		},
		// NOTE(review): t.Fatalf here assumes the callback is invoked on the
		// goroutine that called Run — confirm against providerTaskClient.
		OnRemoteTaskSubmitted: func(remoteTaskID string, payload map[string]any) error {
			submittedRemoteTaskID = remoteTaskID
			if payload["payload"] == nil || payload["submit"] == nil {
				t.Fatalf("missing remote payload: %#v", payload)
			}
			return nil
		},
	})
	if err != nil {
		t.Fatalf("run suno client: %v", err)
	}
	if submittedRemoteTaskID != "suno-task" {
		t.Fatalf("unexpected remote task id: %q", submittedRemoteTaskID)
	}
	// chirp-v5-0 must be sent upstream under its Suno alias "v50".
	if submitted["task"] != "create" || submitted["model"] != "v50" || submitted["prompt"] != "city lights" {
		t.Fatalf("unexpected suno submit payload: %+v", submitted)
	}
	if submitted["customMode"] != false || submitted["makeInstrumental"] != false {
		t.Fatalf("suno defaults should match main-server style payload: %+v", submitted)
	}
	data, _ := response.Result["data"].([]any)
	if len(data) != 1 {
		t.Fatalf("unexpected suno response: %+v", response.Result)
	}
	first, _ := data[0].(map[string]any)
	if first["type"] != "audio" || first["url"] != "https://cdn.example/song.mp3" {
		t.Fatalf("unexpected suno normalized audio item: %+v", first)
	}
	if response.RequestID != "req-suno" {
		t.Fatalf("unexpected request id: %q", response.RequestID)
	}
}
func TestProviderTaskClientFailureAndRetryableErrors(t *testing.T) {
t.Run("poll failure", func(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {

View File

@ -15,6 +15,8 @@ const (
defaultSimulationTextMaxDuration = 2400 * time.Millisecond
defaultSimulationImageMinDuration = 10 * time.Second
defaultSimulationImageMaxDuration = 30 * time.Second
defaultSimulationAudioMinDuration = 2 * time.Second
defaultSimulationAudioMaxDuration = 6 * time.Second
defaultSimulationVideoMinDuration = 2 * time.Minute
defaultSimulationVideoMaxDuration = 3 * time.Minute
maxSimulationDuration = 10 * time.Minute
@ -156,6 +158,24 @@ func simulatedResult(request Request) map[string]any {
"model": request.Model,
"data": simulatedVideoData(request),
}
case "song.generations", "music.generations":
return map[string]any{
"id": "song-simulated",
"created": nowUnix(),
"model": request.Model,
"status": "success",
"data": simulatedAudioData(request, "simulation music"),
"message": "simulation music generated",
}
case "speech.generations":
return map[string]any{
"id": "speech-simulated",
"created": nowUnix(),
"model": request.Model,
"status": "success",
"data": simulatedAudioData(request, "simulation speech"),
"message": "simulation speech generated",
}
default:
modelType := strings.ToLower(request.ModelType)
kind := strings.ToLower(request.Kind)
@ -167,6 +187,15 @@ func simulatedResult(request Request) map[string]any {
"data": simulatedVideoData(request),
}
}
if strings.Contains(modelType, "audio") || strings.Contains(modelType, "speech") || strings.Contains(kind, "audio") || strings.Contains(kind, "song") || strings.Contains(kind, "music") || strings.Contains(kind, "speech") {
return map[string]any{
"id": "audio-simulated",
"created": nowUnix(),
"model": request.Model,
"status": "success",
"data": simulatedAudioData(request, "simulation audio"),
}
}
return map[string]any{
"id": "img-simulated",
"created": nowUnix(),
@ -307,6 +336,24 @@ func simulatedVideoData(request Request) []any {
return items
}
// simulatedAudioData builds the "data" list returned by simulated audio tasks
// (music, song and speech). One entry is produced per simulated output, as
// reported by simulatedOutputCount for the request body.
//
// Every entry points at the bundled simulation WAV asset and carries the
// resolved prompt (falling back to fallbackPrompt), the simulated duration in
// seconds, and a "revised_text" that prefers the body's "text" field over the
// prompt.
func simulatedAudioData(request Request, fallbackPrompt string) []any {
	const assetURL = "/static/simulation/audio.wav"

	// These values depend only on the request, so compute them once and share
	// them across all generated entries.
	prompt := firstNonEmptyPrompt(request.Body, fallbackPrompt)
	revisedText := firstNonEmptyString(stringValue(request.Body, "text"), prompt)
	durationSeconds := simulatedAudioDurationSeconds(request)

	total := simulatedOutputCount(request.Body)
	entries := make([]any, total)
	for position := range entries {
		entries[position] = map[string]any{
			"type":         "audio",
			"url":          assetURL,
			"audio_url":    assetURL,
			"duration":     durationSeconds,
			"assetSource":  "simulation",
			"index":        position,
			"prompt":       prompt,
			"revised_text": revisedText,
		}
	}
	return entries
}
func simulatedUsage(request Request) Usage {
if request.ModelType == "chat" || request.ModelType == "text_generate" || request.Kind == "responses" {
return Usage{InputTokens: 12, OutputTokens: 8, TotalTokens: 20}
@ -368,6 +415,9 @@ func defaultSimulationDurationRange(request Request) (time.Duration, time.Durati
if simulationImageRequest(request) {
return defaultSimulationImageMinDuration, defaultSimulationImageMaxDuration
}
if simulationAudioRequest(request) {
return defaultSimulationAudioMinDuration, defaultSimulationAudioMaxDuration
}
return defaultSimulationTextMinDuration, defaultSimulationTextMaxDuration
}
@ -383,6 +433,12 @@ func simulationImageRequest(request Request) bool {
return strings.Contains(kind, "image") || strings.Contains(modelType, "image")
}
// simulationAudioRequest reports whether the request looks like an audio task.
// Matching is a case-insensitive substring check: the request kind may mention
// audio, song, music or speech, and the model type may mention audio or speech.
func simulationAudioRequest(request Request) bool {
	lowerKind := strings.ToLower(request.Kind)
	for _, marker := range []string{"audio", "song", "music", "speech"} {
		if strings.Contains(lowerKind, marker) {
			return true
		}
	}

	lowerModelType := strings.ToLower(request.ModelType)
	for _, marker := range []string{"audio", "speech"} {
		if strings.Contains(lowerModelType, marker) {
			return true
		}
	}
	return false
}
func simulationDurationSeconds(request Request, keys ...string) int {
for _, source := range []map[string]any{request.Body, request.Candidate.PlatformConfig, request.Candidate.Credentials} {
for _, key := range keys {
@ -440,6 +496,16 @@ func simulatedVideoDurationSeconds(request Request) int {
return 5
}
// simulatedAudioDurationSeconds picks the duration, in seconds, reported for a
// simulated audio result.
//
// Resolution order:
//  1. a positive "duration" value in the request body;
//  2. the rune length of the body's "text" divided by eight, when that is
//     at least one;
//  3. a fixed fallback of 3 seconds.
func simulatedAudioDurationSeconds(request Request) int {
	requested := intValue(request.Body, "duration", 0)
	if requested > 0 {
		return requested
	}

	// Count runes rather than bytes so multi-byte characters count once each.
	runeCount := len([]rune(stringValue(request.Body, "text")))
	estimated := runeCount / 8
	if estimated <= 0 {
		return 3
	}
	return estimated
}
func firstNonEmptyPrompt(body map[string]any, fallback string) string {
for _, key := range []string{"prompt", "input"} {
if value := strings.TrimSpace(stringValue(body, key)); value != "" {

View File

@ -72,6 +72,50 @@ func TestPlanTaskResponseKeepsAsyncTaskModeForOtherAPIV1Tasks(t *testing.T) {
}
}
// TestPlanTaskResponseKeepsCompatibleSyncForAudioOpenAPIUnlessAsync checks the
// response plan chosen for the song, music and speech generation endpoints
// under /api/v1:
//   - without X-Async the plan is synchronous, compatible, and not streamed
//     even when the body carries stream=true;
//   - with the X-Async: true header the plan is asynchronous and still compatible.
func TestPlanTaskResponseKeepsCompatibleSyncForAudioOpenAPIUnlessAsync(t *testing.T) {
	type audioEndpoint struct {
		kind string
		path string
	}
	endpoints := []audioEndpoint{
		{kind: "song.generations", path: "/api/v1/song/generations"},
		{kind: "music.generations", path: "/api/v1/music/generations"},
		{kind: "speech.generations", path: "/api/v1/speech/generations"},
	}

	for _, endpoint := range endpoints {
		t.Run(endpoint.kind, func(t *testing.T) {
			// Default request: stream=true in the body must not enable streaming.
			syncRequest := httptest.NewRequest(http.MethodPost, endpoint.path, nil)
			syncPlan := planTaskResponse(endpoint.kind, true, map[string]any{"stream": true}, syncRequest)
			switch {
			case syncPlan.asyncMode:
				t.Fatalf("%s should default to synchronous compatible response", endpoint.path)
			case !syncPlan.compatibleMode:
				t.Fatalf("%s should return compatible response payloads", endpoint.path)
			case syncPlan.streamMode:
				t.Fatal("audio OpenAPI endpoints should stay JSON-only even when stream=true is present")
			}

			// Explicit opt-in to async mode through the X-Async header.
			headerRequest := httptest.NewRequest(http.MethodPost, endpoint.path, nil)
			headerRequest.Header.Set("X-Async", "true")
			headerPlan := planTaskResponse(endpoint.kind, true, map[string]any{}, headerRequest)
			if !(headerPlan.asyncMode && headerPlan.compatibleMode) {
				t.Fatalf("%s should support X-Async while keeping compatible mode, got %+v", endpoint.path, headerPlan)
			}
		})
	}
}
func TestAPIKeyScopeAllowedRecognizesAudioAndMusicAliases(t *testing.T) {
if !apiKeyScopeAllowed(&auth.User{APIKeyID: "key", APIKeyScopes: []string{"audio_generate"}}, "song.generations") {
t.Fatal("audio_generate scope should allow song generations")
}
if !apiKeyScopeAllowed(&auth.User{APIKeyID: "key", APIKeyScopes: []string{"text_to_speech"}}, "speech.generations") {
t.Fatal("text_to_speech scope should allow speech generations")
}
if apiKeyScopeAllowed(&auth.User{APIKeyID: "key", APIKeyScopes: []string{"image"}}, "speech.generations") {
t.Fatal("image scope should not allow speech generations")
}
}
func TestWriteCompatibleTaskResponseReturnsJSONWhenStreamIsFalse(t *testing.T) {
executor := &fakeTaskExecutor{output: map[string]any{"id": "chatcmpl-test", "object": "chat.completion"}}
req := httptest.NewRequest(http.MethodPost, "/api/v1/chat/completions", nil)

View File

@ -106,7 +106,7 @@ func TestCoreLocalFlow(t *testing.T) {
}
doJSON(t, server.URL, http.MethodPost, "/api/v1/api-keys", loginResponse.AccessToken, map[string]any{
"name": "smoke key",
"scopes": []string{"chat", "image", "video"},
"scopes": []string{"chat", "image", "video", "music", "audio"},
}, http.StatusCreated, &apiKeyResponse)
if !strings.HasPrefix(apiKeyResponse.Secret, "sk-gw-") || apiKeyResponse.APIKey.Status != "active" {
t.Fatalf("unexpected api key response: %+v", apiKeyResponse)
@ -444,6 +444,71 @@ VALUES ($1, 5, '{"purpose":"core-flow"}'::jsonb)`, inviteCode); err != nil {
t.Fatalf("unexpected image edit task: %+v", imageEditResponse.Task)
}
songMarker := "song-simulation-" + suffixText
var songResult map[string]any
doJSON(t, server.URL, http.MethodPost, "/api/v1/song/generations", apiKeyResponse.Secret, map[string]any{
"model": "chirp-v5-0",
"runMode": "simulation",
"prompt": "city lights and soft drums",
"tags": "pop, synth",
"negativeTags": "noise",
"simulation": true,
"simulationDurationMs": 5,
"integrationTestMarker": songMarker,
}, http.StatusOK, &songResult)
songData, _ := songResult["data"].([]any)
if songResult["status"] != "success" || len(songData) == 0 {
t.Fatalf("unexpected song generation compatible result: %+v", songResult)
}
songItem, _ := songData[0].(map[string]any)
if songItem["type"] != "audio" || songItem["audio_url"] != "/static/simulation/audio.wav" {
t.Fatalf("song simulation should return audio asset data: %+v", songItem)
}
var songTaskDetail struct {
Status string `json:"status"`
ModelType string `json:"modelType"`
Result map[string]any `json:"result"`
FinalChargeAmount float64 `json:"finalChargeAmount"`
}
songTaskID := waitForTaskIDByRequestField(t, ctx, testPool, "integrationTestMarker", songMarker, 2*time.Second)
doJSON(t, server.URL, http.MethodGet, "/api/v1/tasks/"+songTaskID, apiKeyResponse.Secret, nil, http.StatusOK, &songTaskDetail)
if songTaskDetail.Status != "succeeded" || songTaskDetail.ModelType != "audio_generate" || songTaskDetail.FinalChargeAmount <= 0 {
t.Fatalf("song simulation task should succeed with audio_generate billing: %+v", songTaskDetail)
}
speechMarker := "speech-simulation-" + suffixText
var speechResult map[string]any
doJSON(t, server.URL, http.MethodPost, "/api/v1/speech/generations", apiKeyResponse.Secret, map[string]any{
"model": "speech-2.6-turbo",
"runMode": "simulation",
"text": "hello gateway speech",
"voice_id": "female-shaonv",
"speed": 1,
"vol": 1,
"pitch": 0,
"simulation": true,
"simulationDurationMs": 5,
"integrationTestMarker": speechMarker,
}, http.StatusOK, &speechResult)
speechData, _ := speechResult["data"].([]any)
if speechResult["status"] != "success" || len(speechData) == 0 {
t.Fatalf("unexpected speech generation compatible result: %+v", speechResult)
}
speechItem, _ := speechData[0].(map[string]any)
if speechItem["type"] != "audio" || speechItem["audio_url"] != "/static/simulation/audio.wav" || speechItem["revised_text"] != "hello gateway speech" {
t.Fatalf("speech simulation should return audio asset data: %+v", speechItem)
}
var speechTaskDetail struct {
Status string `json:"status"`
ModelType string `json:"modelType"`
FinalChargeAmount float64 `json:"finalChargeAmount"`
}
speechTaskID := waitForTaskIDByRequestField(t, ctx, testPool, "integrationTestMarker", speechMarker, 2*time.Second)
doJSON(t, server.URL, http.MethodGet, "/api/v1/tasks/"+speechTaskID, apiKeyResponse.Secret, nil, http.StatusOK, &speechTaskDetail)
if speechTaskDetail.Status != "succeeded" || speechTaskDetail.ModelType != "text_to_speech" || speechTaskDetail.FinalChargeAmount <= 0 {
t.Fatalf("speech simulation task should succeed with text_to_speech billing: %+v", speechTaskDetail)
}
doubaoLiteImageEditModel := "doubao-5.0-lite图像编辑"
var doubaoLitePlatformModel struct {
ID string `json:"id"`
@ -838,21 +903,26 @@ WHERE reference_type = 'gateway_task'
}
var modelRateLimits struct {
Items []struct {
ModelName string `json:"modelName"`
ModelAlias string `json:"modelAlias"`
ModelName string `json:"modelName"`
ModelAlias string `json:"modelAlias"`
Concurrent struct {
CurrentValue float64 `json:"currentValue"`
} `json:"concurrent"`
QueuedTasks float64 `json:"queuedTasks"`
} `json:"items"`
}
doJSON(t, server.URL, http.MethodGet, "/api/admin/runtime/model-rate-limits", loginResponse.AccessToken, nil, http.StatusOK, &modelRateLimits)
var queuedTasks float64
var runningTasks float64
for _, item := range modelRateLimits.Items {
if item.ModelName == rateLimitedModel || item.ModelAlias == rateLimitedModel {
queuedTasks = item.QueuedTasks
runningTasks = item.Concurrent.CurrentValue
break
}
}
if queuedTasks < 1 {
t.Fatalf("realtime load should count async rate-limited task as queued, got %v in %+v", queuedTasks, modelRateLimits.Items)
if queuedTasks+runningTasks < 1 && asyncRateLimitDetail.Status != "queued" {
t.Fatalf("realtime load should count async rate-limited task as queued or running, got queued=%v running=%v in %+v", queuedTasks, runningTasks, modelRateLimits.Items)
}
asyncRateLimitCompleted := waitForTaskStatus(t, server.URL, apiKeyResponse.Secret, asyncRateLimitTask.TaskID, []string{"succeeded"}, time.Duration(rateLimitWindowSeconds+3)*time.Second)
if asyncRateLimitCompleted.Status != "succeeded" {
@ -1227,7 +1297,7 @@ WHERE m.platform_id = $1::uuid
ErrorMessage string `json:"errorMessage"`
} `json:"items"`
}
doJSON(t, server.URL, http.MethodGet, "/api/v1/tasks?limit=20", loginResponse.AccessToken, nil, http.StatusOK, &taskList)
doJSON(t, server.URL, http.MethodGet, "/api/v1/tasks?limit=50", loginResponse.AccessToken, nil, http.StatusOK, &taskList)
if !taskListContains(taskList.Items, taskResponse.Task.ID) || !taskListContains(taskList.Items, pricingTask.Task.ID) {
t.Fatalf("task list should include persisted task records, got %+v", taskList.Items)
}
@ -1242,7 +1312,7 @@ WHERE m.platform_id = $1::uuid
ErrorMessage string `json:"errorMessage"`
} `json:"items"`
}
doJSON(t, server.URL, http.MethodGet, "/api/workspace/tasks?limit=20", loginResponse.AccessToken, nil, http.StatusOK, &workspaceTaskList)
doJSON(t, server.URL, http.MethodGet, "/api/workspace/tasks?limit=50", loginResponse.AccessToken, nil, http.StatusOK, &workspaceTaskList)
if !taskListContains(workspaceTaskList.Items, taskResponse.Task.ID) || !taskListContains(workspaceTaskList.Items, pricingTask.Task.ID) {
t.Fatalf("workspace task list should include persisted task records, got %+v", workspaceTaskList.Items)
}

View File

@ -881,6 +881,9 @@ func (s *Server) listModelRateLimitStatuses(w http.ResponseWriter, r *http.Reque
// @Router /api/v1/images/generations [post]
// @Router /api/v1/images/edits [post]
// @Router /api/v1/videos/generations [post]
// @Router /api/v1/song/generations [post]
// @Router /api/v1/music/generations [post]
// @Router /api/v1/speech/generations [post]
// @Router /chat/completions [post]
// @Router /v1/chat/completions [post]
// @Router /responses [post]
@ -893,6 +896,12 @@ func (s *Server) listModelRateLimitStatuses(w http.ResponseWriter, r *http.Reque
// @Router /v1/images/generations [post]
// @Router /images/edits [post]
// @Router /v1/images/edits [post]
// @Router /song/generations [post]
// @Router /v1/song/generations [post]
// @Router /music/generations [post]
// @Router /v1/music/generations [post]
// @Router /speech/generations [post]
// @Router /v1/speech/generations [post]
func (s *Server) createTask(kind string, compatible bool) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
user, ok := auth.UserFromContext(r.Context())
@ -1153,6 +1162,12 @@ func apiKeyScopeAllowed(user *auth.User, kind string) bool {
if required == "rerank" && scope == "text_rerank" {
return true
}
if required == "music" && (scope == "audio_generate" || scope == "music_generate" || scope == "song") {
return true
}
if required == "audio" && (scope == "text_to_speech" || scope == "speech" || scope == "tts") {
return true
}
}
return false
}
@ -1169,6 +1184,10 @@ func scopeForTaskKind(kind string) string {
return "image"
case "videos.generations":
return "video"
case "song.generations", "music.generations":
return "music"
case "speech.generations":
return "audio"
default:
return kind
}

View File

@ -172,18 +172,35 @@ type PricingEstimateResponse struct {
}
type TaskRequest struct {
Model string `json:"model" example:"gpt-4o-mini"`
Messages []ChatMessage `json:"messages,omitempty"`
Input string `json:"input,omitempty" example:"Tell me a short story"`
Prompt string `json:"prompt,omitempty" example:"A watercolor robot reading a book"`
Stream bool `json:"stream,omitempty" example:"false"`
RunMode string `json:"runMode,omitempty" example:"simulation"`
MaxTokens int `json:"max_tokens,omitempty" example:"512"`
Model string `json:"model" example:"gpt-4o-mini"`
Messages []ChatMessage `json:"messages,omitempty"`
Input string `json:"input,omitempty" example:"Tell me a short story"`
Prompt string `json:"prompt,omitempty" example:"A watercolor robot reading a book"`
Text string `json:"text,omitempty" example:"Hello from EasyAI audio synthesis."`
TextFileID string `json:"text_file_id,omitempty" example:""`
VoiceID string `json:"voice_id,omitempty" example:"female-shaonv"`
Stream bool `json:"stream,omitempty" example:"false"`
RunMode string `json:"runMode,omitempty" example:"simulation"`
MaxTokens int `json:"max_tokens,omitempty" example:"512"`
// ReasoningEffort 推理深度OpenAI-compatible 请求字段;开放字符串,取值随 provider 和模型能力而定,常见值为 none、minimal、low、medium、high、xhigh也可配置 max 等供应商自定义值。
ReasoningEffort string `json:"reasoning_effort,omitempty" example:"medium"`
Size string `json:"size,omitempty" example:"1024x1024"`
Duration int `json:"duration,omitempty" example:"5"`
Resolution string `json:"resolution,omitempty" example:"720p"`
ReasoningEffort string `json:"reasoning_effort,omitempty" example:"medium"`
Size string `json:"size,omitempty" example:"1024x1024"`
Duration int `json:"duration,omitempty" example:"5"`
Resolution string `json:"resolution,omitempty" example:"720p"`
MakeInstrumental bool `json:"makeInstrumental,omitempty" example:"false"`
CustomMode bool `json:"customMode,omitempty" example:"false"`
Style string `json:"style,omitempty" example:"city pop, bright synth"`
Title string `json:"title,omitempty" example:"Useful Tools"`
Tags string `json:"tags,omitempty" example:"city pop, synth"`
NegativeTags string `json:"negativeTags,omitempty" example:"noise"`
VocalGender string `json:"vocalGender,omitempty" example:"f"`
StyleWeight float64 `json:"styleWeight,omitempty" example:"0.65"`
WeirdnessConstraint float64 `json:"weirdnessConstraint,omitempty" example:"0.35"`
AudioWeight float64 `json:"audioWeight,omitempty" example:"0.65"`
Speed float64 `json:"speed,omitempty" example:"1"`
Vol float64 `json:"vol,omitempty" example:"1"`
Pitch float64 `json:"pitch,omitempty" example:"0"`
Emotion string `json:"emotion,omitempty" example:"happy"`
}
type ChatCompletionRequest struct {

View File

@ -135,6 +135,9 @@ func NewServerWithContext(ctx context.Context, cfg config.Config, db *store.Stor
mux.Handle("POST /api/v1/images/generations", server.auth.Require(auth.PermissionBasic, server.createTask("images.generations", false)))
mux.Handle("POST /api/v1/images/edits", server.auth.Require(auth.PermissionBasic, server.createTask("images.edits", false)))
mux.Handle("POST /api/v1/videos/generations", server.auth.Require(auth.PermissionBasic, server.createTask("videos.generations", false)))
mux.Handle("POST /api/v1/song/generations", server.auth.Require(auth.PermissionBasic, server.createTask("song.generations", true)))
mux.Handle("POST /api/v1/music/generations", server.auth.Require(auth.PermissionBasic, server.createTask("music.generations", true)))
mux.Handle("POST /api/v1/speech/generations", server.auth.Require(auth.PermissionBasic, server.createTask("speech.generations", true)))
mux.Handle("POST /api/v1/files/upload", server.auth.Require(auth.PermissionBasic, http.HandlerFunc(server.uploadFile)))
mux.Handle("GET /api/v1/tasks", server.auth.Require(auth.PermissionBasic, http.HandlerFunc(server.listTasks)))
mux.Handle("GET /api/v1/tasks/{taskID}", server.auth.Require(auth.PermissionBasic, http.HandlerFunc(server.getTask)))
@ -152,6 +155,12 @@ func NewServerWithContext(ctx context.Context, cfg config.Config, db *store.Stor
mux.Handle("POST /v1/images/generations", server.auth.Require(auth.PermissionBasic, server.createTask("images.generations", true)))
mux.Handle("POST /images/edits", server.auth.Require(auth.PermissionBasic, server.createTask("images.edits", true)))
mux.Handle("POST /v1/images/edits", server.auth.Require(auth.PermissionBasic, server.createTask("images.edits", true)))
mux.Handle("POST /song/generations", server.auth.Require(auth.PermissionBasic, server.createTask("song.generations", true)))
mux.Handle("POST /v1/song/generations", server.auth.Require(auth.PermissionBasic, server.createTask("song.generations", true)))
mux.Handle("POST /music/generations", server.auth.Require(auth.PermissionBasic, server.createTask("music.generations", true)))
mux.Handle("POST /v1/music/generations", server.auth.Require(auth.PermissionBasic, server.createTask("music.generations", true)))
mux.Handle("POST /speech/generations", server.auth.Require(auth.PermissionBasic, server.createTask("speech.generations", true)))
mux.Handle("POST /v1/speech/generations", server.auth.Require(auth.PermissionBasic, server.createTask("speech.generations", true)))
mux.Handle("POST /v1/files/upload", server.auth.Require(auth.PermissionBasic, http.HandlerFunc(server.uploadFile)))
return server.recover(server.cors(mux))

View File

@ -18,13 +18,18 @@ const simulationVideoMP4Base64 = "AAAAIGZ0eXBpc29tAAACAGlzb21pc28yYXZjMW1wNDEAAA
var simulationVideoMP4 = mustDecodeSimulationAsset(simulationVideoMP4Base64)
const simulationAudioWAVBase64 = "UklGRmQGAABXQVZFZm10IBAAAAABAAEAQB8AAIA+AAACABAAZGF0YUAGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
var simulationAudioWAV = mustDecodeSimulationAsset(simulationAudioWAVBase64)
// serveSimulationAsset godoc
// @Summary 获取模拟资源
// @Description 返回本地模拟模式使用的图片、视频封面或短视频资源。
// @Description 返回本地模拟模式使用的图片、视频封面、短视频或音频资源。
// @Tags simulation
// @Produce image/svg+xml
// @Produce video/mp4
// @Param asset path string true "资源文件名,可选 image.svg、image.png、image-edit.svg、image-edit.png、video-poster.svg、video.mp4"
// @Produce audio/wav
// @Param asset path string true "资源文件名,可选 image.svg、image.png、image-edit.svg、image-edit.png、video-poster.svg、video.mp4、audio.wav"
// @Success 200 {file} binary
// @Failure 404 {string} string "Not Found"
// @Router /static/simulation/{asset} [get]
@ -39,6 +44,8 @@ func serveSimulationAsset(w http.ResponseWriter, r *http.Request) {
serveSimulationContent(w, r, "video-poster.svg", "image/svg+xml; charset=utf-8", []byte(simulationVideoPosterSVG))
case "video.mp4":
serveSimulationContent(w, r, "video.mp4", "video/mp4", simulationVideoMP4)
case "audio.wav":
serveSimulationContent(w, r, "audio.wav", "audio/wav", simulationAudioWAV)
default:
http.NotFound(w, r)
}

View File

@ -112,6 +112,24 @@ func (s *Service) billings(ctx context.Context, user *auth.User, kind string, bo
"durationUnitCount": durationUnits,
})}
}
if kind == "song.generations" || kind == "music.generations" {
resource = "music"
unit = "song"
baseKey = "musicBase"
amount := float64(count) * resourcePrice(config, resource, baseKey, "basePrice") * discount
return []any{billingLine(candidate, resource, unit, count, roundPrice(amount), discount, simulated)}
}
if kind == "speech.generations" {
resource = "audio"
unit = "character"
baseKey = "audioBase"
quantity := len([]rune(stringFromMap(body, "text")))
if quantity <= 0 {
quantity = 1
}
amount := float64(quantity) * resourcePrice(config, resource, baseKey, "basePrice") * discount
return []any{billingLine(candidate, resource, unit, quantity, roundPrice(amount), discount, simulated)}
}
amount := float64(count) * resourcePrice(config, resource, baseKey, "basePrice") * resourceWeight(config, resource, "qualityWeights", stringFromMap(body, "quality")) * resourceWeight(config, resource, "sizeWeights", stringFromMap(body, "size")) * resourceWeight(config, resource, "resolutionWeights", firstNonEmptyString(stringFromMap(body, "resolution"), stringFromMap(body, "size"))) * discount
return []any{billingLine(candidate, resource, unit, count, roundPrice(amount), discount, simulated)}
}

View File

@ -84,6 +84,66 @@ func TestVideoBillingEstimateUsesFiveSecondUnitsAndDynamicWeights(t *testing.T)
}
}
func TestMusicBillingUsesSongResourceAndOutputCount(t *testing.T) {
service := &Service{}
candidate := store.RuntimeModelCandidate{
ModelName: "suno-model",
BaseBillingConfig: map[string]any{
"musicBase": 6,
"music": map[string]any{"basePrice": 9},
},
}
items := service.billings(context.Background(), nil, "song.generations", map[string]any{
"prompt": "city lights",
"count": 3,
}, candidate, clients.Response{}, true)
line := firstBillingLine(t, items)
if got, want := line["resourceType"], "music"; got != want {
t.Fatalf("music resource type = %v, want %v", got, want)
}
if got, want := line["unit"], "song"; got != want {
t.Fatalf("music billing unit = %v, want %v", got, want)
}
if got, want := line["quantity"], 3; got != want {
t.Fatalf("music quantity = %v, want %v", got, want)
}
if got, want := floatFromAny(line["amount"]), 18.0; got != want {
t.Fatalf("music amount = %v, want %v", got, want)
}
}
func TestSpeechBillingUsesAudioCharacters(t *testing.T) {
service := &Service{}
candidate := store.RuntimeModelCandidate{
ModelName: "speech-model",
BaseBillingConfig: map[string]any{
"audioBase": 0.5,
"audio": map[string]any{"basePrice": 0.8},
},
}
items := service.billings(context.Background(), nil, "speech.generations", map[string]any{
"text": "你好abc",
"voice_id": "female-shaonv",
}, candidate, clients.Response{}, true)
line := firstBillingLine(t, items)
if got, want := line["resourceType"], "audio"; got != want {
t.Fatalf("speech resource type = %v, want %v", got, want)
}
if got, want := line["unit"], "character"; got != want {
t.Fatalf("speech billing unit = %v, want %v", got, want)
}
if got, want := line["quantity"], 5; got != want {
t.Fatalf("speech character quantity = %v, want %v", got, want)
}
if got, want := floatFromAny(line["amount"]), 2.5; got != want {
t.Fatalf("speech amount = %v, want %v", got, want)
}
}
func TestVideoBillingPrefersGeneratedDuration(t *testing.T) {
service := &Service{}
candidate := store.RuntimeModelCandidate{

View File

@ -64,6 +64,7 @@ func New(cfg config.Config, db *store.Store, logger *slog.Logger) *Service {
"midjourney": clients.MidjourneyClient{HTTPClient: httpClients.none},
"minimax": clients.MinimaxClient{HTTPClient: httpClients.none},
"newapi": clients.NewAPIClient{HTTPClient: httpClients.none},
"suno": clients.SunoClient{HTTPClient: httpClients.none},
"tencent-hunyuan-image": clients.HunyuanImageClient{HTTPClient: httpClients.none},
"tencent-hunyuan-video": clients.HunyuanVideoClient{HTTPClient: httpClients.none},
"vidu": clients.ViduClient{HTTPClient: httpClients.none},
@ -957,6 +958,10 @@ func modelTypeFromKind(kind string, body map[string]any) string {
return "image_to_video"
}
return "video_generate"
case "song.generations", "music.generations":
return "audio_generate"
case "speech.generations":
return "text_to_speech"
default:
return "task"
}
@ -979,6 +984,10 @@ func canonicalModelType(value string) string {
return "text_embedding"
case "rerank", "reranks":
return "text_rerank"
case "audio", "music", "music_generate", "song", "songs":
return "audio_generate"
case "speech", "tts":
return "text_to_speech"
default:
return normalized
}
@ -986,7 +995,7 @@ func canonicalModelType(value string) string {
func isKnownModelType(value string) bool {
switch value {
case "text_generate", "text_embedding", "text_rerank", "image_generate", "image_edit", "video_generate", "image_to_video", "text_to_video", "video_edit", "video_reference", "video_first_last_frame", "omni_video", "omni":
case "text_generate", "text_embedding", "text_rerank", "image_generate", "image_edit", "video_generate", "image_to_video", "text_to_video", "video_edit", "video_reference", "video_first_last_frame", "omni_video", "omni", "audio_generate", "text_to_speech":
return true
default:
return false
@ -1171,6 +1180,17 @@ func validateRequest(kind string, body map[string]any) error {
if strings.TrimSpace(stringFromMap(body, "prompt")) == "" {
return errors.New("prompt is required")
}
case "song.generations", "music.generations":
if strings.TrimSpace(stringFromMap(body, "prompt")) == "" {
return errors.New("prompt is required")
}
case "speech.generations":
if strings.TrimSpace(stringFromMap(body, "text")) == "" && strings.TrimSpace(stringFromMap(body, "text_file_id")) == "" {
return errors.New("text or text_file_id is required")
}
if strings.TrimSpace(stringFromMap(body, "voice_id")) == "" {
return errors.New("voice_id is required")
}
}
return nil
}

View File

@ -943,6 +943,9 @@ func mediaKindForAsset(taskKind string, item map[string]any, sourceKey string, c
if strings.Contains(kind, "video") {
return "video"
}
if strings.Contains(kind, "audio") || strings.Contains(kind, "song") || strings.Contains(kind, "music") || strings.Contains(kind, "speech") {
return "audio"
}
if strings.Contains(kind, "image") {
return "image"
}

View File

@ -488,6 +488,10 @@ func modelTypeAliases(value string) []string {
return []string{"image_edit"}
case "video", "videos.generations":
return []string{"video_generate"}
case "song", "music", "song.generations", "music.generations", "music_generate":
return []string{"audio_generate"}
case "speech", "speech.generations", "tts":
return []string{"text_to_speech"}
default:
return []string{value}
}

View File

@ -105,31 +105,54 @@ WHERE p.status = 'enabled'
AND (m.cooldown_until IS NULL OR m.cooldown_until <= now())
AND (
(
COALESCE(m.model_alias, '') <> ''
$2::text IN ('audio_generate', 'text_to_speech')
AND (
m.model_alias = $1::text
OR (
NULLIF($3::text, '') IS NOT NULL
AND regexp_replace(COALESCE(m.model_alias, ''), '[[:space:]]+', '', 'g') = $3::text
)
)
)
OR (
COALESCE(m.model_alias, '') = ''
AND (
m.model_name = $1::text
OR m.model_name = $1::text
OR b.canonical_model_key = $1::text
OR b.provider_model_name = $1::text
OR (
NULLIF($3::text, '') IS NOT NULL
AND (
regexp_replace(COALESCE(m.model_name, ''), '[[:space:]]+', '', 'g') = $3::text
regexp_replace(COALESCE(m.model_alias, ''), '[[:space:]]+', '', 'g') = $3::text
OR regexp_replace(COALESCE(m.model_name, ''), '[[:space:]]+', '', 'g') = $3::text
OR regexp_replace(COALESCE(b.canonical_model_key, ''), '[[:space:]]+', '', 'g') = $3::text
OR regexp_replace(COALESCE(b.provider_model_name, ''), '[[:space:]]+', '', 'g') = $3::text
)
)
)
)
OR (
$2::text NOT IN ('audio_generate', 'text_to_speech')
AND (
(
COALESCE(m.model_alias, '') <> ''
AND (
m.model_alias = $1::text
OR (
NULLIF($3::text, '') IS NOT NULL
AND regexp_replace(COALESCE(m.model_alias, ''), '[[:space:]]+', '', 'g') = $3::text
)
)
)
OR (
COALESCE(m.model_alias, '') = ''
AND (
m.model_name = $1::text
OR b.canonical_model_key = $1::text
OR b.provider_model_name = $1::text
OR (
NULLIF($3::text, '') IS NOT NULL
AND (
regexp_replace(COALESCE(m.model_name, ''), '[[:space:]]+', '', 'g') = $3::text
OR regexp_replace(COALESCE(b.canonical_model_key, ''), '[[:space:]]+', '', 'g') = $3::text
OR regexp_replace(COALESCE(b.provider_model_name, ''), '[[:space:]]+', '', 'g') = $3::text
)
)
)
)
)
)
)
ORDER BY effective_priority ASC,
COALESCE(s.running_count, 0) ASC,
@ -396,31 +419,54 @@ WHERE p.status = 'enabled'
AND m.model_type @> jsonb_build_array($2::text)
AND (
(
COALESCE(m.model_alias, '') <> ''
$2::text IN ('audio_generate', 'text_to_speech')
AND (
m.model_alias = $1::text
OR (
NULLIF($3::text, '') IS NOT NULL
AND regexp_replace(COALESCE(m.model_alias, ''), '[[:space:]]+', '', 'g') = $3::text
)
)
)
OR (
COALESCE(m.model_alias, '') = ''
AND (
m.model_name = $1::text
OR m.model_name = $1::text
OR b.canonical_model_key = $1::text
OR b.provider_model_name = $1::text
OR (
NULLIF($3::text, '') IS NOT NULL
AND (
regexp_replace(COALESCE(m.model_name, ''), '[[:space:]]+', '', 'g') = $3::text
regexp_replace(COALESCE(m.model_alias, ''), '[[:space:]]+', '', 'g') = $3::text
OR regexp_replace(COALESCE(m.model_name, ''), '[[:space:]]+', '', 'g') = $3::text
OR regexp_replace(COALESCE(b.canonical_model_key, ''), '[[:space:]]+', '', 'g') = $3::text
OR regexp_replace(COALESCE(b.provider_model_name, ''), '[[:space:]]+', '', 'g') = $3::text
)
)
)
)
OR (
$2::text NOT IN ('audio_generate', 'text_to_speech')
AND (
(
COALESCE(m.model_alias, '') <> ''
AND (
m.model_alias = $1::text
OR (
NULLIF($3::text, '') IS NOT NULL
AND regexp_replace(COALESCE(m.model_alias, ''), '[[:space:]]+', '', 'g') = $3::text
)
)
)
OR (
COALESCE(m.model_alias, '') = ''
AND (
m.model_name = $1::text
OR b.canonical_model_key = $1::text
OR b.provider_model_name = $1::text
OR (
NULLIF($3::text, '') IS NOT NULL
AND (
regexp_replace(COALESCE(m.model_name, ''), '[[:space:]]+', '', 'g') = $3::text
OR regexp_replace(COALESCE(b.canonical_model_key, ''), '[[:space:]]+', '', 'g') = $3::text
OR regexp_replace(COALESCE(b.provider_model_name, ''), '[[:space:]]+', '', 'g') = $3::text
)
)
)
)
)
)
)
ORDER BY GREATEST(COALESCE(p.cooldown_until, to_timestamp(0)), COALESCE(m.cooldown_until, to_timestamp(0))) DESC,
p.priority ASC,

View File

@ -57,9 +57,9 @@ func billingResourcesForModelTypes(modelTypes []string) map[string]bool {
case "video", "videos.generations", "video_generate", "image_to_video", "text_to_video",
"video_edit", "omni_video", "video_reference", "video_first_last_frame":
resources["video"] = true
case "audio", "audio_generate", "text_to_speech", "speech":
case "audio", "text_to_speech", "speech":
resources["audio"] = true
case "music", "music_generate":
case "music", "music_generate", "audio_generate":
resources["music"] = true
case "digital_human", "digital_human_generate":
resources["digital_human"] = true

View File

@ -80,6 +80,40 @@ func TestFilterPlatformModelBillingConfigKeepsTextFlatPricing(t *testing.T) {
assertMissingKeys(t, filtered.BillingConfig, "image")
}
func TestFilterPlatformModelBillingConfigKeepsMusicPricing(t *testing.T) {
model := PlatformModel{
ModelType: StringList{"audio_generate"},
BillingConfig: map[string]any{
"music": map[string]any{"basePrice": 6},
"musicBase": 6,
"audio": map[string]any{"basePrice": 1},
"image": map[string]any{"basePrice": 10},
},
}
filtered := FilterPlatformModelBillingConfig(model)
assertHasKeys(t, filtered.BillingConfig, "music", "musicBase")
assertMissingKeys(t, filtered.BillingConfig, "audio", "image")
}
func TestFilterPlatformModelBillingConfigKeepsSpeechAudioPricing(t *testing.T) {
model := PlatformModel{
ModelType: StringList{"text_to_speech"},
BillingConfig: map[string]any{
"audio": map[string]any{"basePrice": 0.5},
"audioBase": 0.5,
"music": map[string]any{"basePrice": 6},
"video": map[string]any{"basePrice": 100},
},
}
filtered := FilterPlatformModelBillingConfig(model)
assertHasKeys(t, filtered.BillingConfig, "audio", "audioBase")
assertMissingKeys(t, filtered.BillingConfig, "music", "video")
}
func assertHasKeys(t *testing.T, value map[string]any, keys ...string) {
t.Helper()
for _, key := range keys {

View File

@ -0,0 +1,76 @@
-- Seed two simulation platforms used for audio/music and speech testing:
--   * 'suno-simulation'            (provider 'suno',    priority 930)
--   * 'minimax-speech-simulation'  (provider 'minimax', priority 940)
-- Both rows carry credentials {"mode":"simulation"} and config testMode=true,
-- a retry policy of at most 2 attempts, and a rate-limit policy of 60 rpm per
-- 60-second window plus 5 concurrent leases with a 120-second lease TTL.
-- Rows are upserted by platform_key (see ON CONFLICT below); this presumably
-- relies on a unique constraint on platform_key - confirm against the schema.
INSERT INTO integration_platforms (
provider, platform_key, name, base_url, auth_type, credentials, config,
default_pricing_mode, default_discount_factor, retry_policy, rate_limit_policy, priority, status
)
VALUES
-- Suno music simulation platform.
(
'suno', 'suno-simulation', 'Suno Music Simulation',
'https://api.cqtai.com/api/cqt', 'bearer',
'{"mode":"simulation"}'::jsonb,
'{"testMode":true,"seed":"audio-music-openapi","sourceSpecType":"suno"}'::jsonb,
'inherit_discount', 1,
'{"enabled":true,"maxAttempts":2,"retryOn":["rate_limit","timeout","server_error","network"]}'::jsonb,
'{"rules":[{"metric":"rpm","limit":60,"windowSeconds":60},{"metric":"concurrent","limit":5,"leaseTtlSeconds":120}]}'::jsonb,
930,
'enabled'
),
-- MiniMax speech simulation platform.
(
'minimax', 'minimax-speech-simulation', 'MiniMax Speech Simulation',
'https://api.minimaxi.com/v1', 'bearer',
'{"mode":"simulation"}'::jsonb,
'{"testMode":true,"seed":"audio-music-openapi","sourceSpecType":"minimax"}'::jsonb,
'inherit_discount', 1,
'{"enabled":true,"maxAttempts":2,"retryOn":["rate_limit","timeout","server_error","network"]}'::jsonb,
'{"rules":[{"metric":"rpm","limit":60,"windowSeconds":60},{"metric":"concurrent","limit":5,"leaseTtlSeconds":120}]}'::jsonb,
940,
'enabled'
)
-- On a platform_key collision, overwrite every seeded column with the values
-- above and bump updated_at.
-- NOTE(review): provider is not in the SET list, so an existing row keeps its
-- current provider value - confirm this is intended.
ON CONFLICT (platform_key) DO UPDATE
SET name = EXCLUDED.name,
base_url = EXCLUDED.base_url,
auth_type = EXCLUDED.auth_type,
credentials = EXCLUDED.credentials,
config = EXCLUDED.config,
default_pricing_mode = EXCLUDED.default_pricing_mode,
default_discount_factor = EXCLUDED.default_discount_factor,
retry_policy = EXCLUDED.retry_policy,
rate_limit_policy = EXCLUDED.rate_limit_policy,
priority = EXCLUDED.priority,
status = EXCLUDED.status,
updated_at = now();
-- Attach catalog models to the two simulation platforms. For each of the
-- platforms 'suno-simulation' and 'minimax-speech-simulation', every
-- base_model_catalog row whose provider_key equals the platform's provider,
-- whose status is 'active', and whose model_type contains 'audio_generate' or
-- 'text_to_speech' is inserted as a platform_models row (or refreshed if one
-- already exists for the same platform_id + model_name).
INSERT INTO platform_models (
platform_id, base_model_id, model_name, provider_model_name, model_alias, model_type, display_name,
capabilities, pricing_mode, billing_config, retry_policy, rate_limit_policy, enabled
)
-- Select list is positional against the column list above:
--   model_name and provider_model_name both come from b.provider_model_name;
--   model_alias and display_name both come from b.display_name;
--   billing_config / rate_limit_policy are copied from the catalog defaults;
--   pricing_mode, retry_policy and enabled are fixed literals.
SELECT p.id,
b.id,
b.provider_model_name,
b.provider_model_name,
b.display_name,
b.model_type,
b.display_name,
b.capabilities,
'inherit_discount',
b.base_billing_config,
'{"enabled":true,"maxAttempts":2}'::jsonb,
b.default_rate_limit_policy,
true
FROM integration_platforms p
JOIN base_model_catalog b ON b.provider_key = p.provider
WHERE p.platform_key IN ('suno-simulation', 'minimax-speech-simulation')
AND b.status = 'active'
-- ?| matches when any of the listed strings is present in model_type
-- (assumes model_type is jsonb - confirm against the schema).
-- NOTE(review): the type filter is not per platform, so a 'minimax' catalog row
-- typed 'audio_generate' is also attached to minimax-speech-simulation (and
-- likewise for 'suno' rows typed 'text_to_speech') - confirm this is intended.
AND b.model_type ?| ARRAY['audio_generate','text_to_speech']
-- On a (platform_id, model_name) collision, overwrite all other seeded columns
-- with the freshly selected values and bump updated_at.
ON CONFLICT (platform_id, model_name) DO UPDATE
SET base_model_id = EXCLUDED.base_model_id,
provider_model_name = EXCLUDED.provider_model_name,
model_alias = EXCLUDED.model_alias,
display_name = EXCLUDED.display_name,
model_type = EXCLUDED.model_type,
capabilities = EXCLUDED.capabilities,
pricing_mode = EXCLUDED.pricing_mode,
billing_config = EXCLUDED.billing_config,
retry_policy = EXCLUDED.retry_policy,
rate_limit_policy = EXCLUDED.rate_limit_policy,
enabled = EXCLUDED.enabled,
updated_at = now();