diff --git a/apps/api/docs/swagger.json b/apps/api/docs/swagger.json
index 805b611..f999c21 100644
--- a/apps/api/docs/swagger.json
+++ b/apps/api/docs/swagger.json
@@ -7281,6 +7281,11 @@
"type": "string",
"example": "A watercolor robot reading a book"
},
+ "reasoning_effort": {
+ "description": "ReasoningEffort 推理深度,OpenAI-compatible 请求字段;开放字符串,取值随 provider 和模型能力而定,常见值为 none、minimal、low、medium、high、xhigh,也可配置 max 等供应商自定义值。",
+ "type": "string",
+ "example": "medium"
+ },
"resolution": {
"type": "string",
"example": "720p"
diff --git a/apps/api/docs/swagger.yaml b/apps/api/docs/swagger.yaml
index 16a7825..a583b79 100644
--- a/apps/api/docs/swagger.yaml
+++ b/apps/api/docs/swagger.yaml
@@ -587,6 +587,11 @@ definitions:
prompt:
example: A watercolor robot reading a book
type: string
+ reasoning_effort:
+ description: ReasoningEffort 推理深度,OpenAI-compatible 请求字段;开放字符串,取值随 provider
+ 和模型能力而定,常见值为 none、minimal、low、medium、high、xhigh,也可配置 max 等供应商自定义值。
+ example: medium
+ type: string
resolution:
example: 720p
type: string
diff --git a/apps/api/internal/clients/clients_test.go b/apps/api/internal/clients/clients_test.go
index b94226a..3f3b1fb 100644
--- a/apps/api/internal/clients/clients_test.go
+++ b/apps/api/internal/clients/clients_test.go
@@ -151,6 +151,187 @@ func TestOpenAIClientChatContract(t *testing.T) {
}
}
+func TestOpenAIClientChatRequestNormalizesToolContext(t *testing.T) {
+ var captured map[string]any
+ server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ if err := json.NewDecoder(r.Body).Decode(&captured); err != nil {
+ t.Fatalf("decode request: %v", err)
+ }
+ _ = json.NewEncoder(w).Encode(map[string]any{
+ "id": "chatcmpl-normalized-request",
+ "object": "chat.completion",
+ "model": captured["model"],
+ "choices": []any{map[string]any{
+ "message": map[string]any{"role": "assistant", "content": "ok"},
+ }},
+ })
+ }))
+ defer server.Close()
+
+ _, err := (OpenAIClient{HTTPClient: server.Client()}).Run(context.Background(), Request{
+ Kind: "chat.completions",
+ Model: "openai:gpt-4o-mini",
+ Body: map[string]any{
+ "model": "openai:gpt-4o-mini",
+ "messages": []any{
+ map[string]any{
+ "role": "assistant",
+ "functionCall": map[string]any{
+ "name": "lookup",
+ "arguments": map[string]any{"q": "weather"},
+ },
+ },
+ map[string]any{"role": "tool", "toolCallId": "call_0", "content": "sunny"},
+ map[string]any{
+ "role": "user",
+ "content": []any{
+ map[string]any{"type": "text", "text": "keep this"},
+ map[string]any{"type": "tool_result", "tool_use_id": "toolu_1", "content": map[string]any{"ok": true}},
+ },
+ },
+ },
+ },
+ Candidate: store.RuntimeModelCandidate{
+ BaseURL: server.URL,
+ ProviderModelName: "openai-compatible-gpt-4o-mini",
+ Credentials: map[string]any{"apiKey": "test-key"},
+ },
+ })
+ if err != nil {
+ t.Fatalf("run openai client: %v", err)
+ }
+ messages, _ := captured["messages"].([]any)
+ if len(messages) != 4 {
+ t.Fatalf("unexpected normalized messages: %+v", messages)
+ }
+ assistant, _ := messages[0].(map[string]any)
+ if _, ok := assistant["functionCall"]; ok {
+ t.Fatalf("functionCall should be converted away: %+v", assistant)
+ }
+ toolCalls, _ := assistant["tool_calls"].([]any)
+ toolCall, _ := toolCalls[0].(map[string]any)
+ function, _ := toolCall["function"].(map[string]any)
+ if function["name"] != "lookup" || function["arguments"] != `{"q":"weather"}` {
+ t.Fatalf("unexpected normalized tool call: %+v", assistant)
+ }
+ toolMessage, _ := messages[1].(map[string]any)
+ if toolMessage["tool_call_id"] != "call_0" || toolMessage["toolCallId"] != nil {
+ t.Fatalf("tool message was not normalized: %+v", toolMessage)
+ }
+ keptUser, _ := messages[2].(map[string]any)
+ convertedToolResult, _ := messages[3].(map[string]any)
+ if keptUser["content"] != "keep this" || convertedToolResult["role"] != "tool" || convertedToolResult["tool_call_id"] != "toolu_1" || convertedToolResult["content"] != `{"ok":true}` {
+ t.Fatalf("tool_result block was not restored: user=%+v tool=%+v", keptUser, convertedToolResult)
+ }
+}
+
+func TestOpenAIClientChatResponseNormalizesReasoning(t *testing.T) {
+ server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ _ = json.NewEncoder(w).Encode(map[string]any{
+ "id": "chatcmpl-reasoning",
+ "object": "chat.completion",
+ "model": "openrouter-test",
+ "choices": []any{map[string]any{
+ "message": map[string]any{
+ "role": "assistant",
+ "reasoning_details": []any{
+ map[string]any{"type": "reasoning.text", "text": "detail-"},
+ map[string]any{"type": "reasoning.summary", "summary": "summary"},
+ map[string]any{"type": "reasoning.encrypted", "data": "secret"},
+ },
+ "content": "taggedanswer",
+ },
+ }},
+ })
+ }))
+ defer server.Close()
+
+ response, err := (OpenAIClient{HTTPClient: server.Client()}).Run(context.Background(), Request{
+ Kind: "chat.completions",
+ Model: "OpenRouter-Test",
+ Body: map[string]any{"model": "OpenRouter-Test", "messages": []any{map[string]any{"role": "user", "content": "ping"}}},
+ Candidate: store.RuntimeModelCandidate{
+ BaseURL: server.URL,
+ ModelName: "openrouter-test",
+ Credentials: map[string]any{"apiKey": "test-key"},
+ },
+ })
+ if err != nil {
+ t.Fatalf("run openai client: %v", err)
+ }
+ choices, _ := response.Result["choices"].([]any)
+ choice, _ := choices[0].(map[string]any)
+ message, _ := choice["message"].(map[string]any)
+ if message["reasoning_content"] != "detail-summarytagged" || message["content"] != "answer" {
+ t.Fatalf("reasoning was not normalized: %+v", response.Result)
+ }
+ if _, ok := message["reasoning_details"]; ok {
+ t.Fatalf("reasoning_details should be converted away: %+v", message)
+ }
+}
+
+func TestOpenAIClientChatResponseNormalizesToolCallFormats(t *testing.T) {
+ server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ _ = json.NewEncoder(w).Encode(map[string]any{
+ "id": "chatcmpl-tools",
+ "object": "chat.completion",
+ "model": "tool-format-test",
+ "choices": []any{map[string]any{
+ "message": map[string]any{
+ "role": "assistant",
+ "content": []any{
+ map[string]any{"type": "text", "text": "calling tools"},
+ map[string]any{"type": "tool_use", "id": "toolu_1", "name": "anthropic_lookup", "input": map[string]any{"city": "Boston"}},
+ },
+ "toolCalls": []any{map[string]any{"id": "call_camel", "functionCall": map[string]any{"name": "camel_lookup", "args": map[string]any{"city": "SF"}}}},
+ "function_call": map[string]any{"name": "legacy_lookup", "arguments": "{\"city\":\"NYC\"}"},
+ },
+ }},
+ })
+ }))
+ defer server.Close()
+
+ response, err := (OpenAIClient{HTTPClient: server.Client()}).Run(context.Background(), Request{
+ Kind: "chat.completions",
+ Model: "Tool-Format-Test",
+ Body: map[string]any{"model": "Tool-Format-Test", "messages": []any{map[string]any{"role": "user", "content": "ping"}}},
+ Candidate: store.RuntimeModelCandidate{
+ BaseURL: server.URL,
+ ModelName: "tool-format-test",
+ Credentials: map[string]any{"apiKey": "test-key"},
+ },
+ })
+ if err != nil {
+ t.Fatalf("run openai client: %v", err)
+ }
+ choices, _ := response.Result["choices"].([]any)
+ choice, _ := choices[0].(map[string]any)
+ message, _ := choice["message"].(map[string]any)
+ if message["content"] != "calling tools" {
+ t.Fatalf("tool_use block should be removed from content: %+v", message)
+ }
+ for _, key := range []string{"toolCalls", "function_call"} {
+ if _, ok := message[key]; ok {
+ t.Fatalf("%s should be converted away: %+v", key, message)
+ }
+ }
+ toolCalls, _ := message["tool_calls"].([]any)
+ if len(toolCalls) != 3 {
+ t.Fatalf("expected 3 normalized tool calls, got %+v", message)
+ }
+ assertToolCall := func(index int, id string, name string, arguments string) {
+ t.Helper()
+ toolCall, _ := toolCalls[index].(map[string]any)
+ function, _ := toolCall["function"].(map[string]any)
+ if toolCall["id"] != id || toolCall["type"] != "function" || function["name"] != name || function["arguments"] != arguments {
+ t.Fatalf("unexpected tool call[%d]: %+v", index, toolCall)
+ }
+ }
+ assertToolCall(0, "call_camel", "camel_lookup", "{\"city\":\"SF\"}")
+ assertToolCall(1, "call_1", "legacy_lookup", "{\"city\":\"NYC\"}")
+ assertToolCall(2, "toolu_1", "anthropic_lookup", "{\"city\":\"Boston\"}")
+}
+
func TestOpenAIClientChatStreamContract(t *testing.T) {
var gotStream bool
var gotIncludeUsage bool
@@ -204,6 +385,133 @@ func TestOpenAIClientChatStreamContract(t *testing.T) {
}
}
+func TestOpenAIClientChatStreamPreservesStructuredDeltas(t *testing.T) {
+ server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ w.Header().Set("Content-Type", "text/event-stream")
+ _, _ = w.Write([]byte("data: {\"id\":\"chatcmpl-structured\",\"object\":\"chat.completion.chunk\",\"model\":\"openrouter-reasoner\",\"choices\":[{\"delta\":{\"reasoning_details\":[{\"type\":\"reasoning.text\",\"text\":\"detail-\"},{\"type\":\"reasoning.summary\",\"summary\":\"summary\"},{\"type\":\"reasoning.encrypted\",\"data\":\"secret\"}]}}],\"usage\":null}\n\n"))
+ _, _ = w.Write([]byte("data: {\"id\":\"chatcmpl-structured\",\"object\":\"chat.completion.chunk\",\"model\":\"openrouter-reasoner\",\"choices\":[{\"delta\":{\"content\":\"taggedanswer\"}}],\"usage\":null}\n\n"))
+ _, _ = w.Write([]byte("data: {\"id\":\"chatcmpl-structured\",\"object\":\"chat.completion.chunk\",\"model\":\"deepseek-v4\",\"choices\":[{\"delta\":{\"tool_calls\":[{\"index\":0,\"id\":\"call_1\",\"type\":\"function\",\"function\":{\"name\":\"lookup\",\"arguments\":\"{\\\"q\\\":\"}}]}}],\"usage\":null}\n\n"))
+ _, _ = w.Write([]byte("data: {\"id\":\"chatcmpl-structured\",\"object\":\"chat.completion.chunk\",\"model\":\"deepseek-v4\",\"choices\":[{\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"\\\"weather\\\"}\"}}]},\"finish_reason\":\"tool_calls\"}],\"usage\":null}\n\n"))
+ _, _ = w.Write([]byte("data: [DONE]\n\n"))
+ }))
+ defer server.Close()
+
+ captured := make([]StreamDeltaEvent, 0)
+ response, err := (OpenAIClient{HTTPClient: server.Client()}).Run(context.Background(), Request{
+ Kind: "chat.completions",
+ Model: "DeepSeek-V4",
+ Body: map[string]any{
+ "model": "DeepSeek-V4",
+ "messages": []any{map[string]any{"role": "user", "content": "ping"}},
+ "stream": true,
+ },
+ Candidate: store.RuntimeModelCandidate{
+ BaseURL: server.URL,
+ ModelName: "deepseek-v4",
+ Credentials: map[string]any{"apiKey": "test-key"},
+ },
+ StreamDelta: func(event StreamDeltaEvent) error {
+ captured = append(captured, event)
+ return nil
+ },
+ })
+ if err != nil {
+ t.Fatalf("run openai structured stream client: %v", err)
+ }
+ if len(captured) != 4 || captured[0].ReasoningContent != "detail-summary" || captured[1].ReasoningContent != "tagged" || captured[1].Text != "answer" || captured[2].Event == nil {
+ t.Fatalf("structured stream events were not preserved: %+v", captured)
+ }
+ firstChoices, _ := captured[0].Event["choices"].([]any)
+ firstChoice, _ := firstChoices[0].(map[string]any)
+ firstDelta, _ := firstChoice["delta"].(map[string]any)
+ if firstDelta["reasoning_content"] != "detail-summary" {
+ t.Fatalf("reasoning_details were not converted in stream event: %+v", captured[0].Event)
+ }
+ if _, ok := firstDelta["reasoning_details"]; ok {
+ t.Fatalf("reasoning_details should be removed from stream event: %+v", captured[0].Event)
+ }
+ choices, _ := response.Result["choices"].([]any)
+ choice, _ := choices[0].(map[string]any)
+ message, _ := choice["message"].(map[string]any)
+ if message["reasoning_content"] != "detail-summarytagged" || message["content"] != "answer" || choice["finish_reason"] != "tool_calls" {
+ t.Fatalf("reasoning or finish reason missing from aggregated result: %+v", response.Result)
+ }
+ toolCalls, _ := message["tool_calls"].([]any)
+ toolCall, _ := toolCalls[0].(map[string]any)
+ function, _ := toolCall["function"].(map[string]any)
+ if function["arguments"] != "{\"q\":\"weather\"}" {
+ t.Fatalf("tool call arguments were not aggregated: %+v", response.Result)
+ }
+}
+
+func TestOpenAIClientChatStreamNormalizesToolCallFormats(t *testing.T) {
+ server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ w.Header().Set("Content-Type", "text/event-stream")
+ _, _ = w.Write([]byte("data: {\"id\":\"chatcmpl-tools-stream\",\"object\":\"chat.completion.chunk\",\"model\":\"tool-format-test\",\"choices\":[{\"delta\":{\"function_call\":{\"name\":\"legacy_lookup\",\"arguments\":\"{\\\"city\\\":\"}}}],\"usage\":null}\n\n"))
+ _, _ = w.Write([]byte("data: {\"id\":\"chatcmpl-tools-stream\",\"object\":\"chat.completion.chunk\",\"model\":\"tool-format-test\",\"choices\":[{\"delta\":{\"functionCall\":{\"arguments\":\"\\\"Boston\\\"}\"}}}],\"usage\":null}\n\n"))
+ _, _ = w.Write([]byte("data: {\"id\":\"chatcmpl-tools-stream\",\"object\":\"chat.completion.chunk\",\"model\":\"tool-format-test\",\"choices\":[{\"delta\":{\"toolCall\":{\"index\":1,\"id\":\"call_camel\",\"functionCall\":{\"name\":\"camel_lookup\",\"args\":{\"city\":\"SF\"}}}},\"finish_reason\":\"tool_calls\"}],\"usage\":null}\n\n"))
+ _, _ = w.Write([]byte("data: [DONE]\n\n"))
+ }))
+ defer server.Close()
+
+ captured := make([]StreamDeltaEvent, 0)
+ response, err := (OpenAIClient{HTTPClient: server.Client()}).Run(context.Background(), Request{
+ Kind: "chat.completions",
+ Model: "Tool-Format-Test",
+ Body: map[string]any{
+ "model": "Tool-Format-Test",
+ "messages": []any{map[string]any{"role": "user", "content": "ping"}},
+ "stream": true,
+ },
+ Candidate: store.RuntimeModelCandidate{
+ BaseURL: server.URL,
+ ModelName: "tool-format-test",
+ Credentials: map[string]any{"apiKey": "test-key"},
+ },
+ StreamDelta: func(event StreamDeltaEvent) error {
+ captured = append(captured, event)
+ return nil
+ },
+ })
+ if err != nil {
+ t.Fatalf("run openai stream client: %v", err)
+ }
+ if len(captured) != 3 {
+ t.Fatalf("unexpected captured events: %+v", captured)
+ }
+ for _, event := range captured {
+ choices, _ := event.Event["choices"].([]any)
+ choice, _ := choices[0].(map[string]any)
+ delta, _ := choice["delta"].(map[string]any)
+ if _, ok := delta["function_call"]; ok {
+ t.Fatalf("function_call should be converted away: %+v", event.Event)
+ }
+ if _, ok := delta["functionCall"]; ok {
+ t.Fatalf("functionCall should be converted away: %+v", event.Event)
+ }
+ if _, ok := delta["toolCall"]; ok {
+ t.Fatalf("toolCall should be converted away: %+v", event.Event)
+ }
+ }
+ choices, _ := response.Result["choices"].([]any)
+ choice, _ := choices[0].(map[string]any)
+ message, _ := choice["message"].(map[string]any)
+ toolCalls, _ := message["tool_calls"].([]any)
+ if len(toolCalls) != 2 || choice["finish_reason"] != "tool_calls" {
+ t.Fatalf("unexpected normalized stream result: %+v", response.Result)
+ }
+ legacyCall, _ := toolCalls[0].(map[string]any)
+ legacyFunction, _ := legacyCall["function"].(map[string]any)
+ if legacyFunction["name"] != "legacy_lookup" || legacyFunction["arguments"] != "{\"city\":\"Boston\"}" {
+ t.Fatalf("legacy function_call was not aggregated: %+v", response.Result)
+ }
+ camelCall, _ := toolCalls[1].(map[string]any)
+ camelFunction, _ := camelCall["function"].(map[string]any)
+ if camelCall["id"] != "call_camel" || camelFunction["name"] != "camel_lookup" || camelFunction["arguments"] != "{\"city\":\"SF\"}" {
+ t.Fatalf("camel toolCall was not normalized: %+v", response.Result)
+ }
+}
+
func TestGeminiClientChatContract(t *testing.T) {
var gotPath string
var gotKey string
@@ -261,6 +569,92 @@ func TestGeminiClientChatContract(t *testing.T) {
}
}
+func TestGeminiClientChatRestoresToolContext(t *testing.T) {
+ var captured map[string]any
+ server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ if err := json.NewDecoder(r.Body).Decode(&captured); err != nil {
+ t.Fatalf("decode request: %v", err)
+ }
+ _ = json.NewEncoder(w).Encode(map[string]any{
+ "candidates": []any{map[string]any{
+ "content": map[string]any{"parts": []any{map[string]any{"text": "gemini ok"}}},
+ }},
+ })
+ }))
+ defer server.Close()
+
+ _, err := (GeminiClient{HTTPClient: server.Client()}).Run(context.Background(), Request{
+ Kind: "chat.completions",
+ Model: "gemini:gemini-2.5-flash",
+ Body: map[string]any{
+ "model": "gemini:gemini-2.5-flash",
+ "messages": []any{
+ map[string]any{"role": "user", "content": "weather?"},
+ map[string]any{
+ "role": "assistant",
+ "content": "checking",
+ "tool_calls": []any{map[string]any{
+ "id": "call_weather",
+ "type": "function",
+ "function": map[string]any{
+ "name": "get_weather",
+ "arguments": `{"city":"SF"}`,
+ },
+ }},
+ },
+ map[string]any{"role": "tool", "tool_call_id": "call_weather", "content": `{"temperature":"72F"}`},
+ },
+ "tools": []any{map[string]any{
+ "type": "function",
+ "function": map[string]any{
+ "name": "get_weather",
+ "description": "lookup weather",
+ "parameters": map[string]any{"type": "object"},
+ },
+ }},
+ },
+ Candidate: store.RuntimeModelCandidate{
+ BaseURL: server.URL,
+ ProviderModelName: "gemini-2.5-flash",
+ ModelType: "chat",
+ Credentials: map[string]any{"apiKey": "gemini-key"},
+ },
+ })
+ if err != nil {
+ t.Fatalf("run gemini client: %v", err)
+ }
+ contents, _ := captured["contents"].([]any)
+ if len(contents) != 3 {
+ t.Fatalf("unexpected Gemini contents: %+v", captured)
+ }
+ modelTurn, _ := contents[1].(map[string]any)
+ if modelTurn["role"] != "model" {
+ t.Fatalf("assistant turn should become Gemini model turn: %+v", modelTurn)
+ }
+ modelParts, _ := modelTurn["parts"].([]any)
+ callPart, _ := modelParts[1].(map[string]any)
+ functionCall, _ := callPart["functionCall"].(map[string]any)
+ args, _ := functionCall["args"].(map[string]any)
+ if functionCall["name"] != "get_weather" || args["city"] != "SF" {
+ t.Fatalf("tool call was not restored for Gemini: %+v", modelTurn)
+ }
+ toolTurn, _ := contents[2].(map[string]any)
+ toolParts, _ := toolTurn["parts"].([]any)
+ responsePart, _ := toolParts[0].(map[string]any)
+ functionResponse, _ := responsePart["functionResponse"].(map[string]any)
+ response, _ := functionResponse["response"].(map[string]any)
+ if toolTurn["role"] != "user" || functionResponse["name"] != "get_weather" || response["temperature"] != "72F" {
+ t.Fatalf("tool result was not restored for Gemini: %+v", toolTurn)
+ }
+ tools, _ := captured["tools"].([]any)
+ declarationGroup, _ := tools[0].(map[string]any)
+ declarations, _ := declarationGroup["functionDeclarations"].([]any)
+ declaration, _ := declarations[0].(map[string]any)
+ if declaration["name"] != "get_weather" || declaration["description"] != "lookup weather" {
+ t.Fatalf("tool declaration was not converted for Gemini: %+v", captured["tools"])
+ }
+}
+
func TestGeminiURLAcceptsVersionedBaseURL(t *testing.T) {
got := geminiURL("https://generativelanguage.googleapis.com/v1beta", "gemini-2.5-flash", "test-key")
want := "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent?key=test-key"
diff --git a/apps/api/internal/clients/gemini.go b/apps/api/internal/clients/gemini.go
index 6a8f6ed..cdf32ab 100644
--- a/apps/api/internal/clients/gemini.go
+++ b/apps/api/internal/clients/gemini.go
@@ -70,15 +70,185 @@ func geminiBody(request Request) map[string]any {
return map[string]any{"contents": contents}
}
prompt := firstNonEmptyPrompt(request.Body, "")
- if prompt == "" {
- prompt = textFromMessages(request.Body)
+ if prompt != "" {
+ return map[string]any{
+ "contents": []any{map[string]any{
+ "role": "user",
+ "parts": []any{map[string]any{"text": prompt}},
+ }},
+ }
}
- return map[string]any{
- "contents": []any{map[string]any{
- "role": "user",
- "parts": []any{map[string]any{"text": prompt}},
- }},
+ body := map[string]any{"contents": geminiContentsFromMessages(request.Body)}
+ if tools := geminiToolsFromOpenAITools(request.Body["tools"]); len(tools) > 0 {
+ body["tools"] = tools
}
+ contents, _ := body["contents"].([]any)
+ if len(contents) > 0 {
+ return body
+ }
+ return map[string]any{"contents": []any{map[string]any{
+ "role": "user",
+ "parts": []any{map[string]any{"text": textFromMessages(request.Body)}},
+ }}}
+}
+
+func geminiContentsFromMessages(body map[string]any) []any {
+ normalized := NormalizeChatCompletionRequestBody(body)
+ messages, _ := normalized["messages"].([]any)
+ contents := make([]any, 0, len(messages))
+ toolNames := map[string]string{}
+ for _, rawMessage := range messages {
+ message, _ := rawMessage.(map[string]any)
+ if len(message) == 0 {
+ continue
+ }
+ role := stringFromAny(message["role"])
+ if role == "tool" {
+ toolCallID := stringFromAny(message["tool_call_id"])
+ name := toolNames[toolCallID]
+ if name == "" {
+ name = toolCallID
+ }
+ if name == "" {
+ name = "tool"
+ }
+ contents = append(contents, map[string]any{
+ "role": "user",
+ "parts": []any{map[string]any{"functionResponse": map[string]any{
+ "name": name,
+ "response": geminiFunctionResponsePayload(message["content"]),
+ }}},
+ })
+ continue
+ }
+ parts := geminiTextParts(message["content"])
+ if role == "assistant" {
+ for _, rawToolCall := range toolCallsSlice(message["tool_calls"]) {
+ toolCall, _ := rawToolCall.(map[string]any)
+ function, _ := toolCall["function"].(map[string]any)
+ name := stringFromAny(function["name"])
+ if name == "" {
+ continue
+ }
+ if id := stringFromAny(toolCall["id"]); id != "" {
+ toolNames[id] = name
+ }
+ parts = append(parts, map[string]any{"functionCall": map[string]any{
+ "name": name,
+ "args": geminiFunctionArgs(function["arguments"]),
+ }})
+ }
+ }
+ if len(parts) == 0 {
+ continue
+ }
+ contents = append(contents, map[string]any{
+ "role": geminiRole(role),
+ "parts": parts,
+ })
+ }
+ return contents
+}
+
+func geminiRole(role string) string {
+ if role == "assistant" {
+ return "model"
+ }
+ return "user"
+}
+
+func geminiTextParts(content any) []any {
+ parts := make([]any, 0)
+ switch typed := content.(type) {
+ case string:
+ if strings.TrimSpace(typed) != "" {
+ parts = append(parts, map[string]any{"text": typed})
+ }
+ case []any:
+ for _, rawPart := range typed {
+ part, _ := rawPart.(map[string]any)
+ if text := stringFromAny(firstPresent(part["text"], part["content"])); strings.TrimSpace(text) != "" {
+ parts = append(parts, map[string]any{"text": text})
+ }
+ }
+ }
+ return parts
+}
+
+func toolCallsSlice(value any) []any {
+ switch typed := value.(type) {
+ case []any:
+ return typed
+ case map[string]any:
+ return []any{typed}
+ default:
+ return nil
+ }
+}
+
+func geminiFunctionArgs(value any) map[string]any {
+ if value == nil {
+ return map[string]any{}
+ }
+ if args, ok := value.(map[string]any); ok {
+ return args
+ }
+ if text, ok := value.(string); ok {
+ if strings.TrimSpace(text) == "" {
+ return map[string]any{}
+ }
+ var args map[string]any
+ if err := json.Unmarshal([]byte(text), &args); err == nil {
+ return args
+ }
+ return map[string]any{"arguments": text}
+ }
+ return map[string]any{"arguments": value}
+}
+
+func geminiFunctionResponsePayload(value any) map[string]any {
+ if payload, ok := value.(map[string]any); ok {
+ return payload
+ }
+ if text, ok := value.(string); ok {
+ var payload map[string]any
+ if err := json.Unmarshal([]byte(text), &payload); err == nil {
+ return payload
+ }
+ return map[string]any{"content": text}
+ }
+ if value == nil {
+ return map[string]any{}
+ }
+ return map[string]any{"content": value}
+}
+
+func geminiToolsFromOpenAITools(value any) []any {
+ tools, ok := value.([]any)
+ if !ok || len(tools) == 0 {
+ return nil
+ }
+ declarations := make([]any, 0, len(tools))
+ for _, rawTool := range tools {
+ tool, _ := rawTool.(map[string]any)
+ function, _ := tool["function"].(map[string]any)
+ name := stringFromAny(function["name"])
+ if name == "" {
+ continue
+ }
+ declaration := map[string]any{"name": name}
+ if description := stringFromAny(function["description"]); description != "" {
+ declaration["description"] = description
+ }
+ if parameters, ok := function["parameters"]; ok {
+ declaration["parameters"] = parameters
+ }
+ declarations = append(declarations, declaration)
+ }
+ if len(declarations) == 0 {
+ return nil
+ }
+ return []any{map[string]any{"functionDeclarations": declarations}}
}
func geminiResult(request Request, raw map[string]any) map[string]any {
diff --git a/apps/api/internal/clients/helpers.go b/apps/api/internal/clients/helpers.go
index 8461a38..cc853de 100644
--- a/apps/api/internal/clients/helpers.go
+++ b/apps/api/internal/clients/helpers.go
@@ -8,6 +8,7 @@ import (
"io"
"math"
"net/http"
+ "sort"
"strings"
"time"
)
@@ -87,8 +88,11 @@ func decodeOpenAIStreamReader(reader io.Reader, onDelta StreamDelta) (map[string
scanner.Buffer(make([]byte, 0, 64*1024), 16*1024*1024)
rawLines := make([]string, 0)
parts := make([]string, 0)
+ reasoningParts := make([]string, 0)
var last map[string]any
var usage Usage
+ finishReason := ""
+ toolCalls := map[int]map[string]any{}
for scanner.Scan() {
rawLine := scanner.Text()
rawLines = append(rawLines, rawLine)
@@ -104,13 +108,23 @@ func decodeOpenAIStreamReader(reader io.Reader, onDelta StreamDelta) (map[string
if err := json.Unmarshal([]byte(payload), &event); err != nil {
continue
}
+ event = NormalizeChatCompletionStreamEvent(event)
last = event
- if text := streamEventText(event); text != "" {
+ text := streamEventText(event)
+ reasoningText := streamEventReasoningContent(event)
+ if text != "" {
parts = append(parts, text)
- if onDelta != nil {
- if err := onDelta(text); err != nil {
- return nil, true, err
- }
+ }
+ if reasoningText != "" {
+ reasoningParts = append(reasoningParts, reasoningText)
+ }
+ aggregateStreamToolCalls(event, toolCalls)
+ if reason := streamEventFinishReason(event); reason != "" {
+ finishReason = reason
+ }
+ if onDelta != nil {
+ if err := onDelta(StreamDeltaEvent{Text: text, ReasoningContent: reasoningText, Event: event}); err != nil {
+ return nil, true, err
}
}
if eventUsage := usageFromOpenAI(event); eventUsage.TotalTokens > 0 {
@@ -131,7 +145,7 @@ func decodeOpenAIStreamReader(reader io.Reader, onDelta StreamDelta) (map[string
}
return out, true, nil
}
- return buildOpenAIStreamResult(last, parts, usage), true, nil
+ return buildOpenAIStreamResult(last, parts, reasoningParts, toolCalls, finishReason, usage), true, nil
}
func decodeOpenAIStream(raw []byte) (map[string]any, bool) {
@@ -142,22 +156,32 @@ func decodeOpenAIStream(raw []byte) (map[string]any, bool) {
return result, ok && err == nil
}
-func buildOpenAIStreamResult(last map[string]any, parts []string, usage Usage) map[string]any {
- if len(parts) == 0 {
+func buildOpenAIStreamResult(last map[string]any, parts []string, reasoningParts []string, toolCalls map[int]map[string]any, finishReason string, usage Usage) map[string]any {
+ if len(parts) == 0 && len(reasoningParts) == 0 && len(toolCalls) == 0 {
return last
}
+ message := map[string]any{
+ "role": "assistant",
+ "content": strings.Join(parts, ""),
+ }
+ if len(reasoningParts) > 0 {
+ message["reasoning_content"] = strings.Join(reasoningParts, "")
+ }
+ if len(toolCalls) > 0 {
+ message["tool_calls"] = sortedStreamToolCalls(toolCalls)
+ }
+ if finishReason == "" {
+ finishReason = "stop"
+ }
var out map[string]any
out = map[string]any{
"id": stringFromAny(firstPresent(last["id"], "chatcmpl-stream")),
"object": "chat.completion",
"model": stringFromAny(last["model"]),
"choices": []any{map[string]any{
- "index": 0,
- "message": map[string]any{
- "role": "assistant",
- "content": strings.Join(parts, ""),
- },
- "finish_reason": "stop",
+ "index": 0,
+ "message": message,
+ "finish_reason": finishReason,
}},
}
if usage.TotalTokens > 0 {
@@ -170,6 +194,571 @@ func buildOpenAIStreamResult(last map[string]any, parts []string, usage Usage) m
return out
}
+// NormalizeChatCompletionRequestBody 将后续请求里的工具调用上下文还原为
+// OpenAI Chat Completions 标准格式,便于再次发送给 OpenAI-compatible 上游。
+func NormalizeChatCompletionRequestBody(body map[string]any) map[string]any {
+ if body == nil {
+ return nil
+ }
+ out := cloneBody(body)
+ messages, ok := out["messages"].([]any)
+ if !ok {
+ return out
+ }
+ normalizedMessages := make([]any, 0, len(messages))
+ for _, rawMessage := range messages {
+ message, ok := rawMessage.(map[string]any)
+ if !ok {
+ normalizedMessages = append(normalizedMessages, rawMessage)
+ continue
+ }
+ copied := cloneMapAny(message)
+ normalizeToolCallsContainer(copied, false)
+ normalizeToolMessageFields(copied)
+ toolMessages, cleanContent, changed := toolResultMessagesFromContent(copied["content"])
+ if changed {
+ if cleanContent != nil && contentHasText(cleanContent) {
+ copied["content"] = cleanContent
+ normalizedMessages = append(normalizedMessages, copied)
+ } else if len(copied) > 1 || copied["role"] != nil {
+ delete(copied, "content")
+ if len(copied) > 1 {
+ normalizedMessages = append(normalizedMessages, copied)
+ }
+ }
+ normalizedMessages = append(normalizedMessages, toolMessages...)
+ continue
+ }
+ normalizedMessages = append(normalizedMessages, copied)
+ }
+ out["messages"] = normalizedMessages
+ return out
+}
+
+func cloneMapAny(source map[string]any) map[string]any {
+ if source == nil {
+ return nil
+ }
+ out := make(map[string]any, len(source))
+ for key, value := range source {
+ out[key] = value
+ }
+ return out
+}
+
+// NormalizeChatCompletionResult 将供应商自定义推理字段归一化到
+// message.reasoning_content,并从最终回答 content 中剥离内联推理块。
+// 加密推理载荷不可展示,且不应作为正文输出,因此会被忽略。
+func NormalizeChatCompletionResult(result map[string]any) map[string]any {
+ if result == nil {
+ return nil
+ }
+ choices, _ := result["choices"].([]any)
+ for _, rawChoice := range choices {
+ choice, _ := rawChoice.(map[string]any)
+ if message, ok := choice["message"].(map[string]any); ok {
+ normalizeToolCallsContainer(message, false)
+ normalizeReasoningContainer(message, false)
+ }
+ if delta, ok := choice["delta"].(map[string]any); ok {
+ normalizeToolCallsContainer(delta, true)
+ normalizeReasoningContainer(delta, true)
+ }
+ }
+ return result
+}
+
+// NormalizeChatCompletionStreamEvent 将供应商自定义流式推理字段
+// (例如 reasoning_details 或 reasoning)归一化到 delta.reasoning_content。
+func NormalizeChatCompletionStreamEvent(event map[string]any) map[string]any {
+ if event == nil {
+ return nil
+ }
+ choices, _ := event["choices"].([]any)
+ for _, rawChoice := range choices {
+ choice, _ := rawChoice.(map[string]any)
+ if delta, ok := choice["delta"].(map[string]any); ok {
+ normalizeToolCallsContainer(delta, true)
+ normalizeReasoningContainer(delta, true)
+ }
+ if message, ok := choice["message"].(map[string]any); ok {
+ normalizeToolCallsContainer(message, false)
+ normalizeReasoningContainer(message, false)
+ }
+ }
+ return event
+}
+
+func normalizeToolCallsContainer(container map[string]any, stream bool) {
+ if container == nil {
+ return
+ }
+ toolCalls := make([]any, 0)
+ for _, rawToolCall := range rawToolCallValues(container) {
+ for _, normalized := range normalizeRawToolCalls(rawToolCall, len(toolCalls), stream) {
+ toolCalls = append(toolCalls, normalized)
+ }
+ }
+ if contentToolCalls, cleanContent, changed := toolCallsFromContent(container["content"], len(toolCalls), stream); changed {
+ toolCalls = append(toolCalls, contentToolCalls...)
+ setNormalizedContent(container, cleanContent, stream)
+ }
+ if partToolCalls := toolCallsFromParts(container["parts"], len(toolCalls), stream); len(partToolCalls) > 0 {
+ toolCalls = append(toolCalls, partToolCalls...)
+ delete(container, "parts")
+ }
+ if len(toolCalls) > 0 {
+ container["tool_calls"] = toolCalls
+ }
+ for _, key := range []string{"tool_call", "toolCall", "toolCalls", "function_call", "functionCall"} {
+ delete(container, key)
+ }
+}
+
+func normalizeToolMessageFields(message map[string]any) {
+ if message == nil {
+ return
+ }
+ if id := firstNonEmptyString(message["tool_call_id"], message["toolCallId"], message["tool_use_id"], message["toolUseId"], message["call_id"], message["callId"]); id != "" {
+ message["tool_call_id"] = id
+ }
+ for _, key := range []string{"toolCallId", "tool_use_id", "toolUseId", "call_id", "callId"} {
+ delete(message, key)
+ }
+}
+
+func toolResultMessagesFromContent(value any) ([]any, any, bool) {
+ blocks, ok := value.([]any)
+ if !ok {
+ return nil, nil, false
+ }
+ toolMessages := make([]any, 0)
+ remaining := make([]any, 0, len(blocks))
+ for _, rawBlock := range blocks {
+ block, _ := rawBlock.(map[string]any)
+ if len(block) == 0 || stringFromAny(block["type"]) != "tool_result" {
+ remaining = append(remaining, rawBlock)
+ continue
+ }
+ message := map[string]any{
+ "role": "tool",
+ "tool_call_id": firstNonEmptyString(block["tool_call_id"], block["toolCallId"], block["tool_use_id"], block["toolUseId"], block["id"]),
+ "content": toolResultContent(block["content"]),
+ }
+ toolMessages = append(toolMessages, message)
+ }
+ if len(toolMessages) == 0 {
+ return nil, nil, false
+ }
+ return toolMessages, contentBlocksText(remaining), true
+}
+
+func toolResultContent(value any) any {
+ if text, ok := value.(string); ok {
+ return text
+ }
+ return jsonStringFromAny(value)
+}
+
+func contentHasText(value any) bool {
+ switch typed := value.(type) {
+ case string:
+ return strings.TrimSpace(typed) != ""
+ case []any:
+ return len(typed) > 0
+ default:
+ return value != nil
+ }
+}
+
+func rawToolCallValues(container map[string]any) []any {
+ values := make([]any, 0, 6)
+ for _, key := range []string{"tool_calls", "tool_call", "toolCalls", "toolCall", "function_call", "functionCall"} {
+ if value, ok := container[key]; ok {
+ values = append(values, value)
+ }
+ }
+ return values
+}
+
+func normalizeRawToolCalls(value any, startIndex int, stream bool) []any {
+ switch typed := value.(type) {
+ case []any:
+ out := make([]any, 0, len(typed))
+ for _, raw := range typed {
+ if toolCall := normalizeToolCall(raw, startIndex+len(out), stream); toolCall != nil {
+ out = append(out, toolCall)
+ }
+ }
+ return out
+ default:
+ if toolCall := normalizeToolCall(value, startIndex, stream); toolCall != nil {
+ return []any{toolCall}
+ }
+ return nil
+ }
+}
+
+func normalizeToolCall(value any, index int, stream bool) map[string]any {
+ source, _ := value.(map[string]any)
+ if len(source) == 0 {
+ return nil
+ }
+ functionSource := mapFromAny(source["function"])
+ if len(functionSource) == 0 {
+ functionSource = mapFromAny(firstPresent(source["function_call"], source["functionCall"]))
+ }
+ name := firstNonEmptyString(
+ functionSource["name"], source["name"], source["function_name"], source["functionName"], source["tool_name"], source["toolName"],
+ )
+ arguments, hasArguments := toolCallArguments(functionSource)
+ if !hasArguments {
+ arguments, hasArguments = toolCallArguments(source)
+ }
+ if name == "" && !hasArguments && firstNonEmptyString(source["id"], source["call_id"], source["callId"], source["tool_call_id"], source["toolCallId"]) == "" {
+ return nil
+ }
+ function := map[string]any{}
+ if name != "" {
+ function["name"] = name
+ }
+ if hasArguments {
+ function["arguments"] = arguments
+ }
+ toolCall := map[string]any{
+ "type": firstNonEmptyString(source["type"], "function"),
+ "function": function,
+ }
+ if id := firstNonEmptyString(source["id"], source["call_id"], source["callId"], source["tool_call_id"], source["toolCallId"]); id != "" {
+ toolCall["id"] = id
+ } else if !stream {
+ toolCall["id"] = fmt.Sprintf("call_%d", index)
+ }
+ if stream {
+ if rawIndex, ok := firstPresent(source["index"], source["idx"]).(float64); ok {
+ toolCall["index"] = int(math.Round(rawIndex))
+ } else if rawIndex, ok := firstPresent(source["index"], source["idx"]).(int); ok {
+ toolCall["index"] = rawIndex
+ } else {
+ toolCall["index"] = index
+ }
+ }
+ return toolCall
+}
+
+func toolCallsFromContent(value any, startIndex int, stream bool) ([]any, any, bool) {
+ blocks, ok := value.([]any)
+ if !ok {
+ return nil, nil, false
+ }
+ toolCalls := make([]any, 0)
+ remaining := make([]any, 0, len(blocks))
+ containsReasoning := false
+ for _, rawBlock := range blocks {
+ block, _ := rawBlock.(map[string]any)
+ if len(block) == 0 {
+ remaining = append(remaining, rawBlock)
+ continue
+ }
+ switch stringFromAny(block["type"]) {
+ case "tool_use":
+ if toolCall := normalizeToolUseBlock(block, startIndex+len(toolCalls), stream); toolCall != nil {
+ toolCalls = append(toolCalls, toolCall)
+ }
+ case "tool_result":
+ remaining = append(remaining, rawBlock)
+ default:
+ if isReasoningContentBlock(block) {
+ containsReasoning = true
+ }
+ remaining = append(remaining, rawBlock)
+ }
+ }
+ if len(toolCalls) == 0 {
+ return nil, nil, false
+ }
+ if containsReasoning {
+ return toolCalls, remaining, true
+ }
+ return toolCalls, contentBlocksText(remaining), true
+}
+
+func normalizeToolUseBlock(block map[string]any, index int, stream bool) map[string]any {
+ toolCall := map[string]any{
+ "type": "function",
+ "function": map[string]any{
+ "name": stringFromAny(block["name"]),
+ "arguments": jsonStringFromAny(block["input"]),
+ },
+ }
+ if id := firstNonEmptyString(block["id"], block["tool_use_id"], block["toolUseId"]); id != "" {
+ toolCall["id"] = id
+ } else if !stream {
+ toolCall["id"] = fmt.Sprintf("call_%d", index)
+ }
+ if stream {
+ toolCall["index"] = index
+ }
+ return toolCall
+}
+
+func toolCallsFromParts(value any, startIndex int, stream bool) []any {
+ parts, ok := value.([]any)
+ if !ok {
+ return nil
+ }
+ out := make([]any, 0)
+ for _, rawPart := range parts {
+ part, _ := rawPart.(map[string]any)
+ if functionCall := mapFromAny(firstPresent(part["functionCall"], part["function_call"])); len(functionCall) > 0 {
+ if toolCall := normalizeGeminiFunctionCall(functionCall, startIndex+len(out), stream); toolCall != nil {
+ out = append(out, toolCall)
+ }
+ }
+ }
+ return out
+}
+
+func normalizeGeminiFunctionCall(functionCall map[string]any, index int, stream bool) map[string]any {
+ toolCall := map[string]any{
+ "type": "function",
+ "function": map[string]any{
+ "name": stringFromAny(functionCall["name"]),
+ "arguments": jsonStringFromAny(firstPresent(functionCall["args"], functionCall["arguments"])),
+ },
+ }
+ if id := firstNonEmptyString(functionCall["id"], functionCall["call_id"], functionCall["callId"]); id != "" {
+ toolCall["id"] = id
+ } else if !stream {
+ toolCall["id"] = fmt.Sprintf("call_%d", index)
+ }
+ if stream {
+ toolCall["index"] = index
+ }
+ return toolCall
+}
+
+func setNormalizedContent(container map[string]any, value any, stream bool) {
+ if text, ok := value.(string); ok && text == "" {
+ if stream {
+ delete(container, "content")
+ return
+ }
+ container["content"] = nil
+ return
+ }
+ container["content"] = value
+}
+
+func isReasoningContentBlock(block map[string]any) bool {
+ switch stringFromAny(block["type"]) {
+ case "thinking", "redacted_thinking", "reasoning.text", "reasoning.summary", "reasoning.encrypted":
+ return true
+ default:
+ return false
+ }
+}
+
+func contentBlocksText(blocks []any) string {
+ parts := make([]string, 0, len(blocks))
+ for _, rawBlock := range blocks {
+ switch block := rawBlock.(type) {
+ case string:
+ parts = append(parts, block)
+ case map[string]any:
+ if text := stringFromAny(firstPresent(block["text"], block["content"])); text != "" {
+ parts = append(parts, text)
+ }
+ }
+ }
+ return strings.Join(parts, "")
+}
+
+func toolCallArguments(source map[string]any) (string, bool) {
+ for _, key := range []string{"arguments", "args", "input", "parameters"} {
+ if value, ok := source[key]; ok {
+ return jsonStringFromAny(value), true
+ }
+ }
+ return "", false
+}
+
+func jsonStringFromAny(value any) string {
+ if value == nil {
+ return ""
+ }
+ if text, ok := value.(string); ok {
+ return text
+ }
+ encoded, err := json.Marshal(value)
+ if err != nil {
+ return ""
+ }
+ return string(encoded)
+}
+
+func normalizeReasoningContainer(container map[string]any, deleteEmptyContent bool) {
+ if container == nil {
+ return
+ }
+ reasoningParts := make([]string, 0, 3)
+ if reasoning := reasoningDetailsText(container["reasoning_details"]); reasoning != "" {
+ reasoningParts = append(reasoningParts, reasoning)
+ } else if reasoning := stringFromAny(container["reasoning_content"]); reasoning != "" {
+ reasoningParts = append(reasoningParts, reasoning)
+ } else if reasoning := stringFromAny(container["reasoning"]); reasoning != "" {
+ reasoningParts = append(reasoningParts, reasoning)
+ }
+ if content, ok := container["content"]; ok {
+ cleanContent, contentReasoning, changed := normalizeReasoningContentValue(content)
+ if changed {
+ if deleteEmptyContent {
+ if text, ok := cleanContent.(string); ok && text == "" {
+ delete(container, "content")
+ } else {
+ container["content"] = cleanContent
+ }
+ } else {
+ container["content"] = cleanContent
+ }
+ }
+ if contentReasoning != "" {
+ reasoningParts = append(reasoningParts, contentReasoning)
+ }
+ }
+ if len(reasoningParts) > 0 {
+ container["reasoning_content"] = strings.Join(reasoningParts, "")
+ }
+ delete(container, "reasoning_details")
+ delete(container, "reasoning")
+}
+
+func reasoningDetailsText(value any) string {
+ rawItems, ok := value.([]any)
+ if !ok {
+ return ""
+ }
+ parts := make([]string, 0, len(rawItems))
+ for _, rawItem := range rawItems {
+ item, _ := rawItem.(map[string]any)
+ switch stringFromAny(item["type"]) {
+ case "reasoning.text":
+ if text := stringFromAny(item["text"]); text != "" {
+ parts = append(parts, text)
+ }
+ case "reasoning.summary":
+ if summary := stringFromAny(item["summary"]); summary != "" {
+ parts = append(parts, summary)
+ }
+ }
+ }
+ return strings.Join(parts, "")
+}
+
+func normalizeReasoningContentValue(value any) (any, string, bool) {
+ switch typed := value.(type) {
+ case string:
+ cleanContent, reasoning, changed := splitTaggedReasoningText(typed)
+ return cleanContent, reasoning, changed
+ case []any:
+ contentParts := make([]string, 0, len(typed))
+ reasoningParts := make([]string, 0)
+ changed := false
+ for _, rawItem := range typed {
+ switch item := rawItem.(type) {
+ case string:
+ contentParts = append(contentParts, item)
+ case map[string]any:
+ switch stringFromAny(item["type"]) {
+ case "thinking":
+ if thinking := stringFromAny(item["thinking"]); thinking != "" {
+ reasoningParts = append(reasoningParts, thinking)
+ }
+ changed = true
+ case "redacted_thinking", "reasoning.encrypted":
+ changed = true
+ case "reasoning.text":
+ if text := stringFromAny(item["text"]); text != "" {
+ reasoningParts = append(reasoningParts, text)
+ }
+ changed = true
+ case "reasoning.summary":
+ if summary := stringFromAny(item["summary"]); summary != "" {
+ reasoningParts = append(reasoningParts, summary)
+ }
+ changed = true
+ case "text", "output_text":
+ if text := stringFromAny(firstPresent(item["text"], item["content"])); text != "" {
+ cleanText, reasoning, tagged := splitTaggedReasoningText(text)
+ contentParts = append(contentParts, cleanText)
+ if reasoning != "" {
+ reasoningParts = append(reasoningParts, reasoning)
+ }
+ changed = changed || tagged
+ }
+ default:
+ if text := stringFromAny(firstPresent(item["text"], item["content"])); text != "" {
+ contentParts = append(contentParts, text)
+ }
+ }
+ }
+ }
+ if !changed {
+ return value, "", false
+ }
+ return strings.Join(contentParts, ""), strings.Join(reasoningParts, ""), true
+ default:
+ return value, "", false
+ }
+}
+
+func splitTaggedReasoningText(text string) (string, string, bool) {
+ lower := strings.ToLower(text)
+ clean := strings.Builder{}
+ reasoning := strings.Builder{}
+ changed := false
+ for offset := 0; offset < len(text); {
+ start, tag := nextReasoningOpenTag(lower, offset)
+ if start < 0 {
+ clean.WriteString(text[offset:])
+ break
+ }
+ clean.WriteString(text[offset:start])
+ openEnd := start + len("<"+tag+">")
+ closeToken := "" + tag + ">"
+ closeStart := strings.Index(lower[openEnd:], closeToken)
+ if closeStart < 0 {
+ reasoning.WriteString(text[openEnd:])
+ offset = len(text)
+ changed = true
+ break
+ }
+ closeStart += openEnd
+ reasoning.WriteString(text[openEnd:closeStart])
+ offset = closeStart + len(closeToken)
+ changed = true
+ }
+ return clean.String(), reasoning.String(), changed
+}
+
+func nextReasoningOpenTag(lower string, offset int) (int, string) {
+ bestStart := -1
+ bestTag := ""
+ for _, tag := range []string{"think", "reasoning", "analysis"} {
+ needle := "<" + tag + ">"
+ idx := strings.Index(lower[offset:], needle)
+ if idx < 0 {
+ continue
+ }
+ absolute := offset + idx
+ if bestStart < 0 || absolute < bestStart {
+ bestStart = absolute
+ bestTag = tag
+ }
+ }
+ return bestStart, bestTag
+}
+
func streamEventText(event map[string]any) string {
if choices, ok := event["choices"].([]any); ok {
for _, rawChoice := range choices {
@@ -195,6 +784,91 @@ func streamEventText(event map[string]any) string {
return ""
}
+func streamEventReasoningContent(event map[string]any) string {
+ if choices, ok := event["choices"].([]any); ok {
+ for _, rawChoice := range choices {
+ choice, _ := rawChoice.(map[string]any)
+ if delta, ok := choice["delta"].(map[string]any); ok {
+ if content, ok := delta["reasoning_content"].(string); ok {
+ return content
+ }
+ if content, ok := delta["reasoning"].(string); ok {
+ return content
+ }
+ }
+ if message, ok := choice["message"].(map[string]any); ok {
+ if content, ok := message["reasoning_content"].(string); ok {
+ return content
+ }
+ }
+ }
+ }
+ return ""
+}
+
+func streamEventFinishReason(event map[string]any) string {
+ if choices, ok := event["choices"].([]any); ok {
+ for _, rawChoice := range choices {
+ choice, _ := rawChoice.(map[string]any)
+ if reason, ok := choice["finish_reason"].(string); ok && reason != "" {
+ return reason
+ }
+ }
+ }
+ return ""
+}
+
+func aggregateStreamToolCalls(event map[string]any, toolCalls map[int]map[string]any) {
+ choices, _ := event["choices"].([]any)
+ for _, rawChoice := range choices {
+ choice, _ := rawChoice.(map[string]any)
+ delta, _ := choice["delta"].(map[string]any)
+ rawToolCalls, _ := delta["tool_calls"].([]any)
+ for _, rawToolCall := range rawToolCalls {
+ incoming, _ := rawToolCall.(map[string]any)
+ index := intFromAny(incoming["index"])
+ current := toolCalls[index]
+ if current == nil {
+ current = map[string]any{}
+ toolCalls[index] = current
+ }
+ for _, key := range []string{"id", "type"} {
+ if value, ok := incoming[key].(string); ok && value != "" {
+ current[key] = value
+ }
+ }
+ incomingFn, _ := incoming["function"].(map[string]any)
+ if len(incomingFn) == 0 {
+ continue
+ }
+ currentFn, _ := current["function"].(map[string]any)
+ if currentFn == nil {
+ currentFn = map[string]any{}
+ current["function"] = currentFn
+ }
+ if name, ok := incomingFn["name"].(string); ok && name != "" {
+ currentFn["name"] = stringFromAny(currentFn["name"]) + name
+ }
+ if arguments, ok := incomingFn["arguments"].(string); ok && arguments != "" {
+ currentFn["arguments"] = stringFromAny(currentFn["arguments"]) + arguments
+ }
+ }
+ }
+}
+
+func sortedStreamToolCalls(toolCalls map[int]map[string]any) []any {
+ indices := make([]int, 0, len(toolCalls))
+ for index := range toolCalls {
+ indices = append(indices, index)
+ }
+ sort.Ints(indices)
+ out := make([]any, 0, len(indices))
+ for _, index := range indices {
+ out = append(out, toolCalls[index])
+ }
+ return out
+}
+
func usageFromOpenAI(result map[string]any) Usage {
usage, _ := result["usage"].(map[string]any)
input := intFromAny(firstPresent(usage["prompt_tokens"], usage["input_tokens"]))
@@ -254,6 +928,15 @@ func stringFromAny(value any) string {
return ""
}
+func firstNonEmptyString(values ...any) string {
+ for _, value := range values {
+ if text := strings.TrimSpace(stringFromAny(value)); text != "" {
+ return text
+ }
+ }
+ return ""
+}
+
func firstPresent(values ...any) any {
for _, value := range values {
if value != nil {
diff --git a/apps/api/internal/clients/openai.go b/apps/api/internal/clients/openai.go
index 8a579e1..12ef168 100644
--- a/apps/api/internal/clients/openai.go
+++ b/apps/api/internal/clients/openai.go
@@ -23,6 +23,9 @@ func (c OpenAIClient) Run(ctx context.Context, request Request) (Response, error
return Response{}, &ClientError{Code: "unsupported_kind", Message: "unsupported openai request kind", Retryable: false}
}
body := cloneBody(request.Body)
+ if request.Kind == "chat.completions" {
+ body = NormalizeChatCompletionRequestBody(body)
+ }
body["model"] = upstreamModelName(request.Candidate)
stream := request.Stream || boolValue(body, "stream")
ensureOpenAIStreamUsage(body, request.Kind, stream)
@@ -40,6 +43,9 @@ func (c OpenAIClient) Run(ctx context.Context, request Request) (Response, error
}
requestID := requestIDFromHTTPResponse(resp)
result, err := decodeOpenAIResponse(resp, stream, request.StreamDelta)
+ if err == nil && request.Kind == "chat.completions" {
+ result = NormalizeChatCompletionResult(result)
+ }
responseFinishedAt := time.Now()
if err != nil {
return Response{}, annotateResponseError(err, requestID, responseStartedAt, responseFinishedAt)
diff --git a/apps/api/internal/clients/types.go b/apps/api/internal/clients/types.go
index 8d88793..8e46d8c 100644
--- a/apps/api/internal/clients/types.go
+++ b/apps/api/internal/clients/types.go
@@ -48,7 +48,13 @@ type Progress struct {
Payload map[string]any
}
-type StreamDelta func(text string) error
+type StreamDeltaEvent struct {
+ Text string
+ ReasoningContent string
+ Event map[string]any
+}
+
+type StreamDelta func(event StreamDeltaEvent) error
type Client interface {
Run(ctx context.Context, request Request) (Response, error)
diff --git a/apps/api/internal/httpapi/chat_completions_mode_test.go b/apps/api/internal/httpapi/chat_completions_mode_test.go
index 599046c..d16b9f4 100644
--- a/apps/api/internal/httpapi/chat_completions_mode_test.go
+++ b/apps/api/internal/httpapi/chat_completions_mode_test.go
@@ -50,7 +50,7 @@ func TestWriteCompatibleTaskResponseReturnsJSONWhenStreamIsFalse(t *testing.T) {
req := httptest.NewRequest(http.MethodPost, "/api/v1/chat/completions", nil)
recorder := httptest.NewRecorder()
- writeCompatibleTaskResponse(context.Background(), recorder, req, executor, "chat.completions", "gpt-test", store.GatewayTask{ID: "task-test"}, &auth.User{}, false)
+ writeCompatibleTaskResponse(context.Background(), recorder, req, executor, "chat.completions", "gpt-test", store.GatewayTask{ID: "task-test"}, &auth.User{}, false, false)
if recorder.Code != http.StatusOK {
t.Fatalf("status=%d want=%d body=%s", recorder.Code, http.StatusOK, recorder.Body.String())
@@ -69,13 +69,13 @@ func TestWriteCompatibleTaskResponseReturnsJSONWhenStreamIsFalse(t *testing.T) {
func TestWriteCompatibleTaskResponseReturnsSSEWhenStreamIsTrue(t *testing.T) {
executor := &fakeTaskExecutor{
- deltas: []string{"hel", "lo"},
- output: map[string]any{"id": "chatcmpl-test", "object": "chat.completion"},
+ deltas: []clients.StreamDeltaEvent{{Text: "hel"}, {Text: "lo"}},
+ output: map[string]any{"id": "chatcmpl-test", "object": "chat.completion", "usage": map[string]any{"prompt_tokens": 1, "completion_tokens": 2, "total_tokens": 3}},
}
req := httptest.NewRequest(http.MethodPost, "/api/v1/chat/completions", nil)
recorder := httptest.NewRecorder()
- writeCompatibleTaskResponse(context.Background(), recorder, req, executor, "chat.completions", "gpt-test", store.GatewayTask{ID: "task-test"}, &auth.User{}, true)
+ writeCompatibleTaskResponse(context.Background(), recorder, req, executor, "chat.completions", "gpt-test", store.GatewayTask{ID: "task-test"}, &auth.User{}, true, true)
if executor.executeCalls != 0 || executor.streamCalls != 1 {
t.Fatalf("expected stream execute only, got execute=%d stream=%d", executor.executeCalls, executor.streamCalls)
@@ -84,17 +84,53 @@ func TestWriteCompatibleTaskResponseReturnsSSEWhenStreamIsTrue(t *testing.T) {
t.Fatalf("Content-Type=%q want text/event-stream", contentType)
}
body := recorder.Body.String()
- for _, want := range []string{"event: message", `"content":"hel"`, `"content":"lo"`, `"finish_reason":"stop"`} {
+ for _, want := range []string{`data: {`, `"role":"assistant"`, `"created":`, `"system_fingerprint":`, `"content":"hel"`, `"content":"lo"`, `"finish_reason":"stop"`, `"usage":{"completion_tokens":2,"prompt_tokens":1,"total_tokens":3}`, "data: [DONE]"} {
if !strings.Contains(body, want) {
t.Fatalf("SSE body missing %s: %s", want, body)
}
}
+ if strings.Contains(body, "event: message") {
+ t.Fatalf("chat completions stream should use OpenAI data-only SSE frames: %s", body)
+ }
+}
+
+func TestWriteCompatibleTaskResponseStreamsStructuredToolAndReasoningDeltas(t *testing.T) {
+ executor := &fakeTaskExecutor{
+ deltas: []clients.StreamDeltaEvent{
+ {Event: map[string]any{"id": "chatcmpl-upstream", "object": "chat.completion.chunk", "created": float64(1710000000), "model": "deepseek-v4", "system_fingerprint": "fp-test", "choices": []any{map[string]any{"index": float64(0), "delta": map[string]any{"reasoning_details": []any{map[string]any{"type": "reasoning.text", "text": "detail-"}, map[string]any{"type": "reasoning.summary", "summary": "summary"}, map[string]any{"type": "reasoning.encrypted", "data": "secret"}}}, "finish_reason": nil}}}},
+ {Event: map[string]any{"id": "chatcmpl-upstream", "object": "chat.completion.chunk", "created": float64(1710000000), "model": "deepseek-v4", "system_fingerprint": "fp-test", "choices": []any{map[string]any{"index": float64(0), "delta": map[string]any{"content": "taggedanswer"}, "finish_reason": nil}}}},
+ {Event: map[string]any{"id": "chatcmpl-upstream", "object": "chat.completion.chunk", "created": float64(1710000000), "model": "deepseek-v4", "system_fingerprint": "fp-test", "choices": []any{map[string]any{"index": float64(0), "delta": map[string]any{"functionCall": map[string]any{"name": "legacy_lookup", "arguments": "{\"city\":\"Boston\"}"}}, "finish_reason": nil}}}},
+ {Event: map[string]any{"id": "chatcmpl-upstream", "object": "chat.completion.chunk", "created": float64(1710000000), "model": "deepseek-v4", "system_fingerprint": "fp-test", "choices": []any{map[string]any{"index": float64(0), "delta": map[string]any{"tool_calls": []any{map[string]any{"index": float64(0), "id": "call_1", "type": "function", "function": map[string]any{"name": "lookup", "arguments": "{\"q\":"}}}}, "finish_reason": nil}}}},
+ {Event: map[string]any{"id": "chatcmpl-upstream", "object": "chat.completion.chunk", "created": float64(1710000000), "model": "deepseek-v4", "system_fingerprint": "fp-test", "choices": []any{map[string]any{"index": float64(0), "delta": map[string]any{"tool_calls": []any{map[string]any{"index": float64(0), "function": map[string]any{"arguments": "\"weather\"}"}}}}, "finish_reason": "tool_calls"}}}},
+ {Event: map[string]any{"id": "chatcmpl-upstream", "object": "chat.completion.chunk", "created": float64(1710000000), "model": "deepseek-v4", "choices": []any{}, "usage": map[string]any{"prompt_tokens": float64(4), "completion_tokens": float64(5), "total_tokens": float64(9)}}},
+ },
+ output: map[string]any{"id": "chatcmpl-upstream", "object": "chat.completion", "model": "deepseek-v4"},
+ }
+ req := httptest.NewRequest(http.MethodPost, "/api/v1/chat/completions", nil)
+ recorder := httptest.NewRecorder()
+
+ writeCompatibleTaskResponse(context.Background(), recorder, req, executor, "chat.completions", "gpt-test", store.GatewayTask{ID: "task-test"}, &auth.User{}, true, true)
+
+ body := recorder.Body.String()
+ roleIndex := strings.Index(body, `"role":"assistant"`)
+ reasoningIndex := strings.Index(body, `"reasoning_content":"detail-summary"`)
+ if roleIndex < 0 || reasoningIndex < 0 || roleIndex > reasoningIndex {
+ t.Fatalf("assistant role should be emitted before structured deltas: %s", body)
+ }
+ for _, want := range []string{`"system_fingerprint":"fp-test"`, `"created":1710000000`, `"reasoning_content":"tagged"`, `"content":"answer"`, `"tool_calls":[{"function":{"arguments":"{\"city\":\"Boston\"}","name":"legacy_lookup"}`, `"tool_calls":[{"function":{"arguments":"{\"q\":"`, `"finish_reason":"tool_calls"`, `"choices":[],"created":1710000000`, `"usage":{"completion_tokens":5,"prompt_tokens":4,"total_tokens":9}`, "data: [DONE]"} {
+ if !strings.Contains(body, want) {
+ t.Fatalf("SSE body missing %s: %s", want, body)
+ }
+ }
+ if strings.Contains(body, "reasoning_details") || strings.Contains(body, "") || strings.Contains(body, "functionCall") {
+ t.Fatalf("provider-specific reasoning/tool fields should be converted away: %s", body)
+ }
}
type fakeTaskExecutor struct {
executeCalls int
streamCalls int
- deltas []string
+ deltas []clients.StreamDeltaEvent
output map[string]any
}
diff --git a/apps/api/internal/httpapi/handlers.go b/apps/api/internal/httpapi/handlers.go
index 5b7e7d0..8f97b94 100644
--- a/apps/api/internal/httpapi/handlers.go
+++ b/apps/api/internal/httpapi/handlers.go
@@ -934,7 +934,7 @@ func (s *Server) createTask(kind string, compatible bool) http.Handler {
runCtx, cancelRun := s.requestExecutionContext(r)
defer cancelRun()
if responsePlan.compatibleMode {
- writeCompatibleTaskResponse(runCtx, w, r, s.runner, kind, model, task, user, responsePlan.streamMode)
+ writeCompatibleTaskResponse(runCtx, w, r, s.runner, kind, model, task, user, responsePlan.streamMode, streamIncludeUsage(body))
return
}
result, runErr := s.runner.Execute(runCtx, task, user)
@@ -1002,14 +1002,15 @@ type taskExecutor interface {
ExecuteStream(context.Context, store.GatewayTask, *auth.User, clients.StreamDelta) (runner.Result, error)
}
-func writeCompatibleTaskResponse(runCtx context.Context, w http.ResponseWriter, r *http.Request, executor taskExecutor, kind string, model string, task store.GatewayTask, user *auth.User, streamMode bool) {
+func writeCompatibleTaskResponse(runCtx context.Context, w http.ResponseWriter, r *http.Request, executor taskExecutor, kind string, model string, task store.GatewayTask, user *auth.User, streamMode bool, includeUsage bool) {
if streamMode {
flusher := prepareCompatibleStream(w)
- result, runErr := executor.ExecuteStream(runCtx, task, user, func(delta string) error {
+ streamWriter := newCompatibleStreamWriter(kind, model, includeUsage)
+ result, runErr := executor.ExecuteStream(runCtx, task, user, func(delta clients.StreamDeltaEvent) error {
if !requestStillConnected(r) {
return nil
}
- writeCompatibleDelta(w, kind, model, delta)
+ streamWriter.writeDelta(w, delta)
if flusher != nil {
flusher.Flush()
}
@@ -1043,7 +1044,7 @@ func writeCompatibleTaskResponse(runCtx context.Context, w http.ResponseWriter,
if !requestStillConnected(r) {
return
}
- writeCompatibleDone(w, kind, model, result.Output)
+ streamWriter.writeDone(w, result.Output)
if flusher != nil {
flusher.Flush()
}
@@ -1064,6 +1065,12 @@ func writeCompatibleTaskResponse(runCtx context.Context, w http.ResponseWriter,
writeJSON(w, http.StatusOK, result.Output)
}
+func streamIncludeUsage(body map[string]any) bool {
+ streamOptions, _ := body["stream_options"].(map[string]any)
+ includeUsage, _ := streamOptions["include_usage"].(bool)
+ return includeUsage
+}
+
func asyncRequest(r *http.Request) bool {
value := strings.TrimSpace(strings.ToLower(r.Header.Get("x-async")))
return value == "1" || value == "true" || value == "yes" || value == "on"
diff --git a/apps/api/internal/httpapi/openapi_models.go b/apps/api/internal/httpapi/openapi_models.go
index f80c30b..9871156 100644
--- a/apps/api/internal/httpapi/openapi_models.go
+++ b/apps/api/internal/httpapi/openapi_models.go
@@ -172,16 +172,18 @@ type PricingEstimateResponse struct {
}
type TaskRequest struct {
- Model string `json:"model" example:"gpt-4o-mini"`
- Messages []ChatMessage `json:"messages,omitempty"`
- Input string `json:"input,omitempty" example:"Tell me a short story"`
- Prompt string `json:"prompt,omitempty" example:"A watercolor robot reading a book"`
- Stream bool `json:"stream,omitempty" example:"false"`
- RunMode string `json:"runMode,omitempty" example:"simulation"`
- MaxTokens int `json:"max_tokens,omitempty" example:"512"`
- Size string `json:"size,omitempty" example:"1024x1024"`
- Duration int `json:"duration,omitempty" example:"5"`
- Resolution string `json:"resolution,omitempty" example:"720p"`
+ Model string `json:"model" example:"gpt-4o-mini"`
+ Messages []ChatMessage `json:"messages,omitempty"`
+ Input string `json:"input,omitempty" example:"Tell me a short story"`
+ Prompt string `json:"prompt,omitempty" example:"A watercolor robot reading a book"`
+ Stream bool `json:"stream,omitempty" example:"false"`
+ RunMode string `json:"runMode,omitempty" example:"simulation"`
+ MaxTokens int `json:"max_tokens,omitempty" example:"512"`
+ // ReasoningEffort 推理深度,OpenAI-compatible 请求字段;开放字符串,取值随 provider 和模型能力而定,常见值为 none、minimal、low、medium、high、xhigh,也可配置 max 等供应商自定义值。
+ ReasoningEffort string `json:"reasoning_effort,omitempty" example:"medium"`
+ Size string `json:"size,omitempty" example:"1024x1024"`
+ Duration int `json:"duration,omitempty" example:"5"`
+ Resolution string `json:"resolution,omitempty" example:"720p"`
}
type ChatCompletionRequest struct {
@@ -189,8 +191,10 @@ type ChatCompletionRequest struct {
Messages []ChatMessage `json:"messages"`
Temperature float64 `json:"temperature,omitempty" example:"0.7"`
MaxTokens int `json:"max_tokens,omitempty" example:"512"`
- Stream bool `json:"stream,omitempty" example:"false"`
- RunMode string `json:"runMode,omitempty" example:"simulation"`
+ // ReasoningEffort 推理深度,OpenAI-compatible 请求字段;开放字符串,取值随 provider 和模型能力而定,常见值为 none、minimal、low、medium、high、xhigh,也可配置 max 等供应商自定义值。
+ ReasoningEffort string `json:"reasoning_effort,omitempty" example:"medium"`
+ Stream bool `json:"stream,omitempty" example:"false"`
+ RunMode string `json:"runMode,omitempty" example:"simulation"`
}
type ChatMessage struct {
diff --git a/apps/api/internal/httpapi/streaming.go b/apps/api/internal/httpapi/streaming.go
index 310221e..be58ef3 100644
--- a/apps/api/internal/httpapi/streaming.go
+++ b/apps/api/internal/httpapi/streaming.go
@@ -1,6 +1,13 @@
package httpapi
-import "net/http"
+import (
+ "encoding/json"
+ "fmt"
+ "net/http"
+ "time"
+
+ "github.com/easyai/easyai-ai-gateway/apps/api/internal/clients"
+)
func prepareCompatibleStream(w http.ResponseWriter) http.Flusher {
w.Header().Set("Content-Type", "text/event-stream")
@@ -10,55 +17,180 @@ func prepareCompatibleStream(w http.ResponseWriter) http.Flusher {
return flusher
}
-func writeCompatibleDelta(w http.ResponseWriter, kind string, model string, content string) {
- if kind == "responses" {
- sendSSE(w, "response.output_text.delta", map[string]any{"type": "response.output_text.delta", "delta": content})
- return
- }
- sendSSE(w, "message", map[string]any{
- "id": "chatcmpl-stream",
- "object": "chat.completion.chunk",
- "model": model,
- "choices": []any{map[string]any{"index": 0, "delta": map[string]any{"content": content}, "finish_reason": nil}},
- })
+type compatibleStreamWriter struct {
+ kind string
+ model string
+ includeUsage bool
+
+ id string
+ created int64
+ systemFingerprint any
+ sentRole bool
+ sentFinish bool
+ sentUsage bool
}
-func writeCompatibleDone(w http.ResponseWriter, kind string, model string, output map[string]any) {
- if kind == "responses" {
+func newCompatibleStreamWriter(kind string, model string, includeUsage bool) *compatibleStreamWriter {
+ return &compatibleStreamWriter{
+ kind: kind,
+ model: model,
+ includeUsage: includeUsage,
+ id: "chatcmpl-stream",
+ created: time.Now().Unix(),
+ }
+}
+
+func (s *compatibleStreamWriter) writeDelta(w http.ResponseWriter, event clients.StreamDeltaEvent) {
+ if s.kind == "responses" {
+ if event.Text != "" {
+ sendSSE(w, "response.output_text.delta", map[string]any{"type": "response.output_text.delta", "delta": event.Text})
+ }
+ return
+ }
+
+ if event.Event != nil && isChatCompletionChunk(event.Event) {
+ s.writeChatChunk(w, event.Event)
+ return
+ }
+
+ if event.Text == "" && event.ReasoningContent == "" {
+ return
+ }
+ s.ensureRoleChunk(w)
+ if event.ReasoningContent != "" {
+ s.writeChatData(w, s.chatChunk([]any{map[string]any{"index": 0, "delta": map[string]any{"reasoning_content": event.ReasoningContent}, "finish_reason": nil}}, nil))
+ }
+ if event.Text != "" {
+ s.writeChatData(w, s.chatChunk([]any{map[string]any{"index": 0, "delta": map[string]any{"content": event.Text}, "finish_reason": nil}}, nil))
+ }
+}
+
+func (s *compatibleStreamWriter) writeDone(w http.ResponseWriter, output map[string]any) {
+ if s.kind == "responses" {
sendSSE(w, "response.completed", map[string]any{"type": "response.completed", "response": output})
return
}
- sendSSE(w, "message", map[string]any{
- "id": firstString(output["id"], "chatcmpl-stream"),
- "object": "chat.completion.chunk",
- "model": model,
- "choices": []any{map[string]any{"index": 0, "delta": map[string]any{}, "finish_reason": "stop"}},
- })
+ s.captureOutputMetadata(output)
+ if !s.sentRole {
+ s.ensureRoleChunk(w)
+ }
+ if !s.sentFinish {
+ s.writeChatData(w, s.chatChunk([]any{map[string]any{"index": 0, "delta": map[string]any{}, "finish_reason": finishReasonFromOutput(output)}}, nil))
+ s.sentFinish = true
+ }
+ if s.includeUsage && !s.sentUsage {
+ if usage, ok := output["usage"].(map[string]any); ok && len(usage) > 0 {
+ s.writeChatData(w, s.chatChunk([]any{}, usage))
+ s.sentUsage = true
+ }
+ }
+ s.writeDoneMarker(w)
+}
+
+func (s *compatibleStreamWriter) writeChatChunk(w http.ResponseWriter, chunk map[string]any) {
+ chunk = clients.NormalizeChatCompletionStreamEvent(chunk)
+ s.captureChunkMetadata(chunk)
+ choices, _ := chunk["choices"].([]any)
+ usage, hasUsage := chunk["usage"].(map[string]any)
+ if len(choices) == 0 && hasUsage {
+ if !s.includeUsage {
+ return
+ }
+ s.writeChatData(w, s.chatChunk([]any{}, usage))
+ s.sentUsage = true
+ return
+ }
+ if len(choices) > 0 && !chunkHasRole(choices) && !s.sentRole {
+ s.ensureRoleChunk(w)
+ }
+ if chunkHasRole(choices) {
+ s.sentRole = true
+ }
+ if chunkHasFinishReason(choices) {
+ s.sentFinish = true
+ }
+ normalized := cloneMap(chunk)
+ normalized["id"] = s.id
+ normalized["object"] = "chat.completion.chunk"
+ normalized["created"] = s.created
+ normalized["model"] = firstString(normalized["model"], s.model)
+ normalized["system_fingerprint"] = s.systemFingerprint
+ s.writeChatData(w, normalized)
+}
+
+func (s *compatibleStreamWriter) ensureRoleChunk(w http.ResponseWriter) {
+ if s.sentRole {
+ return
+ }
+ s.writeChatData(w, s.chatChunk([]any{map[string]any{"index": 0, "delta": map[string]any{"role": "assistant"}, "finish_reason": nil}}, nil))
+ s.sentRole = true
+}
+
+func (s *compatibleStreamWriter) chatChunk(choices []any, usage map[string]any) map[string]any {
+ chunk := map[string]any{
+ "id": s.id,
+ "object": "chat.completion.chunk",
+ "created": s.created,
+ "model": s.model,
+ "system_fingerprint": s.systemFingerprint,
+ "choices": choices,
+ }
+ if usage != nil {
+ chunk["usage"] = usage
+ } else {
+ chunk["usage"] = nil
+ }
+ return chunk
+}
+
+func (s *compatibleStreamWriter) writeChatData(w http.ResponseWriter, payload map[string]any) {
+ bytes, _ := json.Marshal(payload)
+ _, _ = fmt.Fprintf(w, "data: %s\n\n", bytes)
+}
+
+func (s *compatibleStreamWriter) writeDoneMarker(w http.ResponseWriter) {
+ _, _ = fmt.Fprint(w, "data: [DONE]\n\n")
+}
+
+func (s *compatibleStreamWriter) captureChunkMetadata(chunk map[string]any) {
+ if id := firstString(chunk["id"], ""); id != "" {
+ s.id = id
+ }
+ if model := firstString(chunk["model"], ""); model != "" {
+ s.model = model
+ }
+ if created := int64FromAny(chunk["created"]); created > 0 {
+ s.created = created
+ }
+ if value, ok := chunk["system_fingerprint"]; ok {
+ s.systemFingerprint = value
+ }
+}
+
+func (s *compatibleStreamWriter) captureOutputMetadata(output map[string]any) {
+ if id := firstString(output["id"], ""); id != "" {
+ s.id = id
+ }
+ if model := firstString(output["model"], ""); model != "" {
+ s.model = model
+ }
+ if created := int64FromAny(output["created"]); created > 0 {
+ s.created = created
+ }
+ if value, ok := output["system_fingerprint"]; ok {
+ s.systemFingerprint = value
+ }
}
func writeCompatibleStream(w http.ResponseWriter, kind string, model string, output map[string]any) {
prepareCompatibleStream(w)
+ writer := newCompatibleStreamWriter(kind, model, true)
content := extractOutputText(output)
if content == "" {
content = "done"
}
- if kind == "responses" {
- sendSSE(w, "response.output_text.delta", map[string]any{"type": "response.output_text.delta", "delta": content})
- sendSSE(w, "response.completed", map[string]any{"type": "response.completed", "response": output})
- return
- }
- sendSSE(w, "message", map[string]any{
- "id": output["id"],
- "object": "chat.completion.chunk",
- "model": model,
- "choices": []any{map[string]any{"index": 0, "delta": map[string]any{"content": content}, "finish_reason": nil}},
- })
- sendSSE(w, "message", map[string]any{
- "id": output["id"],
- "object": "chat.completion.chunk",
- "model": model,
- "choices": []any{map[string]any{"index": 0, "delta": map[string]any{}, "finish_reason": "stop"}},
- })
+ writer.writeDelta(w, clients.StreamDeltaEvent{Text: content})
+ writer.writeDone(w, output)
}
func firstString(value any, fallback string) string {
@@ -68,6 +200,68 @@ func firstString(value any, fallback string) string {
return fallback
}
+func int64FromAny(value any) int64 {
+ switch typed := value.(type) {
+ case int64:
+ return typed
+ case int:
+ return int64(typed)
+ case float64:
+ return int64(typed)
+ default:
+ return 0
+ }
+}
+
+func isChatCompletionChunk(event map[string]any) bool {
+ object, _ := event["object"].(string)
+ if object == "chat.completion.chunk" {
+ return true
+ }
+ _, hasChoices := event["choices"].([]any)
+ return hasChoices
+}
+
+func chunkHasRole(choices []any) bool {
+ for _, rawChoice := range choices {
+ choice, _ := rawChoice.(map[string]any)
+ delta, _ := choice["delta"].(map[string]any)
+ if role, ok := delta["role"].(string); ok && role != "" {
+ return true
+ }
+ }
+ return false
+}
+
+func chunkHasFinishReason(choices []any) bool {
+ for _, rawChoice := range choices {
+ choice, _ := rawChoice.(map[string]any)
+ if reason, ok := choice["finish_reason"].(string); ok && reason != "" {
+ return true
+ }
+ }
+ return false
+}
+
+func finishReasonFromOutput(output map[string]any) string {
+ choices, _ := output["choices"].([]any)
+ for _, rawChoice := range choices {
+ choice, _ := rawChoice.(map[string]any)
+ if reason, ok := choice["finish_reason"].(string); ok && reason != "" {
+ return reason
+ }
+ }
+ return "stop"
+}
+
+func cloneMap(value map[string]any) map[string]any {
+ out := map[string]any{}
+ for key, item := range value {
+ out[key] = item
+ }
+ return out
+}
+
func extractOutputText(output map[string]any) string {
if text, ok := output["output_text"].(string); ok {
return text
diff --git a/apps/web/src/pages/admin/BaseModelCapabilityEditor.tsx b/apps/web/src/pages/admin/BaseModelCapabilityEditor.tsx
index ef619cd..e7c0740 100644
--- a/apps/web/src/pages/admin/BaseModelCapabilityEditor.tsx
+++ b/apps/web/src/pages/admin/BaseModelCapabilityEditor.tsx
@@ -40,14 +40,14 @@ type ValueOption = { label: string; value: string };
const textFields: FieldDefinition[] = [
{ key: 'supportTool', label: '工具调用', hint: 'function calling / tools', type: 'boolean' },
{ key: 'supportStructuredOutput', label: '结构化输出', hint: 'JSON Schema 等输出', type: 'boolean' },
- { key: 'supportThinking', label: '思考能力', hint: '支持 thinking 参数', type: 'boolean' },
+ { key: 'supportThinking', label: '推理能力', hint: '支持 reasoning / thinking 参数', type: 'boolean' },
{ key: 'supportThinkingModeSwitch', label: '思考开关', hint: '可按请求切换', type: 'boolean' },
{ key: 'supportWebSearch', label: '联网搜索', type: 'boolean' },
{ key: 'max_context_tokens', label: '上下文 Token', placeholder: '128000', type: 'number' },
{ key: 'max_input_tokens', label: '最大输入 Token', placeholder: '64000', type: 'number' },
{ key: 'max_output_tokens', label: '最大输出 Token', placeholder: '8192', type: 'number' },
{ key: 'max_thinking_tokens', label: '最大思考 Token', placeholder: '32768', type: 'number' },
- { key: 'thinkingEffortLevels', label: '思考强度', placeholder: 'minimal, low, medium, high', type: 'list' },
+ { key: 'thinkingEffortLevels', label: '推理深度', hint: '声明模型支持的 reasoning_effort 取值,可填写 max 等供应商自定义值', placeholder: 'none, minimal, low, medium, high, xhigh, max', type: 'list' },
];
const embeddingFields: FieldDefinition[] = [
@@ -535,7 +535,7 @@ const imageAspectRatioOptions = [
'7:4',
'4:7',
];
-const thinkingEffortOptions = ['minimal', 'low', 'medium', 'high'];
+const thinkingEffortOptions = ['none', 'minimal', 'low', 'medium', 'high', 'xhigh', 'max'];
const omniVideoModeOptions = ['text_to_video', 'image_reference', 'element_reference', 'first_last_frame', 'video_reference', 'video_edit', 'multi_shot'];
const durationOptionValues = ['1', '2', '3', '4', '5', '6', '8', '10', '15', '20', '25', '30'];
const exclusiveCapabilityFields: Record = {
diff --git a/docs/design.md b/docs/design.md
index 79cc6aa..c29504f 100644
--- a/docs/design.md
+++ b/docs/design.md
@@ -1505,6 +1505,22 @@ type ModelClient interface {
- progress event snapshot:确保前端进度面板兼容。
- billing snapshot:确保预估扣费和最终 billings 语义一致。
+OpenAI-compatible 文本请求中的推理深度统一使用 `reasoning_effort` 表达。该字段是请求参数,不是响应中的推理内容;模型能力中用 `thinkingEffortLevels` 声明该模型支持的可选取值。`reasoning_effort` 必须按开放字符串处理,不在网关层写死枚举;实际可用集合必须以 provider 和模型能力为准。常见取值定义如下:
+
+| 值 | 含义 |
+| --- | --- |
+| `none` | 不启用额外推理,适用于不需要思考链路的低延迟请求。 |
+| `minimal` | 最小推理预算,优先降低延迟和成本。 |
+| `low` | 较低推理预算,用于简单推理任务。 |
+| `medium` | 默认/均衡推理深度,在质量、延迟和成本之间折中。 |
+| `high` | 较高推理预算,用于复杂规划、代码和多步推理。 |
+| `xhigh` | 最高推理预算,仅在模型和 provider 明确支持时使用,通常成本和延迟最高。 |
+| `max` | 供应商自定义最高档示例,例如 DeepSeek V4 类模型可能使用该值;语义以 provider 文档为准。 |
+
+除上表外,`thinkingEffortLevels` 可以保存任意供应商自定义值,例如 `max`、`ultra` 或后续模型新增档位。管理端只提供常见值作为快捷选项,不应阻止自定义输入;请求透传时按模型能力校验或直接交由上游 provider 返回错误。
+
+`reasoning_content`、推理过程 delta 或思考摘要在 Chat Completions 中不是 OpenAI 标准必需字段;如需兼容 DeepSeek、Qwen 等供应商扩展,应在 adapter 层作为可选扩展透传,并避免把 hidden reasoning 默认暴露给普通兼容客户端。
+
## 11. 队列持久化、恢复与限流执行
### 11.1 持久化队列原则