From 13186f8ed165dbeca80861f4ad8d853fb73cc863 Mon Sep 17 00:00:00 2001 From: chensipeng Date: Tue, 19 May 2026 17:46:27 +0800 Subject: [PATCH] =?UTF-8?q?feat(chat):=20=E5=AE=8C=E5=96=84=20Chat=20Compl?= =?UTF-8?q?etions=20=E5=85=BC=E5=AE=B9=E5=B1=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- apps/api/docs/swagger.json | 5 + apps/api/docs/swagger.yaml | 5 + apps/api/internal/clients/clients_test.go | 394 ++++++++++ apps/api/internal/clients/gemini.go | 184 ++++- apps/api/internal/clients/helpers.go | 711 +++++++++++++++++- apps/api/internal/clients/openai.go | 6 + apps/api/internal/clients/types.go | 8 +- .../httpapi/chat_completions_mode_test.go | 48 +- apps/api/internal/httpapi/handlers.go | 17 +- apps/api/internal/httpapi/openapi_models.go | 28 +- apps/api/internal/httpapi/streaming.go | 268 ++++++- .../pages/admin/BaseModelCapabilityEditor.tsx | 6 +- docs/design.md | 16 + 13 files changed, 1611 insertions(+), 85 deletions(-) diff --git a/apps/api/docs/swagger.json b/apps/api/docs/swagger.json index 805b611..f999c21 100644 --- a/apps/api/docs/swagger.json +++ b/apps/api/docs/swagger.json @@ -7281,6 +7281,11 @@ "type": "string", "example": "A watercolor robot reading a book" }, + "reasoning_effort": { + "description": "ReasoningEffort 推理深度,OpenAI-compatible 请求字段;开放字符串,取值随 provider 和模型能力而定,常见值为 none、minimal、low、medium、high、xhigh,也可配置 max 等供应商自定义值。", + "type": "string", + "example": "medium" + }, "resolution": { "type": "string", "example": "720p" diff --git a/apps/api/docs/swagger.yaml b/apps/api/docs/swagger.yaml index 16a7825..a583b79 100644 --- a/apps/api/docs/swagger.yaml +++ b/apps/api/docs/swagger.yaml @@ -587,6 +587,11 @@ definitions: prompt: example: A watercolor robot reading a book type: string + reasoning_effort: + description: ReasoningEffort 推理深度,OpenAI-compatible 请求字段;开放字符串,取值随 provider + 和模型能力而定,常见值为 none、minimal、low、medium、high、xhigh,也可配置 max 等供应商自定义值。 + example: medium + type: string resolution: example: 720p type: string diff --git a/apps/api/internal/clients/clients_test.go b/apps/api/internal/clients/clients_test.go index b94226a..3f3b1fb 100644 --- a/apps/api/internal/clients/clients_test.go +++ b/apps/api/internal/clients/clients_test.go @@ -151,6 +151,187 @@ func TestOpenAIClientChatContract(t *testing.T) { } } +func TestOpenAIClientChatRequestNormalizesToolContext(t *testing.T) { + var captured map[string]any + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if err := json.NewDecoder(r.Body).Decode(&captured); err != nil { + t.Fatalf("decode request: %v", err) + } + _ = json.NewEncoder(w).Encode(map[string]any{ + "id": "chatcmpl-normalized-request", + "object": "chat.completion", + "model": captured["model"], + "choices": []any{map[string]any{ + "message": map[string]any{"role": "assistant", "content": "ok"}, + }}, + }) + })) + defer server.Close() + + _, err := (OpenAIClient{HTTPClient: server.Client()}).Run(context.Background(), Request{ + Kind: "chat.completions", + Model: "openai:gpt-4o-mini", + Body: map[string]any{ + "model": "openai:gpt-4o-mini", + "messages": []any{ + map[string]any{ + "role": "assistant", + "functionCall": map[string]any{ + "name": "lookup", + "arguments": map[string]any{"q": "weather"}, + }, + }, + map[string]any{"role": "tool", "toolCallId": "call_0", "content": "sunny"}, + map[string]any{ + "role": "user", + "content": []any{ + map[string]any{"type": "text", "text": "keep this"}, + map[string]any{"type": "tool_result", "tool_use_id": "toolu_1", "content": map[string]any{"ok": true}}, + }, + }, + }, + }, + Candidate: store.RuntimeModelCandidate{ + BaseURL: server.URL, + ProviderModelName: "openai-compatible-gpt-4o-mini", + Credentials: map[string]any{"apiKey": "test-key"}, + }, + }) + if err != nil { + t.Fatalf("run openai client: %v", err) + } + messages, _ := captured["messages"].([]any) + if len(messages) != 4 { + t.Fatalf("unexpected normalized messages: %+v", messages) + } + assistant, _ := messages[0].(map[string]any) + if _, ok := assistant["functionCall"]; ok { + t.Fatalf("functionCall should be converted away: %+v", assistant) + } + toolCalls, _ := assistant["tool_calls"].([]any) + toolCall, _ := toolCalls[0].(map[string]any) + function, _ := toolCall["function"].(map[string]any) + if function["name"] != "lookup" || function["arguments"] != `{"q":"weather"}` { + t.Fatalf("unexpected normalized tool call: %+v", assistant) + } + toolMessage, _ := messages[1].(map[string]any) + if toolMessage["tool_call_id"] != "call_0" || toolMessage["toolCallId"] != nil { + t.Fatalf("tool message was not normalized: %+v", toolMessage) + } + keptUser, _ := messages[2].(map[string]any) + convertedToolResult, _ := messages[3].(map[string]any) + if keptUser["content"] != "keep this" || convertedToolResult["role"] != "tool" || convertedToolResult["tool_call_id"] != "toolu_1" || convertedToolResult["content"] != `{"ok":true}` { + t.Fatalf("tool_result block was not restored: user=%+v tool=%+v", keptUser, convertedToolResult) + } +} + +func TestOpenAIClientChatResponseNormalizesReasoning(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + _ = json.NewEncoder(w).Encode(map[string]any{ + "id": "chatcmpl-reasoning", + "object": "chat.completion", + "model": "openrouter-test", + "choices": []any{map[string]any{ + "message": map[string]any{ + "role": "assistant", + "reasoning_details": []any{ + map[string]any{"type": "reasoning.text", "text": "detail-"}, + map[string]any{"type": "reasoning.summary", "summary": "summary"}, + map[string]any{"type": "reasoning.encrypted", "data": "secret"}, + }, + "content": "taggedanswer", + }, + }}, + }) + })) + defer server.Close() + + response, err := (OpenAIClient{HTTPClient: server.Client()}).Run(context.Background(), Request{ + Kind: "chat.completions", + Model: "OpenRouter-Test", + Body: map[string]any{"model": "OpenRouter-Test", "messages": []any{map[string]any{"role": "user", "content": "ping"}}}, + Candidate: store.RuntimeModelCandidate{ + BaseURL: server.URL, + ModelName: "openrouter-test", + Credentials: map[string]any{"apiKey": "test-key"}, + }, + }) + if err != nil { + t.Fatalf("run openai client: %v", err) + } + choices, _ := response.Result["choices"].([]any) + choice, _ := choices[0].(map[string]any) + message, _ := choice["message"].(map[string]any) + if message["reasoning_content"] != "detail-summarytagged" || message["content"] != "answer" { + t.Fatalf("reasoning was not normalized: %+v", response.Result) + } + if _, ok := message["reasoning_details"]; ok { + t.Fatalf("reasoning_details should be converted away: %+v", message) + } +} + +func TestOpenAIClientChatResponseNormalizesToolCallFormats(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + _ = json.NewEncoder(w).Encode(map[string]any{ + "id": "chatcmpl-tools", + "object": "chat.completion", + "model": "tool-format-test", + "choices": []any{map[string]any{ + "message": map[string]any{ + "role": "assistant", + "content": []any{ + map[string]any{"type": "text", "text": "calling tools"}, + map[string]any{"type": "tool_use", "id": "toolu_1", "name": "anthropic_lookup", "input": map[string]any{"city": "Boston"}}, + }, + "toolCalls": []any{map[string]any{"id": "call_camel", "functionCall": map[string]any{"name": "camel_lookup", "args": map[string]any{"city": "SF"}}}}, + "function_call": map[string]any{"name": "legacy_lookup", "arguments": "{\"city\":\"NYC\"}"}, + }, + }}, + }) + })) + defer server.Close() + + response, err := (OpenAIClient{HTTPClient: server.Client()}).Run(context.Background(), Request{ + Kind: "chat.completions", + Model: "Tool-Format-Test", + Body: map[string]any{"model": "Tool-Format-Test", "messages": []any{map[string]any{"role": "user", "content": "ping"}}}, + Candidate: store.RuntimeModelCandidate{ + BaseURL: server.URL, + ModelName: "tool-format-test", + Credentials: map[string]any{"apiKey": "test-key"}, + }, + }) + if err != nil { + t.Fatalf("run openai client: %v", err) + } + choices, _ := response.Result["choices"].([]any) + choice, _ := choices[0].(map[string]any) + message, _ := choice["message"].(map[string]any) + if message["content"] != "calling tools" { + t.Fatalf("tool_use block should be removed from content: %+v", message) + } + for _, key := range []string{"toolCalls", "function_call"} { + if _, ok := message[key]; ok { + t.Fatalf("%s should be converted away: %+v", key, message) + } + } + toolCalls, _ := message["tool_calls"].([]any) + if len(toolCalls) != 3 { + t.Fatalf("expected 3 normalized tool calls, got %+v", message) + } + assertToolCall := func(index int, id string, name string, arguments string) { + t.Helper() + toolCall, _ := toolCalls[index].(map[string]any) + function, _ := toolCall["function"].(map[string]any) + if toolCall["id"] != id || toolCall["type"] != "function" || function["name"] != name || function["arguments"] != arguments { + t.Fatalf("unexpected tool call[%d]: %+v", index, toolCall) + } + } + assertToolCall(0, "call_camel", "camel_lookup", "{\"city\":\"SF\"}") + assertToolCall(1, "call_1", "legacy_lookup", "{\"city\":\"NYC\"}") + assertToolCall(2, "toolu_1", "anthropic_lookup", "{\"city\":\"Boston\"}") +} + func TestOpenAIClientChatStreamContract(t *testing.T) { var gotStream bool var gotIncludeUsage bool @@ -204,6 +385,133 @@ func TestOpenAIClientChatStreamContract(t *testing.T) { } } +func TestOpenAIClientChatStreamPreservesStructuredDeltas(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/event-stream") + _, _ = w.Write([]byte("data: {\"id\":\"chatcmpl-structured\",\"object\":\"chat.completion.chunk\",\"model\":\"openrouter-reasoner\",\"choices\":[{\"delta\":{\"reasoning_details\":[{\"type\":\"reasoning.text\",\"text\":\"detail-\"},{\"type\":\"reasoning.summary\",\"summary\":\"summary\"},{\"type\":\"reasoning.encrypted\",\"data\":\"secret\"}]}}],\"usage\":null}\n\n")) + _, _ = w.Write([]byte("data: {\"id\":\"chatcmpl-structured\",\"object\":\"chat.completion.chunk\",\"model\":\"openrouter-reasoner\",\"choices\":[{\"delta\":{\"content\":\"taggedanswer\"}}],\"usage\":null}\n\n")) + _, _ = w.Write([]byte("data: {\"id\":\"chatcmpl-structured\",\"object\":\"chat.completion.chunk\",\"model\":\"deepseek-v4\",\"choices\":[{\"delta\":{\"tool_calls\":[{\"index\":0,\"id\":\"call_1\",\"type\":\"function\",\"function\":{\"name\":\"lookup\",\"arguments\":\"{\\\"q\\\":\"}}]}}],\"usage\":null}\n\n")) + _, _ = w.Write([]byte("data: {\"id\":\"chatcmpl-structured\",\"object\":\"chat.completion.chunk\",\"model\":\"deepseek-v4\",\"choices\":[{\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"\\\"weather\\\"}\"}}]},\"finish_reason\":\"tool_calls\"}],\"usage\":null}\n\n")) + _, _ = w.Write([]byte("data: [DONE]\n\n")) + })) + defer server.Close() + + captured := make([]StreamDeltaEvent, 0) + response, err := (OpenAIClient{HTTPClient: server.Client()}).Run(context.Background(), Request{ + Kind: "chat.completions", + Model: "DeepSeek-V4", + Body: map[string]any{ + "model": "DeepSeek-V4", + "messages": []any{map[string]any{"role": "user", "content": "ping"}}, + "stream": true, + }, + Candidate: store.RuntimeModelCandidate{ + BaseURL: server.URL, + ModelName: "deepseek-v4", + Credentials: map[string]any{"apiKey": "test-key"}, + }, + StreamDelta: func(event StreamDeltaEvent) error { + captured = append(captured, event) + return nil + }, + }) + if err != nil { + t.Fatalf("run openai structured stream client: %v", err) + } + if len(captured) != 4 || captured[0].ReasoningContent != "detail-summary" || captured[1].ReasoningContent != "tagged" || captured[1].Text != "answer" || captured[2].Event == nil { + t.Fatalf("structured stream events were not preserved: %+v", captured) + } + firstChoices, _ := captured[0].Event["choices"].([]any) + firstChoice, _ := firstChoices[0].(map[string]any) + firstDelta, _ := firstChoice["delta"].(map[string]any) + if firstDelta["reasoning_content"] != "detail-summary" { + t.Fatalf("reasoning_details were not converted in stream event: %+v", captured[0].Event) + } + if _, ok := firstDelta["reasoning_details"]; ok { + t.Fatalf("reasoning_details should be removed from stream event: %+v", captured[0].Event) + } + choices, _ := response.Result["choices"].([]any) + choice, _ := choices[0].(map[string]any) + message, _ := choice["message"].(map[string]any) + if message["reasoning_content"] != "detail-summarytagged" || message["content"] != "answer" || choice["finish_reason"] != "tool_calls" { + t.Fatalf("reasoning or finish reason missing from aggregated result: %+v", response.Result) + } + toolCalls, _ := message["tool_calls"].([]any) + toolCall, _ := toolCalls[0].(map[string]any) + function, _ := toolCall["function"].(map[string]any) + if function["arguments"] != "{\"q\":\"weather\"}" { + t.Fatalf("tool call arguments were not aggregated: %+v", response.Result) + } +} + +func TestOpenAIClientChatStreamNormalizesToolCallFormats(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/event-stream") + _, _ = w.Write([]byte("data: {\"id\":\"chatcmpl-tools-stream\",\"object\":\"chat.completion.chunk\",\"model\":\"tool-format-test\",\"choices\":[{\"delta\":{\"function_call\":{\"name\":\"legacy_lookup\",\"arguments\":\"{\\\"city\\\":\"}}}],\"usage\":null}\n\n")) + _, _ = w.Write([]byte("data: {\"id\":\"chatcmpl-tools-stream\",\"object\":\"chat.completion.chunk\",\"model\":\"tool-format-test\",\"choices\":[{\"delta\":{\"functionCall\":{\"arguments\":\"\\\"Boston\\\"}\"}}}],\"usage\":null}\n\n")) + _, _ = w.Write([]byte("data: {\"id\":\"chatcmpl-tools-stream\",\"object\":\"chat.completion.chunk\",\"model\":\"tool-format-test\",\"choices\":[{\"delta\":{\"toolCall\":{\"index\":1,\"id\":\"call_camel\",\"functionCall\":{\"name\":\"camel_lookup\",\"args\":{\"city\":\"SF\"}}}},\"finish_reason\":\"tool_calls\"}],\"usage\":null}\n\n")) + _, _ = w.Write([]byte("data: [DONE]\n\n")) + })) + defer server.Close() + + captured := make([]StreamDeltaEvent, 0) + response, err := (OpenAIClient{HTTPClient: server.Client()}).Run(context.Background(), Request{ + Kind: "chat.completions", + Model: "Tool-Format-Test", + Body: map[string]any{ + "model": "Tool-Format-Test", + "messages": []any{map[string]any{"role": "user", "content": "ping"}}, + "stream": true, + }, + Candidate: store.RuntimeModelCandidate{ + BaseURL: server.URL, + ModelName: "tool-format-test", + Credentials: map[string]any{"apiKey": "test-key"}, + }, + StreamDelta: func(event StreamDeltaEvent) error { + captured = append(captured, event) + return nil + }, + }) + if err != nil { + t.Fatalf("run openai stream client: %v", err) + } + if len(captured) != 3 { + t.Fatalf("unexpected captured events: %+v", captured) + } + for _, event := range captured { + choices, _ := event.Event["choices"].([]any) + choice, _ := choices[0].(map[string]any) + delta, _ := choice["delta"].(map[string]any) + if _, ok := delta["function_call"]; ok { + t.Fatalf("function_call should be converted away: %+v", event.Event) + } + if _, ok := delta["functionCall"]; ok { + t.Fatalf("functionCall should be converted away: %+v", event.Event) + } + if _, ok := delta["toolCall"]; ok { + t.Fatalf("toolCall should be converted away: %+v", event.Event) + } + } + choices, _ := response.Result["choices"].([]any) + choice, _ := choices[0].(map[string]any) + message, _ := choice["message"].(map[string]any) + toolCalls, _ := message["tool_calls"].([]any) + if len(toolCalls) != 2 || choice["finish_reason"] != "tool_calls" { + t.Fatalf("unexpected normalized stream result: %+v", response.Result) + } + legacyCall, _ := toolCalls[0].(map[string]any) + legacyFunction, _ := legacyCall["function"].(map[string]any) + if legacyFunction["name"] != "legacy_lookup" || legacyFunction["arguments"] != "{\"city\":\"Boston\"}" { + t.Fatalf("legacy function_call was not aggregated: %+v", response.Result) + } + camelCall, _ := toolCalls[1].(map[string]any) + camelFunction, _ := camelCall["function"].(map[string]any) + if camelCall["id"] != "call_camel" || camelFunction["name"] != "camel_lookup" || camelFunction["arguments"] != "{\"city\":\"SF\"}" { + t.Fatalf("camel toolCall was not normalized: %+v", response.Result) + } +} + func TestGeminiClientChatContract(t *testing.T) { var gotPath string var gotKey string @@ -261,6 +569,92 @@ func TestGeminiClientChatContract(t *testing.T) { } } +func TestGeminiClientChatRestoresToolContext(t *testing.T) { + var captured map[string]any + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if err := json.NewDecoder(r.Body).Decode(&captured); err != nil { + t.Fatalf("decode request: %v", err) + } + _ = json.NewEncoder(w).Encode(map[string]any{ + "candidates": []any{map[string]any{ + "content": map[string]any{"parts": []any{map[string]any{"text": "gemini ok"}}}, + }}, + }) + })) + defer server.Close() + + _, err := (GeminiClient{HTTPClient: server.Client()}).Run(context.Background(), Request{ + Kind: "chat.completions", + Model: "gemini:gemini-2.5-flash", + Body: map[string]any{ + "model": "gemini:gemini-2.5-flash", + "messages": []any{ + map[string]any{"role": "user", "content": "weather?"}, + map[string]any{ + "role": "assistant", + "content": "checking", + "tool_calls": []any{map[string]any{ + "id": "call_weather", + "type": "function", + "function": map[string]any{ + "name": "get_weather", + "arguments": `{"city":"SF"}`, + }, + }}, + }, + map[string]any{"role": "tool", "tool_call_id": "call_weather", "content": `{"temperature":"72F"}`}, + }, + "tools": []any{map[string]any{ + "type": "function", + "function": map[string]any{ + "name": "get_weather", + "description": "lookup weather", + "parameters": map[string]any{"type": "object"}, + }, + }}, + }, + Candidate: store.RuntimeModelCandidate{ + BaseURL: server.URL, + ProviderModelName: "gemini-2.5-flash", + ModelType: "chat", + Credentials: map[string]any{"apiKey": "gemini-key"}, + }, + }) + if err != nil { + t.Fatalf("run gemini client: %v", err) + } + contents, _ := captured["contents"].([]any) + if len(contents) != 3 { + t.Fatalf("unexpected Gemini contents: %+v", captured) + } + modelTurn, _ := contents[1].(map[string]any) + if modelTurn["role"] != "model" { + t.Fatalf("assistant turn should become Gemini model turn: %+v", modelTurn) + } + modelParts, _ := modelTurn["parts"].([]any) + callPart, _ := modelParts[1].(map[string]any) + functionCall, _ := callPart["functionCall"].(map[string]any) + args, _ := functionCall["args"].(map[string]any) + if functionCall["name"] != "get_weather" || args["city"] != "SF" { + t.Fatalf("tool call was not restored for Gemini: %+v", modelTurn) + } + toolTurn, _ := contents[2].(map[string]any) + toolParts, _ := toolTurn["parts"].([]any) + responsePart, _ := toolParts[0].(map[string]any) + functionResponse, _ := responsePart["functionResponse"].(map[string]any) + response, _ := functionResponse["response"].(map[string]any) + if toolTurn["role"] != "user" || functionResponse["name"] != "get_weather" || response["temperature"] != "72F" { + t.Fatalf("tool result was not restored for Gemini: %+v", toolTurn) + } + tools, _ := captured["tools"].([]any) + declarationGroup, _ := tools[0].(map[string]any) + declarations, _ := declarationGroup["functionDeclarations"].([]any) + declaration, _ := declarations[0].(map[string]any) + if declaration["name"] != "get_weather" || declaration["description"] != "lookup weather" { + t.Fatalf("tool declaration was not converted for Gemini: %+v", captured["tools"]) + } +} + func TestGeminiURLAcceptsVersionedBaseURL(t *testing.T) { got := geminiURL("https://generativelanguage.googleapis.com/v1beta", "gemini-2.5-flash", "test-key") want := "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent?key=test-key" diff --git a/apps/api/internal/clients/gemini.go b/apps/api/internal/clients/gemini.go index 6a8f6ed..cdf32ab 100644 --- a/apps/api/internal/clients/gemini.go +++ b/apps/api/internal/clients/gemini.go @@ -70,15 +70,185 @@ func geminiBody(request Request) map[string]any { return map[string]any{"contents": contents} } prompt := firstNonEmptyPrompt(request.Body, "") - if prompt == "" { - prompt = textFromMessages(request.Body) + if prompt != "" { + return map[string]any{ + "contents": []any{map[string]any{ + "role": "user", + "parts": []any{map[string]any{"text": prompt}}, + }}, + } } - return map[string]any{ - "contents": []any{map[string]any{ - "role": "user", - "parts": []any{map[string]any{"text": prompt}}, - }}, + body := map[string]any{"contents": geminiContentsFromMessages(request.Body)} + if tools := geminiToolsFromOpenAITools(request.Body["tools"]); len(tools) > 0 { + body["tools"] = tools } + contents, _ := body["contents"].([]any) + if len(contents) > 0 { + return body + } + return map[string]any{"contents": []any{map[string]any{ + "role": "user", + "parts": []any{map[string]any{"text": textFromMessages(request.Body)}}, + }}} +} + +func geminiContentsFromMessages(body map[string]any) []any { + normalized := NormalizeChatCompletionRequestBody(body) + messages, _ := normalized["messages"].([]any) + contents := make([]any, 0, len(messages)) + toolNames := map[string]string{} + for _, rawMessage := range messages { + message, _ := rawMessage.(map[string]any) + if len(message) == 0 { + continue + } + role := stringFromAny(message["role"]) + if role == "tool" { + toolCallID := stringFromAny(message["tool_call_id"]) + name := toolNames[toolCallID] + if name == "" { + name = toolCallID + } + if name == "" { + name = "tool" + } + contents = append(contents, map[string]any{ + "role": "user", + "parts": []any{map[string]any{"functionResponse": map[string]any{ + "name": name, + "response": geminiFunctionResponsePayload(message["content"]), + }}}, + }) + continue + } + parts := geminiTextParts(message["content"]) + if role == "assistant" { + for _, rawToolCall := range toolCallsSlice(message["tool_calls"]) { + toolCall, _ := rawToolCall.(map[string]any) + function, _ := toolCall["function"].(map[string]any) + name := stringFromAny(function["name"]) + if name == "" { + continue + } + if id := stringFromAny(toolCall["id"]); id != "" { + toolNames[id] = name + } + parts = append(parts, map[string]any{"functionCall": map[string]any{ + "name": name, + "args": geminiFunctionArgs(function["arguments"]), + }}) + } + } + if len(parts) == 0 { + continue + } + contents = append(contents, map[string]any{ + "role": geminiRole(role), + "parts": parts, + }) + } + return contents +} + +func geminiRole(role string) string { + if role == "assistant" { + return "model" + } + return "user" +} + +func geminiTextParts(content any) []any { + parts := make([]any, 0) + switch typed := content.(type) { + case string: + if strings.TrimSpace(typed) != "" { + parts = append(parts, map[string]any{"text": typed}) + } + case []any: + for _, rawPart := range typed { + part, _ := rawPart.(map[string]any) + if text := stringFromAny(firstPresent(part["text"], part["content"])); strings.TrimSpace(text) != "" { + parts = append(parts, map[string]any{"text": text}) + } + } + } + return parts +} + +func toolCallsSlice(value any) []any { + switch typed := value.(type) { + case []any: + return typed + case map[string]any: + return []any{typed} + default: + return nil + } +} + +func geminiFunctionArgs(value any) map[string]any { + if value == nil { + return map[string]any{} + } + if args, ok := value.(map[string]any); ok { + return args + } + if text, ok := value.(string); ok { + if strings.TrimSpace(text) == "" { + return map[string]any{} + } + var args map[string]any + if err := json.Unmarshal([]byte(text), &args); err == nil { + return args + } + return map[string]any{"arguments": text} + } + return map[string]any{"arguments": value} +} + +func geminiFunctionResponsePayload(value any) map[string]any { + if payload, ok := value.(map[string]any); ok { + return payload + } + if text, ok := value.(string); ok { + var payload map[string]any + if err := json.Unmarshal([]byte(text), &payload); err == nil { + return payload + } + return map[string]any{"content": text} + } + if value == nil { + return map[string]any{} + } + return map[string]any{"content": value} +} + +func geminiToolsFromOpenAITools(value any) []any { + tools, ok := value.([]any) + if !ok || len(tools) == 0 { + return nil + } + declarations := make([]any, 0, len(tools)) + for _, rawTool := range tools { + tool, _ := rawTool.(map[string]any) + function, _ := tool["function"].(map[string]any) + name := stringFromAny(function["name"]) + if name == "" { + continue + } + declaration := map[string]any{"name": name} + if description := stringFromAny(function["description"]); description != "" { + declaration["description"] = description + } + if parameters, ok := function["parameters"]; ok { + declaration["parameters"] = parameters + } + declarations = append(declarations, declaration) + } + if len(declarations) == 0 { + return nil + } + return []any{map[string]any{"functionDeclarations": declarations}} } func geminiResult(request Request, raw map[string]any) map[string]any { diff --git a/apps/api/internal/clients/helpers.go b/apps/api/internal/clients/helpers.go index 8461a38..cc853de 100644 --- a/apps/api/internal/clients/helpers.go +++ b/apps/api/internal/clients/helpers.go @@ -8,6 +8,7 @@ import ( "io" "math" "net/http" + "sort" "strings" "time" ) @@ -87,8 +88,11 @@ func decodeOpenAIStreamReader(reader io.Reader, onDelta StreamDelta) (map[string scanner.Buffer(make([]byte, 0, 64*1024), 16*1024*1024) rawLines := make([]string, 0) parts := make([]string, 0) + reasoningParts := make([]string, 0) var last map[string]any var usage Usage + finishReason := "" + toolCalls := map[int]map[string]any{} for scanner.Scan() { rawLine := scanner.Text() rawLines = append(rawLines, rawLine) @@ -104,13 +108,23 @@ func decodeOpenAIStreamReader(reader io.Reader, onDelta StreamDelta) (map[string if err := json.Unmarshal([]byte(payload), &event); err != nil { continue } + event = NormalizeChatCompletionStreamEvent(event) last = event - if text := streamEventText(event); text != "" { + text := streamEventText(event) + reasoningText := streamEventReasoningContent(event) + if text != "" { parts = append(parts, text) - if onDelta != nil { - if err := onDelta(text); err != nil { - return nil, true, err - } + } + if reasoningText != "" { + reasoningParts = append(reasoningParts, reasoningText) + } + aggregateStreamToolCalls(event, toolCalls) + if reason := streamEventFinishReason(event); reason != "" { + finishReason = reason + } + if onDelta != nil { + if err := onDelta(StreamDeltaEvent{Text: text, ReasoningContent: reasoningText, Event: event}); err != nil { + return nil, true, err } } if eventUsage := usageFromOpenAI(event); eventUsage.TotalTokens > 0 { @@ -131,7 +145,7 @@ func decodeOpenAIStreamReader(reader io.Reader, onDelta StreamDelta) (map[string } return out, true, nil } - return buildOpenAIStreamResult(last, parts, usage), true, nil + return buildOpenAIStreamResult(last, parts, reasoningParts, toolCalls, finishReason, usage), true, nil } func decodeOpenAIStream(raw []byte) (map[string]any, bool) { @@ -142,22 +156,32 @@ func decodeOpenAIStream(raw []byte) (map[string]any, bool) { return result, ok && err == nil } -func buildOpenAIStreamResult(last map[string]any, parts []string, usage Usage) map[string]any { - if len(parts) == 0 { +func buildOpenAIStreamResult(last map[string]any, parts []string, reasoningParts []string, toolCalls map[int]map[string]any, finishReason string, usage Usage) map[string]any { + if len(parts) == 0 && len(reasoningParts) == 0 && len(toolCalls) == 0 { return last } + message := map[string]any{ + "role": "assistant", + "content": strings.Join(parts, ""), + } + if len(reasoningParts) > 0 { + message["reasoning_content"] = strings.Join(reasoningParts, "") + } + if len(toolCalls) > 0 { + message["tool_calls"] = sortedStreamToolCalls(toolCalls) + } + if finishReason == "" { + finishReason = "stop" + } var out map[string]any out = map[string]any{ "id": stringFromAny(firstPresent(last["id"], "chatcmpl-stream")), "object": "chat.completion", "model": stringFromAny(last["model"]), "choices": []any{map[string]any{ - "index": 0, - "message": map[string]any{ - "role": "assistant", - "content": strings.Join(parts, ""), - }, - "finish_reason": "stop", + "index": 0, + "message": message, + "finish_reason": finishReason, }}, } if usage.TotalTokens > 0 { @@ -170,6 +194,571 @@ func buildOpenAIStreamResult(last map[string]any, parts []string, usage Usage) m return out } +// NormalizeChatCompletionRequestBody 将后续请求里的工具调用上下文还原为 +// OpenAI Chat Completions 标准格式,便于再次发送给 OpenAI-compatible 上游。 +func NormalizeChatCompletionRequestBody(body map[string]any) map[string]any { + if body == nil { + return nil + } + out := cloneBody(body) + messages, ok := out["messages"].([]any) + if !ok { + return out + } + normalizedMessages := make([]any, 0, len(messages)) + for _, rawMessage := range messages { + message, ok := rawMessage.(map[string]any) + if !ok { + normalizedMessages = append(normalizedMessages, rawMessage) + continue + } + copied := cloneMapAny(message) + normalizeToolCallsContainer(copied, false) + normalizeToolMessageFields(copied) + toolMessages, cleanContent, changed := toolResultMessagesFromContent(copied["content"]) + if changed { + if cleanContent != nil && contentHasText(cleanContent) { + copied["content"] = cleanContent + normalizedMessages = append(normalizedMessages, copied) + } else if len(copied) > 1 || copied["role"] != nil { + delete(copied, "content") + if len(copied) > 1 { + normalizedMessages = append(normalizedMessages, copied) + } + } + normalizedMessages = append(normalizedMessages, toolMessages...) + continue + } + normalizedMessages = append(normalizedMessages, copied) + } + out["messages"] = normalizedMessages + return out +} + +func cloneMapAny(source map[string]any) map[string]any { + if source == nil { + return nil + } + out := make(map[string]any, len(source)) + for key, value := range source { + out[key] = value + } + return out +} + +// NormalizeChatCompletionResult 将供应商自定义推理字段归一化到 +// message.reasoning_content,并从最终回答 content 中剥离内联推理块。 +// 加密推理载荷不可展示,且不应作为正文输出,因此会被忽略。 +func NormalizeChatCompletionResult(result map[string]any) map[string]any { + if result == nil { + return nil + } + choices, _ := result["choices"].([]any) + for _, rawChoice := range choices { + choice, _ := rawChoice.(map[string]any) + if message, ok := choice["message"].(map[string]any); ok { + normalizeToolCallsContainer(message, false) + normalizeReasoningContainer(message, false) + } + if delta, ok := choice["delta"].(map[string]any); ok { + normalizeToolCallsContainer(delta, true) + normalizeReasoningContainer(delta, true) + } + } + return result +} + +// NormalizeChatCompletionStreamEvent 将供应商自定义流式推理字段 +// (例如 reasoning_details 或 reasoning)归一化到 delta.reasoning_content。 +func NormalizeChatCompletionStreamEvent(event map[string]any) map[string]any { + if event == nil { + return nil + } + choices, _ := event["choices"].([]any) + for _, rawChoice := range choices { + choice, _ := rawChoice.(map[string]any) + if delta, ok := choice["delta"].(map[string]any); ok { + normalizeToolCallsContainer(delta, true) + normalizeReasoningContainer(delta, true) + } + if message, ok := choice["message"].(map[string]any); ok { + normalizeToolCallsContainer(message, false) + normalizeReasoningContainer(message, false) + } + } + return event +} + +func normalizeToolCallsContainer(container map[string]any, stream bool) { + if container == nil { + return + } + toolCalls := make([]any, 0) + for _, rawToolCall := range rawToolCallValues(container) { + for _, normalized := range normalizeRawToolCalls(rawToolCall, len(toolCalls), stream) { + toolCalls = append(toolCalls, normalized) + } + } + if contentToolCalls, cleanContent, changed := toolCallsFromContent(container["content"], len(toolCalls), stream); changed { + toolCalls = append(toolCalls, contentToolCalls...) + setNormalizedContent(container, cleanContent, stream) + } + if partToolCalls := toolCallsFromParts(container["parts"], len(toolCalls), stream); len(partToolCalls) > 0 { + toolCalls = append(toolCalls, partToolCalls...) + delete(container, "parts") + } + if len(toolCalls) > 0 { + container["tool_calls"] = toolCalls + } + for _, key := range []string{"tool_call", "toolCall", "toolCalls", "function_call", "functionCall"} { + delete(container, key) + } +} + +func normalizeToolMessageFields(message map[string]any) { + if message == nil { + return + } + if id := firstNonEmptyString(message["tool_call_id"], message["toolCallId"], message["tool_use_id"], message["toolUseId"], message["call_id"], message["callId"]); id != "" { + message["tool_call_id"] = id + } + for _, key := range []string{"toolCallId", "tool_use_id", "toolUseId", "call_id", "callId"} { + delete(message, key) + } +} + +func toolResultMessagesFromContent(value any) ([]any, any, bool) { + blocks, ok := value.([]any) + if !ok { + return nil, nil, false + } + toolMessages := make([]any, 0) + remaining := make([]any, 0, len(blocks)) + for _, rawBlock := range blocks { + block, _ := rawBlock.(map[string]any) + if len(block) == 0 || stringFromAny(block["type"]) != "tool_result" { + remaining = append(remaining, rawBlock) + continue + } + message := map[string]any{ + "role": "tool", + "tool_call_id": firstNonEmptyString(block["tool_call_id"], block["toolCallId"], block["tool_use_id"], block["toolUseId"], block["id"]), + "content": toolResultContent(block["content"]), + } + toolMessages = append(toolMessages, message) + } + if len(toolMessages) == 0 { + return nil, nil, false + } + return toolMessages, contentBlocksText(remaining), true +} + +func toolResultContent(value any) any { + if text, ok := value.(string); ok { + return text + } + return jsonStringFromAny(value) +} + +func contentHasText(value any) bool { + switch typed := value.(type) { + case string: + return strings.TrimSpace(typed) != "" + case []any: + return len(typed) > 0 + default: + return value != nil + } +} + +func rawToolCallValues(container map[string]any) []any { + values := make([]any, 0, 6) + for _, key := range []string{"tool_calls", "tool_call", "toolCalls", "toolCall", "function_call", "functionCall"} { + if value, ok := container[key]; ok { + values = append(values, value) + } + } + return values +} + +func normalizeRawToolCalls(value any, startIndex int, stream bool) []any { + switch typed := value.(type) { + case []any: + out := make([]any, 0, len(typed)) + for _, raw := range typed { + if toolCall := normalizeToolCall(raw, startIndex+len(out), stream); toolCall != nil { + out = append(out, toolCall) + } + } + return out + default: + if toolCall := normalizeToolCall(value, startIndex, stream); toolCall != nil { + return []any{toolCall} + } + return nil + } +} + +func normalizeToolCall(value any, index int, stream bool) map[string]any { + source, _ := value.(map[string]any) + if len(source) == 0 { + return nil + } + functionSource := mapFromAny(source["function"]) + if len(functionSource) == 0 { + functionSource = mapFromAny(firstPresent(source["function_call"], source["functionCall"])) + } + name := firstNonEmptyString( + functionSource["name"], source["name"], source["function_name"], source["functionName"], source["tool_name"], source["toolName"], + ) + arguments, hasArguments := toolCallArguments(functionSource) + if !hasArguments { + arguments, hasArguments = toolCallArguments(source) + } + if name == "" && !hasArguments && firstNonEmptyString(source["id"], source["call_id"], source["callId"], source["tool_call_id"], source["toolCallId"]) == "" { + return nil + } + function := map[string]any{} + if name != "" { + function["name"] = name + } + if hasArguments { + function["arguments"] = arguments + } + toolCall := map[string]any{ + "type": firstNonEmptyString(source["type"], "function"), + "function": function, + } + if id := firstNonEmptyString(source["id"], source["call_id"], source["callId"], source["tool_call_id"], source["toolCallId"]); id != "" { + toolCall["id"] = id + } else if !stream { + toolCall["id"] = fmt.Sprintf("call_%d", index) + } + if stream { + if rawIndex, ok := firstPresent(source["index"], source["idx"]).(float64); ok { + toolCall["index"] = int(math.Round(rawIndex)) + } else if rawIndex, ok := firstPresent(source["index"], source["idx"]).(int); ok { + toolCall["index"] = rawIndex + } else { + toolCall["index"] = index + } + } + return toolCall +} + +func toolCallsFromContent(value any, startIndex int, stream bool) ([]any, any, bool) { + blocks, ok := value.([]any) + if !ok { + return nil, nil, false + } + toolCalls := make([]any, 0) + remaining := make([]any, 0, len(blocks)) + containsReasoning := false + for _, rawBlock := range blocks { + block, _ := rawBlock.(map[string]any) + if len(block) == 0 { + remaining = append(remaining, rawBlock) + continue + } + switch stringFromAny(block["type"]) { + case "tool_use": + if toolCall := normalizeToolUseBlock(block, startIndex+len(toolCalls), stream); toolCall != nil { + toolCalls = append(toolCalls, toolCall) + } + case "tool_result": + remaining = append(remaining, rawBlock) + default: + if isReasoningContentBlock(block) { + containsReasoning = true + } + remaining = append(remaining, rawBlock) + } + } + if len(toolCalls) == 0 { + return nil, nil, false + } + if containsReasoning { + return toolCalls, remaining, true + } + return toolCalls, contentBlocksText(remaining), true +} + +func normalizeToolUseBlock(block map[string]any, index int, stream bool) map[string]any { + toolCall := map[string]any{ + "type": "function", + "function": map[string]any{ + "name": stringFromAny(block["name"]), + "arguments": jsonStringFromAny(block["input"]), + }, + } + if id := firstNonEmptyString(block["id"], block["tool_use_id"], block["toolUseId"]); id != "" { + toolCall["id"] = id + } else if !stream { + toolCall["id"] = fmt.Sprintf("call_%d", index) + } + if stream { + toolCall["index"] = index + } + return toolCall +} + +func toolCallsFromParts(value any, startIndex int, stream bool) []any { + parts, ok := value.([]any) + if !ok { + return nil + } + out := make([]any, 0) + for _, rawPart := range parts { + part, _ := rawPart.(map[string]any) + if functionCall := mapFromAny(firstPresent(part["functionCall"], part["function_call"])); len(functionCall) > 0 { + if toolCall := normalizeGeminiFunctionCall(functionCall, startIndex+len(out), stream); toolCall != nil { + out = append(out, toolCall) + } + } + } + return out +} + +func normalizeGeminiFunctionCall(functionCall map[string]any, index int, stream bool) map[string]any { + toolCall := map[string]any{ + "type": "function", + "function": map[string]any{ + "name": stringFromAny(functionCall["name"]), + "arguments": jsonStringFromAny(firstPresent(functionCall["args"], functionCall["arguments"])), + }, + } + if id := firstNonEmptyString(functionCall["id"], functionCall["call_id"], functionCall["callId"]); id != "" { + toolCall["id"] = id + } else if !stream { + toolCall["id"] = fmt.Sprintf("call_%d", index) + } + if stream { + toolCall["index"] = index + } + return toolCall +} + +func setNormalizedContent(container map[string]any, value any, stream bool) { + if text, ok := value.(string); ok && text == "" { + if stream { + delete(container, "content") + return + } + container["content"] = nil + return + } + container["content"] = value +} + +func isReasoningContentBlock(block map[string]any) bool { + switch stringFromAny(block["type"]) { + case "thinking", "redacted_thinking", "reasoning.text", "reasoning.summary", "reasoning.encrypted": + return true + default: + return false + } +} + +func contentBlocksText(blocks []any) string { + parts := make([]string, 0, len(blocks)) + for _, rawBlock := range blocks { + switch block := rawBlock.(type) { + case string: + parts = append(parts, block) + case map[string]any: + if text := stringFromAny(firstPresent(block["text"], block["content"])); text != "" { + parts = append(parts, text) + } + } + } + return strings.Join(parts, "") +} + +func toolCallArguments(source map[string]any) (string, bool) { + for _, key := range []string{"arguments", "args", "input", "parameters"} { + if value, ok := source[key]; ok { + return jsonStringFromAny(value), true + } + } + return "", false +} + +func jsonStringFromAny(value any) string { + if value == nil { + return "" + } + if text, ok := value.(string); ok { + return text + } + encoded, err := json.Marshal(value) + if err != nil { + return "" + } + return string(encoded) +} + +func normalizeReasoningContainer(container map[string]any, deleteEmptyContent bool) { + if container == nil { + return + } + reasoningParts := make([]string, 0, 3) + if reasoning := reasoningDetailsText(container["reasoning_details"]); reasoning != "" { + reasoningParts = append(reasoningParts, reasoning) + } else if reasoning := stringFromAny(container["reasoning_content"]); reasoning != "" { + reasoningParts = append(reasoningParts, reasoning) + } else if reasoning := stringFromAny(container["reasoning"]); reasoning != "" { + reasoningParts = append(reasoningParts, reasoning) + } + if content, ok := container["content"]; ok { + cleanContent, contentReasoning, changed := normalizeReasoningContentValue(content) + if changed { + if deleteEmptyContent { + if text, ok := cleanContent.(string); ok && text == "" { + delete(container, "content") + } else { + container["content"] = cleanContent + } + } else { + container["content"] = cleanContent + } + } + if contentReasoning != "" { + reasoningParts = append(reasoningParts, contentReasoning) + } + } + if len(reasoningParts) > 0 { + container["reasoning_content"] = strings.Join(reasoningParts, "") + } + delete(container, "reasoning_details") + delete(container, "reasoning") +} + +func reasoningDetailsText(value any) string { + rawItems, ok := value.([]any) + if !ok { + return "" + } + parts := make([]string, 0, len(rawItems)) + for _, rawItem := range rawItems { + item, _ := rawItem.(map[string]any) + switch stringFromAny(item["type"]) { + case "reasoning.text": + if text := stringFromAny(item["text"]); text != "" { + parts = append(parts, text) + } + case "reasoning.summary": + if summary := stringFromAny(item["summary"]); summary != "" { + parts = append(parts, summary) + } + } + } + return strings.Join(parts, "") +} + +func normalizeReasoningContentValue(value any) (any, string, bool) { + switch typed := value.(type) { + case string: + cleanContent, reasoning, changed := splitTaggedReasoningText(typed) + return cleanContent, reasoning, changed + case []any: + contentParts := make([]string, 0, len(typed)) + reasoningParts := make([]string, 0) + changed := false + for _, rawItem := range typed { + switch item := rawItem.(type) { + case string: + contentParts = append(contentParts, item) + case map[string]any: + switch stringFromAny(item["type"]) { + case "thinking": + if thinking := stringFromAny(item["thinking"]); thinking != "" { + reasoningParts = append(reasoningParts, thinking) + } + changed = true + case "redacted_thinking", "reasoning.encrypted": + changed = true + case "reasoning.text": + if text := stringFromAny(item["text"]); text != "" { + reasoningParts = append(reasoningParts, text) + } + changed = true + case "reasoning.summary": + if summary := stringFromAny(item["summary"]); summary != "" { + reasoningParts = append(reasoningParts, summary) + } + changed = true + case "text", "output_text": + if text := stringFromAny(firstPresent(item["text"], item["content"])); text != "" { + cleanText, reasoning, tagged := splitTaggedReasoningText(text) + contentParts = append(contentParts, cleanText) + if reasoning != "" { + reasoningParts = append(reasoningParts, reasoning) + } + changed = changed || tagged + } + default: + if text := stringFromAny(firstPresent(item["text"], item["content"])); text != "" { + contentParts = append(contentParts, text) + } + } + } + } + if !changed { + return value, "", false + } + return strings.Join(contentParts, ""), strings.Join(reasoningParts, ""), true + default: + return value, "", false + } +} + +func splitTaggedReasoningText(text string) (string, string, bool) { + lower := strings.ToLower(text) + clean := strings.Builder{} + reasoning := strings.Builder{} + changed := false + for offset := 0; offset < len(text); { + start, tag := nextReasoningOpenTag(lower, offset) + if start < 0 { + clean.WriteString(text[offset:]) + break + } + clean.WriteString(text[offset:start]) + openEnd := start + len("<"+tag+">") + closeToken := "" + closeStart := strings.Index(lower[openEnd:], closeToken) + if closeStart < 0 { + reasoning.WriteString(text[openEnd:]) + offset = len(text) + changed = true + break + } + closeStart += openEnd + reasoning.WriteString(text[openEnd:closeStart]) + offset = closeStart + len(closeToken) + changed = true + } + return clean.String(), reasoning.String(), changed +} + +func nextReasoningOpenTag(lower string, offset int) (int, string) { + bestStart := -1 + bestTag := "" + for _, tag := range []string{"think", "reasoning", "analysis"} { + needle := "<" + tag + ">" + idx := strings.Index(lower[offset:], needle) + if idx < 0 { + continue + } + absolute := offset + idx + if bestStart < 0 || absolute < bestStart { + bestStart = absolute + bestTag = tag + } + } + return bestStart, bestTag +} + func streamEventText(event map[string]any) string { if choices, ok := event["choices"].([]any); ok { for _, rawChoice := range choices { @@ -195,6 +784,91 @@ func streamEventText(event map[string]any) string { return "" } +func streamEventReasoningContent(event map[string]any) string { + if choices, ok := event["choices"].([]any); ok { + for _, rawChoice := range choices { + choice, _ := rawChoice.(map[string]any) + if delta, ok := choice["delta"].(map[string]any); ok { + if content, ok := delta["reasoning_content"].(string); ok { + return content + } + if content, ok := delta["reasoning"].(string); ok { + return content + } + } + if message, ok := choice["message"].(map[string]any); ok { + if content, ok := message["reasoning_content"].(string); ok { + return content + } + } + } + } + return "" +} + +func streamEventFinishReason(event map[string]any) string { + if choices, ok := event["choices"].([]any); ok { + for _, rawChoice := range choices { + choice, _ := rawChoice.(map[string]any) + if reason, ok := choice["finish_reason"].(string); ok && reason != "" { + return reason + } + } + } + return "" +} + +func aggregateStreamToolCalls(event map[string]any, toolCalls map[int]map[string]any) { + choices, _ := event["choices"].([]any) + for _, rawChoice := range choices { + choice, _ := rawChoice.(map[string]any) + delta, _ := choice["delta"].(map[string]any) + rawToolCalls, _ := delta["tool_calls"].([]any) + for _, rawToolCall := range rawToolCalls { + incoming, _ := rawToolCall.(map[string]any) + index := intFromAny(incoming["index"]) + current := toolCalls[index] + if current == nil { + current = map[string]any{} + toolCalls[index] = current + } + for _, key := range []string{"id", "type"} { + if value, ok := incoming[key].(string); ok && value != "" { + current[key] = value + } + } + incomingFn, _ := incoming["function"].(map[string]any) + if len(incomingFn) == 0 { + continue + } + currentFn, _ := current["function"].(map[string]any) + if currentFn == nil { + currentFn = map[string]any{} + current["function"] = currentFn + } + if name, ok := incomingFn["name"].(string); ok && name != "" { + currentFn["name"] = stringFromAny(currentFn["name"]) + name + } + if arguments, ok := incomingFn["arguments"].(string); ok && arguments != "" { + currentFn["arguments"] = stringFromAny(currentFn["arguments"]) + arguments + } + } + } +} + +func sortedStreamToolCalls(toolCalls map[int]map[string]any) []any { + indices := make([]int, 0, len(toolCalls)) + for index := range toolCalls { + indices = append(indices, index) + } + sort.Ints(indices) + out := make([]any, 0, len(indices)) + for _, index := range indices { + out = append(out, toolCalls[index]) + } + return out +} + func usageFromOpenAI(result map[string]any) Usage { usage, _ := result["usage"].(map[string]any) input := intFromAny(firstPresent(usage["prompt_tokens"], usage["input_tokens"])) @@ -254,6 +928,15 @@ func stringFromAny(value any) string { return "" } +func firstNonEmptyString(values ...any) string { + for _, value := range values { + if text := strings.TrimSpace(stringFromAny(value)); text != "" { + return text + } + } + return "" +} + func firstPresent(values ...any) any { for _, value := range values { if value != nil { diff --git a/apps/api/internal/clients/openai.go b/apps/api/internal/clients/openai.go index 8a579e1..12ef168 100644 --- a/apps/api/internal/clients/openai.go +++ b/apps/api/internal/clients/openai.go @@ -23,6 +23,9 @@ func (c OpenAIClient) Run(ctx context.Context, request Request) (Response, error return Response{}, &ClientError{Code: "unsupported_kind", Message: "unsupported openai request kind", Retryable: false} } body := cloneBody(request.Body) + if request.Kind == "chat.completions" { + body = NormalizeChatCompletionRequestBody(body) + } body["model"] = upstreamModelName(request.Candidate) stream := request.Stream || boolValue(body, "stream") ensureOpenAIStreamUsage(body, request.Kind, stream) @@ -40,6 +43,9 @@ func (c OpenAIClient) Run(ctx context.Context, request Request) (Response, error } requestID := requestIDFromHTTPResponse(resp) result, err := decodeOpenAIResponse(resp, stream, request.StreamDelta) + if err == nil && request.Kind == "chat.completions" { + result = NormalizeChatCompletionResult(result) + } responseFinishedAt := time.Now() if err != nil { return Response{}, annotateResponseError(err, requestID, responseStartedAt, responseFinishedAt) diff --git a/apps/api/internal/clients/types.go b/apps/api/internal/clients/types.go index 8d88793..8e46d8c 100644 --- a/apps/api/internal/clients/types.go +++ b/apps/api/internal/clients/types.go @@ -48,7 +48,13 @@ type Progress struct { Payload map[string]any } -type StreamDelta func(text string) error +type StreamDeltaEvent struct { + Text string + ReasoningContent string + Event map[string]any +} + +type StreamDelta func(event StreamDeltaEvent) error type Client interface { Run(ctx context.Context, request Request) (Response, error) diff --git a/apps/api/internal/httpapi/chat_completions_mode_test.go b/apps/api/internal/httpapi/chat_completions_mode_test.go index 599046c..d16b9f4 100644 --- a/apps/api/internal/httpapi/chat_completions_mode_test.go +++ b/apps/api/internal/httpapi/chat_completions_mode_test.go @@ -50,7 +50,7 @@ func TestWriteCompatibleTaskResponseReturnsJSONWhenStreamIsFalse(t *testing.T) { req := httptest.NewRequest(http.MethodPost, "/api/v1/chat/completions", nil) recorder := httptest.NewRecorder() - writeCompatibleTaskResponse(context.Background(), recorder, req, executor, "chat.completions", "gpt-test", store.GatewayTask{ID: "task-test"}, &auth.User{}, false) + writeCompatibleTaskResponse(context.Background(), recorder, req, executor, "chat.completions", "gpt-test", store.GatewayTask{ID: "task-test"}, &auth.User{}, false, false) if recorder.Code != http.StatusOK { t.Fatalf("status=%d want=%d body=%s", recorder.Code, http.StatusOK, recorder.Body.String()) @@ -69,13 +69,13 @@ func TestWriteCompatibleTaskResponseReturnsJSONWhenStreamIsFalse(t *testing.T) { func TestWriteCompatibleTaskResponseReturnsSSEWhenStreamIsTrue(t *testing.T) { executor := &fakeTaskExecutor{ - deltas: []string{"hel", "lo"}, - output: map[string]any{"id": "chatcmpl-test", "object": "chat.completion"}, + deltas: []clients.StreamDeltaEvent{{Text: "hel"}, {Text: "lo"}}, + output: map[string]any{"id": "chatcmpl-test", "object": "chat.completion", "usage": map[string]any{"prompt_tokens": 1, "completion_tokens": 2, "total_tokens": 3}}, } req := httptest.NewRequest(http.MethodPost, "/api/v1/chat/completions", nil) recorder := httptest.NewRecorder() - writeCompatibleTaskResponse(context.Background(), recorder, req, executor, "chat.completions", "gpt-test", store.GatewayTask{ID: "task-test"}, &auth.User{}, true) + writeCompatibleTaskResponse(context.Background(), recorder, req, executor, "chat.completions", "gpt-test", store.GatewayTask{ID: "task-test"}, &auth.User{}, true, true) if executor.executeCalls != 0 || executor.streamCalls != 1 { t.Fatalf("expected stream execute only, got execute=%d stream=%d", executor.executeCalls, executor.streamCalls) @@ -84,17 +84,53 @@ func TestWriteCompatibleTaskResponseReturnsSSEWhenStreamIsTrue(t *testing.T) { t.Fatalf("Content-Type=%q want text/event-stream", contentType) } body := recorder.Body.String() - for _, want := range []string{"event: message", `"content":"hel"`, `"content":"lo"`, `"finish_reason":"stop"`} { + for _, want := range []string{`data: {`, `"role":"assistant"`, `"created":`, `"system_fingerprint":`, `"content":"hel"`, `"content":"lo"`, `"finish_reason":"stop"`, `"usage":{"completion_tokens":2,"prompt_tokens":1,"total_tokens":3}`, "data: [DONE]"} { if !strings.Contains(body, want) { t.Fatalf("SSE body missing %s: %s", want, body) } } + if strings.Contains(body, "event: message") { + t.Fatalf("chat completions stream should use OpenAI data-only SSE frames: %s", body) + } +} + +func TestWriteCompatibleTaskResponseStreamsStructuredToolAndReasoningDeltas(t *testing.T) { + executor := &fakeTaskExecutor{ + deltas: []clients.StreamDeltaEvent{ + {Event: map[string]any{"id": "chatcmpl-upstream", "object": "chat.completion.chunk", "created": float64(1710000000), "model": "deepseek-v4", "system_fingerprint": "fp-test", "choices": []any{map[string]any{"index": float64(0), "delta": map[string]any{"reasoning_details": []any{map[string]any{"type": "reasoning.text", "text": "detail-"}, map[string]any{"type": "reasoning.summary", "summary": "summary"}, map[string]any{"type": "reasoning.encrypted", "data": "secret"}}}, "finish_reason": nil}}}}, + {Event: map[string]any{"id": "chatcmpl-upstream", "object": "chat.completion.chunk", "created": float64(1710000000), "model": "deepseek-v4", "system_fingerprint": "fp-test", "choices": []any{map[string]any{"index": float64(0), "delta": map[string]any{"content": "taggedanswer"}, "finish_reason": nil}}}}, + {Event: map[string]any{"id": "chatcmpl-upstream", "object": "chat.completion.chunk", "created": float64(1710000000), "model": "deepseek-v4", "system_fingerprint": "fp-test", "choices": []any{map[string]any{"index": float64(0), "delta": map[string]any{"functionCall": map[string]any{"name": "legacy_lookup", "arguments": "{\"city\":\"Boston\"}"}}, "finish_reason": nil}}}}, + {Event: map[string]any{"id": "chatcmpl-upstream", "object": "chat.completion.chunk", "created": float64(1710000000), "model": "deepseek-v4", "system_fingerprint": "fp-test", "choices": []any{map[string]any{"index": float64(0), "delta": map[string]any{"tool_calls": []any{map[string]any{"index": float64(0), "id": "call_1", "type": "function", "function": map[string]any{"name": "lookup", "arguments": "{\"q\":"}}}}, "finish_reason": nil}}}}, + {Event: map[string]any{"id": "chatcmpl-upstream", "object": "chat.completion.chunk", "created": float64(1710000000), "model": "deepseek-v4", "system_fingerprint": "fp-test", "choices": []any{map[string]any{"index": float64(0), "delta": map[string]any{"tool_calls": []any{map[string]any{"index": float64(0), "function": map[string]any{"arguments": "\"weather\"}"}}}}, "finish_reason": "tool_calls"}}}}, + {Event: map[string]any{"id": "chatcmpl-upstream", "object": "chat.completion.chunk", "created": float64(1710000000), "model": "deepseek-v4", "choices": []any{}, "usage": map[string]any{"prompt_tokens": float64(4), "completion_tokens": float64(5), "total_tokens": float64(9)}}}, + }, + output: map[string]any{"id": "chatcmpl-upstream", "object": "chat.completion", "model": "deepseek-v4"}, + } + req := httptest.NewRequest(http.MethodPost, "/api/v1/chat/completions", nil) + recorder := httptest.NewRecorder() + + writeCompatibleTaskResponse(context.Background(), recorder, req, executor, "chat.completions", "gpt-test", store.GatewayTask{ID: "task-test"}, &auth.User{}, true, true) + + body := recorder.Body.String() + roleIndex := strings.Index(body, `"role":"assistant"`) + reasoningIndex := strings.Index(body, `"reasoning_content":"detail-summary"`) + if roleIndex < 0 || reasoningIndex < 0 || roleIndex > reasoningIndex { + t.Fatalf("assistant role should be emitted before structured deltas: %s", body) + } + for _, want := range []string{`"system_fingerprint":"fp-test"`, `"created":1710000000`, `"reasoning_content":"tagged"`, `"content":"answer"`, `"tool_calls":[{"function":{"arguments":"{\"city\":\"Boston\"}","name":"legacy_lookup"}`, `"tool_calls":[{"function":{"arguments":"{\"q\":"`, `"finish_reason":"tool_calls"`, `"choices":[],"created":1710000000`, `"usage":{"completion_tokens":5,"prompt_tokens":4,"total_tokens":9}`, "data: [DONE]"} { + if !strings.Contains(body, want) { + t.Fatalf("SSE body missing %s: %s", want, body) + } + } + if strings.Contains(body, "reasoning_details") || strings.Contains(body, "") || strings.Contains(body, "functionCall") { + t.Fatalf("provider-specific reasoning/tool fields should be converted away: %s", body) + } } type fakeTaskExecutor struct { executeCalls int streamCalls int - deltas []string + deltas []clients.StreamDeltaEvent output map[string]any } diff --git a/apps/api/internal/httpapi/handlers.go b/apps/api/internal/httpapi/handlers.go index 714a555..ec5f512 100644 --- a/apps/api/internal/httpapi/handlers.go +++ b/apps/api/internal/httpapi/handlers.go @@ -934,7 +934,7 @@ func (s *Server) createTask(kind string, compatible bool) http.Handler { runCtx, cancelRun := s.requestExecutionContext(r) defer cancelRun() if responsePlan.compatibleMode { - writeCompatibleTaskResponse(runCtx, w, r, s.runner, kind, model, task, user, responsePlan.streamMode) + writeCompatibleTaskResponse(runCtx, w, r, s.runner, kind, model, task, user, responsePlan.streamMode, streamIncludeUsage(body)) return } result, runErr := s.runner.Execute(runCtx, task, user) @@ -1002,14 +1002,15 @@ type taskExecutor interface { ExecuteStream(context.Context, store.GatewayTask, *auth.User, clients.StreamDelta) (runner.Result, error) } -func writeCompatibleTaskResponse(runCtx context.Context, w http.ResponseWriter, r *http.Request, executor taskExecutor, kind string, model string, task store.GatewayTask, user *auth.User, streamMode bool) { +func writeCompatibleTaskResponse(runCtx context.Context, w http.ResponseWriter, r *http.Request, executor taskExecutor, kind string, model string, task store.GatewayTask, user *auth.User, streamMode bool, includeUsage bool) { if streamMode { flusher := prepareCompatibleStream(w) - result, runErr := executor.ExecuteStream(runCtx, task, user, func(delta string) error { + streamWriter := newCompatibleStreamWriter(kind, model, includeUsage) + result, runErr := executor.ExecuteStream(runCtx, task, user, func(delta clients.StreamDeltaEvent) error { if !requestStillConnected(r) { return nil } - writeCompatibleDelta(w, kind, model, delta) + streamWriter.writeDelta(w, delta) if flusher != nil { flusher.Flush() } @@ -1043,7 +1044,7 @@ func writeCompatibleTaskResponse(runCtx context.Context, w http.ResponseWriter, if !requestStillConnected(r) { return } - writeCompatibleDone(w, kind, model, result.Output) + streamWriter.writeDone(w, result.Output) if flusher != nil { flusher.Flush() } @@ -1064,6 +1065,12 @@ func writeCompatibleTaskResponse(runCtx context.Context, w http.ResponseWriter, writeJSON(w, http.StatusOK, result.Output) } +func streamIncludeUsage(body map[string]any) bool { + streamOptions, _ := body["stream_options"].(map[string]any) + includeUsage, _ := streamOptions["include_usage"].(bool) + return includeUsage +} + func asyncRequest(r *http.Request) bool { value := strings.TrimSpace(strings.ToLower(r.Header.Get("x-async"))) return value == "1" || value == "true" || value == "yes" || value == "on" diff --git a/apps/api/internal/httpapi/openapi_models.go b/apps/api/internal/httpapi/openapi_models.go index f80c30b..9871156 100644 --- a/apps/api/internal/httpapi/openapi_models.go +++ b/apps/api/internal/httpapi/openapi_models.go @@ -172,16 +172,18 @@ type PricingEstimateResponse struct { } type TaskRequest struct { - Model string `json:"model" example:"gpt-4o-mini"` - Messages []ChatMessage `json:"messages,omitempty"` - Input string `json:"input,omitempty" example:"Tell me a short story"` - Prompt string `json:"prompt,omitempty" example:"A watercolor robot reading a book"` - Stream bool `json:"stream,omitempty" example:"false"` - RunMode string `json:"runMode,omitempty" example:"simulation"` - MaxTokens int `json:"max_tokens,omitempty" example:"512"` - Size string `json:"size,omitempty" example:"1024x1024"` - Duration int `json:"duration,omitempty" example:"5"` - Resolution string `json:"resolution,omitempty" example:"720p"` + Model string `json:"model" example:"gpt-4o-mini"` + Messages []ChatMessage `json:"messages,omitempty"` + Input string `json:"input,omitempty" example:"Tell me a short story"` + Prompt string `json:"prompt,omitempty" example:"A watercolor robot reading a book"` + Stream bool `json:"stream,omitempty" example:"false"` + RunMode string `json:"runMode,omitempty" example:"simulation"` + MaxTokens int `json:"max_tokens,omitempty" example:"512"` + // ReasoningEffort 推理深度,OpenAI-compatible 请求字段;开放字符串,取值随 provider 和模型能力而定,常见值为 none、minimal、low、medium、high、xhigh,也可配置 max 等供应商自定义值。 + ReasoningEffort string `json:"reasoning_effort,omitempty" example:"medium"` + Size string `json:"size,omitempty" example:"1024x1024"` + Duration int `json:"duration,omitempty" example:"5"` + Resolution string `json:"resolution,omitempty" example:"720p"` } type ChatCompletionRequest struct { @@ -189,8 +191,10 @@ type ChatCompletionRequest struct { Messages []ChatMessage `json:"messages"` Temperature float64 `json:"temperature,omitempty" example:"0.7"` MaxTokens int `json:"max_tokens,omitempty" example:"512"` - Stream bool `json:"stream,omitempty" example:"false"` - RunMode string `json:"runMode,omitempty" example:"simulation"` + // ReasoningEffort 推理深度,OpenAI-compatible 请求字段;开放字符串,取值随 provider 和模型能力而定,常见值为 none、minimal、low、medium、high、xhigh,也可配置 max 等供应商自定义值。 + ReasoningEffort string `json:"reasoning_effort,omitempty" example:"medium"` + Stream bool `json:"stream,omitempty" example:"false"` + RunMode string `json:"runMode,omitempty" example:"simulation"` } type ChatMessage struct { diff --git a/apps/api/internal/httpapi/streaming.go b/apps/api/internal/httpapi/streaming.go index 310221e..be58ef3 100644 --- a/apps/api/internal/httpapi/streaming.go +++ b/apps/api/internal/httpapi/streaming.go @@ -1,6 +1,13 @@ package httpapi -import "net/http" +import ( + "encoding/json" + "fmt" + "net/http" + "time" + + "github.com/easyai/easyai-ai-gateway/apps/api/internal/clients" +) func prepareCompatibleStream(w http.ResponseWriter) http.Flusher { w.Header().Set("Content-Type", "text/event-stream") @@ -10,55 +17,180 @@ func prepareCompatibleStream(w http.ResponseWriter) http.Flusher { return flusher } -func writeCompatibleDelta(w http.ResponseWriter, kind string, model string, content string) { - if kind == "responses" { - sendSSE(w, "response.output_text.delta", map[string]any{"type": "response.output_text.delta", "delta": content}) - return - } - sendSSE(w, "message", map[string]any{ - "id": "chatcmpl-stream", - "object": "chat.completion.chunk", - "model": model, - "choices": []any{map[string]any{"index": 0, "delta": map[string]any{"content": content}, "finish_reason": nil}}, - }) +type compatibleStreamWriter struct { + kind string + model string + includeUsage bool + + id string + created int64 + systemFingerprint any + sentRole bool + sentFinish bool + sentUsage bool } -func writeCompatibleDone(w http.ResponseWriter, kind string, model string, output map[string]any) { - if kind == "responses" { +func newCompatibleStreamWriter(kind string, model string, includeUsage bool) *compatibleStreamWriter { + return &compatibleStreamWriter{ + kind: kind, + model: model, + includeUsage: includeUsage, + id: "chatcmpl-stream", + created: time.Now().Unix(), + } +} + +func (s *compatibleStreamWriter) writeDelta(w http.ResponseWriter, event clients.StreamDeltaEvent) { + if s.kind == "responses" { + if event.Text != "" { + sendSSE(w, "response.output_text.delta", map[string]any{"type": "response.output_text.delta", "delta": event.Text}) + } + return + } + + if event.Event != nil && isChatCompletionChunk(event.Event) { + s.writeChatChunk(w, event.Event) + return + } + + if event.Text == "" && event.ReasoningContent == "" { + return + } + s.ensureRoleChunk(w) + if event.ReasoningContent != "" { + s.writeChatData(w, s.chatChunk([]any{map[string]any{"index": 0, "delta": map[string]any{"reasoning_content": event.ReasoningContent}, "finish_reason": nil}}, nil)) + } + if event.Text != "" { + s.writeChatData(w, s.chatChunk([]any{map[string]any{"index": 0, "delta": map[string]any{"content": event.Text}, "finish_reason": nil}}, nil)) + } +} + +func (s *compatibleStreamWriter) writeDone(w http.ResponseWriter, output map[string]any) { + if s.kind == "responses" { sendSSE(w, "response.completed", map[string]any{"type": "response.completed", "response": output}) return } - sendSSE(w, "message", map[string]any{ - "id": firstString(output["id"], "chatcmpl-stream"), - "object": "chat.completion.chunk", - "model": model, - "choices": []any{map[string]any{"index": 0, "delta": map[string]any{}, "finish_reason": "stop"}}, - }) + s.captureOutputMetadata(output) + if !s.sentRole { + s.ensureRoleChunk(w) + } + if !s.sentFinish { + s.writeChatData(w, s.chatChunk([]any{map[string]any{"index": 0, "delta": map[string]any{}, "finish_reason": finishReasonFromOutput(output)}}, nil)) + s.sentFinish = true + } + if s.includeUsage && !s.sentUsage { + if usage, ok := output["usage"].(map[string]any); ok && len(usage) > 0 { + s.writeChatData(w, s.chatChunk([]any{}, usage)) + s.sentUsage = true + } + } + s.writeDoneMarker(w) +} + +func (s *compatibleStreamWriter) writeChatChunk(w http.ResponseWriter, chunk map[string]any) { + chunk = clients.NormalizeChatCompletionStreamEvent(chunk) + s.captureChunkMetadata(chunk) + choices, _ := chunk["choices"].([]any) + usage, hasUsage := chunk["usage"].(map[string]any) + if len(choices) == 0 && hasUsage { + if !s.includeUsage { + return + } + s.writeChatData(w, s.chatChunk([]any{}, usage)) + s.sentUsage = true + return + } + if len(choices) > 0 && !chunkHasRole(choices) && !s.sentRole { + s.ensureRoleChunk(w) + } + if chunkHasRole(choices) { + s.sentRole = true + } + if chunkHasFinishReason(choices) { + s.sentFinish = true + } + normalized := cloneMap(chunk) + normalized["id"] = s.id + normalized["object"] = "chat.completion.chunk" + normalized["created"] = s.created + normalized["model"] = firstString(normalized["model"], s.model) + normalized["system_fingerprint"] = s.systemFingerprint + s.writeChatData(w, normalized) +} + +func (s *compatibleStreamWriter) ensureRoleChunk(w http.ResponseWriter) { + if s.sentRole { + return + } + s.writeChatData(w, s.chatChunk([]any{map[string]any{"index": 0, "delta": map[string]any{"role": "assistant"}, "finish_reason": nil}}, nil)) + s.sentRole = true +} + +func (s *compatibleStreamWriter) chatChunk(choices []any, usage map[string]any) map[string]any { + chunk := map[string]any{ + "id": s.id, + "object": "chat.completion.chunk", + "created": s.created, + "model": s.model, + "system_fingerprint": s.systemFingerprint, + "choices": choices, + } + if usage != nil { + chunk["usage"] = usage + } else { + chunk["usage"] = nil + } + return chunk +} + +func (s *compatibleStreamWriter) writeChatData(w http.ResponseWriter, payload map[string]any) { + bytes, _ := json.Marshal(payload) + _, _ = fmt.Fprintf(w, "data: %s\n\n", bytes) +} + +func (s *compatibleStreamWriter) writeDoneMarker(w http.ResponseWriter) { + _, _ = fmt.Fprint(w, "data: [DONE]\n\n") +} + +func (s *compatibleStreamWriter) captureChunkMetadata(chunk map[string]any) { + if id := firstString(chunk["id"], ""); id != "" { + s.id = id + } + if model := firstString(chunk["model"], ""); model != "" { + s.model = model + } + if created := int64FromAny(chunk["created"]); created > 0 { + s.created = created + } + if value, ok := chunk["system_fingerprint"]; ok { + s.systemFingerprint = value + } +} + +func (s *compatibleStreamWriter) captureOutputMetadata(output map[string]any) { + if id := firstString(output["id"], ""); id != "" { + s.id = id + } + if model := firstString(output["model"], ""); model != "" { + s.model = model + } + if created := int64FromAny(output["created"]); created > 0 { + s.created = created + } + if value, ok := output["system_fingerprint"]; ok { + s.systemFingerprint = value + } } func writeCompatibleStream(w http.ResponseWriter, kind string, model string, output map[string]any) { prepareCompatibleStream(w) + writer := newCompatibleStreamWriter(kind, model, true) content := extractOutputText(output) if content == "" { content = "done" } - if kind == "responses" { - sendSSE(w, "response.output_text.delta", map[string]any{"type": "response.output_text.delta", "delta": content}) - sendSSE(w, "response.completed", map[string]any{"type": "response.completed", "response": output}) - return - } - sendSSE(w, "message", map[string]any{ - "id": output["id"], - "object": "chat.completion.chunk", - "model": model, - "choices": []any{map[string]any{"index": 0, "delta": map[string]any{"content": content}, "finish_reason": nil}}, - }) - sendSSE(w, "message", map[string]any{ - "id": output["id"], - "object": "chat.completion.chunk", - "model": model, - "choices": []any{map[string]any{"index": 0, "delta": map[string]any{}, "finish_reason": "stop"}}, - }) + writer.writeDelta(w, clients.StreamDeltaEvent{Text: content}) + writer.writeDone(w, output) } func firstString(value any, fallback string) string { @@ -68,6 +200,68 @@ func firstString(value any, fallback string) string { return fallback } +func int64FromAny(value any) int64 { + switch typed := value.(type) { + case int64: + return typed + case int: + return int64(typed) + case float64: + return int64(typed) + default: + return 0 + } +} + +func isChatCompletionChunk(event map[string]any) bool { + object, _ := event["object"].(string) + if object == "chat.completion.chunk" { + return true + } + _, hasChoices := event["choices"].([]any) + return hasChoices +} + +func chunkHasRole(choices []any) bool { + for _, rawChoice := range choices { + choice, _ := rawChoice.(map[string]any) + delta, _ := choice["delta"].(map[string]any) + if role, ok := delta["role"].(string); ok && role != "" { + return true + } + } + return false +} + +func chunkHasFinishReason(choices []any) bool { + for _, rawChoice := range choices { + choice, _ := rawChoice.(map[string]any) + if reason, ok := choice["finish_reason"].(string); ok && reason != "" { + return true + } + } + return false +} + +func finishReasonFromOutput(output map[string]any) string { + choices, _ := output["choices"].([]any) + for _, rawChoice := range choices { + choice, _ := rawChoice.(map[string]any) + if reason, ok := choice["finish_reason"].(string); ok && reason != "" { + return reason + } + } + return "stop" +} + +func cloneMap(value map[string]any) map[string]any { + out := map[string]any{} + for key, item := range value { + out[key] = item + } + return out +} + func extractOutputText(output map[string]any) string { if text, ok := output["output_text"].(string); ok { return text diff --git a/apps/web/src/pages/admin/BaseModelCapabilityEditor.tsx b/apps/web/src/pages/admin/BaseModelCapabilityEditor.tsx index ef619cd..e7c0740 100644 --- a/apps/web/src/pages/admin/BaseModelCapabilityEditor.tsx +++ b/apps/web/src/pages/admin/BaseModelCapabilityEditor.tsx @@ -40,14 +40,14 @@ type ValueOption = { label: string; value: string }; const textFields: FieldDefinition[] = [ { key: 'supportTool', label: '工具调用', hint: 'function calling / tools', type: 'boolean' }, { key: 'supportStructuredOutput', label: '结构化输出', hint: 'JSON Schema 等输出', type: 'boolean' }, - { key: 'supportThinking', label: '思考能力', hint: '支持 thinking 参数', type: 'boolean' }, + { key: 'supportThinking', label: '推理能力', hint: '支持 reasoning / thinking 参数', type: 'boolean' }, { key: 'supportThinkingModeSwitch', label: '思考开关', hint: '可按请求切换', type: 'boolean' }, { key: 'supportWebSearch', label: '联网搜索', type: 'boolean' }, { key: 'max_context_tokens', label: '上下文 Token', placeholder: '128000', type: 'number' }, { key: 'max_input_tokens', label: '最大输入 Token', placeholder: '64000', type: 'number' }, { key: 'max_output_tokens', label: '最大输出 Token', placeholder: '8192', type: 'number' }, { key: 'max_thinking_tokens', label: '最大思考 Token', placeholder: '32768', type: 'number' }, - { key: 'thinkingEffortLevels', label: '思考强度', placeholder: 'minimal, low, medium, high', type: 'list' }, + { key: 'thinkingEffortLevels', label: '推理深度', hint: '声明模型支持的 reasoning_effort 取值,可填写 max 等供应商自定义值', placeholder: 'none, minimal, low, medium, high, xhigh, max', type: 'list' }, ]; const embeddingFields: FieldDefinition[] = [ @@ -535,7 +535,7 @@ const imageAspectRatioOptions = [ '7:4', '4:7', ]; -const thinkingEffortOptions = ['minimal', 'low', 'medium', 'high']; +const thinkingEffortOptions = ['none', 'minimal', 'low', 'medium', 'high', 'xhigh', 'max']; const omniVideoModeOptions = ['text_to_video', 'image_reference', 'element_reference', 'first_last_frame', 'video_reference', 'video_edit', 'multi_shot']; const durationOptionValues = ['1', '2', '3', '4', '5', '6', '8', '10', '15', '20', '25', '30']; const exclusiveCapabilityFields: Record = { diff --git a/docs/design.md b/docs/design.md index 79cc6aa..c29504f 100644 --- a/docs/design.md +++ b/docs/design.md @@ -1505,6 +1505,22 @@ type ModelClient interface { - progress event snapshot:确保前端进度面板兼容。 - billing snapshot:确保预估扣费和最终 billings 语义一致。 +OpenAI-compatible 文本请求中的推理深度统一使用 `reasoning_effort` 表达。该字段是请求参数,不是响应中的推理内容;模型能力中用 `thinkingEffortLevels` 声明该模型支持的可选取值。`reasoning_effort` 必须按开放字符串处理,不在网关层写死枚举;实际可用集合必须以 provider 和模型能力为准。常见取值定义如下: + +| 值 | 含义 | +| --- | --- | +| `none` | 不启用额外推理,适用于不需要思考链路的低延迟请求。 | +| `minimal` | 最小推理预算,优先降低延迟和成本。 | +| `low` | 较低推理预算,用于简单推理任务。 | +| `medium` | 默认/均衡推理深度,在质量、延迟和成本之间折中。 | +| `high` | 较高推理预算,用于复杂规划、代码和多步推理。 | +| `xhigh` | 最高推理预算,仅在模型和 provider 明确支持时使用,通常成本和延迟最高。 | +| `max` | 供应商自定义最高档示例,例如 DeepSeek V4 类模型可能使用该值;语义以 provider 文档为准。 | + +除上表外,`thinkingEffortLevels` 可以保存任意供应商自定义值,例如 `max`、`ultra` 或后续模型新增档位。管理端只提供常见值作为快捷选项,不应阻止自定义输入;请求透传时按模型能力校验或直接交由上游 provider 返回错误。 + +`reasoning_content`、推理过程 delta 或思考摘要在 Chat Completions 中不是 OpenAI 标准必需字段;如需兼容 DeepSeek、Qwen 等供应商扩展,应在 adapter 层作为可选扩展透传,并避免把 hidden reasoning 默认暴露给普通兼容客户端。 + ## 11. 队列持久化、恢复与限流执行 ### 11.1 持久化队列原则