From 13186f8ed165dbeca80861f4ad8d853fb73cc863 Mon Sep 17 00:00:00 2001
From: chensipeng <chensipeng@51easyai.com>
Date: Tue, 19 May 2026 17:46:27 +0800
Subject: [PATCH] =?UTF-8?q?feat(chat):=20=E5=AE=8C=E5=96=84=20Chat=20Compl?=
 =?UTF-8?q?etions=20=E5=85=BC=E5=AE=B9=E5=B1=82?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 apps/api/docs/swagger.json                    |   5 +
 apps/api/docs/swagger.yaml                    |   5 +
 apps/api/internal/clients/clients_test.go     | 394 ++++++++++
 apps/api/internal/clients/gemini.go           | 184 ++++-
 apps/api/internal/clients/helpers.go          | 711 +++++++++++++++++-
 apps/api/internal/clients/openai.go           |   6 +
 apps/api/internal/clients/types.go            |   8 +-
 .../httpapi/chat_completions_mode_test.go     |  48 +-
 apps/api/internal/httpapi/handlers.go         |  17 +-
 apps/api/internal/httpapi/openapi_models.go   |  28 +-
 apps/api/internal/httpapi/streaming.go        | 268 ++++++-
 .../pages/admin/BaseModelCapabilityEditor.tsx |   6 +-
 docs/design.md                                |  16 +
 13 files changed, 1611 insertions(+), 85 deletions(-)

diff --git a/apps/api/docs/swagger.json b/apps/api/docs/swagger.json
index 805b611..f999c21 100644
--- a/apps/api/docs/swagger.json
+++ b/apps/api/docs/swagger.json
@@ -7281,6 +7281,11 @@
                     "type": "string",
                     "example": "A watercolor robot reading a book"
                 },
+                "reasoning_effort": {
+                    "description": "ReasoningEffort 推理深度，OpenAI-compatible 请求字段；开放字符串，取值随 provider 和模型能力而定，常见值为 none、minimal、low、medium、high、xhigh，也可配置 max 等供应商自定义值。",
+                    "type": "string",
+                    "example": "medium"
+                },
                 "resolution": {
                     "type": "string",
                     "example": "720p"
diff --git a/apps/api/docs/swagger.yaml b/apps/api/docs/swagger.yaml
index 16a7825..a583b79 100644
--- a/apps/api/docs/swagger.yaml
+++ b/apps/api/docs/swagger.yaml
@@ -587,6 +587,11 @@ definitions:
       prompt:
         example: A watercolor robot reading a book
         type: string
+      reasoning_effort:
+        description: ReasoningEffort 推理深度，OpenAI-compatible 请求字段；开放字符串，取值随 provider
+          和模型能力而定，常见值为 none、minimal、low、medium、high、xhigh，也可配置 max 等供应商自定义值。
+        example: medium
+        type: string
       resolution:
         example: 720p
         type: string
diff --git a/apps/api/internal/clients/clients_test.go b/apps/api/internal/clients/clients_test.go
index b94226a..3f3b1fb 100644
--- a/apps/api/internal/clients/clients_test.go
+++ b/apps/api/internal/clients/clients_test.go
@@ -151,6 +151,187 @@ func TestOpenAIClientChatContract(t *testing.T) {
 	}
 }
 
+func TestOpenAIClientChatRequestNormalizesToolContext(t *testing.T) {
+	var captured map[string]any
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if err := json.NewDecoder(r.Body).Decode(&captured); err != nil {
+			t.Fatalf("decode request: %v", err)
+		}
+		_ = json.NewEncoder(w).Encode(map[string]any{
+			"id":     "chatcmpl-normalized-request",
+			"object": "chat.completion",
+			"model":  captured["model"],
+			"choices": []any{map[string]any{
+				"message": map[string]any{"role": "assistant", "content": "ok"},
+			}},
+		})
+	}))
+	defer server.Close()
+
+	_, err := (OpenAIClient{HTTPClient: server.Client()}).Run(context.Background(), Request{
+		Kind:  "chat.completions",
+		Model: "openai:gpt-4o-mini",
+		Body: map[string]any{
+			"model": "openai:gpt-4o-mini",
+			"messages": []any{
+				map[string]any{
+					"role": "assistant",
+					"functionCall": map[string]any{
+						"name":      "lookup",
+						"arguments": map[string]any{"q": "weather"},
+					},
+				},
+				map[string]any{"role": "tool", "toolCallId": "call_0", "content": "sunny"},
+				map[string]any{
+					"role": "user",
+					"content": []any{
+						map[string]any{"type": "text", "text": "keep this"},
+						map[string]any{"type": "tool_result", "tool_use_id": "toolu_1", "content": map[string]any{"ok": true}},
+					},
+				},
+			},
+		},
+		Candidate: store.RuntimeModelCandidate{
+			BaseURL:           server.URL,
+			ProviderModelName: "openai-compatible-gpt-4o-mini",
+			Credentials:       map[string]any{"apiKey": "test-key"},
+		},
+	})
+	if err != nil {
+		t.Fatalf("run openai client: %v", err)
+	}
+	messages, _ := captured["messages"].([]any)
+	if len(messages) != 4 {
+		t.Fatalf("unexpected normalized messages: %+v", messages)
+	}
+	assistant, _ := messages[0].(map[string]any)
+	if _, ok := assistant["functionCall"]; ok {
+		t.Fatalf("functionCall should be converted away: %+v", assistant)
+	}
+	toolCalls, _ := assistant["tool_calls"].([]any)
+	toolCall, _ := toolCalls[0].(map[string]any)
+	function, _ := toolCall["function"].(map[string]any)
+	if function["name"] != "lookup" || function["arguments"] != `{"q":"weather"}` {
+		t.Fatalf("unexpected normalized tool call: %+v", assistant)
+	}
+	toolMessage, _ := messages[1].(map[string]any)
+	if toolMessage["tool_call_id"] != "call_0" || toolMessage["toolCallId"] != nil {
+		t.Fatalf("tool message was not normalized: %+v", toolMessage)
+	}
+	keptUser, _ := messages[2].(map[string]any)
+	convertedToolResult, _ := messages[3].(map[string]any)
+	if keptUser["content"] != "keep this" || convertedToolResult["role"] != "tool" || convertedToolResult["tool_call_id"] != "toolu_1" || convertedToolResult["content"] != `{"ok":true}` {
+		t.Fatalf("tool_result block was not restored: user=%+v tool=%+v", keptUser, convertedToolResult)
+	}
+}
+
+func TestOpenAIClientChatResponseNormalizesReasoning(t *testing.T) {
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		_ = json.NewEncoder(w).Encode(map[string]any{
+			"id":     "chatcmpl-reasoning",
+			"object": "chat.completion",
+			"model":  "openrouter-test",
+			"choices": []any{map[string]any{
+				"message": map[string]any{
+					"role": "assistant",
+					"reasoning_details": []any{
+						map[string]any{"type": "reasoning.text", "text": "detail-"},
+						map[string]any{"type": "reasoning.summary", "summary": "summary"},
+						map[string]any{"type": "reasoning.encrypted", "data": "secret"},
+					},
+					"content": "<think>tagged</think>answer",
+				},
+			}},
+		})
+	}))
+	defer server.Close()
+
+	response, err := (OpenAIClient{HTTPClient: server.Client()}).Run(context.Background(), Request{
+		Kind:  "chat.completions",
+		Model: "OpenRouter-Test",
+		Body:  map[string]any{"model": "OpenRouter-Test", "messages": []any{map[string]any{"role": "user", "content": "ping"}}},
+		Candidate: store.RuntimeModelCandidate{
+			BaseURL:     server.URL,
+			ModelName:   "openrouter-test",
+			Credentials: map[string]any{"apiKey": "test-key"},
+		},
+	})
+	if err != nil {
+		t.Fatalf("run openai client: %v", err)
+	}
+	choices, _ := response.Result["choices"].([]any)
+	choice, _ := choices[0].(map[string]any)
+	message, _ := choice["message"].(map[string]any)
+	if message["reasoning_content"] != "detail-summarytagged" || message["content"] != "answer" {
+		t.Fatalf("reasoning was not normalized: %+v", response.Result)
+	}
+	if _, ok := message["reasoning_details"]; ok {
+		t.Fatalf("reasoning_details should be converted away: %+v", message)
+	}
+}
+
+func TestOpenAIClientChatResponseNormalizesToolCallFormats(t *testing.T) {
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		_ = json.NewEncoder(w).Encode(map[string]any{
+			"id":     "chatcmpl-tools",
+			"object": "chat.completion",
+			"model":  "tool-format-test",
+			"choices": []any{map[string]any{
+				"message": map[string]any{
+					"role": "assistant",
+					"content": []any{
+						map[string]any{"type": "text", "text": "calling tools"},
+						map[string]any{"type": "tool_use", "id": "toolu_1", "name": "anthropic_lookup", "input": map[string]any{"city": "Boston"}},
+					},
+					"toolCalls":     []any{map[string]any{"id": "call_camel", "functionCall": map[string]any{"name": "camel_lookup", "args": map[string]any{"city": "SF"}}}},
+					"function_call": map[string]any{"name": "legacy_lookup", "arguments": "{\"city\":\"NYC\"}"},
+				},
+			}},
+		})
+	}))
+	defer server.Close()
+
+	response, err := (OpenAIClient{HTTPClient: server.Client()}).Run(context.Background(), Request{
+		Kind:  "chat.completions",
+		Model: "Tool-Format-Test",
+		Body:  map[string]any{"model": "Tool-Format-Test", "messages": []any{map[string]any{"role": "user", "content": "ping"}}},
+		Candidate: store.RuntimeModelCandidate{
+			BaseURL:     server.URL,
+			ModelName:   "tool-format-test",
+			Credentials: map[string]any{"apiKey": "test-key"},
+		},
+	})
+	if err != nil {
+		t.Fatalf("run openai client: %v", err)
+	}
+	choices, _ := response.Result["choices"].([]any)
+	choice, _ := choices[0].(map[string]any)
+	message, _ := choice["message"].(map[string]any)
+	if message["content"] != "calling tools" {
+		t.Fatalf("tool_use block should be removed from content: %+v", message)
+	}
+	for _, key := range []string{"toolCalls", "function_call"} {
+		if _, ok := message[key]; ok {
+			t.Fatalf("%s should be converted away: %+v", key, message)
+		}
+	}
+	toolCalls, _ := message["tool_calls"].([]any)
+	if len(toolCalls) != 3 {
+		t.Fatalf("expected 3 normalized tool calls, got %+v", message)
+	}
+	assertToolCall := func(index int, id string, name string, arguments string) {
+		t.Helper()
+		toolCall, _ := toolCalls[index].(map[string]any)
+		function, _ := toolCall["function"].(map[string]any)
+		if toolCall["id"] != id || toolCall["type"] != "function" || function["name"] != name || function["arguments"] != arguments {
+			t.Fatalf("unexpected tool call[%d]: %+v", index, toolCall)
+		}
+	}
+	assertToolCall(0, "call_camel", "camel_lookup", "{\"city\":\"SF\"}")
+	assertToolCall(1, "call_1", "legacy_lookup", "{\"city\":\"NYC\"}")
+	assertToolCall(2, "toolu_1", "anthropic_lookup", "{\"city\":\"Boston\"}")
+}
+
 func TestOpenAIClientChatStreamContract(t *testing.T) {
 	var gotStream bool
 	var gotIncludeUsage bool
@@ -204,6 +385,133 @@ func TestOpenAIClientChatStreamContract(t *testing.T) {
 	}
 }
 
+func TestOpenAIClientChatStreamPreservesStructuredDeltas(t *testing.T) {
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "text/event-stream")
+		_, _ = w.Write([]byte("data: {\"id\":\"chatcmpl-structured\",\"object\":\"chat.completion.chunk\",\"model\":\"openrouter-reasoner\",\"choices\":[{\"delta\":{\"reasoning_details\":[{\"type\":\"reasoning.text\",\"text\":\"detail-\"},{\"type\":\"reasoning.summary\",\"summary\":\"summary\"},{\"type\":\"reasoning.encrypted\",\"data\":\"secret\"}]}}],\"usage\":null}\n\n"))
+		_, _ = w.Write([]byte("data: {\"id\":\"chatcmpl-structured\",\"object\":\"chat.completion.chunk\",\"model\":\"openrouter-reasoner\",\"choices\":[{\"delta\":{\"content\":\"<think>tagged</think>answer\"}}],\"usage\":null}\n\n"))
+		_, _ = w.Write([]byte("data: {\"id\":\"chatcmpl-structured\",\"object\":\"chat.completion.chunk\",\"model\":\"deepseek-v4\",\"choices\":[{\"delta\":{\"tool_calls\":[{\"index\":0,\"id\":\"call_1\",\"type\":\"function\",\"function\":{\"name\":\"lookup\",\"arguments\":\"{\\\"q\\\":\"}}]}}],\"usage\":null}\n\n"))
+		_, _ = w.Write([]byte("data: {\"id\":\"chatcmpl-structured\",\"object\":\"chat.completion.chunk\",\"model\":\"deepseek-v4\",\"choices\":[{\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"\\\"weather\\\"}\"}}]},\"finish_reason\":\"tool_calls\"}],\"usage\":null}\n\n"))
+		_, _ = w.Write([]byte("data: [DONE]\n\n"))
+	}))
+	defer server.Close()
+
+	captured := make([]StreamDeltaEvent, 0)
+	response, err := (OpenAIClient{HTTPClient: server.Client()}).Run(context.Background(), Request{
+		Kind:  "chat.completions",
+		Model: "DeepSeek-V4",
+		Body: map[string]any{
+			"model":    "DeepSeek-V4",
+			"messages": []any{map[string]any{"role": "user", "content": "ping"}},
+			"stream":   true,
+		},
+		Candidate: store.RuntimeModelCandidate{
+			BaseURL:     server.URL,
+			ModelName:   "deepseek-v4",
+			Credentials: map[string]any{"apiKey": "test-key"},
+		},
+		StreamDelta: func(event StreamDeltaEvent) error {
+			captured = append(captured, event)
+			return nil
+		},
+	})
+	if err != nil {
+		t.Fatalf("run openai structured stream client: %v", err)
+	}
+	if len(captured) != 4 || captured[0].ReasoningContent != "detail-summary" || captured[1].ReasoningContent != "tagged" || captured[1].Text != "answer" || captured[2].Event == nil {
+		t.Fatalf("structured stream events were not preserved: %+v", captured)
+	}
+	firstChoices, _ := captured[0].Event["choices"].([]any)
+	firstChoice, _ := firstChoices[0].(map[string]any)
+	firstDelta, _ := firstChoice["delta"].(map[string]any)
+	if firstDelta["reasoning_content"] != "detail-summary" {
+		t.Fatalf("reasoning_details were not converted in stream event: %+v", captured[0].Event)
+	}
+	if _, ok := firstDelta["reasoning_details"]; ok {
+		t.Fatalf("reasoning_details should be removed from stream event: %+v", captured[0].Event)
+	}
+	choices, _ := response.Result["choices"].([]any)
+	choice, _ := choices[0].(map[string]any)
+	message, _ := choice["message"].(map[string]any)
+	if message["reasoning_content"] != "detail-summarytagged" || message["content"] != "answer" || choice["finish_reason"] != "tool_calls" {
+		t.Fatalf("reasoning or finish reason missing from aggregated result: %+v", response.Result)
+	}
+	toolCalls, _ := message["tool_calls"].([]any)
+	toolCall, _ := toolCalls[0].(map[string]any)
+	function, _ := toolCall["function"].(map[string]any)
+	if function["arguments"] != "{\"q\":\"weather\"}" {
+		t.Fatalf("tool call arguments were not aggregated: %+v", response.Result)
+	}
+}
+
+func TestOpenAIClientChatStreamNormalizesToolCallFormats(t *testing.T) {
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "text/event-stream")
+		_, _ = w.Write([]byte("data: {\"id\":\"chatcmpl-tools-stream\",\"object\":\"chat.completion.chunk\",\"model\":\"tool-format-test\",\"choices\":[{\"delta\":{\"function_call\":{\"name\":\"legacy_lookup\",\"arguments\":\"{\\\"city\\\":\"}}}],\"usage\":null}\n\n"))
+		_, _ = w.Write([]byte("data: {\"id\":\"chatcmpl-tools-stream\",\"object\":\"chat.completion.chunk\",\"model\":\"tool-format-test\",\"choices\":[{\"delta\":{\"functionCall\":{\"arguments\":\"\\\"Boston\\\"}\"}}}],\"usage\":null}\n\n"))
+		_, _ = w.Write([]byte("data: {\"id\":\"chatcmpl-tools-stream\",\"object\":\"chat.completion.chunk\",\"model\":\"tool-format-test\",\"choices\":[{\"delta\":{\"toolCall\":{\"index\":1,\"id\":\"call_camel\",\"functionCall\":{\"name\":\"camel_lookup\",\"args\":{\"city\":\"SF\"}}}},\"finish_reason\":\"tool_calls\"}],\"usage\":null}\n\n"))
+		_, _ = w.Write([]byte("data: [DONE]\n\n"))
+	}))
+	defer server.Close()
+
+	captured := make([]StreamDeltaEvent, 0)
+	response, err := (OpenAIClient{HTTPClient: server.Client()}).Run(context.Background(), Request{
+		Kind:  "chat.completions",
+		Model: "Tool-Format-Test",
+		Body: map[string]any{
+			"model":    "Tool-Format-Test",
+			"messages": []any{map[string]any{"role": "user", "content": "ping"}},
+			"stream":   true,
+		},
+		Candidate: store.RuntimeModelCandidate{
+			BaseURL:     server.URL,
+			ModelName:   "tool-format-test",
+			Credentials: map[string]any{"apiKey": "test-key"},
+		},
+		StreamDelta: func(event StreamDeltaEvent) error {
+			captured = append(captured, event)
+			return nil
+		},
+	})
+	if err != nil {
+		t.Fatalf("run openai stream client: %v", err)
+	}
+	if len(captured) != 3 {
+		t.Fatalf("unexpected captured events: %+v", captured)
+	}
+	for _, event := range captured {
+		choices, _ := event.Event["choices"].([]any)
+		choice, _ := choices[0].(map[string]any)
+		delta, _ := choice["delta"].(map[string]any)
+		if _, ok := delta["function_call"]; ok {
+			t.Fatalf("function_call should be converted away: %+v", event.Event)
+		}
+		if _, ok := delta["functionCall"]; ok {
+			t.Fatalf("functionCall should be converted away: %+v", event.Event)
+		}
+		if _, ok := delta["toolCall"]; ok {
+			t.Fatalf("toolCall should be converted away: %+v", event.Event)
+		}
+	}
+	choices, _ := response.Result["choices"].([]any)
+	choice, _ := choices[0].(map[string]any)
+	message, _ := choice["message"].(map[string]any)
+	toolCalls, _ := message["tool_calls"].([]any)
+	if len(toolCalls) != 2 || choice["finish_reason"] != "tool_calls" {
+		t.Fatalf("unexpected normalized stream result: %+v", response.Result)
+	}
+	legacyCall, _ := toolCalls[0].(map[string]any)
+	legacyFunction, _ := legacyCall["function"].(map[string]any)
+	if legacyFunction["name"] != "legacy_lookup" || legacyFunction["arguments"] != "{\"city\":\"Boston\"}" {
+		t.Fatalf("legacy function_call was not aggregated: %+v", response.Result)
+	}
+	camelCall, _ := toolCalls[1].(map[string]any)
+	camelFunction, _ := camelCall["function"].(map[string]any)
+	if camelCall["id"] != "call_camel" || camelFunction["name"] != "camel_lookup" || camelFunction["arguments"] != "{\"city\":\"SF\"}" {
+		t.Fatalf("camel toolCall was not normalized: %+v", response.Result)
+	}
+}
+
 func TestGeminiClientChatContract(t *testing.T) {
 	var gotPath string
 	var gotKey string
@@ -261,6 +569,92 @@ func TestGeminiClientChatContract(t *testing.T) {
 	}
 }
 
+func TestGeminiClientChatRestoresToolContext(t *testing.T) {
+	var captured map[string]any
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if err := json.NewDecoder(r.Body).Decode(&captured); err != nil {
+			t.Fatalf("decode request: %v", err)
+		}
+		_ = json.NewEncoder(w).Encode(map[string]any{
+			"candidates": []any{map[string]any{
+				"content": map[string]any{"parts": []any{map[string]any{"text": "gemini ok"}}},
+			}},
+		})
+	}))
+	defer server.Close()
+
+	_, err := (GeminiClient{HTTPClient: server.Client()}).Run(context.Background(), Request{
+		Kind:  "chat.completions",
+		Model: "gemini:gemini-2.5-flash",
+		Body: map[string]any{
+			"model": "gemini:gemini-2.5-flash",
+			"messages": []any{
+				map[string]any{"role": "user", "content": "weather?"},
+				map[string]any{
+					"role":    "assistant",
+					"content": "checking",
+					"tool_calls": []any{map[string]any{
+						"id":   "call_weather",
+						"type": "function",
+						"function": map[string]any{
+							"name":      "get_weather",
+							"arguments": `{"city":"SF"}`,
+						},
+					}},
+				},
+				map[string]any{"role": "tool", "tool_call_id": "call_weather", "content": `{"temperature":"72F"}`},
+			},
+			"tools": []any{map[string]any{
+				"type": "function",
+				"function": map[string]any{
+					"name":        "get_weather",
+					"description": "lookup weather",
+					"parameters":  map[string]any{"type": "object"},
+				},
+			}},
+		},
+		Candidate: store.RuntimeModelCandidate{
+			BaseURL:           server.URL,
+			ProviderModelName: "gemini-2.5-flash",
+			ModelType:         "chat",
+			Credentials:       map[string]any{"apiKey": "gemini-key"},
+		},
+	})
+	if err != nil {
+		t.Fatalf("run gemini client: %v", err)
+	}
+	contents, _ := captured["contents"].([]any)
+	if len(contents) != 3 {
+		t.Fatalf("unexpected Gemini contents: %+v", captured)
+	}
+	modelTurn, _ := contents[1].(map[string]any)
+	if modelTurn["role"] != "model" {
+		t.Fatalf("assistant turn should become Gemini model turn: %+v", modelTurn)
+	}
+	modelParts, _ := modelTurn["parts"].([]any)
+	callPart, _ := modelParts[1].(map[string]any)
+	functionCall, _ := callPart["functionCall"].(map[string]any)
+	args, _ := functionCall["args"].(map[string]any)
+	if functionCall["name"] != "get_weather" || args["city"] != "SF" {
+		t.Fatalf("tool call was not restored for Gemini: %+v", modelTurn)
+	}
+	toolTurn, _ := contents[2].(map[string]any)
+	toolParts, _ := toolTurn["parts"].([]any)
+	responsePart, _ := toolParts[0].(map[string]any)
+	functionResponse, _ := responsePart["functionResponse"].(map[string]any)
+	response, _ := functionResponse["response"].(map[string]any)
+	if toolTurn["role"] != "user" || functionResponse["name"] != "get_weather" || response["temperature"] != "72F" {
+		t.Fatalf("tool result was not restored for Gemini: %+v", toolTurn)
+	}
+	tools, _ := captured["tools"].([]any)
+	declarationGroup, _ := tools[0].(map[string]any)
+	declarations, _ := declarationGroup["functionDeclarations"].([]any)
+	declaration, _ := declarations[0].(map[string]any)
+	if declaration["name"] != "get_weather" || declaration["description"] != "lookup weather" {
+		t.Fatalf("tool declaration was not converted for Gemini: %+v", captured["tools"])
+	}
+}
+
 func TestGeminiURLAcceptsVersionedBaseURL(t *testing.T) {
 	got := geminiURL("https://generativelanguage.googleapis.com/v1beta", "gemini-2.5-flash", "test-key")
 	want := "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent?key=test-key"
diff --git a/apps/api/internal/clients/gemini.go b/apps/api/internal/clients/gemini.go
index 6a8f6ed..cdf32ab 100644
--- a/apps/api/internal/clients/gemini.go
+++ b/apps/api/internal/clients/gemini.go
@@ -70,15 +70,185 @@ func geminiBody(request Request) map[string]any {
 		return map[string]any{"contents": contents}
 	}
 	prompt := firstNonEmptyPrompt(request.Body, "")
-	if prompt == "" {
-		prompt = textFromMessages(request.Body)
+	if prompt != "" {
+		return map[string]any{
+			"contents": []any{map[string]any{
+				"role":  "user",
+				"parts": []any{map[string]any{"text": prompt}},
+			}},
+		}
 	}
-	return map[string]any{
-		"contents": []any{map[string]any{
-			"role":  "user",
-			"parts": []any{map[string]any{"text": prompt}},
-		}},
+	body := map[string]any{"contents": geminiContentsFromMessages(request.Body)}
+	if tools := geminiToolsFromOpenAITools(request.Body["tools"]); len(tools) > 0 {
+		body["tools"] = tools
 	}
+	contents, _ := body["contents"].([]any)
+	if len(contents) > 0 {
+		return body
+	}
+	return map[string]any{"contents": []any{map[string]any{
+		"role":  "user",
+		"parts": []any{map[string]any{"text": textFromMessages(request.Body)}},
+	}}}
+}
+
+func geminiContentsFromMessages(body map[string]any) []any {
+	normalized := NormalizeChatCompletionRequestBody(body)
+	messages, _ := normalized["messages"].([]any)
+	contents := make([]any, 0, len(messages))
+	toolNames := map[string]string{}
+	for _, rawMessage := range messages {
+		message, _ := rawMessage.(map[string]any)
+		if len(message) == 0 {
+			continue
+		}
+		role := stringFromAny(message["role"])
+		if role == "tool" {
+			toolCallID := stringFromAny(message["tool_call_id"])
+			name := toolNames[toolCallID]
+			if name == "" {
+				name = toolCallID
+			}
+			if name == "" {
+				name = "tool"
+			}
+			contents = append(contents, map[string]any{
+				"role": "user",
+				"parts": []any{map[string]any{"functionResponse": map[string]any{
+					"name":     name,
+					"response": geminiFunctionResponsePayload(message["content"]),
+				}}},
+			})
+			continue
+		}
+		parts := geminiTextParts(message["content"])
+		if role == "assistant" {
+			for _, rawToolCall := range toolCallsSlice(message["tool_calls"]) {
+				toolCall, _ := rawToolCall.(map[string]any)
+				function, _ := toolCall["function"].(map[string]any)
+				name := stringFromAny(function["name"])
+				if name == "" {
+					continue
+				}
+				if id := stringFromAny(toolCall["id"]); id != "" {
+					toolNames[id] = name
+				}
+				parts = append(parts, map[string]any{"functionCall": map[string]any{
+					"name": name,
+					"args": geminiFunctionArgs(function["arguments"]),
+				}})
+			}
+		}
+		if len(parts) == 0 {
+			continue
+		}
+		contents = append(contents, map[string]any{
+			"role":  geminiRole(role),
+			"parts": parts,
+		})
+	}
+	return contents
+}
+
+func geminiRole(role string) string {
+	if role == "assistant" {
+		return "model"
+	}
+	return "user"
+}
+
+func geminiTextParts(content any) []any {
+	parts := make([]any, 0)
+	switch typed := content.(type) {
+	case string:
+		if strings.TrimSpace(typed) != "" {
+			parts = append(parts, map[string]any{"text": typed})
+		}
+	case []any:
+		for _, rawPart := range typed {
+			part, _ := rawPart.(map[string]any)
+			if text := stringFromAny(firstPresent(part["text"], part["content"])); strings.TrimSpace(text) != "" {
+				parts = append(parts, map[string]any{"text": text})
+			}
+		}
+	}
+	return parts
+}
+
+func toolCallsSlice(value any) []any {
+	switch typed := value.(type) {
+	case []any:
+		return typed
+	case map[string]any:
+		return []any{typed}
+	default:
+		return nil
+	}
+}
+
+func geminiFunctionArgs(value any) map[string]any {
+	if value == nil {
+		return map[string]any{}
+	}
+	if args, ok := value.(map[string]any); ok {
+		return args
+	}
+	if text, ok := value.(string); ok {
+		if strings.TrimSpace(text) == "" {
+			return map[string]any{}
+		}
+		var args map[string]any
+		if err := json.Unmarshal([]byte(text), &args); err == nil {
+			return args
+		}
+		return map[string]any{"arguments": text}
+	}
+	return map[string]any{"arguments": value}
+}
+
+func geminiFunctionResponsePayload(value any) map[string]any {
+	if payload, ok := value.(map[string]any); ok {
+		return payload
+	}
+	if text, ok := value.(string); ok {
+		var payload map[string]any
+		if err := json.Unmarshal([]byte(text), &payload); err == nil {
+			return payload
+		}
+		return map[string]any{"content": text}
+	}
+	if value == nil {
+		return map[string]any{}
+	}
+	return map[string]any{"content": value}
+}
+
+func geminiToolsFromOpenAITools(value any) []any {
+	tools, ok := value.([]any)
+	if !ok || len(tools) == 0 {
+		return nil
+	}
+	declarations := make([]any, 0, len(tools))
+	for _, rawTool := range tools {
+		tool, _ := rawTool.(map[string]any)
+		function, _ := tool["function"].(map[string]any)
+		name := stringFromAny(function["name"])
+		if name == "" {
+			continue
+		}
+		declaration := map[string]any{"name": name}
+		if description := stringFromAny(function["description"]); description != "" {
+			declaration["description"] = description
+		}
+		if parameters, ok := function["parameters"]; ok {
+			declaration["parameters"] = parameters
+		}
+		declarations = append(declarations, declaration)
+	}
+	if len(declarations) == 0 {
+		return nil
+	}
+	return []any{map[string]any{"functionDeclarations": declarations}}
 }
 
 func geminiResult(request Request, raw map[string]any) map[string]any {
diff --git a/apps/api/internal/clients/helpers.go b/apps/api/internal/clients/helpers.go
index 8461a38..cc853de 100644
--- a/apps/api/internal/clients/helpers.go
+++ b/apps/api/internal/clients/helpers.go
@@ -8,6 +8,7 @@ import (
 	"io"
 	"math"
 	"net/http"
+	"sort"
 	"strings"
 	"time"
 )
@@ -87,8 +88,11 @@ func decodeOpenAIStreamReader(reader io.Reader, onDelta StreamDelta) (map[string
 	scanner.Buffer(make([]byte, 0, 64*1024), 16*1024*1024)
 	rawLines := make([]string, 0)
 	parts := make([]string, 0)
+	reasoningParts := make([]string, 0)
 	var last map[string]any
 	var usage Usage
+	finishReason := ""
+	toolCalls := map[int]map[string]any{}
 	for scanner.Scan() {
 		rawLine := scanner.Text()
 		rawLines = append(rawLines, rawLine)
@@ -104,13 +108,23 @@ func decodeOpenAIStreamReader(reader io.Reader, onDelta StreamDelta) (map[string
 		if err := json.Unmarshal([]byte(payload), &event); err != nil {
 			continue
 		}
+		event = NormalizeChatCompletionStreamEvent(event)
 		last = event
-		if text := streamEventText(event); text != "" {
+		text := streamEventText(event)
+		reasoningText := streamEventReasoningContent(event)
+		if text != "" {
 			parts = append(parts, text)
-			if onDelta != nil {
-				if err := onDelta(text); err != nil {
-					return nil, true, err
-				}
+		}
+		if reasoningText != "" {
+			reasoningParts = append(reasoningParts, reasoningText)
+		}
+		aggregateStreamToolCalls(event, toolCalls)
+		if reason := streamEventFinishReason(event); reason != "" {
+			finishReason = reason
+		}
+		if onDelta != nil {
+			if err := onDelta(StreamDeltaEvent{Text: text, ReasoningContent: reasoningText, Event: event}); err != nil {
+				return nil, true, err
 			}
 		}
 		if eventUsage := usageFromOpenAI(event); eventUsage.TotalTokens > 0 {
@@ -131,7 +145,7 @@ func decodeOpenAIStreamReader(reader io.Reader, onDelta StreamDelta) (map[string
 		}
 		return out, true, nil
 	}
-	return buildOpenAIStreamResult(last, parts, usage), true, nil
+	return buildOpenAIStreamResult(last, parts, reasoningParts, toolCalls, finishReason, usage), true, nil
 }
 
 func decodeOpenAIStream(raw []byte) (map[string]any, bool) {
@@ -142,22 +156,32 @@ func decodeOpenAIStream(raw []byte) (map[string]any, bool) {
 	return result, ok && err == nil
 }
 
-func buildOpenAIStreamResult(last map[string]any, parts []string, usage Usage) map[string]any {
-	if len(parts) == 0 {
+func buildOpenAIStreamResult(last map[string]any, parts []string, reasoningParts []string, toolCalls map[int]map[string]any, finishReason string, usage Usage) map[string]any {
+	if len(parts) == 0 && len(reasoningParts) == 0 && len(toolCalls) == 0 {
 		return last
 	}
+	message := map[string]any{
+		"role":    "assistant",
+		"content": strings.Join(parts, ""),
+	}
+	if len(reasoningParts) > 0 {
+		message["reasoning_content"] = strings.Join(reasoningParts, "")
+	}
+	if len(toolCalls) > 0 {
+		message["tool_calls"] = sortedStreamToolCalls(toolCalls)
+	}
+	if finishReason == "" {
+		finishReason = "stop"
+	}
 	var out map[string]any
 	out = map[string]any{
 		"id":     stringFromAny(firstPresent(last["id"], "chatcmpl-stream")),
 		"object": "chat.completion",
 		"model":  stringFromAny(last["model"]),
 		"choices": []any{map[string]any{
-			"index": 0,
-			"message": map[string]any{
-				"role":    "assistant",
-				"content": strings.Join(parts, ""),
-			},
-			"finish_reason": "stop",
+			"index":         0,
+			"message":       message,
+			"finish_reason": finishReason,
 		}},
 	}
 	if usage.TotalTokens > 0 {
@@ -170,6 +194,571 @@ func buildOpenAIStreamResult(last map[string]any, parts []string, usage Usage) m
 	return out
 }
 
+// NormalizeChatCompletionRequestBody 将后续请求里的工具调用上下文还原为
+// OpenAI Chat Completions 标准格式，便于再次发送给 OpenAI-compatible 上游。
+func NormalizeChatCompletionRequestBody(body map[string]any) map[string]any {
+	if body == nil {
+		return nil
+	}
+	out := cloneBody(body)
+	messages, ok := out["messages"].([]any)
+	if !ok {
+		return out
+	}
+	normalizedMessages := make([]any, 0, len(messages))
+	for _, rawMessage := range messages {
+		message, ok := rawMessage.(map[string]any)
+		if !ok {
+			normalizedMessages = append(normalizedMessages, rawMessage)
+			continue
+		}
+		copied := cloneMapAny(message)
+		normalizeToolCallsContainer(copied, false)
+		normalizeToolMessageFields(copied)
+		toolMessages, cleanContent, changed := toolResultMessagesFromContent(copied["content"])
+		if changed {
+			if cleanContent != nil && contentHasText(cleanContent) {
+				copied["content"] = cleanContent
+				normalizedMessages = append(normalizedMessages, copied)
+			} else if len(copied) > 1 || copied["role"] != nil {
+				delete(copied, "content")
+				if len(copied) > 1 {
+					normalizedMessages = append(normalizedMessages, copied)
+				}
+			}
+			normalizedMessages = append(normalizedMessages, toolMessages...)
+			continue
+		}
+		normalizedMessages = append(normalizedMessages, copied)
+	}
+	out["messages"] = normalizedMessages
+	return out
+}
+
+func cloneMapAny(source map[string]any) map[string]any {
+	if source == nil {
+		return nil
+	}
+	out := make(map[string]any, len(source))
+	for key, value := range source {
+		out[key] = value
+	}
+	return out
+}
+
+// NormalizeChatCompletionResult 将供应商自定义推理字段归一化到
+// message.reasoning_content，并从最终回答 content 中剥离内联推理块。
+// 加密推理载荷不可展示，且不应作为正文输出，因此会被忽略。
+func NormalizeChatCompletionResult(result map[string]any) map[string]any {
+	if result == nil {
+		return nil
+	}
+	choices, _ := result["choices"].([]any)
+	for _, rawChoice := range choices {
+		choice, _ := rawChoice.(map[string]any)
+		if message, ok := choice["message"].(map[string]any); ok {
+			normalizeToolCallsContainer(message, false)
+			normalizeReasoningContainer(message, false)
+		}
+		if delta, ok := choice["delta"].(map[string]any); ok {
+			normalizeToolCallsContainer(delta, true)
+			normalizeReasoningContainer(delta, true)
+		}
+	}
+	return result
+}
+
+// NormalizeChatCompletionStreamEvent 将供应商自定义流式推理字段
+// （例如 reasoning_details 或 reasoning）归一化到 delta.reasoning_content。
+func NormalizeChatCompletionStreamEvent(event map[string]any) map[string]any {
+	if event == nil {
+		return nil
+	}
+	choices, _ := event["choices"].([]any)
+	for _, rawChoice := range choices {
+		choice, _ := rawChoice.(map[string]any)
+		if delta, ok := choice["delta"].(map[string]any); ok {
+			normalizeToolCallsContainer(delta, true)
+			normalizeReasoningContainer(delta, true)
+		}
+		if message, ok := choice["message"].(map[string]any); ok {
+			normalizeToolCallsContainer(message, false)
+			normalizeReasoningContainer(message, false)
+		}
+	}
+	return event
+}
+
+func normalizeToolCallsContainer(container map[string]any, stream bool) {
+	if container == nil {
+		return
+	}
+	toolCalls := make([]any, 0)
+	for _, rawToolCall := range rawToolCallValues(container) {
+		for _, normalized := range normalizeRawToolCalls(rawToolCall, len(toolCalls), stream) {
+			toolCalls = append(toolCalls, normalized)
+		}
+	}
+	if contentToolCalls, cleanContent, changed := toolCallsFromContent(container["content"], len(toolCalls), stream); changed {
+		toolCalls = append(toolCalls, contentToolCalls...)
+		setNormalizedContent(container, cleanContent, stream)
+	}
+	if partToolCalls := toolCallsFromParts(container["parts"], len(toolCalls), stream); len(partToolCalls) > 0 {
+		toolCalls = append(toolCalls, partToolCalls...)
+		delete(container, "parts")
+	}
+	if len(toolCalls) > 0 {
+		container["tool_calls"] = toolCalls
+	}
+	for _, key := range []string{"tool_call", "toolCall", "toolCalls", "function_call", "functionCall"} {
+		delete(container, key)
+	}
+}
+
+func normalizeToolMessageFields(message map[string]any) {
+	if message == nil {
+		return
+	}
+	if id := firstNonEmptyString(message["tool_call_id"], message["toolCallId"], message["tool_use_id"], message["toolUseId"], message["call_id"], message["callId"]); id != "" {
+		message["tool_call_id"] = id
+	}
+	for _, key := range []string{"toolCallId", "tool_use_id", "toolUseId", "call_id", "callId"} {
+		delete(message, key)
+	}
+}
+
+func toolResultMessagesFromContent(value any) ([]any, any, bool) {
+	blocks, ok := value.([]any)
+	if !ok {
+		return nil, nil, false
+	}
+	toolMessages := make([]any, 0)
+	remaining := make([]any, 0, len(blocks))
+	for _, rawBlock := range blocks {
+		block, _ := rawBlock.(map[string]any)
+		if len(block) == 0 || stringFromAny(block["type"]) != "tool_result" {
+			remaining = append(remaining, rawBlock)
+			continue
+		}
+		message := map[string]any{
+			"role":         "tool",
+			"tool_call_id": firstNonEmptyString(block["tool_call_id"], block["toolCallId"], block["tool_use_id"], block["toolUseId"], block["id"]),
+			"content":      toolResultContent(block["content"]),
+		}
+		toolMessages = append(toolMessages, message)
+	}
+	if len(toolMessages) == 0 {
+		return nil, nil, false
+	}
+	return toolMessages, contentBlocksText(remaining), true
+}
+
+func toolResultContent(value any) any {
+	if text, ok := value.(string); ok {
+		return text
+	}
+	return jsonStringFromAny(value)
+}
+
+func contentHasText(value any) bool {
+	switch typed := value.(type) {
+	case string:
+		return strings.TrimSpace(typed) != ""
+	case []any:
+		return len(typed) > 0
+	default:
+		return value != nil
+	}
+}
+
+func rawToolCallValues(container map[string]any) []any {
+	values := make([]any, 0, 6)
+	for _, key := range []string{"tool_calls", "tool_call", "toolCalls", "toolCall", "function_call", "functionCall"} {
+		if value, ok := container[key]; ok {
+			values = append(values, value)
+		}
+	}
+	return values
+}
+
+func normalizeRawToolCalls(value any, startIndex int, stream bool) []any {
+	switch typed := value.(type) {
+	case []any:
+		out := make([]any, 0, len(typed))
+		for _, raw := range typed {
+			if toolCall := normalizeToolCall(raw, startIndex+len(out), stream); toolCall != nil {
+				out = append(out, toolCall)
+			}
+		}
+		return out
+	default:
+		if toolCall := normalizeToolCall(value, startIndex, stream); toolCall != nil {
+			return []any{toolCall}
+		}
+		return nil
+	}
+}
+
+func normalizeToolCall(value any, index int, stream bool) map[string]any {
+	source, _ := value.(map[string]any)
+	if len(source) == 0 {
+		return nil
+	}
+	functionSource := mapFromAny(source["function"])
+	if len(functionSource) == 0 {
+		functionSource = mapFromAny(firstPresent(source["function_call"], source["functionCall"]))
+	}
+	name := firstNonEmptyString(
+		functionSource["name"], source["name"], source["function_name"], source["functionName"], source["tool_name"], source["toolName"],
+	)
+	arguments, hasArguments := toolCallArguments(functionSource)
+	if !hasArguments {
+		arguments, hasArguments = toolCallArguments(source)
+	}
+	if name == "" && !hasArguments && firstNonEmptyString(source["id"], source["call_id"], source["callId"], source["tool_call_id"], source["toolCallId"]) == "" {
+		return nil
+	}
+	function := map[string]any{}
+	if name != "" {
+		function["name"] = name
+	}
+	if hasArguments {
+		function["arguments"] = arguments
+	}
+	toolCall := map[string]any{
+		"type":     firstNonEmptyString(source["type"], "function"),
+		"function": function,
+	}
+	if id := firstNonEmptyString(source["id"], source["call_id"], source["callId"], source["tool_call_id"], source["toolCallId"]); id != "" {
+		toolCall["id"] = id
+	} else if !stream {
+		toolCall["id"] = fmt.Sprintf("call_%d", index)
+	}
+	if stream {
+		if rawIndex, ok := firstPresent(source["index"], source["idx"]).(float64); ok {
+			toolCall["index"] = int(math.Round(rawIndex))
+		} else if rawIndex, ok := firstPresent(source["index"], source["idx"]).(int); ok {
+			toolCall["index"] = rawIndex
+		} else {
+			toolCall["index"] = index
+		}
+	}
+	return toolCall
+}
+
+func toolCallsFromContent(value any, startIndex int, stream bool) ([]any, any, bool) {
+	blocks, ok := value.([]any)
+	if !ok {
+		return nil, nil, false
+	}
+	toolCalls := make([]any, 0)
+	remaining := make([]any, 0, len(blocks))
+	containsReasoning := false
+	for _, rawBlock := range blocks {
+		block, _ := rawBlock.(map[string]any)
+		if len(block) == 0 {
+			remaining = append(remaining, rawBlock)
+			continue
+		}
+		switch stringFromAny(block["type"]) {
+		case "tool_use":
+			if toolCall := normalizeToolUseBlock(block, startIndex+len(toolCalls), stream); toolCall != nil {
+				toolCalls = append(toolCalls, toolCall)
+			}
+		case "tool_result":
+			remaining = append(remaining, rawBlock)
+		default:
+			if isReasoningContentBlock(block) {
+				containsReasoning = true
+			}
+			remaining = append(remaining, rawBlock)
+		}
+	}
+	if len(toolCalls) == 0 {
+		return nil, nil, false
+	}
+	if containsReasoning {
+		return toolCalls, remaining, true
+	}
+	return toolCalls, contentBlocksText(remaining), true
+}
+
+func normalizeToolUseBlock(block map[string]any, index int, stream bool) map[string]any {
+	toolCall := map[string]any{
+		"type": "function",
+		"function": map[string]any{
+			"name":      stringFromAny(block["name"]),
+			"arguments": jsonStringFromAny(block["input"]),
+		},
+	}
+	if id := firstNonEmptyString(block["id"], block["tool_use_id"], block["toolUseId"]); id != "" {
+		toolCall["id"] = id
+	} else if !stream {
+		toolCall["id"] = fmt.Sprintf("call_%d", index)
+	}
+	if stream {
+		toolCall["index"] = index
+	}
+	return toolCall
+}
+
+func toolCallsFromParts(value any, startIndex int, stream bool) []any {
+	parts, ok := value.([]any)
+	if !ok {
+		return nil
+	}
+	out := make([]any, 0)
+	for _, rawPart := range parts {
+		part, _ := rawPart.(map[string]any)
+		if functionCall := mapFromAny(firstPresent(part["functionCall"], part["function_call"])); len(functionCall) > 0 {
+			if toolCall := normalizeGeminiFunctionCall(functionCall, startIndex+len(out), stream); toolCall != nil {
+				out = append(out, toolCall)
+			}
+		}
+	}
+	return out
+}
+
+func normalizeGeminiFunctionCall(functionCall map[string]any, index int, stream bool) map[string]any {
+	toolCall := map[string]any{
+		"type": "function",
+		"function": map[string]any{
+			"name":      stringFromAny(functionCall["name"]),
+			"arguments": jsonStringFromAny(firstPresent(functionCall["args"], functionCall["arguments"])),
+		},
+	}
+	if id := firstNonEmptyString(functionCall["id"], functionCall["call_id"], functionCall["callId"]); id != "" {
+		toolCall["id"] = id
+	} else if !stream {
+		toolCall["id"] = fmt.Sprintf("call_%d", index)
+	}
+	if stream {
+		toolCall["index"] = index
+	}
+	return toolCall
+}
+
+func setNormalizedContent(container map[string]any, value any, stream bool) {
+	if text, ok := value.(string); ok && text == "" {
+		if stream {
+			delete(container, "content")
+			return
+		}
+		container["content"] = nil
+		return
+	}
+	container["content"] = value
+}
+
+func isReasoningContentBlock(block map[string]any) bool {
+	switch stringFromAny(block["type"]) {
+	case "thinking", "redacted_thinking", "reasoning.text", "reasoning.summary", "reasoning.encrypted":
+		return true
+	default:
+		return false
+	}
+}
+
+func contentBlocksText(blocks []any) string {
+	parts := make([]string, 0, len(blocks))
+	for _, rawBlock := range blocks {
+		switch block := rawBlock.(type) {
+		case string:
+			parts = append(parts, block)
+		case map[string]any:
+			if text := stringFromAny(firstPresent(block["text"], block["content"])); text != "" {
+				parts = append(parts, text)
+			}
+		}
+	}
+	return strings.Join(parts, "")
+}
+
+func toolCallArguments(source map[string]any) (string, bool) {
+	for _, key := range []string{"arguments", "args", "input", "parameters"} {
+		if value, ok := source[key]; ok {
+			return jsonStringFromAny(value), true
+		}
+	}
+	return "", false
+}
+
+func jsonStringFromAny(value any) string {
+	if value == nil {
+		return ""
+	}
+	if text, ok := value.(string); ok {
+		return text
+	}
+	encoded, err := json.Marshal(value)
+	if err != nil {
+		return ""
+	}
+	return string(encoded)
+}
+
+func normalizeReasoningContainer(container map[string]any, deleteEmptyContent bool) {
+	if container == nil {
+		return
+	}
+	reasoningParts := make([]string, 0, 3)
+	if reasoning := reasoningDetailsText(container["reasoning_details"]); reasoning != "" {
+		reasoningParts = append(reasoningParts, reasoning)
+	} else if reasoning := stringFromAny(container["reasoning_content"]); reasoning != "" {
+		reasoningParts = append(reasoningParts, reasoning)
+	} else if reasoning := stringFromAny(container["reasoning"]); reasoning != "" {
+		reasoningParts = append(reasoningParts, reasoning)
+	}
+	if content, ok := container["content"]; ok {
+		cleanContent, contentReasoning, changed := normalizeReasoningContentValue(content)
+		if changed {
+			if deleteEmptyContent {
+				if text, ok := cleanContent.(string); ok && text == "" {
+					delete(container, "content")
+				} else {
+					container["content"] = cleanContent
+				}
+			} else {
+				container["content"] = cleanContent
+			}
+		}
+		if contentReasoning != "" {
+			reasoningParts = append(reasoningParts, contentReasoning)
+		}
+	}
+	if len(reasoningParts) > 0 {
+		container["reasoning_content"] = strings.Join(reasoningParts, "")
+	}
+	delete(container, "reasoning_details")
+	delete(container, "reasoning")
+}
+
+func reasoningDetailsText(value any) string {
+	rawItems, ok := value.([]any)
+	if !ok {
+		return ""
+	}
+	parts := make([]string, 0, len(rawItems))
+	for _, rawItem := range rawItems {
+		item, _ := rawItem.(map[string]any)
+		switch stringFromAny(item["type"]) {
+		case "reasoning.text":
+			if text := stringFromAny(item["text"]); text != "" {
+				parts = append(parts, text)
+			}
+		case "reasoning.summary":
+			if summary := stringFromAny(item["summary"]); summary != "" {
+				parts = append(parts, summary)
+			}
+		}
+	}
+	return strings.Join(parts, "")
+}
+
+func normalizeReasoningContentValue(value any) (any, string, bool) {
+	switch typed := value.(type) {
+	case string:
+		cleanContent, reasoning, changed := splitTaggedReasoningText(typed)
+		return cleanContent, reasoning, changed
+	case []any:
+		contentParts := make([]string, 0, len(typed))
+		reasoningParts := make([]string, 0)
+		changed := false
+		for _, rawItem := range typed {
+			switch item := rawItem.(type) {
+			case string:
+				contentParts = append(contentParts, item)
+			case map[string]any:
+				switch stringFromAny(item["type"]) {
+				case "thinking":
+					if thinking := stringFromAny(item["thinking"]); thinking != "" {
+						reasoningParts = append(reasoningParts, thinking)
+					}
+					changed = true
+				case "redacted_thinking", "reasoning.encrypted":
+					changed = true
+				case "reasoning.text":
+					if text := stringFromAny(item["text"]); text != "" {
+						reasoningParts = append(reasoningParts, text)
+					}
+					changed = true
+				case "reasoning.summary":
+					if summary := stringFromAny(item["summary"]); summary != "" {
+						reasoningParts = append(reasoningParts, summary)
+					}
+					changed = true
+				case "text", "output_text":
+					if text := stringFromAny(firstPresent(item["text"], item["content"])); text != "" {
+						cleanText, reasoning, tagged := splitTaggedReasoningText(text)
+						contentParts = append(contentParts, cleanText)
+						if reasoning != "" {
+							reasoningParts = append(reasoningParts, reasoning)
+						}
+						changed = changed || tagged
+					}
+				default:
+					if text := stringFromAny(firstPresent(item["text"], item["content"])); text != "" {
+						contentParts = append(contentParts, text)
+					}
+				}
+			}
+		}
+		if !changed {
+			return value, "", false
+		}
+		return strings.Join(contentParts, ""), strings.Join(reasoningParts, ""), true
+	default:
+		return value, "", false
+	}
+}
+
+func splitTaggedReasoningText(text string) (string, string, bool) {
+	lower := strings.ToLower(text)
+	clean := strings.Builder{}
+	reasoning := strings.Builder{}
+	changed := false
+	for offset := 0; offset < len(text); {
+		start, tag := nextReasoningOpenTag(lower, offset)
+		if start < 0 {
+			clean.WriteString(text[offset:])
+			break
+		}
+		clean.WriteString(text[offset:start])
+		openEnd := start + len("<"+tag+">")
+		closeToken := "</" + tag + ">"
+		closeStart := strings.Index(lower[openEnd:], closeToken)
+		if closeStart < 0 {
+			reasoning.WriteString(text[openEnd:])
+			offset = len(text)
+			changed = true
+			break
+		}
+		closeStart += openEnd
+		reasoning.WriteString(text[openEnd:closeStart])
+		offset = closeStart + len(closeToken)
+		changed = true
+	}
+	return clean.String(), reasoning.String(), changed
+}
+
+func nextReasoningOpenTag(lower string, offset int) (int, string) {
+	bestStart := -1
+	bestTag := ""
+	for _, tag := range []string{"think", "reasoning", "analysis"} {
+		needle := "<" + tag + ">"
+		idx := strings.Index(lower[offset:], needle)
+		if idx < 0 {
+			continue
+		}
+		absolute := offset + idx
+		if bestStart < 0 || absolute < bestStart {
+			bestStart = absolute
+			bestTag = tag
+		}
+	}
+	return bestStart, bestTag
+}
+
 func streamEventText(event map[string]any) string {
 	if choices, ok := event["choices"].([]any); ok {
 		for _, rawChoice := range choices {
@@ -195,6 +784,91 @@ func streamEventText(event map[string]any) string {
 	return ""
 }
 
+func streamEventReasoningContent(event map[string]any) string {
+	if choices, ok := event["choices"].([]any); ok {
+		for _, rawChoice := range choices {
+			choice, _ := rawChoice.(map[string]any)
+			if delta, ok := choice["delta"].(map[string]any); ok {
+				if content, ok := delta["reasoning_content"].(string); ok {
+					return content
+				}
+				if content, ok := delta["reasoning"].(string); ok {
+					return content
+				}
+			}
+			if message, ok := choice["message"].(map[string]any); ok {
+				if content, ok := message["reasoning_content"].(string); ok {
+					return content
+				}
+			}
+		}
+	}
+	return ""
+}
+
+func streamEventFinishReason(event map[string]any) string {
+	if choices, ok := event["choices"].([]any); ok {
+		for _, rawChoice := range choices {
+			choice, _ := rawChoice.(map[string]any)
+			if reason, ok := choice["finish_reason"].(string); ok && reason != "" {
+				return reason
+			}
+		}
+	}
+	return ""
+}
+
+func aggregateStreamToolCalls(event map[string]any, toolCalls map[int]map[string]any) {
+	choices, _ := event["choices"].([]any)
+	for _, rawChoice := range choices {
+		choice, _ := rawChoice.(map[string]any)
+		delta, _ := choice["delta"].(map[string]any)
+		rawToolCalls, _ := delta["tool_calls"].([]any)
+		for _, rawToolCall := range rawToolCalls {
+			incoming, _ := rawToolCall.(map[string]any)
+			index := intFromAny(incoming["index"])
+			current := toolCalls[index]
+			if current == nil {
+				current = map[string]any{}
+				toolCalls[index] = current
+			}
+			for _, key := range []string{"id", "type"} {
+				if value, ok := incoming[key].(string); ok && value != "" {
+					current[key] = value
+				}
+			}
+			incomingFn, _ := incoming["function"].(map[string]any)
+			if len(incomingFn) == 0 {
+				continue
+			}
+			currentFn, _ := current["function"].(map[string]any)
+			if currentFn == nil {
+				currentFn = map[string]any{}
+				current["function"] = currentFn
+			}
+			if name, ok := incomingFn["name"].(string); ok && name != "" {
+				currentFn["name"] = stringFromAny(currentFn["name"]) + name
+			}
+			if arguments, ok := incomingFn["arguments"].(string); ok && arguments != "" {
+				currentFn["arguments"] = stringFromAny(currentFn["arguments"]) + arguments
+			}
+		}
+	}
+}
+
+func sortedStreamToolCalls(toolCalls map[int]map[string]any) []any {
+	indices := make([]int, 0, len(toolCalls))
+	for index := range toolCalls {
+		indices = append(indices, index)
+	}
+	sort.Ints(indices)
+	out := make([]any, 0, len(indices))
+	for _, index := range indices {
+		out = append(out, toolCalls[index])
+	}
+	return out
+}
+
 func usageFromOpenAI(result map[string]any) Usage {
 	usage, _ := result["usage"].(map[string]any)
 	input := intFromAny(firstPresent(usage["prompt_tokens"], usage["input_tokens"]))
@@ -254,6 +928,15 @@ func stringFromAny(value any) string {
 	return ""
 }
 
+func firstNonEmptyString(values ...any) string {
+	for _, value := range values {
+		if text := strings.TrimSpace(stringFromAny(value)); text != "" {
+			return text
+		}
+	}
+	return ""
+}
+
 func firstPresent(values ...any) any {
 	for _, value := range values {
 		if value != nil {
diff --git a/apps/api/internal/clients/openai.go b/apps/api/internal/clients/openai.go
index 8a579e1..12ef168 100644
--- a/apps/api/internal/clients/openai.go
+++ b/apps/api/internal/clients/openai.go
@@ -23,6 +23,9 @@ func (c OpenAIClient) Run(ctx context.Context, request Request) (Response, error
 		return Response{}, &ClientError{Code: "unsupported_kind", Message: "unsupported openai request kind", Retryable: false}
 	}
 	body := cloneBody(request.Body)
+	if request.Kind == "chat.completions" {
+		body = NormalizeChatCompletionRequestBody(body)
+	}
 	body["model"] = upstreamModelName(request.Candidate)
 	stream := request.Stream || boolValue(body, "stream")
 	ensureOpenAIStreamUsage(body, request.Kind, stream)
@@ -40,6 +43,9 @@ func (c OpenAIClient) Run(ctx context.Context, request Request) (Response, error
 	}
 	requestID := requestIDFromHTTPResponse(resp)
 	result, err := decodeOpenAIResponse(resp, stream, request.StreamDelta)
+	if err == nil && request.Kind == "chat.completions" {
+		result = NormalizeChatCompletionResult(result)
+	}
 	responseFinishedAt := time.Now()
 	if err != nil {
 		return Response{}, annotateResponseError(err, requestID, responseStartedAt, responseFinishedAt)
diff --git a/apps/api/internal/clients/types.go b/apps/api/internal/clients/types.go
index 8d88793..8e46d8c 100644
--- a/apps/api/internal/clients/types.go
+++ b/apps/api/internal/clients/types.go
@@ -48,7 +48,13 @@ type Progress struct {
 	Payload  map[string]any
 }
 
-type StreamDelta func(text string) error
+type StreamDeltaEvent struct {
+	Text             string
+	ReasoningContent string
+	Event            map[string]any
+}
+
+type StreamDelta func(event StreamDeltaEvent) error
 
 type Client interface {
 	Run(ctx context.Context, request Request) (Response, error)
diff --git a/apps/api/internal/httpapi/chat_completions_mode_test.go b/apps/api/internal/httpapi/chat_completions_mode_test.go
index 599046c..d16b9f4 100644
--- a/apps/api/internal/httpapi/chat_completions_mode_test.go
+++ b/apps/api/internal/httpapi/chat_completions_mode_test.go
@@ -50,7 +50,7 @@ func TestWriteCompatibleTaskResponseReturnsJSONWhenStreamIsFalse(t *testing.T) {
 	req := httptest.NewRequest(http.MethodPost, "/api/v1/chat/completions", nil)
 	recorder := httptest.NewRecorder()
 
-	writeCompatibleTaskResponse(context.Background(), recorder, req, executor, "chat.completions", "gpt-test", store.GatewayTask{ID: "task-test"}, &auth.User{}, false)
+	writeCompatibleTaskResponse(context.Background(), recorder, req, executor, "chat.completions", "gpt-test", store.GatewayTask{ID: "task-test"}, &auth.User{}, false, false)
 
 	if recorder.Code != http.StatusOK {
 		t.Fatalf("status=%d want=%d body=%s", recorder.Code, http.StatusOK, recorder.Body.String())
@@ -69,13 +69,13 @@ func TestWriteCompatibleTaskResponseReturnsJSONWhenStreamIsFalse(t *testing.T) {
 
 func TestWriteCompatibleTaskResponseReturnsSSEWhenStreamIsTrue(t *testing.T) {
 	executor := &fakeTaskExecutor{
-		deltas: []string{"hel", "lo"},
-		output: map[string]any{"id": "chatcmpl-test", "object": "chat.completion"},
+		deltas: []clients.StreamDeltaEvent{{Text: "hel"}, {Text: "lo"}},
+		output: map[string]any{"id": "chatcmpl-test", "object": "chat.completion", "usage": map[string]any{"prompt_tokens": 1, "completion_tokens": 2, "total_tokens": 3}},
 	}
 	req := httptest.NewRequest(http.MethodPost, "/api/v1/chat/completions", nil)
 	recorder := httptest.NewRecorder()
 
-	writeCompatibleTaskResponse(context.Background(), recorder, req, executor, "chat.completions", "gpt-test", store.GatewayTask{ID: "task-test"}, &auth.User{}, true)
+	writeCompatibleTaskResponse(context.Background(), recorder, req, executor, "chat.completions", "gpt-test", store.GatewayTask{ID: "task-test"}, &auth.User{}, true, true)
 
 	if executor.executeCalls != 0 || executor.streamCalls != 1 {
 		t.Fatalf("expected stream execute only, got execute=%d stream=%d", executor.executeCalls, executor.streamCalls)
@@ -84,17 +84,53 @@ func TestWriteCompatibleTaskResponseReturnsSSEWhenStreamIsTrue(t *testing.T) {
 		t.Fatalf("Content-Type=%q want text/event-stream", contentType)
 	}
 	body := recorder.Body.String()
-	for _, want := range []string{"event: message", `"content":"hel"`, `"content":"lo"`, `"finish_reason":"stop"`} {
+	for _, want := range []string{`data: {`, `"role":"assistant"`, `"created":`, `"system_fingerprint":`, `"content":"hel"`, `"content":"lo"`, `"finish_reason":"stop"`, `"usage":{"completion_tokens":2,"prompt_tokens":1,"total_tokens":3}`, "data: [DONE]"} {
 		if !strings.Contains(body, want) {
 			t.Fatalf("SSE body missing %s: %s", want, body)
 		}
 	}
+	if strings.Contains(body, "event: message") {
+		t.Fatalf("chat completions stream should use OpenAI data-only SSE frames: %s", body)
+	}
+}
+
+func TestWriteCompatibleTaskResponseStreamsStructuredToolAndReasoningDeltas(t *testing.T) {
+	executor := &fakeTaskExecutor{
+		deltas: []clients.StreamDeltaEvent{
+			{Event: map[string]any{"id": "chatcmpl-upstream", "object": "chat.completion.chunk", "created": float64(1710000000), "model": "deepseek-v4", "system_fingerprint": "fp-test", "choices": []any{map[string]any{"index": float64(0), "delta": map[string]any{"reasoning_details": []any{map[string]any{"type": "reasoning.text", "text": "detail-"}, map[string]any{"type": "reasoning.summary", "summary": "summary"}, map[string]any{"type": "reasoning.encrypted", "data": "secret"}}}, "finish_reason": nil}}}},
+			{Event: map[string]any{"id": "chatcmpl-upstream", "object": "chat.completion.chunk", "created": float64(1710000000), "model": "deepseek-v4", "system_fingerprint": "fp-test", "choices": []any{map[string]any{"index": float64(0), "delta": map[string]any{"content": "<think>tagged</think>answer"}, "finish_reason": nil}}}},
+			{Event: map[string]any{"id": "chatcmpl-upstream", "object": "chat.completion.chunk", "created": float64(1710000000), "model": "deepseek-v4", "system_fingerprint": "fp-test", "choices": []any{map[string]any{"index": float64(0), "delta": map[string]any{"functionCall": map[string]any{"name": "legacy_lookup", "arguments": "{\"city\":\"Boston\"}"}}, "finish_reason": nil}}}},
+			{Event: map[string]any{"id": "chatcmpl-upstream", "object": "chat.completion.chunk", "created": float64(1710000000), "model": "deepseek-v4", "system_fingerprint": "fp-test", "choices": []any{map[string]any{"index": float64(0), "delta": map[string]any{"tool_calls": []any{map[string]any{"index": float64(0), "id": "call_1", "type": "function", "function": map[string]any{"name": "lookup", "arguments": "{\"q\":"}}}}, "finish_reason": nil}}}},
+			{Event: map[string]any{"id": "chatcmpl-upstream", "object": "chat.completion.chunk", "created": float64(1710000000), "model": "deepseek-v4", "system_fingerprint": "fp-test", "choices": []any{map[string]any{"index": float64(0), "delta": map[string]any{"tool_calls": []any{map[string]any{"index": float64(0), "function": map[string]any{"arguments": "\"weather\"}"}}}}, "finish_reason": "tool_calls"}}}},
+			{Event: map[string]any{"id": "chatcmpl-upstream", "object": "chat.completion.chunk", "created": float64(1710000000), "model": "deepseek-v4", "choices": []any{}, "usage": map[string]any{"prompt_tokens": float64(4), "completion_tokens": float64(5), "total_tokens": float64(9)}}},
+		},
+		output: map[string]any{"id": "chatcmpl-upstream", "object": "chat.completion", "model": "deepseek-v4"},
+	}
+	req := httptest.NewRequest(http.MethodPost, "/api/v1/chat/completions", nil)
+	recorder := httptest.NewRecorder()
+
+	writeCompatibleTaskResponse(context.Background(), recorder, req, executor, "chat.completions", "gpt-test", store.GatewayTask{ID: "task-test"}, &auth.User{}, true, true)
+
+	body := recorder.Body.String()
+	roleIndex := strings.Index(body, `"role":"assistant"`)
+	reasoningIndex := strings.Index(body, `"reasoning_content":"detail-summary"`)
+	if roleIndex < 0 || reasoningIndex < 0 || roleIndex > reasoningIndex {
+		t.Fatalf("assistant role should be emitted before structured deltas: %s", body)
+	}
+	for _, want := range []string{`"system_fingerprint":"fp-test"`, `"created":1710000000`, `"reasoning_content":"tagged"`, `"content":"answer"`, `"tool_calls":[{"function":{"arguments":"{\"city\":\"Boston\"}","name":"legacy_lookup"}`, `"tool_calls":[{"function":{"arguments":"{\"q\":"`, `"finish_reason":"tool_calls"`, `"choices":[],"created":1710000000`, `"usage":{"completion_tokens":5,"prompt_tokens":4,"total_tokens":9}`, "data: [DONE]"} {
+		if !strings.Contains(body, want) {
+			t.Fatalf("SSE body missing %s: %s", want, body)
+		}
+	}
+	if strings.Contains(body, "reasoning_details") || strings.Contains(body, "<think>") || strings.Contains(body, "functionCall") {
+		t.Fatalf("provider-specific reasoning/tool fields should be converted away: %s", body)
+	}
 }
 
 type fakeTaskExecutor struct {
 	executeCalls int
 	streamCalls  int
-	deltas       []string
+	deltas       []clients.StreamDeltaEvent
 	output       map[string]any
 }
 
diff --git a/apps/api/internal/httpapi/handlers.go b/apps/api/internal/httpapi/handlers.go
index 714a555..ec5f512 100644
--- a/apps/api/internal/httpapi/handlers.go
+++ b/apps/api/internal/httpapi/handlers.go
@@ -934,7 +934,7 @@ func (s *Server) createTask(kind string, compatible bool) http.Handler {
 		runCtx, cancelRun := s.requestExecutionContext(r)
 		defer cancelRun()
 		if responsePlan.compatibleMode {
-			writeCompatibleTaskResponse(runCtx, w, r, s.runner, kind, model, task, user, responsePlan.streamMode)
+			writeCompatibleTaskResponse(runCtx, w, r, s.runner, kind, model, task, user, responsePlan.streamMode, streamIncludeUsage(body))
 			return
 		}
 		result, runErr := s.runner.Execute(runCtx, task, user)
@@ -1002,14 +1002,15 @@ type taskExecutor interface {
 	ExecuteStream(context.Context, store.GatewayTask, *auth.User, clients.StreamDelta) (runner.Result, error)
 }
 
-func writeCompatibleTaskResponse(runCtx context.Context, w http.ResponseWriter, r *http.Request, executor taskExecutor, kind string, model string, task store.GatewayTask, user *auth.User, streamMode bool) {
+func writeCompatibleTaskResponse(runCtx context.Context, w http.ResponseWriter, r *http.Request, executor taskExecutor, kind string, model string, task store.GatewayTask, user *auth.User, streamMode bool, includeUsage bool) {
 	if streamMode {
 		flusher := prepareCompatibleStream(w)
-		result, runErr := executor.ExecuteStream(runCtx, task, user, func(delta string) error {
+		streamWriter := newCompatibleStreamWriter(kind, model, includeUsage)
+		result, runErr := executor.ExecuteStream(runCtx, task, user, func(delta clients.StreamDeltaEvent) error {
 			if !requestStillConnected(r) {
 				return nil
 			}
-			writeCompatibleDelta(w, kind, model, delta)
+			streamWriter.writeDelta(w, delta)
 			if flusher != nil {
 				flusher.Flush()
 			}
@@ -1043,7 +1044,7 @@ func writeCompatibleTaskResponse(runCtx context.Context, w http.ResponseWriter,
 		if !requestStillConnected(r) {
 			return
 		}
-		writeCompatibleDone(w, kind, model, result.Output)
+		streamWriter.writeDone(w, result.Output)
 		if flusher != nil {
 			flusher.Flush()
 		}
@@ -1064,6 +1065,12 @@ func writeCompatibleTaskResponse(runCtx context.Context, w http.ResponseWriter,
 	writeJSON(w, http.StatusOK, result.Output)
 }
 
+func streamIncludeUsage(body map[string]any) bool {
+	streamOptions, _ := body["stream_options"].(map[string]any)
+	includeUsage, _ := streamOptions["include_usage"].(bool)
+	return includeUsage
+}
+
 func asyncRequest(r *http.Request) bool {
 	value := strings.TrimSpace(strings.ToLower(r.Header.Get("x-async")))
 	return value == "1" || value == "true" || value == "yes" || value == "on"
diff --git a/apps/api/internal/httpapi/openapi_models.go b/apps/api/internal/httpapi/openapi_models.go
index f80c30b..9871156 100644
--- a/apps/api/internal/httpapi/openapi_models.go
+++ b/apps/api/internal/httpapi/openapi_models.go
@@ -172,16 +172,18 @@ type PricingEstimateResponse struct {
 }
 
 type TaskRequest struct {
-	Model      string        `json:"model" example:"gpt-4o-mini"`
-	Messages   []ChatMessage `json:"messages,omitempty"`
-	Input      string        `json:"input,omitempty" example:"Tell me a short story"`
-	Prompt     string        `json:"prompt,omitempty" example:"A watercolor robot reading a book"`
-	Stream     bool          `json:"stream,omitempty" example:"false"`
-	RunMode    string        `json:"runMode,omitempty" example:"simulation"`
-	MaxTokens  int           `json:"max_tokens,omitempty" example:"512"`
-	Size       string        `json:"size,omitempty" example:"1024x1024"`
-	Duration   int           `json:"duration,omitempty" example:"5"`
-	Resolution string        `json:"resolution,omitempty" example:"720p"`
+	Model     string        `json:"model" example:"gpt-4o-mini"`
+	Messages  []ChatMessage `json:"messages,omitempty"`
+	Input     string        `json:"input,omitempty" example:"Tell me a short story"`
+	Prompt    string        `json:"prompt,omitempty" example:"A watercolor robot reading a book"`
+	Stream    bool          `json:"stream,omitempty" example:"false"`
+	RunMode   string        `json:"runMode,omitempty" example:"simulation"`
+	MaxTokens int           `json:"max_tokens,omitempty" example:"512"`
+	// ReasoningEffort 推理深度，OpenAI-compatible 请求字段；开放字符串，取值随 provider 和模型能力而定，常见值为 none、minimal、low、medium、high、xhigh，也可配置 max 等供应商自定义值。
+	ReasoningEffort string `json:"reasoning_effort,omitempty" example:"medium"`
+	Size            string `json:"size,omitempty" example:"1024x1024"`
+	Duration        int    `json:"duration,omitempty" example:"5"`
+	Resolution      string `json:"resolution,omitempty" example:"720p"`
 }
 
 type ChatCompletionRequest struct {
@@ -189,8 +191,10 @@ type ChatCompletionRequest struct {
 	Messages    []ChatMessage `json:"messages"`
 	Temperature float64       `json:"temperature,omitempty" example:"0.7"`
 	MaxTokens   int           `json:"max_tokens,omitempty" example:"512"`
-	Stream      bool          `json:"stream,omitempty" example:"false"`
-	RunMode     string        `json:"runMode,omitempty" example:"simulation"`
+	// ReasoningEffort 推理深度，OpenAI-compatible 请求字段；开放字符串，取值随 provider 和模型能力而定，常见值为 none、minimal、low、medium、high、xhigh，也可配置 max 等供应商自定义值。
+	ReasoningEffort string `json:"reasoning_effort,omitempty" example:"medium"`
+	Stream          bool   `json:"stream,omitempty" example:"false"`
+	RunMode         string `json:"runMode,omitempty" example:"simulation"`
 }
 
 type ChatMessage struct {
diff --git a/apps/api/internal/httpapi/streaming.go b/apps/api/internal/httpapi/streaming.go
index 310221e..be58ef3 100644
--- a/apps/api/internal/httpapi/streaming.go
+++ b/apps/api/internal/httpapi/streaming.go
@@ -1,6 +1,13 @@
 package httpapi
 
-import "net/http"
+import (
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"time"
+
+	"github.com/easyai/easyai-ai-gateway/apps/api/internal/clients"
+)
 
 func prepareCompatibleStream(w http.ResponseWriter) http.Flusher {
 	w.Header().Set("Content-Type", "text/event-stream")
@@ -10,55 +17,180 @@ func prepareCompatibleStream(w http.ResponseWriter) http.Flusher {
 	return flusher
 }
 
-func writeCompatibleDelta(w http.ResponseWriter, kind string, model string, content string) {
-	if kind == "responses" {
-		sendSSE(w, "response.output_text.delta", map[string]any{"type": "response.output_text.delta", "delta": content})
-		return
-	}
-	sendSSE(w, "message", map[string]any{
-		"id":      "chatcmpl-stream",
-		"object":  "chat.completion.chunk",
-		"model":   model,
-		"choices": []any{map[string]any{"index": 0, "delta": map[string]any{"content": content}, "finish_reason": nil}},
-	})
+type compatibleStreamWriter struct {
+	kind         string
+	model        string
+	includeUsage bool
+
+	id                string
+	created           int64
+	systemFingerprint any
+	sentRole          bool
+	sentFinish        bool
+	sentUsage         bool
 }
 
-func writeCompatibleDone(w http.ResponseWriter, kind string, model string, output map[string]any) {
-	if kind == "responses" {
+func newCompatibleStreamWriter(kind string, model string, includeUsage bool) *compatibleStreamWriter {
+	return &compatibleStreamWriter{
+		kind:         kind,
+		model:        model,
+		includeUsage: includeUsage,
+		id:           "chatcmpl-stream",
+		created:      time.Now().Unix(),
+	}
+}
+
+func (s *compatibleStreamWriter) writeDelta(w http.ResponseWriter, event clients.StreamDeltaEvent) {
+	if s.kind == "responses" {
+		if event.Text != "" {
+			sendSSE(w, "response.output_text.delta", map[string]any{"type": "response.output_text.delta", "delta": event.Text})
+		}
+		return
+	}
+
+	if event.Event != nil && isChatCompletionChunk(event.Event) {
+		s.writeChatChunk(w, event.Event)
+		return
+	}
+
+	if event.Text == "" && event.ReasoningContent == "" {
+		return
+	}
+	s.ensureRoleChunk(w)
+	if event.ReasoningContent != "" {
+		s.writeChatData(w, s.chatChunk([]any{map[string]any{"index": 0, "delta": map[string]any{"reasoning_content": event.ReasoningContent}, "finish_reason": nil}}, nil))
+	}
+	if event.Text != "" {
+		s.writeChatData(w, s.chatChunk([]any{map[string]any{"index": 0, "delta": map[string]any{"content": event.Text}, "finish_reason": nil}}, nil))
+	}
+}
+
+func (s *compatibleStreamWriter) writeDone(w http.ResponseWriter, output map[string]any) {
+	if s.kind == "responses" {
 		sendSSE(w, "response.completed", map[string]any{"type": "response.completed", "response": output})
 		return
 	}
-	sendSSE(w, "message", map[string]any{
-		"id":      firstString(output["id"], "chatcmpl-stream"),
-		"object":  "chat.completion.chunk",
-		"model":   model,
-		"choices": []any{map[string]any{"index": 0, "delta": map[string]any{}, "finish_reason": "stop"}},
-	})
+	s.captureOutputMetadata(output)
+	if !s.sentRole {
+		s.ensureRoleChunk(w)
+	}
+	if !s.sentFinish {
+		s.writeChatData(w, s.chatChunk([]any{map[string]any{"index": 0, "delta": map[string]any{}, "finish_reason": finishReasonFromOutput(output)}}, nil))
+		s.sentFinish = true
+	}
+	if s.includeUsage && !s.sentUsage {
+		if usage, ok := output["usage"].(map[string]any); ok && len(usage) > 0 {
+			s.writeChatData(w, s.chatChunk([]any{}, usage))
+			s.sentUsage = true
+		}
+	}
+	s.writeDoneMarker(w)
+}
+
+func (s *compatibleStreamWriter) writeChatChunk(w http.ResponseWriter, chunk map[string]any) {
+	chunk = clients.NormalizeChatCompletionStreamEvent(chunk)
+	s.captureChunkMetadata(chunk)
+	choices, _ := chunk["choices"].([]any)
+	usage, hasUsage := chunk["usage"].(map[string]any)
+	if len(choices) == 0 && hasUsage {
+		if !s.includeUsage {
+			return
+		}
+		s.writeChatData(w, s.chatChunk([]any{}, usage))
+		s.sentUsage = true
+		return
+	}
+	if len(choices) > 0 && !chunkHasRole(choices) && !s.sentRole {
+		s.ensureRoleChunk(w)
+	}
+	if chunkHasRole(choices) {
+		s.sentRole = true
+	}
+	if chunkHasFinishReason(choices) {
+		s.sentFinish = true
+	}
+	normalized := cloneMap(chunk)
+	normalized["id"] = s.id
+	normalized["object"] = "chat.completion.chunk"
+	normalized["created"] = s.created
+	normalized["model"] = firstString(normalized["model"], s.model)
+	normalized["system_fingerprint"] = s.systemFingerprint
+	s.writeChatData(w, normalized)
+}
+
+func (s *compatibleStreamWriter) ensureRoleChunk(w http.ResponseWriter) {
+	if s.sentRole {
+		return
+	}
+	s.writeChatData(w, s.chatChunk([]any{map[string]any{"index": 0, "delta": map[string]any{"role": "assistant"}, "finish_reason": nil}}, nil))
+	s.sentRole = true
+}
+
+func (s *compatibleStreamWriter) chatChunk(choices []any, usage map[string]any) map[string]any {
+	chunk := map[string]any{
+		"id":                 s.id,
+		"object":             "chat.completion.chunk",
+		"created":            s.created,
+		"model":              s.model,
+		"system_fingerprint": s.systemFingerprint,
+		"choices":            choices,
+	}
+	if usage != nil {
+		chunk["usage"] = usage
+	} else {
+		chunk["usage"] = nil
+	}
+	return chunk
+}
+
+func (s *compatibleStreamWriter) writeChatData(w http.ResponseWriter, payload map[string]any) {
+	bytes, _ := json.Marshal(payload)
+	_, _ = fmt.Fprintf(w, "data: %s\n\n", bytes)
+}
+
+func (s *compatibleStreamWriter) writeDoneMarker(w http.ResponseWriter) {
+	_, _ = fmt.Fprint(w, "data: [DONE]\n\n")
+}
+
+func (s *compatibleStreamWriter) captureChunkMetadata(chunk map[string]any) {
+	if id := firstString(chunk["id"], ""); id != "" {
+		s.id = id
+	}
+	if model := firstString(chunk["model"], ""); model != "" {
+		s.model = model
+	}
+	if created := int64FromAny(chunk["created"]); created > 0 {
+		s.created = created
+	}
+	if value, ok := chunk["system_fingerprint"]; ok {
+		s.systemFingerprint = value
+	}
+}
+
+func (s *compatibleStreamWriter) captureOutputMetadata(output map[string]any) {
+	if id := firstString(output["id"], ""); id != "" {
+		s.id = id
+	}
+	if model := firstString(output["model"], ""); model != "" {
+		s.model = model
+	}
+	if created := int64FromAny(output["created"]); created > 0 {
+		s.created = created
+	}
+	if value, ok := output["system_fingerprint"]; ok {
+		s.systemFingerprint = value
+	}
 }
 
 func writeCompatibleStream(w http.ResponseWriter, kind string, model string, output map[string]any) {
 	prepareCompatibleStream(w)
+	writer := newCompatibleStreamWriter(kind, model, true)
 	content := extractOutputText(output)
 	if content == "" {
 		content = "done"
 	}
-	if kind == "responses" {
-		sendSSE(w, "response.output_text.delta", map[string]any{"type": "response.output_text.delta", "delta": content})
-		sendSSE(w, "response.completed", map[string]any{"type": "response.completed", "response": output})
-		return
-	}
-	sendSSE(w, "message", map[string]any{
-		"id":      output["id"],
-		"object":  "chat.completion.chunk",
-		"model":   model,
-		"choices": []any{map[string]any{"index": 0, "delta": map[string]any{"content": content}, "finish_reason": nil}},
-	})
-	sendSSE(w, "message", map[string]any{
-		"id":      output["id"],
-		"object":  "chat.completion.chunk",
-		"model":   model,
-		"choices": []any{map[string]any{"index": 0, "delta": map[string]any{}, "finish_reason": "stop"}},
-	})
+	writer.writeDelta(w, clients.StreamDeltaEvent{Text: content})
+	writer.writeDone(w, output)
 }
 
 func firstString(value any, fallback string) string {
@@ -68,6 +200,68 @@ func firstString(value any, fallback string) string {
 	return fallback
 }
 
+func int64FromAny(value any) int64 {
+	switch typed := value.(type) {
+	case int64:
+		return typed
+	case int:
+		return int64(typed)
+	case float64:
+		return int64(typed)
+	default:
+		return 0
+	}
+}
+
+func isChatCompletionChunk(event map[string]any) bool {
+	object, _ := event["object"].(string)
+	if object == "chat.completion.chunk" {
+		return true
+	}
+	_, hasChoices := event["choices"].([]any)
+	return hasChoices
+}
+
+func chunkHasRole(choices []any) bool {
+	for _, rawChoice := range choices {
+		choice, _ := rawChoice.(map[string]any)
+		delta, _ := choice["delta"].(map[string]any)
+		if role, ok := delta["role"].(string); ok && role != "" {
+			return true
+		}
+	}
+	return false
+}
+
+func chunkHasFinishReason(choices []any) bool {
+	for _, rawChoice := range choices {
+		choice, _ := rawChoice.(map[string]any)
+		if reason, ok := choice["finish_reason"].(string); ok && reason != "" {
+			return true
+		}
+	}
+	return false
+}
+
+func finishReasonFromOutput(output map[string]any) string {
+	choices, _ := output["choices"].([]any)
+	for _, rawChoice := range choices {
+		choice, _ := rawChoice.(map[string]any)
+		if reason, ok := choice["finish_reason"].(string); ok && reason != "" {
+			return reason
+		}
+	}
+	return "stop"
+}
+
+func cloneMap(value map[string]any) map[string]any {
+	out := map[string]any{}
+	for key, item := range value {
+		out[key] = item
+	}
+	return out
+}
+
 func extractOutputText(output map[string]any) string {
 	if text, ok := output["output_text"].(string); ok {
 		return text
diff --git a/apps/web/src/pages/admin/BaseModelCapabilityEditor.tsx b/apps/web/src/pages/admin/BaseModelCapabilityEditor.tsx
index ef619cd..e7c0740 100644
--- a/apps/web/src/pages/admin/BaseModelCapabilityEditor.tsx
+++ b/apps/web/src/pages/admin/BaseModelCapabilityEditor.tsx
@@ -40,14 +40,14 @@ type ValueOption = { label: string; value: string };
 const textFields: FieldDefinition[] = [
   { key: 'supportTool', label: '工具调用', hint: 'function calling / tools', type: 'boolean' },
   { key: 'supportStructuredOutput', label: '结构化输出', hint: 'JSON Schema 等输出', type: 'boolean' },
-  { key: 'supportThinking', label: '思考能力', hint: '支持 thinking 参数', type: 'boolean' },
+  { key: 'supportThinking', label: '推理能力', hint: '支持 reasoning / thinking 参数', type: 'boolean' },
   { key: 'supportThinkingModeSwitch', label: '思考开关', hint: '可按请求切换', type: 'boolean' },
   { key: 'supportWebSearch', label: '联网搜索', type: 'boolean' },
   { key: 'max_context_tokens', label: '上下文 Token', placeholder: '128000', type: 'number' },
   { key: 'max_input_tokens', label: '最大输入 Token', placeholder: '64000', type: 'number' },
   { key: 'max_output_tokens', label: '最大输出 Token', placeholder: '8192', type: 'number' },
   { key: 'max_thinking_tokens', label: '最大思考 Token', placeholder: '32768', type: 'number' },
-  { key: 'thinkingEffortLevels', label: '思考强度', placeholder: 'minimal, low, medium, high', type: 'list' },
+  { key: 'thinkingEffortLevels', label: '推理深度', hint: '声明模型支持的 reasoning_effort 取值，可填写 max 等供应商自定义值', placeholder: 'none, minimal, low, medium, high, xhigh, max', type: 'list' },
 ];
 
 const embeddingFields: FieldDefinition[] = [
@@ -535,7 +535,7 @@ const imageAspectRatioOptions = [
   '7:4',
   '4:7',
 ];
-const thinkingEffortOptions = ['minimal', 'low', 'medium', 'high'];
+const thinkingEffortOptions = ['none', 'minimal', 'low', 'medium', 'high', 'xhigh', 'max'];
 const omniVideoModeOptions = ['text_to_video', 'image_reference', 'element_reference', 'first_last_frame', 'video_reference', 'video_edit', 'multi_shot'];
 const durationOptionValues = ['1', '2', '3', '4', '5', '6', '8', '10', '15', '20', '25', '30'];
 const exclusiveCapabilityFields: Record<string, string> = {
diff --git a/docs/design.md b/docs/design.md
index 79cc6aa..c29504f 100644
--- a/docs/design.md
+++ b/docs/design.md
@@ -1505,6 +1505,22 @@ type ModelClient interface {
 - progress event snapshot：确保前端进度面板兼容。
 - billing snapshot：确保预估扣费和最终 billings 语义一致。
 
+OpenAI-compatible 文本请求中的推理深度统一使用 `reasoning_effort` 表达。该字段是请求参数，不是响应中的推理内容；模型能力中用 `thinkingEffortLevels` 声明该模型支持的可选取值。`reasoning_effort` 必须按开放字符串处理，不在网关层写死枚举；实际可用集合必须以 provider 和模型能力为准。常见取值定义如下：
+
+| 值 | 含义 |
+| --- | --- |
+| `none` | 不启用额外推理，适用于不需要思考链路的低延迟请求。 |
+| `minimal` | 最小推理预算，优先降低延迟和成本。 |
+| `low` | 较低推理预算，用于简单推理任务。 |
+| `medium` | 默认/均衡推理深度，在质量、延迟和成本之间折中。 |
+| `high` | 较高推理预算，用于复杂规划、代码和多步推理。 |
+| `xhigh` | 最高推理预算，仅在模型和 provider 明确支持时使用，通常成本和延迟最高。 |
+| `max` | 供应商自定义最高档示例，例如 DeepSeek V4 类模型可能使用该值；语义以 provider 文档为准。 |
+
+除上表外，`thinkingEffortLevels` 可以保存任意供应商自定义值，例如 `max`、`ultra` 或后续模型新增档位。管理端只提供常见值作为快捷选项，不应阻止自定义输入；请求透传时按模型能力校验或直接交由上游 provider 返回错误。
+
+`reasoning_content`、推理过程 delta 或思考摘要在 Chat Completions 中不是 OpenAI 标准必需字段；如需兼容 DeepSeek、Qwen 等供应商扩展，应在 adapter 层作为可选扩展透传，并避免把 hidden reasoning 默认暴露给普通兼容客户端。
+
 ## 11. 队列持久化、恢复与限流执行
 
 ### 11.1 持久化队列原则