// Tests for the request parameter preprocessor: capability-driven filtering of
// omni video/audio content, chat message media conversion, video duration
// normalization, image output limits, and the preprocessing change log.
package runner

import (
	"testing"

	"github.com/easyai/easyai-ai-gateway/apps/api/internal/store"
)

// Verifies that an omni_video model limited to one video and no audio input
// drops the reference video and reference audio parts, keeps the base video,
// and records both removals (with capability paths) in the preprocessing log.
func TestParamProcessorOmniFiltersUnsupportedVideoAndAudioContent(t *testing.T) {
	payload := map[string]any{
		"model":  "可灵O1",
		"prompt": "edit the source video",
		"content": []any{
			map[string]any{"type": "text", "text": "edit the source video"},
			map[string]any{"type": "video_url", "role": "video_base", "video_url": map[string]any{"url": "https://example.com/base.mp4", "refer_type": "base"}},
			map[string]any{"type": "video_url", "role": "reference_video", "video_url": map[string]any{"url": "https://example.com/ref.mp4", "refer_type": "feature"}},
			map[string]any{"type": "audio_url", "role": "reference_audio", "audio_url": map[string]any{"url": "https://example.com/ref.mp3"}},
		},
	}
	cand := store.RuntimeModelCandidate{
		ModelType: "omni_video",
		Capabilities: map[string]any{
			"omni_video": map[string]any{
				"supported_modes": []any{"video_edit"},
				"max_videos":      1,
				"input_audio":     false,
				"max_audios":      0,
			},
		},
	}

	res := preprocessRequestWithLog("videos.generations", payload, cand)
	got := res.Body
	parts := contentItems(got["content"])
	if len(parts) != 2 {
		t.Fatalf("expected text plus one video item, got %+v", parts)
	}
	if stringFromAny(parts[1]["role"]) != "video_base" || isAudioContent(parts[1]) {
		t.Fatalf("unexpected retained content: %+v", parts)
	}
	for _, part := range parts {
		if isAudioContent(part) || stringFromAny(part["role"]) == "reference_video" {
			t.Fatalf("unsupported content was not filtered: %+v", parts)
		}
	}
	if !res.Log.Changed || len(res.Log.Changes) < 2 {
		t.Fatalf("expected preprocessing log with filtered video and audio changes, got %+v", res.Log)
	}
	if res.Log.Input["content"] == nil || res.Log.Output["content"] == nil {
		t.Fatalf("preprocessing log should keep actual input and converted output: %+v", res.Log)
	}

	// The audio part sat at index 3 of the original content array; its removal
	// must reference the omni_video.input_audio capability.
	sawAudioReason := false
	for _, chg := range res.Log.Changes {
		if chg.Path == "content[3]" && chg.CapabilityPath == "capabilities.omni_video.input_audio" {
			sawAudioReason = true
			break
		}
	}
	if !sawAudioReason {
		t.Fatalf("expected audio filtering reason to reference omni_video.input_audio, got %+v", res.Log.Changes)
	}
}

// Verifies that convenience fields (reference_video / reference_audio) are
// stripped when the capability block rejects them, leaving text-only content,
// and that the filtering is logged.
func TestParamProcessorOmniFiltersConvenienceReferenceFields(t *testing.T) {
	payload := map[string]any{
		"model":           "可灵V3多模态",
		"prompt":          "text only",
		"reference_video": "https://example.com/ref.mp4",
		"reference_audio": "https://example.com/ref.mp3",
	}
	cand := store.RuntimeModelCandidate{
		ModelType: "omni_video",
		Capabilities: map[string]any{
			"omni_video": map[string]any{
				"supported_modes": []any{"text_to_video"},
				"max_videos":      0,
				"input_audio":     false,
				"max_audios":      0,
			},
		},
	}

	res := preprocessRequestWithLog("videos.generations", payload, cand)
	got := res.Body
	parts := contentItems(got["content"])
	if len(parts) != 1 || stringFromAny(parts[0]["type"]) != "text" {
		t.Fatalf("expected only text content, got %+v", parts)
	}
	for _, key := range []string{"reference_video", "reference_audio"} {
		if got[key] != nil {
			t.Fatalf("%s should be removed when capability rejects it: %+v", key, got)
		}
	}
	if len(res.Log.Changes) == 0 {
		t.Fatalf("expected convenience-field filtering to be logged")
	}
}

// Verifies that the change log references the model's actual capability key
// ("omni" here) rather than a hard-coded one.
func TestParamProcessorOmniCapabilityLogUsesActualCapabilityKey(t *testing.T) {
	payload := map[string]any{
		"model": "Omni",
		"content": []any{
			map[string]any{"type": "text", "text": "animate"},
			map[string]any{"type": "audio_url", "role": "reference_audio", "audio_url": map[string]any{"url": "https://example.com/ref.mp3"}},
		},
	}
	cand := store.RuntimeModelCandidate{
		ModelType: "omni",
		Capabilities: map[string]any{
			"omni": map[string]any{
				"input_audio": false,
				"max_audios":  0,
			},
		},
	}

	res := preprocessRequestWithLog("videos.generations", payload, cand)
	for _, chg := range res.Log.Changes {
		if chg.Path == "content[1]" && chg.CapabilityPath == "capabilities.omni.input_audio" {
			return
		}
	}
	t.Fatalf("expected log to reference capabilities.omni.input_audio, got %+v", res.Log.Changes)
}

// Verifies that a text-only model converts image/video/audio message parts to
// plain-text placeholders and logs one change per converted part.
func TestParamProcessorChatConvertsUnsupportedMediaMessageContentToText(t *testing.T) {
	payload := map[string]any{
		"model": "text-only",
		"messages": []any{
			map[string]any{
				"role": "user",
				"content": []any{
					map[string]any{"type": "text", "text": "describe these"},
					map[string]any{"type": "image_url", "image_url": map[string]any{"url": "https://example.com/image.png"}},
					map[string]any{"type": "video_url", "video_url": map[string]any{"url": "https://example.com/video.mp4"}},
					map[string]any{"type": "audio_url", "audio_url": map[string]any{"url": "https://example.com/audio.mp3"}},
					map[string]any{"type": "input_audio", "input_audio": map[string]any{"data": "https://example.com/input.wav"}},
				},
			},
		},
	}
	cand := store.RuntimeModelCandidate{
		ModelType: "text_generate",
		Capabilities: map[string]any{
			"text_generate": map[string]any{},
			"originalTypes": []any{"text_generate"},
		},
	}

	res := preprocessRequestWithLog("chat.completions", payload, cand)
	msgs, _ := res.Body["messages"].([]any)
	if len(msgs) != 1 {
		t.Fatalf("expected one message, got %+v", res.Body["messages"])
	}
	msg, _ := msgs[0].(map[string]any)
	parts, _ := msg["content"].([]any)
	if len(parts) != 5 {
		t.Fatalf("expected five content parts, got %+v", msg["content"])
	}

	wantText := []string{
		"describe these",
		"Image link: https://example.com/image.png",
		"video URL: https://example.com/video.mp4",
		"audio URL: https://example.com/audio.mp3",
		"audio URL: https://example.com/input.wav",
	}
	for i, want := range wantText {
		part, _ := parts[i].(map[string]any)
		if stringFromAny(part["text"]) != want {
			t.Fatalf("content[%d] text = %q, want %q; all=%+v", i, stringFromAny(part["text"]), want, parts)
		}
	}

	if len(res.Log.Changes) != 4 {
		t.Fatalf("expected four media conversion changes, got %+v", res.Log.Changes)
	}
	// Both audio-style parts map onto audio_understanding, so three distinct
	// capability paths cover the four logged changes.
	wantPaths := map[string]bool{
		"capabilities.image_analysis":      false,
		"capabilities.video_understanding": false,
		"capabilities.audio_understanding": false,
	}
	for _, chg := range res.Log.Changes {
		if _, ok := wantPaths[chg.CapabilityPath]; ok {
			wantPaths[chg.CapabilityPath] = true
		}
	}
	for path, seen := range wantPaths {
		if !seen {
			t.Fatalf("expected conversion log for %s, got %+v", path, res.Log.Changes)
		}
	}
}

// Verifies that a model whose original types include "omni" keeps media
// message parts untouched and records no changes.
func TestParamProcessorChatKeepsOmniMessageContent(t *testing.T) {
	payload := map[string]any{
		"model": "omni",
		"messages": []any{
			map[string]any{
				"role": "user",
				"content": []any{
					map[string]any{"type": "image_url", "image_url": map[string]any{"url": "https://example.com/image.png"}},
					map[string]any{"type": "video_url", "video_url": map[string]any{"url": "https://example.com/video.mp4"}},
					map[string]any{"type": "audio_url", "audio_url": map[string]any{"url": "https://example.com/audio.mp3"}},
				},
			},
		},
	}
	cand := store.RuntimeModelCandidate{
		ModelType: "text_generate",
		Capabilities: map[string]any{
			"text_generate": map[string]any{},
			"omni":          map[string]any{},
			"originalTypes": []any{"text_generate", "omni"},
		},
	}

	res := preprocessRequestWithLog("chat.completions", payload, cand)
	if res.Log.Changed {
		t.Fatalf("omni model should keep message media content unchanged, got %+v", res.Log.Changes)
	}
	msgs, _ := res.Body["messages"].([]any)
	msg, _ := msgs[0].(map[string]any)
	parts, _ := msg["content"].([]any)
	for _, raw := range parts {
		part, _ := raw.(map[string]any)
		if stringFromAny(part["type"]) == "text" {
			t.Fatalf("media content should not be converted for omni model: %+v", parts)
		}
	}
}

// Verifies mixed modalities: supported image parts stay intact while the
// unsupported video part is converted to text, with exactly one logged change.
func TestParamProcessorChatConvertsOnlyUnsupportedModalities(t *testing.T) {
	payload := map[string]any{
		"model": "vision-only",
		"messages": []any{
			map[string]any{
				"role": "user",
				"content": []any{
					map[string]any{"type": "image_url", "image_url": map[string]any{"url": "https://example.com/image.png"}},
					map[string]any{"type": "video_url", "video_url": map[string]any{"url": "https://example.com/video.mp4"}},
				},
			},
		},
	}
	cand := store.RuntimeModelCandidate{
		ModelType: "text_generate",
		Capabilities: map[string]any{
			"text_generate":  map[string]any{},
			"image_analysis": map[string]any{},
			"originalTypes":  []any{"text_generate", "image_analysis"},
		},
	}

	res := preprocessRequestWithLog("chat.completions", payload, cand)
	msgs, _ := res.Body["messages"].([]any)
	msg, _ := msgs[0].(map[string]any)
	parts, _ := msg["content"].([]any)
	first, _ := parts[0].(map[string]any)
	second, _ := parts[1].(map[string]any)
	if stringFromAny(first["type"]) != "image_url" {
		t.Fatalf("image content should be kept when image_analysis is supported: %+v", parts)
	}
	if stringFromAny(second["text"]) != "video URL: https://example.com/video.mp4" {
		t.Fatalf("video content should be converted, got %+v", second)
	}
	if len(res.Log.Changes) != 1 || res.Log.Changes[0].CapabilityPath != "capabilities.video_understanding" {
		t.Fatalf("expected only video conversion to be logged, got %+v", res.Log.Changes)
	}
}

// Verifies which model types opt out of the task-parameter preprocessing log:
// text-style types skip it, media-generating types keep it.
func TestSkipTaskParameterPreprocessingLogForTextModelTypes(t *testing.T) {
	for _, modelType := range []string{"text_generate", "chat", "responses", "text"} {
		if !skipTaskParameterPreprocessingLog(modelType) {
			t.Fatalf("%s should skip task parameter preprocessing log", modelType)
		}
	}
	for _, modelType := range []string{"image_generate", "image_edit", "video_generate", "omni_video"} {
		if skipTaskParameterPreprocessingLog(modelType) {
			t.Fatalf("%s should keep task parameter preprocessing log", modelType)
		}
	}
}

// Verifies the full video-capability normalization path: duration snapped to
// an allowed option, aspect ratio replaced by the first allowed value, output
// audio flags and unsupported content parts removed, all with log entries.
func TestParamProcessorVideoCapabilitiesNormalizeAndFilter(t *testing.T) {
	payload := map[string]any{
		"model":        "Seedance",
		"duration":     13,
		"aspect_ratio": "4:3",
		"resolution":   "1080p",
		"audio":        true,
		"output_audio": true,
		"content": []any{
			map[string]any{"type": "text", "text": "animate it"},
			map[string]any{"type": "image_url", "role": "first_frame", "image_url": map[string]any{"url": "https://example.com/first.png"}},
			map[string]any{"type": "image_url", "role": "last_frame", "image_url": map[string]any{"url": "https://example.com/last.png"}},
			map[string]any{"type": "audio_url", "role": "reference_audio", "audio_url": map[string]any{"url": "https://example.com/ref.mp3"}},
		},
	}
	cand := store.RuntimeModelCandidate{
		ModelType: "image_to_video",
		Capabilities: map[string]any{
			"image_to_video": map[string]any{
				"aspect_ratio_allowed":      []any{"16:9", "1:1"},
				"duration_options":          []any{4, 8, 12},
				"input_first_last_frame":    false,
				"input_audio":               false,
				"output_audio":              false,
				"max_images_for_last_frame": 0,
			},
		},
	}

	res := preprocessRequestWithLog("videos.generations", payload, cand)
	got := res.Body
	// The processor may store the snapped duration as float64 or int.
	if got["duration"] != float64(12) && got["duration"] != 12 {
		t.Fatalf("duration should be snapped to 12, got %+v", got["duration"])
	}
	if got["aspect_ratio"] != "16:9" {
		t.Fatalf("aspect_ratio should fall back to first allowed value, got %+v", got["aspect_ratio"])
	}
	if got["audio"] != nil || got["output_audio"] != nil {
		t.Fatalf("output audio flags should be removed: %+v", got)
	}
	for _, part := range contentItems(got["content"]) {
		if stringFromAny(part["role"]) == "last_frame" || isAudioContent(part) {
			t.Fatalf("unsupported content remained: %+v", got["content"])
		}
	}

	sawDuration := false
	for _, chg := range res.Log.Changes {
		if chg.Path == "duration" && chg.CapabilityPath == "capabilities.image_to_video.duration_options" {
			sawDuration = true
			break
		}
	}
	if !sawDuration {
		t.Fatalf("expected duration adjustment to reference duration_options, got %+v", res.Log.Changes)
	}
}

// Verifies that a fractional duration inside duration_range rounds up on the
// default 1-second step.
func TestParamProcessorDurationRangeRoundsFractionalSecondsUp(t *testing.T) {
	payload := map[string]any{
		"model":    "Seedance",
		"prompt":   "animate it",
		"duration": 5.5,
	}
	cand := store.RuntimeModelCandidate{
		ModelType: "video_generate",
		Capabilities: map[string]any{
			"video_generate": map[string]any{
				"duration_range": []any{3, 12},
			},
		},
	}

	res := preprocessRequestWithLog("videos.generations", payload, cand)
	if res.Body["duration"] != float64(6) && res.Body["duration"] != 6 {
		t.Fatalf("fractional duration should be rounded up to default 1s step, got %+v", res.Body["duration"])
	}
}

// Verifies that upward rounding still applies when no duration_range is
// declared in the capability block.
func TestParamProcessorDurationWithoutRangeStillRoundsUp(t *testing.T) {
	payload := map[string]any{
		"model":    "Seedance",
		"prompt":   "animate it",
		"duration": 5.2,
	}
	cand := store.RuntimeModelCandidate{
		ModelType: "video_generate",
		Capabilities: map[string]any{
			"video_generate": map[string]any{},
		},
	}

	res := preprocessRequestWithLog("videos.generations", payload, cand)
	if res.Body["duration"] != float64(6) && res.Body["duration"] != 6 {
		t.Fatalf("duration should default to a 1s upward step without range, got %+v", res.Body["duration"])
	}
}

// Verifies step-based ceiling within a duration range, that duration_seconds
// stays in sync with the normalized duration, and that the range maximum caps
// the result.
func TestParamProcessorDurationRangeUsesStepCeilingAndRange(t *testing.T) {
	payload := map[string]any{
		"model":            "Seedance",
		"prompt":           "animate it",
		"duration":         6.1,
		"duration_seconds": 6.1,
	}
	cand := store.RuntimeModelCandidate{
		ModelType: "image_to_video",
		Capabilities: map[string]any{
			"image_to_video": map[string]any{
				"duration_range": []any{5, 10},
				"duration_step":  2,
			},
		},
	}

	res := preprocessRequestWithLog("videos.generations", payload, cand)
	if res.Body["duration"] != float64(7) && res.Body["duration"] != 7 {
		t.Fatalf("duration should be rounded up by configured step, got %+v", res.Body["duration"])
	}
	if res.Body["duration_seconds"] != res.Body["duration"] {
		t.Fatalf("duration_seconds should sync with normalized duration, got %+v", res.Body)
	}

	// Values beyond the range maximum are clamped to it.
	payload["duration"] = 10.1
	payload["duration_seconds"] = 10.1
	res = preprocessRequestWithLog("videos.generations", payload, cand)
	if res.Body["duration"] != float64(10) && res.Body["duration"] != 10 {
		t.Fatalf("duration should be capped by range max, got %+v", res.Body["duration"])
	}
}

// Verifies that with discrete duration_options, a value between options is
// promoted to the next larger allowed option.
func TestParamProcessorDurationOptionsChooseNextAllowedValue(t *testing.T) {
	payload := map[string]any{
		"model":    "Seedance",
		"prompt":   "animate it",
		"duration": 8.1,
	}
	cand := store.RuntimeModelCandidate{
		ModelType: "image_to_video",
		Capabilities: map[string]any{
			"image_to_video": map[string]any{
				"duration_options": []any{4, 8, 12},
			},
		},
	}

	res := preprocessRequestWithLog("videos.generations", payload, cand)
	if res.Body["duration"] != float64(12) && res.Body["duration"] != 12 {
		t.Fatalf("duration should use next allowed option, got %+v", res.Body["duration"])
	}
}

// Verifies that a text-to-video model removes first_frame images and logs the
// removal with the expected reason text and capability path.
func TestParamProcessorVideoGenerateLogsFirstFrameRemoval(t *testing.T) {
	payload := map[string]any{
		"model":  "Seedance T2V",
		"prompt": "animate it",
		"content": []any{
			map[string]any{"type": "text", "text": "animate it"},
			map[string]any{"type": "image_url", "role": "first_frame", "image_url": "https://example.com/first.png"},
		},
	}
	cand := store.RuntimeModelCandidate{
		ModelType: "video_generate",
		Capabilities: map[string]any{
			"video_generate": map[string]any{
				"duration_range": []any{3, 12},
			},
		},
	}

	res := preprocessRequestWithLog("videos.generations", payload, cand)
	for _, part := range contentItems(res.Body["content"]) {
		if isImageContent(part) {
			t.Fatalf("first frame image should be removed for video_generate: %+v", res.Body["content"])
		}
	}
	for _, chg := range res.Log.Changes {
		if chg.Path != "content[1]" {
			continue
		}
		if chg.Reason != "模型能力未开启首帧输入,已移除 first_frame。" {
			t.Fatalf("unexpected first frame removal reason: %+v", chg)
		}
		if chg.CapabilityPath != "capabilities.video_generate.input_first_frame" {
			t.Fatalf("unexpected first frame capability path: %+v", chg)
		}
		return
	}
	t.Fatalf("expected first frame removal log, got %+v", res.Log.Changes)
}

// Verifies that a size alias ("2K") is copied into resolution and that the
// requested image count is capped by output_max_images_count.
func TestParamProcessorImageResolutionAndOutputCount(t *testing.T) {
	payload := map[string]any{
		"model":  "即梦V4.0",
		"prompt": "draw",
		"size":   "2K",
		"n":      8,
	}
	cand := store.RuntimeModelCandidate{
		ModelType: "image_generate",
		Capabilities: map[string]any{
			"image_generate": map[string]any{
				"output_multiple_images":  true,
				"output_max_images_count": 4,
			},
		},
	}

	got := preprocessRequest("images.generations", payload, cand)
	if got["resolution"] != "2K" {
		t.Fatalf("size resolution should be copied to resolution, got %+v", got)
	}
	if got["n"] != 4 {
		t.Fatalf("image count should be capped to 4, got %+v", got["n"])
	}
}