package runner
|
|
|
|
import (
|
|
"testing"
|
|
|
|
"github.com/easyai/easyai-ai-gateway/apps/api/internal/store"
|
|
)
|
|
|
|
func TestVideoModelTypeInferenceReadsContentArray(t *testing.T) {
|
|
imageToVideo := modelTypeFromKind("videos.generations", map[string]any{
|
|
"model": "demo-video",
|
|
"content": []any{
|
|
map[string]any{"type": "text", "text": "animate it"},
|
|
map[string]any{"type": "image_url", "role": "first_frame", "image_url": map[string]any{"url": "https://example.com/frame.png"}},
|
|
},
|
|
})
|
|
if imageToVideo != "image_to_video" {
|
|
t.Fatalf("image content should infer image_to_video, got %s", imageToVideo)
|
|
}
|
|
|
|
omniVideo := modelTypeFromKind("videos.generations", map[string]any{
|
|
"model": "demo-video",
|
|
"content": []any{
|
|
map[string]any{"type": "text", "text": "edit it"},
|
|
map[string]any{"type": "video_url", "role": "reference_video", "video_url": map[string]any{"url": "https://example.com/ref.mp4"}},
|
|
},
|
|
})
|
|
if omniVideo != "omni_video" {
|
|
t.Fatalf("video content should infer omni_video, got %s", omniVideo)
|
|
}
|
|
|
|
textToVideo := modelTypeFromKind("videos.generations", map[string]any{
|
|
"model": "demo-video",
|
|
"content": []any{map[string]any{"type": "text", "text": "make a clip"}},
|
|
})
|
|
if textToVideo != "video_generate" {
|
|
t.Fatalf("text-only content should infer video_generate, got %s", textToVideo)
|
|
}
|
|
}
|
|
|
|
func TestVideoContentTextContributesToTokenEstimate(t *testing.T) {
|
|
tokens := estimateRequestTokens(map[string]any{
|
|
"model": "demo-video",
|
|
"content": []any{
|
|
map[string]any{"type": "text", "text": "a cinematic product reveal"},
|
|
},
|
|
})
|
|
if tokens <= 1 {
|
|
t.Fatalf("content text should contribute to token estimate, got %d", tokens)
|
|
}
|
|
}
|
|
|
|
func TestParamProcessorOmniFiltersUnsupportedVideoAndAudioContent(t *testing.T) {
|
|
body := map[string]any{
|
|
"model": "可灵O1",
|
|
"prompt": "edit the source video",
|
|
"content": []any{
|
|
map[string]any{"type": "text", "text": "edit the source video"},
|
|
map[string]any{"type": "video_url", "role": "video_base", "video_url": map[string]any{"url": "https://example.com/base.mp4", "refer_type": "base"}},
|
|
map[string]any{"type": "video_url", "role": "reference_video", "video_url": map[string]any{"url": "https://example.com/ref.mp4", "refer_type": "feature"}},
|
|
map[string]any{"type": "audio_url", "role": "reference_audio", "audio_url": map[string]any{"url": "https://example.com/ref.mp3"}},
|
|
},
|
|
}
|
|
candidate := store.RuntimeModelCandidate{
|
|
ModelType: "omni_video",
|
|
Capabilities: map[string]any{
|
|
"omni_video": map[string]any{
|
|
"supported_modes": []any{"video_edit"},
|
|
"max_videos": 1,
|
|
"input_audio": false,
|
|
"max_audios": 0,
|
|
},
|
|
},
|
|
}
|
|
|
|
result := preprocessRequestWithLog("videos.generations", body, candidate)
|
|
processed := result.Body
|
|
content := contentItems(processed["content"])
|
|
if len(content) != 2 {
|
|
t.Fatalf("expected text plus one video item, got %+v", content)
|
|
}
|
|
if stringFromAny(content[1]["role"]) != "video_base" || isAudioContent(content[1]) {
|
|
t.Fatalf("unexpected retained content: %+v", content)
|
|
}
|
|
for _, item := range content {
|
|
if isAudioContent(item) || stringFromAny(item["role"]) == "reference_video" {
|
|
t.Fatalf("unsupported content was not filtered: %+v", content)
|
|
}
|
|
}
|
|
if !result.Log.Changed || len(result.Log.Changes) < 2 {
|
|
t.Fatalf("expected preprocessing log with filtered video and audio changes, got %+v", result.Log)
|
|
}
|
|
if result.Log.Input["content"] == nil || result.Log.Output["content"] == nil {
|
|
t.Fatalf("preprocessing log should keep actual input and converted output: %+v", result.Log)
|
|
}
|
|
foundAudioReason := false
|
|
for _, change := range result.Log.Changes {
|
|
if change.Path == "content[3]" && change.CapabilityPath == "capabilities.omni_video.input_audio" {
|
|
foundAudioReason = true
|
|
break
|
|
}
|
|
}
|
|
if !foundAudioReason {
|
|
t.Fatalf("expected audio filtering reason to reference omni_video.input_audio, got %+v", result.Log.Changes)
|
|
}
|
|
}
|
|
|
|
func TestParamProcessorOmniFiltersConvenienceReferenceFields(t *testing.T) {
|
|
body := map[string]any{
|
|
"model": "可灵V3多模态",
|
|
"prompt": "text only",
|
|
"reference_video": "https://example.com/ref.mp4",
|
|
"reference_audio": "https://example.com/ref.mp3",
|
|
}
|
|
candidate := store.RuntimeModelCandidate{
|
|
ModelType: "omni_video",
|
|
Capabilities: map[string]any{
|
|
"omni_video": map[string]any{
|
|
"supported_modes": []any{"text_to_video"},
|
|
"max_videos": 0,
|
|
"input_audio": false,
|
|
"max_audios": 0,
|
|
},
|
|
},
|
|
}
|
|
|
|
result := preprocessRequestWithLog("videos.generations", body, candidate)
|
|
processed := result.Body
|
|
content := contentItems(processed["content"])
|
|
if len(content) != 1 || stringFromAny(content[0]["type"]) != "text" {
|
|
t.Fatalf("expected only text content, got %+v", content)
|
|
}
|
|
for _, key := range []string{"reference_video", "reference_audio"} {
|
|
if processed[key] != nil {
|
|
t.Fatalf("%s should be removed when capability rejects it: %+v", key, processed)
|
|
}
|
|
}
|
|
if len(result.Log.Changes) == 0 {
|
|
t.Fatalf("expected convenience-field filtering to be logged")
|
|
}
|
|
}
|
|
|
|
func TestParamProcessorOmniCapabilityLogUsesActualCapabilityKey(t *testing.T) {
|
|
body := map[string]any{
|
|
"model": "Omni",
|
|
"content": []any{
|
|
map[string]any{"type": "text", "text": "animate"},
|
|
map[string]any{"type": "audio_url", "role": "reference_audio", "audio_url": map[string]any{"url": "https://example.com/ref.mp3"}},
|
|
},
|
|
}
|
|
candidate := store.RuntimeModelCandidate{
|
|
ModelType: "omni",
|
|
Capabilities: map[string]any{
|
|
"omni": map[string]any{
|
|
"input_audio": false,
|
|
"max_audios": 0,
|
|
},
|
|
},
|
|
}
|
|
|
|
result := preprocessRequestWithLog("videos.generations", body, candidate)
|
|
for _, change := range result.Log.Changes {
|
|
if change.Path == "content[1]" && change.CapabilityPath == "capabilities.omni.input_audio" {
|
|
return
|
|
}
|
|
}
|
|
t.Fatalf("expected log to reference capabilities.omni.input_audio, got %+v", result.Log.Changes)
|
|
}
|
|
|
|
func TestParamProcessorChatConvertsUnsupportedMediaMessageContentToText(t *testing.T) {
|
|
body := map[string]any{
|
|
"model": "text-only",
|
|
"messages": []any{
|
|
map[string]any{
|
|
"role": "user",
|
|
"content": []any{
|
|
map[string]any{"type": "text", "text": "describe these"},
|
|
map[string]any{"type": "image_url", "image_url": map[string]any{"url": "https://example.com/image.png"}},
|
|
map[string]any{"type": "video_url", "video_url": map[string]any{"url": "https://example.com/video.mp4"}},
|
|
map[string]any{"type": "audio_url", "audio_url": map[string]any{"url": "https://example.com/audio.mp3"}},
|
|
map[string]any{"type": "input_audio", "input_audio": map[string]any{"data": "https://example.com/input.wav"}},
|
|
},
|
|
},
|
|
},
|
|
}
|
|
candidate := store.RuntimeModelCandidate{
|
|
ModelType: "text_generate",
|
|
Capabilities: map[string]any{
|
|
"text_generate": map[string]any{},
|
|
"originalTypes": []any{"text_generate"},
|
|
},
|
|
}
|
|
|
|
result := preprocessRequestWithLog("chat.completions", body, candidate)
|
|
messages, _ := result.Body["messages"].([]any)
|
|
if len(messages) != 1 {
|
|
t.Fatalf("expected one message, got %+v", result.Body["messages"])
|
|
}
|
|
message, _ := messages[0].(map[string]any)
|
|
content, _ := message["content"].([]any)
|
|
if len(content) != 5 {
|
|
t.Fatalf("expected five content parts, got %+v", message["content"])
|
|
}
|
|
expectedText := []string{
|
|
"describe these",
|
|
"Image link: https://example.com/image.png",
|
|
"video URL: https://example.com/video.mp4",
|
|
"audio URL: https://example.com/audio.mp3",
|
|
"audio URL: https://example.com/input.wav",
|
|
}
|
|
for index, expected := range expectedText {
|
|
part, _ := content[index].(map[string]any)
|
|
if stringFromAny(part["text"]) != expected {
|
|
t.Fatalf("content[%d] text = %q, want %q; all=%+v", index, stringFromAny(part["text"]), expected, content)
|
|
}
|
|
}
|
|
if len(result.Log.Changes) != 4 {
|
|
t.Fatalf("expected four media conversion changes, got %+v", result.Log.Changes)
|
|
}
|
|
expectedCapabilityPaths := map[string]bool{
|
|
"capabilities.image_analysis": false,
|
|
"capabilities.video_understanding": false,
|
|
"capabilities.audio_understanding": false,
|
|
}
|
|
for _, change := range result.Log.Changes {
|
|
if _, ok := expectedCapabilityPaths[change.CapabilityPath]; ok {
|
|
expectedCapabilityPaths[change.CapabilityPath] = true
|
|
}
|
|
}
|
|
for path, found := range expectedCapabilityPaths {
|
|
if !found {
|
|
t.Fatalf("expected conversion log for %s, got %+v", path, result.Log.Changes)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestParamProcessorChatKeepsOmniMessageContent(t *testing.T) {
|
|
body := map[string]any{
|
|
"model": "omni",
|
|
"messages": []any{
|
|
map[string]any{
|
|
"role": "user",
|
|
"content": []any{
|
|
map[string]any{"type": "image_url", "image_url": map[string]any{"url": "https://example.com/image.png"}},
|
|
map[string]any{"type": "video_url", "video_url": map[string]any{"url": "https://example.com/video.mp4"}},
|
|
map[string]any{"type": "audio_url", "audio_url": map[string]any{"url": "https://example.com/audio.mp3"}},
|
|
},
|
|
},
|
|
},
|
|
}
|
|
candidate := store.RuntimeModelCandidate{
|
|
ModelType: "text_generate",
|
|
Capabilities: map[string]any{
|
|
"text_generate": map[string]any{},
|
|
"omni": map[string]any{},
|
|
"originalTypes": []any{"text_generate", "omni"},
|
|
},
|
|
}
|
|
|
|
result := preprocessRequestWithLog("chat.completions", body, candidate)
|
|
if result.Log.Changed {
|
|
t.Fatalf("omni model should keep message media content unchanged, got %+v", result.Log.Changes)
|
|
}
|
|
messages, _ := result.Body["messages"].([]any)
|
|
message, _ := messages[0].(map[string]any)
|
|
content, _ := message["content"].([]any)
|
|
for _, item := range content {
|
|
part, _ := item.(map[string]any)
|
|
if stringFromAny(part["type"]) == "text" {
|
|
t.Fatalf("media content should not be converted for omni model: %+v", content)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestParamProcessorChatConvertsOnlyUnsupportedModalities(t *testing.T) {
|
|
body := map[string]any{
|
|
"model": "vision-only",
|
|
"messages": []any{
|
|
map[string]any{
|
|
"role": "user",
|
|
"content": []any{
|
|
map[string]any{"type": "image_url", "image_url": map[string]any{"url": "https://example.com/image.png"}},
|
|
map[string]any{"type": "video_url", "video_url": map[string]any{"url": "https://example.com/video.mp4"}},
|
|
},
|
|
},
|
|
},
|
|
}
|
|
candidate := store.RuntimeModelCandidate{
|
|
ModelType: "text_generate",
|
|
Capabilities: map[string]any{
|
|
"text_generate": map[string]any{},
|
|
"image_analysis": map[string]any{},
|
|
"originalTypes": []any{"text_generate", "image_analysis"},
|
|
},
|
|
}
|
|
|
|
result := preprocessRequestWithLog("chat.completions", body, candidate)
|
|
messages, _ := result.Body["messages"].([]any)
|
|
message, _ := messages[0].(map[string]any)
|
|
content, _ := message["content"].([]any)
|
|
first, _ := content[0].(map[string]any)
|
|
second, _ := content[1].(map[string]any)
|
|
if stringFromAny(first["type"]) != "image_url" {
|
|
t.Fatalf("image content should be kept when image_analysis is supported: %+v", content)
|
|
}
|
|
if stringFromAny(second["text"]) != "video URL: https://example.com/video.mp4" {
|
|
t.Fatalf("video content should be converted, got %+v", second)
|
|
}
|
|
if len(result.Log.Changes) != 1 || result.Log.Changes[0].CapabilityPath != "capabilities.video_understanding" {
|
|
t.Fatalf("expected only video conversion to be logged, got %+v", result.Log.Changes)
|
|
}
|
|
}
|
|
|
|
func TestSkipTaskParameterPreprocessingLogForTextModelTypes(t *testing.T) {
|
|
for _, modelType := range []string{"text_generate", "chat", "responses", "text"} {
|
|
if !skipTaskParameterPreprocessingLog(modelType) {
|
|
t.Fatalf("%s should skip task parameter preprocessing log", modelType)
|
|
}
|
|
}
|
|
for _, modelType := range []string{"image_generate", "image_edit", "video_generate", "omni_video"} {
|
|
if skipTaskParameterPreprocessingLog(modelType) {
|
|
t.Fatalf("%s should keep task parameter preprocessing log", modelType)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestParamProcessorVideoCapabilitiesNormalizeAndFilter(t *testing.T) {
|
|
body := map[string]any{
|
|
"model": "Seedance",
|
|
"duration": 13,
|
|
"aspect_ratio": "4:3",
|
|
"resolution": "1080p",
|
|
"audio": true,
|
|
"output_audio": true,
|
|
"content": []any{
|
|
map[string]any{"type": "text", "text": "animate it"},
|
|
map[string]any{"type": "image_url", "role": "first_frame", "image_url": map[string]any{"url": "https://example.com/first.png"}},
|
|
map[string]any{"type": "image_url", "role": "last_frame", "image_url": map[string]any{"url": "https://example.com/last.png"}},
|
|
map[string]any{"type": "audio_url", "role": "reference_audio", "audio_url": map[string]any{"url": "https://example.com/ref.mp3"}},
|
|
},
|
|
}
|
|
candidate := store.RuntimeModelCandidate{
|
|
ModelType: "image_to_video",
|
|
Capabilities: map[string]any{
|
|
"image_to_video": map[string]any{
|
|
"aspect_ratio_allowed": []any{"16:9", "1:1"},
|
|
"duration_options": []any{4, 8, 12},
|
|
"input_first_last_frame": false,
|
|
"input_audio": false,
|
|
"output_audio": false,
|
|
"max_images_for_last_frame": 0,
|
|
},
|
|
},
|
|
}
|
|
|
|
result := preprocessRequestWithLog("videos.generations", body, candidate)
|
|
processed := result.Body
|
|
if processed["duration"] != float64(12) && processed["duration"] != 12 {
|
|
t.Fatalf("duration should be snapped to 12, got %+v", processed["duration"])
|
|
}
|
|
if processed["aspect_ratio"] != "16:9" {
|
|
t.Fatalf("aspect_ratio should fall back to first allowed value, got %+v", processed["aspect_ratio"])
|
|
}
|
|
if processed["audio"] != nil || processed["output_audio"] != nil {
|
|
t.Fatalf("output audio flags should be removed: %+v", processed)
|
|
}
|
|
for _, item := range contentItems(processed["content"]) {
|
|
if stringFromAny(item["role"]) == "last_frame" || isAudioContent(item) {
|
|
t.Fatalf("unsupported content remained: %+v", processed["content"])
|
|
}
|
|
}
|
|
foundDuration := false
|
|
for _, change := range result.Log.Changes {
|
|
if change.Path == "duration" && change.CapabilityPath == "capabilities.image_to_video.duration_options" {
|
|
foundDuration = true
|
|
break
|
|
}
|
|
}
|
|
if !foundDuration {
|
|
t.Fatalf("expected duration adjustment to reference duration_options, got %+v", result.Log.Changes)
|
|
}
|
|
}
|
|
|
|
func TestParamProcessorDurationRangeRoundsFractionalSecondsUp(t *testing.T) {
|
|
body := map[string]any{
|
|
"model": "Seedance",
|
|
"prompt": "animate it",
|
|
"duration": 5.5,
|
|
}
|
|
candidate := store.RuntimeModelCandidate{
|
|
ModelType: "video_generate",
|
|
Capabilities: map[string]any{
|
|
"video_generate": map[string]any{
|
|
"duration_range": []any{3, 12},
|
|
},
|
|
},
|
|
}
|
|
|
|
result := preprocessRequestWithLog("videos.generations", body, candidate)
|
|
if result.Body["duration"] != float64(6) && result.Body["duration"] != 6 {
|
|
t.Fatalf("fractional duration should be rounded up to default 1s step, got %+v", result.Body["duration"])
|
|
}
|
|
}
|
|
|
|
func TestParamProcessorDurationWithoutRangeStillRoundsUp(t *testing.T) {
|
|
body := map[string]any{
|
|
"model": "Seedance",
|
|
"prompt": "animate it",
|
|
"duration": 5.2,
|
|
}
|
|
candidate := store.RuntimeModelCandidate{
|
|
ModelType: "video_generate",
|
|
Capabilities: map[string]any{
|
|
"video_generate": map[string]any{},
|
|
},
|
|
}
|
|
|
|
result := preprocessRequestWithLog("videos.generations", body, candidate)
|
|
if result.Body["duration"] != float64(6) && result.Body["duration"] != 6 {
|
|
t.Fatalf("duration should default to a 1s upward step without range, got %+v", result.Body["duration"])
|
|
}
|
|
}
|
|
|
|
func TestParamProcessorDurationRangeUsesStepCeilingAndRange(t *testing.T) {
|
|
body := map[string]any{
|
|
"model": "Seedance",
|
|
"prompt": "animate it",
|
|
"duration": 6.1,
|
|
"duration_seconds": 6.1,
|
|
}
|
|
candidate := store.RuntimeModelCandidate{
|
|
ModelType: "image_to_video",
|
|
Capabilities: map[string]any{
|
|
"image_to_video": map[string]any{
|
|
"duration_range": []any{5, 10},
|
|
"duration_step": 2,
|
|
},
|
|
},
|
|
}
|
|
|
|
result := preprocessRequestWithLog("videos.generations", body, candidate)
|
|
if result.Body["duration"] != float64(7) && result.Body["duration"] != 7 {
|
|
t.Fatalf("duration should be rounded up by configured step, got %+v", result.Body["duration"])
|
|
}
|
|
if result.Body["duration_seconds"] != result.Body["duration"] {
|
|
t.Fatalf("duration_seconds should sync with normalized duration, got %+v", result.Body)
|
|
}
|
|
|
|
body["duration"] = 10.1
|
|
body["duration_seconds"] = 10.1
|
|
result = preprocessRequestWithLog("videos.generations", body, candidate)
|
|
if result.Body["duration"] != float64(10) && result.Body["duration"] != 10 {
|
|
t.Fatalf("duration should be capped by range max, got %+v", result.Body["duration"])
|
|
}
|
|
}
|
|
|
|
func TestParamProcessorDurationOptionsChooseNextAllowedValue(t *testing.T) {
|
|
body := map[string]any{
|
|
"model": "Seedance",
|
|
"prompt": "animate it",
|
|
"duration": 8.1,
|
|
}
|
|
candidate := store.RuntimeModelCandidate{
|
|
ModelType: "image_to_video",
|
|
Capabilities: map[string]any{
|
|
"image_to_video": map[string]any{
|
|
"duration_options": []any{4, 8, 12},
|
|
},
|
|
},
|
|
}
|
|
|
|
result := preprocessRequestWithLog("videos.generations", body, candidate)
|
|
if result.Body["duration"] != float64(12) && result.Body["duration"] != 12 {
|
|
t.Fatalf("duration should use next allowed option, got %+v", result.Body["duration"])
|
|
}
|
|
}
|
|
|
|
func TestParamProcessorVideoGenerateLogsFirstFrameRemoval(t *testing.T) {
|
|
body := map[string]any{
|
|
"model": "Seedance T2V",
|
|
"prompt": "animate it",
|
|
"content": []any{
|
|
map[string]any{"type": "text", "text": "animate it"},
|
|
map[string]any{"type": "image_url", "role": "first_frame", "image_url": "https://example.com/first.png"},
|
|
},
|
|
}
|
|
candidate := store.RuntimeModelCandidate{
|
|
ModelType: "video_generate",
|
|
Capabilities: map[string]any{
|
|
"video_generate": map[string]any{
|
|
"duration_range": []any{3, 12},
|
|
},
|
|
},
|
|
}
|
|
|
|
result := preprocessRequestWithLog("videos.generations", body, candidate)
|
|
for _, item := range contentItems(result.Body["content"]) {
|
|
if isImageContent(item) {
|
|
t.Fatalf("first frame image should be removed for video_generate: %+v", result.Body["content"])
|
|
}
|
|
}
|
|
for _, change := range result.Log.Changes {
|
|
if change.Path == "content[1]" {
|
|
if change.Reason != "模型能力未开启首帧输入,已移除 first_frame。" {
|
|
t.Fatalf("unexpected first frame removal reason: %+v", change)
|
|
}
|
|
if change.CapabilityPath != "capabilities.video_generate.input_first_frame" {
|
|
t.Fatalf("unexpected first frame capability path: %+v", change)
|
|
}
|
|
return
|
|
}
|
|
}
|
|
t.Fatalf("expected first frame removal log, got %+v", result.Log.Changes)
|
|
}
|
|
|
|
func TestParamProcessorImageResolutionAndOutputCount(t *testing.T) {
|
|
body := map[string]any{
|
|
"model": "即梦V4.0",
|
|
"prompt": "draw",
|
|
"size": "2K",
|
|
"n": 8,
|
|
}
|
|
candidate := store.RuntimeModelCandidate{
|
|
ModelType: "image_generate",
|
|
Capabilities: map[string]any{
|
|
"image_generate": map[string]any{
|
|
"output_multiple_images": true,
|
|
"output_max_images_count": 4,
|
|
},
|
|
},
|
|
}
|
|
|
|
processed := preprocessRequest("images.generations", body, candidate)
|
|
if processed["resolution"] != "2K" {
|
|
t.Fatalf("size resolution should be copied to resolution, got %+v", processed)
|
|
}
|
|
if processed["n"] != 4 {
|
|
t.Fatalf("image count should be capped to 4, got %+v", processed["n"])
|
|
}
|
|
}
|