package runner

import "fmt"

// messageContentProcessor rewrites image, video and audio parts of chat
// messages into plain text parts carrying the media link when the target
// model does not report the matching capability.
type messageContentProcessor struct{}

// Name returns the identifier of this processor.
func (messageContentProcessor) Name() string { return "MessageContentProcessor" }

// ShouldProcess reports whether the request is a text-generation request that
// carries a non-nil "messages" parameter. A nil context yields false instead
// of dereferencing it, in line with the nil handling of the helpers below.
func (messageContentProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	if context == nil {
		return false
	}
	return isTextGenerationKind(context.kind) && params["messages"] != nil
}

// Process replaces params["messages"] with a converted copy when at least one
// content part was rewritten; otherwise params is left untouched. It always
// returns true.
func (messageContentProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	messages, changed := processMessageListContent(params["messages"], context)
	if changed {
		params["messages"] = messages
	}
	return true
}

// processMessageListContent walks a message list and converts unsupported
// media parts inside every message whose "content" is a list.
//
// It returns (nil, false) when value is not a []any. Otherwise it returns a
// new list plus a flag telling whether any part was converted. Elements that
// are not map[string]any are passed through unchanged; map elements are
// copied with cloneMap before being modified.
func processMessageListContent(value any, context *paramProcessContext) ([]any, bool) {
	rawMessages, ok := value.([]any)
	if !ok {
		return nil, false
	}

	out := make([]any, 0, len(rawMessages))
	changed := false
	for messageIndex, rawMessage := range rawMessages {
		message, ok := rawMessage.(map[string]any)
		if !ok {
			out = append(out, rawMessage)
			continue
		}

		nextMessage := cloneMap(message)
		// Only list-shaped content can hold typed parts; any other content
		// shape (e.g. a plain string) is kept as cloned.
		if contentParts, ok := message["content"].([]any); ok {
			nextContent, contentChanged := processMessageContentParts(
				contentParts,
				fmt.Sprintf("messages[%d].content", messageIndex),
				context,
			)
			if contentChanged {
				nextMessage["content"] = nextContent
				changed = true
			}
		}
		out = append(out, nextMessage)
	}
	return out, changed
}

// processMessageContentParts converts the unsupported media parts of a single
// content list.
//
// basePath is the path of the content list (for example
// "messages[0].content"); it is extended with the part index when a change is
// recorded. The returned flag is true when at least one part was replaced.
func processMessageContentParts(parts []any, basePath string, context *paramProcessContext) ([]any, bool) {
	out := make([]any, 0, len(parts))
	changed := false
	for partIndex, rawPart := range parts {
		part, ok := rawPart.(map[string]any)
		if !ok {
			out = append(out, rawPart)
			continue
		}

		if replacement, replacementChanged := messageContentPartReplacement(part, context); replacementChanged {
			out = append(out, replacement)
			// A nil context makes modelSupportsMessageModality report "not
			// supported", so this branch is reachable without a context.
			// Skip the bookkeeping in that case rather than calling a method
			// on a nil pointer.
			if context != nil {
				context.recordChange(
					"MessageContentProcessor",
					"convert",
					fmt.Sprintf("%s[%d]", basePath, partIndex),
					part,
					replacement,
					messageContentConversionReason(part),
					messageContentCapabilityPath(part),
					messageContentCapabilityValue(part, context),
				)
			}
			changed = true
			continue
		}

		out = append(out, cloneMap(part))
	}
	return out, changed
}

// messageContentPartReplacement builds the text part that replaces an image,
// video or audio part the model cannot consume.
//
// It returns (nil, false) when the part is not a media part, when the model
// supports the modality, or when no link can be extracted from the part.
func messageContentPartReplacement(part map[string]any, context *paramProcessContext) (map[string]any, bool) {
	switch {
	case isImageContent(part):
		if modelSupportsMessageModality(context, "image_analysis") {
			return nil, false
		}
		if url := imageURLFromContentPart(part); url != "" {
			return map[string]any{"type": "text", "text": "Image link: " + url}, true
		}
	case isVideoContent(part):
		if modelSupportsMessageModality(context, "video_understanding") {
			return nil, false
		}
		if url := videoURLFromContentPart(part); url != "" {
			return map[string]any{"type": "text", "text": "video URL: " + url}, true
		}
	case isAudioContent(part) || stringFromAny(part["type"]) == "input_audio":
		if modelSupportsMessageModality(context, "audio_understanding") {
			return nil, false
		}
		if url := audioURLFromContentPart(part); url != "" {
			return map[string]any{"type": "text", "text": "audio URL: " + url}, true
		}
	}
	return nil, false
}

// messageContentConversionReason returns the user-facing explanation recorded
// with a conversion. Anything that is neither image nor video content is
// reported as audio.
func messageContentConversionReason(part map[string]any) string {
	switch {
	case isImageContent(part):
		return "模型不支持图像理解,已将 image_url 转为文本链接。"
	case isVideoContent(part):
		return "模型不支持视频理解,已将 video_url 转为文本链接。"
	default:
		return "模型不支持音频理解,已将音频输入转为文本链接。"
	}
}

// messageContentCapabilityPath returns the capability path recorded with a
// conversion. Anything that is neither image nor video content maps to the
// audio capability.
func messageContentCapabilityPath(part map[string]any) string {
	switch {
	case isImageContent(part):
		return "capabilities.image_analysis"
	case isVideoContent(part):
		return "capabilities.video_understanding"
	default:
		return "capabilities.audio_understanding"
	}
}

// messageContentCapabilityValue looks up, via capabilityValue, the model
// capability entry that corresponds to the part's modality. It returns nil
// when context is nil.
func messageContentCapabilityValue(part map[string]any, context *paramProcessContext) any {
	if context == nil {
		return nil
	}
	switch {
	case isImageContent(part):
		return capabilityValue(context.modelCapability, "image_analysis", "")
	case isVideoContent(part):
		return capabilityValue(context.modelCapability, "video_understanding", "")
	default:
		return capabilityValue(context.modelCapability, "audio_understanding", "")
	}
}

// modelSupportsMessageModality reports whether the model can consume the
// given modality. Support is assumed when capabilityForType finds either the
// named capability or "omni", or when the capability map's "originalTypes"
// list contains one of those two names. A nil context counts as unsupported.
func modelSupportsMessageModality(context *paramProcessContext, capabilityName string) bool {
	if context == nil {
		return false
	}
	capabilities := context.modelCapability
	if capabilityForType(capabilities, capabilityName) != nil {
		return true
	}
	if capabilityForType(capabilities, "omni") != nil {
		return true
	}
	originalTypes := stringListFromAny(capabilities["originalTypes"])
	return containsString(originalTypes, capabilityName) || containsString(originalTypes, "omni")
}

// imageURLFromContentPart extracts the image link from the part's
// "image_url", "url" or "imageUrl" field, in that order.
func imageURLFromContentPart(part map[string]any) string {
	return urlFromNestedContentPart(part, "image_url", "url", "imageUrl")
}

// videoURLFromContentPart extracts the video link from the part's
// "video_url", "url" or "videoUrl" field, in that order.
func videoURLFromContentPart(part map[string]any) string {
	return urlFromNestedContentPart(part, "video_url", "url", "videoUrl")
}

// audioURLFromContentPart extracts the audio reference from a content part.
//
// For parts of type "input_audio" the nested "input_audio" object is checked
// first, preferring its "data" field over its "url" field. If that yields
// nothing, or the part has another type, the "audio_url", "url" and
// "audioUrl" fields are tried in that order.
//
// NOTE(review): "data" presumably carries an inline payload rather than a
// link, yet it is emitted as one — confirm this is intended.
func audioURLFromContentPart(part map[string]any) string {
	if stringFromAny(part["type"]) == "input_audio" {
		if audio, ok := part["input_audio"].(map[string]any); ok {
			if url := firstNonEmptyString(stringFromAny(audio["data"]), stringFromAny(audio["url"])); url != "" {
				return url
			}
		}
	}
	return urlFromNestedContentPart(part, "audio_url", "url", "audioUrl")
}

// urlFromNestedContentPart returns the first non-empty link found under the
// given keys, tried in order. For each key the value itself is used when
// stringFromAny yields a non-empty string; otherwise, if the value is a
// map[string]any, its "url" field is used. It returns "" when no key matches.
func urlFromNestedContentPart(part map[string]any, keys ...string) string {
	for _, key := range keys {
		value := part[key]
		if url := stringFromAny(value); url != "" {
			return url
		}
		if nested, ok := value.(map[string]any); ok {
			if url := stringFromAny(nested["url"]); url != "" {
				return url
			}
		}
	}
	return ""
}