// easyai-ai-gateway/apps/api/internal/runner/param_processor_message.go
//
// 191 lines
// 5.6 KiB
// Go

package runner
import "fmt"
// messageContentProcessor is a field-less parameter processor; its methods
// operate on the "messages" entry of a request parameter map.
type messageContentProcessor struct{}

// Name returns the identifier under which this processor reports itself.
func (messageContentProcessor) Name() string {
	const processorName = "MessageContentProcessor"
	return processorName
}
// ShouldProcess reports whether this processor applies to the request.
//
// It returns true only when context is non-nil, context.kind is accepted by
// isTextGenerationKind, and params holds a non-nil "messages" entry.
// modelType is not consulted.
//
// A nil context yields false rather than a nil-pointer panic, matching the
// other helpers in this file that treat a nil context as "no information".
func (messageContentProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	if context == nil {
		return false
	}
	return isTextGenerationKind(context.kind) && params["messages"] != nil
}
// Process runs params["messages"] through processMessageListContent and
// stores the returned list back into params only when a change was reported.
// modelType is not consulted. The method always returns true.
func (messageContentProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	if rewritten, didChange := processMessageListContent(params["messages"], context); didChange {
		params["messages"] = rewritten
	}
	return true
}
// processMessageListContent walks a message list and rewrites the content
// parts of every message whose "content" is a []any.
//
// value must be a []any; any other type yields (nil, false). Entries that are
// not map[string]any are passed through untouched. Map entries are copied with
// cloneMap, and the copy's "content" is replaced only when
// processMessageContentParts reports a change. The second result is true when
// at least one message had its content replaced.
func processMessageListContent(value any, context *paramProcessContext) ([]any, bool) {
	rawMessages, isList := value.([]any)
	if !isList {
		return nil, false
	}

	var (
		result  = make([]any, 0, len(rawMessages))
		changed bool
	)
	for idx := range rawMessages {
		message, isMap := rawMessages[idx].(map[string]any)
		if !isMap {
			result = append(result, rawMessages[idx])
			continue
		}

		copied := cloneMap(message)
		if parts, hasParts := message["content"].([]any); hasParts {
			// The path identifies this message's content in recorded changes.
			path := fmt.Sprintf("messages[%d].content", idx)
			converted, partsChanged := processMessageContentParts(parts, path, context)
			if partsChanged {
				copied["content"] = converted
				changed = true
			}
		}
		result = append(result, copied)
	}
	return result, changed
}
// processMessageContentParts rewrites the individual parts of one message's
// content list.
//
// Parts that are not map[string]any are passed through untouched. For map
// parts, messageContentPartReplacement decides whether a replacement is
// needed: if so, the replacement is appended and the conversion is reported
// through context.recordChange under the path "<basePath>[<index>]"; if not,
// a cloneMap copy of the part is appended. The second result is true when at
// least one part was replaced.
func processMessageContentParts(parts []any, basePath string, context *paramProcessContext) ([]any, bool) {
	result := make([]any, 0, len(parts))
	anyConverted := false
	for idx, entry := range parts {
		part, isMap := entry.(map[string]any)
		if !isMap {
			result = append(result, entry)
			continue
		}

		replacement, converted := messageContentPartReplacement(part, context)
		if !converted {
			result = append(result, cloneMap(part))
			continue
		}

		result = append(result, replacement)
		context.recordChange(
			"MessageContentProcessor",
			"convert",
			fmt.Sprintf("%s[%d]", basePath, idx),
			part,
			replacement,
			messageContentConversionReason(part),
			messageContentCapabilityPath(part),
			messageContentCapabilityValue(part, context),
		)
		anyConverted = true
	}
	return result, anyConverted
}
// messageContentPartReplacement builds a plain-text substitute for a media
// content part that the model cannot consume.
//
// The part is classified as image, video or audio, in that order; a part
// whose "type" is "input_audio" also counts as audio. It returns (nil, false)
// when the part matches none of these, when modelSupportsMessageModality
// reports support for the matching capability, or when no URL can be
// extracted from the part. Otherwise it returns a {"type": "text"} part whose
// text is a modality-specific label followed by the URL, and true.
func messageContentPartReplacement(part map[string]any, context *paramProcessContext) (map[string]any, bool) {
	var (
		capability string
		label      string
		extractURL func(map[string]any) string
	)
	if isImageContent(part) {
		capability, label, extractURL = "image_analysis", "Image link: ", imageURLFromContentPart
	} else if isVideoContent(part) {
		capability, label, extractURL = "video_understanding", "video URL: ", videoURLFromContentPart
	} else if isAudioContent(part) || stringFromAny(part["type"]) == "input_audio" {
		capability, label, extractURL = "audio_understanding", "audio URL: ", audioURLFromContentPart
	} else {
		return nil, false
	}

	// A model that understands the modality keeps the original part.
	if modelSupportsMessageModality(context, capability) {
		return nil, false
	}
	url := extractURL(part)
	if url == "" {
		return nil, false
	}
	return map[string]any{"type": "text", "text": label + url}, true
}
// messageContentConversionReason returns the message explaining why a media
// part was converted to a text link. Image parts are checked first, then
// video parts; every other part gets the audio message.
func messageContentConversionReason(part map[string]any) string {
	if isImageContent(part) {
		return "模型不支持图像理解,已将 image_url 转为文本链接。"
	}
	if isVideoContent(part) {
		return "模型不支持视频理解,已将 video_url 转为文本链接。"
	}
	return "模型不支持音频理解,已将音频输入转为文本链接。"
}
// messageContentCapabilityPath returns the dotted capability path associated
// with a content part. Image parts are checked first, then video parts; every
// other part maps to the audio capability path.
func messageContentCapabilityPath(part map[string]any) string {
	if isImageContent(part) {
		return "capabilities.image_analysis"
	}
	if isVideoContent(part) {
		return "capabilities.video_understanding"
	}
	return "capabilities.audio_understanding"
}
// messageContentCapabilityValue looks up the capability entry that matches a
// content part in context.modelCapability via capabilityValue.
//
// It returns nil when context is nil. Image parts are checked first, then
// video parts; every other part uses the audio capability name.
func messageContentCapabilityValue(part map[string]any, context *paramProcessContext) any {
	if context == nil {
		return nil
	}
	name := "audio_understanding"
	if isImageContent(part) {
		name = "image_analysis"
	} else if isVideoContent(part) {
		name = "video_understanding"
	}
	return capabilityValue(context.modelCapability, name, "")
}
// modelSupportsMessageModality reports whether the model described by
// context.modelCapability can handle the given capability.
//
// A nil context counts as unsupported. Support is assumed when
// capabilityForType finds an entry for capabilityName or for "omni", or when
// the "originalTypes" list contains capabilityName or "omni".
func modelSupportsMessageModality(context *paramProcessContext, capabilityName string) bool {
	if context == nil {
		return false
	}
	caps := context.modelCapability
	for _, name := range []string{capabilityName, "omni"} {
		if capabilityForType(caps, name) != nil {
			return true
		}
	}
	declared := stringListFromAny(caps["originalTypes"])
	if containsString(declared, capabilityName) {
		return true
	}
	return containsString(declared, "omni")
}
// imageURLFromContentPart extracts an image URL from a content part by
// probing the keys "image_url", "url" and "imageUrl", in that order, through
// urlFromNestedContentPart.
func imageURLFromContentPart(part map[string]any) string {
	candidateKeys := []string{"image_url", "url", "imageUrl"}
	return urlFromNestedContentPart(part, candidateKeys...)
}
// videoURLFromContentPart extracts a video URL from a content part by
// probing the keys "video_url", "url" and "videoUrl", in that order, through
// urlFromNestedContentPart.
func videoURLFromContentPart(part map[string]any) string {
	candidateKeys := []string{"video_url", "url", "videoUrl"}
	return urlFromNestedContentPart(part, candidateKeys...)
}
// audioURLFromContentPart extracts an audio reference from a content part.
//
// For parts whose "type" is "input_audio", the nested "input_audio" map is
// consulted first: its "data" value, then its "url" value, combined through
// firstNonEmptyString. When that yields nothing (or the part has another
// type), the keys "audio_url", "url" and "audioUrl" are probed through
// urlFromNestedContentPart.
func audioURLFromContentPart(part map[string]any) string {
	if stringFromAny(part["type"]) == "input_audio" {
		audio, isMap := part["input_audio"].(map[string]any)
		if isMap {
			inline := firstNonEmptyString(stringFromAny(audio["data"]), stringFromAny(audio["url"]))
			if inline != "" {
				return inline
			}
		}
	}
	return urlFromNestedContentPart(part, "audio_url", "url", "audioUrl")
}
// urlFromNestedContentPart returns the first non-empty URL found under the
// given keys, probing them in the order supplied.
//
// For each key, the value itself is tried first through stringFromAny; if
// that is empty and the value is a map[string]any, the map's "url" entry is
// tried. It returns "" when no key yields a URL.
func urlFromNestedContentPart(part map[string]any, keys ...string) string {
	for _, key := range keys {
		candidate := part[key]
		if direct := stringFromAny(candidate); direct != "" {
			return direct
		}
		nested, isMap := candidate.(map[string]any)
		if !isMap {
			continue
		}
		if inner := stringFromAny(nested["url"]); inner != "" {
			return inner
		}
	}
	return ""
}