package runner

import (
	"fmt"
	"math"
	"strings"

	"github.com/easyai/easyai-ai-gateway/apps/api/internal/store"
)

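// contentFilterProcessor prunes and rewrites items in params["content"] so the
// request only carries references that the selected model's capabilities can accept.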
type contentFilterProcessor struct{}

func (contentFilterProcessor) Name() string { return "ContentFilterProcessor" }

func (contentFilterProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	_, ok := params["content"]
	return ok
}

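// Process filters the content array against the candidate model's capabilities:
// omni-video models get mode/count based filtering, pure text-to-video models drop
// image items, and models without first/last-frame support lose last_frame items and
// their convenience fields. It returns false only when downgrading reference images
// fails and the request is rejected.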
func (contentFilterProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	content := contentItems(params["content"])
	if len(content) == 0 {
		return true
	}

	if isOmniVideoLike(context) {
		filtered := filterUnsupportedOmniVideoContent(content, context)
		params["content"] = mapsToAnySlice(filtered)
		syncVideoConvenienceFields(params, filtered, context)
		return true
	}

	if err := downgradeReferenceImageIfNeeded(params, content, modelType, context); err != nil {
		return false
	}
	if modelType == "video_generate" || modelType == "text_to_video" {
		next := make([]map[string]any, 0, len(content))
		for index, item := range content {
			if isImageContent(item) {
				reason, path, value := imageContentRemovalEvidence(item, modelType, context)
				context.recordChange(
					"ContentFilterProcessor",
					"remove",
					fmt.Sprintf("content[%d]", index),
					item,
					nil,
					reason,
					path,
					value,
				)
				continue
			}
			next = append(next, item)
		}
		content = next
	}
	if modelType == "image_to_video" || modelType == "omni_video" || modelType == "omni" {
		if !supportsFirstAndLastFrame(context.modelCapability, modelType) {
			next := make([]map[string]any, 0, len(content))
			for index, item := range content {
				if stringFromAny(item["role"]) == "last_frame" {
					context.recordChange(
						"ContentFilterProcessor",
						"remove",
						fmt.Sprintf("content[%d]", index),
						item,
						nil,
						"The model does not support first/last frame input; last_frame was removed.",
						capabilityPath(modelType, "input_first_last_frame"),
						map[string]any{
							"input_first_last_frame":    capabilityValue(context.modelCapability, modelType, "input_first_last_frame"),
							"max_images_for_last_frame": capabilityValue(context.modelCapability, modelType, "max_images_for_last_frame"),
						},
					)
					continue
				}
				next = append(next, item)
			}
			content = next
			deleteFieldsWithLog(params, context, "ContentFilterProcessor", []string{"last_frame", "lastFrame"}, "The model does not support first/last frame input; the convenience fields were removed.", capabilityPath(modelType, "input_first_last_frame"), map[string]any{
				"input_first_last_frame":    capabilityValue(context.modelCapability, modelType, "input_first_last_frame"),
				"max_images_for_last_frame": capabilityValue(context.modelCapability, modelType, "max_images_for_last_frame"),
			})
		}
	}
	params["content"] = mapsToAnySlice(content)
	return true
}

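// imageContentRemovalEvidence returns the change-log reason, capability path, and
// capability snapshot used when an image content item is removed, keyed by the
// item's role.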
func imageContentRemovalEvidence(item map[string]any, modelType string, context *paramProcessContext) (string, string, any) {
	role := stringFromAny(item["role"])
	switch role {
	case "first_frame":
		return "The model capability does not enable first-frame input; first_frame was removed.", capabilityPath(modelType, "input_first_frame"), map[string]any{
			"input_first_frame":      capabilityValue(context.modelCapability, modelType, "input_first_frame"),
			"input_first_last_frame": capabilityValue(context.modelCapability, modelType, "input_first_last_frame"),
		}
	case "last_frame":
		return "The model capability does not enable last-frame or first/last-frame input; last_frame was removed.", capabilityPath(modelType, "input_first_last_frame"), map[string]any{
			"input_last_frame":            capabilityValue(context.modelCapability, modelType, "input_last_frame"),
			"input_first_last_frame":      capabilityValue(context.modelCapability, modelType, "input_first_last_frame"),
			"max_images_for_last_frame":   capabilityValue(context.modelCapability, modelType, "max_images_for_last_frame"),
			"max_images_for_first_frame":  capabilityValue(context.modelCapability, modelType, "max_images_for_first_frame"),
			"max_images_for_middle_frame": capabilityValue(context.modelCapability, modelType, "max_images_for_middle_frame"),
		}
	case "reference_image":
		return "The model capability does not enable reference-image input; reference_image was removed.", capabilityPath(modelType, "input_reference_generate_single"), map[string]any{
			"input_reference_generate_single":   capabilityValue(context.modelCapability, modelType, "input_reference_generate_single"),
			"input_reference_generate_multiple": capabilityValue(context.modelCapability, modelType, "input_reference_generate_multiple"),
			"max_images":                        capabilityValue(context.modelCapability, modelType, "max_images"),
		}
	default:
		return "The current model capability does not enable image input; image_url was removed.", capabilityPath(modelType, "input_first_frame"), map[string]any{
			"input_first_frame":                 capabilityValue(context.modelCapability, modelType, "input_first_frame"),
			"input_first_last_frame":            capabilityValue(context.modelCapability, modelType, "input_first_last_frame"),
			"input_reference_generate_single":   capabilityValue(context.modelCapability, modelType, "input_reference_generate_single"),
			"input_reference_generate_multiple": capabilityValue(context.modelCapability, modelType, "input_reference_generate_multiple"),
		}
	}
}

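// ensureVideoContent builds params["content"] from convenience fields (prompt,
// first_frame/last_frame, image/reference_* URLs, video and audio references) when
// the caller did not supply a content array, so later processors can filter a
// uniform structure.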
func ensureVideoContent(params map[string]any, context *paramProcessContext) {
	if len(contentItems(params["content"])) > 0 {
		return
	}
	content := make([]map[string]any, 0)
	if prompt := firstNonEmptyString(stringFromAny(params["prompt"]), stringFromAny(params["input"])); prompt != "" {
		content = append(content, map[string]any{"type": "text", "text": prompt})
	}
	appendURL := func(kind string, role string, url string) {
		url = strings.TrimSpace(url)
		if url == "" {
			return
		}
		item := map[string]any{"type": kind, "role": role}
		switch kind {
		case "image_url":
			item["image_url"] = map[string]any{"url": url}
		case "video_url":
			item["video_url"] = map[string]any{"url": url}
		case "audio_url":
			item["audio_url"] = map[string]any{"url": url}
		}
		content = append(content, item)
	}

	firstFrame := firstNonEmptyStringValue(params, "first_frame", "firstFrame")
	appendURL("image_url", "first_frame", firstFrame)
	appendURL("image_url", "last_frame", firstNonEmptyStringValue(params, "last_frame", "lastFrame"))
	imageURLs := firstNonEmptyStringListFromAny(params["image"], params["images"], params["image_url"], params["imageUrl"], params["image_urls"], params["imageUrls"])
	if firstFrame == "" && len(imageURLs) > 0 {
		appendURL("image_url", "first_frame", imageURLs[0])
		imageURLs = imageURLs[1:]
	}
	for _, url := range imageURLs {
		appendURL("image_url", "reference_image", url)
	}
	for _, url := range firstNonEmptyStringListFromAny(params["reference_image"], params["referenceImage"]) {
		appendURL("image_url", "reference_image", url)
	}
	for _, url := range firstNonEmptyStringListFromAny(params["video"], params["video_url"], params["videoUrl"], params["reference_video"], params["referenceVideo"]) {
		appendURL("video_url", "reference_video", url)
	}
	for _, url := range firstNonEmptyStringListFromAny(params["audio_url"], params["audioUrl"], params["reference_audio"], params["referenceAudio"]) {
		appendURL("audio_url", "reference_audio", url)
	}
	if len(content) > 0 {
		params["content"] = mapsToAnySlice(content)
		context.recordChange(
			"ContentBuildProcessor",
			"set",
			"content",
			nil,
			params["content"],
			"Converted convenience fields such as prompt/first_frame/reference_* into a content array so downstream processors can filter each item against model capabilities.",
			"",
			nil,
		)
	}
}

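// effectiveModelCapability merges a candidate's capability override on top of its
// base capabilities. Nested maps are merged key by key; any other override value
// replaces the base value wholesale.
//
// For example (hypothetical data), a base of
//
//	{"image_to_video": {"max_images": 1, "input_first_frame": true}}
//
// combined with an override of
//
//	{"image_to_video": {"max_images": 2}}
//
// yields
//
//	{"image_to_video": {"max_images": 2, "input_first_frame": true}}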
func effectiveModelCapability(candidate store.RuntimeModelCandidate) map[string]any {
	base := cloneMap(candidate.Capabilities)
	for key, value := range candidate.CapabilityOverride {
		if baseChild, ok := base[key].(map[string]any); ok {
			if overrideChild, ok := value.(map[string]any); ok {
				base[key] = mergeMap(baseChild, overrideChild)
				continue
			}
		}
		base[key] = cloneAny(value)
	}
	return base
}

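// filterUnsupportedOmniVideoContent drops video and audio content items that the
// omni-video capability does not allow, enforcing supported_modes, max_videos,
// input_audio, and max_audios, and records a change entry for each removal.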
func filterUnsupportedOmniVideoContent(content []map[string]any, context *paramProcessContext) []map[string]any {
	capability := omniVideoCapability(context)
	maxVideos := math.Inf(1)
	if capability != nil {
		if value, ok := numericField(capability, "max_videos"); ok {
			maxVideos = value
		}
	}
	maxAudios := 0.0
	if capability != nil {
		if value, ok := numericField(capability, "max_audios"); ok {
			maxAudios = value
		} else if supportsOmniAudioReference(context) {
			maxAudios = math.Inf(1)
		}
	}

	videoCount := 0.0
	audioCount := 0.0
	out := make([]map[string]any, 0, len(content))
	for index, item := range content {
		if isVideoContent(item) {
			if !supportsOmniVideoReference(item, capability) {
				path, value := omniCapabilityEvidence(context, "supported_modes")
				context.recordChange(
					"ContentFilterProcessor",
					"remove",
					fmt.Sprintf("content[%d]", index),
					item,
					nil,
					"The video reference type is not allowed by omni_video.supported_modes.",
					path,
					value,
				)
				continue
			}
			if videoCount >= maxVideos {
				path, value := omniCapabilityEvidence(context, "max_videos")
				context.recordChange(
					"ContentFilterProcessor",
					"remove",
					fmt.Sprintf("content[%d]", index),
					item,
					nil,
					"The number of video references exceeds the omni_video.max_videos limit.",
					path,
					value,
				)
				continue
			}
			videoCount++
			out = append(out, item)
			continue
		}
		if isAudioContent(item) {
			if !supportsOmniAudioReference(context) {
				path, value := omniCapabilityEvidence(context, "input_audio")
				context.recordChange(
					"ContentFilterProcessor",
					"remove",
					fmt.Sprintf("content[%d]", index),
					item,
					nil,
					"The model capability does not support audio references; audio_url was removed.",
					path,
					mergeMetrics(map[string]any{"input_audio": value}, omniCapabilityBundle(context, "max_audios")),
				)
				continue
			}
			if audioCount >= maxAudios {
				path, value := omniCapabilityEvidence(context, "max_audios")
				context.recordChange(
					"ContentFilterProcessor",
					"remove",
					fmt.Sprintf("content[%d]", index),
					item,
					nil,
					"The number of audio references exceeds the omni_video.max_audios limit.",
					path,
					value,
				)
				continue
			}
			audioCount++
			out = append(out, item)
			continue
		}
		out = append(out, item)
	}
	return out
}

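// isOmniVideoLike reports whether the candidate should be treated as an omni-video
// model, either by its declared model type or by the presence of an omni_video/omni
// capability block.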
func isOmniVideoLike(context *paramProcessContext) bool {
	modelType := strings.TrimSpace(context.candidate.ModelType)
	return modelType == "omni_video" ||
		modelType == "omni" ||
		context.modelCapability["omni_video"] != nil ||
		context.modelCapability["omni"] != nil
}

func omniVideoCapability(context *paramProcessContext) map[string]any {
	if capability := capabilityForType(context.modelCapability, "omni_video"); capability != nil {
		return capability
	}
	return capabilityForType(context.modelCapability, "omni")
}

func supportsOmniAudioReference(context *paramProcessContext) bool {
	capability := omniVideoCapability(context)
	return capability != nil && (boolFromAny(capability["input_audio"]) || floatFromAny(capability["max_audios"]) > 0)
}

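// supportsOmniVideoReference reports whether a video content item is allowed by the
// omni-video capability. Base/edit videos require the "video_edit" mode,
// feature/reference videos require "video_reference", and unmarked items pass if
// either mode is supported.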
func supportsOmniVideoReference(item map[string]any, capability map[string]any) bool {
	if capability == nil {
		return true
	}
	if value, ok := numericField(capability, "max_videos"); ok && value == 0 {
		return false
	}
	supportedModes := stringListFromAny(capability["supported_modes"])
	supportsReference := containsString(supportedModes, "video_reference")
	supportsEdit := containsString(supportedModes, "video_edit")
	video, _ := item["video_url"].(map[string]any)
	referType := stringFromAny(video["refer_type"])
	isEditVideo := stringFromAny(item["role"]) == "video_base" || referType == "base"
	isReferenceVideo := stringFromAny(item["role"]) == "video_feature" ||
		stringFromAny(item["role"]) == "reference_video" ||
		referType == "feature"
	if isEditVideo {
		return supportsEdit
	}
	if isReferenceVideo {
		return supportsReference
	}
	return supportsReference || supportsEdit
}

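// downgradeReferenceImageIfNeeded handles video models that cannot accept reference
// images: one reference image is downgraded to first_frame, two are downgraded to
// first/last frames, and anything the capability still cannot express (mixed
// video/audio references, more than two images, or missing frame support) rejects
// the request with capability evidence.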
func downgradeReferenceImageIfNeeded(params map[string]any, content []map[string]any, modelType string, context *paramProcessContext) error {
	if !isVideoModelType(modelType) {
		return nil
	}
	if supportsReferenceImage(context.modelCapability, modelType) {
		return nil
	}

	imageIndexes := make([]int, 0)
	referenceIndexes := make([]int, 0)
	hasVideoOrAudioReference := false
	for index, item := range content {
		if isVideoContent(item) || isAudioContent(item) {
			hasVideoOrAudioReference = true
			continue
		}
		if !isImageContent(item) {
			continue
		}
		imageIndexes = append(imageIndexes, index)
		role := stringFromAny(item["role"])
		if role == "" || role == "reference_image" {
			referenceIndexes = append(referenceIndexes, index)
		}
	}
	if len(referenceIndexes) == 0 {
		return nil
	}

	evidence := referenceImageDowngradeCapabilityEvidence(context.modelCapability, modelType)
	if hasVideoOrAudioReference {
		context.reject(
			"ContentFilterProcessor",
			"content",
			content,
			"The current model does not support multimodal references, so video or audio references cannot be downgraded to first/last frames; remove the video/audio references or choose a model that supports multimodal references.",
			evidence.path,
			evidence.value,
		)
		return context.err
	}
	if len(imageIndexes) > 2 {
		context.reject(
			"ContentFilterProcessor",
			"content",
			content,
			"The current model does not support multiple reference images; at most 2 images can be downgraded to first/last frames.",
			evidence.path,
			evidence.value,
		)
		return context.err
	}
	if len(imageIndexes) == 2 && !supportsFirstAndLastFrame(context.modelCapability, modelType) {
		context.reject(
			"ContentFilterProcessor",
			"content",
			content,
			"The current model does not support first/last frame input, so the 2 reference images cannot be downgraded to first/last frames.",
			evidence.path,
			evidence.value,
		)
		return context.err
	}
	if len(imageIndexes) == 1 && !supportsFirstFrame(context.modelCapability, modelType) {
		context.reject(
			"ContentFilterProcessor",
			"content",
			content,
			"The current model does not support first-frame input, so the reference image cannot be downgraded to a first frame.",
			evidence.path,
			evidence.value,
		)
		return context.err
	}

	if len(imageIndexes) == 1 {
		adjustImageContentRole(content, imageIndexes[0], "first_frame", context, modelType, "The model does not support reference_image and only 1 image was provided; it was downgraded to first_frame.")
		appendParamWarning(params, "reference_image is unsupported by the selected model and was downgraded to first_frame")
		return nil
	}

	firstIndex, lastIndex := firstLastFrameIndexes(content, imageIndexes)
	adjustImageContentRole(content, firstIndex, "first_frame", context, modelType, "The model does not support reference_image; the 2 images were downgraded to first/last frames (first_frame).")
	adjustImageContentRole(content, lastIndex, "last_frame", context, modelType, "The model does not support reference_image; the 2 images were downgraded to first/last frames (last_frame).")
	appendParamWarning(params, "reference_image is unsupported by the selected model and was downgraded to first/last frame")
	return nil
}

type capabilityEvidenceValue struct {
	path  string
	value any
}

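// referenceImageDowngradeCapabilityEvidence collects the capability fields relevant
// to the reference-image downgrade decision as change-log evidence.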
func referenceImageDowngradeCapabilityEvidence(modelCapability map[string]any, modelType string) capabilityEvidenceValue {
	actualType, capability := firstVideoInputCapability(modelCapability, modelType)
	if actualType == "" {
		actualType = modelType
	}
	value := map[string]any{}
	if capability != nil {
		for _, key := range []string{
			"input_reference_generate_single",
			"input_reference_generate_multiple",
			"max_images",
			"input_first_frame",
			"input_first_last_frame",
			"max_images_for_last_frame",
		} {
			value[key] = cloneAny(capability[key])
		}
	}
	return capabilityEvidenceValue{path: capabilityPath(actualType, ""), value: value}
}

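// adjustImageContentRole rewrites the role of the content item at index and records
// the adjustment, keeping a clone of the original item as the "before" value in the
// change log.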
func adjustImageContentRole(content []map[string]any, index int, role string, context *paramProcessContext, modelType string, reason string) {
	if index < 0 || index >= len(content) {
		return
	}
	item := content[index]
	if stringFromAny(item["role"]) == role {
		return
	}
	before := cloneMap(item)
	item["role"] = role
	context.recordChange(
		"ContentFilterProcessor",
		"adjust",
		fmt.Sprintf("content[%d].role", index),
		before,
		item,
		reason,
		capabilityPath(modelType, "input_reference_generate_single"),
		referenceImageDowngradeCapabilityEvidence(context.modelCapability, modelType).value,
	)
}

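// firstLastFrameIndexes picks which two image indexes become the first and last
// frame, honouring roles that are already set and falling back to the original order
// when the roles are absent or conflicting.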
func firstLastFrameIndexes(content []map[string]any, imageIndexes []int) (int, int) {
	firstIndex := -1
	lastIndex := -1
	for _, index := range imageIndexes {
		switch stringFromAny(content[index]["role"]) {
		case "first_frame":
			if firstIndex == -1 {
				firstIndex = index
			}
		case "last_frame":
			if lastIndex == -1 {
				lastIndex = index
			}
		}
	}
	if firstIndex == -1 && lastIndex == -1 {
		return imageIndexes[0], imageIndexes[1]
	}
	if firstIndex == -1 {
		for _, index := range imageIndexes {
			if index != lastIndex {
				firstIndex = index
				break
			}
		}
	}
	if lastIndex == -1 {
		for _, index := range imageIndexes {
			if index != firstIndex {
				lastIndex = index
				break
			}
		}
	}
	if firstIndex == lastIndex {
		return imageIndexes[0], imageIndexes[1]
	}
	return firstIndex, lastIndex
}

type videoInputCapabilityValue struct {
	modelType  string
	capability map[string]any
}

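// firstVideoInputCapability returns the first matching video-input capability block
// and its model type, or empty values when none are configured.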
func firstVideoInputCapability(modelCapability map[string]any, modelType string) (string, map[string]any) {
	candidates := videoInputCapabilityCandidates(modelCapability, modelType)
	if len(candidates) == 0 {
		return "", nil
	}
	return candidates[0].modelType, candidates[0].capability
}

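// videoInputCapabilityCandidates returns the capability blocks relevant to video
// image input for the given model type, deduplicated and in priority order: the
// model type itself, then image_to_video and video_first_last_frame, plus
// omni_video/omni for omni models.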
func videoInputCapabilityCandidates(modelCapability map[string]any, modelType string) []videoInputCapabilityValue {
	keys := []string{modelType, "image_to_video", "video_first_last_frame"}
	if modelType == "omni_video" || modelType == "omni" {
		keys = append(keys, "omni_video", "omni")
	}
	seen := map[string]bool{}
	out := make([]videoInputCapabilityValue, 0, len(keys))
	for _, key := range keys {
		key = strings.TrimSpace(key)
		if key == "" || seen[key] {
			continue
		}
		seen[key] = true
		if capability := capabilityForType(modelCapability, key); capability != nil {
			out = append(out, videoInputCapabilityValue{modelType: key, capability: capability})
		}
	}
	return out
}

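// supportsReferenceImage reports whether any relevant capability block enables
// reference-image input, either through the explicit
// input_reference_generate_single/multiple flags or a max_images value greater than
// one. With no capability information at all it defaults to true.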
func supportsReferenceImage(modelCapability map[string]any, modelType string) bool {
	candidates := videoInputCapabilityCandidates(modelCapability, modelType)
	if len(candidates) == 0 {
		return true
	}
	for _, candidate := range candidates {
		capability := candidate.capability
		_, hasSingle := capability["input_reference_generate_single"]
		_, hasMultiple := capability["input_reference_generate_multiple"]
		if hasSingle || hasMultiple {
			if boolFromAny(capability["input_reference_generate_single"]) || boolFromAny(capability["input_reference_generate_multiple"]) {
				return true
			}
			continue
		}
		if value, ok := numericField(capability, "max_images"); ok {
			if value > 1 {
				return true
			}
			continue
		}
	}
	return false
}

func supportsFirstFrame(modelCapability map[string]any, modelType string) bool {
	for _, candidate := range videoInputCapabilityCandidates(modelCapability, modelType) {
		capability := candidate.capability
		if boolFromAny(capability["input_first_frame"]) ||
			boolFromAny(capability["input_first_last_frame"]) ||
			floatFromAny(capability["max_images_for_first_frame"]) > 0 ||
			floatFromAny(capability["max_images_for_last_frame"]) > 0 {
			return true
		}
	}
	return false
}

func supportsFirstAndLastFrame(modelCapability map[string]any, modelType string) bool {
	for _, candidate := range videoInputCapabilityCandidates(modelCapability, modelType) {
		capability := candidate.capability
		if boolFromAny(capability["input_first_last_frame"]) || floatFromAny(capability["max_images_for_last_frame"]) > 0 {
			return true
		}
	}
	return false
}

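// videoModeKey derives the capability key implied by the frame roles present in
// params["content"]: first+last, first only, last only, or "" when no frame roles
// are used.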
func videoModeKey(params map[string]any) string {
	content := contentItems(params["content"])
	hasFirstFrame := false
	hasLastFrame := false
	for _, item := range content {
		switch stringFromAny(item["role"]) {
		case "first_frame":
			hasFirstFrame = true
		case "last_frame":
			hasLastFrame = true
		}
	}
	switch {
	case hasFirstFrame && hasLastFrame:
		return "input_first_last_frame"
	case hasFirstFrame:
		return "input_first_frame"
	case hasLastFrame:
		return "input_last_frame"
	default:
		return ""
	}
}

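// syncDurationSeconds mirrors params["duration"] into params["duration_seconds"]
// when a duration is present.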
func syncDurationSeconds(params map[string]any) {
	if params["duration"] != nil {
		params["duration_seconds"] = params["duration"]
	}
}

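// syncVideoConvenienceFields removes the video/audio convenience fields when the
// corresponding content items were filtered out, so the shortcut fields never
// contradict the filtered content array.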
func syncVideoConvenienceFields(params map[string]any, content []map[string]any, context *paramProcessContext) {
	hasVideo := false
	hasAudio := false
	for _, item := range content {
		hasVideo = hasVideo || isVideoContent(item)
		hasAudio = hasAudio || isAudioContent(item)
	}
	if !hasVideo {
		path, value := omniCapabilityEvidence(context, "supported_modes")
		deleteFieldsWithLog(params, context, "ContentFilterProcessor", []string{"video", "video_url", "videoUrl", "reference_video", "referenceVideo"}, "The corresponding video content was filtered out by model capabilities; the video reference convenience fields were removed.", path, value)
	}
	if !hasAudio {
		path, value := omniCapabilityEvidence(context, "input_audio")
		deleteFieldsWithLog(params, context, "ContentFilterProcessor", []string{"audio_url", "audioUrl", "reference_audio", "referenceAudio"}, "The corresponding audio content was filtered out by model capabilities; the audio reference convenience fields were removed.", path, mergeMetrics(map[string]any{"input_audio": value}, omniCapabilityBundle(context, "max_audios")))
	}
}

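// deleteFieldsWithLog deletes each of the given keys from params and records a
// removal change for every key that was actually present.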
func deleteFieldsWithLog(params map[string]any, context *paramProcessContext, processor string, keys []string, reason string, capabilityPath string, capabilityValue any) {
	for _, key := range keys {
		if before, ok := params[key]; ok {
			delete(params, key)
			context.recordChange(processor, "remove", key, before, nil, reason, capabilityPath, capabilityValue)
		}
	}
}

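// appendParamWarning appends a warning to params["_param_warnings"], skipping
// duplicates.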
func appendParamWarning(params map[string]any, warning string) {
	warnings, _ := params["_param_warnings"].([]any)
	for _, item := range warnings {
		if stringFromAny(item) == warning {
			return
		}
	}
	params["_param_warnings"] = append(warnings, warning)
}

func filterContent(content []map[string]any, keep func(map[string]any) bool) []map[string]any {
	out := make([]map[string]any, 0, len(content))
	for _, item := range content {
		if keep(item) {
			out = append(out, item)
		}
	}
	return out
}