easyai-ai-gateway/apps/api/internal/runner/param_processor_media.go

381 lines
11 KiB
Go

package runner
import (
"fmt"
"math"
"strings"
)
// resolutionNormalizeProcessor is a stateless parameter processor that copies
// a resolution-formatted "size" value into the "resolution" parameter.
type resolutionNormalizeProcessor struct{}

// Name returns the identifier this processor reports for itself.
func (resolutionNormalizeProcessor) Name() string {
	return "ResolutionNormalizeProcessor"
}
// ShouldProcess reports whether "size" should be promoted to "resolution".
// That is the case only when "resolution" is unset (empty after
// stringFromAny), "size" is set, and the size is recognized as an image or
// video resolution for modelType. The context argument is not consulted.
func (resolutionNormalizeProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	if stringFromAny(params["resolution"]) == "" {
		if size := stringFromAny(params["size"]); size != "" {
			return isImageResolution(modelType, size) || isVideoResolution(modelType, size)
		}
	}
	return false
}
// Process copies a resolution-formatted "size" into params["resolution"] and
// context.resolution, and records the change together with the model's
// output_resolutions capability as evidence. The conditions from
// ShouldProcess are re-checked here, so calling Process on a request that
// does not qualify is a no-op. It always returns true.
func (resolutionNormalizeProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	size := stringFromAny(params["size"])
	if stringFromAny(params["resolution"]) != "" {
		// An explicit resolution always wins over size.
		return true
	}
	if !isImageResolution(modelType, size) && !isVideoResolution(modelType, size) {
		return true
	}

	_, evidence := capabilityEvidence(context.modelCapability, modelType, "output_resolutions")
	params["resolution"] = size
	context.resolution = size
	context.recordChange(
		"ResolutionNormalizeProcessor",
		"set",
		"resolution",
		nil,
		size,
		"size 使用分辨率格式,归一到 resolution 供后续能力校验和计费使用。",
		capabilityPath(modelType, "output_resolutions"),
		evidence,
	)
	return true
}
// aspectRatioProcessor is a stateless parameter processor that validates the
// "aspect_ratio" parameter against the model's capability configuration.
type aspectRatioProcessor struct{}

// Name returns the identifier this processor reports for itself.
func (aspectRatioProcessor) Name() string {
	return "AspectRatioProcessor"
}
// ShouldProcess reports whether aspect-ratio handling applies: the model type
// must not be "text_generate" and the request must carry a non-empty
// "aspect_ratio" or "size". The context argument is not consulted.
func (aspectRatioProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	if modelType == "text_generate" {
		return false
	}
	if stringFromAny(params["aspect_ratio"]) != "" {
		return true
	}
	return stringFromAny(params["size"]) != ""
}
// Process reconciles params["aspect_ratio"] with the capability entry for
// modelType and mirrors the outcome into context.aspectRatio. It always
// returns true.
//
// Steps, in order:
//  1. No capability entry for modelType: nothing is touched.
//  2. The value is flagged by isEmptyParamString: aspect_ratio is removed.
//  3. The effective resolution is taken from params["resolution"] or
//     context.resolution, falling back to the first capability
//     output_resolutions entry, or else to a "size" ending in "K" or "p".
//     It is used to look up the allowed ratios.
//  4. The allowed list is exactly ["adaptive"]: aspect_ratio is forced to
//     "adaptive", even if the request did not supply one.
//  5. The allowed list is non-nil but empty: aspect_ratio is removed.
//  6. No aspect_ratio was supplied: nothing more to do.
//  7. There is no allowed list and the value passes validAspectRatio: the
//     value is kept.
//  8. Otherwise validateAndAdjustAspectRatio decides whether the value is
//     removed (not ok) or replaced by an allowed value.
//
// A removal is written to the change log only when the key was actually
// present in params. The processor also runs for requests that carry only
// "size", and logging a removal of a parameter that was never sent would
// add a meaningless nil -> nil entry.
func (aspectRatioProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	capability := capabilityForType(context.modelCapability, modelType)
	if capability == nil {
		return true
	}

	// remove drops aspect_ratio from params and context. When citeAllowed is
	// set, the aspect_ratio_allowed capability is attached as evidence.
	remove := func(reason string, citeAllowed bool) {
		before, present := params["aspect_ratio"]
		delete(params, "aspect_ratio")
		context.aspectRatio = ""
		if !present {
			return
		}
		if !citeAllowed {
			context.recordChange("AspectRatioProcessor", "remove", "aspect_ratio", before, nil, reason, "", nil)
			return
		}
		context.recordChange(
			"AspectRatioProcessor",
			"remove",
			"aspect_ratio",
			before,
			nil,
			reason,
			capabilityPath(modelType, "aspect_ratio_allowed"),
			capability["aspect_ratio_allowed"],
		)
	}

	aspectRatio := stringFromAny(params["aspect_ratio"])
	// NOTE(review): isEmptyParamString is defined elsewhere; presumably it
	// matches placeholder strings that stand for "no value" — confirm.
	if isEmptyParamString(aspectRatio) {
		remove("aspect_ratio 是空值字符串,不能作为有效比例传给上游。", false)
		return true
	}

	resolution := firstNonEmptyString(stringFromAny(params["resolution"]), context.resolution)
	if resolution == "" {
		if values := stringListFromAny(capability["output_resolutions"]); len(values) > 0 {
			resolution = values[0]
		} else if size := stringFromAny(params["size"]); strings.HasSuffix(size, "K") || strings.HasSuffix(size, "p") {
			resolution = size
		}
	}

	allowed := aspectRatioAllowed(capability["aspect_ratio_allowed"], resolution)
	if allowed != nil {
		switch {
		case len(allowed) == 1 && allowed[0] == "adaptive":
			before := params["aspect_ratio"]
			params["aspect_ratio"] = "adaptive"
			context.aspectRatio = "adaptive"
			if before != "adaptive" {
				context.recordChange(
					"AspectRatioProcessor",
					"adjust",
					"aspect_ratio",
					before,
					"adaptive",
					"模型当前分辨率只允许 adaptive 宽高比。",
					capabilityPath(modelType, "aspect_ratio_allowed"),
					capability["aspect_ratio_allowed"],
				)
			}
			return true
		case len(allowed) == 0:
			remove("模型能力配置不允许传入任何 aspect_ratio。", true)
			return true
		}
	}

	if aspectRatio == "" {
		return true
	}
	if allowed == nil && validAspectRatio(aspectRatio) {
		// Store the string form produced by stringFromAny.
		params["aspect_ratio"] = aspectRatio
		context.aspectRatio = aspectRatio
		return true
	}

	processed, ok := validateAndAdjustAspectRatio(aspectRatio, capability, allowed)
	if !ok {
		remove("传入的 aspect_ratio 不在模型允许范围内,且没有可用替代值。", true)
		return true
	}
	if processed == "" {
		return true
	}

	before := params["aspect_ratio"]
	params["aspect_ratio"] = processed
	context.aspectRatio = processed
	if before == processed {
		return true
	}

	// Cite aspect_ratio_range instead of aspect_ratio_allowed when the
	// requested ratio is unparsable or falls outside the configured range.
	path := capabilityPath(modelType, "aspect_ratio_allowed")
	value := capability["aspect_ratio_allowed"]
	if ratioRange, ok := numberPair(capability["aspect_ratio_range"]); ok {
		ratio, valid := aspectRatioNumber(aspectRatio)
		if !valid || ratio < ratioRange[0] || ratio > ratioRange[1] {
			path = capabilityPath(modelType, "aspect_ratio_range")
			value = capability["aspect_ratio_range"]
		}
	}
	context.recordChange(
		"AspectRatioProcessor",
		"adjust",
		"aspect_ratio",
		before,
		processed,
		"传入的 aspect_ratio 不符合模型能力配置,已调整为允许值。",
		path,
		value,
	)
	return true
}
// inputAudioProcessor is a stateless parameter processor that strips audio
// input from video requests when the model capability does not enable it.
type inputAudioProcessor struct{}

// Name returns the identifier this processor reports for itself.
func (inputAudioProcessor) Name() string {
	return "InputAudioProcessor"
}
// ShouldProcess reports whether the request targets a video model type and
// has at least one audio item in params["content"]. The context argument is
// not consulted.
func (inputAudioProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	if isVideoModelType(modelType) {
		for _, entry := range contentItems(params["content"]) {
			if isAudioContent(entry) {
				return true
			}
		}
	}
	return false
}
// Process removes audio input from the request when the model does not
// support it. It always returns true.
//
// Support is determined from context.modelCapability. Omni-video-like
// contexts are answered by supportsOmniAudioReference; otherwise the
// "input_audio" flag of the capability entry for modelType is used. An empty
// capability map, or a missing entry, counts as unsupported.
//
// When audio is unsupported, every audio item is dropped from
// params["content"], with one change record per item keyed by its original
// index. The shortcut fields audio_url, audioUrl, reference_audio and
// referenceAudio are then deleted through deleteFieldsWithLog.
func (inputAudioProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	content := contentItems(params["content"])
	if len(content) == 0 {
		return true
	}

	supportsInputAudio := false
	if len(context.modelCapability) > 0 {
		if isOmniVideoLike(context) {
			supportsInputAudio = supportsOmniAudioReference(context)
		} else if capability := capabilityForType(context.modelCapability, modelType); capability != nil {
			supportsInputAudio = boolFromAny(capability["input_audio"])
		}
	}
	if supportsInputAudio {
		return true
	}

	// The evidence depends only on context and modelType, so it is looked up
	// once and reused for every change record below.
	path, value := audioInputCapabilityEvidence(context, modelType)

	kept := make([]map[string]any, 0, len(content))
	for index, item := range content {
		if !isAudioContent(item) {
			kept = append(kept, item)
			continue
		}
		context.recordChange(
			"InputAudioProcessor",
			"remove",
			fmt.Sprintf("content[%d]", index),
			item,
			nil,
			"模型能力未开启输入音频,已移除 audio_url。",
			path,
			value,
		)
	}
	params["content"] = mapsToAnySlice(kept)

	deleteFieldsWithLog(params, context, "InputAudioProcessor", []string{"audio_url", "audioUrl", "reference_audio", "referenceAudio"}, "模型能力未开启输入音频,已移除音频参考快捷字段。", path, value)
	return true
}
// durationProcessor is a stateless parameter processor that normalizes the
// "duration" parameter of video requests against the model's duration
// capability settings.
type durationProcessor struct{}

// Name returns the identifier this processor reports for itself.
func (durationProcessor) Name() string {
	return "DurationProcessor"
}
// ShouldProcess reports whether the request targets a video model type and
// carries a non-nil "duration". The context argument is not consulted.
func (durationProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	if !isVideoModelType(modelType) {
		return false
	}
	return params["duration"] != nil
}
// Process normalizes params["duration"] using the capability entry for
// modelType. It always returns true.
//
// Nothing happens when there is no capability entry or when the duration,
// read through floatFromAny, is not positive. Otherwise exactly one rule is
// applied, in this order of precedence:
//   - duration_options: the value comes from nextAllowedNumber;
//   - duration_range: the value comes from normalizeDurationByRange, using
//     duration_step;
//   - duration_step alone: the value comes from normalizeDurationByStep.
//
// The options, range and step lookups are scoped by the effective resolution
// (params["resolution"], else context.resolution) and by videoModeKey(params).
// params["duration"] is always rewritten with the normalized number and
// syncDurationSeconds(params) is called. A change is recorded only when the
// number differs from the input.
func (durationProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	capability := capabilityForType(context.modelCapability, modelType)
	if capability == nil {
		return true
	}
	duration := floatFromAny(params["duration"])
	if duration <= 0 {
		return true
	}

	resolution := firstNonEmptyString(stringFromAny(params["resolution"]), context.resolution)
	modeKey := videoModeKey(params)

	// Select the applicable rule first; the write-back and logging below are
	// shared by all three rules.
	var (
		reason      string
		evidenceKey string
		evidence    any
	)
	normalized := duration
	if options := scopedNumberList(capability["duration_options"], resolution, modeKey); len(options) > 0 {
		normalized = nextAllowedNumber(duration, options)
		reason = "duration 不在模型固定时长选项内,已向上调整为允许值。"
		evidenceKey = "duration_options"
		evidence = capability["duration_options"]
	} else if minValue, maxValue, ok := scopedRange(capability["duration_range"], resolution, modeKey); ok {
		step := durationStep(capability["duration_step"], resolution, modeKey)
		normalized = normalizeDurationByRange(duration, minValue, maxValue, step)
		reason = "duration 超出模型时长范围或步进配置,已按能力配置归一。"
		evidenceKey = "duration_range"
		evidence = map[string]any{
			"duration_range": capability["duration_range"],
			"duration_step":  capability["duration_step"],
		}
	} else {
		step := durationStep(capability["duration_step"], resolution, modeKey)
		normalized = normalizeDurationByStep(duration, step)
		reason = "duration 不符合模型时长步进,已按步进向上归一。"
		evidenceKey = "duration_step"
		evidence = capability["duration_step"]
	}

	params["duration"] = normalized
	syncDurationSeconds(params)
	if normalized != duration {
		context.recordChange(
			"DurationProcessor",
			"adjust",
			"duration",
			duration,
			normalized,
			reason,
			capabilityPath(modelType, evidenceKey),
			evidence,
		)
	}
	return true
}
// audioProcessor is a stateless parameter processor that removes the audio
// output parameters from video requests when the model capability does not
// enable output audio.
type audioProcessor struct{}

// Name returns the identifier this processor reports for itself.
func (audioProcessor) Name() string {
	return "AudioProcessor"
}
// ShouldProcess reports whether the request targets a video model type and
// carries a non-nil "audio" or "output_audio" parameter. The context argument
// is not consulted.
func (audioProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	if !isVideoModelType(modelType) {
		return false
	}
	if params["audio"] != nil {
		return true
	}
	return params["output_audio"] != nil
}
// Process deletes "audio" and "output_audio" from params when the capability
// entry for modelType is missing or its "output_audio" flag is not set. One
// change is recorded per deleted key. Deletion is decided by key presence, so
// a key holding nil is removed and logged as well. It always returns true.
func (audioProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	capability := capabilityForType(context.modelCapability, modelType)
	if capability != nil && boolFromAny(capability["output_audio"]) {
		// Output audio is supported; leave the parameters alone.
		return true
	}

	for _, key := range []string{"audio", "output_audio"} {
		before, present := params[key]
		if !present {
			continue
		}
		delete(params, key)
		context.recordChange(
			"AudioProcessor",
			"remove",
			key,
			before,
			nil,
			"模型能力未开启输出音频,已移除音频输出参数。",
			capabilityPath(modelType, "output_audio"),
			capabilityValue(context.modelCapability, modelType, "output_audio"),
		)
	}
	return true
}
// imageCountProcessor is a stateless parameter processor that caps the
// requested image count "n" at the model's output_max_images_count.
type imageCountProcessor struct{}

// Name returns the identifier this processor reports for itself.
func (imageCountProcessor) Name() string {
	return "ImageCountProcessor"
}
// ShouldProcess reports whether modelType is one of the image model types
// ("image_generate" or "image_edit"). Neither params nor context is consulted.
func (imageCountProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	switch modelType {
	case "image_generate", "image_edit":
		return true
	}
	return false
}
// Process writes the effective image count into params["n"], capped at the
// capability's output_max_images_count. It always returns true.
//
// Nothing happens unless the capability entry for modelType exists, has
// "output_multiple_images" set, and has a positive rounded
// output_max_images_count. The requested count is taken from "n", then from
// "batch_size", then defaults to 1; values are rounded to the nearest
// integer. When the count exceeds the cap, a change is recorded against
// field "n" (also when the count came from "batch_size") and the cap is
// stored instead.
func (imageCountProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	capability := capabilityForType(context.modelCapability, modelType)
	if capability == nil || !boolFromAny(capability["output_multiple_images"]) {
		return true
	}

	// rounded converts a loosely typed number to the nearest int.
	rounded := func(raw any) int {
		return int(math.Round(floatFromAny(raw)))
	}

	limit := rounded(capability["output_max_images_count"])
	if limit <= 0 {
		return true
	}

	requested := rounded(params["n"])
	if requested <= 0 {
		requested = rounded(params["batch_size"])
	}
	if requested <= 0 {
		requested = 1
	}

	if requested <= limit {
		params["n"] = requested
		return true
	}

	context.recordChange(
		"ImageCountProcessor",
		"adjust",
		"n",
		requested,
		limit,
		"请求图片数量超过模型输出上限,已按 output_max_images_count 截断。",
		capabilityPath(modelType, "output_max_images_count"),
		capability["output_max_images_count"],
	)
	params["n"] = limit
	return true
}