easyai-ai-gateway/apps/api/internal/runner/param_processor_media.go

804 lines
24 KiB
Go

package runner
import (
"fmt"
"math"
"strings"
)
// resolutionNormalizeProcessor mirrors a resolution-style "size" value into the
// "resolution" parameter when the request did not set "resolution" itself.
type resolutionNormalizeProcessor struct{}

// Name returns the processor's name.
func (resolutionNormalizeProcessor) Name() string { return "ResolutionNormalizeProcessor" }

// ShouldProcess reports whether "resolution" is empty while "size" holds a value
// that isImageResolution or isVideoResolution accepts for modelType.
func (resolutionNormalizeProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	if stringFromAny(params["resolution"]) != "" {
		return false
	}
	switch requested := stringFromAny(params["size"]); {
	case requested == "":
		return false
	case isImageResolution(modelType, requested):
		return true
	default:
		return isVideoResolution(modelType, requested)
	}
}
// Process copies "size" into "resolution" (and into context.resolution) when
// "resolution" is empty and "size" is an image or video resolution for
// modelType, recording the change. It always returns true.
func (resolutionNormalizeProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	requested := stringFromAny(params["size"])
	if stringFromAny(params["resolution"]) != "" {
		return true
	}
	if !isImageResolution(modelType, requested) && !isVideoResolution(modelType, requested) {
		return true
	}

	// Only the capability value is needed here; the path is rebuilt below.
	_, evidence := capabilityEvidence(context.modelCapability, modelType, "output_resolutions")
	params["resolution"] = requested
	context.resolution = requested
	context.recordChange(
		"ResolutionNormalizeProcessor",
		"set",
		"resolution",
		nil,
		requested,
		"size 使用分辨率格式,归一到 resolution 供后续能力校验和计费使用。",
		capabilityPath(modelType, "output_resolutions"),
		evidence,
	)
	return true
}
// aspectRatioProcessor validates the "aspect_ratio" parameter against the
// model capability and adjusts or removes it when it is not acceptable.
type aspectRatioProcessor struct{}

// Name returns the processor's name.
func (aspectRatioProcessor) Name() string { return "AspectRatioProcessor" }

// ShouldProcess reports whether the model type is not "text_generate" and the
// request carries a non-empty "aspect_ratio" or "size".
func (aspectRatioProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	if modelType == "text_generate" {
		return false
	}
	if stringFromAny(params["aspect_ratio"]) != "" {
		return true
	}
	return stringFromAny(params["size"]) != ""
}
// Process reconciles "aspect_ratio" with the capability of modelType.
//
// In order, it:
//   - returns early when no capability is configured for modelType;
//   - removes the parameter when isEmptyParamString flags its value;
//   - forces "adaptive" when that is the only allowed ratio;
//   - removes the parameter when the allowed list exists but is empty;
//   - keeps a valid ratio untouched when there is no allowed list;
//   - otherwise applies validateAndAdjustAspectRatio, removing the parameter
//     when that fails and recording an adjustment when the value changes.
//
// Every mutation is mirrored into context.aspectRatio. It always returns true.
func (aspectRatioProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	capability := capabilityForType(context.modelCapability, modelType)
	if capability == nil {
		return true
	}

	const processorName = "AspectRatioProcessor"

	// dropAspectRatio deletes the parameter, clears the cached ratio and
	// records the removal with the supplied reason and evidence.
	dropAspectRatio := func(reason string, evidencePath string, evidenceValue any) {
		previous := params["aspect_ratio"]
		delete(params, "aspect_ratio")
		context.aspectRatio = ""
		context.recordChange(
			processorName,
			"remove",
			"aspect_ratio",
			previous,
			nil,
			reason,
			evidencePath,
			evidenceValue,
		)
	}

	requestedRatio := stringFromAny(params["aspect_ratio"])
	if isEmptyParamString(requestedRatio) {
		dropAspectRatio("aspect_ratio 是空值字符串,不能作为有效比例传给上游。", "", nil)
		return true
	}

	// Pick the resolution used to scope the allowed ratios: explicit value,
	// then the first configured output resolution, then a "K"/"p" style size.
	resolution := firstNonEmptyString(stringFromAny(params["resolution"]), context.resolution)
	if resolution == "" {
		supported := stringListFromAny(capability["output_resolutions"])
		if len(supported) > 0 {
			resolution = supported[0]
		} else {
			size := stringFromAny(params["size"])
			if strings.HasSuffix(size, "K") || strings.HasSuffix(size, "p") {
				resolution = size
			}
		}
	}

	allowed := aspectRatioAllowed(capability["aspect_ratio_allowed"], resolution)
	if allowed != nil {
		switch {
		case len(allowed) == 1 && allowed[0] == "adaptive":
			previous := params["aspect_ratio"]
			params["aspect_ratio"] = "adaptive"
			context.aspectRatio = "adaptive"
			if previous != "adaptive" {
				context.recordChange(
					processorName,
					"adjust",
					"aspect_ratio",
					previous,
					"adaptive",
					"模型当前分辨率只允许 adaptive 宽高比。",
					capabilityPath(modelType, "aspect_ratio_allowed"),
					capability["aspect_ratio_allowed"],
				)
			}
			return true
		case len(allowed) == 0:
			dropAspectRatio(
				"模型能力配置不允许传入任何 aspect_ratio。",
				capabilityPath(modelType, "aspect_ratio_allowed"),
				capability["aspect_ratio_allowed"],
			)
			return true
		}
	}

	if requestedRatio == "" {
		return true
	}
	if allowed == nil && validAspectRatio(requestedRatio) {
		params["aspect_ratio"] = requestedRatio
		context.aspectRatio = requestedRatio
		return true
	}

	adjusted, accepted := validateAndAdjustAspectRatio(requestedRatio, capability, allowed)
	if !accepted {
		dropAspectRatio(
			"传入的 aspect_ratio 不在模型允许范围内,且没有可用替代值。",
			capabilityPath(modelType, "aspect_ratio_allowed"),
			capability["aspect_ratio_allowed"],
		)
		return true
	}
	if adjusted == "" {
		return true
	}

	previous := params["aspect_ratio"]
	params["aspect_ratio"] = adjusted
	context.aspectRatio = adjusted
	if previous == adjusted {
		return true
	}

	// Blame the numeric range instead of the allowed list when the requested
	// ratio is unparsable or falls outside aspect_ratio_range.
	evidencePath := capabilityPath(modelType, "aspect_ratio_allowed")
	evidenceValue := capability["aspect_ratio_allowed"]
	if ratioRange, hasRange := numberPair(capability["aspect_ratio_range"]); hasRange {
		ratio, valid := aspectRatioNumber(requestedRatio)
		if !valid || ratio < ratioRange[0] || ratio > ratioRange[1] {
			evidencePath = capabilityPath(modelType, "aspect_ratio_range")
			evidenceValue = capability["aspect_ratio_range"]
		}
	}
	context.recordChange(
		processorName,
		"adjust",
		"aspect_ratio",
		previous,
		adjusted,
		"传入的 aspect_ratio 不符合模型能力配置,已调整为允许值。",
		evidencePath,
		evidenceValue,
	)
	return true
}
// imageSizeProcessor constrains requested image dimensions to the size limits
// configured in the model capability.
type imageSizeProcessor struct{}

// Name returns the processor's name.
func (imageSizeProcessor) Name() string { return "ImageSizeProcessor" }

// ShouldProcess reports whether modelType is an image type, the request yields
// usable dimensions, and the capability configures at least one size constraint.
func (imageSizeProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	switch modelType {
	case "image_generate", "image_edit":
	default:
		return false
	}
	if _, _, found := imageDimensionsFromParams(params); !found {
		return false
	}
	capability := capabilityForType(context.modelCapability, modelType)
	if capability == nil {
		return false
	}
	return imageSizeCapabilityConfigured(capability)
}
// Process constrains the requested dimensions with constrainImageDimensions and
// rewrites "width", "height", "size" and, when one can be derived, "resolution".
// When "aspect_ratio" is empty it is filled from the constrained dimensions if
// validateAndAdjustAspectRatio accepts the derived ratio. A change is recorded
// when the tracked size fields differ from their values on entry. It always
// returns true.
func (imageSizeProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	capability := capabilityForType(context.modelCapability, modelType)
	if capability == nil {
		return true
	}
	requestedWidth, requestedHeight, found := imageDimensionsFromParams(params)
	if !found {
		return true
	}

	// Snapshot the tracked fields before anything is overwritten.
	snapshot := make(map[string]any, 4)
	for _, key := range [...]string{"width", "height", "size", "resolution"} {
		if existing, present := params[key]; present {
			snapshot[key] = cloneAny(existing)
		}
	}

	width, height := constrainImageDimensions(requestedWidth, requestedHeight, capability)
	params["width"] = width
	params["height"] = height

	if stringFromAny(params["aspect_ratio"]) == "" {
		derivedRatio := aspectRatioFromDimensions(width, height)
		currentResolution := firstNonEmptyString(stringFromAny(params["resolution"]), context.resolution)
		allowedRatios := aspectRatioAllowed(capability["aspect_ratio_allowed"], currentResolution)
		if ratio, accepted := validateAndAdjustAspectRatio(derivedRatio, capability, allowedRatios); accepted && ratio != "" {
			params["aspect_ratio"] = ratio
			context.aspectRatio = ratio
		}
	}

	resolution := normalizeImageResolutionForCapability(
		firstNonEmptyString(stringFromAny(params["resolution"]), context.resolution),
		width,
		height,
		capability,
	)
	if resolution != "" {
		params["resolution"] = resolution
		context.resolution = resolution
	}

	// "size" carries the resolution label only when the capability asks for
	// that format and a label is available; otherwise it is "<width>x<height>".
	var sizeValue string
	if stringFromAny(capability["size_param_format"]) == "resolution" && resolution != "" {
		sizeValue = resolution
	} else {
		sizeValue = fmt.Sprintf("%dx%d", width, height)
	}
	params["size"] = sizeValue

	updated := map[string]any{
		"width":  params["width"],
		"height": params["height"],
		"size":   params["size"],
	}
	if current := stringFromAny(params["resolution"]); current != "" {
		updated["resolution"] = current
	}
	if imageSizeMapsEqual(snapshot, updated) {
		return true
	}

	evidencePath, evidenceValue := imageSizeConstraintEvidence(modelType, capability)
	context.recordChange(
		"ImageSizeProcessor",
		"adjust",
		"size",
		snapshot,
		updated,
		"图像宽高不符合模型尺寸限制,已按模型能力调整 width、height 和 size。",
		evidencePath,
		evidenceValue,
	)
	return true
}
// imageDimensionsFromParams extracts a width/height pair from params.
//
// It prefers positive "width" and "height" values, then a pixel size parsed
// from "size", and finally whatever parsePixelSizeString returns for
// "resolution". The boolean reports whether dimensions were found.
func imageDimensionsFromParams(params map[string]any) (int, int, bool) {
	if w, h := positiveIntegerFromAny(params["width"]), positiveIntegerFromAny(params["height"]); w > 0 && h > 0 {
		return w, h, true
	}
	w, h, ok := parsePixelSizeString(stringFromAny(params["size"]))
	if !ok {
		w, h, ok = parsePixelSizeString(stringFromAny(params["resolution"]))
	}
	return w, h, ok
}
// imageSizeCapabilityConfigured reports whether capability defines any image
// size constraint: an output size range, a width/height range, an aspect ratio
// range, or a width/height multiple greater than one.
func imageSizeCapabilityConfigured(capability map[string]any) bool {
	if capability == nil {
		return false
	}
	if _, _, ok := imageOutputSizeRange(capability); ok {
		return true
	}
	for _, key := range [...]string{"width_height_range", "aspect_ratio_range"} {
		if _, _, ok := numberRangeFromAny(capability[key]); ok {
			return true
		}
	}
	return positiveIntegerFromAny(capability["width_height_multiple"]) > 1
}
// constrainImageDimensions applies the capability's size constraints to
// width and height and returns the adjusted pair.
//
// Non-positive inputs are returned unchanged. The four constraint steps are
// re-run, up to four passes, until a pass leaves the dimensions unchanged,
// because a later step can disturb what an earlier one established.
func constrainImageDimensions(width int, height int, capability map[string]any) (int, int) {
	if width <= 0 || height <= 0 {
		return width, height
	}

	const maxPasses = 4
	steps := []func(*imageDimensions, map[string]any){
		applyImageOutputSizeRange,
		applyImageWidthHeightRange,
		applyImageAspectRatioRange,
		applyImageWidthHeightMultiple,
	}

	current := imageDimensions{Width: width, Height: height}
	for pass := 0; pass < maxPasses; pass++ {
		previous := current
		for _, step := range steps {
			step(&current, capability)
		}
		if current == previous {
			break
		}
	}
	return current.Width, current.Height
}
// imageDimensions is the width/height pair that the applyImage* constraint
// helpers adjust in place.
type imageDimensions struct {
	// Width is the image width; its product with Height is compared against
	// pixel-count bounds.
	Width int
	// Height is the image height.
	Height int
}
// applyImageOutputSizeRange scales result so that Width*Height falls within
// the total-pixel bounds returned by imageOutputSizeRange.
//
// Below the minimum the dimensions are scaled up (rounding up); above the
// maximum they are scaled down (rounding down). Only one direction is applied
// per call. Nothing happens when no range is configured or either side is
// non-positive.
func applyImageOutputSizeRange(result *imageDimensions, capability map[string]any) {
	minValue, maxValue, ok := imageOutputSizeRange(capability)
	if !ok || result.Width <= 0 || result.Height <= 0 {
		return
	}
	// Multiply in float64: the int product of two very large sides can wrap
	// around, which would yield a negative pixel count, a NaN scale factor, and
	// a silently skipped maximum-size clamp.
	pixels := float64(result.Width) * float64(result.Height)
	if minValue > 0 && pixels < minValue {
		scaleImageDimensions(result, math.Sqrt(minValue/pixels), true)
		return
	}
	if maxValue > 0 && pixels > maxValue {
		scaleImageDimensions(result, math.Sqrt(maxValue/pixels), false)
	}
}
// applyImageWidthHeightRange scales result so its sides respect the
// "width_height_range" capability: the longer side is shrunk to the maximum
// first; otherwise the shorter side is grown to the minimum. Only one of the
// two adjustments is applied per call.
func applyImageWidthHeightRange(result *imageDimensions, capability map[string]any) {
	minValue, maxValue, ok := numberRangeFromAny(capability["width_height_range"])
	if !ok || result.Width <= 0 || result.Height <= 0 {
		return
	}
	longest := float64(max(result.Width, result.Height))
	shortest := float64(min(result.Width, result.Height))
	switch {
	case maxValue > 0 && longest > maxValue:
		scaleImageDimensions(result, maxValue/longest, false)
	case minValue > 0 && shortest < minValue:
		scaleImageDimensions(result, minValue/shortest, true)
	}
}
// applyImageAspectRatioRange pulls the width/height ratio of result into the
// "aspect_ratio_range" capability. A ratio that is too wide increases Height;
// a ratio that is too narrow increases Width. Both round up and never drop
// below 1.
func applyImageAspectRatioRange(result *imageDimensions, capability map[string]any) {
	minValue, maxValue, ok := numberRangeFromAny(capability["aspect_ratio_range"])
	if !ok || result.Width <= 0 || result.Height <= 0 {
		return
	}
	width, height := float64(result.Width), float64(result.Height)
	switch ratio := width / height; {
	case maxValue > 0 && ratio > maxValue:
		result.Height = max(1, int(math.Ceil(width/maxValue)))
	case minValue > 0 && ratio < minValue:
		result.Width = max(1, int(math.Ceil(height*minValue)))
	}
}
// applyImageWidthHeightMultiple rounds both sides of result to the
// "width_height_multiple" capability when that multiple is greater than one.
func applyImageWidthHeightMultiple(result *imageDimensions, capability map[string]any) {
	step := positiveIntegerFromAny(capability["width_height_multiple"])
	if step <= 1 {
		return
	}
	if result.Width <= 0 || result.Height <= 0 {
		return
	}
	for _, side := range []*int{&result.Width, &result.Height} {
		*side = roundImageDimensionToMultiple(*side, step)
	}
}
// imageOutputSizeRange reads the output size bounds from capability.
//
// A usable [min, max] pair under one of the range keys wins. Otherwise the
// minimum and maximum are looked up independently under their alias keys. The
// boolean is true when at least one bound is positive.
func imageOutputSizeRange(capability map[string]any) (float64, float64, bool) {
	rangeKeys := [...]string{"output_size_range", "outputSizeRange", "size_range", "sizeRange"}
	for i := range rangeKeys {
		lower, upper, ok := numberRangeFromAny(capability[rangeKeys[i]])
		if ok {
			return lower, upper, true
		}
	}

	lowerKeys := []string{"output_min_size", "outputMinSize", "min_output_size", "minOutputSize", "min_size", "minSize", "output_min_pixels", "outputMinPixels", "min_pixels", "minPixels"}
	upperKeys := []string{"output_max_size", "outputMaxSize", "max_output_size", "maxOutputSize", "max_size", "maxSize", "output_max_pixels", "outputMaxPixels", "max_pixels", "maxPixels"}
	lower := firstPositiveNumber(capability, lowerKeys)
	upper := firstPositiveNumber(capability, upperKeys)
	return lower, upper, lower > 0 || upper > 0
}
// firstPositiveNumber returns the first positive value found in values under
// keys, checked in order, or 0 when none of them is positive.
func firstPositiveNumber(values map[string]any, keys []string) float64 {
	for i := range keys {
		candidate := positiveFloatFromAny(values[keys[i]])
		if candidate > 0 {
			return candidate
		}
	}
	return 0
}
// numberRangeFromAny interprets value as a [min, max] pair via numberPair.
//
// Negative bounds are rejected. When both bounds are set in reverse order
// they are swapped. The boolean is true when at least one bound is positive;
// a bound of 0 is left as 0.
func numberRangeFromAny(value any) (float64, float64, bool) {
	pair, ok := numberPair(value)
	if !ok {
		return 0, 0, false
	}
	low, high := pair[0], pair[1]
	if low < 0 || high < 0 {
		return 0, 0, false
	}
	if high > 0 && low > high {
		low, high = high, low
	}
	return low, high, low > 0 || high > 0
}
// scaleImageDimensions multiplies both sides of result by scale, rounding up
// when roundUp is set and down otherwise, and never going below 1. A scale
// that is non-positive, NaN or infinite leaves result untouched.
func scaleImageDimensions(result *imageDimensions, scale float64, roundUp bool) {
	if math.IsNaN(scale) || math.IsInf(scale, 0) || scale <= 0 {
		return
	}
	snap := math.Floor
	if roundUp {
		snap = math.Ceil
	}
	scaled := func(side int) int {
		return max(1, int(snap(float64(side)*scale)))
	}
	result.Width = scaled(result.Width)
	result.Height = scaled(result.Height)
}
// roundImageDimensionToMultiple rounds value to the nearest multiple of
// multiple, returning at least one full multiple. A multiple of 1 or less
// returns value unchanged.
func roundImageDimensionToMultiple(value int, multiple int) int {
	if multiple <= 1 {
		return value
	}
	units := int(math.Round(float64(value) / float64(multiple)))
	if rounded := units * multiple; rounded > multiple {
		return rounded
	}
	return multiple
}
// normalizeImageResolutionForCapability picks the resolution label to use for
// the given dimensions.
//
// current is kept when "output_resolutions" lists it. With no configured list,
// current wins over the label derived from the dimensions. Otherwise the
// derived label is used when listed, else the closest listed label.
func normalizeImageResolutionForCapability(current string, width int, height int, capability map[string]any) string {
	supported := stringListFromAny(capability["output_resolutions"])
	if containsString(supported, current) {
		return current
	}
	derived := imageResolutionFromDimensions(width, height)
	switch {
	case len(supported) == 0:
		return firstNonEmptyString(current, derived)
	case containsString(supported, derived):
		return derived
	default:
		return closestImageResolution(derived, supported)
	}
}
// imageResolutionFromDimensions maps the longer side of an image to a
// resolution label: up to 1920 is "1K", up to 2560 is "2K", up to 3328 is
// "3K", up to 3840 is "4K", and anything larger is "8K".
func imageResolutionFromDimensions(width int, height int) string {
	longest := width
	if height > longest {
		longest = height
	}
	tiers := [...]struct {
		limit int
		label string
	}{
		{1920, "1K"},
		{2560, "2K"},
		{3328, "3K"},
		{3840, "4K"},
	}
	for _, tier := range tiers {
		if longest <= tier.limit {
			return tier.label
		}
	}
	return "8K"
}
// aspectRatioFromDimensions formats width and height as a reduced "W:H"
// ratio. It returns "" when either side is non-positive.
func aspectRatioFromDimensions(width int, height int) string {
	if width > 0 && height > 0 {
		common := gcd(width, height)
		return fmt.Sprintf("%d:%d", width/common, height/common)
	}
	return ""
}
// gcd returns the greatest common divisor of the absolute values of a and b.
// It returns 1 when both are zero, so the result is always safe to divide by.
func gcd(a int, b int) int {
	x, y := a, b
	if x < 0 {
		x = -x
	}
	if y < 0 {
		y = -y
	}
	for ; y != 0; x, y = y, x%y {
	}
	if x != 0 {
		return x
	}
	return 1
}
// closestImageResolution picks the entry of allowed closest to target on the
// scale 1K < 2K < 3K < 4K < 8K.
//
// Entries of allowed outside that scale are ignored; if none remain, the
// first allowed entry (or target when allowed is empty) is returned. A target
// outside the scale yields the first usable entry. Otherwise the nearest tier
// at or below target is preferred, then the nearest tier above it.
func closestImageResolution(target string, allowed []string) string {
	order := []string{"1K", "2K", "3K", "4K", "8K"}

	usable := make([]string, 0, len(allowed))
	for _, candidate := range allowed {
		if indexOfString(order, candidate) >= 0 {
			usable = append(usable, candidate)
		}
	}
	if len(usable) == 0 {
		if len(allowed) > 0 {
			return allowed[0]
		}
		return target
	}

	position := indexOfString(order, target)
	if position < 0 {
		return usable[0]
	}
	// Search downwards first, starting at the target tier itself.
	for i := position; i >= 0; i-- {
		if containsString(usable, order[i]) {
			return order[i]
		}
	}
	for i := position + 1; i < len(order); i++ {
		if containsString(usable, order[i]) {
			return order[i]
		}
	}
	return usable[0]
}
// indexOfString returns the index of the first element of values equal to
// target, or -1 when there is none.
func indexOfString(values []string, target string) int {
	for i := 0; i < len(values); i++ {
		if values[i] == target {
			return i
		}
	}
	return -1
}
// imageSizeConstraintEvidence returns the capability path and a copy of the
// value of the first size constraint key present in capability, for use as
// evidence in a change record.
//
// Keys are checked in this order: output size range keys, minimum size keys,
// maximum size keys, then "width_height_range", "aspect_ratio_range" and
// "width_height_multiple". The size keys cover every alias that
// imageOutputSizeRange reads, so a constraint configured under any of them
// (for example "max_pixels" or "size_range") is reported as the evidence
// instead of an unrelated key or the whole capability. When no key is present
// the whole capability is returned.
func imageSizeConstraintEvidence(modelType string, capability map[string]any) (string, any) {
	keys := []string{
		// Range keys, in imageOutputSizeRange order.
		"output_size_range", "outputSizeRange", "size_range", "sizeRange",
		// Minimum keys, in imageOutputSizeRange order.
		"output_min_size", "outputMinSize", "min_output_size", "minOutputSize", "min_size", "minSize", "output_min_pixels", "outputMinPixels", "min_pixels", "minPixels",
		// Maximum keys, in imageOutputSizeRange order.
		"output_max_size", "outputMaxSize", "max_output_size", "maxOutputSize", "max_size", "maxSize", "output_max_pixels", "outputMaxPixels", "max_pixels", "maxPixels",
		// Per-side and ratio constraints.
		"width_height_range", "aspect_ratio_range", "width_height_multiple",
	}
	for _, key := range keys {
		if value, ok := capability[key]; ok {
			return capabilityPath(modelType, key), cloneAny(value)
		}
	}
	return capabilityPath(modelType, ""), cloneMap(capability)
}
// imageSizeMapsEqual reports whether before and after hold the same keys with
// equal values.
//
// Numeric values are compared by value regardless of their Go type, so a
// JSON-decoded float64(1024) equals the int 1024 written back by the
// processor. Without this, an unchanged request would always look modified.
func imageSizeMapsEqual(before map[string]any, after map[string]any) bool {
	for key, value := range after {
		if !imageSizeValuesEqual(before[key], value) {
			return false
		}
	}
	for key := range before {
		if _, ok := after[key]; !ok {
			return false
		}
	}
	return true
}

// imageSizeValuesEqual compares two parameter values, treating any two
// numbers as equal when their numeric values match.
func imageSizeValuesEqual(left any, right any) bool {
	leftNumber, leftIsNumber := imageSizeNumericValue(left)
	rightNumber, rightIsNumber := imageSizeNumericValue(right)
	if leftIsNumber && rightIsNumber {
		return leftNumber == rightNumber
	}
	return left == right
}

// imageSizeNumericValue converts the built-in integer and float types to
// float64. The boolean is false for every other type.
func imageSizeNumericValue(value any) (float64, bool) {
	switch number := value.(type) {
	case int:
		return float64(number), true
	case int8:
		return float64(number), true
	case int16:
		return float64(number), true
	case int32:
		return float64(number), true
	case int64:
		return float64(number), true
	case uint:
		return float64(number), true
	case uint8:
		return float64(number), true
	case uint16:
		return float64(number), true
	case uint32:
		return float64(number), true
	case uint64:
		return float64(number), true
	case float32:
		return float64(number), true
	case float64:
		return number, true
	}
	return 0, false
}
// inputAudioProcessor strips audio inputs from video requests when the model
// capability does not allow input audio.
type inputAudioProcessor struct{}

// Name returns the processor's name.
func (inputAudioProcessor) Name() string { return "InputAudioProcessor" }

// ShouldProcess reports whether modelType is a video type and "content"
// contains at least one audio item.
func (inputAudioProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	if !isVideoModelType(modelType) {
		return false
	}
	items := contentItems(params["content"])
	for i := range items {
		if isAudioContent(items[i]) {
			return true
		}
	}
	return false
}
// Process removes audio items from "content", plus the audio shortcut fields,
// when the model does not support input audio. Each removed content item is
// recorded with its index in the original list. Requests with no content, or
// for models that do support input audio, are left untouched. It always
// returns true.
func (inputAudioProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	items := contentItems(params["content"])
	if len(items) == 0 {
		return true
	}

	// audioSupported resolves the capability flag; an empty capability set
	// counts as unsupported.
	audioSupported := func() bool {
		if len(context.modelCapability) == 0 {
			return false
		}
		if isOmniVideoLike(context) {
			return supportsOmniAudioReference(context)
		}
		capability := capabilityForType(context.modelCapability, modelType)
		if capability == nil {
			return false
		}
		return boolFromAny(capability["input_audio"])
	}
	if audioSupported() {
		return true
	}

	kept := make([]map[string]any, 0, len(items))
	for position, entry := range items {
		if !isAudioContent(entry) {
			kept = append(kept, entry)
			continue
		}
		evidencePath, evidenceValue := audioInputCapabilityEvidence(context, modelType)
		context.recordChange(
			"InputAudioProcessor",
			"remove",
			fmt.Sprintf("content[%d]", position),
			entry,
			nil,
			"模型能力未开启输入音频,已移除 audio_url。",
			evidencePath,
			evidenceValue,
		)
	}
	params["content"] = mapsToAnySlice(kept)

	evidencePath, evidenceValue := audioInputCapabilityEvidence(context, modelType)
	shortcutFields := []string{"audio_url", "audioUrl", "reference_audio", "referenceAudio"}
	deleteFieldsWithLog(params, context, "InputAudioProcessor", shortcutFields, "模型能力未开启输入音频,已移除音频参考快捷字段。", evidencePath, evidenceValue)
	return true
}
// durationProcessor normalizes the "duration" parameter of video requests to
// the options, range and step configured in the model capability.
type durationProcessor struct{}

// Name returns the processor's name.
func (durationProcessor) Name() string { return "DurationProcessor" }

// ShouldProcess reports whether modelType is a video type and "duration" is set.
func (durationProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	if !isVideoModelType(modelType) {
		return false
	}
	return params["duration"] != nil
}
// Process normalizes "duration" using the first capability setting that
// applies, in this order: "duration_options", then "duration_range" (with
// "duration_step"), then "duration_step" alone. The lookups are scoped by the
// current resolution and the video mode key.
//
// The normalized value is always written back and syncDurationSeconds is
// called; a change is recorded only when the value differs from the request.
// A missing capability or a non-positive duration leaves params untouched.
// It always returns true.
func (durationProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	capability := capabilityForType(context.modelCapability, modelType)
	if capability == nil {
		return true
	}
	duration := floatFromAny(params["duration"])
	if duration <= 0 {
		return true
	}
	resolution := firstNonEmptyString(stringFromAny(params["resolution"]), context.resolution)
	modeKey := videoModeKey(params)

	// commit stores the normalized duration and records the adjustment when
	// the value actually changed.
	commit := func(normalized any, reason string, evidenceKey string, evidence any) bool {
		params["duration"] = normalized
		syncDurationSeconds(params)
		if normalized != duration {
			context.recordChange(
				"DurationProcessor",
				"adjust",
				"duration",
				duration,
				normalized,
				reason,
				capabilityPath(modelType, evidenceKey),
				evidence,
			)
		}
		return true
	}

	if options := scopedNumberList(capability["duration_options"], resolution, modeKey); len(options) > 0 {
		return commit(
			nextAllowedNumber(duration, options),
			"duration 不在模型固定时长选项内,已向上调整为允许值。",
			"duration_options",
			capability["duration_options"],
		)
	}

	if minValue, maxValue, ok := scopedRange(capability["duration_range"], resolution, modeKey); ok {
		step := durationStep(capability["duration_step"], resolution, modeKey)
		return commit(
			normalizeDurationByRange(duration, minValue, maxValue, step),
			"duration 超出模型时长范围或步进配置,已按能力配置归一。",
			"duration_range",
			map[string]any{
				"duration_range": capability["duration_range"],
				"duration_step":  capability["duration_step"],
			},
		)
	}

	step := durationStep(capability["duration_step"], resolution, modeKey)
	return commit(
		normalizeDurationByStep(duration, step),
		"duration 不符合模型时长步进,已按步进向上归一。",
		"duration_step",
		capability["duration_step"],
	)
}
// audioProcessor removes audio output parameters from video requests when the
// model capability does not enable output audio.
type audioProcessor struct{}

// Name returns the processor's name.
func (audioProcessor) Name() string { return "AudioProcessor" }

// ShouldProcess reports whether modelType is a video type and either "audio"
// or "output_audio" is set.
func (audioProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	if !isVideoModelType(modelType) {
		return false
	}
	return params["audio"] != nil || params["output_audio"] != nil
}
// Process deletes "audio" and "output_audio" from params, recording each
// removal, unless the capability for modelType enables "output_audio". It
// always returns true.
func (audioProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	capability := capabilityForType(context.modelCapability, modelType)
	if capability != nil && boolFromAny(capability["output_audio"]) {
		return true
	}
	for _, field := range [...]string{"audio", "output_audio"} {
		previous, present := params[field]
		if !present {
			continue
		}
		delete(params, field)
		context.recordChange(
			"AudioProcessor",
			"remove",
			field,
			previous,
			nil,
			"模型能力未开启输出音频,已移除音频输出参数。",
			capabilityPath(modelType, "output_audio"),
			capabilityValue(context.modelCapability, modelType, "output_audio"),
		)
	}
	return true
}
// imageCountProcessor caps the number of requested images at the model's
// configured output limit.
type imageCountProcessor struct{}

// Name returns the processor's name.
func (imageCountProcessor) Name() string { return "ImageCountProcessor" }

// ShouldProcess reports whether modelType is "image_generate" or "image_edit".
func (imageCountProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	switch modelType {
	case "image_generate", "image_edit":
		return true
	default:
		return false
	}
}
// Process writes the effective image count to "n".
//
// It does nothing unless the capability enables "output_multiple_images" and
// sets a positive "output_max_images_count". The requested count comes from
// "n", then "batch_size", then defaults to 1. A count above the limit is
// truncated and the truncation recorded. It always returns true.
func (imageCountProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	capability := capabilityForType(context.modelCapability, modelType)
	if capability == nil || !boolFromAny(capability["output_multiple_images"]) {
		return true
	}

	// roundedCount converts a loosely typed number to the nearest int.
	roundedCount := func(value any) int {
		return int(math.Round(floatFromAny(value)))
	}

	limit := roundedCount(capability["output_max_images_count"])
	if limit <= 0 {
		return true
	}

	requested := roundedCount(params["n"])
	if requested <= 0 {
		requested = roundedCount(params["batch_size"])
	}
	if requested <= 0 {
		requested = 1
	}

	effective := requested
	if requested > limit {
		effective = limit
		context.recordChange(
			"ImageCountProcessor",
			"adjust",
			"n",
			requested,
			effective,
			"请求图片数量超过模型输出上限,已按 output_max_images_count 截断。",
			capabilityPath(modelType, "output_max_images_count"),
			capability["output_max_images_count"],
		)
	}
	params["n"] = effective
	return true
}
// imageQualityProcessor removes the "quality" parameter from image requests
// when it cannot be passed through.
type imageQualityProcessor struct{}

// Name returns the processor's name.
func (imageQualityProcessor) Name() string { return "ImageQualityProcessor" }

// openAICompatibleImageQualities is the set of "quality" values accepted by
// isOpenAICompatibleImageQuality.
var openAICompatibleImageQualities = map[string]struct{}{
	"low":    {},
	"medium": {},
	"high":   {},
	"auto":   {},
}

// ShouldProcess reports whether modelType is an image type and params contains
// a "quality" key, whatever its value.
func (imageQualityProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	switch modelType {
	case "image_generate", "image_edit":
		_, present := params["quality"]
		return present
	default:
		return false
	}
}
// Process keeps "quality" only when the capability enables quality control and
// the value is one of low/medium/high/auto. Otherwise the parameter is removed
// and the removal recorded. It always returns true.
//
// The recorded reason distinguishes the two failure modes: the capability not
// enabling quality control, versus the capability enabling it but the value
// being unsupported. Reporting the former for the latter would contradict the
// capability evidence attached to the same record.
func (imageQualityProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	capability := capabilityForType(context.modelCapability, modelType)
	quality := stringFromAny(params["quality"])
	controlSupported := supportsImageQualityControl(capability)
	if controlSupported && isOpenAICompatibleImageQuality(quality) {
		return true
	}

	reason := "模型能力未开启生成质量控制,已移除 quality 参数。"
	if controlSupported {
		reason = "quality 取值不在支持的 low/medium/high/auto 范围内,已移除 quality 参数。"
	}

	before := params["quality"]
	delete(params, "quality")
	context.recordChange(
		"ImageQualityProcessor",
		"remove",
		"quality",
		before,
		nil,
		reason,
		capabilityPath(modelType, "support_quality_control"),
		capabilityValue(context.modelCapability, modelType, "support_quality_control"),
	)
	return true
}
// supportsImageQualityControl reports whether capability enables quality
// control under any of its accepted flag names. A nil capability does not.
func supportsImageQualityControl(capability map[string]any) bool {
	if capability == nil {
		return false
	}
	flags := [...]string{"support_quality_control", "supportQualityControl", "quality_control", "qualityControl", "quality"}
	for i := range flags {
		if boolFromAny(capability[flags[i]]) {
			return true
		}
	}
	return false
}
// isOpenAICompatibleImageQuality reports whether value is a non-empty member
// of openAICompatibleImageQualities. The match is exact and case-sensitive.
func isOpenAICompatibleImageQuality(value string) bool {
	if len(value) == 0 {
		return false
	}
	if _, known := openAICompatibleImageQualities[value]; known {
		return true
	}
	return false
}