// easyai-ai-gateway/apps/api/internal/runner/param_processor_media.go

package runner

import (
	"fmt"
	"math"
	"strings"
)

// resolutionNormalizeProcessor copies a resolution-style "size" parameter into
// "resolution" when the request did not set "resolution" itself.
type resolutionNormalizeProcessor struct{}

// Name returns the identifier of this processor.
func (resolutionNormalizeProcessor) Name() string {
	return "ResolutionNormalizeProcessor"
}

// ShouldProcess reports whether "size" should be mirrored into "resolution":
// "resolution" must be empty, "size" must be non-empty, and "size" must be
// recognized as an image or video resolution for modelType.
func (resolutionNormalizeProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	if explicit := stringFromAny(params["resolution"]); explicit != "" {
		return false
	}
	candidate := stringFromAny(params["size"])
	switch {
	case candidate == "":
		return false
	case isImageResolution(modelType, candidate):
		return true
	default:
		return isVideoResolution(modelType, candidate)
	}
}

// Process copies a resolution-style "size" into params["resolution"] and
// context.resolution, and records the change. It does nothing when
// "resolution" is already set or "size" is not a recognized resolution.
// It always returns true.
func (resolutionNormalizeProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	sizeValue := stringFromAny(params["size"])
	if stringFromAny(params["resolution"]) != "" {
		return true
	}
	if !isImageResolution(modelType, sizeValue) && !isVideoResolution(modelType, sizeValue) {
		return true
	}

	_, evidence := capabilityEvidence(context.modelCapability, modelType, "output_resolutions")
	params["resolution"] = sizeValue
	context.resolution = sizeValue
	context.recordChange(
		"ResolutionNormalizeProcessor",
		"set",
		"resolution",
		nil,
		sizeValue,
		"size 使用分辨率格式，归一到 resolution 供后续能力校验和计费使用。",
		capabilityPath(modelType, "output_resolutions"),
		evidence,
	)
	return true
}

// aspectRatioProcessor reconciles the "aspect_ratio" parameter with the
// aspect-ratio settings of the model capability.
type aspectRatioProcessor struct{}

// Name returns the identifier of this processor.
func (aspectRatioProcessor) Name() string {
	return "AspectRatioProcessor"
}

// ShouldProcess reports whether the request carries an "aspect_ratio" or a
// "size" value. Requests for the "text_generate" model type are never processed.
func (aspectRatioProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	if modelType == "text_generate" {
		return false
	}
	if stringFromAny(params["aspect_ratio"]) != "" {
		return true
	}
	return stringFromAny(params["size"]) != ""
}

// Process validates params["aspect_ratio"] against the capability configured
// for modelType, then keeps, rewrites or removes it. context.aspectRatio is
// kept in sync with every write, and each modification is reported through
// context.recordChange. The method always returns true.
func (aspectRatioProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	capability := capabilityForType(context.modelCapability, modelType)
	if capability == nil {
		// No capability entry for this model type: nothing to validate against.
		return true
	}

	aspectRatio := stringFromAny(params["aspect_ratio"])
	// Values that isEmptyParamString classifies as empty are dropped outright.
	// NOTE(review): the exact set of such values is defined by isEmptyParamString, not here.
	if isEmptyParamString(aspectRatio) {
		before := params["aspect_ratio"]
		delete(params, "aspect_ratio")
		context.aspectRatio = ""
		context.recordChange(
			"AspectRatioProcessor",
			"remove",
			"aspect_ratio",
			before,
			nil,
			"aspect_ratio 是空值字符串，不能作为有效比例传给上游。",
			"",
			nil,
		)
		return true
	}

	// Resolve the resolution used to look up the allowed ratios: the request
	// value, then the context value, then the first configured output
	// resolution, and finally a "size" that ends in "K" or "p".
	resolution := firstNonEmptyString(stringFromAny(params["resolution"]), context.resolution)
	if resolution == "" {
		if values := stringListFromAny(capability["output_resolutions"]); len(values) > 0 {
			resolution = values[0]
		} else if size := stringFromAny(params["size"]); strings.HasSuffix(size, "K") || strings.HasSuffix(size, "p") {
			resolution = size
		}
	}

	allowed := aspectRatioAllowed(capability["aspect_ratio_allowed"], resolution)
	// The only allowed value is "adaptive": force it regardless of the request.
	if allowed != nil && len(allowed) == 1 && allowed[0] == "adaptive" {
		before := params["aspect_ratio"]
		params["aspect_ratio"] = "adaptive"
		context.aspectRatio = "adaptive"
		if before != "adaptive" {
			context.recordChange(
				"AspectRatioProcessor",
				"adjust",
				"aspect_ratio",
				before,
				"adaptive",
				"模型当前分辨率只允许 adaptive 宽高比。",
				capabilityPath(modelType, "aspect_ratio_allowed"),
				capability["aspect_ratio_allowed"],
			)
		}
		return true
	}
	// A non-nil but empty allow-list means no aspect ratio may be sent at all.
	if allowed != nil && len(allowed) == 0 {
		before := params["aspect_ratio"]
		delete(params, "aspect_ratio")
		context.aspectRatio = ""
		context.recordChange(
			"AspectRatioProcessor",
			"remove",
			"aspect_ratio",
			before,
			nil,
			"模型能力配置不允许传入任何 aspect_ratio。",
			capabilityPath(modelType, "aspect_ratio_allowed"),
			capability["aspect_ratio_allowed"],
		)
		return true
	}
	// Nothing was requested (ShouldProcess also accepts requests that only carry "size").
	if aspectRatio == "" {
		return true
	}
	// No allow-list is configured and the value is well-formed: keep it as is.
	if allowed == nil && validAspectRatio(aspectRatio) {
		params["aspect_ratio"] = aspectRatio
		context.aspectRatio = aspectRatio
		return true
	}

	processed, ok := validateAndAdjustAspectRatio(aspectRatio, capability, allowed)
	if !ok {
		// The helper reported failure: remove the parameter.
		before := params["aspect_ratio"]
		delete(params, "aspect_ratio")
		context.aspectRatio = ""
		context.recordChange(
			"AspectRatioProcessor",
			"remove",
			"aspect_ratio",
			before,
			nil,
			"传入的 aspect_ratio 不在模型允许范围内，且没有可用替代值。",
			capabilityPath(modelType, "aspect_ratio_allowed"),
			capability["aspect_ratio_allowed"],
		)
		return true
	}
	if processed != "" {
		before := params["aspect_ratio"]
		params["aspect_ratio"] = processed
		context.aspectRatio = processed
		if before != processed {
			// Cite aspect_ratio_allowed by default. Switch to aspect_ratio_range
			// when a range is configured and the requested ratio cannot be
			// parsed or lies outside that range.
			path := capabilityPath(modelType, "aspect_ratio_allowed")
			value := capability["aspect_ratio_allowed"]
			if ratioRange, ok := numberPair(capability["aspect_ratio_range"]); ok {
				ratio, valid := aspectRatioNumber(aspectRatio)
				if !valid || ratio < ratioRange[0] || ratio > ratioRange[1] {
					path = capabilityPath(modelType, "aspect_ratio_range")
					value = capability["aspect_ratio_range"]
				}
			}
			context.recordChange(
				"AspectRatioProcessor",
				"adjust",
				"aspect_ratio",
				before,
				processed,
				"传入的 aspect_ratio 不符合模型能力配置，已调整为允许值。",
				path,
				value,
			)
		}
	}
	return true
}

// imageSizeProcessor constrains requested image dimensions to the size limits
// configured in the model capability.
type imageSizeProcessor struct{}

// Name returns the identifier of this processor.
func (imageSizeProcessor) Name() string {
	return "ImageSizeProcessor"
}

// ShouldProcess reports whether the request is an image generate/edit request
// with resolvable pixel dimensions and a capability that configures at least
// one image size constraint.
func (imageSizeProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	switch modelType {
	case "image_generate", "image_edit":
		// Supported model types; continue with the remaining checks.
	default:
		return false
	}
	if _, _, found := imageDimensionsFromParams(params); !found {
		return false
	}
	capability := capabilityForType(context.modelCapability, modelType)
	if capability == nil {
		return false
	}
	return imageSizeCapabilityConfigured(capability)
}

// Process constrains the requested image dimensions to the capability limits
// and rewrites params["width"], params["height"] and params["size"]. It may
// also set params["aspect_ratio"] and params["resolution"], mirroring them
// into context. A change is recorded when the resulting size fields differ
// from the ones originally present. The method always returns true.
func (imageSizeProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	capability := capabilityForType(context.modelCapability, modelType)
	if capability == nil {
		return true
	}
	width, height, ok := imageDimensionsFromParams(params)
	if !ok {
		return true
	}

	// Snapshot the size-related fields that exist before any modification so
	// they can be reported as the "before" state.
	before := map[string]any{}
	for _, key := range []string{"width", "height", "size", "resolution"} {
		if value, exists := params[key]; exists {
			before[key] = cloneAny(value)
		}
	}

	width, height = constrainImageDimensions(width, height, capability)
	params["width"] = width
	params["height"] = height
	// Derive an aspect ratio from the constrained dimensions only when the
	// request did not carry one. It is kept only if validation succeeds.
	if stringFromAny(params["aspect_ratio"]) == "" {
		aspectRatio := aspectRatioFromDimensions(width, height)
		allowed := aspectRatioAllowed(capability["aspect_ratio_allowed"], firstNonEmptyString(stringFromAny(params["resolution"]), context.resolution))
		if processed, ok := validateAndAdjustAspectRatio(aspectRatio, capability, allowed); ok && processed != "" {
			params["aspect_ratio"] = processed
			context.aspectRatio = processed
		}
	}
	resolution := normalizeImageResolutionForCapability(firstNonEmptyString(stringFromAny(params["resolution"]), context.resolution), width, height, capability)
	if resolution != "" {
		params["resolution"] = resolution
		context.resolution = resolution
	}
	// "size" carries the resolution label when the capability sets
	// size_param_format to "resolution"; otherwise it is "<width>x<height>".
	if stringFromAny(capability["size_param_format"]) == "resolution" && resolution != "" {
		params["size"] = resolution
	} else {
		params["size"] = fmt.Sprintf("%dx%d", width, height)
	}

	after := map[string]any{
		"width":  params["width"],
		"height": params["height"],
		"size":   params["size"],
	}
	if value := stringFromAny(params["resolution"]); value != "" {
		after["resolution"] = value
	}
	if !imageSizeMapsEqual(before, after) {
		path, value := imageSizeConstraintEvidence(modelType, capability)
		context.recordChange(
			"ImageSizeProcessor",
			"adjust",
			"size",
			before,
			after,
			"图像宽高不符合模型尺寸限制，已按模型能力调整 width、height 和 size。",
			path,
			value,
		)
	}
	return true
}

// imageDimensionsFromParams resolves the requested pixel dimensions.
//
// Explicit positive "width" and "height" win. Otherwise "size" is parsed as a
// pixel size string, and as a last resort "resolution" is parsed the same way.
// The boolean result reports whether dimensions were found.
func imageDimensionsFromParams(params map[string]any) (int, int, bool) {
	explicitWidth, explicitHeight := positiveIntegerFromAny(params["width"]), positiveIntegerFromAny(params["height"])
	if explicitWidth > 0 && explicitHeight > 0 {
		return explicitWidth, explicitHeight, true
	}

	sizeWidth, sizeHeight, parsed := parsePixelSizeString(stringFromAny(params["size"]))
	if parsed {
		return sizeWidth, sizeHeight, true
	}
	return parsePixelSizeString(stringFromAny(params["resolution"]))
}

// imageSizeCapabilityConfigured reports whether capability defines at least
// one image size constraint: an output size range, a width/height range, an
// aspect ratio range, or a width/height multiple greater than 1.
func imageSizeCapabilityConfigured(capability map[string]any) bool {
	if capability == nil {
		return false
	}
	if _, _, found := imageOutputSizeRange(capability); found {
		return true
	}
	for _, rangeKey := range [...]string{"width_height_range", "aspect_ratio_range"} {
		if _, _, found := numberRangeFromAny(capability[rangeKey]); found {
			return true
		}
	}
	return positiveIntegerFromAny(capability["width_height_multiple"]) > 1
}

// constrainImageDimensions applies the size constraints of capability to
// width and height and returns the adjusted pair.
//
// The constraints run in a fixed order (output size, width/height range,
// aspect ratio range, multiple) for at most four passes, stopping early once
// a pass changes nothing. Non-positive inputs are returned unchanged.
func constrainImageDimensions(width int, height int, capability map[string]any) (int, int) {
	if width <= 0 || height <= 0 {
		return width, height
	}

	const maxPasses = 4
	constraints := []func(*imageDimensions, map[string]any){
		applyImageOutputSizeRange,
		applyImageWidthHeightRange,
		applyImageAspectRatioRange,
		applyImageWidthHeightMultiple,
	}

	current := imageDimensions{Width: width, Height: height}
	for pass := 0; pass < maxPasses; pass++ {
		previous := current
		for _, apply := range constraints {
			apply(&current, capability)
		}
		if current == previous {
			break
		}
	}
	return current.Width, current.Height
}

// imageDimensions is a width/height pair that the image size constraint
// helpers adjust in place.
type imageDimensions struct {
	Width, Height int
}

// applyImageOutputSizeRange scales result so that its pixel count
// (width * height) falls inside the output size range configured in
// capability.
//
// Dimensions below the minimum are scaled up with rounding up; dimensions
// above the maximum are scaled down with rounding down. Nothing happens when
// no range is configured or when a dimension is non-positive.
func applyImageOutputSizeRange(result *imageDimensions, capability map[string]any) {
	minValue, maxValue, ok := imageOutputSizeRange(capability)
	if !ok || result.Width <= 0 || result.Height <= 0 {
		return
	}
	// Multiply in float64. An int product can wrap around for very large
	// requested dimensions, which would turn the pixel count negative, yield a
	// NaN scale factor and let the request skip the maximum-size clamp.
	pixels := float64(result.Width) * float64(result.Height)
	if minValue > 0 && pixels < minValue {
		scaleImageDimensions(result, math.Sqrt(minValue/pixels), true)
		return
	}
	if maxValue > 0 && pixels > maxValue {
		scaleImageDimensions(result, math.Sqrt(maxValue/pixels), false)
	}
}

// applyImageWidthHeightRange scales result so that its sides respect
// capability["width_height_range"].
//
// If the longer side exceeds the maximum, the image is scaled down (rounding
// down). Otherwise, if the shorter side is below the minimum, it is scaled up
// (rounding up). Only one of the two adjustments is applied per call.
func applyImageWidthHeightRange(result *imageDimensions, capability map[string]any) {
	lower, upper, ok := numberRangeFromAny(capability["width_height_range"])
	if !ok || result.Width <= 0 || result.Height <= 0 {
		return
	}

	longSide, shortSide := result.Width, result.Height
	if shortSide > longSide {
		longSide, shortSide = shortSide, longSide
	}

	switch {
	case upper > 0 && float64(longSide) > upper:
		scaleImageDimensions(result, upper/float64(longSide), false)
	case lower > 0 && float64(shortSide) < lower:
		scaleImageDimensions(result, lower/float64(shortSide), true)
	}
}

// applyImageAspectRatioRange brings the width/height ratio of result into
// capability["aspect_ratio_range"].
//
// A ratio above the maximum is corrected by recomputing the height. A ratio
// below the minimum is corrected by recomputing the width. Both use rounding
// up with a floor of 1.
func applyImageAspectRatioRange(result *imageDimensions, capability map[string]any) {
	lower, upper, ok := numberRangeFromAny(capability["aspect_ratio_range"])
	if !ok || result.Width <= 0 || result.Height <= 0 {
		return
	}

	current := float64(result.Width) / float64(result.Height)
	switch {
	case upper > 0 && current > upper:
		neededHeight := math.Ceil(float64(result.Width) / upper)
		result.Height = max(1, int(neededHeight))
	case lower > 0 && current < lower:
		neededWidth := math.Ceil(float64(result.Height) * lower)
		result.Width = max(1, int(neededWidth))
	}
}

// applyImageWidthHeightMultiple rounds both sides of result to a multiple of
// capability["width_height_multiple"]. Multiples of 1 or less and
// non-positive dimensions leave result untouched.
func applyImageWidthHeightMultiple(result *imageDimensions, capability map[string]any) {
	step := positiveIntegerFromAny(capability["width_height_multiple"])
	if step <= 1 {
		return
	}
	if result.Width <= 0 || result.Height <= 0 {
		return
	}
	roundedWidth := roundImageDimensionToMultiple(result.Width, step)
	roundedHeight := roundImageDimensionToMultiple(result.Height, step)
	result.Width, result.Height = roundedWidth, roundedHeight
}

// imageOutputSizeRange reads the output size limits from capability.
//
// A combined range key is preferred. When none yields a valid range, the
// minimum and maximum are looked up independently through their alias keys.
// The boolean result is true when at least one bound is positive.
func imageOutputSizeRange(capability map[string]any) (float64, float64, bool) {
	rangeKeys := [...]string{"output_size_range", "outputSizeRange", "size_range", "sizeRange"}
	for i := range rangeKeys {
		lower, upper, found := numberRangeFromAny(capability[rangeKeys[i]])
		if found {
			return lower, upper, true
		}
	}

	minKeys := []string{"output_min_size", "outputMinSize", "min_output_size", "minOutputSize", "min_size", "minSize", "output_min_pixels", "outputMinPixels", "min_pixels", "minPixels"}
	maxKeys := []string{"output_max_size", "outputMaxSize", "max_output_size", "maxOutputSize", "max_size", "maxSize", "output_max_pixels", "outputMaxPixels", "max_pixels", "maxPixels"}
	lower := firstPositiveNumber(capability, minKeys)
	upper := firstPositiveNumber(capability, maxKeys)
	configured := lower > 0 || upper > 0
	return lower, upper, configured
}

// firstPositiveNumber returns the first positive number found in values under
// keys, in the given key order, or 0 when no key yields one.
func firstPositiveNumber(values map[string]any, keys []string) float64 {
	for i := 0; i < len(keys); i++ {
		candidate := positiveFloatFromAny(values[keys[i]])
		if candidate > 0 {
			return candidate
		}
	}
	return 0
}

// numberRangeFromAny interprets value as a (min, max) number pair.
//
// The pair is rejected when it cannot be parsed or when either bound is
// negative. Reversed bounds are swapped as long as the maximum is positive.
// The boolean result is true when at least one bound is positive.
func numberRangeFromAny(value any) (float64, float64, bool) {
	bounds, ok := numberPair(value)
	if !ok {
		return 0, 0, false
	}

	lower, upper := bounds[0], bounds[1]
	if lower < 0 || upper < 0 {
		return 0, 0, false
	}
	if upper > 0 && lower > upper {
		lower, upper = upper, lower
	}

	usable := lower > 0 || upper > 0
	return lower, upper, usable
}

// scaleImageDimensions multiplies both sides of result by scale, rounding up
// when roundUp is true and down otherwise. Each side is kept at 1 or more.
// A scale that is non-positive, NaN or infinite is ignored.
func scaleImageDimensions(result *imageDimensions, scale float64, roundUp bool) {
	if math.IsNaN(scale) || math.IsInf(scale, 0) || scale <= 0 {
		return
	}

	scaled := func(side int) int {
		product := float64(side) * scale
		if roundUp {
			product = math.Ceil(product)
		} else {
			product = math.Floor(product)
		}
		return max(1, int(product))
	}

	result.Width = scaled(result.Width)
	result.Height = scaled(result.Height)
}

// roundImageDimensionToMultiple rounds value to the nearest multiple of
// multiple, never returning less than multiple itself. A multiple of 1 or
// less returns value unchanged.
func roundImageDimensionToMultiple(value int, multiple int) int {
	if multiple <= 1 {
		return value
	}
	steps := math.Round(float64(value) / float64(multiple))
	rounded := int(steps) * multiple
	if rounded < multiple {
		return multiple
	}
	return rounded
}

// normalizeImageResolutionForCapability picks the resolution label to use for
// an image of the given dimensions.
//
// current is kept when capability["output_resolutions"] lists it. With no
// configured list, current (or, if empty, the label derived from the
// dimensions) is returned. Otherwise the derived label is used when allowed,
// falling back to the closest allowed label.
func normalizeImageResolutionForCapability(current string, width int, height int, capability map[string]any) string {
	allowed := stringListFromAny(capability["output_resolutions"])
	if containsString(allowed, current) {
		return current
	}

	derived := imageResolutionFromDimensions(width, height)
	switch {
	case len(allowed) == 0:
		return firstNonEmptyString(current, derived)
	case containsString(allowed, derived):
		return derived
	default:
		return closestImageResolution(derived, allowed)
	}
}

// imageResolutionFromDimensions maps the longer side of an image to a
// resolution label: up to 1920 is "1K", up to 2560 "2K", up to 3328 "3K",
// up to 3840 "4K", and anything larger "8K".
func imageResolutionFromDimensions(width int, height int) string {
	longest := max(width, height)
	tiers := [...]struct {
		limit int
		label string
	}{
		{1920, "1K"},
		{2560, "2K"},
		{3328, "3K"},
		{3840, "4K"},
	}
	for _, tier := range tiers {
		if longest <= tier.limit {
			return tier.label
		}
	}
	return "8K"
}

// aspectRatioFromDimensions returns the reduced "W:H" ratio of the given
// dimensions, or "" when either dimension is non-positive.
func aspectRatioFromDimensions(width int, height int) string {
	if width <= 0 || height <= 0 {
		return ""
	}
	common := gcd(width, height)
	ratioWidth, ratioHeight := width/common, height/common
	return fmt.Sprintf("%d:%d", ratioWidth, ratioHeight)
}

// gcd returns the greatest common divisor of the absolute values of a and b.
// It returns 1 when both are zero so the result is always safe to divide by.
func gcd(a int, b int) int {
	magnitude := func(n int) int {
		if n < 0 {
			return -n
		}
		return n
	}

	x, y := magnitude(a), magnitude(b)
	for ; y != 0; x, y = y, x%y {
	}
	if x == 0 {
		return 1
	}
	return x
}

// closestImageResolution picks the allowed resolution label closest to target
// on the scale 1K < 2K < 3K < 4K < 8K.
//
// Only allowed entries on that scale are considered. If none are, the first
// allowed entry is returned (or target when allowed is empty). If target is
// not on the scale, the first recognized allowed entry is returned. Otherwise
// the search goes from target downward first, then upward.
func closestImageResolution(target string, allowed []string) string {
	scale := []string{"1K", "2K", "3K", "4K", "8K"}

	recognized := make([]string, 0, len(allowed))
	for _, candidate := range allowed {
		if indexOfString(scale, candidate) >= 0 {
			recognized = append(recognized, candidate)
		}
	}
	if len(recognized) == 0 {
		if len(allowed) == 0 {
			return target
		}
		return allowed[0]
	}

	position := indexOfString(scale, target)
	if position < 0 {
		return recognized[0]
	}
	// Prefer the target tier or the nearest lower one.
	for i := position; i >= 0; i-- {
		if containsString(recognized, scale[i]) {
			return scale[i]
		}
	}
	// Otherwise take the nearest higher tier.
	for i := position + 1; i < len(scale); i++ {
		if containsString(recognized, scale[i]) {
			return scale[i]
		}
	}
	return recognized[0]
}

// indexOfString returns the position of the first element of values equal to
// target, or -1 when target is not present.
func indexOfString(values []string, target string) int {
	for i := 0; i < len(values); i++ {
		if values[i] == target {
			return i
		}
	}
	return -1
}

// imageSizeConstraintEvidence selects the capability entry that is reported
// as the reason for an image size adjustment.
//
// It returns the capability path and a cloned value for the first key present
// in capability. Pixel-count limits are checked first, covering every alias
// that imageOutputSizeRange accepts, so an adjustment driven by one of those
// aliases no longer cites an unrelated key. They are followed by
// width_height_range, aspect_ratio_range and width_height_multiple. When none
// of the keys exist, the path built from an empty key and a clone of the
// whole capability map are returned.
func imageSizeConstraintEvidence(modelType string, capability map[string]any) (string, any) {
	evidenceKeys := []string{
		// Keys that were always reported, in their established priority order.
		"output_size_range", "outputSizeRange",
		"output_min_size", "outputMinSize", "min_pixels", "minPixels",
		"output_max_size", "outputMaxSize",
		// Remaining aliases that imageOutputSizeRange also honors.
		"size_range", "sizeRange",
		"min_output_size", "minOutputSize", "min_size", "minSize", "output_min_pixels", "outputMinPixels",
		"max_output_size", "maxOutputSize", "max_size", "maxSize", "output_max_pixels", "outputMaxPixels", "max_pixels", "maxPixels",
		// Side-length, ratio and multiple constraints.
		"width_height_range", "aspect_ratio_range", "width_height_multiple",
	}
	for _, key := range evidenceKeys {
		if value, ok := capability[key]; ok {
			return capabilityPath(modelType, key), cloneAny(value)
		}
	}
	return capabilityPath(modelType, ""), cloneMap(capability)
}

// imageSizeMapsEqual reports whether before and after describe the same image
// size fields.
//
// Every key of after must hold an equal value in before, and before must not
// contain keys missing from after. Numeric values are compared by value
// regardless of their Go type, so a JSON-decoded float64(1024) equals
// int(1024). A plain interface comparison would treat them as different and
// report an unchanged size as adjusted. Non-numeric values are compared with
// ==.
func imageSizeMapsEqual(before map[string]any, after map[string]any) bool {
	// asNumber widens any built-in numeric type to float64. Image dimensions
	// are far below 2^53, so the widening is exact for the values compared here.
	asNumber := func(value any) (float64, bool) {
		switch typed := value.(type) {
		case int:
			return float64(typed), true
		case int8:
			return float64(typed), true
		case int16:
			return float64(typed), true
		case int32:
			return float64(typed), true
		case int64:
			return float64(typed), true
		case uint:
			return float64(typed), true
		case uint8:
			return float64(typed), true
		case uint16:
			return float64(typed), true
		case uint32:
			return float64(typed), true
		case uint64:
			return float64(typed), true
		case float32:
			return float64(typed), true
		case float64:
			return typed, true
		}
		return 0, false
	}
	sameValue := func(left any, right any) bool {
		leftNumber, leftIsNumber := asNumber(left)
		rightNumber, rightIsNumber := asNumber(right)
		if leftIsNumber || rightIsNumber {
			// A number never equals a non-number.
			return leftIsNumber && rightIsNumber && leftNumber == rightNumber
		}
		return left == right
	}

	for key, value := range after {
		if !sameValue(before[key], value) {
			return false
		}
	}
	for key := range before {
		if _, ok := after[key]; !ok {
			return false
		}
	}
	return true
}

// inputAudioProcessor strips audio inputs from video requests whose model
// capability does not accept input audio.
type inputAudioProcessor struct{}

// Name returns the identifier of this processor.
func (inputAudioProcessor) Name() string {
	return "InputAudioProcessor"
}

// ShouldProcess reports whether a video request contains at least one audio
// item in params["content"].
func (inputAudioProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	if !isVideoModelType(modelType) {
		return false
	}
	items := contentItems(params["content"])
	for i := range items {
		if isAudioContent(items[i]) {
			return true
		}
	}
	return false
}

// Process removes audio content items and audio shortcut fields when the
// model capability does not support input audio.
//
// Support is taken from supportsOmniAudioReference for omni-video-like
// contexts and from the capability's "input_audio" flag otherwise. With no
// model capability at all, audio is treated as unsupported. Every removed
// content item is recorded individually. The method always returns true.
func (inputAudioProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	items := contentItems(params["content"])
	if len(items) == 0 {
		return true
	}

	if len(context.modelCapability) > 0 {
		audioAllowed := false
		if isOmniVideoLike(context) {
			audioAllowed = supportsOmniAudioReference(context)
		} else if capability := capabilityForType(context.modelCapability, modelType); capability != nil {
			audioAllowed = boolFromAny(capability["input_audio"])
		}
		if audioAllowed {
			return true
		}
	}

	kept := make([]map[string]any, 0, len(items))
	for position, entry := range items {
		if !isAudioContent(entry) {
			kept = append(kept, entry)
			continue
		}
		evidencePath, evidenceValue := audioInputCapabilityEvidence(context, modelType)
		context.recordChange(
			"InputAudioProcessor",
			"remove",
			fmt.Sprintf("content[%d]", position),
			entry,
			nil,
			"模型能力未开启输入音频，已移除 audio_url。",
			evidencePath,
			evidenceValue,
		)
	}
	params["content"] = mapsToAnySlice(kept)

	// The shortcut fields carry the same audio reference and are dropped too.
	shortcutFields := []string{"audio_url", "audioUrl", "reference_audio", "referenceAudio"}
	evidencePath, evidenceValue := audioInputCapabilityEvidence(context, modelType)
	deleteFieldsWithLog(params, context, "InputAudioProcessor", shortcutFields, "模型能力未开启输入音频，已移除音频参考快捷字段。", evidencePath, evidenceValue)
	return true
}

// durationProcessor normalizes the "duration" parameter of video requests
// against the duration settings of the model capability.
type durationProcessor struct{}

// Name returns the identifier of this processor.
func (durationProcessor) Name() string {
	return "DurationProcessor"
}

// ShouldProcess reports whether a video request carries a non-nil "duration".
func (durationProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	if !isVideoModelType(modelType) {
		return false
	}
	return params["duration"] != nil
}

// Process normalizes params["duration"] against the capability configured for
// modelType. Exactly one strategy is applied, in this order of precedence:
//
//  1. duration_options: pick a value from the configured list.
//  2. duration_range: normalize into the range, honoring duration_step.
//  3. duration_step alone: normalize by step.
//
// Options, range and step are looked up scoped by the current resolution and
// the video mode key. In every strategy params["duration"] is overwritten
// with the normalized number, syncDurationSeconds is called, and a change is
// recorded only when the value actually differs. Nothing happens when there
// is no capability or the duration is not positive. The method always
// returns true.
func (durationProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	capability := capabilityForType(context.modelCapability, modelType)
	if capability == nil {
		return true
	}
	duration := floatFromAny(params["duration"])
	if duration <= 0 {
		return true
	}
	resolution := firstNonEmptyString(stringFromAny(params["resolution"]), context.resolution)
	modeKey := videoModeKey(params)
	// Strategy 1: a fixed list of allowed durations takes precedence.
	if options := scopedNumberList(capability["duration_options"], resolution, modeKey); len(options) > 0 {
		normalized := nextAllowedNumber(duration, options)
		params["duration"] = normalized
		syncDurationSeconds(params)
		if normalized != duration {
			context.recordChange(
				"DurationProcessor",
				"adjust",
				"duration",
				duration,
				normalized,
				"duration 不在模型固定时长选项内，已向上调整为允许值。",
				capabilityPath(modelType, "duration_options"),
				capability["duration_options"],
			)
		}
		return true
	}
	// Strategy 2: a min/max range, combined with the configured step.
	if minValue, maxValue, ok := scopedRange(capability["duration_range"], resolution, modeKey); ok {
		step := durationStep(capability["duration_step"], resolution, modeKey)
		normalized := normalizeDurationByRange(duration, minValue, maxValue, step)
		params["duration"] = normalized
		syncDurationSeconds(params)
		if normalized != duration {
			context.recordChange(
				"DurationProcessor",
				"adjust",
				"duration",
				duration,
				normalized,
				"duration 超出模型时长范围或步进配置，已按能力配置归一。",
				capabilityPath(modelType, "duration_range"),
				// Both settings took part in the normalization, so both are reported.
				map[string]any{
					"duration_range": capability["duration_range"],
					"duration_step":  capability["duration_step"],
				},
			)
		}
		return true
	}
	// Strategy 3: neither options nor a range apply; normalize by step only.
	step := durationStep(capability["duration_step"], resolution, modeKey)
	normalized := normalizeDurationByStep(duration, step)
	params["duration"] = normalized
	syncDurationSeconds(params)
	if normalized != duration {
		context.recordChange(
			"DurationProcessor",
			"adjust",
			"duration",
			duration,
			normalized,
			"duration 不符合模型时长步进，已按步进向上归一。",
			capabilityPath(modelType, "duration_step"),
			capability["duration_step"],
		)
	}
	return true
}

// audioProcessor removes audio output parameters from video requests whose
// model capability does not enable output audio.
type audioProcessor struct{}

// Name returns the identifier of this processor.
func (audioProcessor) Name() string {
	return "AudioProcessor"
}

// ShouldProcess reports whether a video request carries a non-nil "audio" or
// "output_audio" parameter.
func (audioProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	if !isVideoModelType(modelType) {
		return false
	}
	if params["audio"] != nil {
		return true
	}
	return params["output_audio"] != nil
}

// Process deletes "audio" and "output_audio" from params, recording each
// removal, unless the capability for modelType enables "output_audio".
// The method always returns true.
func (audioProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	capability := capabilityForType(context.modelCapability, modelType)
	if capability != nil && boolFromAny(capability["output_audio"]) {
		return true
	}

	for _, field := range [...]string{"audio", "output_audio"} {
		previous, present := params[field]
		if !present {
			continue
		}
		delete(params, field)
		context.recordChange(
			"AudioProcessor",
			"remove",
			field,
			previous,
			nil,
			"模型能力未开启输出音频，已移除音频输出参数。",
			capabilityPath(modelType, "output_audio"),
			capabilityValue(context.modelCapability, modelType, "output_audio"),
		)
	}
	return true
}

// imageCountProcessor caps the requested number of images at the maximum
// configured in the model capability.
type imageCountProcessor struct{}

// Name returns the identifier of this processor.
func (imageCountProcessor) Name() string {
	return "ImageCountProcessor"
}

// ShouldProcess reports whether modelType is an image generate or image edit
// request.
func (imageCountProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	switch modelType {
	case "image_generate", "image_edit":
		return true
	}
	return false
}

// Process writes the effective image count to params["n"].
//
// It only acts when the capability enables "output_multiple_images" and
// defines a positive "output_max_images_count". The requested count is read
// from "n", then "batch_size", defaulting to 1. A count above the maximum is
// reduced to the maximum and recorded. The method always returns true.
func (imageCountProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	capability := capabilityForType(context.modelCapability, modelType)
	if capability == nil || !boolFromAny(capability["output_multiple_images"]) {
		return true
	}

	roundedCount := func(raw any) int {
		return int(math.Round(floatFromAny(raw)))
	}

	limit := roundedCount(capability["output_max_images_count"])
	if limit <= 0 {
		return true
	}

	requested := roundedCount(params["n"])
	if requested <= 0 {
		requested = roundedCount(params["batch_size"])
	}
	if requested <= 0 {
		requested = 1
	}

	effective := requested
	if requested > limit {
		effective = limit
		context.recordChange(
			"ImageCountProcessor",
			"adjust",
			"n",
			requested,
			effective,
			"请求图片数量超过模型输出上限，已按 output_max_images_count 截断。",
			capabilityPath(modelType, "output_max_images_count"),
			capability["output_max_images_count"],
		)
	}
	params["n"] = effective
	return true
}

// imageQualityProcessor removes the "quality" parameter from image requests
// unless the model capability supports quality control and the value is an
// accepted one.
type imageQualityProcessor struct{}

// Name returns the identifier of this processor.
func (imageQualityProcessor) Name() string {
	return "ImageQualityProcessor"
}

// openAICompatibleImageQualities is the set of "quality" values that may be
// passed through for image requests.
var openAICompatibleImageQualities = map[string]struct{}{
	"auto":   {},
	"low":    {},
	"medium": {},
	"high":   {},
}

// ShouldProcess reports whether an image generate/edit request contains a
// "quality" key, whatever its value.
func (imageQualityProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	switch modelType {
	case "image_generate", "image_edit":
		_, present := params["quality"]
		return present
	}
	return false
}

// Process keeps params["quality"] only when the capability for modelType
// supports quality control and the value is one of the accepted qualities.
// Otherwise the parameter is removed and the removal is recorded.
//
// The recorded reason distinguishes the two causes. A model without quality
// control keeps the original message. A model that does support it but
// received an unaccepted value gets a value-specific message, because the
// evidence attached to the record would otherwise contradict the reason.
// The method always returns true.
func (imageQualityProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	capability := capabilityForType(context.modelCapability, modelType)
	quality := stringFromAny(params["quality"])
	qualityControlEnabled := supportsImageQualityControl(capability)
	if qualityControlEnabled && isOpenAICompatibleImageQuality(quality) {
		return true
	}

	reason := "模型能力未开启生成质量控制，已移除 quality 参数。"
	if qualityControlEnabled {
		// The capability is enabled, so the value itself is what was rejected.
		reason = "quality 取值不是 low、medium、high 或 auto，已移除 quality 参数。"
	}

	before := params["quality"]
	delete(params, "quality")
	context.recordChange(
		"ImageQualityProcessor",
		"remove",
		"quality",
		before,
		nil,
		reason,
		capabilityPath(modelType, "support_quality_control"),
		capabilityValue(context.modelCapability, modelType, "support_quality_control"),
	)
	return true
}

// supportsImageQualityControl reports whether capability enables quality
// control under any of its accepted key spellings. A nil capability never does.
func supportsImageQualityControl(capability map[string]any) bool {
	if capability == nil {
		return false
	}
	aliases := [...]string{"support_quality_control", "supportQualityControl", "quality_control", "qualityControl", "quality"}
	for i := range aliases {
		if boolFromAny(capability[aliases[i]]) {
			return true
		}
	}
	return false
}

// isOpenAICompatibleImageQuality reports whether value is a non-empty member
// of openAICompatibleImageQualities.
func isOpenAICompatibleImageQuality(value string) bool {
	if len(value) == 0 {
		return false
	}
	if _, known := openAICompatibleImageQualities[value]; known {
		return true
	}
	return false
}