easyai-ai-gateway/apps/api/internal/runner/param_processor_media.go

package runner

import (
	"fmt"
	"math"
	"strings"
)

// resolutionNormalizeProcessor copies a resolution-style "size" parameter into
// "resolution" when the request did not set "resolution" itself.
type resolutionNormalizeProcessor struct{}

// Name returns the fixed name string of this processor.
func (resolutionNormalizeProcessor) Name() string {
	return "ResolutionNormalizeProcessor"
}

// ShouldProcess reports whether "size" should be promoted to "resolution".
//
// It returns false when "resolution" is already a non-empty string or when
// "size" is empty; otherwise it returns true only if "size" is accepted by
// isImageResolution or isVideoResolution for the given model type.
func (resolutionNormalizeProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	// An explicit resolution is left alone.
	if explicit := stringFromAny(params["resolution"]); explicit != "" {
		return false
	}

	candidate := stringFromAny(params["size"])
	switch {
	case candidate == "":
		return false
	case isImageResolution(modelType, candidate):
		return true
	default:
		return isVideoResolution(modelType, candidate)
	}
}

// Process writes "size" into params["resolution"] and context.resolution when
// no resolution is set and "size" is an image or video resolution for the
// model type, and records the change on the context. It always returns true.
func (resolutionNormalizeProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	size := stringFromAny(params["size"])

	// Same preconditions as ShouldProcess, re-checked before mutating params.
	if stringFromAny(params["resolution"]) != "" {
		return true
	}
	if !isImageResolution(modelType, size) && !isVideoResolution(modelType, size) {
		return true
	}

	// Only the value half of the evidence is used; the path is rebuilt below.
	_, evidence := capabilityEvidence(context.modelCapability, modelType, "output_resolutions")

	params["resolution"] = size
	context.resolution = size
	context.recordChange(
		"ResolutionNormalizeProcessor",
		"set",
		"resolution",
		nil,
		size,
		"size 使用分辨率格式，归一到 resolution 供后续能力校验和计费使用。",
		capabilityPath(modelType, "output_resolutions"),
		evidence,
	)
	return true
}

// aspectRatioProcessor removes or adjusts the "aspect_ratio" parameter
// according to the model capability's aspect-ratio settings.
type aspectRatioProcessor struct{}

// Name returns the fixed name string of this processor.
func (aspectRatioProcessor) Name() string {
	return "AspectRatioProcessor"
}

// ShouldProcess reports whether the request carries a non-empty
// "aspect_ratio" or "size" string. Requests with model type "text_generate"
// are never processed.
func (aspectRatioProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	if modelType == "text_generate" {
		return false
	}
	if stringFromAny(params["aspect_ratio"]) != "" {
		return true
	}
	return stringFromAny(params["size"]) != ""
}

// Process reconciles params["aspect_ratio"] with the model capability for
// modelType and mirrors the outcome into context.aspectRatio. It always
// returns true.
//
// Steps, in order:
//  1. No capability for the model type: leave params untouched.
//  2. isEmptyParamString accepts the value: remove it.
//  3. Work out the resolution used to scope the allowed list: the explicit
//     or context resolution, else the first declared output resolution, else
//     a "size" ending in "K" or "p".
//  4. Allowed list is exactly ["adaptive"]: force "adaptive".
//  5. Allowed list is non-nil but empty: remove the value.
//  6. No aspect ratio requested: nothing more to do.
//  7. No allowed list and the value passes validAspectRatio: keep it.
//  8. Otherwise defer to validateAndAdjustAspectRatio, then remove or replace
//     the value according to its result.
//
// Every removal is recorded through context.recordChange; a replacement is
// recorded only when the stored value actually differs.
func (aspectRatioProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	const (
		processorName = "AspectRatioProcessor"
		field         = "aspect_ratio"
	)

	capability := capabilityForType(context.modelCapability, modelType)
	if capability == nil {
		return true
	}

	requested := stringFromAny(params[field])
	if isEmptyParamString(requested) {
		previous := params[field]
		delete(params, field)
		context.aspectRatio = ""
		context.recordChange(
			processorName,
			"remove",
			field,
			previous,
			nil,
			"aspect_ratio 是空值字符串，不能作为有效比例传给上游。",
			"",
			nil,
		)
		return true
	}

	// Resolve the resolution that scopes the allowed aspect ratios.
	resolution := firstNonEmptyString(stringFromAny(params["resolution"]), context.resolution)
	if resolution == "" {
		declared := stringListFromAny(capability["output_resolutions"])
		if len(declared) > 0 {
			resolution = declared[0]
		} else {
			size := stringFromAny(params["size"])
			if strings.HasSuffix(size, "K") || strings.HasSuffix(size, "p") {
				resolution = size
			}
		}
	}

	allowed := aspectRatioAllowed(capability["aspect_ratio_allowed"], resolution)
	if allowed != nil {
		switch {
		case len(allowed) == 1 && allowed[0] == "adaptive":
			previous := params[field]
			params[field] = "adaptive"
			context.aspectRatio = "adaptive"
			if previous != "adaptive" {
				context.recordChange(
					processorName,
					"adjust",
					field,
					previous,
					"adaptive",
					"模型当前分辨率只允许 adaptive 宽高比。",
					capabilityPath(modelType, "aspect_ratio_allowed"),
					capability["aspect_ratio_allowed"],
				)
			}
			return true

		case len(allowed) == 0:
			// A non-nil, empty list is treated differently from a nil one.
			previous := params[field]
			delete(params, field)
			context.aspectRatio = ""
			context.recordChange(
				processorName,
				"remove",
				field,
				previous,
				nil,
				"模型能力配置不允许传入任何 aspect_ratio。",
				capabilityPath(modelType, "aspect_ratio_allowed"),
				capability["aspect_ratio_allowed"],
			)
			return true
		}
	}

	if requested == "" {
		return true
	}
	if allowed == nil && validAspectRatio(requested) {
		params[field] = requested
		context.aspectRatio = requested
		return true
	}

	adjusted, accepted := validateAndAdjustAspectRatio(requested, capability, allowed)
	if !accepted {
		previous := params[field]
		delete(params, field)
		context.aspectRatio = ""
		context.recordChange(
			processorName,
			"remove",
			field,
			previous,
			nil,
			"传入的 aspect_ratio 不在模型允许范围内，且没有可用替代值。",
			capabilityPath(modelType, "aspect_ratio_allowed"),
			capability["aspect_ratio_allowed"],
		)
		return true
	}
	if adjusted == "" {
		return true
	}

	previous := params[field]
	params[field] = adjusted
	context.aspectRatio = adjusted
	if previous == adjusted {
		return true
	}

	// Attribute the adjustment to aspect_ratio_range instead of
	// aspect_ratio_allowed when a range is configured and the requested value
	// is unparsable or falls outside it.
	evidencePath := capabilityPath(modelType, "aspect_ratio_allowed")
	evidence := capability["aspect_ratio_allowed"]
	if bounds, hasRange := numberPair(capability["aspect_ratio_range"]); hasRange {
		ratio, parsed := aspectRatioNumber(requested)
		if !parsed || ratio < bounds[0] || ratio > bounds[1] {
			evidencePath = capabilityPath(modelType, "aspect_ratio_range")
			evidence = capability["aspect_ratio_range"]
		}
	}
	context.recordChange(
		processorName,
		"adjust",
		field,
		previous,
		adjusted,
		"传入的 aspect_ratio 不符合模型能力配置，已调整为允许值。",
		evidencePath,
		evidence,
	)
	return true
}

// inputAudioProcessor removes audio content items and audio shortcut fields
// from video requests when input audio is not supported for the model.
type inputAudioProcessor struct{}

// Name returns the fixed name string of this processor.
func (inputAudioProcessor) Name() string {
	return "InputAudioProcessor"
}

// ShouldProcess reports whether a video-type request has at least one item in
// params["content"] that isAudioContent accepts.
func (inputAudioProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	if !isVideoModelType(modelType) {
		return false
	}

	items := contentItems(params["content"])
	for i := range items {
		if isAudioContent(items[i]) {
			return true
		}
	}
	return false
}

// Process strips audio inputs from the request when the model does not
// support them. It always returns true.
//
// Support is looked up only when context.modelCapability is non-empty: via
// supportsOmniAudioReference when isOmniVideoLike accepts the context,
// otherwise via the "input_audio" flag of the capability for modelType. When
// audio is unsupported, every audio item in params["content"] is dropped and
// recorded under its original index, params["content"] is rewritten with the
// remaining items, and the audio shortcut fields are deleted through
// deleteFieldsWithLog.
func (inputAudioProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	items := contentItems(params["content"])
	if len(items) == 0 {
		return true
	}

	// Leave the request untouched when the model accepts audio input.
	if len(context.modelCapability) > 0 {
		if isOmniVideoLike(context) {
			if supportsOmniAudioReference(context) {
				return true
			}
		} else if capability := capabilityForType(context.modelCapability, modelType); capability != nil && boolFromAny(capability["input_audio"]) {
			return true
		}
	}

	kept := make([]map[string]any, 0, len(items))
	for position, entry := range items {
		if !isAudioContent(entry) {
			kept = append(kept, entry)
			continue
		}
		evidencePath, evidence := audioInputCapabilityEvidence(context, modelType)
		context.recordChange(
			"InputAudioProcessor",
			"remove",
			fmt.Sprintf("content[%d]", position),
			entry,
			nil,
			"模型能力未开启输入音频，已移除 audio_url。",
			evidencePath,
			evidence,
		)
	}
	params["content"] = mapsToAnySlice(kept)

	shortcutFields := []string{"audio_url", "audioUrl", "reference_audio", "referenceAudio"}
	evidencePath, evidence := audioInputCapabilityEvidence(context, modelType)
	deleteFieldsWithLog(params, context, "InputAudioProcessor", shortcutFields, "模型能力未开启输入音频，已移除音频参考快捷字段。", evidencePath, evidence)
	return true
}

// durationProcessor normalizes the "duration" parameter of video requests
// against the model's duration options, range and step.
type durationProcessor struct{}

// Name returns the fixed name string of this processor.
func (durationProcessor) Name() string {
	return "DurationProcessor"
}

// ShouldProcess reports whether a video-type request carries a non-nil
// "duration" parameter.
func (durationProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	if !isVideoModelType(modelType) {
		return false
	}
	return params["duration"] != nil
}

// Process normalizes params["duration"] against the capability for modelType.
// It always returns true.
//
// Nothing happens when the capability is missing or the duration is not
// positive. Otherwise the first applicable rule wins:
//  1. "duration_options" scoped by resolution and mode: nextAllowedNumber
//     picks the value.
//  2. "duration_range" (with "duration_step"): normalizeDurationByRange picks
//     the value.
//  3. "duration_step" alone: normalizeDurationByStep picks the value.
//
// The chosen value is always written back and syncDurationSeconds is called;
// a change is recorded only when the value differs from the request.
func (durationProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	const processorName = "DurationProcessor"

	capability := capabilityForType(context.modelCapability, modelType)
	if capability == nil {
		return true
	}
	requested := floatFromAny(params["duration"])
	if requested <= 0 {
		return true
	}

	resolution := firstNonEmptyString(stringFromAny(params["resolution"]), context.resolution)
	mode := videoModeKey(params)

	// Rule 1: a fixed list of allowed durations.
	choices := scopedNumberList(capability["duration_options"], resolution, mode)
	if len(choices) > 0 {
		adjusted := nextAllowedNumber(requested, choices)
		params["duration"] = adjusted
		syncDurationSeconds(params)
		if adjusted == requested {
			return true
		}
		context.recordChange(
			processorName,
			"adjust",
			"duration",
			requested,
			adjusted,
			"duration 不在模型固定时长选项内，已向上调整为允许值。",
			capabilityPath(modelType, "duration_options"),
			capability["duration_options"],
		)
		return true
	}

	// Both remaining rules need the step, looked up after the range.
	lower, upper, bounded := scopedRange(capability["duration_range"], resolution, mode)
	step := durationStep(capability["duration_step"], resolution, mode)

	// Rule 2: a min/max range combined with the step.
	if bounded {
		adjusted := normalizeDurationByRange(requested, lower, upper, step)
		params["duration"] = adjusted
		syncDurationSeconds(params)
		if adjusted == requested {
			return true
		}
		context.recordChange(
			processorName,
			"adjust",
			"duration",
			requested,
			adjusted,
			"duration 超出模型时长范围或步进配置，已按能力配置归一。",
			capabilityPath(modelType, "duration_range"),
			map[string]any{
				"duration_range": capability["duration_range"],
				"duration_step":  capability["duration_step"],
			},
		)
		return true
	}

	// Rule 3: the step alone.
	adjusted := normalizeDurationByStep(requested, step)
	params["duration"] = adjusted
	syncDurationSeconds(params)
	if adjusted == requested {
		return true
	}
	context.recordChange(
		processorName,
		"adjust",
		"duration",
		requested,
		adjusted,
		"duration 不符合模型时长步进，已按步进向上归一。",
		capabilityPath(modelType, "duration_step"),
		capability["duration_step"],
	)
	return true
}

// audioProcessor removes the "audio" and "output_audio" parameters from video
// requests when the model capability does not enable output audio.
type audioProcessor struct{}

// Name returns the fixed name string of this processor.
func (audioProcessor) Name() string {
	return "AudioProcessor"
}

// ShouldProcess reports whether a video-type request carries a non-nil
// "audio" or "output_audio" parameter.
func (audioProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	if !isVideoModelType(modelType) {
		return false
	}
	if params["audio"] != nil {
		return true
	}
	return params["output_audio"] != nil
}

// Process deletes the "audio" and "output_audio" parameters, recording each
// removal, unless the capability for modelType exists and has "output_audio"
// enabled. Only keys actually present in params are deleted and recorded.
// It always returns true.
func (audioProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	capability := capabilityForType(context.modelCapability, modelType)
	if capability != nil && boolFromAny(capability["output_audio"]) {
		return true
	}

	for _, field := range [...]string{"audio", "output_audio"} {
		previous, present := params[field]
		if !present {
			continue
		}
		delete(params, field)
		context.recordChange(
			"AudioProcessor",
			"remove",
			field,
			previous,
			nil,
			"模型能力未开启输出音频，已移除音频输出参数。",
			capabilityPath(modelType, "output_audio"),
			capabilityValue(context.modelCapability, modelType, "output_audio"),
		)
	}
	return true
}

// imageCountProcessor caps the requested image count at the model's
// "output_max_images_count" for models that enable multiple output images.
type imageCountProcessor struct{}

// Name returns the fixed name string of this processor.
func (imageCountProcessor) Name() string {
	return "ImageCountProcessor"
}

// ShouldProcess reports whether modelType is "image_generate" or "image_edit".
func (imageCountProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	switch modelType {
	case "image_generate", "image_edit":
		return true
	default:
		return false
	}
}

// Process clamps the requested image count to the model's
// "output_max_images_count" and stores the result in params["n"]. It always
// returns true.
//
// Nothing is changed when the capability is missing, when
// "output_multiple_images" is not enabled, or when the rounded maximum is not
// positive. The requested count is read from "n", falling back to
// "batch_size" and then to 1 when the rounded value is not positive. A change
// is recorded under "n" only when the count exceeds the maximum.
func (imageCountProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	capability := capabilityForType(context.modelCapability, modelType)
	if capability == nil || !boolFromAny(capability["output_multiple_images"]) {
		return true
	}

	// rounded converts a loosely typed numeric value to the nearest int.
	rounded := func(raw any) int {
		return int(math.Round(floatFromAny(raw)))
	}

	limit := rounded(capability["output_max_images_count"])
	if limit <= 0 {
		return true
	}

	requested := rounded(params["n"])
	if requested <= 0 {
		requested = rounded(params["batch_size"])
	}
	if requested <= 0 {
		requested = 1
	}

	if requested > limit {
		context.recordChange(
			"ImageCountProcessor",
			"adjust",
			"n",
			requested,
			limit,
			"请求图片数量超过模型输出上限，已按 output_max_images_count 截断。",
			capabilityPath(modelType, "output_max_images_count"),
			capability["output_max_images_count"],
		)
		requested = limit
	}
	params["n"] = requested
	return true
}