// Source file: easyai-ai-gateway/apps/api/internal/runner/param_processor.go
// (1484 lines, 44 KiB, Go. Viewer chrome — "Raw Blame History" and the
// ambiguous-Unicode banner — was scraped in with the code and is preserved
// here only as this comment so the file remains valid Go.)
package runner
import (
"fmt"
"math"
"strconv"
"strings"
"github.com/easyai/easyai-ai-gateway/apps/api/internal/store"
)
// paramProcessContext carries per-request state shared by every processor in
// the chain: the effective (merged) model capability map, the routed
// candidate, the change log, and the aspect ratio / resolution values the
// chain has settled on so far.
type paramProcessContext struct {
	modelCapability map[string]any              // merged base + override capabilities (see effectiveModelCapability)
	candidate       store.RuntimeModelCandidate // the model candidate this request was routed to
	log             *parameterPreprocessingLog  // may be nil; recordChange guards against that
	aspectRatio     string                      // last aspect_ratio value written by a processor
	resolution      string                      // last resolution value written by a processor
}
// paramProcessor is a single step in the preprocessing chain.
// ShouldProcess is a cheap applicability guard; Process mutates params in
// place and returns false to abort the remaining processors in the chain.
type paramProcessor interface {
	Name() string
	ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool
	Process(params map[string]any, modelType string, context *paramProcessContext) bool
}
// ParamProcessorChain runs an ordered list of processors over a request body.
type ParamProcessorChain struct {
	processors []paramProcessor // executed in slice order; see NewParamProcessorChain
}
// parameterPreprocessResult bundles the converted request body with the
// structured change log produced while converting it.
type parameterPreprocessResult struct {
	Body map[string]any
	Log  parameterPreprocessingLog
}
// parameterPreprocessingLog records the before/after snapshots of a request
// body plus every individual change applied by the processor chain.
type parameterPreprocessingLog struct {
	ModelType string                      `json:"modelType"`
	Input     map[string]any              `json:"actualInput"`     // deep copy taken before processing
	Output    map[string]any              `json:"convertedOutput"` // deep copy taken after processing
	Changed   bool                        `json:"changed"`         // true iff Changes is non-empty
	Changes   []parameterPreprocessChange `json:"changes"`
	Model     map[string]any              `json:"model,omitempty"` // identifying fields of the routed candidate
}
// parameterPreprocessChange describes one mutation made by a processor:
// which processor, what it did ("set"/"adjust"/"remove"), on which parameter
// path, the before/after values, a human-readable reason, and (optionally)
// the capability entry that justified the change.
type parameterPreprocessChange struct {
	Processor       string `json:"processor"`
	Action          string `json:"action"`
	Path            string `json:"path"`
	Before          any    `json:"before"`
	After           any    `json:"after"`
	Reason          string `json:"reason"`
	CapabilityPath  string `json:"capabilityPath,omitempty"`
	CapabilityValue any    `json:"capabilityValue,omitempty"`
}
// NewParamProcessorChain builds the default processor chain. Order matters:
// resolution normalization runs first so later processors (aspect ratio,
// duration) can key their capability lookups off the settled resolution.
func NewParamProcessorChain() ParamProcessorChain {
	return ParamProcessorChain{
		processors: []paramProcessor{
			resolutionNormalizeProcessor{},
			aspectRatioProcessor{},
			contentFilterProcessor{},
			inputAudioProcessor{},
			durationProcessor{},
			audioProcessor{},
			imageCountProcessor{},
		},
	}
}
// preprocessRequest is the log-free convenience wrapper around
// preprocessRequestWithLog; it returns only the converted body.
func preprocessRequest(kind string, body map[string]any, candidate store.RuntimeModelCandidate) map[string]any {
	return preprocessRequestWithLog(kind, body, candidate).Body
}
// preprocessRequestWithLog deep-copies the incoming body, runs the processor
// chain for the resolved model type, and returns the converted body together
// with a structured change log.
//
// kind is the API route kind (e.g. "videos.generations"); it is used both as
// a fallback for deriving the model type and to decide whether a video
// content array must be synthesized from flat convenience fields.
func preprocessRequestWithLog(kind string, body map[string]any, candidate store.RuntimeModelCandidate) parameterPreprocessResult {
	// Work on a clone so the caller's map is never mutated.
	params := cloneMap(body)
	modelType := strings.TrimSpace(candidate.ModelType)
	if modelType == "" {
		// Candidate did not declare a type; infer it from the route kind/body.
		modelType = modelTypeFromKind(kind, params)
	}
	log := parameterPreprocessingLog{
		ModelType: modelType,
		Input:     cloneMap(params), // snapshot before any processor runs
		Changes:   []parameterPreprocessChange{},
		Model: map[string]any{
			"modelName":         candidate.ModelName,
			"modelAlias":        candidate.ModelAlias,
			"providerModelName": candidate.ProviderModelName,
			"provider":          candidate.Provider,
			"platformId":        candidate.PlatformID,
			"platformModelId":   candidate.PlatformModelID,
		},
	}
	context := &paramProcessContext{
		modelCapability: effectiveModelCapability(candidate),
		candidate:       candidate,
		log:             &log,
	}
	if kind == "videos.generations" {
		// Video routes accept flat convenience fields (prompt, first_frame,
		// reference_* ...); fold them into a content array before filtering.
		ensureVideoContent(params, context)
	}
	chain := NewParamProcessorChain()
	processed := chain.Process(params, modelType, context)
	log.Output = cloneMap(processed)
	log.Changed = len(log.Changes) > 0
	return parameterPreprocessResult{Body: processed, Log: log}
}
// Process runs every applicable processor over params in order and returns
// the (mutated) params map. A nil params yields a fresh empty map. A
// processor that returns false from Process aborts the rest of the chain.
func (chain ParamProcessorChain) Process(params map[string]any, modelType string, context *paramProcessContext) map[string]any {
	if params == nil {
		return map[string]any{}
	}
	for _, step := range chain.processors {
		if step.ShouldProcess(params, modelType, context) && !step.Process(params, modelType, context) {
			// Processor requested an early stop for this request.
			break
		}
	}
	return params
}
// recordChange appends one change entry to the context's log. It is safe to
// call on a nil context or a context without a log; before/after and the
// capability evidence are cloned so later mutations of the request body do
// not rewrite history.
func (context *paramProcessContext) recordChange(processor string, action string, path string, before any, after any, reason string, capabilityPath string, capabilityValue any) {
	if context == nil || context.log == nil {
		return
	}
	entry := parameterPreprocessChange{
		Processor:       processor,
		Action:          action,
		Path:            path,
		Before:          cloneAny(before),
		After:           cloneAny(after),
		Reason:          reason,
		CapabilityPath:  capabilityPath,
		CapabilityValue: cloneAny(capabilityValue),
	}
	context.log.Changes = append(context.log.Changes, entry)
}
// parameterPreprocessingMetrics wraps the preprocessing summary under the
// metrics key expected by the telemetry pipeline.
func parameterPreprocessingMetrics(log parameterPreprocessingLog) map[string]any {
	return map[string]any{
		"parameterPreprocessingSummary": parameterPreprocessingSummary(log),
	}
}
// parameterPreprocessingSummary condenses a preprocessing log into a compact
// map: model type, changed flag, change count, and — when changes exist —
// the de-duplicated actions, parameter paths, and capability paths involved.
func parameterPreprocessingSummary(log parameterPreprocessingLog) map[string]any {
	summary := map[string]any{
		"modelType":   log.ModelType,
		"changed":     log.Changed,
		"changeCount": len(log.Changes),
	}
	if len(log.Changes) == 0 {
		return summary
	}
	actions := make([]string, 0)
	paths := make([]string, 0)
	capabilityPaths := make([]string, 0)
	for _, change := range log.Changes {
		appendUniqueString(&actions, change.Action)
		appendUniqueString(&paths, change.Path)
		appendUniqueString(&capabilityPaths, change.CapabilityPath)
	}
	summary["actions"] = actions
	summary["paths"] = paths
	// capabilityPaths can stay empty (not every change cites a capability),
	// so it is only emitted when populated.
	if len(capabilityPaths) > 0 {
		summary["capabilityPaths"] = capabilityPaths
	}
	return summary
}
// resolutionNormalizeProcessor copies a resolution-shaped "size" value into
// the dedicated "resolution" field so downstream capability checks and
// billing see a single canonical field.
type resolutionNormalizeProcessor struct{}

func (resolutionNormalizeProcessor) Name() string { return "ResolutionNormalizeProcessor" }

// ShouldProcess fires only when resolution is unset and size looks like an
// image or video resolution for this model type.
func (resolutionNormalizeProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	if stringFromAny(params["resolution"]) != "" {
		return false
	}
	size := stringFromAny(params["size"])
	if size == "" {
		return false
	}
	return isImageResolution(modelType, size) || isVideoResolution(modelType, size)
}

// Process re-checks the ShouldProcess condition (defensive, in case Process
// is invoked directly) and promotes size into resolution, logging the change
// with the configured output_resolutions as evidence. Always returns true.
func (resolutionNormalizeProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	size := stringFromAny(params["size"])
	if stringFromAny(params["resolution"]) == "" && (isImageResolution(modelType, size) || isVideoResolution(modelType, size)) {
		_, capabilityValue := capabilityEvidence(context.modelCapability, modelType, "output_resolutions")
		params["resolution"] = size
		context.resolution = size // later processors key capability lookups off this
		context.recordChange(
			"ResolutionNormalizeProcessor",
			"set",
			"resolution",
			nil,
			size,
			"size 使用分辨率格式,归一到 resolution 供后续能力校验和计费使用。",
			capabilityPath(modelType, "output_resolutions"),
			capabilityValue,
		)
	}
	return true
}
// aspectRatioProcessor validates aspect_ratio against the model capability
// (the aspect_ratio_allowed list/table and the numeric aspect_ratio_range),
// removing the field or substituting an allowed value when the input is
// unacceptable.
type aspectRatioProcessor struct{}

func (aspectRatioProcessor) Name() string { return "AspectRatioProcessor" }

// ShouldProcess skips text models and fires whenever aspect_ratio or size is
// present.
func (aspectRatioProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	return modelType != "text_generate" && (stringFromAny(params["aspect_ratio"]) != "" || stringFromAny(params["size"]) != "")
}

// Process always returns true (it never aborts the chain); every outcome is
// recorded on the context log.
func (aspectRatioProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	capability := capabilityForType(context.modelCapability, modelType)
	if capability == nil {
		// No capability entry for this model type: nothing to validate against.
		return true
	}
	aspectRatio := stringFromAny(params["aspect_ratio"])
	if isEmptyParamString(aspectRatio) {
		// Empty-ish placeholder values must not be forwarded upstream.
		before := params["aspect_ratio"]
		delete(params, "aspect_ratio")
		context.aspectRatio = ""
		context.recordChange(
			"AspectRatioProcessor",
			"remove",
			"aspect_ratio",
			before,
			nil,
			"aspect_ratio 是空值字符串,不能作为有效比例传给上游。",
			"",
			nil,
		)
		return true
	}
	// The allowed-ratio table may be keyed by resolution. Pick one in priority
	// order: explicit resolution param, value settled earlier in the chain,
	// first configured output resolution, then a resolution-looking size
	// (suffix "K" or "p").
	resolution := firstNonEmptyString(stringFromAny(params["resolution"]), context.resolution)
	if resolution == "" {
		if values := stringListFromAny(capability["output_resolutions"]); len(values) > 0 {
			resolution = values[0]
		} else if size := stringFromAny(params["size"]); strings.HasSuffix(size, "K") || strings.HasSuffix(size, "p") {
			resolution = size
		}
	}
	allowed := aspectRatioAllowed(capability["aspect_ratio_allowed"], resolution)
	if allowed != nil && len(allowed) == 1 && allowed[0] == "adaptive" {
		// The only legal value is "adaptive": force it, logging only when the
		// incoming value actually differed.
		before := params["aspect_ratio"]
		params["aspect_ratio"] = "adaptive"
		context.aspectRatio = "adaptive"
		if before != "adaptive" {
			context.recordChange(
				"AspectRatioProcessor",
				"adjust",
				"aspect_ratio",
				before,
				"adaptive",
				"模型当前分辨率只允许 adaptive 宽高比。",
				capabilityPath(modelType, "aspect_ratio_allowed"),
				capability["aspect_ratio_allowed"],
			)
		}
		return true
	}
	if allowed != nil && len(allowed) == 0 {
		// An explicitly empty allow-list forbids the parameter entirely.
		before := params["aspect_ratio"]
		delete(params, "aspect_ratio")
		context.aspectRatio = ""
		context.recordChange(
			"AspectRatioProcessor",
			"remove",
			"aspect_ratio",
			before,
			nil,
			"模型能力配置不允许传入任何 aspect_ratio。",
			capabilityPath(modelType, "aspect_ratio_allowed"),
			capability["aspect_ratio_allowed"],
		)
		return true
	}
	if aspectRatio == "" {
		// Nothing was supplied and no forced value applies.
		return true
	}
	if allowed == nil && validAspectRatio(aspectRatio) {
		// No restriction configured and the value parses as a ratio: keep it.
		params["aspect_ratio"] = aspectRatio
		context.aspectRatio = aspectRatio
		return true
	}
	processed, ok := validateAndAdjustAspectRatio(aspectRatio, capability, allowed)
	if !ok {
		// No acceptable substitute exists; drop the field.
		before := params["aspect_ratio"]
		delete(params, "aspect_ratio")
		context.aspectRatio = ""
		context.recordChange(
			"AspectRatioProcessor",
			"remove",
			"aspect_ratio",
			before,
			nil,
			"传入的 aspect_ratio 不在模型允许范围内,且没有可用替代值。",
			capabilityPath(modelType, "aspect_ratio_allowed"),
			capability["aspect_ratio_allowed"],
		)
		return true
	}
	if processed != "" {
		before := params["aspect_ratio"]
		params["aspect_ratio"] = processed
		context.aspectRatio = processed
		if before != processed {
			// Attribute the adjustment to aspect_ratio_range when the raw
			// value fell outside (or could not be parsed against) the numeric
			// range; otherwise to the aspect_ratio_allowed list.
			path := capabilityPath(modelType, "aspect_ratio_allowed")
			value := capability["aspect_ratio_allowed"]
			if ratioRange, ok := numberPair(capability["aspect_ratio_range"]); ok {
				ratio, valid := aspectRatioNumber(aspectRatio)
				if !valid || ratio < ratioRange[0] || ratio > ratioRange[1] {
					path = capabilityPath(modelType, "aspect_ratio_range")
					value = capability["aspect_ratio_range"]
				}
			}
			context.recordChange(
				"AspectRatioProcessor",
				"adjust",
				"aspect_ratio",
				before,
				processed,
				"传入的 aspect_ratio 不符合模型能力配置,已调整为允许值。",
				path,
				value,
			)
		}
	}
	return true
}
// contentFilterProcessor prunes content items the selected model cannot
// accept: all image inputs on text-to-video models, last_frame items when
// first+last-frame input is unsupported, and (for omni models) a dedicated
// omni filtering path with its own mode/count limits.
type contentFilterProcessor struct{}

func (contentFilterProcessor) Name() string { return "ContentFilterProcessor" }

// ShouldProcess fires whenever a content field is present, even if empty.
func (contentFilterProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	_, ok := params["content"]
	return ok
}

// Process filters params["content"] in place and always returns true.
func (contentFilterProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	content := contentItems(params["content"])
	if len(content) == 0 {
		return true
	}
	if isOmniVideoLike(context) {
		// Omni models get their own filter; also keep the flat convenience
		// fields (video_url/audio_url/...) consistent with what survived.
		filtered := filterUnsupportedOmniVideoContent(content, context)
		params["content"] = mapsToAnySlice(filtered)
		syncVideoConvenienceFields(params, filtered, context)
		return true
	}
	// Rewrite unsupported reference_image items to first_frame first, so
	// they are downgraded rather than silently dropped below.
	downgradeReferenceImageIfNeeded(params, content, modelType, context)
	if modelType == "video_generate" || modelType == "text_to_video" {
		// Text-to-video model types take no image content at all.
		next := make([]map[string]any, 0, len(content))
		for index, item := range content {
			if isImageContent(item) {
				reason, path, value := imageContentRemovalEvidence(item, modelType, context)
				context.recordChange(
					"ContentFilterProcessor",
					"remove",
					fmt.Sprintf("content[%d]", index),
					item,
					nil,
					reason,
					path,
					value,
				)
				continue
			}
			next = append(next, item)
		}
		content = next
	}
	if modelType == "image_to_video" || modelType == "omni_video" || modelType == "omni" {
		if !supportsFirstAndLastFrame(context.modelCapability, modelType) {
			// Model cannot take a closing frame: drop last_frame content
			// items and their shortcut fields alike.
			next := make([]map[string]any, 0, len(content))
			for index, item := range content {
				if stringFromAny(item["role"]) == "last_frame" {
					context.recordChange(
						"ContentFilterProcessor",
						"remove",
						fmt.Sprintf("content[%d]", index),
						item,
						nil,
						"模型不支持首尾帧输入,已移除 last_frame。",
						capabilityPath(modelType, "input_first_last_frame"),
						map[string]any{
							"input_first_last_frame":    capabilityValue(context.modelCapability, modelType, "input_first_last_frame"),
							"max_images_for_last_frame": capabilityValue(context.modelCapability, modelType, "max_images_for_last_frame"),
						},
					)
					continue
				}
				next = append(next, item)
			}
			content = next
			deleteFieldsWithLog(params, context, "ContentFilterProcessor", []string{"last_frame", "lastFrame"}, "模型不支持首尾帧输入,已移除快捷字段。", capabilityPath(modelType, "input_first_last_frame"), map[string]any{
				"input_first_last_frame":    capabilityValue(context.modelCapability, modelType, "input_first_last_frame"),
				"max_images_for_last_frame": capabilityValue(context.modelCapability, modelType, "max_images_for_last_frame"),
			})
		}
	}
	params["content"] = mapsToAnySlice(content)
	return true
}
// imageContentRemovalEvidence builds the pieces of a removal log entry —
// reason text, capability path, and an evidence map of the relevant
// capability values — for an image content item, keyed on the item's role
// (first_frame / last_frame / reference_image / anything else).
func imageContentRemovalEvidence(item map[string]any, modelType string, context *paramProcessContext) (string, string, any) {
	role := stringFromAny(item["role"])
	switch role {
	case "first_frame":
		return "模型能力未开启首帧输入,已移除 first_frame。", capabilityPath(modelType, "input_first_frame"), map[string]any{
			"input_first_frame":      capabilityValue(context.modelCapability, modelType, "input_first_frame"),
			"input_first_last_frame": capabilityValue(context.modelCapability, modelType, "input_first_last_frame"),
		}
	case "last_frame":
		return "模型能力未开启尾帧或首尾帧输入,已移除 last_frame。", capabilityPath(modelType, "input_first_last_frame"), map[string]any{
			"input_last_frame":            capabilityValue(context.modelCapability, modelType, "input_last_frame"),
			"input_first_last_frame":      capabilityValue(context.modelCapability, modelType, "input_first_last_frame"),
			"max_images_for_last_frame":   capabilityValue(context.modelCapability, modelType, "max_images_for_last_frame"),
			"max_images_for_first_frame":  capabilityValue(context.modelCapability, modelType, "max_images_for_first_frame"),
			"max_images_for_middle_frame": capabilityValue(context.modelCapability, modelType, "max_images_for_middle_frame"),
		}
	case "reference_image":
		return "模型能力未开启参考图输入,已移除 reference_image。", capabilityPath(modelType, "input_reference_generate_single"), map[string]any{
			"input_reference_generate_single":   capabilityValue(context.modelCapability, modelType, "input_reference_generate_single"),
			"input_reference_generate_multiple": capabilityValue(context.modelCapability, modelType, "input_reference_generate_multiple"),
			"max_images":                        capabilityValue(context.modelCapability, modelType, "max_images"),
		}
	default:
		// Unknown or missing role: cite the broadest set of image-input flags.
		return "当前模型能力未开启图像输入,已移除 image_url。", capabilityPath(modelType, "input_first_frame"), map[string]any{
			"input_first_frame":                 capabilityValue(context.modelCapability, modelType, "input_first_frame"),
			"input_first_last_frame":            capabilityValue(context.modelCapability, modelType, "input_first_last_frame"),
			"input_reference_generate_single":   capabilityValue(context.modelCapability, modelType, "input_reference_generate_single"),
			"input_reference_generate_multiple": capabilityValue(context.modelCapability, modelType, "input_reference_generate_multiple"),
		}
	}
}
// inputAudioProcessor removes audio content items (and the audio shortcut
// fields) when the model capability does not enable audio input.
type inputAudioProcessor struct{}

func (inputAudioProcessor) Name() string { return "InputAudioProcessor" }

// ShouldProcess fires only for video model types whose content contains at
// least one audio item.
func (inputAudioProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	if !isVideoModelType(modelType) {
		return false
	}
	content := contentItems(params["content"])
	for _, item := range content {
		if isAudioContent(item) {
			return true
		}
	}
	return false
}

// Process checks input-audio support (omni models via their own capability
// block, others via the per-type input_audio flag) and strips all audio
// content when unsupported. With no capability data at all, audio is treated
// as unsupported.
func (inputAudioProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	content := contentItems(params["content"])
	if len(content) == 0 {
		return true
	}
	supportsInputAudio := false
	if len(context.modelCapability) > 0 {
		if isOmniVideoLike(context) {
			supportsInputAudio = supportsOmniAudioReference(context)
		} else if capability := capabilityForType(context.modelCapability, modelType); capability != nil {
			supportsInputAudio = boolFromAny(capability["input_audio"])
		}
	}
	if supportsInputAudio {
		return true
	}
	next := make([]map[string]any, 0, len(content))
	for index, item := range content {
		if isAudioContent(item) {
			path, value := audioInputCapabilityEvidence(context, modelType)
			context.recordChange(
				"InputAudioProcessor",
				"remove",
				fmt.Sprintf("content[%d]", index),
				item,
				nil,
				"模型能力未开启输入音频,已移除 audio_url。",
				path,
				value,
			)
			continue
		}
		next = append(next, item)
	}
	params["content"] = mapsToAnySlice(next)
	// Also drop the flat shortcut fields so they cannot resurrect the audio.
	path, value := audioInputCapabilityEvidence(context, modelType)
	deleteFieldsWithLog(params, context, "InputAudioProcessor", []string{"audio_url", "audioUrl", "reference_audio", "referenceAudio"}, "模型能力未开启输入音频,已移除音频参考快捷字段。", path, value)
	return true
}
// durationProcessor snaps the requested video duration onto the model's
// configured fixed duration options, or clamps/steps it into the configured
// duration range. Fixed options take precedence over the range.
type durationProcessor struct{}

func (durationProcessor) Name() string { return "DurationProcessor" }

// ShouldProcess fires for video model types that supplied a duration.
func (durationProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	return isVideoModelType(modelType) && params["duration"] != nil
}

// Process normalizes duration in place and always returns true. Non-positive
// or unparsable durations pass through untouched. Capability lookups are
// scoped by the settled resolution and the frame-mode key (see videoModeKey).
func (durationProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	capability := capabilityForType(context.modelCapability, modelType)
	if capability == nil {
		return true
	}
	duration := floatFromAny(params["duration"])
	if duration <= 0 {
		return true
	}
	resolution := firstNonEmptyString(stringFromAny(params["resolution"]), context.resolution)
	modeKey := videoModeKey(params)
	if options := scopedNumberList(capability["duration_options"], resolution, modeKey); len(options) > 0 {
		// Fixed option list: pick the numerically closest value.
		normalized := closestNumber(duration, options)
		params["duration"] = normalized
		syncDurationSeconds(params) // keep the duration_seconds mirror in step
		if normalized != duration {
			context.recordChange(
				"DurationProcessor",
				"adjust",
				"duration",
				duration,
				normalized,
				"duration 不在模型固定时长选项内,已调整为最近的允许值。",
				capabilityPath(modelType, "duration_options"),
				capability["duration_options"],
			)
		}
		return true
	}
	if minValue, maxValue, ok := scopedRange(capability["duration_range"], resolution, modeKey); ok {
		// Range config: clamp into [min, max] and snap to the step, if any.
		step := durationStep(capability["duration_step"], resolution, modeKey)
		normalized := normalizeDurationByRange(duration, minValue, maxValue, step)
		params["duration"] = normalized
		syncDurationSeconds(params)
		if normalized != duration {
			context.recordChange(
				"DurationProcessor",
				"adjust",
				"duration",
				duration,
				normalized,
				"duration 超出模型时长范围或步进配置,已按能力配置归一。",
				capabilityPath(modelType, "duration_range"),
				map[string]any{
					"duration_range": capability["duration_range"],
					"duration_step":  capability["duration_step"],
				},
			)
		}
	}
	return true
}
// audioProcessor strips the audio output switches ("audio"/"output_audio")
// when the model capability does not enable output_audio. A missing
// capability entry counts as unsupported.
type audioProcessor struct{}

func (audioProcessor) Name() string { return "AudioProcessor" }

// ShouldProcess fires for video models that set either audio switch.
func (audioProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	return isVideoModelType(modelType) && (params["audio"] != nil || params["output_audio"] != nil)
}

// Process removes both switch variants (each removal is logged separately)
// and always returns true.
func (audioProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	capability := capabilityForType(context.modelCapability, modelType)
	if capability == nil || !boolFromAny(capability["output_audio"]) {
		for _, key := range []string{"audio", "output_audio"} {
			if before, ok := params[key]; ok {
				delete(params, key)
				context.recordChange(
					"AudioProcessor",
					"remove",
					key,
					before,
					nil,
					"模型能力未开启输出音频,已移除音频输出参数。",
					capabilityPath(modelType, "output_audio"),
					capabilityValue(context.modelCapability, modelType, "output_audio"),
				)
			}
		}
	}
	return true
}
// imageCountProcessor clamps the requested image count ("n", falling back to
// "batch_size") to the model's output_max_images_count.
type imageCountProcessor struct{}

func (imageCountProcessor) Name() string { return "ImageCountProcessor" }

// ShouldProcess fires for image generation and image edit requests only.
func (imageCountProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	return modelType == "image_generate" || modelType == "image_edit"
}

// Process clamps the count and writes the normalized value back to "n".
// NOTE(review): when output_multiple_images is false (or the cap is
// missing/zero) the request is left untouched — n is not forced to 1.
// Confirm the upstream provider tolerates multi-image requests in that case.
func (imageCountProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	capability := capabilityForType(context.modelCapability, modelType)
	if capability == nil || !boolFromAny(capability["output_multiple_images"]) {
		return true
	}
	maxCount := int(math.Round(floatFromAny(capability["output_max_images_count"])))
	if maxCount <= 0 {
		return true
	}
	// Resolve the requested count: n, then batch_size, then default 1.
	count := int(math.Round(floatFromAny(params["n"])))
	if count <= 0 {
		count = int(math.Round(floatFromAny(params["batch_size"])))
	}
	if count <= 0 {
		count = 1
	}
	if count > maxCount {
		before := count
		count = maxCount
		context.recordChange(
			"ImageCountProcessor",
			"adjust",
			"n",
			before,
			count,
			"请求图片数量超过模型输出上限,已按 output_max_images_count 截断。",
			capabilityPath(modelType, "output_max_images_count"),
			capability["output_max_images_count"],
		)
	}
	// Always write back so "n" carries the effective count even when it was
	// derived from batch_size or defaulted.
	params["n"] = count
	return true
}
// ensureVideoContent synthesizes a content array for video requests made with
// flat convenience fields (prompt, first_frame, image/images, reference_*,
// video/audio URLs). It is a no-op when content already has items. Generated
// items carry roles (first_frame, last_frame, reference_image,
// reference_video, reference_audio) so later processors can filter them per
// model capability; the conversion itself is logged as a "set" change.
func ensureVideoContent(params map[string]any, context *paramProcessContext) {
	if len(contentItems(params["content"])) > 0 {
		return
	}
	content := make([]map[string]any, 0)
	if prompt := firstNonEmptyString(stringFromAny(params["prompt"]), stringFromAny(params["input"])); prompt != "" {
		content = append(content, map[string]any{"type": "text", "text": prompt})
	}
	// appendURL adds one typed URL item with the given role, skipping blanks.
	appendURL := func(kind string, role string, url string) {
		url = strings.TrimSpace(url)
		if url == "" {
			return
		}
		item := map[string]any{"type": kind, "role": role}
		switch kind {
		case "image_url":
			item["image_url"] = map[string]any{"url": url}
		case "video_url":
			item["video_url"] = map[string]any{"url": url}
		case "audio_url":
			item["audio_url"] = map[string]any{"url": url}
		}
		content = append(content, item)
	}
	firstFrame := firstNonEmptyStringValue(params, "first_frame", "firstFrame")
	appendURL("image_url", "first_frame", firstFrame)
	appendURL("image_url", "last_frame", firstNonEmptyStringValue(params, "last_frame", "lastFrame"))
	imageURLs := firstNonEmptyStringListFromAny(params["image"], params["images"], params["image_url"], params["imageUrl"], params["image_urls"], params["imageUrls"])
	if firstFrame == "" && len(imageURLs) > 0 {
		// No explicit first frame: promote the first generic image to it and
		// treat the rest as reference images.
		appendURL("image_url", "first_frame", imageURLs[0])
		imageURLs = imageURLs[1:]
	}
	for _, url := range imageURLs {
		appendURL("image_url", "reference_image", url)
	}
	for _, url := range firstNonEmptyStringListFromAny(params["reference_image"], params["referenceImage"]) {
		appendURL("image_url", "reference_image", url)
	}
	for _, url := range firstNonEmptyStringListFromAny(params["video"], params["video_url"], params["videoUrl"], params["reference_video"], params["referenceVideo"]) {
		appendURL("video_url", "reference_video", url)
	}
	for _, url := range firstNonEmptyStringListFromAny(params["audio_url"], params["audioUrl"], params["reference_audio"], params["referenceAudio"]) {
		appendURL("audio_url", "reference_audio", url)
	}
	if len(content) > 0 {
		params["content"] = mapsToAnySlice(content)
		context.recordChange(
			"ContentBuildProcessor",
			"set",
			"content",
			nil,
			params["content"],
			"将 prompt/first_frame/reference_* 等快捷字段转换为 content 数组,后续处理器可按模型能力逐项过滤。",
			"",
			nil,
		)
	}
}
// effectiveModelCapability merges the candidate's base capabilities with its
// per-candidate override: when both sides hold a map for the same key the
// children are deep-merged, otherwise the override value replaces the base.
// The result is freshly cloned so callers may mutate it safely.
func effectiveModelCapability(candidate store.RuntimeModelCandidate) map[string]any {
	merged := cloneMap(candidate.Capabilities)
	for key, override := range candidate.CapabilityOverride {
		overrideChild, overrideIsMap := override.(map[string]any)
		baseChild, baseIsMap := merged[key].(map[string]any)
		if baseIsMap && overrideIsMap {
			// Both sides are objects: merge field-by-field instead of
			// replacing the whole sub-map.
			merged[key] = mergeMap(baseChild, overrideChild)
			continue
		}
		merged[key] = cloneAny(override)
	}
	return merged
}
// filterUnsupportedOmniVideoContent enforces the omni capability limits on a
// content array: supported_modes for video reference/edit items, max_videos
// and max_audios for counts. Defaults when unconfigured: unlimited videos;
// zero audios unless supportsOmniAudioReference reports support (then
// unlimited). Items that are neither video nor audio pass through untouched;
// every removal is logged with capability evidence.
func filterUnsupportedOmniVideoContent(content []map[string]any, context *paramProcessContext) []map[string]any {
	capability := omniVideoCapability(context)
	maxVideos := math.Inf(1) // unlimited unless max_videos is configured
	if capability != nil {
		if value, ok := numericField(capability, "max_videos"); ok {
			maxVideos = value
		}
	}
	maxAudios := 0.0 // audio is opt-in
	if capability != nil {
		if value, ok := numericField(capability, "max_audios"); ok {
			maxAudios = value
		} else if supportsOmniAudioReference(context) {
			maxAudios = math.Inf(1)
		}
	}
	videoCount := 0.0
	audioCount := 0.0
	out := make([]map[string]any, 0, len(content))
	for index, item := range content {
		if isVideoContent(item) {
			if !supportsOmniVideoReference(item, capability) {
				path, value := omniCapabilityEvidence(context, "supported_modes")
				context.recordChange(
					"ContentFilterProcessor",
					"remove",
					fmt.Sprintf("content[%d]", index),
					item,
					nil,
					"视频参考类型不在 omni_video.supported_modes 允许范围内。",
					path,
					value,
				)
				continue
			}
			if videoCount >= maxVideos {
				path, value := omniCapabilityEvidence(context, "max_videos")
				context.recordChange(
					"ContentFilterProcessor",
					"remove",
					fmt.Sprintf("content[%d]", index),
					item,
					nil,
					"视频参考数量超过 omni_video.max_videos 限制。",
					path,
					value,
				)
				continue
			}
			videoCount++
			out = append(out, item)
			continue
		}
		if isAudioContent(item) {
			if !supportsOmniAudioReference(context) {
				path, value := omniCapabilityEvidence(context, "input_audio")
				context.recordChange(
					"ContentFilterProcessor",
					"remove",
					fmt.Sprintf("content[%d]", index),
					item,
					nil,
					"模型能力不支持音频参考,已移除 audio_url。",
					path,
					mergeMetrics(map[string]any{"input_audio": value}, omniCapabilityBundle(context, "max_audios")),
				)
				continue
			}
			if audioCount >= maxAudios {
				path, value := omniCapabilityEvidence(context, "max_audios")
				context.recordChange(
					"ContentFilterProcessor",
					"remove",
					fmt.Sprintf("content[%d]", index),
					item,
					nil,
					"音频参考数量超过 omni_video.max_audios 限制。",
					path,
					value,
				)
				continue
			}
			audioCount++
			out = append(out, item)
			continue
		}
		// Text and other item kinds are never filtered here.
		out = append(out, item)
	}
	return out
}
// isOmniVideoLike reports whether the request should take the omni filtering
// path: either the candidate declares an omni model type, or the capability
// map carries a non-nil omni_video/omni section.
func isOmniVideoLike(context *paramProcessContext) bool {
	switch strings.TrimSpace(context.candidate.ModelType) {
	case "omni_video", "omni":
		return true
	}
	if context.modelCapability["omni_video"] != nil {
		return true
	}
	return context.modelCapability["omni"] != nil
}
// omniVideoCapability returns the omni capability block, preferring the
// "omni_video" section and falling back to "omni"; nil when neither exists.
func omniVideoCapability(context *paramProcessContext) map[string]any {
	for _, key := range []string{"omni_video", "omni"} {
		if capability := capabilityForType(context.modelCapability, key); capability != nil {
			return capability
		}
	}
	return nil
}
// supportsOmniAudioReference reports whether the omni capability enables
// audio input, either via the input_audio flag or a positive max_audios.
func supportsOmniAudioReference(context *paramProcessContext) bool {
	capability := omniVideoCapability(context)
	if capability == nil {
		return false
	}
	if boolFromAny(capability["input_audio"]) {
		return true
	}
	return floatFromAny(capability["max_audios"]) > 0
}
// supportsOmniVideoReference reports whether a single video content item is
// allowed by the omni capability. With no capability config everything is
// allowed; max_videos == 0 forbids all video input. Otherwise the item is
// classified as an edit base (role "video_base" or refer_type "base") or a
// feature reference (role "video_feature"/"reference_video" or refer_type
// "feature") and checked against the supported_modes list ("video_edit" /
// "video_reference"); unclassified items pass if either mode is supported.
func supportsOmniVideoReference(item map[string]any, capability map[string]any) bool {
	if capability == nil {
		return true
	}
	if value, ok := numericField(capability, "max_videos"); ok && value == 0 {
		return false
	}
	supportedModes := stringListFromAny(capability["supported_modes"])
	supportsReference := containsString(supportedModes, "video_reference")
	supportsEdit := containsString(supportedModes, "video_edit")
	// refer_type lives inside the nested video_url object.
	video, _ := item["video_url"].(map[string]any)
	referType := stringFromAny(video["refer_type"])
	isEditVideo := stringFromAny(item["role"]) == "video_base" || referType == "base"
	isReferenceVideo := stringFromAny(item["role"]) == "video_feature" ||
		stringFromAny(item["role"]) == "reference_video" ||
		referType == "feature"
	if isEditVideo {
		return supportsEdit
	}
	if isReferenceVideo {
		return supportsReference
	}
	return supportsReference || supportsEdit
}
// downgradeReferenceImageIfNeeded rewrites reference_image content items to
// role first_frame (mutating the items in place) when the model does not
// support reference images. Applies only to the video-capable model types
// listed below; when at least one item is downgraded a user-visible warning
// is appended to the request.
func downgradeReferenceImageIfNeeded(params map[string]any, content []map[string]any, modelType string, context *paramProcessContext) {
	if modelType != "image_to_video" && modelType != "video_generate" && modelType != "video_edit" && modelType != "omni_video" && modelType != "omni" {
		return
	}
	if supportsReferenceImage(context.modelCapability, modelType) {
		return
	}
	count := 0
	for index, item := range content {
		if stringFromAny(item["type"]) == "image_url" && stringFromAny(item["role"]) == "reference_image" {
			// Snapshot the item before mutating it so the log keeps the old role.
			before := cloneMap(item)
			item["role"] = "first_frame"
			context.recordChange(
				"ContentFilterProcessor",
				"adjust",
				fmt.Sprintf("content[%d].role", index),
				before,
				item,
				"模型不支持 reference_image已降级为 first_frame。",
				capabilityPath(modelType, "input_reference_generate_single"),
				map[string]any{
					"input_reference_generate_single":   capabilityValue(context.modelCapability, modelType, "input_reference_generate_single"),
					"input_reference_generate_multiple": capabilityValue(context.modelCapability, modelType, "input_reference_generate_multiple"),
					"max_images":                        capabilityValue(context.modelCapability, modelType, "max_images"),
				},
			)
			count++
		}
	}
	if count > 0 {
		appendParamWarning(params, "reference_image is unsupported by the selected model and was downgraded to first_frame")
	}
}
// supportsReferenceImage reports whether the model accepts reference_image
// content. It checks the capability for modelType and, when that type is not
// image_to_video itself, the image_to_video capability as a fallback. With no
// capability entries at all it defaults to true (permissive). Within each
// candidate capability, explicit input_reference_generate_single/multiple
// flags take precedence; otherwise max_images > 1 is treated as implying
// reference support (presumably one slot is reserved for the first frame —
// TODO confirm against the capability schema).
func supportsReferenceImage(modelCapability map[string]any, modelType string) bool {
	candidates := []map[string]any{}
	if capability := capabilityForType(modelCapability, modelType); capability != nil {
		candidates = append(candidates, capability)
	}
	if modelType != "image_to_video" {
		if capability := capabilityForType(modelCapability, "image_to_video"); capability != nil {
			candidates = append(candidates, capability)
		}
	}
	if len(candidates) == 0 {
		// No capability data: do not block the request.
		return true
	}
	for _, capability := range candidates {
		_, hasSingle := capability["input_reference_generate_single"]
		_, hasMultiple := capability["input_reference_generate_multiple"]
		if hasSingle || hasMultiple {
			// Explicit flags win over the max_images heuristic.
			if boolFromAny(capability["input_reference_generate_single"]) || boolFromAny(capability["input_reference_generate_multiple"]) {
				return true
			}
			continue
		}
		if value, ok := numericField(capability, "max_images"); ok {
			if value > 1 {
				return true
			}
			continue
		}
	}
	return false
}
// supportsFirstAndLastFrame reports whether the model accepts first+last
// frame input: either the input_first_last_frame flag is on, or a positive
// max_images_for_last_frame is configured. Missing capability means no.
func supportsFirstAndLastFrame(modelCapability map[string]any, modelType string) bool {
	capability := capabilityForType(modelCapability, modelType)
	if capability == nil {
		return false
	}
	if boolFromAny(capability["input_first_last_frame"]) {
		return true
	}
	return floatFromAny(capability["max_images_for_last_frame"]) > 0
}
// validateAndAdjustAspectRatio returns the aspect ratio to use and whether a
// usable value exists at all. A configured aspect_ratio_range takes
// precedence: an out-of-range (or unparsable) value is mapped into the range
// by adjustAspectRatioToRange. Otherwise: a nil allowed-list means
// unrestricted; an empty list forbids the parameter; "adaptive"/"keep_ratio"
// are honored only when explicitly listed; any other disallowed value falls
// back to the first allowed entry.
func validateAndAdjustAspectRatio(aspectRatio string, capability map[string]any, allowed []string) (string, bool) {
	if !isMediaModelTypeWithAspectRatio(capability) {
		return "", false
	}
	if ratioRange, ok := numberPair(capability["aspect_ratio_range"]); ok {
		ratio, valid := aspectRatioNumber(aspectRatio)
		if !valid || ratio < ratioRange[0] || ratio > ratioRange[1] {
			return adjustAspectRatioToRange(aspectRatio, ratioRange[0], ratioRange[1], allowed), true
		}
	}
	if allowed == nil {
		return aspectRatio, true
	}
	if len(allowed) == 0 {
		return "", false
	}
	// Special sentinel ratios must be explicitly allowed; they have no
	// meaningful "closest" substitute.
	if (aspectRatio == "adaptive" || aspectRatio == "keep_ratio") && !containsString(allowed, aspectRatio) {
		return "", false
	}
	if containsString(allowed, aspectRatio) {
		return aspectRatio, true
	}
	return allowed[0], true
}
// isMediaModelTypeWithAspectRatio reports whether the capability block exists
// at all; a nil block means aspect-ratio validation has nothing to act on.
func isMediaModelTypeWithAspectRatio(capability map[string]any) bool {
	exists := capability != nil
	return exists
}
// aspectRatioAllowed extracts the allow-list of aspect ratios from the
// capability value. Flat lists ([]string / []any) are returned directly;
// a map is treated as keyed by resolution and consulted only when a
// resolution is known. nil means "no restriction configured".
func aspectRatioAllowed(value any, resolution string) []string {
	switch typed := value.(type) {
	case []string:
		return typed
	case []any:
		return stringListFromAny(typed)
	case map[string]any:
		if resolution == "" {
			return nil
		}
		values := stringListFromAny(typed[resolution])
		if len(values) == 0 {
			return nil
		}
		return values
	}
	return nil
}
// scopedNumberList extracts a list of numbers from a capability value that is
// either a flat list or a map keyed by scope (resolution, frame-mode key).
// Flat []any lists are filtered to positive numbers; []float64/[]int are
// taken as-is. For maps, the given scopes are tried in order, then any entry
// with a non-empty list is used as a fallback.
// NOTE(review): the fallback ranges over the map, and Go map iteration order
// is random — with multiple populated scopes the fallback result can differ
// between runs; confirm capability configs never rely on that case.
func scopedNumberList(value any, scopes ...string) []float64 {
	switch typed := value.(type) {
	case []any:
		out := make([]float64, 0, len(typed))
		for _, item := range typed {
			if number := floatFromAny(item); number > 0 {
				out = append(out, number)
			}
		}
		return out
	case []float64:
		return typed
	case []int:
		out := make([]float64, 0, len(typed))
		for _, item := range typed {
			out = append(out, float64(item))
		}
		return out
	case map[string]any:
		for _, scope := range scopes {
			if scope == "" {
				continue
			}
			if values := scopedNumberList(typed[scope]); len(values) > 0 {
				return values
			}
		}
		// No requested scope matched: fall back to any populated entry.
		for _, item := range typed {
			if values := scopedNumberList(item); len(values) > 0 {
				return values
			}
		}
	}
	return nil
}
// scopedRange extracts a (min, max) pair from a capability value that is
// either a direct two-number pair or a map keyed by scope. Scopes are tried
// in order, then any entry holding a valid pair is used as a fallback.
// NOTE(review): like scopedNumberList, the fallback relies on random map
// iteration order when several scoped entries exist — confirm acceptable.
func scopedRange(value any, scopes ...string) (float64, float64, bool) {
	if pair, ok := numberPair(value); ok {
		return pair[0], pair[1], true
	}
	if typed, ok := value.(map[string]any); ok {
		for _, scope := range scopes {
			if scope == "" {
				continue
			}
			if minValue, maxValue, ok := scopedRange(typed[scope]); ok {
				return minValue, maxValue, true
			}
		}
		for _, item := range typed {
			if minValue, maxValue, ok := scopedRange(item); ok {
				return minValue, maxValue, true
			}
		}
	}
	return 0, 0, false
}
// durationStep extracts a positive step size from a capability value that is
// either a direct number or a map keyed by scope. Scopes are tried in order,
// then any entry with a positive step is used as a fallback (same random
// map-iteration caveat as scopedNumberList/scopedRange). Returns 0 when no
// step is configured.
func durationStep(value any, scopes ...string) float64 {
	if step := floatFromAny(value); step > 0 {
		return step
	}
	if typed, ok := value.(map[string]any); ok {
		for _, scope := range scopes {
			if scope == "" {
				continue
			}
			if step := durationStep(typed[scope]); step > 0 {
				return step
			}
		}
		for _, item := range typed {
			if step := durationStep(item); step > 0 {
				return step
			}
		}
	}
	return 0
}
// normalizeDurationByRange clamps target into [minValue, maxValue] and, when
// a positive step is configured, snaps it to the nearest value on the grid
// minValue + k*step. The result is kept inside the range even when rounding
// to the nearest step would overshoot maxValue (e.g. min=2, max=10, step=3:
// a target of 10 snaps down to 8, never up to 11). The final value is rounded
// to 6 decimal places to suppress floating-point noise.
func normalizeDurationByRange(target float64, minValue float64, maxValue float64, step float64) float64 {
	clamped := math.Min(math.Max(target, minValue), maxValue)
	if step <= 0 {
		return clamped
	}
	snapped := math.Round((clamped-minValue)/step)*step + minValue
	if snapped > maxValue {
		// Nearest-step rounding overshot the ceiling; fall back to the last
		// grid point that still fits inside the range.
		snapped = math.Floor((maxValue-minValue)/step)*step + minValue
	}
	return math.Round(snapped*1_000_000) / 1_000_000
}
// closestNumber returns the element of values nearest to target (ties keep
// the earliest element). An empty list returns target unchanged.
func closestNumber(target float64, values []float64) float64 {
	if len(values) == 0 {
		return target
	}
	best := values[0]
	for _, candidate := range values[1:] {
		if math.Abs(target-candidate) < math.Abs(target-best) {
			best = candidate
		}
	}
	return best
}
// videoModeKey derives a capability scope key from which frame roles appear
// in the content array: "input_first_last_frame" (both frames),
// "input_first_frame", "input_last_frame", or "" when no frame item exists
// (pure text-to-video). Used to scope duration lookups per frame mode.
func videoModeKey(params map[string]any) string {
	content := contentItems(params["content"])
	hasFirstFrame := false
	hasLastFrame := false
	for _, item := range content {
		switch stringFromAny(item["role"]) {
		case "first_frame":
			hasFirstFrame = true
		case "last_frame":
			hasLastFrame = true
		}
	}
	switch {
	case hasFirstFrame && hasLastFrame:
		return "input_first_last_frame"
	case hasFirstFrame:
		return "input_first_frame"
	case hasLastFrame:
		return "input_last_frame"
	default:
		return ""
	}
}
// syncDurationSeconds mirrors the (possibly normalized) "duration" value into
// the "duration_seconds" alias, but only when the caller supplied a non-nil
// "duration_seconds" — the alias is never introduced from scratch.
func syncDurationSeconds(params map[string]any) {
	value, ok := params["duration_seconds"]
	if !ok || value == nil {
		return
	}
	params["duration_seconds"] = params["duration"]
}
// syncVideoConvenienceFields removes top-level video/audio "shortcut" fields
// when the corresponding content items are no longer present (e.g. after
// capability filtering), keeping the request internally consistent. Each
// removal is logged with the capability evidence that justified it.
func syncVideoConvenienceFields(params map[string]any, content []map[string]any, context *paramProcessContext) {
	hasVideo := false
	hasAudio := false
	for _, item := range content {
		hasVideo = hasVideo || isVideoContent(item)
		hasAudio = hasAudio || isAudioContent(item)
	}
	if !hasVideo {
		// No video content survived: drop all video-reference shortcut fields.
		path, value := omniCapabilityEvidence(context, "supported_modes")
		deleteFieldsWithLog(params, context, "ContentFilterProcessor", []string{"video", "video_url", "videoUrl", "reference_video", "referenceVideo"}, "对应视频 content 已被模型能力过滤,移除视频参考快捷字段。", path, value)
	}
	if !hasAudio {
		// No audio content survived: drop audio-reference shortcuts, bundling
		// the max_audios limit alongside input_audio as supporting evidence.
		path, value := omniCapabilityEvidence(context, "input_audio")
		deleteFieldsWithLog(params, context, "ContentFilterProcessor", []string{"audio_url", "audioUrl", "reference_audio", "referenceAudio"}, "对应音频 content 已被模型能力过滤,移除音频参考快捷字段。", path, mergeMetrics(map[string]any{"input_audio": value}, omniCapabilityBundle(context, "max_audios")))
	}
}
// deleteFieldsWithLog deletes every listed key that is present in params and
// records one "remove" change entry per deleted key, carrying the shared
// reason and capability evidence.
func deleteFieldsWithLog(params map[string]any, context *paramProcessContext, processor string, keys []string, reason string, capabilityPath string, capabilityValue any) {
	for _, key := range keys {
		previous, exists := params[key]
		if !exists {
			continue
		}
		delete(params, key)
		context.recordChange(processor, "remove", key, previous, nil, reason, capabilityPath, capabilityValue)
	}
}
// appendParamWarning appends warning to the request's internal
// "_param_warnings" list, skipping exact duplicates.
func appendParamWarning(params map[string]any, warning string) {
	list, _ := params["_param_warnings"].([]any)
	for i := range list {
		if stringFromAny(list[i]) == warning {
			return
		}
	}
	params["_param_warnings"] = append(list, warning)
}
// filterContent returns the items for which keep reports true, preserving
// order; the input slice itself is never modified.
func filterContent(content []map[string]any, keep func(map[string]any) bool) []map[string]any {
	kept := make([]map[string]any, 0, len(content))
	for _, candidate := range content {
		if !keep(candidate) {
			continue
		}
		kept = append(kept, candidate)
	}
	return kept
}
// contentItems normalizes a raw "content" value into a slice of deep-cloned
// map items. Non-map entries inside a []any are skipped; any other shape
// yields nil.
func contentItems(value any) []map[string]any {
	switch typed := value.(type) {
	case []any:
		items := make([]map[string]any, 0, len(typed))
		for _, entry := range typed {
			object, ok := entry.(map[string]any)
			if !ok {
				continue
			}
			items = append(items, cloneMap(object))
		}
		return items
	case []map[string]any:
		items := make([]map[string]any, 0, len(typed))
		for _, entry := range typed {
			items = append(items, cloneMap(entry))
		}
		return items
	}
	return nil
}
// mapsToAnySlice widens a []map[string]any into a []any without cloning the
// element maps.
func mapsToAnySlice(values []map[string]any) []any {
	out := make([]any, len(values))
	for i := range values {
		out[i] = values[i]
	}
	return out
}
// isImageContent reports whether item carries image content, either via its
// declared type or a non-nil image_url payload.
func isImageContent(item map[string]any) bool {
	if item["image_url"] != nil {
		return true
	}
	return stringFromAny(item["type"]) == "image_url"
}

// isVideoContent reports whether item carries video content.
func isVideoContent(item map[string]any) bool {
	if item["video_url"] != nil {
		return true
	}
	return stringFromAny(item["type"]) == "video_url"
}

// isAudioContent reports whether item carries audio content.
func isAudioContent(item map[string]any) bool {
	if item["audio_url"] != nil {
		return true
	}
	return stringFromAny(item["type"]) == "audio_url"
}
// capabilityForType returns the capability sub-map registered for modelType,
// or nil when capabilities is nil or the entry is absent / not a map.
func capabilityForType(capabilities map[string]any, modelType string) map[string]any {
	if capabilities == nil {
		return nil
	}
	typed, _ := capabilities[modelType].(map[string]any)
	return typed
}
// capabilityPath builds the dotted evidence path "capabilities.<type>[.<key>]".
// A blank model type yields ""; a blank key omits the trailing segment.
func capabilityPath(modelType string, key string) string {
	trimmedType := strings.TrimSpace(modelType)
	if trimmedType == "" {
		return ""
	}
	if strings.TrimSpace(key) == "" {
		return "capabilities." + trimmedType
	}
	// The key is appended verbatim; only its emptiness check is trimmed.
	return strings.Join([]string{"capabilities", trimmedType, key}, ".")
}
// capabilityValue returns a deep clone of capabilities[modelType][key], or
// nil when the model type has no capability map.
func capabilityValue(capabilities map[string]any, modelType string, key string) any {
	if capability := capabilityForType(capabilities, modelType); capability != nil {
		return cloneAny(capability[key])
	}
	return nil
}

// capabilityEvidence pairs the dotted capability path with its cloned value,
// ready to attach to a change-log record.
func capabilityEvidence(capabilities map[string]any, modelType string, key string) (string, any) {
	path := capabilityPath(modelType, key)
	value := capabilityValue(capabilities, modelType, key)
	return path, value
}
// audioInputCapabilityEvidence resolves the evidence for audio-input support.
// Omni-video-like models bundle the max_audios limit with the input_audio
// capability; every other model type reports input_audio directly.
func audioInputCapabilityEvidence(context *paramProcessContext, modelType string) (string, any) {
	if !isOmniVideoLike(context) {
		return capabilityEvidence(context.modelCapability, modelType, "input_audio")
	}
	path, value := omniCapabilityEvidence(context, "input_audio")
	bundle := mergeMetrics(map[string]any{"input_audio": value}, omniCapabilityBundle(context, "max_audios"))
	return path, bundle
}
// omniCapabilityType picks which omni capability namespace the model exposes,
// preferring "omni_video" and defaulting to it when neither is declared (or
// the context is nil).
func omniCapabilityType(context *paramProcessContext) string {
	if context != nil {
		if capabilityForType(context.modelCapability, "omni_video") != nil {
			return "omni_video"
		}
		if capabilityForType(context.modelCapability, "omni") != nil {
			return "omni"
		}
	}
	return "omni_video"
}
// omniCapabilityEvidence resolves the path and cloned value for key under the
// model's omni capability namespace. A nil context yields a nil capability map.
func omniCapabilityEvidence(context *paramProcessContext, key string) (string, any) {
	modelType := omniCapabilityType(context)
	capabilities := map[string]any(nil)
	if context != nil {
		capabilities = context.modelCapability
	}
	return capabilityPath(modelType, key), capabilityValue(capabilities, modelType, key)
}

// omniCapabilityBundle collects the cloned values of several omni capability
// keys into one map; absent keys map to nil.
func omniCapabilityBundle(context *paramProcessContext, keys ...string) map[string]any {
	modelType := omniCapabilityType(context)
	capabilities := map[string]any(nil)
	if context != nil {
		capabilities = context.modelCapability
	}
	bundle := make(map[string]any, len(keys))
	for _, key := range keys {
		bundle[key] = capabilityValue(capabilities, modelType, key)
	}
	return bundle
}
// numericField reports the float value of values[key] and whether the key is
// present; a present but non-numeric value coerces to 0 via floatFromAny.
func numericField(values map[string]any, key string) (float64, bool) {
	if values == nil {
		return 0, false
	}
	// Single lookup instead of the check-then-read double access.
	raw, ok := values[key]
	if !ok {
		return 0, false
	}
	return floatFromAny(raw), true
}
// boolFromAny returns the value as a bool, or false for any non-bool type.
func boolFromAny(value any) bool {
	if typed, ok := value.(bool); ok {
		return typed
	}
	return false
}
// firstNonEmptyStringValue returns the first key whose value coerces to a
// non-empty string, or "" when none do.
func firstNonEmptyStringValue(values map[string]any, keys ...string) string {
	for _, key := range keys {
		text := stringFromAny(values[key])
		if text == "" {
			continue
		}
		return text
	}
	return ""
}
// firstNonEmptyStringListFromAny returns the first candidate that normalizes
// to a non-empty string list, or nil when none do.
func firstNonEmptyStringListFromAny(values ...any) []string {
	for _, candidate := range values {
		if list := stringListFromAny(candidate); len(list) > 0 {
			return list
		}
	}
	return nil
}
// stringListFromAny normalizes a value into a list of trimmed, non-empty
// strings. Accepts []string, []any (elements coerced via stringFromAny), or
// a single string; any other shape — or an all-blank input string — yields nil.
func stringListFromAny(value any) []string {
	switch typed := value.(type) {
	case []string:
		out := make([]string, 0, len(typed))
		for _, raw := range typed {
			trimmed := strings.TrimSpace(raw)
			if trimmed == "" {
				continue
			}
			out = append(out, trimmed)
		}
		return out
	case []any:
		out := make([]string, 0, len(typed))
		for _, raw := range typed {
			text := stringFromAny(raw)
			if text == "" {
				continue
			}
			out = append(out, text)
		}
		return out
	case string:
		trimmed := strings.TrimSpace(typed)
		if trimmed == "" {
			return nil
		}
		return []string{trimmed}
	}
	return nil
}
// containsString reports whether target occurs in values (exact match).
func containsString(values []string, target string) bool {
	for i := range values {
		if values[i] == target {
			return true
		}
	}
	return false
}
// appendUniqueString appends the trimmed value to *values unless it is blank
// or already present.
func appendUniqueString(values *[]string, value string) {
	trimmed := strings.TrimSpace(value)
	if trimmed == "" {
		return
	}
	for _, existing := range *values {
		if existing == trimmed {
			return
		}
	}
	*values = append(*values, trimmed)
}
// numberPair extracts the first two numbers of a list value as a pair.
// Supports []any (elements coerced via floatFromAny), []float64, and []int;
// shorter lists or other types report false.
func numberPair(value any) ([2]float64, bool) {
	switch typed := value.(type) {
	case []any:
		if len(typed) >= 2 {
			return [2]float64{floatFromAny(typed[0]), floatFromAny(typed[1])}, true
		}
	case []float64:
		if len(typed) >= 2 {
			return [2]float64{typed[0], typed[1]}, true
		}
	case []int:
		if len(typed) >= 2 {
			return [2]float64{float64(typed[0]), float64(typed[1])}, true
		}
	}
	return [2]float64{}, false
}
// validAspectRatio reports whether value is a usable aspect-ratio spec:
// either a passthrough keyword or a parseable "W:H" ratio.
func validAspectRatio(value string) bool {
	switch value {
	case "adaptive", "keep_ratio":
		return true
	}
	_, ok := aspectRatioNumber(value)
	return ok
}
// aspectRatioNumber parses "W:H" into the numeric ratio W/H. Returns false
// when the string is not exactly two colon-separated parts or either side is
// not a positive number.
func aspectRatioNumber(value string) (float64, bool) {
	parts := strings.SplitN(value, ":", 3)
	if len(parts) != 2 {
		return 0, false
	}
	width := parsePositiveFloat(parts[0])
	height := parsePositiveFloat(parts[1])
	if width <= 0 || height <= 0 {
		return 0, false
	}
	return width / height, true
}
// adjustAspectRatioToRange coerces an aspect-ratio string into the supported
// numeric range [minValue, maxValue], preferring the nearest entry of the
// allowed list when one fits the range.
//
// Resolution order:
//  1. Unparseable input falls back to the first allowed entry, or "1:1".
//  2. When the allowed list has at least one in-range entry, the entry whose
//     ratio is closest to the input wins.
//  3. Otherwise the ratio is clamped: values below the range become minValue,
//     values above become maxValue, and values already inside the range are
//     returned unchanged (previously an in-range value was incorrectly
//     forced to maxValue, discarding valid input).
func adjustAspectRatioToRange(value string, minValue float64, maxValue float64, allowed []string) string {
	current, ok := aspectRatioNumber(value)
	if !ok {
		if len(allowed) > 0 {
			return allowed[0]
		}
		return "1:1"
	}
	if len(allowed) > 0 {
		closest := ""
		minDiff := math.Inf(1)
		for _, candidate := range allowed {
			ratio, ok := aspectRatioNumber(candidate)
			if !ok || ratio < minValue || ratio > maxValue {
				continue
			}
			if diff := math.Abs(ratio - current); diff < minDiff {
				minDiff = diff
				closest = candidate
			}
		}
		if closest != "" {
			return closest
		}
	}
	switch {
	case current < minValue:
		return ratioString(minValue)
	case current > maxValue:
		return ratioString(maxValue)
	default:
		// Already within range: keep the caller's ratio untouched.
		return value
	}
}
// ratioString renders a numeric ratio as "<value>:1" with at most six decimal
// places and trailing zeros removed; non-positive ratios fall back to "1:1".
func ratioString(value float64) string {
	if value <= 0 {
		return "1:1"
	}
	text := strconv.FormatFloat(value, 'f', 6, 64)
	text = strings.TrimRight(text, "0")
	text = strings.TrimRight(text, ".")
	return text + ":1"
}
// parsePositiveFloat parses a non-negative decimal made only of digits and
// dots; any other character (sign, exponent, letters) yields 0, as does a
// string strconv cannot parse.
func parsePositiveFloat(value string) float64 {
	trimmed := strings.TrimSpace(value)
	for _, r := range trimmed {
		if (r < '0' || r > '9') && r != '.' {
			return 0
		}
	}
	parsed, err := strconv.ParseFloat(trimmed, 64)
	if err != nil {
		return 0
	}
	return parsed
}
// isEmptyParamString reports whether value is a textual null placeholder
// ("null"/"undefined", case-insensitive, surrounding space ignored). A truly
// empty string is NOT considered a placeholder by this check.
func isEmptyParamString(value string) bool {
	switch strings.ToLower(strings.TrimSpace(value)) {
	case "null", "undefined":
		return true
	}
	return false
}
// isImageResolution reports whether value is a recognized still-image
// resolution tier for an image generation/edit model type.
func isImageResolution(modelType string, value string) bool {
	if modelType != "image_generate" && modelType != "image_edit" {
		return false
	}
	return containsString([]string{"1K", "2K", "4K", "8K"}, value)
}

// isVideoResolution reports whether value is a recognized video resolution
// tier for a video-capable model type.
func isVideoResolution(modelType string, value string) bool {
	if !isVideoModelType(modelType) {
		return false
	}
	return containsString([]string{"480p", "720p", "1080p", "1440p", "2160p"}, value)
}
// isVideoModelType reports whether modelType produces video output (including
// the omni families).
func isVideoModelType(modelType string) bool {
	switch modelType {
	case "video_generate", "text_to_video", "image_to_video", "video_edit", "omni_video", "omni":
		return true
	}
	return false
}
// cloneMap returns a deep copy of values: nested maps and slices are cloned
// recursively, scalar values are shared.
func cloneMap(values map[string]any) map[string]any {
	cloned := make(map[string]any, len(values))
	for key, value := range values {
		cloned[key] = cloneAny(value)
	}
	return cloned
}

// cloneAny deep-copies map[string]any, []any, and []map[string]any values
// (note: a []map[string]any is widened into []any); every other type is
// returned as-is.
func cloneAny(value any) any {
	switch typed := value.(type) {
	case map[string]any:
		return cloneMap(typed)
	case []any:
		cloned := make([]any, len(typed))
		for i, item := range typed {
			cloned[i] = cloneAny(item)
		}
		return cloned
	case []map[string]any:
		cloned := make([]any, len(typed))
		for i, item := range typed {
			cloned[i] = cloneMap(item)
		}
		return cloned
	default:
		return value
	}
}