package runner import ( "fmt" "math" "strconv" "strings" "github.com/easyai/easyai-ai-gateway/apps/api/internal/store" ) type paramProcessContext struct { modelCapability map[string]any candidate store.RuntimeModelCandidate log *parameterPreprocessingLog aspectRatio string resolution string } type paramProcessor interface { Name() string ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool Process(params map[string]any, modelType string, context *paramProcessContext) bool } type ParamProcessorChain struct { processors []paramProcessor } type parameterPreprocessResult struct { Body map[string]any Log parameterPreprocessingLog } type parameterPreprocessingLog struct { ModelType string `json:"modelType"` Input map[string]any `json:"actualInput"` Output map[string]any `json:"convertedOutput"` Changed bool `json:"changed"` Changes []parameterPreprocessChange `json:"changes"` Model map[string]any `json:"model,omitempty"` } type parameterPreprocessChange struct { Processor string `json:"processor"` Action string `json:"action"` Path string `json:"path"` Before any `json:"before"` After any `json:"after"` Reason string `json:"reason"` CapabilityPath string `json:"capabilityPath,omitempty"` CapabilityValue any `json:"capabilityValue,omitempty"` } func NewParamProcessorChain() ParamProcessorChain { return ParamProcessorChain{ processors: []paramProcessor{ resolutionNormalizeProcessor{}, aspectRatioProcessor{}, contentFilterProcessor{}, inputAudioProcessor{}, durationProcessor{}, audioProcessor{}, imageCountProcessor{}, }, } } func preprocessRequest(kind string, body map[string]any, candidate store.RuntimeModelCandidate) map[string]any { return preprocessRequestWithLog(kind, body, candidate).Body } func preprocessRequestWithLog(kind string, body map[string]any, candidate store.RuntimeModelCandidate) parameterPreprocessResult { params := cloneMap(body) modelType := strings.TrimSpace(candidate.ModelType) if modelType == "" { modelType = 
modelTypeFromKind(kind, params) } log := parameterPreprocessingLog{ ModelType: modelType, Input: cloneMap(params), Changes: []parameterPreprocessChange{}, Model: map[string]any{ "modelName": candidate.ModelName, "modelAlias": candidate.ModelAlias, "providerModelName": candidate.ProviderModelName, "provider": candidate.Provider, "platformId": candidate.PlatformID, "platformModelId": candidate.PlatformModelID, }, } context := ¶mProcessContext{ modelCapability: effectiveModelCapability(candidate), candidate: candidate, log: &log, } if kind == "videos.generations" { ensureVideoContent(params, context) } chain := NewParamProcessorChain() processed := chain.Process(params, modelType, context) log.Output = cloneMap(processed) log.Changed = len(log.Changes) > 0 return parameterPreprocessResult{Body: processed, Log: log} } func (chain ParamProcessorChain) Process(params map[string]any, modelType string, context *paramProcessContext) map[string]any { if params == nil { return map[string]any{} } for _, processor := range chain.processors { if !processor.ShouldProcess(params, modelType, context) { continue } if !processor.Process(params, modelType, context) { break } } return params } func (context *paramProcessContext) recordChange(processor string, action string, path string, before any, after any, reason string, capabilityPath string, capabilityValue any) { if context == nil || context.log == nil { return } context.log.Changes = append(context.log.Changes, parameterPreprocessChange{ Processor: processor, Action: action, Path: path, Before: cloneAny(before), After: cloneAny(after), Reason: reason, CapabilityPath: capabilityPath, CapabilityValue: cloneAny(capabilityValue), }) } func parameterPreprocessingMetrics(log parameterPreprocessingLog) map[string]any { return map[string]any{ "parameterPreprocessingSummary": parameterPreprocessingSummary(log), } } func parameterPreprocessingSummary(log parameterPreprocessingLog) map[string]any { summary := map[string]any{ "modelType": 
log.ModelType,
		"changed":     log.Changed,
		"changeCount": len(log.Changes),
	}
	if len(log.Changes) == 0 {
		return summary
	}
	actions := make([]string, 0)
	paths := make([]string, 0)
	capabilityPaths := make([]string, 0)
	for _, change := range log.Changes {
		appendUniqueString(&actions, change.Action)
		appendUniqueString(&paths, change.Path)
		appendUniqueString(&capabilityPaths, change.CapabilityPath)
	}
	summary["actions"] = actions
	summary["paths"] = paths
	if len(capabilityPaths) > 0 {
		summary["capabilityPaths"] = capabilityPaths
	}
	return summary
}

// resolutionNormalizeProcessor copies a resolution-shaped "size" value into
// "resolution" when the request did not set "resolution" itself.
type resolutionNormalizeProcessor struct{}

// Name returns the processor name used in change records.
func (resolutionNormalizeProcessor) Name() string { return "ResolutionNormalizeProcessor" }

// ShouldProcess reports whether "resolution" is unset and "size" holds an
// image or video resolution for modelType.
func (resolutionNormalizeProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	if stringFromAny(params["resolution"]) != "" {
		return false
	}
	size := stringFromAny(params["size"])
	if size == "" {
		return false
	}
	return isImageResolution(modelType, size) || isVideoResolution(modelType, size)
}

// Process sets params["resolution"] and context.resolution from "size" and
// records the change. It always returns true so the chain continues.
func (resolutionNormalizeProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	size := stringFromAny(params["size"])
	// Re-check the ShouldProcess condition so Process is safe to call directly.
	if stringFromAny(params["resolution"]) == "" && (isImageResolution(modelType, size) || isVideoResolution(modelType, size)) {
		_, capabilityValue := capabilityEvidence(context.modelCapability, modelType, "output_resolutions")
		params["resolution"] = size
		context.resolution = size
		context.recordChange(
			"ResolutionNormalizeProcessor",
			"set",
			"resolution",
			nil,
			size,
			"size 使用分辨率格式,归一到 resolution 供后续能力校验和计费使用。",
			capabilityPath(modelType, "output_resolutions"),
			capabilityValue,
		)
	}
	return true
}

// aspectRatioProcessor validates "aspect_ratio" against the model capability
// and removes or adjusts values the model cannot accept.
type aspectRatioProcessor struct{}

// Name returns the processor name used in change records.
func (aspectRatioProcessor) Name() string { return "AspectRatioProcessor" }

// ShouldProcess reports whether the request targets a non-text model and
// carries either "aspect_ratio" or "size".
func (aspectRatioProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	return modelType != "text_generate" && (stringFromAny(params["aspect_ratio"]) != "" || stringFromAny(params["size"]) != "")
}

// Process reconciles params["aspect_ratio"] with the capability entry for
// modelType. Without a capability entry nothing changes. Otherwise the value
// is removed, forced to "adaptive", kept, or replaced by an allowed value, and
// context.aspectRatio mirrors the outcome. It always returns true.
func (aspectRatioProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	capability := capabilityForType(context.modelCapability, modelType)
	if capability == nil {
		return true
	}
	aspectRatio := stringFromAny(params["aspect_ratio"])
	if isEmptyParamString(aspectRatio) {
		before := params["aspect_ratio"]
		delete(params, "aspect_ratio")
		context.aspectRatio = ""
		context.recordChange(
			"AspectRatioProcessor",
			"remove",
			"aspect_ratio",
			before,
			nil,
			"aspect_ratio 是空值字符串,不能作为有效比例传给上游。",
			"",
			nil,
		)
		return true
	}

	// Pick the resolution used to scope a per-resolution allow-list: explicit
	// value first, then the model's first output resolution, then a size that
	// looks like "4K" / "1080p".
	resolution := firstNonEmptyString(stringFromAny(params["resolution"]), context.resolution)
	if resolution == "" {
		if values := stringListFromAny(capability["output_resolutions"]); len(values) > 0 {
			resolution = values[0]
		} else if size := stringFromAny(params["size"]); strings.HasSuffix(size, "K") || strings.HasSuffix(size, "p") {
			resolution = size
		}
	}

	allowed := aspectRatioAllowed(capability["aspect_ratio_allowed"], resolution)
	if allowed != nil && len(allowed) == 1 && allowed[0] == "adaptive" {
		before := params["aspect_ratio"]
		params["aspect_ratio"] = "adaptive"
		context.aspectRatio = "adaptive"
		if before != "adaptive" {
			context.recordChange(
				"AspectRatioProcessor",
				"adjust",
				"aspect_ratio",
				before,
				"adaptive",
				"模型当前分辨率只允许 adaptive 宽高比。",
				capabilityPath(modelType, "aspect_ratio_allowed"),
				capability["aspect_ratio_allowed"],
			)
		}
		return true
	}
	if allowed != nil && len(allowed) == 0 {
		before, present := params["aspect_ratio"]
		delete(params, "aspect_ratio")
		context.aspectRatio = ""
		// This branch is also reached when only "size" was supplied; logging a
		// removal of a key that never existed would mark the request as changed.
		if present {
			context.recordChange(
				"AspectRatioProcessor",
				"remove",
				"aspect_ratio",
				before,
				nil,
				"模型能力配置不允许传入任何 aspect_ratio。",
				capabilityPath(modelType, "aspect_ratio_allowed"),
				capability["aspect_ratio_allowed"],
			)
		}
		return true
	}
	if aspectRatio == "" {
		return true
	}
	if allowed == nil && validAspectRatio(aspectRatio) {
		params["aspect_ratio"] = aspectRatio
		context.aspectRatio = aspectRatio
		return true
	}

	processed, ok := validateAndAdjustAspectRatio(aspectRatio, capability, allowed)
	if !ok {
		before := params["aspect_ratio"]
		delete(params, "aspect_ratio")
		context.aspectRatio = ""
		context.recordChange(
			"AspectRatioProcessor",
			"remove",
			"aspect_ratio",
			before,
			nil,
			"传入的 aspect_ratio 不在模型允许范围内,且没有可用替代值。",
			capabilityPath(modelType, "aspect_ratio_allowed"),
			capability["aspect_ratio_allowed"],
		)
		return true
	}
	if processed != "" {
		before := params["aspect_ratio"]
		params["aspect_ratio"] = processed
		context.aspectRatio = processed
		if before != processed {
			// Report the range as evidence when the range check (rather than
			// the allow-list) is what rejected the original value.
			path := capabilityPath(modelType, "aspect_ratio_allowed")
			value := capability["aspect_ratio_allowed"]
			if ratioRange, ok := numberPair(capability["aspect_ratio_range"]); ok {
				ratio, valid := aspectRatioNumber(aspectRatio)
				if !valid || ratio < ratioRange[0] || ratio > ratioRange[1] {
					path = capabilityPath(modelType, "aspect_ratio_range")
					value = capability["aspect_ratio_range"]
				}
			}
			context.recordChange(
				"AspectRatioProcessor",
				"adjust",
				"aspect_ratio",
				before,
				processed,
				"传入的 aspect_ratio 不符合模型能力配置,已调整为允许值。",
				path,
				value,
			)
		}
	}
	return true
}

// contentFilterProcessor removes or downgrades entries of the "content" array
// that the candidate model cannot consume.
type contentFilterProcessor struct{}

// Name returns the processor name used in change records.
func (contentFilterProcessor) Name() string { return "ContentFilterProcessor" }

// ShouldProcess reports whether the request has a "content" key at all.
func (contentFilterProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	_, ok := params["content"]
	return ok
}

// Process filters params["content"] by model capability. Omni-video-like
// models take a dedicated path; other models have unsupported reference
// images downgraded, image items dropped for text-to-video style types, and
// last_frame items dropped when first/last-frame input is unsupported. It
// always returns true.
func (contentFilterProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	content := contentItems(params["content"])
	if len(content) == 0 {
		return true
	}
	if isOmniVideoLike(context) {
		filtered := filterUnsupportedOmniVideoContent(content, context)
		params["content"] = mapsToAnySlice(filtered)
		syncVideoConvenienceFields(params, filtered, context)
		return true
	}
	downgradeReferenceImageIfNeeded(params, content, modelType, context)
	if modelType == "video_generate" || modelType == "text_to_video" {
		next := make([]map[string]any, 0, len(content))
		for index, item := range content {
			if
isImageContent(item) {
				context.recordChange(
					"ContentFilterProcessor",
					"remove",
					fmt.Sprintf("content[%d]", index),
					item,
					nil,
					"当前候选模型没有图像参考输入模式,需移除 image_url。",
					capabilityPath(modelType, ""),
					capabilityForType(context.modelCapability, modelType),
				)
				continue
			}
			next = append(next, item)
		}
		content = next
	}
	if modelType == "image_to_video" || modelType == "omni_video" || modelType == "omni" {
		if !supportsFirstAndLastFrame(context.modelCapability, modelType) {
			next := make([]map[string]any, 0, len(content))
			for index, item := range content {
				if stringFromAny(item["role"]) == "last_frame" {
					context.recordChange(
						"ContentFilterProcessor",
						"remove",
						fmt.Sprintf("content[%d]", index),
						item,
						nil,
						"模型不支持首尾帧输入,已移除 last_frame。",
						capabilityPath(modelType, "input_first_last_frame"),
						map[string]any{
							"input_first_last_frame":    capabilityValue(context.modelCapability, modelType, "input_first_last_frame"),
							"max_images_for_last_frame": capabilityValue(context.modelCapability, modelType, "max_images_for_last_frame"),
						},
					)
					continue
				}
				next = append(next, item)
			}
			content = next
			deleteFieldsWithLog(params, context, "ContentFilterProcessor", []string{"last_frame", "lastFrame"}, "模型不支持首尾帧输入,已移除快捷字段。", capabilityPath(modelType, "input_first_last_frame"), map[string]any{
				"input_first_last_frame":    capabilityValue(context.modelCapability, modelType, "input_first_last_frame"),
				"max_images_for_last_frame": capabilityValue(context.modelCapability, modelType, "max_images_for_last_frame"),
			})
		}
	}
	params["content"] = mapsToAnySlice(content)
	return true
}

// inputAudioProcessor strips audio reference inputs from video requests when
// the model capability does not enable input audio.
type inputAudioProcessor struct{}

// Name returns the processor name used in change records.
func (inputAudioProcessor) Name() string { return "InputAudioProcessor" }

// ShouldProcess reports whether modelType is a video type and the content
// array holds at least one audio item.
func (inputAudioProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	if !isVideoModelType(modelType) {
		return false
	}
	for _, item := range contentItems(params["content"]) {
		if isAudioContent(item) {
			return true
		}
	}
	return false
}

// Process leaves params untouched when the model supports input audio.
// Otherwise it removes every audio item from params["content"] and deletes the
// audio shortcut fields, recording each removal. It always returns true.
func (inputAudioProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	content := contentItems(params["content"])
	if len(content) == 0 {
		return true
	}
	// With an empty capability map, support stays false and audio is removed.
	supportsInputAudio := false
	if len(context.modelCapability) > 0 {
		if isOmniVideoLike(context) {
			supportsInputAudio = supportsOmniAudioReference(context)
		} else if capability := capabilityForType(context.modelCapability, modelType); capability != nil {
			supportsInputAudio = boolFromAny(capability["input_audio"])
		}
	}
	if supportsInputAudio {
		return true
	}

	// The evidence does not depend on the item, so look it up once;
	// recordChange clones the value for every entry it stores.
	path, value := audioInputCapabilityEvidence(context, modelType)
	next := make([]map[string]any, 0, len(content))
	for index, item := range content {
		if isAudioContent(item) {
			context.recordChange(
				"InputAudioProcessor",
				"remove",
				fmt.Sprintf("content[%d]", index),
				item,
				nil,
				"模型能力未开启输入音频,已移除 audio_url。",
				path,
				value,
			)
			continue
		}
		next = append(next, item)
	}
	params["content"] = mapsToAnySlice(next)
	deleteFieldsWithLog(params, context, "InputAudioProcessor", []string{"audio_url", "audioUrl", "reference_audio", "referenceAudio"}, "模型能力未开启输入音频,已移除音频参考快捷字段。", path, value)
	return true
}

// durationProcessor snaps "duration" to the values the model capability allows.
type durationProcessor struct{}

// Name returns the processor name used in change records.
func (durationProcessor) Name() string { return "DurationProcessor" }

// ShouldProcess reports whether modelType is a video type and "duration" is set.
func (durationProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	return isVideoModelType(modelType) && params["duration"] != nil
}

// Process normalises params["duration"]: to the closest entry of
// "duration_options" when that list applies, otherwise into "duration_range"
// honouring "duration_step". Non-positive durations and models without a
// capability entry are left alone. It always returns true.
func (durationProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	capability := capabilityForType(context.modelCapability, modelType)
	if capability == nil {
		return true
	}
	duration := floatFromAny(params["duration"])
	if duration <= 0 {
		return true
	}
	resolution := firstNonEmptyString(stringFromAny(params["resolution"]), context.resolution)
	modeKey := videoModeKey(params)
	if options := scopedNumberList(capability["duration_options"], resolution, modeKey); len(options) > 0 {
		normalized := closestNumber(duration,
options) params["duration"] = normalized syncDurationSeconds(params) if normalized != duration { context.recordChange( "DurationProcessor", "adjust", "duration", duration, normalized, "duration 不在模型固定时长选项内,已调整为最近的允许值。", capabilityPath(modelType, "duration_options"), capability["duration_options"], ) } return true } if minValue, maxValue, ok := scopedRange(capability["duration_range"], resolution, modeKey); ok { step := durationStep(capability["duration_step"], resolution, modeKey) normalized := normalizeDurationByRange(duration, minValue, maxValue, step) params["duration"] = normalized syncDurationSeconds(params) if normalized != duration { context.recordChange( "DurationProcessor", "adjust", "duration", duration, normalized, "duration 超出模型时长范围或步进配置,已按能力配置归一。", capabilityPath(modelType, "duration_range"), map[string]any{ "duration_range": capability["duration_range"], "duration_step": capability["duration_step"], }, ) } } return true } type audioProcessor struct{} func (audioProcessor) Name() string { return "AudioProcessor" } func (audioProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool { return isVideoModelType(modelType) && (params["audio"] != nil || params["output_audio"] != nil) } func (audioProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool { capability := capabilityForType(context.modelCapability, modelType) if capability == nil || !boolFromAny(capability["output_audio"]) { for _, key := range []string{"audio", "output_audio"} { if before, ok := params[key]; ok { delete(params, key) context.recordChange( "AudioProcessor", "remove", key, before, nil, "模型能力未开启输出音频,已移除音频输出参数。", capabilityPath(modelType, "output_audio"), capabilityValue(context.modelCapability, modelType, "output_audio"), ) } } } return true } type imageCountProcessor struct{} func (imageCountProcessor) Name() string { return "ImageCountProcessor" } func (imageCountProcessor) ShouldProcess(params 
map[string]any, modelType string, context *paramProcessContext) bool { return modelType == "image_generate" || modelType == "image_edit" } func (imageCountProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool { capability := capabilityForType(context.modelCapability, modelType) if capability == nil || !boolFromAny(capability["output_multiple_images"]) { return true } maxCount := int(math.Round(floatFromAny(capability["output_max_images_count"]))) if maxCount <= 0 { return true } count := int(math.Round(floatFromAny(params["n"]))) if count <= 0 { count = int(math.Round(floatFromAny(params["batch_size"]))) } if count <= 0 { count = 1 } if count > maxCount { before := count count = maxCount context.recordChange( "ImageCountProcessor", "adjust", "n", before, count, "请求图片数量超过模型输出上限,已按 output_max_images_count 截断。", capabilityPath(modelType, "output_max_images_count"), capability["output_max_images_count"], ) } params["n"] = count return true } func ensureVideoContent(params map[string]any, context *paramProcessContext) { if len(contentItems(params["content"])) > 0 { return } content := make([]map[string]any, 0) if prompt := firstNonEmptyString(stringFromAny(params["prompt"]), stringFromAny(params["input"])); prompt != "" { content = append(content, map[string]any{"type": "text", "text": prompt}) } appendURL := func(kind string, role string, url string) { url = strings.TrimSpace(url) if url == "" { return } item := map[string]any{"type": kind, "role": role} switch kind { case "image_url": item["image_url"] = map[string]any{"url": url} case "video_url": item["video_url"] = map[string]any{"url": url} case "audio_url": item["audio_url"] = map[string]any{"url": url} } content = append(content, item) } firstFrame := firstNonEmptyStringValue(params, "first_frame", "firstFrame") appendURL("image_url", "first_frame", firstFrame) appendURL("image_url", "last_frame", firstNonEmptyStringValue(params, "last_frame", "lastFrame")) imageURLs := 
firstNonEmptyStringListFromAny(params["image"], params["images"], params["image_url"], params["imageUrl"], params["image_urls"], params["imageUrls"])
	// Without an explicit first frame the first plain image takes that role.
	if firstFrame == "" && len(imageURLs) > 0 {
		appendURL("image_url", "first_frame", imageURLs[0])
		imageURLs = imageURLs[1:]
	}
	for _, url := range imageURLs {
		appendURL("image_url", "reference_image", url)
	}
	for _, url := range firstNonEmptyStringListFromAny(params["reference_image"], params["referenceImage"]) {
		appendURL("image_url", "reference_image", url)
	}
	for _, url := range firstNonEmptyStringListFromAny(params["video"], params["video_url"], params["videoUrl"], params["reference_video"], params["referenceVideo"]) {
		appendURL("video_url", "reference_video", url)
	}
	for _, url := range firstNonEmptyStringListFromAny(params["audio_url"], params["audioUrl"], params["reference_audio"], params["referenceAudio"]) {
		appendURL("audio_url", "reference_audio", url)
	}
	if len(content) > 0 {
		params["content"] = mapsToAnySlice(content)
		context.recordChange(
			"ContentBuildProcessor",
			"set",
			"content",
			nil,
			params["content"],
			"将 prompt/first_frame/reference_* 等快捷字段转换为 content 数组,后续处理器可按模型能力逐项过滤。",
			"",
			nil,
		)
	}
}

// effectiveModelCapability returns the candidate's capabilities with
// CapabilityOverride applied on top. When both sides hold a map under the same
// key the two maps are merged; otherwise the override value replaces the base
// value. The candidate's own maps are not modified.
func effectiveModelCapability(candidate store.RuntimeModelCandidate) map[string]any {
	base := cloneMap(candidate.Capabilities)
	if base == nil {
		// NOTE(review): cloneMap is defined elsewhere; if it returns nil for a
		// nil input, the writes below would panic on a nil map.
		base = map[string]any{}
	}
	for key, value := range candidate.CapabilityOverride {
		if baseChild, ok := base[key].(map[string]any); ok {
			if overrideChild, ok := value.(map[string]any); ok {
				base[key] = mergeMap(baseChild, overrideChild)
				continue
			}
		}
		base[key] = cloneAny(value)
	}
	return base
}

// filterUnsupportedOmniVideoContent returns content without the video and
// audio items the omni capability cannot accept: videos whose reference type
// is not in supported_modes or that exceed max_videos, and audios that are
// unsupported or exceed max_audios. Every removal is recorded. Other items
// pass through unchanged.
func filterUnsupportedOmniVideoContent(content []map[string]any, context *paramProcessContext) []map[string]any {
	capability := omniVideoCapability(context)
	// Videos are unlimited unless max_videos is configured. Audios default to
	// zero and become unlimited only when audio reference is supported without
	// an explicit max_audios.
	maxVideos := math.Inf(1)
	maxAudios := 0.0
	if capability != nil {
		if value, ok := numericField(capability, "max_videos"); ok {
			maxVideos = value
		}
		if value, ok := numericField(capability, "max_audios"); ok {
			maxAudios = value
		} else if supportsOmniAudioReference(context) {
			maxAudios = math.Inf(1)
		}
	}
	videoCount := 0.0
	audioCount := 0.0
	out := make([]map[string]any, 0, len(content))
	for index, item := range content {
		switch {
		case isVideoContent(item):
			if !supportsOmniVideoReference(item, capability) {
				path, value := omniCapabilityEvidence(context, "supported_modes")
				context.recordChange(
					"ContentFilterProcessor",
					"remove",
					fmt.Sprintf("content[%d]", index),
					item,
					nil,
					"视频参考类型不在 omni_video.supported_modes 允许范围内。",
					path,
					value,
				)
				continue
			}
			if videoCount >= maxVideos {
				path, value := omniCapabilityEvidence(context, "max_videos")
				context.recordChange(
					"ContentFilterProcessor",
					"remove",
					fmt.Sprintf("content[%d]", index),
					item,
					nil,
					"视频参考数量超过 omni_video.max_videos 限制。",
					path,
					value,
				)
				continue
			}
			videoCount++
		case isAudioContent(item):
			if !supportsOmniAudioReference(context) {
				path, value := omniCapabilityEvidence(context, "input_audio")
				context.recordChange(
					"ContentFilterProcessor",
					"remove",
					fmt.Sprintf("content[%d]", index),
					item,
					nil,
					"模型能力不支持音频参考,已移除 audio_url。",
					path,
					mergeMetrics(map[string]any{"input_audio": value}, omniCapabilityBundle(context, "max_audios")),
				)
				continue
			}
			if audioCount >= maxAudios {
				path, value := omniCapabilityEvidence(context, "max_audios")
				context.recordChange(
					"ContentFilterProcessor",
					"remove",
					fmt.Sprintf("content[%d]", index),
					item,
					nil,
					"音频参考数量超过 omni_video.max_audios 限制。",
					path,
					value,
				)
				continue
			}
			audioCount++
		}
		out = append(out, item)
	}
	return out
}

// isOmniVideoLike reports whether the candidate is an omni model, either by
// its model type or by having an "omni_video"/"omni" capability entry. A nil
// context is treated as not omni, matching the nil handling of the other omni
// helpers.
func isOmniVideoLike(context *paramProcessContext) bool {
	if context == nil {
		return false
	}
	modelType := strings.TrimSpace(context.candidate.ModelType)
	return modelType == "omni_video" || modelType == "omni" || context.modelCapability["omni_video"] != nil || context.modelCapability["omni"] != nil
}

// omniVideoCapability returns the "omni_video" capability entry, falling back
// to "omni"; nil when neither exists.
func omniVideoCapability(context *paramProcessContext) map[string]any {
	if capability := capabilityForType(context.modelCapability, "omni_video"); capability != nil {
		return capability
	}
	return capabilityForType(context.modelCapability, "omni")
}

// supportsOmniAudioReference reports whether the omni capability enables
// input_audio or declares a positive max_audios.
func supportsOmniAudioReference(context *paramProcessContext) bool {
	capability := omniVideoCapability(context)
	return capability != nil && (boolFromAny(capability["input_audio"]) || floatFromAny(capability["max_audios"]) > 0)
}

// supportsOmniVideoReference reports whether a video content item may be sent
// to the model. A nil capability allows everything; max_videos == 0 allows
// nothing. Otherwise edit-style videos (role "video_base" or refer_type
// "base") need the "video_edit" mode, reference-style videos need
// "video_reference", and untyped videos need either.
func supportsOmniVideoReference(item map[string]any, capability map[string]any) bool {
	if capability == nil {
		return true
	}
	if value, ok := numericField(capability, "max_videos"); ok && value == 0 {
		return false
	}
	supportedModes := stringListFromAny(capability["supported_modes"])
	supportsReference := containsString(supportedModes, "video_reference")
	supportsEdit := containsString(supportedModes, "video_edit")
	// A failed assertion leaves video nil; indexing a nil map yields nil.
	video, _ := item["video_url"].(map[string]any)
	referType := stringFromAny(video["refer_type"])
	role := stringFromAny(item["role"])
	isEditVideo := role == "video_base" || referType == "base"
	isReferenceVideo := role == "video_feature" || role == "reference_video" || referType == "feature"
	if isEditVideo {
		return supportsEdit
	}
	if isReferenceVideo {
		return supportsReference
	}
	return supportsReference || supportsEdit
}

// downgradeReferenceImageIfNeeded rewrites the role of every reference_image
// item in content to first_frame when the model (for the video-capable types
// listed below) does not support reference images. It records each rewrite
// and adds a single warning to params. The items of content are mutated in
// place.
func downgradeReferenceImageIfNeeded(params map[string]any, content []map[string]any, modelType string, context *paramProcessContext) {
	if modelType != "image_to_video" && modelType != "video_generate" && modelType != "video_edit" && modelType != "omni_video" && modelType != "omni" {
		return
	}
	if supportsReferenceImage(context.modelCapability, modelType) {
		return
	}
	count := 0
	for index, item := range content {
		if stringFromAny(item["type"]) == "image_url" && stringFromAny(item["role"]) == "reference_image" {
			before := cloneMap(item)
			item["role"] = "first_frame"
			context.recordChange(
				"ContentFilterProcessor",
				"adjust",
				fmt.Sprintf("content[%d].role", index),
				before,
				item,
				"模型不支持 reference_image,已降级为 first_frame。",
				capabilityPath(modelType, "input_reference_generate_single"),
				map[string]any{
					"input_reference_generate_single":
capabilityValue(context.modelCapability, modelType, "input_reference_generate_single"),
					"input_reference_generate_multiple": capabilityValue(context.modelCapability, modelType, "input_reference_generate_multiple"),
					"max_images":                        capabilityValue(context.modelCapability, modelType, "max_images"),
				},
			)
			count++
		}
	}
	if count > 0 {
		appendParamWarning(params, "reference_image is unsupported by the selected model and was downgraded to first_frame")
	}
}

// supportsReferenceImage reports whether reference images may be sent. It
// inspects the capability entry for modelType and, for other types, the
// "image_to_video" entry as well. With no entry at all the answer is true. An
// entry supports reference images when one of its input_reference_generate_*
// flags is true, or, if it declares neither flag, when max_images exceeds 1.
func supportsReferenceImage(modelCapability map[string]any, modelType string) bool {
	candidates := []map[string]any{}
	if capability := capabilityForType(modelCapability, modelType); capability != nil {
		candidates = append(candidates, capability)
	}
	if modelType != "image_to_video" {
		if capability := capabilityForType(modelCapability, "image_to_video"); capability != nil {
			candidates = append(candidates, capability)
		}
	}
	if len(candidates) == 0 {
		return true
	}
	for _, capability := range candidates {
		_, hasSingle := capability["input_reference_generate_single"]
		_, hasMultiple := capability["input_reference_generate_multiple"]
		if hasSingle || hasMultiple {
			if boolFromAny(capability["input_reference_generate_single"]) || boolFromAny(capability["input_reference_generate_multiple"]) {
				return true
			}
			// Explicit flags win over max_images for this entry.
			continue
		}
		if value, ok := numericField(capability, "max_images"); ok && value > 1 {
			return true
		}
	}
	return false
}

// supportsFirstAndLastFrame reports whether the capability entry for modelType
// enables input_first_last_frame or declares a positive
// max_images_for_last_frame. A missing entry means unsupported.
func supportsFirstAndLastFrame(modelCapability map[string]any, modelType string) bool {
	capability := capabilityForType(modelCapability, modelType)
	if capability == nil {
		return false
	}
	return boolFromAny(capability["input_first_last_frame"]) || floatFromAny(capability["max_images_for_last_frame"]) > 0
}

// validateAndAdjustAspectRatio returns the aspect ratio to use and whether one
// is available. Values outside aspect_ratio_range are moved into the range; a
// nil allow-list accepts anything; an empty one rejects everything;
// "adaptive"/"keep_ratio" are rejected unless listed; any other unlisted value
// is replaced by the first allowed entry.
func validateAndAdjustAspectRatio(aspectRatio string, capability map[string]any, allowed []string) (string, bool) {
	if !isMediaModelTypeWithAspectRatio(capability) {
		return "", false
	}
	if ratioRange, ok := numberPair(capability["aspect_ratio_range"]); ok {
		ratio, valid := aspectRatioNumber(aspectRatio)
		if !valid || ratio < ratioRange[0] || ratio > ratioRange[1] {
			return adjustAspectRatioToRange(aspectRatio, ratioRange[0], ratioRange[1], allowed), true
		}
	}
	if allowed == nil {
		return aspectRatio, true
	}
	if len(allowed) == 0 {
		return "", false
	}
	if containsString(allowed, aspectRatio) {
		return aspectRatio, true
	}
	// The symbolic values have no sensible numeric substitute.
	if aspectRatio == "adaptive" || aspectRatio == "keep_ratio" {
		return "", false
	}
	return allowed[0], true
}

// isMediaModelTypeWithAspectRatio reports whether a capability entry exists.
func isMediaModelTypeWithAspectRatio(capability map[string]any) bool {
	return capability != nil
}

// aspectRatioAllowed extracts the allow-list from an aspect_ratio_allowed
// capability value. A list is returned directly; a map is looked up by
// resolution. It returns nil when no list applies, which callers treat as
// "unrestricted", distinct from an empty non-nil list.
func aspectRatioAllowed(value any, resolution string) []string {
	switch typed := value.(type) {
	case []any:
		return stringListFromAny(typed)
	case []string:
		return typed
	case map[string]any:
		if resolution != "" {
			if values := stringListFromAny(typed[resolution]); len(values) > 0 {
				return values
			}
		}
		return nil
	default:
		return nil
	}
}

// scopedNumberList extracts a list of numbers from a capability value that is
// either a list or a map of lists keyed by scope. For a map, the first
// non-empty scope with a non-empty list wins; otherwise any entry with a
// non-empty list is used.
//
// NOTE(review): that last fallback ranges over a Go map, so which entry is
// chosen is unspecified when several qualify.
func scopedNumberList(value any, scopes ...string) []float64 {
	switch typed := value.(type) {
	case []any:
		out := make([]float64, 0, len(typed))
		for _, item := range typed {
			// Non-positive entries are dropped.
			if number := floatFromAny(item); number > 0 {
				out = append(out, number)
			}
		}
		return out
	case []float64:
		return typed
	case []int:
		out := make([]float64, 0, len(typed))
		for _, item := range typed {
			out = append(out, float64(item))
		}
		return out
	case map[string]any:
		for _, scope := range scopes {
			if scope == "" {
				continue
			}
			if values := scopedNumberList(typed[scope]); len(values) > 0 {
				return values
			}
		}
		for _, item := range typed {
			if values := scopedNumberList(item); len(values) > 0 {
				return values
			}
		}
	}
	return nil
}

// scopedRange extracts a (min, max) pair from a capability value that is
// either a pair or a map of pairs keyed by scope, using the same lookup order
// as scopedNumberList. The final bool is false when no pair is found.
//
// NOTE(review): the unscoped fallback ranges over a Go map, so its choice is
// unspecified when several entries hold a pair.
func scopedRange(value any, scopes ...string) (float64, float64, bool) {
	if pair, ok := numberPair(value); ok {
		return pair[0], pair[1], true
	}
	if typed, ok := value.(map[string]any); ok {
		for _, scope := range scopes {
			if scope == "" {
				continue
			}
			if minValue, maxValue, ok := scopedRange(typed[scope]); ok {
				return minValue, maxValue, true
			}
		}
		for _, item := range typed {
			if minValue, maxValue, ok := scopedRange(item); ok {
				return minValue, maxValue, true
			}
		}
	}
	return 0, 0, false
}

// durationStep extracts a positive step from a capability value that is either
// a number or a map of numbers keyed by scope, using the same lookup order as
// scopedNumberList. It returns 0 when no positive step is found.
//
// NOTE(review): the unscoped fallback ranges over a Go map, so its choice is
// unspecified when several entries hold a step.
func durationStep(value any, scopes ...string) float64 {
	if step := floatFromAny(value); step > 0 {
		return step
	}
	if typed, ok := value.(map[string]any); ok {
		for _, scope := range scopes {
			if scope == "" {
				continue
			}
			if step := durationStep(typed[scope]); step > 0 {
				return step
			}
		}
		for _, item := range typed {
			if step := durationStep(item); step > 0 {
				return step
			}
		}
	}
	return 0
}

// normalizeDurationByRange clamps target into [minValue, maxValue] and, when
// step is positive, snaps it to the nearest minValue + k*step. The result is
// rounded to six decimals and never exceeds maxValue.
func normalizeDurationByRange(target float64, minValue float64, maxValue float64, step float64) float64 {
	// Tolerance that keeps float noise from being read as "above the maximum".
	const epsilon = 1e-9

	clamped := math.Min(math.Max(target, minValue), maxValue)
	if step <= 0 {
		return clamped
	}
	snapped := math.Round((clamped-minValue)/step)*step + minValue
	// When the range is not a whole number of steps, rounding to the nearest
	// step can overshoot (min=1, max=8, step=4, target=8 would give 9). Fall
	// back to the highest step that still fits inside the range.
	if snapped-maxValue > epsilon {
		steps := math.Max(0, math.Floor((maxValue-minValue)/step+epsilon))
		snapped = steps*step + minValue
	}
	return math.Round(snapped*1_000_000) / 1_000_000
}

// closestNumber returns the element of values nearest to target; ties go to
// the earlier element. An empty list returns target unchanged.
func closestNumber(target float64, values []float64) float64 {
	if len(values) == 0 {
		return target
	}
	closest := values[0]
	minDiff := math.Abs(target - closest)
	for _, value := range values[1:] {
		if diff := math.Abs(target - value); diff < minDiff {
			minDiff = diff
			closest = value
		}
	}
	return closest
}

// videoModeKey derives the capability scope key from the frame roles present
// in params["content"]: "input_first_last_frame", "input_first_frame",
// "input_last_frame", or "" when neither frame role is present.
func videoModeKey(params map[string]any) string {
	hasFirstFrame := false
	hasLastFrame := false
	for _, item := range contentItems(params["content"]) {
		switch stringFromAny(item["role"]) {
		case "first_frame":
			hasFirstFrame = true
		case "last_frame":
			hasLastFrame = true
		}
	}
	switch {
	case hasFirstFrame && hasLastFrame:
		return "input_first_last_frame"
	case hasFirstFrame:
		return "input_first_frame"
	case hasLastFrame:
		return "input_last_frame"
	default:
		return ""
	}
}

// syncDurationSeconds mirrors "duration" into "duration_seconds", but only
// when the request already carried a non-nil "duration_seconds".
func syncDurationSeconds(params map[string]any) {
	if params["duration_seconds"] != nil {
		params["duration_seconds"] = params["duration"]
	}
}

// syncVideoConvenienceFields deletes the video and audio shortcut fields from
// params when the filtered content no longer contains a video or audio item,
// recording each deletion.
func syncVideoConvenienceFields(params map[string]any, content []map[string]any, context *paramProcessContext) {
	hasVideo := false
	hasAudio := false
	for _, item := range content {
		hasVideo = hasVideo || isVideoContent(item)
		hasAudio = hasAudio || isAudioContent(item)
	}
	if !hasVideo {
		path, value :=
omniCapabilityEvidence(context, "supported_modes")
		deleteFieldsWithLog(params, context, "ContentFilterProcessor", []string{"video", "video_url", "videoUrl", "reference_video", "referenceVideo"}, "对应视频 content 已被模型能力过滤,移除视频参考快捷字段。", path, value)
	}
	if !hasAudio {
		path, value := omniCapabilityEvidence(context, "input_audio")
		deleteFieldsWithLog(params, context, "ContentFilterProcessor", []string{"audio_url", "audioUrl", "reference_audio", "referenceAudio"}, "对应音频 content 已被模型能力过滤,移除音频参考快捷字段。", path, mergeMetrics(map[string]any{"input_audio": value}, omniCapabilityBundle(context, "max_audios")))
	}
}

// deleteFieldsWithLog deletes each of keys that is present in params and
// records a "remove" change for it with the given reason and evidence.
func deleteFieldsWithLog(params map[string]any, context *paramProcessContext, processor string, keys []string, reason string, capabilityPath string, capabilityValue any) {
	for _, key := range keys {
		before, ok := params[key]
		if !ok {
			continue
		}
		delete(params, key)
		context.recordChange(processor, "remove", key, before, nil, reason, capabilityPath, capabilityValue)
	}
}

// appendParamWarning adds warning to params["_param_warnings"] unless an equal
// warning is already listed. An existing value that is not a []any is replaced
// by a list holding only the new warning.
func appendParamWarning(params map[string]any, warning string) {
	warnings, _ := params["_param_warnings"].([]any)
	for _, item := range warnings {
		if stringFromAny(item) == warning {
			return
		}
	}
	params["_param_warnings"] = append(warnings, warning)
}

// filterContent returns the items of content for which keep returns true.
func filterContent(content []map[string]any, keep func(map[string]any) bool) []map[string]any {
	out := make([]map[string]any, 0, len(content))
	for _, item := range content {
		if keep(item) {
			out = append(out, item)
		}
	}
	return out
}

// contentItems converts a content value ([]any or []map[string]any) into a
// slice of cloned item maps. Non-map elements are skipped; any other input
// type yields nil.
func contentItems(value any) []map[string]any {
	switch typed := value.(type) {
	case []any:
		out := make([]map[string]any, 0, len(typed))
		for _, item := range typed {
			if object, ok := item.(map[string]any); ok {
				out = append(out, cloneMap(object))
			}
		}
		return out
	case []map[string]any:
		out := make([]map[string]any, 0, len(typed))
		for _, item := range typed {
			out = append(out, cloneMap(item))
		}
		return out
	default:
		return nil
	}
}

// mapsToAnySlice converts a slice of maps into a []any holding the same maps.
func mapsToAnySlice(values []map[string]any) []any {
	out := make([]any, 0, len(values))
	for _, value := range values {
		out = append(out, value)
	}
	return out
}

// isImageContent reports whether item is typed "image_url" or has an
// "image_url" field.
func isImageContent(item map[string]any) bool {
	return stringFromAny(item["type"]) == "image_url" || item["image_url"] != nil
}

// isVideoContent reports whether item is typed "video_url" or has a
// "video_url" field.
func isVideoContent(item map[string]any) bool {
	return stringFromAny(item["type"]) == "video_url" || item["video_url"] != nil
}

// isAudioContent reports whether item is typed "audio_url" or has an
// "audio_url" field.
func isAudioContent(item map[string]any) bool {
	return stringFromAny(item["type"]) == "audio_url" || item["audio_url"] != nil
}

// capabilityForType returns the capability entry stored under modelType, or
// nil when it is missing or not a map.
func capabilityForType(capabilities map[string]any, modelType string) map[string]any {
	if capabilities == nil {
		return nil
	}
	if typed, ok := capabilities[modelType].(map[string]any); ok {
		return typed
	}
	return nil
}

// capabilityPath builds the dotted evidence path "capabilities.<type>[.<key>]".
// A blank model type yields "".
func capabilityPath(modelType string, key string) string {
	modelType = strings.TrimSpace(modelType)
	if modelType == "" {
		return ""
	}
	if strings.TrimSpace(key) == "" {
		return "capabilities." + modelType
	}
	return "capabilities." + modelType + "." + key
}

// capabilityValue returns a clone of the value stored under key in the
// capability entry for modelType, or nil when the entry is missing.
func capabilityValue(capabilities map[string]any, modelType string, key string) any {
	capability := capabilityForType(capabilities, modelType)
	if capability == nil {
		return nil
	}
	return cloneAny(capability[key])
}

// capabilityEvidence returns the evidence path and the cloned capability value
// for modelType and key.
func capabilityEvidence(capabilities map[string]any, modelType string, key string) (string, any) {
	return capabilityPath(modelType, key), capabilityValue(capabilities, modelType, key)
}

// audioInputCapabilityEvidence returns the evidence used when input audio is
// removed. Omni-like models report input_audio together with max_audios;
// other models report the input_audio value of modelType.
func audioInputCapabilityEvidence(context *paramProcessContext, modelType string) (string, any) {
	if isOmniVideoLike(context) {
		path, value := omniCapabilityEvidence(context, "input_audio")
		return path, mergeMetrics(map[string]any{"input_audio": value}, omniCapabilityBundle(context, "max_audios"))
	}
	return capabilityEvidence(context.modelCapability, modelType, "input_audio")
}

// omniCapabilityType returns which capability key holds the omni settings:
// "omni_video" when present, else "omni" when present, else "omni_video".
func omniCapabilityType(context *paramProcessContext) string {
	if context != nil && capabilityForType(context.modelCapability, "omni_video") != nil {
		return "omni_video"
	}
	if context != nil && capabilityForType(context.modelCapability, "omni") != nil {
		return "omni"
	}
	return "omni_video"
}

// omniCapabilityEvidence returns the evidence path and cloned value for key in
// the omni capability entry. A nil context yields a nil value.
func omniCapabilityEvidence(context *paramProcessContext, key string) (string, any) {
	modelType := omniCapabilityType(context)
	var capabilities map[string]any
	if context != nil {
		capabilities = context.modelCapability
	}
	return capabilityPath(modelType, key), capabilityValue(capabilities, modelType, key)
}

// omniCapabilityBundle returns a map from each requested key to its cloned
// value in the omni capability entry.
func omniCapabilityBundle(context *paramProcessContext, keys ...string) map[string]any {
	modelType := omniCapabilityType(context)
	var capabilities map[string]any
	if context != nil {
		capabilities = context.modelCapability
	}
	out := map[string]any{}
	for _, key := range keys {
		out[key] = capabilityValue(capabilities, modelType, key)
	}
	return out
}

// numericField returns the numeric value stored under key and whether the key
// is present. A present key always reports true, whatever floatFromAny makes
// of its value.
func numericField(values map[string]any, key string) (float64, bool) {
	// Reading from a nil map is safe and reports the key as absent.
	value, ok := values[key]
	if !ok {
		return 0, false
	}
	return floatFromAny(value), true
}

// boolFromAny returns value when it is a bool and false for anything else.
func boolFromAny(value any) bool {
	typed, _ := value.(bool)
	return typed
}

// firstNonEmptyStringValue returns the first non-empty string found in values
// under keys, in order, or "" when none is set.
func firstNonEmptyStringValue(values map[string]any, keys ...string) string {
	for _, key := range keys {
		if value := stringFromAny(values[key]); value != "" {
			return value
		}
	}
	return ""
}

// firstNonEmptyStringListFromAny returns the string list of the first argument
// that yields at least one entry, or nil.
func firstNonEmptyStringListFromAny(values ...any) []string {
	for _, value := range values {
		if items := stringListFromAny(value); len(items) > 0 {
			return items
		}
	}
	return nil
}

// stringListFromAny converts a []string, []any or single string into a list of
// non-empty strings. Entries of a []string and a single string are trimmed.
// Unsupported types and blank strings yield nil.
func stringListFromAny(value any) []string {
	switch typed := value.(type) {
	case []string:
		out := make([]string, 0, len(typed))
		for _, item := range typed {
			if text := strings.TrimSpace(item); text != "" {
				out = append(out, text)
			}
		}
		return out
	case []any:
		out := make([]string, 0, len(typed))
		for _, item := range typed {
			if text := stringFromAny(item); text != "" {
				out = append(out, text)
			}
		}
		return out
	case string:
		text := strings.TrimSpace(typed)
		if text == "" {
			return nil
		}
		return []string{text}
	default:
		return nil
	}
}

// containsString reports whether target is an element of values.
func containsString(values []string, target string) bool {
	for _, value := range values {
		if value == target {
			return true
		}
	}
	return false
}

// appendUniqueString appends the trimmed value to *values unless it is blank
// or already present.
func appendUniqueString(values *[]string, value string) {
	value = strings.TrimSpace(value)
	if value == "" {
		return
	}
	for _, existing := range *values {
		if existing == value {
			return
		}
	}
	*values = append(*values, value)
}

// numberPair reads the first two elements of a []any, []float64 or []int as a
// pair of float64. It reports false for shorter lists and other types.
func numberPair(value any) ([2]float64, bool) {
	switch typed := value.(type) {
	case []any:
		if len(typed) < 2 {
			return [2]float64{}, false
		}
		return [2]float64{floatFromAny(typed[0]), floatFromAny(typed[1])}, true
	case []float64:
		if len(typed) < 2 {
			return [2]float64{}, false
		}
		return [2]float64{typed[0], typed[1]}, true
	case []int:
		if len(typed) < 2 {
			return [2]float64{}, false
		}
		return [2]float64{float64(typed[0]), float64(typed[1])}, true
	default:
		return [2]float64{}, false
	}
}

// validAspectRatio reports whether value is "adaptive", "keep_ratio" or a
// "W:H" ratio accepted by aspectRatioNumber.
func validAspectRatio(value string) bool {
	if value == "adaptive" || value == "keep_ratio" {
		return true
	}
	_, ok := aspectRatioNumber(value)
	return ok
}

// aspectRatioNumber parses "W:H" into W/H. It reports false unless value has
// exactly two colon-separated parts that both parse as positive numbers.
func aspectRatioNumber(value string) (float64, bool) {
	parts := strings.Split(value, ":")
	if len(parts) != 2 {
		return 0, false
	}
	width := parsePositiveFloat(parts[0])
	height := parsePositiveFloat(parts[1])
	if width <= 0 || height <= 0 {
		return 0, false
	}
	return width / height, true
}

// adjustAspectRatioToRange picks a replacement for a ratio outside
// [minValue, maxValue]. An unparseable value becomes the first allowed entry,
// or "1:1" without an allow-list. Otherwise the allowed ratio inside the range
// closest to the requested one is used; failing that, the nearer range bound
// formatted as "<bound>:1".
func adjustAspectRatioToRange(value string, minValue float64, maxValue float64, allowed []string) string {
	current, ok := aspectRatioNumber(value)
	if !ok {
		if len(allowed) > 0 {
			return allowed[0]
		}
		return "1:1"
	}
	closest := ""
	minDiff := math.Inf(1)
	for _, candidate := range allowed {
		ratio, ok := aspectRatioNumber(candidate)
		if !ok || ratio < minValue || ratio > maxValue {
			continue
		}
		if diff := math.Abs(ratio - current); diff < minDiff {
			minDiff = diff
			closest = candidate
		}
	}
	if closest != "" {
		return closest
	}
	if current < minValue {
		return ratioString(minValue)
	}
	return ratioString(maxValue)
}

// ratioString formats a positive ratio as "<value>:1" with up to six decimals
// and no trailing zeros. Non-positive input yields "1:1".
func ratioString(value float64) string {
	if value <= 0 {
		return "1:1"
	}
	return strings.TrimRight(strings.TrimRight(strconv.FormatFloat(value, 'f', 6, 64), "0"), ".") + ":1"
}

func parsePositiveFloat(value string) float64 {
	for _, r := range strings.TrimSpace(value) {
		if r < '0' || r > '9' {
			if
r != '.' { return 0 } } } out, _ := strconv.ParseFloat(strings.TrimSpace(value), 64) return out } func isEmptyParamString(value string) bool { normalized := strings.ToLower(strings.TrimSpace(value)) return normalized == "null" || normalized == "undefined" } func isImageResolution(modelType string, value string) bool { return (modelType == "image_generate" || modelType == "image_edit") && containsString([]string{"1K", "2K", "4K", "8K"}, value) } func isVideoResolution(modelType string, value string) bool { return isVideoModelType(modelType) && containsString([]string{"480p", "720p", "1080p", "1440p", "2160p"}, value) } func isVideoModelType(modelType string) bool { return modelType == "video_generate" || modelType == "text_to_video" || modelType == "image_to_video" || modelType == "video_edit" || modelType == "omni_video" || modelType == "omni" } func cloneMap(values map[string]any) map[string]any { out := map[string]any{} for key, value := range values { out[key] = cloneAny(value) } return out } func cloneAny(value any) any { switch typed := value.(type) { case map[string]any: return cloneMap(typed) case []any: out := make([]any, 0, len(typed)) for _, item := range typed { out = append(out, cloneAny(item)) } return out case []map[string]any: out := make([]any, 0, len(typed)) for _, item := range typed { out = append(out, cloneMap(item)) } return out default: return value } }