From b7500d81d15b66bfc06c9a723f4bf016c900c0f4 Mon Sep 17 00:00:00 2001
From: wangbo
Date: Sun, 7 Jun 2026 23:55:25 +0800
Subject: [PATCH] =?UTF-8?q?=E8=A1=A5=E5=85=A8=E5=9B=BE=E5=83=8F=E5=B0=BA?=
 =?UTF-8?q?=E5=AF=B8=E9=A2=84=E5=A4=84=E7=90=86=E7=BA=A6=E6=9D=9F?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 apps/api/internal/runner/param_processor.go   |   1 +
 .../internal/runner/param_processor_media.go  | 474 ++++++++++++++++++
 .../internal/runner/param_processor_test.go   |  99 ++++
 .../internal/runner/param_processor_utils.go  |  58 ++
 4 files changed, 632 insertions(+)

diff --git a/apps/api/internal/runner/param_processor.go b/apps/api/internal/runner/param_processor.go
index 24349b3..f1216bf 100644
--- a/apps/api/internal/runner/param_processor.go
+++ b/apps/api/internal/runner/param_processor.go
@@ -57,6 +57,7 @@ func NewParamProcessorChain() ParamProcessorChain {
 		processors: []paramProcessor{
 			resolutionNormalizeProcessor{},
 			aspectRatioProcessor{},
+			imageSizeProcessor{},
 			messageContentProcessor{},
 			contentFilterProcessor{},
 			inputAudioProcessor{},
diff --git a/apps/api/internal/runner/param_processor_media.go b/apps/api/internal/runner/param_processor_media.go
index 7414098..8c576d3 100644
--- a/apps/api/internal/runner/param_processor_media.go
+++ b/apps/api/internal/runner/param_processor_media.go
@@ -172,6 +172,480 @@ func (aspectRatioProcessor) Process(params map[string]any, modelType string, con
 	return true
 }
 
+// imageSizeProcessor clamps explicit image dimensions (width/height or a
+// "WxH" size string) to the size constraints declared in the model capability.
+type imageSizeProcessor struct{}
+
+// Name returns the processor's identifier.
+func (imageSizeProcessor) Name() string { return "ImageSizeProcessor" }
+
+// ShouldProcess reports whether the request targets an image model, carries
+// usable dimensions, and the model capability declares any size constraint.
+func (imageSizeProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
+	if modelType != "image_generate" && modelType != "image_edit" {
+		return false
+	}
+	if _, _, ok := imageDimensionsFromParams(params); !ok {
+		return false
+	}
+	capability := capabilityForType(context.modelCapability, modelType)
+	return capability != nil && imageSizeCapabilityConfigured(capability)
+}
+
+// Process rewrites width, height, size and resolution in params so they satisfy
+// the model's size constraints, and records a change when anything was adjusted.
+// It always returns true.
+func (imageSizeProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
+	capability := capabilityForType(context.modelCapability, modelType)
+	if capability == nil {
+		return true
+	}
+	width, height, ok := imageDimensionsFromParams(params)
+	if !ok {
+		return true
+	}
+
+	// Snapshot the caller-supplied values so the change record shows what was sent.
+	before := map[string]any{}
+	for _, key := range []string{"width", "height", "size", "resolution"} {
+		if value, exists := params[key]; exists {
+			before[key] = cloneAny(value)
+		}
+	}
+
+	width, height = constrainImageDimensions(width, height, capability)
+	params["width"] = width
+	params["height"] = height
+	resolution := normalizeImageResolutionForCapability(firstNonEmptyString(stringFromAny(params["resolution"]), context.resolution), width, height, capability)
+	if resolution != "" {
+		params["resolution"] = resolution
+		context.resolution = resolution
+	}
+	if stringFromAny(capability["size_param_format"]) == "resolution" && resolution != "" {
+		params["size"] = resolution
+	} else {
+		params["size"] = fmt.Sprintf("%dx%d", width, height)
+	}
+
+	after := map[string]any{
+		"width":  params["width"],
+		"height": params["height"],
+		"size":   params["size"],
+	}
+	if value := stringFromAny(params["resolution"]); value != "" {
+		after["resolution"] = value
+	}
+	if !imageSizeMapsEqual(before, after) {
+		path, value := imageSizeConstraintEvidence(modelType, capability)
+		context.recordChange(
+			"ImageSizeProcessor",
+			"adjust",
+			"size",
+			before,
+			after,
+			"图像宽高不符合模型尺寸限制,已按模型能力调整 width、height 和 size。",
+			path,
+			value,
+		)
+	}
+	return true
+}
+
+// imageDimensionsFromParams reads explicit width/height from params, falling
+// back to parsing a "WxH" size string. ok is false when neither form is usable.
+func imageDimensionsFromParams(params map[string]any) (int, int, bool) {
+	width := positiveIntegerFromAny(params["width"])
+	height := positiveIntegerFromAny(params["height"])
+	if width > 0 && height > 0 {
+		return width, height, true
+	}
+	return parsePixelSizeString(stringFromAny(params["size"]))
+}
+
+// imageSizeCapabilityConfigured reports whether capability declares a pixel
+// range, a side-length range, an aspect-ratio range, or a side multiple.
+func imageSizeCapabilityConfigured(capability map[string]any) bool {
+	if capability == nil {
+		return false
+	}
+	if _, _, ok := imageOutputSizeRange(capability); ok {
+		return true
+	}
+	if _, _, ok := numberRangeFromAny(capability["width_height_range"]); ok {
+		return true
+	}
+	if _, _, ok := numberRangeFromAny(capability["aspect_ratio_range"]); ok {
+		return true
+	}
+	return positiveIntegerFromAny(capability["width_height_multiple"]) > 1
+}
+
+// constrainImageDimensions applies every configured constraint to width and
+// height. The constraints interact (one adjustment can break another), so
+// they are re-applied until the result is stable, at most four times.
+func constrainImageDimensions(width int, height int, capability map[string]any) (int, int) {
+	if width <= 0 || height <= 0 {
+		return width, height
+	}
+	result := imageDimensions{Width: width, Height: height}
+	for i := 0; i < 4; i++ {
+		before := result
+		applyImageOutputSizeRange(&result, capability)
+		applyImageWidthHeightRange(&result, capability)
+		applyImageAspectRatioRange(&result, capability)
+		applyImageWidthHeightMultiple(&result, capability)
+		if result == before {
+			break
+		}
+	}
+	return result.Width, result.Height
+}
+
+// imageDimensions is a width/height pair in pixels.
+type imageDimensions struct {
+	Width  int
+	Height int
+}
+
+// applyImageOutputSizeRange scales result uniformly so its pixel count falls
+// inside the capability's output size range.
+func applyImageOutputSizeRange(result *imageDimensions, capability map[string]any) {
+	minValue, maxValue, ok := imageOutputSizeRange(capability)
+	if !ok || result.Width <= 0 || result.Height <= 0 {
+		return
+	}
+	// Multiply as float64 so large dimensions cannot overflow int.
+	pixels := float64(result.Width) * float64(result.Height)
+	if minValue > 0 && pixels < minValue {
+		scaleImageDimensions(result, math.Sqrt(minValue/pixels), true)
+		return
+	}
+	if maxValue > 0 && pixels > maxValue {
+		scaleImageDimensions(result, math.Sqrt(maxValue/pixels), false)
+	}
+}
+
+// applyImageWidthHeightRange scales result uniformly so the longer side does
+// not exceed the maximum and the shorter side is not below the minimum.
+func applyImageWidthHeightRange(result *imageDimensions, capability map[string]any) {
+	minValue, maxValue, ok := numberRangeFromAny(capability["width_height_range"])
+	if !ok || result.Width <= 0 || result.Height <= 0 {
+		return
+	}
+	maxSide := float64(max(result.Width, result.Height))
+	minSide := float64(min(result.Width, result.Height))
+	if maxValue > 0 && maxSide > maxValue {
+		scaleImageDimensions(result, maxValue/maxSide, false)
+		return
+	}
+	if minValue > 0 && minSide < minValue {
+		scaleImageDimensions(result, minValue/minSide, true)
+	}
+}
+
+// applyImageAspectRatioRange enlarges height (too wide) or width (too tall)
+// until width/height lies inside the capability's aspect ratio range.
+func applyImageAspectRatioRange(result *imageDimensions, capability map[string]any) {
+	minValue, maxValue, ok := numberRangeFromAny(capability["aspect_ratio_range"])
+	if !ok || result.Width <= 0 || result.Height <= 0 {
+		return
+	}
+	ratio := float64(result.Width) / float64(result.Height)
+	if maxValue > 0 && ratio > maxValue {
+		result.Height = max(1, int(math.Ceil(float64(result.Width)/maxValue)))
+		return
+	}
+	if minValue > 0 && ratio < minValue {
+		result.Width = max(1, int(math.Ceil(float64(result.Height)*minValue)))
+	}
+}
+
+// applyImageWidthHeightMultiple aligns both sides to width_height_multiple.
+// It rounds to the nearest multiple, but rounds up (or down) instead when the
+// nearest multiple would fall below a configured minimum (or above a maximum),
+// so alignment cannot undo the range constraints applied before it.
+func applyImageWidthHeightMultiple(result *imageDimensions, capability map[string]any) {
+	multiple := positiveIntegerFromAny(capability["width_height_multiple"])
+	if multiple <= 1 || result.Width <= 0 || result.Height <= 0 {
+		return
+	}
+	aligned := imageDimensions{
+		Width:  roundImageDimensionToMultiple(result.Width, multiple),
+		Height: roundImageDimensionToMultiple(result.Height, multiple),
+	}
+	switch imageDimensionsRangeViolation(aligned, capability) {
+	case -1:
+		aligned.Width = ceilImageDimensionToMultiple(result.Width, multiple)
+		aligned.Height = ceilImageDimensionToMultiple(result.Height, multiple)
+	case 1:
+		aligned.Width = floorImageDimensionToMultiple(result.Width, multiple)
+		aligned.Height = floorImageDimensionToMultiple(result.Height, multiple)
+	}
+	*result = aligned
+}
+
+// imageDimensionsRangeViolation returns -1 when dims is below a configured
+// minimum (pixel count or shorter side), 1 when it is above a configured
+// maximum (pixel count or longer side), and 0 when it is within both ranges.
+func imageDimensionsRangeViolation(dims imageDimensions, capability map[string]any) int {
+	pixels := float64(dims.Width) * float64(dims.Height)
+	if minValue, maxValue, ok := imageOutputSizeRange(capability); ok {
+		if minValue > 0 && pixels < minValue {
+			return -1
+		}
+		if maxValue > 0 && pixels > maxValue {
+			return 1
+		}
+	}
+	if minValue, maxValue, ok := numberRangeFromAny(capability["width_height_range"]); ok {
+		if maxValue > 0 && float64(max(dims.Width, dims.Height)) > maxValue {
+			return 1
+		}
+		if minValue > 0 && float64(min(dims.Width, dims.Height)) < minValue {
+			return -1
+		}
+	}
+	return 0
+}
+
+// Capability keys that may carry the output pixel-count limits. They are
+// shared so the lookup and the change-record evidence stay in sync.
+var (
+	imageOutputSizeRangeKeys = []string{"output_size_range", "outputSizeRange", "size_range", "sizeRange"}
+	imageOutputMinSizeKeys   = []string{"output_min_size", "outputMinSize", "min_output_size", "minOutputSize", "min_size", "minSize", "output_min_pixels", "outputMinPixels", "min_pixels", "minPixels"}
+	imageOutputMaxSizeKeys   = []string{"output_max_size", "outputMaxSize", "max_output_size", "maxOutputSize", "max_size", "maxSize", "output_max_pixels", "outputMaxPixels", "max_pixels", "maxPixels"}
+)
+
+// imageOutputSizeRange returns the minimum and maximum output pixel counts
+// declared by capability, either as a [min, max] pair or as separate keys.
+// A bound of 0 means that side is unbounded.
+func imageOutputSizeRange(capability map[string]any) (float64, float64, bool) {
+	for _, key := range imageOutputSizeRangeKeys {
+		if minValue, maxValue, ok := numberRangeFromAny(capability[key]); ok {
+			return minValue, maxValue, true
+		}
+	}
+	minValue := firstPositiveNumber(capability, imageOutputMinSizeKeys)
+	maxValue := firstPositiveNumber(capability, imageOutputMaxSizeKeys)
+	return minValue, maxValue, minValue > 0 || maxValue > 0
+}
+
+// firstPositiveNumber returns the first positive number found under keys, or 0.
+func firstPositiveNumber(values map[string]any, keys []string) float64 {
+	for _, key := range keys {
+		if value := positiveFloatFromAny(values[key]); value > 0 {
+			return value
+		}
+	}
+	return 0
+}
+
+// numberRangeFromAny decodes value as a non-negative [min, max] pair, swapping
+// reversed bounds. ok is false when the pair is missing, negative or all zero.
+func numberRangeFromAny(value any) (float64, float64, bool) {
+	pair, ok := numberPair(value)
+	if !ok {
+		return 0, 0, false
+	}
+	minValue := pair[0]
+	maxValue := pair[1]
+	if minValue < 0 || maxValue < 0 {
+		return 0, 0, false
+	}
+	if minValue > maxValue && maxValue > 0 {
+		minValue, maxValue = maxValue, minValue
+	}
+	return minValue, maxValue, minValue > 0 || maxValue > 0
+}
+
+// scaleImageDimensions multiplies both sides by scale, rounding up or down as
+// requested and never going below 1. Non-positive or non-finite scales are ignored.
+func scaleImageDimensions(result *imageDimensions, scale float64, roundUp bool) {
+	if scale <= 0 || math.IsNaN(scale) || math.IsInf(scale, 0) {
+		return
+	}
+	round := math.Floor
+	if roundUp {
+		round = math.Ceil
+	}
+	result.Width = max(1, int(round(float64(result.Width)*scale)))
+	result.Height = max(1, int(round(float64(result.Height)*scale)))
+}
+
+// roundImageDimensionToMultiple rounds value to the nearest multiple, with a
+// floor of one multiple.
+func roundImageDimensionToMultiple(value int, multiple int) int {
+	if multiple <= 1 {
+		return value
+	}
+	return max(multiple, int(math.Round(float64(value)/float64(multiple)))*multiple)
+}
+
+// ceilImageDimensionToMultiple rounds value up to the next multiple.
+func ceilImageDimensionToMultiple(value int, multiple int) int {
+	if multiple <= 1 {
+		return value
+	}
+	return max(multiple, (value+multiple-1)/multiple*multiple)
+}
+
+// floorImageDimensionToMultiple rounds value down to a multiple, with a floor
+// of one multiple.
+func floorImageDimensionToMultiple(value int, multiple int) int {
+	if multiple <= 1 {
+		return value
+	}
+	return max(multiple, value/multiple*multiple)
+}
+
+// normalizeImageResolutionForCapability picks the resolution label to send:
+// current when the model allows it, otherwise the label derived from the
+// dimensions or the closest allowed one.
+func normalizeImageResolutionForCapability(current string, width int, height int, capability map[string]any) string {
+	allowed := stringListFromAny(capability["output_resolutions"])
+	if containsString(allowed, current) {
+		return current
+	}
+	resolution := imageResolutionFromDimensions(width, height)
+	if len(allowed) == 0 {
+		return firstNonEmptyString(current, resolution)
+	}
+	if containsString(allowed, resolution) {
+		return resolution
+	}
+	return closestImageResolution(resolution, allowed)
+}
+
+// imageResolutionFromDimensions maps the longer side to a resolution label.
+func imageResolutionFromDimensions(width int, height int) string {
+	maxSide := max(width, height)
+	switch {
+	case maxSide <= 1920:
+		return "1K"
+	case maxSide <= 2560:
+		return "2K"
+	case maxSide <= 3328:
+		return "3K"
+	case maxSide <= 3840:
+		return "4K"
+	default:
+		return "8K"
+	}
+}
+
+// closestImageResolution returns the highest allowed label that is not above
+// target, or else the lowest allowed label above it.
+func closestImageResolution(target string, allowed []string) string {
+	order := []string{"1K", "2K", "3K", "4K", "8K"}
+	targetIndex := indexOfString(order, target)
+	valid := make([]string, 0, len(allowed))
+	for _, value := range allowed {
+		if indexOfString(order, value) >= 0 {
+			valid = append(valid, value)
+		}
+	}
+	if len(valid) == 0 {
+		if len(allowed) > 0 {
+			return allowed[0]
+		}
+		return target
+	}
+	if targetIndex < 0 {
+		return valid[0]
+	}
+	for index := targetIndex; index >= 0; index-- {
+		if containsString(valid, order[index]) {
+			return order[index]
+		}
+	}
+	for _, value := range order[targetIndex+1:] {
+		if containsString(valid, value) {
+			return value
+		}
+	}
+	return valid[0]
+}
+
+// indexOfString returns the index of target in values, or -1.
+func indexOfString(values []string, target string) int {
+	for index, value := range values {
+		if value == target {
+			return index
+		}
+	}
+	return -1
+}
+
+// imageSizeConstraintEvidence returns the capability path and value to cite in
+// the change record: the first configured size-related key, or the whole
+// capability when none of the known keys is present.
+func imageSizeConstraintEvidence(modelType string, capability map[string]any) (string, any) {
+	keyGroups := [][]string{
+		imageOutputSizeRangeKeys,
+		imageOutputMinSizeKeys,
+		imageOutputMaxSizeKeys,
+		{"width_height_range", "aspect_ratio_range", "width_height_multiple"},
+	}
+	for _, keys := range keyGroups {
+		for _, key := range keys {
+			if value, ok := capability[key]; ok {
+				return capabilityPath(modelType, key), cloneAny(value)
+			}
+		}
+	}
+	return capabilityPath(modelType, ""), cloneMap(capability)
+}
+
+// imageSizeMapsEqual reports whether before and after hold the same keys with
+// the same values. Numbers are compared by value, so a float64 1024 (the type
+// encoding/json yields for numbers) equals the int 1024 written by Process.
+func imageSizeMapsEqual(before map[string]any, after map[string]any) bool {
+	if len(before) != len(after) {
+		return false
+	}
+	for key, value := range after {
+		previous, ok := before[key]
+		if !ok || !imageSizeValuesEqual(previous, value) {
+			return false
+		}
+	}
+	return true
+}
+
+// imageSizeValuesEqual compares two parameter values, treating numeric kinds
+// as equal when they hold the same number and strings by content. Values of
+// any other kind are reported as different.
+func imageSizeValuesEqual(left any, right any) bool {
+	if leftNumber, ok := imageSizeNumber(left); ok {
+		rightNumber, ok := imageSizeNumber(right)
+		return ok && leftNumber == rightNumber
+	}
+	leftText, ok := left.(string)
+	if !ok {
+		return false
+	}
+	rightText, ok := right.(string)
+	return ok && leftText == rightText
+}
+
+// imageSizeNumber returns value as a float64 when it is a numeric kind.
+func imageSizeNumber(value any) (float64, bool) {
+	switch typed := value.(type) {
+	case int:
+		return float64(typed), true
+	case int32:
+		return float64(typed), true
+	case int64:
+		return float64(typed), true
+	case float32:
+		return float64(typed), true
+	case float64:
+		return typed, true
+	default:
+		return 0, false
+	}
+}
+
 type inputAudioProcessor struct{}
 
 func (inputAudioProcessor) Name() string { return "InputAudioProcessor" }
diff --git a/apps/api/internal/runner/param_processor_test.go b/apps/api/internal/runner/param_processor_test.go
index c0b7894..af105d1 100644
--- a/apps/api/internal/runner/param_processor_test.go
+++ b/apps/api/internal/runner/param_processor_test.go
@@ -661,6 +661,105 @@ func TestParamProcessorImageResolutionAndOutputCount(t *testing.T) {
 	}
 }
 
+func TestParamProcessorImageSizeConstraintsNormalizeExplicitDimensions(t *testing.T) {
+	body := map[string]any{
+		"model":      "doubao-5.0图像编辑",
+		"prompt":     "draw",
+		"resolution": "2K",
+		"width":      1024,
+		"height":     1024,
+		"size":       "1024x1024",
+	}
+	candidate := store.RuntimeModelCandidate{
+		ModelType: "image_generate",
+		Capabilities: map[string]any{
+			"image_generate": map[string]any{
+				"output_resolutions": []any{"2K", "3K"},
+				"output_size_range":  []any{3686400, 10404496},
+				"aspect_ratio_range": []any{0.0625, 16},
+			},
+		},
+	}
+
+	result := preprocessRequestWithLog("images.generations", body, candidate)
+	if result.Body["width"] != 1920 || result.Body["height"] != 1920 {
+		t.Fatalf("explicit dimensions below model minimum should be scaled to 1920x1920, got %+v", result.Body)
+	}
+	if result.Body["size"] != "1920x1920" {
+		t.Fatalf("size should be synchronized with normalized width/height, got %+v", result.Body)
+	}
+	if result.Body["resolution"] != "2K" {
+		t.Fatalf("resolution should stay on allowed 2K, got %+v", result.Body)
+	}
+	for _, change := range result.Log.Changes {
+		if change.Processor == "ImageSizeProcessor" && change.CapabilityPath == "capabilities.image_generate.output_size_range" {
+			return
+		}
+	}
+	t.Fatalf("expected image size preprocessing log against output_size_range, got %+v", result.Log.Changes)
+}
+
+func TestParamProcessorImageSizeConstraintsNormalizeEditDimensions(t *testing.T) {
+	body := map[string]any{
+		"model":  "gpt-image-2",
+		"prompt": "edit",
+		"image":  "https://example.com/input.png",
+		"width":  "513",
+		"height": "513",
+	}
+	candidate := store.RuntimeModelCandidate{
+		ModelType: "image_edit",
+		Capabilities: map[string]any{
+			"image_edit": map[string]any{
+				"aspect_ratio_allowed":  []any{"1:1", "16:9"},
+				"aspect_ratio_range":    []any{1.0 / 3.0, 3.0},
+				"output_size_range":     []any{655360, 8294400},
+				"width_height_range":    []any{1, 3840},
+				"width_height_multiple": 16,
+				"input_multiple_images": true,
+			},
+		},
+	}
+
+	processed := preprocessRequest("images.edits", body, candidate)
+	width := int(floatFromAny(processed["width"]))
+	height := int(floatFromAny(processed["height"]))
+	if width != 816 || height != 816 {
+		t.Fatalf("edit dimensions should scale up and align to 16px multiples, got %+v", processed)
+	}
+	if processed["size"] != "816x816" {
+		t.Fatalf("edit size should be synchronized with normalized dimensions, got %+v", processed)
+	}
+	if width*height < 655360 || width%16 != 0 || height%16 != 0 {
+		t.Fatalf("edit dimensions should satisfy model constraints, got %+v", processed)
+	}
+}
+
+func TestConstrainImageDimensionsKeepsMinimumAfterAligning(t *testing.T) {
+	capability := map[string]any{
+		"output_size_range":     []any{655360, 8294400},
+		"width_height_multiple": 16,
+	}
+	// Rounding 999x657 to the nearest 16px multiple gives 992x656, which is
+	// below the 655360 pixel minimum; the result must be rounded up instead.
+	width, height := constrainImageDimensions(999, 657, capability)
+	if width != 1008 || height != 672 {
+		t.Fatalf("dimensions should round up to 1008x672 to keep the minimum size, got %dx%d", width, height)
+	}
+}
+
+func TestImageSizeMapsEqualComparesNumbersByValue(t *testing.T) {
+	before := map[string]any{"width": float64(1920), "height": float64(1080), "size": "1920x1080"}
+	after := map[string]any{"width": 1920, "height": 1080, "size": "1920x1080"}
+	if !imageSizeMapsEqual(before, after) {
+		t.Fatalf("float64 and int dimensions with the same value should be equal, got %+v vs %+v", before, after)
+	}
+	after["width"] = 1936
+	if imageSizeMapsEqual(before, after) {
+		t.Fatalf("different widths should not be equal, got %+v vs %+v", before, after)
+	}
+}
+
 func TestParamProcessorImageQualityControl(t *testing.T) {
 	body := map[string]any{
 		"model": "mock-image",
diff --git a/apps/api/internal/runner/param_processor_utils.go b/apps/api/internal/runner/param_processor_utils.go
index 7f9c2d2..ebfec31 100644
--- a/apps/api/internal/runner/param_processor_utils.go
+++ b/apps/api/internal/runner/param_processor_utils.go
@@ -464,6 +464,64 @@ func parsePositiveFloat(value string) float64 {
 	return out
 }
 
+// positiveFloatFromAny converts a number or numeric string to float64. It
+// returns 0 for unsupported types and for values that are not positive and
+// finite, so callers can treat 0 as "not set".
+func positiveFloatFromAny(value any) float64 {
+	var number float64
+	switch typed := value.(type) {
+	case int:
+		number = float64(typed)
+	case int32:
+		number = float64(typed)
+	case int64:
+		number = float64(typed)
+	case float32:
+		number = float64(typed)
+	case float64:
+		number = typed
+	case string:
+		number = parsePositiveFloat(typed)
+	default:
+		return 0
+	}
+	if number <= 0 || math.IsNaN(number) || math.IsInf(number, 0) {
+		return 0
+	}
+	return number
+}
+
+// positiveIntegerFromAny converts value like positiveFloatFromAny and rounds
+// it to the nearest integer. It returns 0 when value is not a positive number.
+func positiveIntegerFromAny(value any) int {
+	number := positiveFloatFromAny(value)
+	if number <= 0 {
+		return 0
+	}
+	return int(math.Round(number))
+}
+
+// parsePixelSizeString parses a "WIDTHxHEIGHT" string (case-insensitive "x")
+// into positive integers. ok is false for empty, "null"/"undefined" or
+// malformed input.
+func parsePixelSizeString(value string) (int, int, bool) {
+	value = strings.TrimSpace(strings.ToLower(value))
+	if value == "" || isEmptyParamString(value) {
+		return 0, 0, false
+	}
+	parts := strings.Split(value, "x")
+	if len(parts) != 2 {
+		return 0, 0, false
+	}
+	width := positiveIntegerFromAny(parts[0])
+	height := positiveIntegerFromAny(parts[1])
+	if width <= 0 || height <= 0 {
+		return 0, 0, false
+	}
+	return width, height, true
+}
+
 func isEmptyParamString(value string) bool {
 	normalized := strings.ToLower(strings.TrimSpace(value))
 	return normalized == "null" || normalized == "undefined"