补全图像尺寸预处理约束

This commit is contained in:
wangbo 2026-06-07 23:55:25 +08:00
parent 4d1a01ec71
commit b7500d81d1
4 changed files with 442 additions and 0 deletions

View File

@ -57,6 +57,7 @@ func NewParamProcessorChain() ParamProcessorChain {
processors: []paramProcessor{
resolutionNormalizeProcessor{},
aspectRatioProcessor{},
imageSizeProcessor{},
messageContentProcessor{},
contentFilterProcessor{},
inputAudioProcessor{},

View File

@ -172,6 +172,333 @@ func (aspectRatioProcessor) Process(params map[string]any, modelType string, con
return true
}
// imageSizeProcessor is the parameter processor that reconciles the requested
// image width/height/size with the size limits declared by the model
// capability. It carries no state.
type imageSizeProcessor struct{}

// Name returns the identifier of this processor.
func (imageSizeProcessor) Name() string {
	const processorName = "ImageSizeProcessor"
	return processorName
}
// ShouldProcess reports whether the processor applies to the request. All of
// the following must hold: the model type is image_generate or image_edit,
// the params carry usable dimensions (width+height or a parsable size), and
// the capability for that model type declares at least one size constraint.
func (imageSizeProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	switch modelType {
	case "image_generate", "image_edit":
		// Supported model types; keep checking.
	default:
		return false
	}
	_, _, hasDimensions := imageDimensionsFromParams(params)
	if !hasDimensions {
		return false
	}
	capability := capabilityForType(context.modelCapability, modelType)
	if capability == nil {
		return false
	}
	return imageSizeCapabilityConfigured(capability)
}
// Process constrains the requested image dimensions to the model capability
// and rewrites the size-related params so they agree with each other.
//
// It writes "width", "height" and "size" into params, writes "resolution"
// (and context.resolution) when a resolution could be determined, and records
// a change on the context when the size-related params differ from what the
// request originally carried. It always returns true.
func (imageSizeProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	capability := capabilityForType(context.modelCapability, modelType)
	if capability == nil {
		return true
	}
	requestedWidth, requestedHeight, found := imageDimensionsFromParams(params)
	if !found {
		return true
	}

	// Snapshot the size-related params before anything is rewritten so the
	// change log can show the original values.
	snapshot := make(map[string]any)
	for _, name := range [...]string{"width", "height", "size", "resolution"} {
		original, present := params[name]
		if !present {
			continue
		}
		snapshot[name] = cloneAny(original)
	}

	width, height := constrainImageDimensions(requestedWidth, requestedHeight, capability)
	params["width"], params["height"] = width, height

	requestedResolution := firstNonEmptyString(stringFromAny(params["resolution"]), context.resolution)
	resolution := normalizeImageResolutionForCapability(requestedResolution, width, height, capability)
	if resolution != "" {
		params["resolution"] = resolution
		context.resolution = resolution
	}

	// Some capabilities expect "size" to carry the resolution label instead
	// of a WIDTHxHEIGHT string.
	sizeIsResolution := stringFromAny(capability["size_param_format"]) == "resolution" && resolution != ""
	if sizeIsResolution {
		params["size"] = resolution
	} else {
		params["size"] = fmt.Sprintf("%dx%d", width, height)
	}

	updated := map[string]any{
		"width":  params["width"],
		"height": params["height"],
		"size":   params["size"],
	}
	if finalResolution := stringFromAny(params["resolution"]); finalResolution != "" {
		updated["resolution"] = finalResolution
	}

	if imageSizeMapsEqual(snapshot, updated) {
		return true
	}
	path, evidence := imageSizeConstraintEvidence(modelType, capability)
	context.recordChange(
		"ImageSizeProcessor",
		"adjust",
		"size",
		snapshot,
		updated,
		"图像宽高不符合模型尺寸限制,已按模型能力调整 width、height 和 size。",
		path,
		evidence,
	)
	return true
}
// imageDimensionsFromParams extracts the requested width and height.
//
// Explicit "width" and "height" params win when both are positive; otherwise
// the "size" param is parsed as a pixel size string. The boolean is false when
// neither source yields a usable pair.
func imageDimensionsFromParams(params map[string]any) (int, int, bool) {
	w, h := positiveIntegerFromAny(params["width"]), positiveIntegerFromAny(params["height"])
	if w <= 0 || h <= 0 {
		return parsePixelSizeString(stringFromAny(params["size"]))
	}
	return w, h, true
}
// imageSizeCapabilityConfigured reports whether the capability declares any
// size constraint: an output size range, a width/height range, an aspect
// ratio range, or a width/height multiple greater than 1.
func imageSizeCapabilityConfigured(capability map[string]any) bool {
	if capability == nil {
		return false
	}
	if _, _, hasOutputRange := imageOutputSizeRange(capability); hasOutputRange {
		return true
	}
	for _, key := range [...]string{"width_height_range", "aspect_ratio_range"} {
		if _, _, hasRange := numberRangeFromAny(capability[key]); hasRange {
			return true
		}
	}
	multiple := positiveIntegerFromAny(capability["width_height_multiple"])
	return multiple > 1
}
// constrainImageDimensions adjusts width and height to the capability's size
// constraints and returns the adjusted pair.
//
// The four constraints (output size range, width/height range, aspect ratio
// range, width/height multiple) are applied in that order. Because one
// adjustment can violate an earlier constraint, the sequence is repeated until
// a pass changes nothing, for at most four passes. Non-positive inputs are
// returned unchanged.
func constrainImageDimensions(width int, height int, capability map[string]any) (int, int) {
	if width <= 0 || height <= 0 {
		return width, height
	}

	const maxPasses = 4
	steps := [...]func(*imageDimensions, map[string]any){
		applyImageOutputSizeRange,
		applyImageWidthHeightRange,
		applyImageAspectRatioRange,
		applyImageWidthHeightMultiple,
	}

	current := imageDimensions{Width: width, Height: height}
	for pass := 0; pass < maxPasses; pass++ {
		previous := current
		for _, apply := range steps {
			apply(&current, capability)
		}
		if current == previous {
			break
		}
	}
	return current.Width, current.Height
}
// imageDimensions is a mutable width/height pair that the constraint helpers
// adjust in place.
type imageDimensions struct {
	Width, Height int
}
// applyImageOutputSizeRange scales result so that its total pixel count
// (width * height) falls inside the capability's output size range.
//
// Below the minimum the dimensions are scaled up (rounding up); above the
// maximum they are scaled down (rounding down). Both sides are scaled by the
// same factor. Nothing happens when no range is configured or when result
// holds a non-positive side.
func applyImageOutputSizeRange(result *imageDimensions, capability map[string]any) {
	minValue, maxValue, ok := imageOutputSizeRange(capability)
	if !ok || result.Width <= 0 || result.Height <= 0 {
		return
	}
	// Multiply in float64 rather than int: an int product of two very large
	// request-supplied sides can wrap around to a negative number, which made
	// the scale factor NaN and let the request skip the maximum check.
	pixels := float64(result.Width) * float64(result.Height)
	if minValue > 0 && pixels < minValue {
		scaleImageDimensions(result, math.Sqrt(minValue/pixels), true)
		return
	}
	if maxValue > 0 && pixels > maxValue {
		scaleImageDimensions(result, math.Sqrt(maxValue/pixels), false)
	}
}
// applyImageWidthHeightRange scales result so that its sides respect the
// capability's "width_height_range".
//
// If the longer side exceeds the maximum, both sides are scaled down so the
// longer side fits. Otherwise, if the shorter side is below the minimum, both
// sides are scaled up so the shorter side reaches it. Only one of the two
// adjustments is made per call.
func applyImageWidthHeightRange(result *imageDimensions, capability map[string]any) {
	lower, upper, ok := numberRangeFromAny(capability["width_height_range"])
	if !ok {
		return
	}
	if result.Width <= 0 || result.Height <= 0 {
		return
	}

	shortSide, longSide := result.Width, result.Height
	if shortSide > longSide {
		shortSide, longSide = longSide, shortSide
	}
	longest, shortest := float64(longSide), float64(shortSide)

	switch {
	case upper > 0 && longest > upper:
		scaleImageDimensions(result, upper/longest, false)
	case lower > 0 && shortest < lower:
		scaleImageDimensions(result, lower/shortest, true)
	}
}
// applyImageAspectRatioRange brings the width/height ratio of result into the
// capability's "aspect_ratio_range" by growing one side.
//
// A ratio above the maximum is fixed by increasing the height; a ratio below
// the minimum is fixed by increasing the width. The adjusted side is rounded
// up and is never less than 1.
func applyImageAspectRatioRange(result *imageDimensions, capability map[string]any) {
	lowest, highest, ok := numberRangeFromAny(capability["aspect_ratio_range"])
	if !ok || result.Width <= 0 || result.Height <= 0 {
		return
	}

	w, h := float64(result.Width), float64(result.Height)
	ratio := w / h
	switch {
	case highest > 0 && ratio > highest:
		result.Height = max(1, int(math.Ceil(w/highest)))
	case lowest > 0 && ratio < lowest:
		result.Width = max(1, int(math.Ceil(h*lowest)))
	}
}
// applyImageWidthHeightMultiple rounds both sides of result to the
// capability's "width_height_multiple". It does nothing when the multiple is
// 1 or less, or when result holds a non-positive side.
func applyImageWidthHeightMultiple(result *imageDimensions, capability map[string]any) {
	step := positiveIntegerFromAny(capability["width_height_multiple"])
	if step <= 1 {
		return
	}
	if result.Width <= 0 || result.Height <= 0 {
		return
	}
	for _, side := range [...]*int{&result.Width, &result.Height} {
		*side = roundImageDimensionToMultiple(*side, step)
	}
}
// imageOutputSizeRange reads the output size range from the capability and
// returns (min, max, ok).
//
// A range-valued key is preferred; the first one that parses wins. Otherwise
// the minimum and maximum are looked up independently through their alias
// keys, and ok is true when at least one of them is positive. A bound that is
// not configured is returned as 0.
func imageOutputSizeRange(capability map[string]any) (float64, float64, bool) {
	rangeKeys := [...]string{"output_size_range", "outputSizeRange", "size_range", "sizeRange"}
	for i := range rangeKeys {
		lower, upper, ok := numberRangeFromAny(capability[rangeKeys[i]])
		if ok {
			return lower, upper, true
		}
	}

	lower := firstPositiveNumber(capability, []string{"output_min_size", "outputMinSize", "min_output_size", "minOutputSize", "min_size", "minSize", "output_min_pixels", "outputMinPixels", "min_pixels", "minPixels"})
	upper := firstPositiveNumber(capability, []string{"output_max_size", "outputMaxSize", "max_output_size", "maxOutputSize", "max_size", "maxSize", "output_max_pixels", "outputMaxPixels", "max_pixels", "maxPixels"})
	if lower > 0 || upper > 0 {
		return lower, upper, true
	}
	return lower, upper, false
}
// firstPositiveNumber returns the first value in values, looked up by keys in
// order, that converts to a number greater than zero. It returns 0 when no key
// yields such a value.
func firstPositiveNumber(values map[string]any, keys []string) float64 {
	for i := 0; i < len(keys); i++ {
		candidate := positiveFloatFromAny(values[keys[i]])
		if candidate > 0 {
			return candidate
		}
	}
	return 0
}
// numberRangeFromAny interprets value as a (min, max) pair and returns
// (min, max, ok).
//
// A pair with a negative bound is rejected. When both bounds are given in the
// wrong order (min > max with a positive max) they are swapped. ok is true
// only when at least one bound is positive; a bound of 0 means "not set".
func numberRangeFromAny(value any) (float64, float64, bool) {
	pair, ok := numberPair(value)
	if !ok {
		return 0, 0, false
	}

	lower, upper := pair[0], pair[1]
	switch {
	case lower < 0 || upper < 0:
		return 0, 0, false
	case upper > 0 && lower > upper:
		lower, upper = upper, lower
	}
	return lower, upper, lower > 0 || upper > 0
}
// scaleImageDimensions multiplies both sides of result by scale, rounding up
// when roundUp is true and down otherwise. Each side is kept at 1 or more.
// A scale that is non-positive, NaN or infinite leaves result untouched.
func scaleImageDimensions(result *imageDimensions, scale float64, roundUp bool) {
	if math.IsNaN(scale) || math.IsInf(scale, 0) || scale <= 0 {
		return
	}

	rescale := func(side int) int {
		scaled := float64(side) * scale
		if roundUp {
			scaled = math.Ceil(scaled)
		} else {
			scaled = math.Floor(scaled)
		}
		return max(1, int(scaled))
	}

	result.Width = rescale(result.Width)
	result.Height = rescale(result.Height)
}
// roundImageDimensionToMultiple rounds value to the nearest multiple of
// multiple (halves round away from zero) and never returns less than one
// whole multiple. A multiple of 1 or less returns value unchanged.
func roundImageDimensionToMultiple(value int, multiple int) int {
	if multiple <= 1 {
		return value
	}
	steps := math.Round(float64(value) / float64(multiple))
	aligned := int(steps) * multiple
	if aligned < multiple {
		return multiple
	}
	return aligned
}
// normalizeImageResolutionForCapability picks the resolution label to use for
// an image of the given dimensions.
//
// The current label is kept when the capability's "output_resolutions" list
// allows it. With no list configured, the current label is used, falling back
// to the label derived from the dimensions. Otherwise the derived label is
// used when allowed, or else the closest allowed label.
func normalizeImageResolutionForCapability(current string, width int, height int, capability map[string]any) string {
	allowed := stringListFromAny(capability["output_resolutions"])
	if containsString(allowed, current) {
		return current
	}

	derived := imageResolutionFromDimensions(width, height)
	switch {
	case len(allowed) == 0:
		return firstNonEmptyString(current, derived)
	case containsString(allowed, derived):
		return derived
	default:
		return closestImageResolution(derived, allowed)
	}
}
// imageResolutionFromDimensions maps the longer side of an image to a
// resolution label: up to 1920 is "1K", up to 2560 is "2K", up to 3328 is
// "3K", up to 3840 is "4K", and anything larger is "8K".
func imageResolutionFromDimensions(width int, height int) string {
	longest := width
	if height > longest {
		longest = height
	}

	tiers := [...]struct {
		limit int
		label string
	}{
		{1920, "1K"},
		{2560, "2K"},
		{3328, "3K"},
		{3840, "4K"},
	}
	for _, tier := range tiers {
		if longest <= tier.limit {
			return tier.label
		}
	}
	return "8K"
}
// closestImageResolution returns the allowed resolution label closest to
// target on the scale 1K < 2K < 3K < 4K < 8K.
//
// The target tier or the nearest smaller allowed tier is preferred; only when
// none exists is the nearest larger tier chosen. If allowed contains no label
// from the scale, its first entry is returned (or target when allowed is
// empty). If target itself is not on the scale, the first recognised allowed
// label is returned.
func closestImageResolution(target string, allowed []string) string {
	tiers := []string{"1K", "2K", "3K", "4K", "8K"}

	known := make([]string, 0, len(allowed))
	for _, candidate := range allowed {
		if indexOfString(tiers, candidate) < 0 {
			continue
		}
		known = append(known, candidate)
	}
	if len(known) == 0 {
		if len(allowed) == 0 {
			return target
		}
		return allowed[0]
	}

	position := indexOfString(tiers, target)
	if position < 0 {
		return known[0]
	}

	// Walk downwards from the target tier first, then upwards.
	for i := position; i >= 0; i-- {
		if containsString(known, tiers[i]) {
			return tiers[i]
		}
	}
	for i := position + 1; i < len(tiers); i++ {
		if containsString(known, tiers[i]) {
			return tiers[i]
		}
	}
	return known[0]
}
// indexOfString returns the position of the first element of values equal to
// target, or -1 when target is absent.
func indexOfString(values []string, target string) int {
	for i := 0; i < len(values); i++ {
		if values[i] == target {
			return i
		}
	}
	return -1
}
// imageSizeConstraintEvidence returns the capability path and a copy of the
// value of the first size constraint present in the capability, for use as
// evidence in the change log.
//
// The keys are checked in the same order imageOutputSizeRange reads them
// (range keys, then minimum aliases, then maximum aliases), followed by the
// width/height range, aspect ratio range and width/height multiple. Every
// alias imageOutputSizeRange accepts is covered here, so a capability
// configured through e.g. "size_range" or "max_pixels" is reported under that
// key. When none of the keys is present, the path of the whole capability and
// a copy of it are returned.
func imageSizeConstraintEvidence(modelType string, capability map[string]any) (string, any) {
	evidenceKeys := []string{
		// Output size given as a range.
		"output_size_range", "outputSizeRange", "size_range", "sizeRange",
		// Output size minimum aliases.
		"output_min_size", "outputMinSize", "min_output_size", "minOutputSize", "min_size", "minSize", "output_min_pixels", "outputMinPixels", "min_pixels", "minPixels",
		// Output size maximum aliases.
		"output_max_size", "outputMaxSize", "max_output_size", "maxOutputSize", "max_size", "maxSize", "output_max_pixels", "outputMaxPixels", "max_pixels", "maxPixels",
		// Remaining dimension constraints.
		"width_height_range", "aspect_ratio_range", "width_height_multiple",
	}
	for _, key := range evidenceKeys {
		if value, ok := capability[key]; ok {
			return capabilityPath(modelType, key), cloneAny(value)
		}
	}
	return capabilityPath(modelType, ""), cloneMap(capability)
}
// imageSizeMapsEqual reports whether before and after hold exactly the same
// keys with equal values.
//
// Numeric values are compared by value rather than by Go type, so a
// float64(1024) in before equals the int 1024 in after. Without this, a
// request whose numbers arrive as float64 would always look "changed" even
// when no dimension was adjusted. A numeric string such as "1024" is still
// different from the number 1024. A key that is missing on one side never
// matches, even if the other side stores nil under it.
func imageSizeMapsEqual(before map[string]any, after map[string]any) bool {
	if len(before) != len(after) {
		return false
	}
	// With equal lengths, every key of after being present in before implies
	// the key sets are identical.
	for key, want := range after {
		got, ok := before[key]
		if !ok || !imageSizeValuesEqual(got, want) {
			return false
		}
	}
	return true
}

// imageSizeValuesEqual compares two param values, treating numbers of
// different Go types as equal when they have the same numeric value.
func imageSizeValuesEqual(left any, right any) bool {
	leftNumber, leftIsNumber := imageSizeNumericValue(left)
	rightNumber, rightIsNumber := imageSizeNumericValue(right)
	if leftIsNumber && rightIsNumber {
		return leftNumber == rightNumber
	}
	// Values of different dynamic types compare unequal here.
	return left == right
}

// imageSizeNumericValue converts the common numeric Go types to float64 and
// reports whether value was numeric.
func imageSizeNumericValue(value any) (float64, bool) {
	switch typed := value.(type) {
	case int:
		return float64(typed), true
	case int32:
		return float64(typed), true
	case int64:
		return float64(typed), true
	case float32:
		return float64(typed), true
	case float64:
		return typed, true
	default:
		return 0, false
	}
}
// inputAudioProcessor is a stateless parameter processor registered in the
// processor chain under the name "InputAudioProcessor".
type inputAudioProcessor struct{}

// Name returns the identifier of this processor.
func (inputAudioProcessor) Name() string {
	const processorName = "InputAudioProcessor"
	return processorName
}

View File

@ -661,6 +661,80 @@ func TestParamProcessorImageResolutionAndOutputCount(t *testing.T) {
}
}
// TestParamProcessorImageSizeConstraintsNormalizeExplicitDimensions checks
// that explicit 1024x1024 dimensions below the model's minimum output size are
// scaled up to 1920x1920, that "size" follows the new dimensions, that the
// allowed "2K" resolution is kept, and that the adjustment is logged against
// the output_size_range capability.
func TestParamProcessorImageSizeConstraintsNormalizeExplicitDimensions(t *testing.T) {
	capability := map[string]any{
		"output_resolutions": []any{"2K", "3K"},
		"output_size_range":  []any{3686400, 10404496},
		"aspect_ratio_range": []any{0.0625, 16},
	}
	candidate := store.RuntimeModelCandidate{
		ModelType: "image_generate",
		Capabilities: map[string]any{
			"image_generate": capability,
		},
	}
	request := map[string]any{
		"model":      "doubao-5.0图像编辑",
		"prompt":     "draw",
		"resolution": "2K",
		"width":      1024,
		"height":     1024,
		"size":       "1024x1024",
	}

	result := preprocessRequestWithLog("images.generations", request, candidate)

	if result.Body["width"] != 1920 || result.Body["height"] != 1920 {
		t.Fatalf("explicit dimensions below model minimum should be scaled to 1920x1920, got %+v", result.Body)
	}
	if result.Body["size"] != "1920x1920" {
		t.Fatalf("size should be synchronized with normalized width/height, got %+v", result.Body)
	}
	if result.Body["resolution"] != "2K" {
		t.Fatalf("resolution should stay on allowed 2K, got %+v", result.Body)
	}

	logged := false
	for _, change := range result.Log.Changes {
		if change.Processor != "ImageSizeProcessor" {
			continue
		}
		if change.CapabilityPath == "capabilities.image_generate.output_size_range" {
			logged = true
			break
		}
	}
	if !logged {
		t.Fatalf("expected image size preprocessing log against output_size_range, got %+v", result.Log.Changes)
	}
}
// TestParamProcessorImageSizeConstraintsNormalizeEditDimensions checks that
// string dimensions "513"x"513" on an image edit request are scaled up to the
// minimum output size and aligned to the 16px multiple (816x816), and that
// "size" is filled in to match.
func TestParamProcessorImageSizeConstraintsNormalizeEditDimensions(t *testing.T) {
	capability := map[string]any{
		"aspect_ratio_allowed":  []any{"1:1", "16:9"},
		"aspect_ratio_range":    []any{1.0 / 3.0, 3.0},
		"output_size_range":     []any{655360, 8294400},
		"width_height_range":    []any{1, 3840},
		"width_height_multiple": 16,
		"input_multiple_images": true,
	}
	candidate := store.RuntimeModelCandidate{
		ModelType: "image_edit",
		Capabilities: map[string]any{
			"image_edit": capability,
		},
	}
	request := map[string]any{
		"model":  "gpt-image-2",
		"prompt": "edit",
		"image":  "https://example.com/input.png",
		"width":  "513",
		"height": "513",
	}

	processed := preprocessRequest("images.edits", request, candidate)
	gotWidth := int(floatFromAny(processed["width"]))
	gotHeight := int(floatFromAny(processed["height"]))

	if gotWidth != 816 || gotHeight != 816 {
		t.Fatalf("edit dimensions should scale up and align to 16px multiples, got %+v", processed)
	}
	if processed["size"] != "816x816" {
		t.Fatalf("edit size should be synchronized with normalized dimensions, got %+v", processed)
	}
	tooSmall := gotWidth*gotHeight < 655360
	misaligned := gotWidth%16 != 0 || gotHeight%16 != 0
	if tooSmall || misaligned {
		t.Fatalf("edit dimensions should satisfy model constraints, got %+v", processed)
	}
}
func TestParamProcessorImageQualityControl(t *testing.T) {
body := map[string]any{
"model": "mock-image",

View File

@ -464,6 +464,46 @@ func parsePositiveFloat(value string) float64 {
return out
}
// positiveFloatFromAny converts value to a positive float64.
//
// int, int64 and float64 values are converted directly; strings are parsed
// with parsePositiveFloat. The result is 0 for any other type and for any
// value that is not a finite number greater than zero (negative, zero, NaN or
// infinite), so callers can treat 0 as "no usable value".
func positiveFloatFromAny(value any) float64 {
	var number float64
	switch typed := value.(type) {
	case int:
		number = float64(typed)
	case int64:
		number = float64(typed)
	case float64:
		number = typed
	case string:
		number = parsePositiveFloat(typed)
	default:
		return 0
	}
	// !(number > 0) also rejects NaN, which compares false against everything.
	if !(number > 0) || math.IsInf(number, 0) {
		return 0
	}
	return number
}
// positiveIntegerFromAny converts value to a positive int by rounding the
// result of positiveFloatFromAny to the nearest integer.
//
// It returns 0 when the value is not a positive number, when it rounds to
// zero, or when it is too large to be represented as an int (converting an
// out-of-range float to int is implementation-defined in Go and may yield a
// negative number).
func positiveIntegerFromAny(value any) int {
	number := positiveFloatFromAny(value)
	// !(number > 0) also rejects NaN.
	if !(number > 0) {
		return 0
	}
	rounded := math.Round(number)
	// float64(math.MaxInt) rounds up to 2^63 on 64-bit platforms, so use >=
	// to keep the conversion below strictly in range. This also rejects +Inf.
	if rounded >= float64(math.MaxInt) {
		return 0
	}
	return int(rounded)
}
// parsePixelSizeString parses a "WIDTHxHEIGHT" string (case-insensitive,
// surrounding whitespace ignored) into its two sides.
//
// The boolean is false when the string is empty, is an empty-parameter
// placeholder, does not contain exactly one "x", or when either side is not a
// positive number.
func parsePixelSizeString(value string) (int, int, bool) {
	normalized := strings.TrimSpace(strings.ToLower(value))
	if normalized == "" || isEmptyParamString(normalized) {
		return 0, 0, false
	}

	// Exactly one separator: cut at the first "x" and make sure no second one
	// remains in the right-hand part.
	left, right, found := strings.Cut(normalized, "x")
	if !found || strings.Contains(right, "x") {
		return 0, 0, false
	}

	w, h := positiveIntegerFromAny(left), positiveIntegerFromAny(right)
	if w > 0 && h > 0 {
		return w, h, true
	}
	return 0, 0, false
}
func isEmptyParamString(value string) bool {
normalized := strings.ToLower(strings.TrimSpace(value))
return normalized == "null" || normalized == "undefined"