补全图像尺寸预处理约束
This commit is contained in:
parent
4d1a01ec71
commit
b7500d81d1
@ -57,6 +57,7 @@ func NewParamProcessorChain() ParamProcessorChain {
|
||||
processors: []paramProcessor{
|
||||
resolutionNormalizeProcessor{},
|
||||
aspectRatioProcessor{},
|
||||
imageSizeProcessor{},
|
||||
messageContentProcessor{},
|
||||
contentFilterProcessor{},
|
||||
inputAudioProcessor{},
|
||||
|
||||
@ -172,6 +172,333 @@ func (aspectRatioProcessor) Process(params map[string]any, modelType string, con
|
||||
return true
|
||||
}
|
||||
|
||||
// imageSizeProcessor is the parameter processor that reconciles requested
// image dimensions with the size limits found in a model's capability map.
// It carries no state.
type imageSizeProcessor struct{}

// Name returns the processor's name, "ImageSizeProcessor".
func (imageSizeProcessor) Name() string {
	return "ImageSizeProcessor"
}
|
||||
|
||||
func (imageSizeProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
|
||||
if modelType != "image_generate" && modelType != "image_edit" {
|
||||
return false
|
||||
}
|
||||
if _, _, ok := imageDimensionsFromParams(params); !ok {
|
||||
return false
|
||||
}
|
||||
capability := capabilityForType(context.modelCapability, modelType)
|
||||
return capability != nil && imageSizeCapabilityConfigured(capability)
|
||||
}
|
||||
|
||||
func (imageSizeProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
|
||||
capability := capabilityForType(context.modelCapability, modelType)
|
||||
if capability == nil {
|
||||
return true
|
||||
}
|
||||
width, height, ok := imageDimensionsFromParams(params)
|
||||
if !ok {
|
||||
return true
|
||||
}
|
||||
|
||||
before := map[string]any{}
|
||||
for _, key := range []string{"width", "height", "size", "resolution"} {
|
||||
if value, exists := params[key]; exists {
|
||||
before[key] = cloneAny(value)
|
||||
}
|
||||
}
|
||||
|
||||
width, height = constrainImageDimensions(width, height, capability)
|
||||
params["width"] = width
|
||||
params["height"] = height
|
||||
resolution := normalizeImageResolutionForCapability(firstNonEmptyString(stringFromAny(params["resolution"]), context.resolution), width, height, capability)
|
||||
if resolution != "" {
|
||||
params["resolution"] = resolution
|
||||
context.resolution = resolution
|
||||
}
|
||||
if stringFromAny(capability["size_param_format"]) == "resolution" && resolution != "" {
|
||||
params["size"] = resolution
|
||||
} else {
|
||||
params["size"] = fmt.Sprintf("%dx%d", width, height)
|
||||
}
|
||||
|
||||
after := map[string]any{
|
||||
"width": params["width"],
|
||||
"height": params["height"],
|
||||
"size": params["size"],
|
||||
}
|
||||
if value := stringFromAny(params["resolution"]); value != "" {
|
||||
after["resolution"] = value
|
||||
}
|
||||
if !imageSizeMapsEqual(before, after) {
|
||||
path, value := imageSizeConstraintEvidence(modelType, capability)
|
||||
context.recordChange(
|
||||
"ImageSizeProcessor",
|
||||
"adjust",
|
||||
"size",
|
||||
before,
|
||||
after,
|
||||
"图像宽高不符合模型尺寸限制,已按模型能力调整 width、height 和 size。",
|
||||
path,
|
||||
value,
|
||||
)
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func imageDimensionsFromParams(params map[string]any) (int, int, bool) {
|
||||
width := positiveIntegerFromAny(params["width"])
|
||||
height := positiveIntegerFromAny(params["height"])
|
||||
if width > 0 && height > 0 {
|
||||
return width, height, true
|
||||
}
|
||||
return parsePixelSizeString(stringFromAny(params["size"]))
|
||||
}
|
||||
|
||||
func imageSizeCapabilityConfigured(capability map[string]any) bool {
|
||||
if capability == nil {
|
||||
return false
|
||||
}
|
||||
if _, _, ok := imageOutputSizeRange(capability); ok {
|
||||
return true
|
||||
}
|
||||
if _, _, ok := numberRangeFromAny(capability["width_height_range"]); ok {
|
||||
return true
|
||||
}
|
||||
if _, _, ok := numberRangeFromAny(capability["aspect_ratio_range"]); ok {
|
||||
return true
|
||||
}
|
||||
return positiveIntegerFromAny(capability["width_height_multiple"]) > 1
|
||||
}
|
||||
|
||||
func constrainImageDimensions(width int, height int, capability map[string]any) (int, int) {
|
||||
if width <= 0 || height <= 0 {
|
||||
return width, height
|
||||
}
|
||||
result := imageDimensions{Width: width, Height: height}
|
||||
for i := 0; i < 4; i++ {
|
||||
before := result
|
||||
applyImageOutputSizeRange(&result, capability)
|
||||
applyImageWidthHeightRange(&result, capability)
|
||||
applyImageAspectRatioRange(&result, capability)
|
||||
applyImageWidthHeightMultiple(&result, capability)
|
||||
if result == before {
|
||||
break
|
||||
}
|
||||
}
|
||||
return result.Width, result.Height
|
||||
}
|
||||
|
||||
// imageDimensions is a width/height pair that the constraint helpers mutate
// in place.
type imageDimensions struct {
	Width, Height int
}
|
||||
|
||||
func applyImageOutputSizeRange(result *imageDimensions, capability map[string]any) {
|
||||
minValue, maxValue, ok := imageOutputSizeRange(capability)
|
||||
if !ok || result.Width <= 0 || result.Height <= 0 {
|
||||
return
|
||||
}
|
||||
pixels := float64(result.Width * result.Height)
|
||||
if minValue > 0 && pixels < minValue {
|
||||
scaleImageDimensions(result, math.Sqrt(minValue/pixels), true)
|
||||
return
|
||||
}
|
||||
if maxValue > 0 && pixels > maxValue {
|
||||
scaleImageDimensions(result, math.Sqrt(maxValue/pixels), false)
|
||||
}
|
||||
}
|
||||
|
||||
func applyImageWidthHeightRange(result *imageDimensions, capability map[string]any) {
|
||||
minValue, maxValue, ok := numberRangeFromAny(capability["width_height_range"])
|
||||
if !ok || result.Width <= 0 || result.Height <= 0 {
|
||||
return
|
||||
}
|
||||
maxSide := float64(max(result.Width, result.Height))
|
||||
minSide := float64(min(result.Width, result.Height))
|
||||
if maxValue > 0 && maxSide > maxValue {
|
||||
scaleImageDimensions(result, maxValue/maxSide, false)
|
||||
return
|
||||
}
|
||||
if minValue > 0 && minSide < minValue {
|
||||
scaleImageDimensions(result, minValue/minSide, true)
|
||||
}
|
||||
}
|
||||
|
||||
func applyImageAspectRatioRange(result *imageDimensions, capability map[string]any) {
|
||||
minValue, maxValue, ok := numberRangeFromAny(capability["aspect_ratio_range"])
|
||||
if !ok || result.Width <= 0 || result.Height <= 0 {
|
||||
return
|
||||
}
|
||||
ratio := float64(result.Width) / float64(result.Height)
|
||||
if maxValue > 0 && ratio > maxValue {
|
||||
result.Height = max(1, int(math.Ceil(float64(result.Width)/maxValue)))
|
||||
return
|
||||
}
|
||||
if minValue > 0 && ratio < minValue {
|
||||
result.Width = max(1, int(math.Ceil(float64(result.Height)*minValue)))
|
||||
}
|
||||
}
|
||||
|
||||
func applyImageWidthHeightMultiple(result *imageDimensions, capability map[string]any) {
|
||||
multiple := positiveIntegerFromAny(capability["width_height_multiple"])
|
||||
if multiple <= 1 || result.Width <= 0 || result.Height <= 0 {
|
||||
return
|
||||
}
|
||||
result.Width = roundImageDimensionToMultiple(result.Width, multiple)
|
||||
result.Height = roundImageDimensionToMultiple(result.Height, multiple)
|
||||
}
|
||||
|
||||
func imageOutputSizeRange(capability map[string]any) (float64, float64, bool) {
|
||||
for _, key := range []string{"output_size_range", "outputSizeRange", "size_range", "sizeRange"} {
|
||||
if minValue, maxValue, ok := numberRangeFromAny(capability[key]); ok {
|
||||
return minValue, maxValue, true
|
||||
}
|
||||
}
|
||||
minValue := firstPositiveNumber(capability, []string{"output_min_size", "outputMinSize", "min_output_size", "minOutputSize", "min_size", "minSize", "output_min_pixels", "outputMinPixels", "min_pixels", "minPixels"})
|
||||
maxValue := firstPositiveNumber(capability, []string{"output_max_size", "outputMaxSize", "max_output_size", "maxOutputSize", "max_size", "maxSize", "output_max_pixels", "outputMaxPixels", "max_pixels", "maxPixels"})
|
||||
return minValue, maxValue, minValue > 0 || maxValue > 0
|
||||
}
|
||||
|
||||
func firstPositiveNumber(values map[string]any, keys []string) float64 {
|
||||
for _, key := range keys {
|
||||
if value := positiveFloatFromAny(values[key]); value > 0 {
|
||||
return value
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func numberRangeFromAny(value any) (float64, float64, bool) {
|
||||
pair, ok := numberPair(value)
|
||||
if !ok {
|
||||
return 0, 0, false
|
||||
}
|
||||
minValue := pair[0]
|
||||
maxValue := pair[1]
|
||||
if minValue < 0 || maxValue < 0 {
|
||||
return 0, 0, false
|
||||
}
|
||||
if minValue > maxValue && maxValue > 0 {
|
||||
minValue, maxValue = maxValue, minValue
|
||||
}
|
||||
return minValue, maxValue, minValue > 0 || maxValue > 0
|
||||
}
|
||||
|
||||
func scaleImageDimensions(result *imageDimensions, scale float64, roundUp bool) {
|
||||
if scale <= 0 || math.IsNaN(scale) || math.IsInf(scale, 0) {
|
||||
return
|
||||
}
|
||||
round := math.Floor
|
||||
if roundUp {
|
||||
round = math.Ceil
|
||||
}
|
||||
result.Width = max(1, int(round(float64(result.Width)*scale)))
|
||||
result.Height = max(1, int(round(float64(result.Height)*scale)))
|
||||
}
|
||||
|
||||
// roundImageDimensionToMultiple rounds value to the nearest multiple of
// multiple, never returning less than one whole multiple. When multiple is 1
// or less, value is returned unchanged.
func roundImageDimensionToMultiple(value int, multiple int) int {
	if multiple <= 1 {
		return value
	}
	steps := int(math.Round(float64(value) / float64(multiple)))
	return max(multiple, steps*multiple)
}
|
||||
|
||||
func normalizeImageResolutionForCapability(current string, width int, height int, capability map[string]any) string {
|
||||
allowed := stringListFromAny(capability["output_resolutions"])
|
||||
if containsString(allowed, current) {
|
||||
return current
|
||||
}
|
||||
resolution := imageResolutionFromDimensions(width, height)
|
||||
if len(allowed) == 0 {
|
||||
return firstNonEmptyString(current, resolution)
|
||||
}
|
||||
if containsString(allowed, resolution) {
|
||||
return resolution
|
||||
}
|
||||
return closestImageResolution(resolution, allowed)
|
||||
}
|
||||
|
||||
// imageResolutionFromDimensions maps the longer side of an image to a
// resolution label: up to 1920 is "1K", up to 2560 "2K", up to 3328 "3K",
// up to 3840 "4K", and anything larger "8K".
func imageResolutionFromDimensions(width int, height int) string {
	longSide := max(width, height)

	tiers := []struct {
		limit int
		label string
	}{
		{1920, "1K"},
		{2560, "2K"},
		{3328, "3K"},
		{3840, "4K"},
	}
	for _, tier := range tiers {
		if longSide <= tier.limit {
			return tier.label
		}
	}
	return "8K"
}
|
||||
|
||||
func closestImageResolution(target string, allowed []string) string {
|
||||
order := []string{"1K", "2K", "3K", "4K", "8K"}
|
||||
targetIndex := indexOfString(order, target)
|
||||
valid := make([]string, 0, len(allowed))
|
||||
for _, value := range allowed {
|
||||
if indexOfString(order, value) >= 0 {
|
||||
valid = append(valid, value)
|
||||
}
|
||||
}
|
||||
if len(valid) == 0 {
|
||||
if len(allowed) > 0 {
|
||||
return allowed[0]
|
||||
}
|
||||
return target
|
||||
}
|
||||
if targetIndex < 0 {
|
||||
return valid[0]
|
||||
}
|
||||
for index := targetIndex; index >= 0; index-- {
|
||||
if containsString(valid, order[index]) {
|
||||
return order[index]
|
||||
}
|
||||
}
|
||||
for _, value := range order[targetIndex+1:] {
|
||||
if containsString(valid, value) {
|
||||
return value
|
||||
}
|
||||
}
|
||||
return valid[0]
|
||||
}
|
||||
|
||||
// indexOfString returns the position of the first element of values equal to
// target, or -1 when there is none.
func indexOfString(values []string, target string) int {
	for i := 0; i < len(values); i++ {
		if values[i] == target {
			return i
		}
	}
	return -1
}
|
||||
|
||||
func imageSizeConstraintEvidence(modelType string, capability map[string]any) (string, any) {
|
||||
for _, key := range []string{"output_size_range", "outputSizeRange", "output_min_size", "outputMinSize", "min_pixels", "minPixels", "output_max_size", "outputMaxSize"} {
|
||||
if value, ok := capability[key]; ok {
|
||||
return capabilityPath(modelType, key), cloneAny(value)
|
||||
}
|
||||
}
|
||||
for _, key := range []string{"width_height_range", "aspect_ratio_range", "width_height_multiple"} {
|
||||
if value, ok := capability[key]; ok {
|
||||
return capabilityPath(modelType, key), cloneAny(value)
|
||||
}
|
||||
}
|
||||
return capabilityPath(modelType, ""), cloneMap(capability)
|
||||
}
|
||||
|
||||
// imageSizeMapsEqual reports whether before and after hold the same keys
// with equivalent values.
//
// Numbers are compared by value regardless of their Go type. A request
// decoded from JSON carries float64 dimensions while the processor writes
// ints, and that difference alone must not count as a change. A number never
// equals a string such as "1024".
func imageSizeMapsEqual(before map[string]any, after map[string]any) bool {
	if len(before) != len(after) {
		return false
	}
	for key, updated := range after {
		original, present := before[key]
		if !present || !imageSizeValuesEqual(original, updated) {
			return false
		}
	}
	return true
}

// imageSizeValuesEqual compares two parameter values, treating numeric values
// of different Go types as equal when they represent the same number.
func imageSizeValuesEqual(left any, right any) bool {
	leftNumber, leftIsNumber := imageSizeNumericValue(left)
	rightNumber, rightIsNumber := imageSizeNumericValue(right)
	if leftIsNumber || rightIsNumber {
		return leftIsNumber && rightIsNumber && leftNumber == rightNumber
	}
	// Non-numeric values keep plain interface comparison.
	return left == right
}

// imageSizeNumericValue converts the numeric types that can appear in request
// parameters to float64. The boolean is false for non-numeric values.
func imageSizeNumericValue(value any) (float64, bool) {
	switch typed := value.(type) {
	case int:
		return float64(typed), true
	case int32:
		return float64(typed), true
	case int64:
		return float64(typed), true
	case float32:
		return float64(typed), true
	case float64:
		return typed, true
	default:
		return 0, false
	}
}
|
||||
|
||||
// inputAudioProcessor is the stateless processor type registered in the
// parameter processor chain under the name "InputAudioProcessor".
type inputAudioProcessor struct{}

// Name returns the processor's name, "InputAudioProcessor".
func (inputAudioProcessor) Name() string {
	return "InputAudioProcessor"
}
|
||||
|
||||
@ -661,6 +661,80 @@ func TestParamProcessorImageResolutionAndOutputCount(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestParamProcessorImageSizeConstraintsNormalizeExplicitDimensions(t *testing.T) {
|
||||
body := map[string]any{
|
||||
"model": "doubao-5.0图像编辑",
|
||||
"prompt": "draw",
|
||||
"resolution": "2K",
|
||||
"width": 1024,
|
||||
"height": 1024,
|
||||
"size": "1024x1024",
|
||||
}
|
||||
candidate := store.RuntimeModelCandidate{
|
||||
ModelType: "image_generate",
|
||||
Capabilities: map[string]any{
|
||||
"image_generate": map[string]any{
|
||||
"output_resolutions": []any{"2K", "3K"},
|
||||
"output_size_range": []any{3686400, 10404496},
|
||||
"aspect_ratio_range": []any{0.0625, 16},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
result := preprocessRequestWithLog("images.generations", body, candidate)
|
||||
if result.Body["width"] != 1920 || result.Body["height"] != 1920 {
|
||||
t.Fatalf("explicit dimensions below model minimum should be scaled to 1920x1920, got %+v", result.Body)
|
||||
}
|
||||
if result.Body["size"] != "1920x1920" {
|
||||
t.Fatalf("size should be synchronized with normalized width/height, got %+v", result.Body)
|
||||
}
|
||||
if result.Body["resolution"] != "2K" {
|
||||
t.Fatalf("resolution should stay on allowed 2K, got %+v", result.Body)
|
||||
}
|
||||
for _, change := range result.Log.Changes {
|
||||
if change.Processor == "ImageSizeProcessor" && change.CapabilityPath == "capabilities.image_generate.output_size_range" {
|
||||
return
|
||||
}
|
||||
}
|
||||
t.Fatalf("expected image size preprocessing log against output_size_range, got %+v", result.Log.Changes)
|
||||
}
|
||||
|
||||
func TestParamProcessorImageSizeConstraintsNormalizeEditDimensions(t *testing.T) {
|
||||
body := map[string]any{
|
||||
"model": "gpt-image-2",
|
||||
"prompt": "edit",
|
||||
"image": "https://example.com/input.png",
|
||||
"width": "513",
|
||||
"height": "513",
|
||||
}
|
||||
candidate := store.RuntimeModelCandidate{
|
||||
ModelType: "image_edit",
|
||||
Capabilities: map[string]any{
|
||||
"image_edit": map[string]any{
|
||||
"aspect_ratio_allowed": []any{"1:1", "16:9"},
|
||||
"aspect_ratio_range": []any{1.0 / 3.0, 3.0},
|
||||
"output_size_range": []any{655360, 8294400},
|
||||
"width_height_range": []any{1, 3840},
|
||||
"width_height_multiple": 16,
|
||||
"input_multiple_images": true,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
processed := preprocessRequest("images.edits", body, candidate)
|
||||
width := int(floatFromAny(processed["width"]))
|
||||
height := int(floatFromAny(processed["height"]))
|
||||
if width != 816 || height != 816 {
|
||||
t.Fatalf("edit dimensions should scale up and align to 16px multiples, got %+v", processed)
|
||||
}
|
||||
if processed["size"] != "816x816" {
|
||||
t.Fatalf("edit size should be synchronized with normalized dimensions, got %+v", processed)
|
||||
}
|
||||
if width*height < 655360 || width%16 != 0 || height%16 != 0 {
|
||||
t.Fatalf("edit dimensions should satisfy model constraints, got %+v", processed)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParamProcessorImageQualityControl(t *testing.T) {
|
||||
body := map[string]any{
|
||||
"model": "mock-image",
|
||||
|
||||
@ -464,6 +464,46 @@ func parsePositiveFloat(value string) float64 {
|
||||
return out
|
||||
}
|
||||
|
||||
func positiveFloatFromAny(value any) float64 {
|
||||
switch typed := value.(type) {
|
||||
case int:
|
||||
return float64(typed)
|
||||
case int64:
|
||||
return float64(typed)
|
||||
case float64:
|
||||
return typed
|
||||
case string:
|
||||
return parsePositiveFloat(typed)
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
func positiveIntegerFromAny(value any) int {
|
||||
number := positiveFloatFromAny(value)
|
||||
if number <= 0 {
|
||||
return 0
|
||||
}
|
||||
return int(math.Round(number))
|
||||
}
|
||||
|
||||
func parsePixelSizeString(value string) (int, int, bool) {
|
||||
value = strings.TrimSpace(strings.ToLower(value))
|
||||
if value == "" || isEmptyParamString(value) {
|
||||
return 0, 0, false
|
||||
}
|
||||
parts := strings.Split(value, "x")
|
||||
if len(parts) != 2 {
|
||||
return 0, 0, false
|
||||
}
|
||||
width := positiveIntegerFromAny(parts[0])
|
||||
height := positiveIntegerFromAny(parts[1])
|
||||
if width <= 0 || height <= 0 {
|
||||
return 0, 0, false
|
||||
}
|
||||
return width, height, true
|
||||
}
|
||||
|
||||
func isEmptyParamString(value string) bool {
|
||||
normalized := strings.ToLower(strings.TrimSpace(value))
|
||||
return normalized == "null" || normalized == "undefined"
|
||||
|
||||
Loading…
Reference in New Issue
Block a user