Prefer public URLs and fall back to base64 for image inputs

This commit is contained in:
wangbo 2026-06-08 08:38:53 +08:00
parent 679bfeb9c9
commit ca5e71c8e8
2 changed files with 464 additions and 5 deletions

View File

@ -6,6 +6,7 @@ import (
"fmt"
"io"
"net/http"
"net/netip"
"net/url"
"os"
"path/filepath"
@ -62,11 +63,22 @@ func (s *Service) slimParameterPreprocessingLog(task store.GatewayTask, log para
type requestAssetHydrationStyle string
const (
requestAssetHydrateURL requestAssetHydrationStyle = "url"
requestAssetHydrateDataURL requestAssetHydrationStyle = "data_url"
requestAssetHydrateRawBase64 requestAssetHydrationStyle = "raw_base64"
requestAssetHydrateURL requestAssetHydrationStyle = "url"
requestAssetHydrateDataURL requestAssetHydrationStyle = "data_url"
requestAssetHydrateRawBase64 requestAssetHydrationStyle = "raw_base64"
requestAssetHydrateUnsupported requestAssetHydrationStyle = "unsupported"
)
// requestAssetInputFormatFlag is a tri-state boolean for one input-format
// capability: it distinguishes "not configured" from an explicit true/false.
type requestAssetInputFormatFlag struct {
	// Set reports whether a recognised setting was found in the configuration.
	Set bool
	// Value is the configured boolean; it is only meaningful when Set is true.
	Value bool
}
// requestAssetInputFormatSupport groups the URL and base64 input-format flags
// resolved for a model candidate.
type requestAssetInputFormatSupport struct {
	// URL records whether the candidate declares support for URL inputs.
	URL requestAssetInputFormatFlag
	// Base64 records whether the candidate declares support for base64 inputs.
	Base64 requestAssetInputFormatFlag
}
func (s *Service) hydrateProviderRequestAssets(ctx context.Context, body map[string]any, candidate store.RuntimeModelCandidate) (map[string]any, error) {
value, err := s.hydrateProviderRequestAssetValue(ctx, body, nil, candidate)
if err != nil {
@ -104,6 +116,8 @@ func (s *Service) hydrateProviderRequestAssetValue(ctx context.Context, value an
next = append(next, hydrated)
}
return next, nil
case string:
return s.hydrateProviderRequestAssetString(ctx, typed, path, candidate)
default:
return value, nil
}
@ -114,7 +128,9 @@ func (s *Service) hydrateProviderRequestAssetRef(ctx context.Context, ref map[st
if err != nil {
return nil, err
}
switch requestAssetHydrationForField(path, candidate) {
switch requestAssetHydrationForField(path, asset, candidate) {
case requestAssetHydrateUnsupported:
return nil, requestAssetUnsupportedInputFormatError(path, asset)
case requestAssetHydrateDataURL:
payload, err := s.readRequestAssetBytes(ctx, asset)
if err != nil {
@ -138,6 +154,67 @@ func (s *Service) hydrateProviderRequestAssetRef(ctx context.Context, ref map[st
return asset.URL, nil
}
// hydrateProviderRequestAssetString converts a plain string found at an image
// input field into the input format the candidate declares it accepts.
//
// The original value is returned untouched when it is blank, when path is not
// an image input field, when the candidate declares no URL/base64 capability,
// or when the resolved style is "url". An unsupported-format error is returned
// when the candidate accepts neither format for this value. For the two base64
// styles the trimmed value is re-encoded, with or without a data URL prefix.
func (s *Service) hydrateProviderRequestAssetString(ctx context.Context, value string, path []string, candidate store.RuntimeModelCandidate) (any, error) {
	trimmed := strings.TrimSpace(value)
	if trimmed == "" {
		return value, nil
	}
	if !imageInputFieldNeedsHydration(path) {
		return value, nil
	}
	// Plain strings carry no storage provider, hence the empty last argument.
	style, decided := requestAssetCapabilityHydrationForMedia("image", candidate, trimmed, "")
	if !decided {
		return value, nil
	}
	if style == requestAssetHydrateUnsupported {
		return nil, requestAssetUnsupportedInputFormatError(path, store.RequestAsset{URL: trimmed})
	}
	if style == requestAssetHydrateRawBase64 || style == requestAssetHydrateDataURL {
		return s.hydrateProviderRequestAssetStringBase64(ctx, trimmed, style == requestAssetHydrateDataURL)
	}
	// requestAssetHydrateURL and any unrecognised style leave the value as-is.
	return value, nil
}
// hydrateProviderRequestAssetStringBase64 turns an image input string into a
// base64 payload, optionally wrapped as a data URL.
//
// value may be:
//   - a data URL ("data:" prefix): returned unchanged when withPrefix is true,
//     otherwise passed through stripDataURLPrefix;
//   - something requestAssetStringLooksURL accepts: its bytes are loaded with
//     readRequestAssetBytes and encoded;
//   - anything else: treated as a base64 payload, decoded with
//     decodeBase64Payload and re-encoded with standard padding.
//
// When withPrefix is true the result is "data:<type>;base64,<payload>", where
// <type> is sniffed from the bytes. Read errors are returned as-is; a value
// that is neither a URL nor decodable base64 yields a non-retryable
// request_asset_input_format_unsupported client error.
func (s *Service) hydrateProviderRequestAssetStringBase64(ctx context.Context, value string, withPrefix bool) (string, error) {
	if strings.HasPrefix(value, "data:") {
		if withPrefix {
			return value, nil
		}
		return stripDataURLPrefix(value), nil
	}

	var payload []byte
	if requestAssetStringLooksURL(value) {
		// NOTE(review): value comes from the request body; confirm that
		// readRequestAssetBytes restricts which targets may be fetched.
		fetched, err := s.readRequestAssetBytes(ctx, store.RequestAsset{URL: value})
		if err != nil {
			return "", err
		}
		payload = fetched
	} else {
		decoded, err := decodeBase64Payload(value)
		if err != nil {
			return "", &clients.ClientError{Code: "request_asset_input_format_unsupported", Message: "image input must be a URL or base64 payload for the selected model", Retryable: false}
		}
		payload = decoded
	}

	encoded := base64.StdEncoding.EncodeToString(payload)
	if !withPrefix {
		return encoded, nil
	}
	// http.DetectContentType always returns a valid MIME type and falls back
	// to "application/octet-stream" itself, so no empty-string guard is needed.
	return "data:" + http.DetectContentType(payload) + ";base64," + encoded, nil
}
func (s *Service) resolveRequestAsset(ctx context.Context, ref map[string]any) (store.RequestAsset, error) {
sha := stringFromAny(ref["sha256"])
contentType := stringFromAny(ref["contentType"])
@ -232,10 +309,15 @@ func (s *Service) localPathFromRequestAssetURL(value string) string {
return filepath.Join(storageDir, fileName)
}
func requestAssetHydrationForField(path []string, candidate store.RuntimeModelCandidate) requestAssetHydrationStyle {
func requestAssetHydrationForField(path []string, asset store.RequestAsset, candidate store.RuntimeModelCandidate) requestAssetHydrationStyle {
if providerFieldNeedsRawBase64(path) {
return requestAssetHydrateRawBase64
}
if requestAssetMediaKindForHydration(path, asset) == "image" {
if style, ok := requestAssetCapabilityHydrationForMedia("image", candidate, asset.URL, asset.StorageProvider); ok {
return style
}
}
if mediaURLFieldNeedsHydration(path) {
if style := configuredRequestAssetMediaURLHydration(candidate, requestAssetMediaURLKind(path)); style != "" {
return style
@ -247,6 +329,192 @@ func requestAssetHydrationForField(path []string, candidate store.RuntimeModelCa
return requestAssetHydrateURL
}
// requestAssetMediaKindForHydration names the media kind addressed by path.
// Media URL fields report whatever requestAssetMediaURLKind returns, other
// image input fields report "image", and everything else reports "".
// The asset argument is currently not consulted.
func requestAssetMediaKindForHydration(path []string, asset store.RequestAsset) string {
	switch {
	case mediaURLFieldNeedsHydration(path):
		return requestAssetMediaURLKind(path)
	case imageInputFieldNeedsHydration(path):
		return "image"
	default:
		return ""
	}
}
// requestAssetCapabilityHydrationForMedia picks a hydration style from the
// candidate's declared URL/base64 input capabilities.
//
// The boolean result is false when no decision is made: kind is not "image",
// or the candidate declares neither capability. Otherwise the decision is:
//
//	URL denied and base64 denied   -> unsupported
//	URL denied                     -> base64 style
//	base64 denied, public URL      -> url
//	base64 denied, no public URL   -> unsupported
//	URL allowed, public URL        -> url
//	URL or base64 allowed          -> base64 style
//
// "Denied"/"allowed" mean the flag is set to false/true; an unset flag is
// neither. The base64 style comes from requestAssetBase64CapabilityHydration.
func requestAssetCapabilityHydrationForMedia(kind string, candidate store.RuntimeModelCandidate, urlValue string, storageProvider string) (requestAssetHydrationStyle, bool) {
	if kind != "image" {
		return "", false
	}
	support := requestAssetInputFormatSupportForCandidate(candidate)
	if !support.URL.Set && !support.Base64.Set {
		return "", false
	}

	publicURL := requestAssetURLIsPublic(storageProvider, urlValue)
	urlAllowed := support.URL.Set && support.URL.Value
	urlDenied := support.URL.Set && !support.URL.Value
	base64Allowed := support.Base64.Set && support.Base64.Value
	base64Denied := support.Base64.Set && !support.Base64.Value

	switch {
	case urlDenied && base64Denied:
		return requestAssetHydrateUnsupported, true
	case urlDenied:
		return requestAssetBase64CapabilityHydration(candidate), true
	case base64Denied && publicURL:
		return requestAssetHydrateURL, true
	case base64Denied:
		// Only a URL would be accepted, but none is publicly reachable.
		return requestAssetHydrateUnsupported, true
	case urlAllowed && publicURL:
		return requestAssetHydrateURL, true
	case urlAllowed || base64Allowed:
		return requestAssetBase64CapabilityHydration(candidate), true
	}
	return "", false
}
// requestAssetBase64CapabilityHydration selects which base64 flavour to use
// for image inputs: raw base64 when the candidate's configured image
// hydration says so, and a data URL in every other case.
func requestAssetBase64CapabilityHydration(candidate store.RuntimeModelCandidate) requestAssetHydrationStyle {
	if configuredRequestAssetMediaURLHydration(candidate, "image") == requestAssetHydrateRawBase64 {
		return requestAssetHydrateRawBase64
	}
	return requestAssetHydrateDataURL
}
// requestAssetInputFormatSupportForCandidate resolves the URL/base64 input
// flags for a candidate. Capability maps are consulted in the order produced
// by requestAssetCandidateCapabilityMaps and the first one that sets either
// flag wins; if none does, candidate.PlatformConfig is used as a fallback.
func requestAssetInputFormatSupportForCandidate(candidate store.RuntimeModelCandidate) requestAssetInputFormatSupport {
	var support requestAssetInputFormatSupport
	for _, capabilityMap := range requestAssetCandidateCapabilityMaps(candidate) {
		mergeRequestAssetInputFormatSupport(&support, capabilityMap)
		if !support.URL.Set && !support.Base64.Set {
			continue
		}
		return support
	}
	mergeRequestAssetInputFormatSupport(&support, candidate.PlatformConfig)
	return support
}
// requestAssetCandidateCapabilityMaps lists the capability maps to inspect
// for input-format settings, most specific first: the candidate's own model
// type, then a fixed list of image/video model types (each visited once), and
// finally the full capability map itself when it is non-empty.
func requestAssetCandidateCapabilityMaps(candidate store.RuntimeModelCandidate) []map[string]any {
	capabilities := effectiveModelCapability(candidate)
	modelTypes := []string{candidate.ModelType, "image_edit", "image_generate", "image_analysis", "image_to_video", "video_generate", "omni_video", "omni"}

	visited := make(map[string]bool, len(modelTypes))
	found := make([]map[string]any, 0, len(modelTypes)+1)
	for _, name := range modelTypes {
		name = strings.TrimSpace(name)
		if name == "" || visited[name] {
			continue
		}
		visited[name] = true
		values := capabilityForType(capabilities, name)
		if len(values) == 0 {
			continue
		}
		found = append(found, values)
	}
	if len(capabilities) > 0 {
		found = append(found, capabilities)
	}
	return found
}
// mergeRequestAssetInputFormatSupport copies URL/base64 input settings found
// in values into support. A flag is overwritten only when one of its accepted
// key spellings is present with a recognisable boolean value; otherwise the
// existing flag is left untouched. A nil map is a no-op.
func mergeRequestAssetInputFormatSupport(support *requestAssetInputFormatSupport, values map[string]any) {
	if values == nil {
		return
	}
	if allowed, found := requestAssetBoolSetting(values, "support_url_input", "supportUrlInput", "support_url", "supportUrl", "url_input", "urlInput", "supportURLInput"); found {
		support.URL = requestAssetInputFormatFlag{Set: true, Value: allowed}
	}
	// "supportBase64" used to be listed twice; the second lookup could never
	// change the result, so the key appears once.
	if allowed, found := requestAssetBoolSetting(values, "support_base64_input", "supportBase64Input", "support_base64", "supportBase64", "base64_input", "base64Input"); found {
		support.Base64 = requestAssetInputFormatFlag{Set: true, Value: allowed}
	}
}
// requestAssetBoolSetting looks up the first of keys that holds a usable
// boolean in values. Native bools are returned directly; strings are trimmed,
// lower-cased and matched against a fixed set of truthy/falsy words. Keys that
// are absent, hold another type, or hold an unrecognised string are skipped.
// The second result is false when no key yields a value.
func requestAssetBoolSetting(values map[string]any, keys ...string) (bool, bool) {
	for _, key := range keys {
		raw, present := values[key]
		if !present {
			continue
		}
		if flag, isBool := raw.(bool); isBool {
			return flag, true
		}
		text, isString := raw.(string)
		if !isString {
			continue
		}
		switch strings.ToLower(strings.TrimSpace(text)) {
		case "true", "1", "yes", "y", "on", "enabled", "enable":
			return true, true
		case "false", "0", "no", "n", "off", "disabled", "disable":
			return false, true
		}
	}
	return false, false
}
// imageInputFieldNeedsHydration reports whether path addresses an image input
// field. The field key and its parent come from requestAssetFieldPath: a "url"
// key qualifies only under an image-bearing parent, while the other listed
// image key names qualify on their own.
func imageInputFieldNeedsHydration(path []string) bool {
	key, parent := requestAssetFieldPath(path)
	if key == "url" {
		switch parent {
		case "image_url", "imageurl", "image_urls", "imageurls", "refer_images", "referimages":
			return true
		}
		return false
	}
	switch key {
	case "image", "images", "image_url", "imageurl", "image_urls", "imageurls", "mask",
		"first_frame", "firstframe", "last_frame", "lastframe",
		"reference_image", "referenceimage", "reference_images", "referenceimages",
		"frontal_image_url", "frontalimageurl", "front_image_url", "frontimageurl",
		"refer_images", "referimages":
		return true
	}
	return false
}
// requestAssetStringLooksURL reports whether value, after trimming and
// lower-casing, starts with "http://", "https://" or the local upload path
// prefix "/static/uploaded/".
func requestAssetStringLooksURL(value string) bool {
	lowered := strings.ToLower(strings.TrimSpace(value))
	switch {
	case strings.HasPrefix(lowered, "http://"),
		strings.HasPrefix(lowered, "https://"),
		strings.HasPrefix(lowered, "/static/uploaded/"):
		return true
	}
	return false
}
// requestAssetURLIsPublic reports whether value is an http(s) URL whose host
// passes requestAssetHostIsPublic. Assets stored by the "local_static"
// provider, blank values, unparsable URLs and other schemes are never public.
func requestAssetURLIsPublic(storageProvider string, value string) bool {
	if strings.EqualFold(strings.TrimSpace(storageProvider), "local_static") {
		return false
	}
	trimmed := strings.TrimSpace(value)
	if trimmed == "" {
		return false
	}
	parsed, err := url.Parse(trimmed)
	if err != nil {
		return false
	}
	isHTTP := strings.EqualFold(parsed.Scheme, "http") || strings.EqualFold(parsed.Scheme, "https")
	if !isHTTP {
		return false
	}
	return requestAssetHostIsPublic(parsed.Hostname())
}
// requestAssetHostIsPublic reports whether host looks reachable from the
// public internet, judged purely from its textual form (no DNS lookup).
//
// host may be a DNS name or an IP literal, optionally wrapped in brackets and
// optionally ending in the root dot of a fully-qualified name. IP literals are
// public only when they are global unicast and not in a private range, which
// rules out loopback, link-local, multicast, broadcast and unspecified
// addresses; IPv4-mapped IPv6 literals are judged by their embedded IPv4
// address. DNS names are rejected when empty, when they are "localhost" or a
// subdomain of it, when they end in ".local", or when they contain no dot.
func requestAssetHostIsPublic(host string) bool {
	normalized := strings.ToLower(strings.Trim(strings.TrimSpace(host), "[]"))
	// "localhost." and "127.0.0.1." name the same hosts as their undotted
	// forms; drop the root dot so they cannot slip past the checks below.
	normalized = strings.TrimRight(normalized, ".")
	if normalized == "" {
		return false
	}
	if addr, err := netip.ParseAddr(normalized); err == nil {
		// Unmap so "::ffff:10.0.0.1" is classified as the IPv4 address it wraps.
		addr = addr.Unmap()
		// IsGlobalUnicast still accepts private ranges, hence the extra check.
		return addr.IsGlobalUnicast() && !addr.IsPrivate()
	}
	if normalized == "localhost" ||
		strings.HasSuffix(normalized, ".localhost") ||
		strings.HasSuffix(normalized, ".local") {
		return false
	}
	// Single-label names only resolve inside a local network.
	return strings.Contains(normalized, ".")
}
func mediaURLFieldNeedsHydration(path []string) bool {
key, parent := requestAssetFieldPath(path)
return key == "url" && (parent == "image_url" || parent == "audio_url" || parent == "video_url" || parent == "file_url")
@ -389,3 +657,15 @@ func requestAssetExpiredError(asset store.RequestAsset) error {
}
return &clients.ClientError{Code: "request_asset_expired", Message: message, Retryable: false}
}
// requestAssetUnsupportedInputFormatError builds the non-retryable client
// error reported when the selected model accepts neither URL nor base64 input
// for a field. The field is named by its dot-joined path ("media" when the
// path is empty), and the asset's SHA-256 is appended when it is known.
func requestAssetUnsupportedInputFormatError(path []string, asset store.RequestAsset) error {
	field := "media"
	if joined := strings.Join(path, "."); joined != "" {
		field = joined
	}
	message := "selected model does not support URL or base64 input for " + field
	if sha := asset.SHA256; sha != "" {
		message = message + ": " + sha
	}
	return &clients.ClientError{Code: "request_asset_input_format_unsupported", Message: message, Retryable: false}
}

View File

@ -4,6 +4,8 @@ import (
"context"
"encoding/base64"
"errors"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"testing"
@ -120,6 +122,183 @@ func TestHydrateProviderRequestAssetsConvertsVolcesImageURLAssetToDataURL(t *tes
}
}
func TestHydrateProviderRequestAssetsUsesImageCapabilityBase64ForTopLevelImageAsset(t *testing.T) {
storageDir := t.TempDir()
fileName := "gateway-request-asset-edit-image.png"
payload := []byte("edit image bytes")
if err := os.WriteFile(filepath.Join(storageDir, fileName), payload, 0o644); err != nil {
t.Fatalf("write request asset: %v", err)
}
service := &Service{cfg: config.Config{LocalUploadedStorageDir: storageDir}}
body := map[string]any{
"image": map[string]any{
"assetRef": map[string]any{
"sha256": "sha-edit-image",
"contentType": "image/png",
"url": "/static/uploaded/" + fileName,
"storageProvider": "local_static",
},
"url": "/static/uploaded/" + fileName,
},
}
hydrated, err := service.hydrateProviderRequestAssets(context.Background(), body, store.RuntimeModelCandidate{
ModelType: "image_edit",
Capabilities: map[string]any{
"image_edit": map[string]any{
"support_url_input": false,
"support_base64_input": true,
},
},
})
if err != nil {
t.Fatalf("hydrate request assets: %v", err)
}
if got, want := stringFromAny(hydrated["image"]), "data:image/png;base64,"+base64.StdEncoding.EncodeToString(payload); got != want {
t.Fatalf("unexpected hydrated image data url: got %q want %q", got, want)
}
}
func TestHydrateProviderRequestAssetsImageCapabilityOverridesProviderDataURLDefault(t *testing.T) {
service := &Service{}
body := map[string]any{
"messages": []any{
map[string]any{
"role": "user",
"content": []any{
map[string]any{
"type": "image_url",
"image_url": map[string]any{
"url": map[string]any{
"assetRef": map[string]any{
"sha256": "sha-url-only-image",
"contentType": "image/png",
"url": "https://cdn.example.com/request.png",
"storageProvider": "remote",
},
"url": "https://cdn.example.com/request.png",
},
},
},
},
},
},
}
hydrated, err := service.hydrateProviderRequestAssets(context.Background(), body, store.RuntimeModelCandidate{
Provider: "volces",
ModelType: "image_edit",
Capabilities: map[string]any{
"image_edit": map[string]any{
"support_url_input": true,
"support_base64_input": false,
},
},
})
if err != nil {
t.Fatalf("hydrate request assets: %v", err)
}
messages := hydrated["messages"].([]any)
message := messages[0].(map[string]any)
content := message["content"].([]any)
imagePart := content[0].(map[string]any)
imageURL := imagePart["image_url"].(map[string]any)
if got, want := stringFromAny(imageURL["url"]), "https://cdn.example.com/request.png"; got != want {
t.Fatalf("image capability should keep URL despite provider default, got %q want %q", got, want)
}
}
func TestHydrateProviderRequestAssetsUsesBase64ForLocalAssetEvenWhenModelSupportsURL(t *testing.T) {
storageDir := t.TempDir()
fileName := "gateway-request-asset-local-image.png"
payload := []byte("local image bytes")
if err := os.WriteFile(filepath.Join(storageDir, fileName), payload, 0o644); err != nil {
t.Fatalf("write request asset: %v", err)
}
service := &Service{cfg: config.Config{LocalUploadedStorageDir: storageDir}}
body := map[string]any{
"image": map[string]any{
"assetRef": map[string]any{
"sha256": "sha-local-image",
"contentType": "image/png",
"url": "/static/uploaded/" + fileName,
"storageProvider": "local_static",
},
"url": "/static/uploaded/" + fileName,
},
}
hydrated, err := service.hydrateProviderRequestAssets(context.Background(), body, store.RuntimeModelCandidate{
ModelType: "image_edit",
Capabilities: map[string]any{
"image_edit": map[string]any{
"support_url_input": true,
"support_base64_input": true,
},
},
})
if err != nil {
t.Fatalf("hydrate request assets: %v", err)
}
if got, want := stringFromAny(hydrated["image"]), "data:image/png;base64,"+base64.StdEncoding.EncodeToString(payload); got != want {
t.Fatalf("local asset should fall back to base64, got %q want %q", got, want)
}
}
func TestHydrateProviderRequestAssetsConvertsPlainImageURLWhenModelRequiresBase64(t *testing.T) {
payload := []byte{0x89, 'P', 'N', 'G', '\r', '\n', 0x1a, '\n', 0, 0, 0, 0}
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "image/png")
_, _ = w.Write(payload)
}))
defer server.Close()
service := &Service{}
body := map[string]any{"image": server.URL + "/source.png"}
hydrated, err := service.hydrateProviderRequestAssets(context.Background(), body, store.RuntimeModelCandidate{
ModelType: "image_edit",
Capabilities: map[string]any{
"image_edit": map[string]any{
"support_url_input": false,
"support_base64_input": true,
},
},
})
if err != nil {
t.Fatalf("hydrate request assets: %v", err)
}
if got, want := stringFromAny(hydrated["image"]), "data:image/png;base64,"+base64.StdEncoding.EncodeToString(payload); got != want {
t.Fatalf("plain image URL should be converted to data URL, got %q want %q", got, want)
}
}
func TestHydrateProviderRequestAssetsConvertsPrivateImageURLEvenWhenModelSupportsURL(t *testing.T) {
payload := []byte{0x89, 'P', 'N', 'G', '\r', '\n', 0x1a, '\n', 1, 2, 3, 4}
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "image/png")
_, _ = w.Write(payload)
}))
defer server.Close()
service := &Service{}
body := map[string]any{"image": server.URL + "/source.png"}
hydrated, err := service.hydrateProviderRequestAssets(context.Background(), body, store.RuntimeModelCandidate{
ModelType: "image_edit",
Capabilities: map[string]any{
"image_edit": map[string]any{
"support_url_input": true,
"support_base64_input": true,
},
},
})
if err != nil {
t.Fatalf("hydrate request assets: %v", err)
}
if got, want := stringFromAny(hydrated["image"]), "data:image/png;base64,"+base64.StdEncoding.EncodeToString(payload); got != want {
t.Fatalf("private image URL should fall back to data URL, got %q want %q", got, want)
}
}
func TestHydrateProviderRequestAssetsKeepsImageURLAssetAsURLForProviderURLDefault(t *testing.T) {
service := &Service{}
body := map[string]any{