From ca5e71c8e8a174669bff79d5576ab93b25daaa1c Mon Sep 17 00:00:00 2001 From: wangbo Date: Mon, 8 Jun 2026 08:38:53 +0800 Subject: [PATCH] Prefer public URLs and fall back to base64 for image inputs --- apps/api/internal/runner/request_assets.go | 290 +++++++++++++++++- .../internal/runner/request_assets_test.go | 179 +++++++++++ 2 files changed, 464 insertions(+), 5 deletions(-) diff --git a/apps/api/internal/runner/request_assets.go b/apps/api/internal/runner/request_assets.go index 5c9723b..8f29e49 100644 --- a/apps/api/internal/runner/request_assets.go +++ b/apps/api/internal/runner/request_assets.go @@ -6,6 +6,7 @@ import ( "fmt" "io" "net/http" + "net/netip" "net/url" "os" "path/filepath" @@ -62,11 +63,22 @@ func (s *Service) slimParameterPreprocessingLog(task store.GatewayTask, log para type requestAssetHydrationStyle string const ( - requestAssetHydrateURL requestAssetHydrationStyle = "url" - requestAssetHydrateDataURL requestAssetHydrationStyle = "data_url" - requestAssetHydrateRawBase64 requestAssetHydrationStyle = "raw_base64" + requestAssetHydrateURL requestAssetHydrationStyle = "url" + requestAssetHydrateDataURL requestAssetHydrationStyle = "data_url" + requestAssetHydrateRawBase64 requestAssetHydrationStyle = "raw_base64" + requestAssetHydrateUnsupported requestAssetHydrationStyle = "unsupported" ) +type requestAssetInputFormatFlag struct { + Set bool + Value bool +} + +type requestAssetInputFormatSupport struct { + URL requestAssetInputFormatFlag + Base64 requestAssetInputFormatFlag +} + func (s *Service) hydrateProviderRequestAssets(ctx context.Context, body map[string]any, candidate store.RuntimeModelCandidate) (map[string]any, error) { value, err := s.hydrateProviderRequestAssetValue(ctx, body, nil, candidate) if err != nil { @@ -104,6 +116,8 @@ func (s *Service) hydrateProviderRequestAssetValue(ctx context.Context, value an next = append(next, hydrated) } return next, nil + case string: + return s.hydrateProviderRequestAssetString(ctx, typed, path, candidate) default: return value, nil } @@ -114,7 +128,9 @@ func (s *Service) hydrateProviderRequestAssetRef(ctx context.Context, ref map[st if err != nil { return nil, err } - switch requestAssetHydrationForField(path, candidate) { + switch requestAssetHydrationForField(path, asset, candidate) { + case requestAssetHydrateUnsupported: + return nil, requestAssetUnsupportedInputFormatError(path, asset) case requestAssetHydrateDataURL: payload, err := s.readRequestAssetBytes(ctx, asset) if err != nil { @@ -138,6 +154,67 @@ func (s *Service) hydrateProviderRequestAssetRef(ctx context.Context, ref map[st return asset.URL, nil } +func (s *Service) hydrateProviderRequestAssetString(ctx context.Context, value string, path []string, candidate store.RuntimeModelCandidate) (any, error) { + raw := strings.TrimSpace(value) + if raw == "" || !imageInputFieldNeedsHydration(path) { + return value, nil + } + style, ok := requestAssetCapabilityHydrationForMedia("image", candidate, raw, "") + if !ok { + return value, nil + } + switch style { + case requestAssetHydrateUnsupported: + return nil, requestAssetUnsupportedInputFormatError(path, store.RequestAsset{URL: raw}) + case requestAssetHydrateURL: + return value, nil + case requestAssetHydrateRawBase64: + return s.hydrateProviderRequestAssetStringBase64(ctx, raw, false) + case requestAssetHydrateDataURL: + return s.hydrateProviderRequestAssetStringBase64(ctx, raw, true) + default: + return value, nil + } +} + +func (s *Service) hydrateProviderRequestAssetStringBase64(ctx context.Context, value string, withPrefix bool) (string, error) { + if strings.HasPrefix(value, "data:") { + if withPrefix { + return value, nil + } + return stripDataURLPrefix(value), nil + } + if requestAssetStringLooksURL(value) { + asset := store.RequestAsset{URL: value} + payload, err := s.readRequestAssetBytes(ctx, asset) + if err != nil { + return "", err + } + contentType := http.DetectContentType(payload) + if contentType == "" { + contentType = "application/octet-stream" + } + encoded := base64.StdEncoding.EncodeToString(payload) + if withPrefix { + return "data:" + contentType + ";base64," + encoded, nil + } + return encoded, nil + } + payload, err := decodeBase64Payload(value) + if err != nil { + return "", &clients.ClientError{Code: "request_asset_input_format_unsupported", Message: "image input must be a URL or base64 payload for the selected model", Retryable: false} + } + encoded := base64.StdEncoding.EncodeToString(payload) + if !withPrefix { + return encoded, nil + } + contentType := http.DetectContentType(payload) + if contentType == "" { + contentType = "application/octet-stream" + } + return "data:" + contentType + ";base64," + encoded, nil +} + func (s *Service) resolveRequestAsset(ctx context.Context, ref map[string]any) (store.RequestAsset, error) { sha := stringFromAny(ref["sha256"]) contentType := stringFromAny(ref["contentType"]) @@ -232,10 +309,15 @@ func (s *Service) localPathFromRequestAssetURL(value string) string { return filepath.Join(storageDir, fileName) } -func requestAssetHydrationForField(path []string, candidate store.RuntimeModelCandidate) requestAssetHydrationStyle { +func requestAssetHydrationForField(path []string, asset store.RequestAsset, candidate store.RuntimeModelCandidate) requestAssetHydrationStyle { if providerFieldNeedsRawBase64(path) { return requestAssetHydrateRawBase64 } + if requestAssetMediaKindForHydration(path, asset) == "image" { + if style, ok := requestAssetCapabilityHydrationForMedia("image", candidate, asset.URL, asset.StorageProvider); ok { + return style + } + } if mediaURLFieldNeedsHydration(path) { if style := configuredRequestAssetMediaURLHydration(candidate, requestAssetMediaURLKind(path)); style != "" { return style @@ -247,6 +329,192 @@ func requestAssetHydrationForField(path []string, candidate store.RuntimeModelCa return requestAssetHydrateURL } +func requestAssetMediaKindForHydration(path []string, asset store.RequestAsset) string { + if mediaURLFieldNeedsHydration(path) { + return requestAssetMediaURLKind(path) + } + if imageInputFieldNeedsHydration(path) { + return "image" + } + return "" +} + +func requestAssetCapabilityHydrationForMedia(kind string, candidate store.RuntimeModelCandidate, urlValue string, storageProvider string) (requestAssetHydrationStyle, bool) { + if kind != "image" { + return "", false + } + support := requestAssetInputFormatSupportForCandidate(candidate) + if !support.URL.Set && !support.Base64.Set { + return "", false + } + hasPublicURL := requestAssetURLIsPublic(storageProvider, urlValue) + if support.URL.Set && !support.URL.Value && support.Base64.Set && !support.Base64.Value { + return requestAssetHydrateUnsupported, true + } + if support.URL.Set && !support.URL.Value { + if support.Base64.Set && support.Base64.Value { + return requestAssetBase64CapabilityHydration(candidate), true + } + return requestAssetBase64CapabilityHydration(candidate), true + } + if support.Base64.Set && !support.Base64.Value { + if support.URL.Set && support.URL.Value { + if hasPublicURL { + return requestAssetHydrateURL, true + } + return requestAssetHydrateUnsupported, true + } + if hasPublicURL { + return requestAssetHydrateURL, true + } + return requestAssetHydrateUnsupported, true + } + if support.URL.Set && support.URL.Value && hasPublicURL { + return requestAssetHydrateURL, true + } + if support.URL.Set && support.URL.Value { + return requestAssetBase64CapabilityHydration(candidate), true + } + if support.Base64.Set && support.Base64.Value { + return requestAssetBase64CapabilityHydration(candidate), true + } + return "", false +} + +func requestAssetBase64CapabilityHydration(candidate store.RuntimeModelCandidate) requestAssetHydrationStyle { + switch configuredRequestAssetMediaURLHydration(candidate, "image") { + case requestAssetHydrateRawBase64: + return requestAssetHydrateRawBase64 + case requestAssetHydrateDataURL: + return requestAssetHydrateDataURL + default: + return requestAssetHydrateDataURL + } +} + +func requestAssetInputFormatSupportForCandidate(candidate store.RuntimeModelCandidate) requestAssetInputFormatSupport { + support := requestAssetInputFormatSupport{} + for _, values := range requestAssetCandidateCapabilityMaps(candidate) { + mergeRequestAssetInputFormatSupport(&support, values) + if support.URL.Set || support.Base64.Set { + return support + } + } + mergeRequestAssetInputFormatSupport(&support, candidate.PlatformConfig) + return support +} + +func requestAssetCandidateCapabilityMaps(candidate store.RuntimeModelCandidate) []map[string]any { + capabilities := effectiveModelCapability(candidate) + out := make([]map[string]any, 0) + seen := map[string]bool{} + for _, modelType := range []string{candidate.ModelType, "image_edit", "image_generate", "image_analysis", "image_to_video", "video_generate", "omni_video", "omni"} { + modelType = strings.TrimSpace(modelType) + if modelType == "" || seen[modelType] { + continue + } + seen[modelType] = true + if values := capabilityForType(capabilities, modelType); len(values) > 0 { + out = append(out, values) + } + } + if len(capabilities) > 0 { + out = append(out, capabilities) + } + return out +} + +func mergeRequestAssetInputFormatSupport(support *requestAssetInputFormatSupport, values map[string]any) { + if values == nil { + return + } + if value, ok := requestAssetBoolSetting(values, "support_url_input", "supportUrlInput", "support_url", "supportUrl", "url_input", "urlInput", "supportURLInput"); ok { + support.URL = requestAssetInputFormatFlag{Set: true, Value: value} + } + if value, ok := requestAssetBoolSetting(values, "support_base64_input", "supportBase64Input", "support_base64", "supportBase64", "base64_input", "base64Input", "supportBase64"); ok { + support.Base64 = requestAssetInputFormatFlag{Set: true, Value: value} + } +} + +func requestAssetBoolSetting(values map[string]any, keys ...string) (bool, bool) { + for _, key := range keys { + value, ok := values[key] + if !ok { + continue + } + switch typed := value.(type) { + case bool: + return typed, true + case string: + switch strings.ToLower(strings.TrimSpace(typed)) { + case "true", "1", "yes", "y", "on", "enabled", "enable": + return true, true + case "false", "0", "no", "n", "off", "disabled", "disable": + return false, true + } + } + } + return false, false +} + +func imageInputFieldNeedsHydration(path []string) bool { + key, parent := requestAssetFieldPath(path) + switch key { + case "image", "images", "image_url", "imageurl", "image_urls", "imageurls", "mask", + "first_frame", "firstframe", "last_frame", "lastframe", + "reference_image", "referenceimage", "reference_images", "referenceimages", + "frontal_image_url", "frontalimageurl", "front_image_url", "frontimageurl", + "refer_images", "referimages": + return true + case "url": + switch parent { + case "image_url", "imageurl", "image_urls", "imageurls", "refer_images", "referimages": + return true + } + } + return false +} + +func requestAssetStringLooksURL(value string) bool { + lower := strings.ToLower(strings.TrimSpace(value)) + return strings.HasPrefix(lower, "http://") || + strings.HasPrefix(lower, "https://") || + strings.HasPrefix(lower, "/static/uploaded/") +} + +func requestAssetURLIsPublic(storageProvider string, value string) bool { + if strings.EqualFold(strings.TrimSpace(storageProvider), "local_static") { + return false + } + raw := strings.TrimSpace(value) + if raw == "" { + return false + } + parsed, err := url.Parse(raw) + if err != nil { + return false + } + if !strings.EqualFold(parsed.Scheme, "http") && !strings.EqualFold(parsed.Scheme, "https") { + return false + } + return requestAssetHostIsPublic(parsed.Hostname()) +} + +func requestAssetHostIsPublic(host string) bool { + normalized := strings.ToLower(strings.Trim(strings.TrimSpace(host), "[]")) + if normalized == "" || normalized == "localhost" || strings.HasSuffix(normalized, ".local") { + return false + } + if addr, err := netip.ParseAddr(normalized); err == nil { + return !addr.IsPrivate() && + !addr.IsLoopback() && + !addr.IsLinkLocalUnicast() && + !addr.IsLinkLocalMulticast() && + !addr.IsUnspecified() + } + return strings.Contains(normalized, ".") +} + func mediaURLFieldNeedsHydration(path []string) bool { key, parent := requestAssetFieldPath(path) return key == "url" && (parent == "image_url" || parent == "audio_url" || parent == "video_url" || parent == "file_url") @@ -389,3 +657,15 @@ func requestAssetExpiredError(asset store.RequestAsset) error { } return &clients.ClientError{Code: "request_asset_expired", Message: message, Retryable: false} } + +func requestAssetUnsupportedInputFormatError(path []string, asset store.RequestAsset) error { + field := strings.Join(path, ".") + if field == "" { + field = "media" + } + message := "selected model does not support URL or base64 input for " + field + if asset.SHA256 != "" { + message += ": " + asset.SHA256 + } + return &clients.ClientError{Code: "request_asset_input_format_unsupported", Message: message, Retryable: false} +} diff --git a/apps/api/internal/runner/request_assets_test.go b/apps/api/internal/runner/request_assets_test.go index ae138ad..ea7b4ce 100644 --- a/apps/api/internal/runner/request_assets_test.go +++ b/apps/api/internal/runner/request_assets_test.go @@ -4,6 +4,8 @@ import ( "context" "encoding/base64" "errors" + "net/http" + "net/http/httptest" "os" "path/filepath" "testing" @@ -120,6 +122,183 @@ func TestHydrateProviderRequestAssetsConvertsVolcesImageURLAssetToDataURL(t *tes } } +func TestHydrateProviderRequestAssetsUsesImageCapabilityBase64ForTopLevelImageAsset(t *testing.T) { + storageDir := t.TempDir() + fileName := "gateway-request-asset-edit-image.png" + payload := []byte("edit image bytes") + if err := os.WriteFile(filepath.Join(storageDir, fileName), payload, 0o644); err != nil { + t.Fatalf("write request asset: %v", err) + } + service := &Service{cfg: config.Config{LocalUploadedStorageDir: storageDir}} + body := map[string]any{ + "image": map[string]any{ + "assetRef": map[string]any{ + "sha256": "sha-edit-image", + "contentType": "image/png", + "url": "/static/uploaded/" + fileName, + "storageProvider": "local_static", + }, + "url": "/static/uploaded/" + fileName, + }, + } + + hydrated, err := service.hydrateProviderRequestAssets(context.Background(), body, store.RuntimeModelCandidate{ + ModelType: "image_edit", + Capabilities: map[string]any{ + "image_edit": map[string]any{ + "support_url_input": false, + "support_base64_input": true, + }, + }, + }) + if err != nil { + t.Fatalf("hydrate request assets: %v", err) + } + if got, want := stringFromAny(hydrated["image"]), "data:image/png;base64,"+base64.StdEncoding.EncodeToString(payload); got != want { + t.Fatalf("unexpected hydrated image data url: got %q want %q", got, want) + } +} + +func TestHydrateProviderRequestAssetsImageCapabilityOverridesProviderDataURLDefault(t *testing.T) { + service := &Service{} + body := map[string]any{ + "messages": []any{ + map[string]any{ + "role": "user", + "content": []any{ + map[string]any{ + "type": "image_url", + "image_url": map[string]any{ + "url": map[string]any{ + "assetRef": map[string]any{ + "sha256": "sha-url-only-image", + "contentType": "image/png", + "url": "https://cdn.example.com/request.png", + "storageProvider": "remote", + }, + "url": "https://cdn.example.com/request.png", + }, + }, + }, + }, + }, + }, + } + + hydrated, err := service.hydrateProviderRequestAssets(context.Background(), body, store.RuntimeModelCandidate{ + Provider: "volces", + ModelType: "image_edit", + Capabilities: map[string]any{ + "image_edit": map[string]any{ + "support_url_input": true, + "support_base64_input": false, + }, + }, + }) + if err != nil { + t.Fatalf("hydrate request assets: %v", err) + } + messages := hydrated["messages"].([]any) + message := messages[0].(map[string]any) + content := message["content"].([]any) + imagePart := content[0].(map[string]any) + imageURL := imagePart["image_url"].(map[string]any) + if got, want := stringFromAny(imageURL["url"]), "https://cdn.example.com/request.png"; got != want { + t.Fatalf("image capability should keep URL despite provider default, got %q want %q", got, want) + } +} + +func TestHydrateProviderRequestAssetsUsesBase64ForLocalAssetEvenWhenModelSupportsURL(t *testing.T) { + storageDir := t.TempDir() + fileName := "gateway-request-asset-local-image.png" + payload := []byte("local image bytes") + if err := os.WriteFile(filepath.Join(storageDir, fileName), payload, 0o644); err != nil { + t.Fatalf("write request asset: %v", err) + } + service := &Service{cfg: config.Config{LocalUploadedStorageDir: storageDir}} + body := map[string]any{ + "image": map[string]any{ + "assetRef": map[string]any{ + "sha256": "sha-local-image", + "contentType": "image/png", + "url": "/static/uploaded/" + fileName, + "storageProvider": "local_static", + }, + "url": "/static/uploaded/" + fileName, + }, + } + + hydrated, err := service.hydrateProviderRequestAssets(context.Background(), body, store.RuntimeModelCandidate{ + ModelType: "image_edit", + Capabilities: map[string]any{ + "image_edit": map[string]any{ + "support_url_input": true, + "support_base64_input": true, + }, + }, + }) + if err != nil { + t.Fatalf("hydrate request assets: %v", err) + } + if got, want := stringFromAny(hydrated["image"]), "data:image/png;base64,"+base64.StdEncoding.EncodeToString(payload); got != want { + t.Fatalf("local asset should fall back to base64, got %q want %q", got, want) + } +} + +func TestHydrateProviderRequestAssetsConvertsPlainImageURLWhenModelRequiresBase64(t *testing.T) { + payload := []byte{0x89, 'P', 'N', 'G', '\r', '\n', 0x1a, '\n', 0, 0, 0, 0} + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "image/png") + _, _ = w.Write(payload) + })) + defer server.Close() + service := &Service{} + body := map[string]any{"image": server.URL + "/source.png"} + + hydrated, err := service.hydrateProviderRequestAssets(context.Background(), body, store.RuntimeModelCandidate{ + ModelType: "image_edit", + Capabilities: map[string]any{ + "image_edit": map[string]any{ + "support_url_input": false, + "support_base64_input": true, + }, + }, + }) + if err != nil { + t.Fatalf("hydrate request assets: %v", err) + } + if got, want := stringFromAny(hydrated["image"]), "data:image/png;base64,"+base64.StdEncoding.EncodeToString(payload); got != want { + t.Fatalf("plain image URL should be converted to data URL, got %q want %q", got, want) + } +} + +func TestHydrateProviderRequestAssetsConvertsPrivateImageURLEvenWhenModelSupportsURL(t *testing.T) { + payload := []byte{0x89, 'P', 'N', 'G', '\r', '\n', 0x1a, '\n', 1, 2, 3, 4} + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "image/png") + _, _ = w.Write(payload) + })) + defer server.Close() + service := &Service{} + body := map[string]any{"image": server.URL + "/source.png"} + + hydrated, err := service.hydrateProviderRequestAssets(context.Background(), body, store.RuntimeModelCandidate{ + ModelType: "image_edit", + Capabilities: map[string]any{ + "image_edit": map[string]any{ + "support_url_input": true, + "support_base64_input": true, + }, + }, + }) + if err != nil { + t.Fatalf("hydrate request assets: %v", err) + } + if got, want := stringFromAny(hydrated["image"]), "data:image/png;base64,"+base64.StdEncoding.EncodeToString(payload); got != want { + t.Fatalf("private image URL should fall back to data URL, got %q want %q", got, want) + } +} + func TestHydrateProviderRequestAssetsKeepsImageURLAssetAsURLForProviderURLDefault(t *testing.T) { service := &Service{} body := map[string]any{