diff --git a/apps/api/internal/runner/request_assets.go b/apps/api/internal/runner/request_assets.go index 91dd665..5c9723b 100644 --- a/apps/api/internal/runner/request_assets.go +++ b/apps/api/internal/runner/request_assets.go @@ -59,8 +59,16 @@ func (s *Service) slimParameterPreprocessingLog(task store.GatewayTask, log para return log } -func (s *Service) hydrateProviderRequestAssets(ctx context.Context, body map[string]any) (map[string]any, error) { - value, err := s.hydrateProviderRequestAssetValue(ctx, body, nil) +type requestAssetHydrationStyle string + +const ( + requestAssetHydrateURL requestAssetHydrationStyle = "url" + requestAssetHydrateDataURL requestAssetHydrationStyle = "data_url" + requestAssetHydrateRawBase64 requestAssetHydrationStyle = "raw_base64" +) + +func (s *Service) hydrateProviderRequestAssets(ctx context.Context, body map[string]any, candidate store.RuntimeModelCandidate) (map[string]any, error) { + value, err := s.hydrateProviderRequestAssetValue(ctx, body, nil, candidate) if err != nil { return nil, err } @@ -71,15 +79,15 @@ func (s *Service) hydrateProviderRequestAssets(ctx context.Context, body map[str return out, nil } -func (s *Service) hydrateProviderRequestAssetValue(ctx context.Context, value any, path []string) (any, error) { +func (s *Service) hydrateProviderRequestAssetValue(ctx context.Context, value any, path []string, candidate store.RuntimeModelCandidate) (any, error) { switch typed := value.(type) { case map[string]any: if ref, ok := typed["assetRef"].(map[string]any); ok { - return s.hydrateProviderRequestAssetRef(ctx, ref, path) + return s.hydrateProviderRequestAssetRef(ctx, ref, path, candidate) } next := make(map[string]any, len(typed)) for key, item := range typed { - hydrated, err := s.hydrateProviderRequestAssetValue(ctx, item, append(path, key)) + hydrated, err := s.hydrateProviderRequestAssetValue(ctx, item, append(path, key), candidate) if err != nil { return nil, err } @@ -89,7 +97,7 @@ func (s *Service) hydrateProviderRequestAssetValue(ctx context.Context, value an case []any: next := make([]any, 0, len(typed)) for index, item := range typed { - hydrated, err := s.hydrateProviderRequestAssetValue(ctx, item, append(path, fmt.Sprintf("[%d]", index))) + hydrated, err := s.hydrateProviderRequestAssetValue(ctx, item, append(path, fmt.Sprintf("[%d]", index)), candidate) if err != nil { return nil, err } @@ -101,12 +109,23 @@ func (s *Service) hydrateProviderRequestAssetValue(ctx context.Context, value an } } -func (s *Service) hydrateProviderRequestAssetRef(ctx context.Context, ref map[string]any, path []string) (any, error) { +func (s *Service) hydrateProviderRequestAssetRef(ctx context.Context, ref map[string]any, path []string, candidate store.RuntimeModelCandidate) (any, error) { asset, err := s.resolveRequestAsset(ctx, ref) if err != nil { return nil, err } - if providerFieldNeedsBase64(path) { + switch requestAssetHydrationForField(path, candidate) { + case requestAssetHydrateDataURL: + payload, err := s.readRequestAssetBytes(ctx, asset) + if err != nil { + return nil, err + } + contentType := strings.TrimSpace(asset.ContentType) + if contentType == "" { + contentType = "application/octet-stream" + } + return "data:" + contentType + ";base64," + base64.StdEncoding.EncodeToString(payload), nil + case requestAssetHydrateRawBase64: payload, err := s.readRequestAssetBytes(ctx, asset) if err != nil { return nil, err @@ -213,15 +232,28 @@ func (s *Service) localPathFromRequestAssetURL(value string) string { return filepath.Join(storageDir, fileName) } -func providerFieldNeedsBase64(path []string) bool { - if len(path) == 0 { - return false +func requestAssetHydrationForField(path []string, candidate store.RuntimeModelCandidate) requestAssetHydrationStyle { + if providerFieldNeedsRawBase64(path) { + return requestAssetHydrateRawBase64 } - key := strings.ToLower(strings.Trim(path[len(path)-1], "[]")) - parent := "" - if len(path) > 1 { - parent = strings.ToLower(strings.Trim(path[len(path)-2], "[]")) + if mediaURLFieldNeedsHydration(path) { + if style := configuredRequestAssetMediaURLHydration(candidate, requestAssetMediaURLKind(path)); style != "" { + return style + } + if providerMediaURLNeedsDataURL(candidate) { + return requestAssetHydrateDataURL + } } + return requestAssetHydrateURL +} + +func mediaURLFieldNeedsHydration(path []string) bool { + key, parent := requestAssetFieldPath(path) + return key == "url" && (parent == "image_url" || parent == "audio_url" || parent == "video_url" || parent == "file_url") +} + +func providerFieldNeedsRawBase64(path []string) bool { + key, parent := requestAssetFieldPath(path) return key == "b64_json" || key == "base64" || key == "b64" || @@ -230,6 +262,116 @@ func providerFieldNeedsBase64(path []string) bool { (parent == "input_audio" && key == "data") } +func requestAssetMediaURLKind(path []string) string { + _, parent := requestAssetFieldPath(path) + switch parent { + case "image_url": + return "image" + case "audio_url": + return "audio" + case "video_url": + return "video" + case "file_url": + return "file" + default: + return "" + } +} + +func configuredRequestAssetMediaURLHydration(candidate store.RuntimeModelCandidate, kind string) requestAssetHydrationStyle { + keys := []string{} + switch kind { + case "image": + keys = append(keys, "requestAssetImageURLFormat", "request_asset_image_url_format") + case "audio": + keys = append(keys, "requestAssetAudioURLFormat", "request_asset_audio_url_format") + case "video": + keys = append(keys, "requestAssetVideoURLFormat", "request_asset_video_url_format") + case "file": + keys = append(keys, "requestAssetFileURLFormat", "request_asset_file_url_format") + } + keys = append(keys, + "requestAssetMediaURLFormat", + "request_asset_media_url_format", + "mediaURLAssetFormat", + "media_url_asset_format", + ) + for _, key := range keys { + if style := requestAssetHydrationStyleFromString(stringFromAny(candidate.PlatformConfig[key])); style != "" { + return style + } + } + return "" +} + +func requestAssetHydrationStyleFromString(value string) requestAssetHydrationStyle { + normalized := strings.ToLower(strings.TrimSpace(value)) + normalized = strings.ReplaceAll(normalized, "-", "_") + normalized = strings.ReplaceAll(normalized, " ", "_") + switch normalized { + case "url", "remote_url", "public_url": + return requestAssetHydrateURL + case "data_url", "dataurl", "prefixed_base64", "base64_with_prefix", "base64_with_data_url_prefix": + return requestAssetHydrateDataURL + case "raw_base64", "base64", "bare_base64", "naked_base64": + return requestAssetHydrateRawBase64 + default: + return "" + } +} + +func providerMediaURLNeedsDataURL(candidate store.RuntimeModelCandidate) bool { + for _, name := range []string{candidate.Provider, candidate.SpecType, candidate.PlatformKey} { + switch normalizeProviderKey(name) { + case "openai", "volces", "volces_openai", "gemini", "vidu": + return true + } + } + return false +} + +func normalizeProviderKey(value string) string { + normalized := strings.ToLower(strings.TrimSpace(value)) + normalized = strings.ReplaceAll(normalized, "-", "_") + normalized = strings.ReplaceAll(normalized, " ", "_") + return normalized +} + +func requestAssetFieldPath(path []string) (string, string) { + if len(path) == 0 { + return "", "" + } + names := make([]string, 0, len(path)) + for _, segment := range path { + name := strings.ToLower(strings.TrimSpace(strings.Trim(segment, "[]"))) + if name == "" || requestAssetPathSegmentIsIndex(name) { + continue + } + names = append(names, name) + } + if len(names) == 0 { + return "", "" + } + key := names[len(names)-1] + parent := "" + if len(names) > 1 { + parent = names[len(names)-2] + } + return key, parent +} + +func requestAssetPathSegmentIsIndex(value string) bool { + if value == "" { + return false + } + for _, char := range value { + if char < '0' || char > '9' { + return false + } + } + return true +} + func requestAssetIsExpired(asset store.RequestAsset, now time.Time) bool { if asset.ExpiredAt != nil { return true diff --git a/apps/api/internal/runner/request_assets_test.go b/apps/api/internal/runner/request_assets_test.go index 1610010..ae138ad 100644 --- a/apps/api/internal/runner/request_assets_test.go +++ b/apps/api/internal/runner/request_assets_test.go @@ -34,7 +34,7 @@ func TestHydrateProviderRequestAssetsConvertsStrictBase64Field(t *testing.T) { }, } - hydrated, err := service.hydrateProviderRequestAssets(context.Background(), body) + hydrated, err := service.hydrateProviderRequestAssets(context.Background(), body, store.RuntimeModelCandidate{}) if err != nil { t.Fatalf("hydrate request assets: %v", err) } @@ -43,6 +43,123 @@ func TestHydrateProviderRequestAssetsConvertsStrictBase64Field(t *testing.T) { } } +func TestHydrateProviderRequestAssetsConvertsBase64ArrayField(t *testing.T) { + storageDir := t.TempDir() + fileName := "gateway-request-asset-array.png" + if err := os.WriteFile(filepath.Join(storageDir, fileName), []byte("array image bytes"), 0o644); err != nil { + t.Fatalf("write request asset: %v", err) + } + service := &Service{cfg: config.Config{LocalUploadedStorageDir: storageDir}} + body := map[string]any{ + "binary_data_base64": []any{ + map[string]any{ + "assetRef": map[string]any{ + "sha256": "sha-array-image", + "contentType": "image/png", + "url": "/static/uploaded/" + fileName, + "storageProvider": "local_static", + }, + "url": "/static/uploaded/" + fileName, + }, + }, + } + + hydrated, err := service.hydrateProviderRequestAssets(context.Background(), body, store.RuntimeModelCandidate{}) + if err != nil { + t.Fatalf("hydrate request assets: %v", err) + } + values := hydrated["binary_data_base64"].([]any) + if got, want := stringFromAny(values[0]), base64.StdEncoding.EncodeToString([]byte("array image bytes")); got != want { + t.Fatalf("unexpected hydrated array base64: got %q want %q", got, want) + } +} + +func TestHydrateProviderRequestAssetsConvertsVolcesImageURLAssetToDataURL(t *testing.T) { + storageDir := t.TempDir() + fileName := "gateway-request-asset-chat-image.png" + if err := os.WriteFile(filepath.Join(storageDir, fileName), []byte("chat image bytes"), 0o644); err != nil { + t.Fatalf("write request asset: %v", err) + } + service := &Service{cfg: config.Config{LocalUploadedStorageDir: storageDir}} + body := map[string]any{ + "messages": []any{ + map[string]any{ + "role": "user", + "content": []any{ + map[string]any{"type": "text", "text": "describe it"}, + map[string]any{ + "type": "image_url", + "image_url": map[string]any{ + "url": map[string]any{ + "assetRef": map[string]any{ + "sha256": "sha-chat-image", + "contentType": "image/png", + "url": "/static/uploaded/" + fileName, + "storageProvider": "local_static", + }, + "url": "/static/uploaded/" + fileName, + }, + }, + }, + }, + }, + }, + } + + hydrated, err := service.hydrateProviderRequestAssets(context.Background(), body, store.RuntimeModelCandidate{Provider: "volces"}) + if err != nil { + t.Fatalf("hydrate request assets: %v", err) + } + messages := hydrated["messages"].([]any) + message := messages[0].(map[string]any) + content := message["content"].([]any) + imagePart := content[1].(map[string]any) + imageURL := imagePart["image_url"].(map[string]any) + if got, want := stringFromAny(imageURL["url"]), "data:image/png;base64,"+base64.StdEncoding.EncodeToString([]byte("chat image bytes")); got != want { + t.Fatalf("unexpected hydrated image data url: got %q want %q", got, want) + } +} + +func TestHydrateProviderRequestAssetsKeepsImageURLAssetAsURLForProviderURLDefault(t *testing.T) { + service := &Service{} + body := map[string]any{ + "messages": []any{ + map[string]any{ + "role": "user", + "content": []any{ + map[string]any{ + "type": "image_url", + "image_url": map[string]any{ + "url": map[string]any{ + "assetRef": map[string]any{ + "sha256": "sha-url-image", + "contentType": "image/png", + "url": "https://cdn.example.com/request.png", + "storageProvider": "remote", + }, + "url": "https://cdn.example.com/request.png", + }, + }, + }, + }, + }, + }, + } + + hydrated, err := service.hydrateProviderRequestAssets(context.Background(), body, store.RuntimeModelCandidate{Provider: "jimeng"}) + if err != nil { + t.Fatalf("hydrate request assets: %v", err) + } + messages := hydrated["messages"].([]any) + message := messages[0].(map[string]any) + content := message["content"].([]any) + imagePart := content[0].(map[string]any) + imageURL := imagePart["image_url"].(map[string]any) + if got, want := stringFromAny(imageURL["url"]), "https://cdn.example.com/request.png"; got != want { + t.Fatalf("unexpected hydrated image URL: got %q want %q", got, want) + } +} + func TestHydrateProviderRequestAssetsReturnsExpiredError(t *testing.T) { expiredAt := time.Now().Add(-time.Minute).UTC().Format(time.RFC3339) service := &Service{} @@ -59,7 +176,7 @@ func TestHydrateProviderRequestAssetsReturnsExpiredError(t *testing.T) { }, } - _, err := service.hydrateProviderRequestAssets(context.Background(), body) + _, err := service.hydrateProviderRequestAssets(context.Background(), body, store.RuntimeModelCandidate{}) if err == nil { t.Fatal("expected expired request asset error") } diff --git a/apps/api/internal/runner/service.go b/apps/api/internal/runner/service.go index c3635a0..d41ce59 100644 --- a/apps/api/internal/runner/service.go +++ b/apps/api/internal/runner/service.go @@ -564,7 +564,7 @@ func (s *Service) runCandidate(ctx context.Context, task store.GatewayTask, user return clients.Response{}, fmt.Errorf("prepare http client: %w", err) } client := s.clientFor(candidate, simulated) - providerBody, err := s.hydrateProviderRequestAssets(ctx, body) + providerBody, err := s.hydrateProviderRequestAssets(ctx, body, candidate) if err != nil { _ = s.store.FinishTaskAttempt(ctx, store.FinishTaskAttemptInput{ AttemptID: attemptID,