diff --git a/apps/api/internal/runner/pricing.go b/apps/api/internal/runner/pricing.go
index 6c01387..f2c4fde 100644
--- a/apps/api/internal/runner/pricing.go
+++ b/apps/api/internal/runner/pricing.go
@@ -82,19 +82,25 @@ func (s *Service) billings(ctx context.Context, user *auth.User, kind string, bo
 		resource = "video"
 		unit = "5s_video"
 		baseKey = "videoBase"
-		duration := requestDurationSeconds(body)
+		// Price the video that was actually produced when the response result
+		// reports its duration/audio; otherwise fall back to the request body.
+		duration, durationSource := billingDurationSeconds(body, response)
+		audioEnabled, audioSource := billingAudioEnabled(body, response)
 		durationUnits := math.Max(1, math.Ceil(duration/5))
 		amount := float64(count) *
 			durationUnits *
 			resourcePrice(config, resource, baseKey, "basePrice") *
 			resourceWeight(config, resource, "resolutionWeights", firstNonEmptyString(stringFromMap(body, "resolution"), stringFromMap(body, "size"))) *
-			resourceWeight(config, resource, "audioWeights", boolWeightKey(boolishValue(body["audio"]))) *
+			resourceWeight(config, resource, "audioWeights", boolWeightKey(audioEnabled)) *
 			resourceWeight(config, resource, "referenceVideoWeights", boolWeightKey(requestHasReferenceVideo(body))) *
-			resourceWeight(config, resource, "voiceSpecifiedWeights", boolWeightKey(requestHasVoiceID(body))) *
+			resourceWeight(config, resource, "voiceSpecifiedWeights", boolWeightKey(requestHasVoiceID(body, audioEnabled))) *
 			discount
 		return []any{billingLineWithDetails(candidate, resource, unit, count*int(durationUnits), roundPrice(amount), discount, simulated, map[string]any{
 			"count":             count,
+			"audio":             audioEnabled,
+			"audioSource":       audioSource,
 			"durationSeconds":   duration,
+			"durationSource":    durationSource,
 			"durationUnit":      "5s",
 			"durationUnitCount": durationUnits,
 		})}
@@ -345,6 +351,67 @@ func requestDurationSeconds(body map[string]any) float64 {
 	return 5
 }
 
+// billingDurationSeconds returns the duration, in seconds, used to price a
+// video task plus a label for where it came from: "generated_video" when the
+// response result reports a usable duration, otherwise "preprocessed_request"
+// with the value from requestDurationSeconds.
+func billingDurationSeconds(body map[string]any, response clients.Response) (float64, string) {
+	if duration, ok := generatedVideoDurationSeconds(response.Result); ok {
+		return duration, "generated_video"
+	}
+	return requestDurationSeconds(body), "preprocessed_request"
+}
+
+// generatedVideoDurationSeconds scans result["data"] and returns the first
+// positive, finite "duration", rounded to the nearest second and never below
+// 1. The second result is false when no item carries a usable duration.
+func generatedVideoDurationSeconds(result map[string]any) (float64, bool) {
+	data, _ := result["data"].([]any)
+	for _, raw := range data {
+		item, _ := raw.(map[string]any)
+		if len(item) == 0 {
+			continue
+		}
+		duration := floatFromAny(item["duration"])
+		// NaN fails every ordered comparison, so it must be rejected explicitly;
+		// a NaN or infinite duration would otherwise flow into the billed amount.
+		if duration <= 0 || math.IsNaN(duration) || math.IsInf(duration, 0) {
+			continue
+		}
+		// Durations under half a second round to 0; bill them as one second.
+		return math.Max(1, math.Round(duration)), true
+	}
+	return 0, false
+}
+
+// billingAudioEnabled reports whether the audio weight applies and where the
+// answer came from: "generated_video" when the response result carries a
+// recognizable has_audio/hasAudio flag, otherwise "preprocessed_request"
+// using the request's "audio" field.
+func billingAudioEnabled(body map[string]any, response clients.Response) (bool, string) {
+	if value, ok := generatedVideoHasAudio(response.Result); ok {
+		return value, "generated_video"
+	}
+	return boolishValue(body["audio"]), "preprocessed_request"
+}
+
+// generatedVideoHasAudio returns the first recognizable has_audio/hasAudio
+// flag found on an item of result["data"]. The second result is false when no
+// item carries one.
+func generatedVideoHasAudio(result map[string]any) (bool, bool) {
+	data, _ := result["data"].([]any)
+	for _, raw := range data {
+		item, _ := raw.(map[string]any)
+		if len(item) == 0 {
+			continue
+		}
+		if value, ok := boolishOptional(firstPresentValue(item, "has_audio", "hasAudio")); ok {
+			return value, true
+		}
+	}
+	return false, false
+}
+
 func requestHasReferenceVideo(body map[string]any) bool {
 	if hasNonEmptyArray(body["video_list"]) || hasNonEmptyArray(body["videoList"]) {
 		return true
@@ -367,8 +434,10 @@ func requestHasReferenceVideo(body map[string]any) bool {
 	return false
 }
 
-func requestHasVoiceID(body map[string]any) bool {
-	return boolishValue(body["audio"]) && firstNonEmptyStringValue(body, "voice_id", "voiceId") != ""
+// requestHasVoiceID reports whether the voice-specified weight applies: audio
+// must be enabled for billing and the request must name a voice_id/voiceId.
+func requestHasVoiceID(body map[string]any, audioEnabled bool) bool {
+	return audioEnabled && firstNonEmptyStringValue(body, "voice_id", "voiceId") != ""
 }
 
 func boolWeightKey(value bool) string {
@@ -379,25 +448,48 @@ func boolWeightKey(value bool) string {
 }
 
+// boolishValue interprets value as a boolean flag, treating anything that
+// boolishOptional does not recognize as false.
 func boolishValue(value any) bool {
+	result, _ := boolishOptional(value)
+	return result
+}
+
+// boolishOptional interprets value as a boolean flag. The second result is
+// false when value is not a bool, an int/int64/float64, or one of the
+// recognized strings (true/1/yes/on, false/0/no/off; trimmed, any case),
+// which lets callers tell "explicitly false" apart from "not provided".
+func boolishOptional(value any) (bool, bool) {
 	switch typed := value.(type) {
 	case bool:
-		return typed
+		return typed, true
 	case string:
 		switch strings.ToLower(strings.TrimSpace(typed)) {
 		case "true", "1", "yes", "on":
-			return true
-		default:
-			return false
+			return true, true
+		case "false", "0", "no", "off":
+			return false, true
 		}
 	case int:
-		return typed != 0
+		return typed != 0, true
 	case int64:
-		return typed != 0
+		return typed != 0, true
 	case float64:
-		return typed != 0
-	default:
-		return false
+		return typed != 0, true
 	}
+	return false, false
+}
+
+// firstPresentValue returns the value stored under the first of keys that
+// holds a non-nil value in record, or nil when none does. Nil entries are
+// skipped so that a JSON null under one spelling does not hide a real value
+// stored under an alternate spelling of the same field.
+func firstPresentValue(record map[string]any, keys ...string) any {
+	for _, key := range keys {
+		if value := record[key]; value != nil {
+			return value
+		}
+	}
+	return nil
 }
 
 func hasNonEmptyArray(value any) bool {
diff --git a/apps/api/internal/runner/pricing_test.go b/apps/api/internal/runner/pricing_test.go
index 45280ea..43b0c74 100644
--- a/apps/api/internal/runner/pricing_test.go
+++ b/apps/api/internal/runner/pricing_test.go
@@ -76,6 +76,122 @@ func TestVideoBillingEstimateUsesFiveSecondUnitsAndDynamicWeights(t *testing.T)
 	if got, want := line["quantity"], 3; got != want {
 		t.Fatalf("video quantity = %v, want %v", got, want)
 	}
+	if got, want := line["durationSource"], "preprocessed_request"; got != want {
+		t.Fatalf("video duration source = %v, want %v", got, want)
+	}
+	if got, want := line["audioSource"], "preprocessed_request"; got != want {
+		t.Fatalf("video audio source = %v, want %v", got, want)
+	}
+}
+
+// TestVideoBillingPrefersGeneratedDuration checks that a duration reported on
+// the generated video overrides the requested duration when pricing.
+func TestVideoBillingPrefersGeneratedDuration(t *testing.T) {
+	service := &Service{}
+	candidate := store.RuntimeModelCandidate{
+		ModelName: "video-model",
+		BaseBillingConfig: map[string]any{
+			"video": map[string]any{"basePrice": 100},
+		},
+	}
+
+	items := service.billings(context.Background(), nil, "videos.generations", map[string]any{
+		"duration":   12,
+		"resolution": "720p",
+	}, candidate, clients.Response{
+		Result: map[string]any{
+			"data": []any{map[string]any{"type": "video", "duration": 6.6}},
+		},
+	}, false)
+
+	line := firstBillingLine(t, items)
+	if got, want := floatFromAny(line["durationSeconds"]), 7.0; got != want {
+		t.Fatalf("video generated duration = %v, want %v", got, want)
+	}
+	if got, want := floatFromAny(line["durationUnitCount"]), 2.0; got != want {
+		t.Fatalf("video generated duration units = %v, want %v", got, want)
+	}
+	if got, want := floatFromAny(line["amount"]), 200.0; got != want {
+		t.Fatalf("video generated duration amount = %v, want %v", got, want)
+	}
+	if got, want := line["durationSource"], "generated_video"; got != want {
+		t.Fatalf("video duration source = %v, want %v", got, want)
+	}
+}
+
+// TestVideoBillingPrefersGeneratedAudio checks that has_audio/hasAudio on the
+// generated video overrides the request's "audio" flag in both directions.
+func TestVideoBillingPrefersGeneratedAudio(t *testing.T) {
+	service := &Service{}
+	candidate := store.RuntimeModelCandidate{
+		ModelName: "video-model",
+		BaseBillingConfig: map[string]any{
+			"video": map[string]any{
+				"basePrice":             100,
+				"audioWeights":          map[string]any{"true": 2, "false": 0.5},
+				"voiceSpecifiedWeights": map[string]any{"true": 4},
+			},
+		},
+	}
+
+	items := service.billings(context.Background(), nil, "videos.generations", map[string]any{
+		"audio":    false,
+		"duration": 5,
+	}, candidate, clients.Response{
+		Result: map[string]any{
+			"data": []any{map[string]any{"type": "video", "has_audio": true}},
+		},
+	}, false)
+
+	line := firstBillingLine(t, items)
+	if got, want := floatFromAny(line["amount"]), 200.0; got != want {
+		t.Fatalf("video generated audio amount = %v, want %v", got, want)
+	}
+	if got, want := line["audio"], true; got != want {
+		t.Fatalf("video generated audio = %v, want %v", got, want)
+	}
+	if got, want := line["audioSource"], "generated_video"; got != want {
+		t.Fatalf("video audio source = %v, want %v", got, want)
+	}
+
+	items = service.billings(context.Background(), nil, "videos.generations", map[string]any{
+		"audio":    true,
+		"duration": 5,
+		"voice_id": "voice-a",
+	}, candidate, clients.Response{
+		Result: map[string]any{
+			"data": []any{map[string]any{"type": "video", "hasAudio": false}},
+		},
+	}, false)
+
+	line = firstBillingLine(t, items)
+	if got, want := floatFromAny(line["amount"]), 50.0; got != want {
+		t.Fatalf("video generated no-audio amount = %v, want %v", got, want)
+	}
+	if got, want := line["audio"], false; got != want {
+		t.Fatalf("video generated no-audio = %v, want %v", got, want)
+	}
+	if got, want := line["audioSource"], "generated_video"; got != want {
+		t.Fatalf("video no-audio source = %v, want %v", got, want)
+	}
+}
+
+// TestGeneratedVideoMetadataIgnoresUnusableValues covers result payloads with
+// a NaN duration or a null has_audio sitting next to a real hasAudio.
+func TestGeneratedVideoMetadataIgnoresUnusableValues(t *testing.T) {
+	zero := 0.0
+	notANumber := zero / zero
+	if _, ok := generatedVideoDurationSeconds(map[string]any{
+		"data": []any{map[string]any{"duration": notANumber}},
+	}); ok {
+		t.Fatalf("NaN duration was accepted as a generated duration")
+	}
+	hasAudio, ok := generatedVideoHasAudio(map[string]any{
+		"data": []any{map[string]any{"has_audio": nil, "hasAudio": true}},
+	})
+	if !ok || !hasAudio {
+		t.Fatalf("generated audio = %v, %v, want true, true", hasAudio, ok)
+	}
 }
 
 func TestVideoBillingEstimateSupportsServerMainStyleDynamicKeys(t *testing.T) {
diff --git a/apps/api/internal/runner/service.go b/apps/api/internal/runner/service.go
index 36e485c..85d4be3 100644
--- a/apps/api/internal/runner/service.go
+++ b/apps/api/internal/runner/service.go
@@ -616,6 +616,9 @@ func (s *Service) runCandidate(ctx context.Context, task store.GatewayTask, user
 			return clients.Response{}, err
 		}
 		response.Result = uploadedResult
+		// Best effort: fill in duration/has_audio on generated videos so
+		// billing can price the actual output instead of the request.
+		response.Result = s.enrichGeneratedVideoMetadata(ctx, task.Kind, response.Result)
 		for _, progress := range response.Progress {
 			if err := s.emit(ctx, task.ID, "task.progress", "running", progress.Phase, progress.Progress, progress.Message, progress.Payload, simulated); err != nil {
 				return clients.Response{}, fmt.Errorf("emit task progress: %w", err)
diff --git a/apps/api/internal/runner/video_duration.go b/apps/api/internal/runner/video_duration.go
new file mode 100644
index 0000000..d16c54c
--- /dev/null
+++ b/apps/api/internal/runner/video_duration.go
@@ -0,0 +1,174 @@
+package runner
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"math"
+	"os/exec"
+	"strconv"
+	"strings"
+	"time"
+	"unicode/utf8"
+)
+
+// generatedVideoMetadataProbeTimeout bounds a single ffprobe run so one slow
+// or unreachable asset cannot hold up the task indefinitely.
+const generatedVideoMetadataProbeTimeout = 8 * time.Second
+
+// generatedVideoMetadata is what probeVideoMetadata learned about a video.
+// Duration is in whole seconds with 0 meaning unknown; HasAudio is only
+// meaningful when HasAudioKnown is true.
+type generatedVideoMetadata struct {
+	Duration      float64
+	HasAudio      bool
+	HasAudioKnown bool
+}
+
+// ffprobeVideoMetadata mirrors the part of ffprobe's JSON output that
+// probeVideoMetadata asks for.
+type ffprobeVideoMetadata struct {
+	Format struct {
+		Duration string `json:"duration"`
+	} `json:"format"`
+	Streams []struct {
+		CodecType string `json:"codec_type"`
+	} `json:"streams"`
+}
+
+// enrichGeneratedVideoMetadata fills in missing "duration" and "has_audio"
+// fields on video items in result["data"] by probing each item's URL with
+// ffprobe. It only acts for the "videos.generations" task kind, mutates the
+// item maps in place, and returns result itself. Probing is best effort: a
+// failed probe is logged at debug level and the item is left untouched.
+func (s *Service) enrichGeneratedVideoMetadata(ctx context.Context, taskKind string, result map[string]any) map[string]any {
+	if taskKind != "videos.generations" {
+		return result
+	}
+	data, _ := result["data"].([]any)
+	for _, raw := range data {
+		item, _ := raw.(map[string]any)
+		if len(item) == 0 || !isGeneratedVideoItem(item) {
+			continue
+		}
+		needsDuration := floatFromAny(item["duration"]) <= 0
+		_, hasAudioMetadata := boolishOptional(firstPresentValue(item, "has_audio", "hasAudio"))
+		if !needsDuration && hasAudioMetadata {
+			// Both fields are already present on the item; skip the probe.
+			continue
+		}
+		urlValue := firstNonEmptyStringValue(item, "video_url", "videoUrl", "url")
+		if urlValue == "" {
+			continue
+		}
+		metadata, err := s.probeVideoMetadata(ctx, urlValue)
+		if err != nil {
+			if s.logger != nil {
+				s.logger.Debug("probe generated video metadata failed", "url", trimForLog(urlValue), "error", err)
+			}
+			continue
+		}
+		if needsDuration && metadata.Duration > 0 {
+			item["duration"] = metadata.Duration
+		}
+		if !hasAudioMetadata && metadata.HasAudioKnown {
+			item["has_audio"] = metadata.HasAudio
+		}
+	}
+	return result
+}
+
+// isGeneratedVideoItem reports whether item looks like a video: its "type" is
+// "video", it has a video_url/videoUrl, or its "url" contains a known video
+// file extension.
+func isGeneratedVideoItem(item map[string]any) bool {
+	itemType := strings.TrimSpace(stringFromAny(item["type"]))
+	if itemType == "video" {
+		return true
+	}
+	if firstNonEmptyStringValue(item, "video_url", "videoUrl") != "" {
+		return true
+	}
+	urlValue := strings.ToLower(firstNonEmptyStringValue(item, "url"))
+	return strings.Contains(urlValue, ".mp4") ||
+		strings.Contains(urlValue, ".mov") ||
+		strings.Contains(urlValue, ".webm") ||
+		strings.Contains(urlValue, ".m3u8")
+}
+
+// probeVideoMetadata runs ffprobe against rawURL (or the fetch URL that
+// generatedAssetFetchURL resolves it to) and returns the rounded duration and
+// whether an audio stream is present. It returns an error when ffprobe is not
+// installed, fails or times out, prints JSON that cannot be decoded, or
+// yields neither a duration nor a stream list.
+func (s *Service) probeVideoMetadata(ctx context.Context, rawURL string) (generatedVideoMetadata, error) {
+	if _, err := exec.LookPath("ffprobe"); err != nil {
+		return generatedVideoMetadata{}, fmt.Errorf("locate ffprobe: %w", err)
+	}
+	probeURL := rawURL
+	if s != nil {
+		if resolved, err := s.generatedAssetFetchURL(rawURL); err == nil && strings.TrimSpace(resolved) != "" {
+			probeURL = resolved
+		}
+	}
+	probeCtx, cancel := context.WithTimeout(ctx, generatedVideoMetadataProbeTimeout)
+	defer cancel()
+	// The URL is taken from the task result rather than trusted configuration,
+	// so it is passed as the value of -i: a value starting with "-" can then
+	// never be parsed as an ffprobe option.
+	// NOTE(review): ffprobe opens any protocol it supports (file:, concat:,
+	// ...); consider validating the scheme or passing -protocol_whitelist
+	// once the set of legitimate fetch URLs is confirmed.
+	cmd := exec.CommandContext(
+		probeCtx,
+		"ffprobe",
+		"-v", "error",
+		"-show_entries", "format=duration:stream=codec_type",
+		"-of", "json",
+		"-i", probeURL,
+	)
+	output, err := cmd.Output()
+	if err != nil {
+		return generatedVideoMetadata{}, fmt.Errorf("run ffprobe: %w", err)
+	}
+	var probed ffprobeVideoMetadata
+	if err := json.Unmarshal(output, &probed); err != nil {
+		return generatedVideoMetadata{}, fmt.Errorf("decode ffprobe output: %w", err)
+	}
+	var metadata generatedVideoMetadata
+	durationText := strings.TrimSpace(probed.Format.Duration)
+	if duration, err := strconv.ParseFloat(durationText, 64); err == nil && duration > 0 && !math.IsInf(duration, 0) {
+		metadata.Duration = math.Max(1, math.Round(duration))
+	}
+	// A nil slice means the output had no usable "streams" entry, which says
+	// nothing about audio; an empty list means the file has no streams.
+	if probed.Streams != nil {
+		metadata.HasAudioKnown = true
+		for _, stream := range probed.Streams {
+			if strings.TrimSpace(stream.CodecType) == "audio" {
+				metadata.HasAudio = true
+				break
+			}
+		}
+	}
+	if metadata.Duration <= 0 && !metadata.HasAudioKnown {
+		return generatedVideoMetadata{}, fmt.Errorf("invalid video metadata: %q", trimForLog(string(output)))
+	}
+	return metadata, nil
+}
+
+// trimForLog trims surrounding whitespace and caps value at 120 bytes plus
+// "..." so log lines stay short. The cut backs up to a rune boundary so a
+// multi-byte character is never split into invalid UTF-8.
+func trimForLog(value string) string {
+	const limit = 120
+	value = strings.TrimSpace(value)
+	if len(value) <= limit {
+		return value
+	}
+	cut := limit
+	for cut > 0 && !utf8.RuneStart(value[cut]) {
+		cut--
+	}
+	return value[:cut] + "..."
+}