fix: prefer generated video metadata for billing

This commit is contained in:
wangbo 2026-05-20 00:11:28 +08:00
parent 69d23efb57
commit 11a2c13e4a
4 changed files with 318 additions and 14 deletions

View File

@ -82,19 +82,23 @@ func (s *Service) billings(ctx context.Context, user *auth.User, kind string, bo
resource = "video"
unit = "5s_video"
baseKey = "videoBase"
duration := requestDurationSeconds(body)
duration, durationSource := billingDurationSeconds(body, response)
audioEnabled, audioSource := billingAudioEnabled(body, response)
durationUnits := math.Max(1, math.Ceil(duration/5))
amount := float64(count) *
durationUnits *
resourcePrice(config, resource, baseKey, "basePrice") *
resourceWeight(config, resource, "resolutionWeights", firstNonEmptyString(stringFromMap(body, "resolution"), stringFromMap(body, "size"))) *
resourceWeight(config, resource, "audioWeights", boolWeightKey(boolishValue(body["audio"]))) *
resourceWeight(config, resource, "audioWeights", boolWeightKey(audioEnabled)) *
resourceWeight(config, resource, "referenceVideoWeights", boolWeightKey(requestHasReferenceVideo(body))) *
resourceWeight(config, resource, "voiceSpecifiedWeights", boolWeightKey(requestHasVoiceID(body))) *
resourceWeight(config, resource, "voiceSpecifiedWeights", boolWeightKey(requestHasVoiceID(body, audioEnabled))) *
discount
return []any{billingLineWithDetails(candidate, resource, unit, count*int(durationUnits), roundPrice(amount), discount, simulated, map[string]any{
"count": count,
"audio": audioEnabled,
"audioSource": audioSource,
"durationSeconds": duration,
"durationSource": durationSource,
"durationUnit": "5s",
"durationUnitCount": durationUnits,
})}
@ -345,6 +349,54 @@ func requestDurationSeconds(body map[string]any) float64 {
return 5
}
// billingDurationSeconds chooses the duration, in seconds, that a video
// generation is billed for and returns it with a label naming its origin.
//
// A duration reported by the generated result is returned with the source
// "generated_video". Otherwise the duration derived from the request body is
// returned with the source "preprocessed_request".
func billingDurationSeconds(body map[string]any, response clients.Response) (float64, string) {
	generated, found := generatedVideoDurationSeconds(response.Result)
	if !found {
		return requestDurationSeconds(body), "preprocessed_request"
	}
	return generated, "generated_video"
}
// generatedVideoDurationSeconds scans result["data"] for the first entry that
// carries a usable "duration" and returns it rounded to the nearest whole
// second (never less than 1). The boolean is false when no entry has a
// positive, finite duration.
//
// Entries that are not non-empty maps are skipped, as are entries whose
// duration is missing, non-positive, NaN or infinite.
func generatedVideoDurationSeconds(result map[string]any) (float64, bool) {
	data, _ := result["data"].([]any)
	for _, raw := range data {
		item, _ := raw.(map[string]any)
		if len(item) == 0 {
			continue
		}
		duration := floatFromAny(item["duration"])
		// NaN fails every ordered comparison, so it would slip past a plain
		// "<= 0" guard; +Inf would pass it outright. Either value would
		// poison the billing arithmetic downstream, so reject both here,
		// matching the checks applied to probed durations.
		if math.IsNaN(duration) || math.IsInf(duration, 0) || duration <= 0 {
			continue
		}
		// Durations below half a second round to zero; bill them as one
		// second rather than dropping them.
		return math.Max(1, math.Round(duration)), true
	}
	return 0, false
}
// billingAudioEnabled decides whether a video generation is billed as having
// audio and returns that flag with a label naming its origin.
//
// An audio flag reported by the generated result is returned with the source
// "generated_video". Otherwise the request body's "audio" field is
// interpreted and returned with the source "preprocessed_request".
func billingAudioEnabled(body map[string]any, response clients.Response) (bool, string) {
	generated, found := generatedVideoHasAudio(response.Result)
	if !found {
		return boolishValue(body["audio"]), "preprocessed_request"
	}
	return generated, "generated_video"
}
// generatedVideoHasAudio looks through result["data"] for the first entry
// whose "has_audio" / "hasAudio" field can be interpreted as a boolean.
//
// The first return value is that boolean; the second reports whether any
// entry supplied one. Entries that are not non-empty maps are ignored.
func generatedVideoHasAudio(result map[string]any) (bool, bool) {
	entries, _ := result["data"].([]any)
	for i := range entries {
		fields, isMap := entries[i].(map[string]any)
		if !isMap || len(fields) == 0 {
			continue
		}
		flag, known := boolishOptional(firstPresentValue(fields, "has_audio", "hasAudio"))
		if known {
			return flag, true
		}
	}
	return false, false
}
func requestHasReferenceVideo(body map[string]any) bool {
if hasNonEmptyArray(body["video_list"]) || hasNonEmptyArray(body["videoList"]) {
return true
@ -367,8 +419,8 @@ func requestHasReferenceVideo(body map[string]any) bool {
return false
}
func requestHasVoiceID(body map[string]any) bool {
return boolishValue(body["audio"]) && firstNonEmptyStringValue(body, "voice_id", "voiceId") != ""
func requestHasVoiceID(body map[string]any, audioEnabled bool) bool {
return audioEnabled && firstNonEmptyStringValue(body, "voice_id", "voiceId") != ""
}
func boolWeightKey(value bool) string {
@ -379,25 +431,38 @@ func boolWeightKey(value bool) string {
}
func boolishValue(value any) bool {
result, _ := boolishOptional(value)
return result
}
func boolishOptional(value any) (bool, bool) {
switch typed := value.(type) {
case bool:
return typed
return typed, true
case string:
switch strings.ToLower(strings.TrimSpace(typed)) {
case "true", "1", "yes", "on":
return true
default:
return false
return true, true
case "false", "0", "no", "off":
return false, true
}
case int:
return typed != 0
return typed != 0, true
case int64:
return typed != 0
return typed != 0, true
case float64:
return typed != 0
default:
return false
return typed != 0, true
}
return false, false
}
// firstPresentValue returns the value stored under the first of keys that
// exists in record, checking keys in the order given. A key counts as present
// even when its value is nil. nil is returned when none of the keys exist.
func firstPresentValue(record map[string]any, keys ...string) any {
	for i := 0; i < len(keys); i++ {
		found, exists := record[keys[i]]
		if !exists {
			continue
		}
		return found
	}
	return nil
}
func hasNonEmptyArray(value any) bool {

View File

@ -76,6 +76,100 @@ func TestVideoBillingEstimateUsesFiveSecondUnitsAndDynamicWeights(t *testing.T)
if got, want := line["quantity"], 3; got != want {
t.Fatalf("video quantity = %v, want %v", got, want)
}
if got, want := line["durationSource"], "preprocessed_request"; got != want {
t.Fatalf("video duration source = %v, want %v", got, want)
}
if got, want := line["audioSource"], "preprocessed_request"; got != want {
t.Fatalf("video audio source = %v, want %v", got, want)
}
}
func TestVideoBillingPrefersGeneratedDuration(t *testing.T) {
service := &Service{}
candidate := store.RuntimeModelCandidate{
ModelName: "video-model",
BaseBillingConfig: map[string]any{
"video": map[string]any{"basePrice": 100},
},
}
items := service.billings(context.Background(), nil, "videos.generations", map[string]any{
"duration": 12,
"resolution": "720p",
}, candidate, clients.Response{
Result: map[string]any{
"data": []any{map[string]any{"type": "video", "duration": 6.6}},
},
}, false)
line := firstBillingLine(t, items)
if got, want := floatFromAny(line["durationSeconds"]), 7.0; got != want {
t.Fatalf("video generated duration = %v, want %v", got, want)
}
if got, want := floatFromAny(line["durationUnitCount"]), 2.0; got != want {
t.Fatalf("video generated duration units = %v, want %v", got, want)
}
if got, want := floatFromAny(line["amount"]), 200.0; got != want {
t.Fatalf("video generated duration amount = %v, want %v", got, want)
}
if got, want := line["durationSource"], "generated_video"; got != want {
t.Fatalf("video duration source = %v, want %v", got, want)
}
}
// TestVideoBillingPrefersGeneratedAudio checks that the audio flag reported
// by the generated result overrides the one in the request, in both
// directions, and that the billing line records "generated_video" as the
// audio source each time.
func TestVideoBillingPrefersGeneratedAudio(t *testing.T) {
	svc := &Service{}
	ctx := context.Background()
	candidate := store.RuntimeModelCandidate{
		ModelName: "video-model",
		BaseBillingConfig: map[string]any{
			"video": map[string]any{
				"basePrice":             100,
				"audioWeights":          map[string]any{"true": 2, "false": 0.5},
				"voiceSpecifiedWeights": map[string]any{"true": 4},
			},
		},
	}

	// The request disables audio, but the generated video has it: the
	// expected amount of 200 corresponds to the "true" audio weight.
	silentRequest := map[string]any{
		"audio":    false,
		"duration": 5,
	}
	audibleResult := clients.Response{
		Result: map[string]any{
			"data": []any{map[string]any{"type": "video", "has_audio": true}},
		},
	}
	line := firstBillingLine(t, svc.billings(ctx, nil, "videos.generations", silentRequest, candidate, audibleResult, false))
	if amount := floatFromAny(line["amount"]); amount != 200.0 {
		t.Fatalf("video generated audio amount = %v, want %v", amount, 200.0)
	}
	if audio := line["audio"]; audio != true {
		t.Fatalf("video generated audio = %v, want %v", audio, true)
	}
	if source := line["audioSource"]; source != "generated_video" {
		t.Fatalf("video audio source = %v, want %v", source, "generated_video")
	}

	// The request enables audio with a voice, but the generated video is
	// silent: the expected amount of 50 corresponds to the "false" audio
	// weight with no voice surcharge applied.
	voicedRequest := map[string]any{
		"audio":    true,
		"duration": 5,
		"voice_id": "voice-a",
	}
	silentResult := clients.Response{
		Result: map[string]any{
			"data": []any{map[string]any{"type": "video", "hasAudio": false}},
		},
	}
	line = firstBillingLine(t, svc.billings(ctx, nil, "videos.generations", voicedRequest, candidate, silentResult, false))
	if amount := floatFromAny(line["amount"]); amount != 50.0 {
		t.Fatalf("video generated no-audio amount = %v, want %v", amount, 50.0)
	}
	if audio := line["audio"]; audio != false {
		t.Fatalf("video generated no-audio = %v, want %v", audio, false)
	}
	if source := line["audioSource"]; source != "generated_video" {
		t.Fatalf("video no-audio source = %v, want %v", source, "generated_video")
	}
}
func TestVideoBillingEstimateSupportsServerMainStyleDynamicKeys(t *testing.T) {

View File

@ -616,6 +616,7 @@ func (s *Service) runCandidate(ctx context.Context, task store.GatewayTask, user
return clients.Response{}, err
}
response.Result = uploadedResult
response.Result = s.enrichGeneratedVideoMetadata(ctx, task.Kind, response.Result)
for _, progress := range response.Progress {
if err := s.emit(ctx, task.ID, "task.progress", "running", progress.Phase, progress.Progress, progress.Message, progress.Payload, simulated); err != nil {
return clients.Response{}, fmt.Errorf("emit task progress: %w", err)

View File

@ -0,0 +1,144 @@
package runner
import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"math"
	"os/exec"
	"strconv"
	"strings"
	"time"
)
// generatedVideoMetadataProbeTimeout bounds a single ffprobe invocation made
// by probeVideoMetadata; the command's context is cancelled once it elapses.
const generatedVideoMetadataProbeTimeout = 8 * time.Second
// generatedVideoMetadata is the subset of probed video properties that
// probeVideoMetadata extracts from ffprobe output.
type generatedVideoMetadata struct {
// Duration is the container duration rounded to whole seconds; it stays 0
// when ffprobe reported no parseable positive duration.
Duration float64
// HasAudio is true when at least one probed stream has codec type "audio".
HasAudio bool
// HasAudioKnown reports whether ffprobe returned a stream list at all, i.e.
// whether HasAudio is meaningful.
HasAudioKnown bool
}
// ffprobeVideoMetadata is the decoding target for the JSON that ffprobe
// prints when asked for "format=duration:stream=codec_type".
type ffprobeVideoMetadata struct {
// Format holds container-level fields. Duration is decoded as a string and
// parsed into a float by the caller.
Format struct {
Duration string `json:"duration"`
} `json:"format"`
// Streams lists the codec type of each stream ffprobe reported.
Streams []struct {
CodecType string `json:"codec_type"`
} `json:"streams"`
}
// enrichGeneratedVideoMetadata fills in missing "duration" and "has_audio"
// fields on generated video items by probing each item's URL.
//
// Only results of "videos.generations" tasks are touched. For every entry of
// result["data"] that looks like a video and lacks a positive duration or an
// interpretable audio flag, the item's URL ("video_url", "videoUrl" or "url")
// is probed and the missing fields are written into the item map in place.
// Fields already present are never overwritten.
//
// Enrichment is best effort: a failed probe is logged at debug level (when a
// logger is configured) and the item is left unchanged. The same result map
// that was passed in is returned.
func (s *Service) enrichGeneratedVideoMetadata(ctx context.Context, taskKind string, result map[string]any) map[string]any {
	if taskKind != "videos.generations" {
		return result
	}
	data, _ := result["data"].([]any)
	if len(data) == 0 {
		return result
	}
	for _, raw := range data {
		item, _ := raw.(map[string]any)
		if len(item) == 0 || !isGeneratedVideoItem(item) {
			continue
		}
		needsDuration := floatFromAny(item["duration"]) <= 0
		_, hasAudioMetadata := boolishOptional(firstPresentValue(item, "has_audio", "hasAudio"))
		if !needsDuration && hasAudioMetadata {
			// Nothing is missing; skip the probe entirely.
			continue
		}
		urlValue := firstNonEmptyStringValue(item, "video_url", "videoUrl", "url")
		if urlValue == "" {
			continue
		}
		metadata, err := s.probeVideoMetadata(ctx, urlValue)
		if err != nil {
			// probeVideoMetadata tolerates a nil receiver, so the logger
			// lookup must as well; otherwise a failed probe would panic.
			if s != nil && s.logger != nil {
				s.logger.Debug("probe generated video metadata failed", "url", trimForLog(urlValue), "error", err)
			}
			continue
		}
		if needsDuration && metadata.Duration > 0 {
			item["duration"] = metadata.Duration
		}
		if !hasAudioMetadata && metadata.HasAudioKnown {
			item["has_audio"] = metadata.HasAudio
		}
	}
	return result
}
// isGeneratedVideoItem reports whether a result item appears to describe a
// video. An item qualifies when its "type" is "video", when it carries a
// non-empty "video_url" / "videoUrl", or when its lower-cased "url" contains
// one of the markers ".mp4", ".mov", ".webm" or ".m3u8".
func isGeneratedVideoItem(item map[string]any) bool {
	switch {
	case strings.TrimSpace(stringFromAny(item["type"])) == "video":
		return true
	case firstNonEmptyStringValue(item, "video_url", "videoUrl") != "":
		return true
	}
	lowered := strings.ToLower(firstNonEmptyStringValue(item, "url"))
	for _, marker := range [...]string{".mp4", ".mov", ".webm", ".m3u8"} {
		if strings.Contains(lowered, marker) {
			return true
		}
	}
	return false
}
// probeVideoMetadata runs ffprobe against rawURL and returns the duration
// (rounded to whole seconds, minimum 1) and whether an audio stream exists.
//
// When the receiver is non-nil, rawURL is first resolved through
// generatedAssetFetchURL; the raw value is used if resolution fails or yields
// a blank string. The probe is limited to generatedVideoMetadataProbeTimeout.
//
// An error is returned when ffprobe is not on PATH, when the command fails or
// times out, when its output is not valid JSON, or when the output contains
// neither a usable duration nor a stream list. On error the returned metadata
// is always the zero value.
func (s *Service) probeVideoMetadata(ctx context.Context, rawURL string) (generatedVideoMetadata, error) {
	if _, err := exec.LookPath("ffprobe"); err != nil {
		return generatedVideoMetadata{}, fmt.Errorf("locate ffprobe: %w", err)
	}
	probeURL := rawURL
	if s != nil {
		if resolved, err := s.generatedAssetFetchURL(rawURL); err == nil && strings.TrimSpace(resolved) != "" {
			probeURL = resolved
		}
	}
	probeCtx, cancel := context.WithTimeout(ctx, generatedVideoMetadataProbeTimeout)
	defer cancel()
	cmd := exec.CommandContext(
		probeCtx,
		"ffprobe",
		"-v", "error",
		"-show_entries", "format=duration:stream=codec_type",
		"-of", "json",
		// Pass the input through -i so a value that begins with "-" can
		// never be parsed as an ffprobe option.
		"-i", probeURL,
	)
	output, err := cmd.Output()
	if err != nil {
		// A killed process only reports "signal: killed"; surface the
		// timeout or cancellation that caused it instead.
		if ctxErr := probeCtx.Err(); ctxErr != nil {
			return generatedVideoMetadata{}, fmt.Errorf("run ffprobe: %w", ctxErr)
		}
		// With "-v error" ffprobe explains failures on stderr, which Output
		// captures on the ExitError; include it so the cause is visible.
		var exitErr *exec.ExitError
		if errors.As(err, &exitErr) {
			if detail := trimForLog(string(exitErr.Stderr)); detail != "" {
				return generatedVideoMetadata{}, fmt.Errorf("run ffprobe: %w: %s", err, detail)
			}
		}
		return generatedVideoMetadata{}, fmt.Errorf("run ffprobe: %w", err)
	}
	var probed ffprobeVideoMetadata
	if err := json.Unmarshal(output, &probed); err != nil {
		return generatedVideoMetadata{}, fmt.Errorf("decode ffprobe output: %w", err)
	}
	metadata := generatedVideoMetadata{}
	if durationText := strings.TrimSpace(probed.Format.Duration); durationText != "" {
		if duration, err := strconv.ParseFloat(durationText, 64); err == nil && duration > 0 && !math.IsNaN(duration) && !math.IsInf(duration, 0) {
			// Durations below half a second round to zero; report them as
			// one second so the value still counts as "known".
			metadata.Duration = math.Max(1, math.Round(duration))
		}
	}
	// A nil slice means the "streams" key was absent, so audio presence is
	// unknown; an empty but present list means "known: no audio".
	if probed.Streams != nil {
		metadata.HasAudioKnown = true
		for _, stream := range probed.Streams {
			if strings.TrimSpace(stream.CodecType) == "audio" {
				metadata.HasAudio = true
				break
			}
		}
	}
	if metadata.Duration <= 0 && !metadata.HasAudioKnown {
		return generatedVideoMetadata{}, fmt.Errorf("invalid video metadata: %q", trimForLog(string(output)))
	}
	return metadata, nil
}
// trimForLog strips surrounding whitespace from value and, when the result is
// longer than 120 bytes, shortens it to at most 120 bytes followed by "...".
//
// The cut is moved back to the nearest rune boundary so that a multi-byte
// UTF-8 character is never split, which would otherwise put invalid UTF-8
// into log lines and error messages.
func trimForLog(value string) string {
	const maxLogBytes = 120
	value = strings.TrimSpace(value)
	if len(value) <= maxLogBytes {
		return value
	}
	// Ranging over a string yields the byte offset of each rune start; keep
	// the last one that does not exceed the limit.
	cut := 0
	for offset := range value {
		if offset > maxLogBytes {
			break
		}
		cut = offset
	}
	return value[:cut] + "..."
}