961 lines
27 KiB
Go
961 lines
27 KiB
Go
package clients
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"math"
|
|
"net/http"
|
|
"regexp"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
var volcesElementReferencePattern = regexp.MustCompile(`(?i)<<<[[:space:]]*element[_-]?([0-9]+)[[:space:]]*>>>|@element([0-9]+)`)
|
|
|
|
type VolcesClient struct {
|
|
HTTPClient *http.Client
|
|
}
|
|
|
|
func (c VolcesClient) Run(ctx context.Context, request Request) (Response, error) {
|
|
apiKey := credential(request.Candidate.Credentials, "apiKey", "api_key", "key", "token")
|
|
if apiKey == "" {
|
|
return Response{}, &ClientError{Code: "missing_credentials", Message: "volces api key is required", Retryable: false}
|
|
}
|
|
|
|
switch request.Kind {
|
|
case "images.generations", "images.edits":
|
|
return c.runImage(ctx, request, apiKey)
|
|
case "videos.generations":
|
|
return c.runVideo(ctx, request, apiKey)
|
|
default:
|
|
return Response{}, &ClientError{Code: "unsupported_kind", Message: "unsupported volces request kind", Retryable: false}
|
|
}
|
|
}
|
|
|
|
func (c VolcesClient) runImage(ctx context.Context, request Request, apiKey string) (Response, error) {
|
|
body := volcesImageBody(request)
|
|
raw, _ := json.Marshal(body)
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost, joinURL(request.Candidate.BaseURL, "/images/generations"), bytes.NewReader(raw))
|
|
if err != nil {
|
|
return Response{}, err
|
|
}
|
|
req.Header.Set("Content-Type", "application/json")
|
|
req.Header.Set("Authorization", "Bearer "+apiKey)
|
|
|
|
resp, err := httpClient(request.HTTPClient, c.HTTPClient).Do(req)
|
|
if err != nil {
|
|
return Response{}, &ClientError{Code: "network", Message: err.Error(), Retryable: true}
|
|
}
|
|
responseStartedAt := time.Now()
|
|
requestID := requestIDFromHTTPResponse(resp)
|
|
result, err := decodeHTTPResponse(resp)
|
|
responseFinishedAt := time.Now()
|
|
if err != nil {
|
|
return Response{}, annotateResponseError(err, requestID, responseStartedAt, responseFinishedAt)
|
|
}
|
|
if requestID == "" {
|
|
requestID = requestIDFromResult(result)
|
|
}
|
|
return Response{
|
|
Result: result,
|
|
RequestID: requestID,
|
|
Usage: usageFromOpenAI(result),
|
|
Progress: providerProgress(request),
|
|
ResponseStartedAt: responseStartedAt,
|
|
ResponseFinishedAt: responseFinishedAt,
|
|
ResponseDurationMS: responseDurationMS(responseStartedAt, responseFinishedAt),
|
|
}, nil
|
|
}
|
|
|
|
func (c VolcesClient) runVideo(ctx context.Context, request Request, apiKey string) (Response, error) {
|
|
submitStartedAt := time.Now()
|
|
submitRequestID := strings.TrimSpace(request.RemoteTaskID)
|
|
upstreamTaskID := strings.TrimSpace(request.RemoteTaskID)
|
|
if upstreamTaskID == "" {
|
|
body := volcesVideoBody(request)
|
|
if err := validateVolcesVideoTaskBody(body); err != nil {
|
|
return Response{}, err
|
|
}
|
|
submitResult, requestID, err := c.postJSON(ctx, request, request.Candidate.BaseURL, "/contents/generations/tasks", apiKey, body)
|
|
submitRequestID = requestID
|
|
if err != nil {
|
|
return Response{}, annotateResponseError(err, submitRequestID, submitStartedAt, time.Now())
|
|
}
|
|
upstreamTaskID = strings.TrimSpace(stringFromAny(submitResult["id"]))
|
|
if upstreamTaskID == "" {
|
|
return Response{}, &ClientError{Code: "invalid_response", Message: "volces video task id is missing", RequestID: submitRequestID, Retryable: false}
|
|
}
|
|
if request.OnRemoteTaskSubmitted != nil {
|
|
if err := request.OnRemoteTaskSubmitted(upstreamTaskID, submitResult); err != nil {
|
|
return Response{}, err
|
|
}
|
|
}
|
|
}
|
|
|
|
interval := volcesPollInterval(request)
|
|
timeout := volcesPollTimeout(request)
|
|
deadline := time.NewTimer(timeout)
|
|
defer deadline.Stop()
|
|
|
|
ticker := time.NewTicker(interval)
|
|
defer ticker.Stop()
|
|
|
|
var lastResult map[string]any
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
return Response{}, &ClientError{Code: "cancelled", Message: ctx.Err().Error(), RequestID: submitRequestID, Retryable: true}
|
|
default:
|
|
}
|
|
|
|
pollStartedAt := time.Now()
|
|
pollResult, pollRequestID, err := c.getJSON(ctx, request, request.Candidate.BaseURL, "/contents/generations/tasks/"+upstreamTaskID, apiKey)
|
|
pollFinishedAt := time.Now()
|
|
requestID := firstNonEmpty(pollRequestID, submitRequestID, upstreamTaskID)
|
|
if err != nil {
|
|
return Response{}, annotateResponseError(err, requestID, pollStartedAt, pollFinishedAt)
|
|
}
|
|
lastResult = pollResult
|
|
|
|
switch volcesTaskStatus(pollResult) {
|
|
case "succeeded":
|
|
result := volcesVideoSuccessResult(request, upstreamTaskID, pollResult)
|
|
return Response{
|
|
Result: result,
|
|
RequestID: requestID,
|
|
Usage: volcesVideoUsage(pollResult),
|
|
Progress: volcesVideoProgress(request, upstreamTaskID),
|
|
ResponseStartedAt: submitStartedAt,
|
|
ResponseFinishedAt: pollFinishedAt,
|
|
ResponseDurationMS: responseDurationMS(submitStartedAt, pollFinishedAt),
|
|
}, nil
|
|
case "failed", "cancelled":
|
|
return Response{}, &ClientError{
|
|
Code: volcesTaskErrorCode(pollResult),
|
|
Message: volcesTaskErrorMessage(pollResult),
|
|
RequestID: requestID,
|
|
ResponseStartedAt: submitStartedAt,
|
|
ResponseFinishedAt: pollFinishedAt,
|
|
ResponseDurationMS: responseDurationMS(submitStartedAt, pollFinishedAt),
|
|
Retryable: false,
|
|
}
|
|
}
|
|
|
|
select {
|
|
case <-ctx.Done():
|
|
return Response{}, &ClientError{Code: "cancelled", Message: ctx.Err().Error(), RequestID: requestID, Retryable: true}
|
|
case <-deadline.C:
|
|
return Response{}, &ClientError{
|
|
Code: "timeout",
|
|
Message: fmt.Sprintf("volces video task %s did not finish before timeout; last status: %s", upstreamTaskID, volcesTaskStatus(lastResult)),
|
|
RequestID: requestID,
|
|
Retryable: true,
|
|
}
|
|
case <-ticker.C:
|
|
}
|
|
}
|
|
}
|
|
|
|
func (c VolcesClient) postJSON(ctx context.Context, request Request, baseURL string, path string, apiKey string, body map[string]any) (map[string]any, string, error) {
|
|
raw, _ := json.Marshal(body)
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost, joinURL(baseURL, path), bytes.NewReader(raw))
|
|
if err != nil {
|
|
return nil, "", err
|
|
}
|
|
req.Header.Set("Content-Type", "application/json")
|
|
req.Header.Set("Authorization", "Bearer "+apiKey)
|
|
resp, err := httpClient(request.HTTPClient, c.HTTPClient).Do(req)
|
|
if err != nil {
|
|
return nil, "", &ClientError{Code: "network", Message: err.Error(), Retryable: true}
|
|
}
|
|
requestID := requestIDFromHTTPResponse(resp)
|
|
result, err := decodeHTTPResponse(resp)
|
|
return result, requestID, err
|
|
}
|
|
|
|
func (c VolcesClient) getJSON(ctx context.Context, request Request, baseURL string, path string, apiKey string) (map[string]any, string, error) {
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, joinURL(baseURL, path), nil)
|
|
if err != nil {
|
|
return nil, "", err
|
|
}
|
|
req.Header.Set("Authorization", "Bearer "+apiKey)
|
|
resp, err := httpClient(request.HTTPClient, c.HTTPClient).Do(req)
|
|
if err != nil {
|
|
return nil, "", &ClientError{Code: "network", Message: err.Error(), Retryable: true}
|
|
}
|
|
requestID := requestIDFromHTTPResponse(resp)
|
|
result, err := decodeHTTPResponse(resp)
|
|
return result, requestID, err
|
|
}
|
|
|
|
func volcesImageBody(request Request) map[string]any {
|
|
body := cleanProviderBody(request.Body)
|
|
body["model"] = upstreamModelName(request.Candidate)
|
|
if _, ok := body["watermark"]; !ok {
|
|
body["watermark"] = false
|
|
}
|
|
if request.Kind == "images.generations" {
|
|
if _, ok := body["seed"]; !ok {
|
|
body["seed"] = -1
|
|
}
|
|
}
|
|
if resolution := strings.TrimSpace(stringFromAny(body["resolution"])); resolution != "" {
|
|
body["size"] = resolution
|
|
}
|
|
if size := widthHeightSize(body); size != "" {
|
|
body["size"] = size
|
|
}
|
|
if supportsMultipleOutputs(request, request.ModelType) && body["sequential_image_generation"] == nil {
|
|
body["sequential_image_generation"] = "auto"
|
|
}
|
|
return body
|
|
}
|
|
|
|
func volcesVideoBody(request Request) map[string]any {
|
|
body := cleanProviderBody(request.Body)
|
|
body["model"] = upstreamModelName(request.Candidate)
|
|
content := contentItems(body["content"])
|
|
if len(content) == 0 {
|
|
content = buildVolcesContentFromBody(body)
|
|
}
|
|
appendMultiShotTimeline(&content)
|
|
convertVolcesElementsToImageReferences(&content)
|
|
normalizeVolcesContentRoles(content)
|
|
return volcesVideoTaskBody(body, content)
|
|
}
|
|
|
|
func cleanProviderBody(body map[string]any) map[string]any {
|
|
out := cloneBody(body)
|
|
for _, key := range []string{
|
|
"runMode",
|
|
"mode",
|
|
"simulation",
|
|
"testMode",
|
|
"simulationProfile",
|
|
"testProfile",
|
|
"pollIntervalMs",
|
|
"poll_interval_ms",
|
|
"pollTimeoutSeconds",
|
|
"poll_timeout_seconds",
|
|
} {
|
|
delete(out, key)
|
|
}
|
|
return out
|
|
}
|
|
|
|
func buildVolcesContentFromBody(body map[string]any) []map[string]any {
|
|
content := make([]map[string]any, 0)
|
|
if prompt := firstNonEmptyStringValue(body, "prompt", "input"); prompt != "" {
|
|
content = append(content, map[string]any{"type": "text", "text": prompt})
|
|
}
|
|
appendURLContent := func(kind string, role string, url string) {
|
|
if strings.TrimSpace(url) == "" {
|
|
return
|
|
}
|
|
switch kind {
|
|
case "image_url":
|
|
content = append(content, map[string]any{"type": kind, "role": role, "image_url": map[string]any{"url": strings.TrimSpace(url)}})
|
|
case "video_url":
|
|
content = append(content, map[string]any{"type": kind, "role": role, "video_url": map[string]any{"url": strings.TrimSpace(url)}})
|
|
case "audio_url":
|
|
content = append(content, map[string]any{"type": kind, "role": role, "audio_url": map[string]any{"url": strings.TrimSpace(url)}})
|
|
}
|
|
}
|
|
|
|
firstFrame := firstNonEmptyStringValue(body, "first_frame", "firstFrame")
|
|
appendURLContent("image_url", "first_frame", firstFrame)
|
|
appendURLContent("image_url", "last_frame", firstNonEmptyStringValue(body, "last_frame", "lastFrame"))
|
|
imageURLs := firstNonEmptyStringListFromAny(body["image"], body["images"], body["image_url"], body["imageUrl"], body["image_urls"], body["imageUrls"])
|
|
if firstFrame == "" && len(imageURLs) > 0 {
|
|
appendURLContent("image_url", "first_frame", imageURLs[0])
|
|
imageURLs = imageURLs[1:]
|
|
}
|
|
for _, url := range imageURLs {
|
|
appendURLContent("image_url", "reference_image", url)
|
|
}
|
|
for _, url := range firstNonEmptyStringListFromAny(body["reference_image"], body["referenceImage"]) {
|
|
appendURLContent("image_url", "reference_image", url)
|
|
}
|
|
for _, url := range firstNonEmptyStringListFromAny(body["video"], body["video_url"], body["videoUrl"], body["reference_video"], body["referenceVideo"]) {
|
|
appendURLContent("video_url", "reference_video", url)
|
|
}
|
|
for _, url := range firstNonEmptyStringListFromAny(body["audio_url"], body["audioUrl"], body["reference_audio"], body["referenceAudio"]) {
|
|
appendURLContent("audio_url", "reference_audio", url)
|
|
}
|
|
if len(content) == 0 {
|
|
content = append(content, map[string]any{"type": "text", "text": ""})
|
|
}
|
|
return content
|
|
}
|
|
|
|
func volcesVideoTaskBody(body map[string]any, content []map[string]any) map[string]any {
|
|
out := map[string]any{
|
|
"model": body["model"],
|
|
"content": sanitizeVolcesVideoContent(content),
|
|
}
|
|
addVolcesVideoTaskParams(out, body)
|
|
return out
|
|
}
|
|
|
|
func validateVolcesVideoTaskBody(body map[string]any) error {
|
|
firstFrameCount := 0
|
|
lastFrameCount := 0
|
|
for _, item := range contentItems(body["content"]) {
|
|
if stringFromAny(item["type"]) != "image_url" {
|
|
continue
|
|
}
|
|
switch stringFromAny(item["role"]) {
|
|
case "first_frame":
|
|
firstFrameCount++
|
|
case "last_frame":
|
|
lastFrameCount++
|
|
}
|
|
}
|
|
if firstFrameCount > 1 {
|
|
return &ClientError{
|
|
Code: "invalid_parameter",
|
|
Message: fmt.Sprintf("content contains %d first_frame image items; expected at most one first frame image content", firstFrameCount),
|
|
StatusCode: 400,
|
|
Retryable: false,
|
|
}
|
|
}
|
|
if lastFrameCount > 1 {
|
|
return &ClientError{
|
|
Code: "invalid_parameter",
|
|
Message: fmt.Sprintf("content contains %d last_frame image items; expected at most one last frame image content", lastFrameCount),
|
|
StatusCode: 400,
|
|
Retryable: false,
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func addVolcesVideoTaskParams(out map[string]any, body map[string]any) {
|
|
copyVolcesStringParam(out, "callback_url", body, "callback_url", "callbackUrl")
|
|
copyVolcesBoolParam(out, "return_last_frame", body, "return_last_frame", "returnLastFrame")
|
|
copyVolcesIntParam(out, "execution_expires_after", body, "execution_expires_after", "executionExpiresAfter")
|
|
copyVolcesBoolParam(out, "generate_audio", body, "generate_audio", "generateAudio", "audio")
|
|
copyVolcesBoolParam(out, "draft", body, "draft")
|
|
copyVolcesStringParam(out, "resolution", body, "resolution", "size")
|
|
copyVolcesStringParam(out, "ratio", body, "ratio", "aspect_ratio", "aspectRatio")
|
|
if copyVolcesIntParam(out, "frames", body, "frames") {
|
|
delete(out, "duration")
|
|
} else {
|
|
copyVolcesIntParam(out, "duration", body, "duration", "duration_seconds", "durationSeconds", "dur")
|
|
}
|
|
copyVolcesIntParam(out, "seed", body, "seed")
|
|
copyVolcesBoolParam(out, "camera_fixed", body, "camera_fixed", "cameraFixed", "camerafixed", "cf")
|
|
copyVolcesBoolParam(out, "watermark", body, "watermark")
|
|
}
|
|
|
|
func copyVolcesStringParam(out map[string]any, target string, body map[string]any, keys ...string) bool {
|
|
for _, key := range keys {
|
|
if value := strings.TrimSpace(stringFromAny(body[key])); value != "" {
|
|
out[target] = value
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
func copyVolcesIntParam(out map[string]any, target string, body map[string]any, keys ...string) bool {
|
|
for _, key := range keys {
|
|
if value, ok := volcesIntFromAny(body[key]); ok {
|
|
out[target] = value
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
func copyVolcesBoolParam(out map[string]any, target string, body map[string]any, keys ...string) bool {
|
|
for _, key := range keys {
|
|
if value, ok := volcesBoolFromAny(body[key]); ok {
|
|
out[target] = value
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
func volcesIntFromAny(value any) (int, bool) {
|
|
switch typed := value.(type) {
|
|
case nil:
|
|
return 0, false
|
|
case int:
|
|
return typed, true
|
|
case int64:
|
|
return int(typed), true
|
|
case float64:
|
|
return int(math.Round(typed)), true
|
|
case string:
|
|
text := strings.TrimSpace(typed)
|
|
if text == "" {
|
|
return 0, false
|
|
}
|
|
if parsed, err := strconv.ParseFloat(text, 64); err == nil {
|
|
return int(math.Round(parsed)), true
|
|
}
|
|
return 0, false
|
|
default:
|
|
return 0, false
|
|
}
|
|
}
|
|
|
|
func volcesBoolFromAny(value any) (bool, bool) {
|
|
switch typed := value.(type) {
|
|
case nil:
|
|
return false, false
|
|
case bool:
|
|
return typed, true
|
|
case int:
|
|
if typed == 1 {
|
|
return true, true
|
|
}
|
|
if typed == 0 {
|
|
return false, true
|
|
}
|
|
case int64:
|
|
if typed == 1 {
|
|
return true, true
|
|
}
|
|
if typed == 0 {
|
|
return false, true
|
|
}
|
|
case float64:
|
|
if typed == 1 {
|
|
return true, true
|
|
}
|
|
if typed == 0 {
|
|
return false, true
|
|
}
|
|
case string:
|
|
normalized := strings.ToLower(strings.TrimSpace(typed))
|
|
if normalized == "true" || normalized == "1" {
|
|
return true, true
|
|
}
|
|
if normalized == "false" || normalized == "0" {
|
|
return false, true
|
|
}
|
|
}
|
|
return false, false
|
|
}
|
|
|
|
func sanitizeVolcesVideoContent(content []map[string]any) []map[string]any {
|
|
out := make([]map[string]any, 0, len(content))
|
|
for _, item := range content {
|
|
switch stringFromAny(item["type"]) {
|
|
case "text":
|
|
out = append(out, map[string]any{
|
|
"type": "text",
|
|
"text": strings.TrimSpace(stringFromAny(item["text"])),
|
|
})
|
|
case "image_url":
|
|
url := volcesNestedURL(item, "image_url")
|
|
if url == "" {
|
|
continue
|
|
}
|
|
out = append(out, map[string]any{
|
|
"type": "image_url",
|
|
"role": volcesImageRole(item),
|
|
"image_url": map[string]any{"url": url},
|
|
})
|
|
case "video_url":
|
|
url := volcesNestedURL(item, "video_url")
|
|
if url == "" {
|
|
continue
|
|
}
|
|
videoURL := map[string]any{"url": url}
|
|
if value := strings.TrimSpace(stringFromAny(mapFromAny(item["video_url"])["refer_type"])); value != "" {
|
|
videoURL["refer_type"] = value
|
|
}
|
|
if value := strings.TrimSpace(stringFromAny(mapFromAny(item["video_url"])["keep_original_sound"])); value != "" {
|
|
videoURL["keep_original_sound"] = value
|
|
}
|
|
out = append(out, map[string]any{
|
|
"type": "video_url",
|
|
"role": "reference_video",
|
|
"video_url": videoURL,
|
|
})
|
|
case "audio_url":
|
|
url := volcesNestedURL(item, "audio_url")
|
|
if url == "" {
|
|
continue
|
|
}
|
|
out = append(out, map[string]any{
|
|
"type": "audio_url",
|
|
"role": "reference_audio",
|
|
"audio_url": map[string]any{"url": url},
|
|
})
|
|
}
|
|
}
|
|
if len(out) == 0 {
|
|
return []map[string]any{{"type": "text", "text": ""}}
|
|
}
|
|
return out
|
|
}
|
|
|
|
func volcesImageRole(item map[string]any) string {
|
|
switch strings.TrimSpace(stringFromAny(item["role"])) {
|
|
case "first_frame":
|
|
return "first_frame"
|
|
case "last_frame":
|
|
return "last_frame"
|
|
default:
|
|
return "reference_image"
|
|
}
|
|
}
|
|
|
|
func volcesNestedURL(item map[string]any, key string) string {
|
|
nested := mapFromAny(item[key])
|
|
return strings.TrimSpace(stringFromAny(nested["url"]))
|
|
}
|
|
|
|
func mapFromAny(value any) map[string]any {
|
|
if object, ok := value.(map[string]any); ok {
|
|
return object
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func contentItems(value any) []map[string]any {
|
|
switch typed := value.(type) {
|
|
case []any:
|
|
out := make([]map[string]any, 0, len(typed))
|
|
for _, raw := range typed {
|
|
item, ok := raw.(map[string]any)
|
|
if !ok {
|
|
continue
|
|
}
|
|
copied := map[string]any{}
|
|
for key, value := range item {
|
|
copied[key] = value
|
|
}
|
|
out = append(out, copied)
|
|
}
|
|
return out
|
|
case []map[string]any:
|
|
out := make([]map[string]any, 0, len(typed))
|
|
for _, item := range typed {
|
|
copied := map[string]any{}
|
|
for key, value := range item {
|
|
copied[key] = value
|
|
}
|
|
out = append(out, copied)
|
|
}
|
|
return out
|
|
default:
|
|
return nil
|
|
}
|
|
}
|
|
|
|
func normalizeVolcesContentRoles(content []map[string]any) {
|
|
for _, item := range content {
|
|
itemType := strings.TrimSpace(stringFromAny(item["type"]))
|
|
role := strings.TrimSpace(stringFromAny(item["role"]))
|
|
switch itemType {
|
|
case "image_url":
|
|
if role != "first_frame" && role != "last_frame" {
|
|
item["role"] = "reference_image"
|
|
}
|
|
case "video_url":
|
|
item["role"] = "reference_video"
|
|
case "audio_url":
|
|
item["role"] = "reference_audio"
|
|
}
|
|
}
|
|
}
|
|
|
|
func convertVolcesElementsToImageReferences(content *[]map[string]any) {
|
|
referenced := referencedVolcesElementIndexes(*content)
|
|
out := make([]map[string]any, 0, len(*content))
|
|
elementIndex := 0
|
|
for _, item := range *content {
|
|
if stringFromAny(item["type"]) != "element" {
|
|
out = append(out, item)
|
|
continue
|
|
}
|
|
elementIndex++
|
|
if !referenced[elementIndex] {
|
|
continue
|
|
}
|
|
url := volcesElementFrontalImageURL(item)
|
|
if url == "" {
|
|
continue
|
|
}
|
|
role := stringFromAny(item["role"])
|
|
if role != "first_frame" && role != "last_frame" {
|
|
role = "reference_image"
|
|
}
|
|
out = append(out, map[string]any{
|
|
"type": "image_url",
|
|
"role": role,
|
|
"image_url": map[string]any{"url": url},
|
|
})
|
|
}
|
|
*content = out
|
|
}
|
|
|
|
func referencedVolcesElementIndexes(content []map[string]any) map[int]bool {
|
|
out := map[int]bool{}
|
|
for _, item := range content {
|
|
if stringFromAny(item["type"]) != "text" {
|
|
continue
|
|
}
|
|
text := stringFromAny(item["text"])
|
|
if strings.TrimSpace(text) == "" {
|
|
continue
|
|
}
|
|
for _, match := range volcesElementReferencePattern.FindAllStringSubmatch(text, -1) {
|
|
raw := ""
|
|
if len(match) > 1 && match[1] != "" {
|
|
raw = match[1]
|
|
} else if len(match) > 2 {
|
|
raw = match[2]
|
|
}
|
|
index, err := strconv.Atoi(raw)
|
|
if err == nil && index > 0 {
|
|
out[index] = true
|
|
}
|
|
}
|
|
}
|
|
return out
|
|
}
|
|
|
|
func volcesElementFrontalImageURL(item map[string]any) string {
|
|
element := mapFromAny(item["element"])
|
|
if element == nil {
|
|
return ""
|
|
}
|
|
inline := mapFromAny(element["inline_element"])
|
|
for _, value := range []any{
|
|
inline["frontal_image_url"],
|
|
element["frontal_image_url"],
|
|
element["front_image_url"],
|
|
element["image_url"],
|
|
} {
|
|
if url := strings.TrimSpace(stringFromAny(value)); url != "" {
|
|
return url
|
|
}
|
|
}
|
|
return volcesReferImageURL(firstPresent(inline["refer_images"], element["refer_images"]))
|
|
}
|
|
|
|
func volcesReferImageURL(value any) string {
|
|
images := mapListFromAny(value)
|
|
firstURL := ""
|
|
for _, image := range images {
|
|
url := strings.TrimSpace(stringFromAny(image["url"]))
|
|
if url == "" {
|
|
continue
|
|
}
|
|
if firstURL == "" {
|
|
firstURL = url
|
|
}
|
|
slot := strings.ToLower(strings.TrimSpace(stringFromAny(image["slot_key"])))
|
|
if slot == "frontal" || slot == "front" {
|
|
return url
|
|
}
|
|
}
|
|
return firstURL
|
|
}
|
|
|
|
func mapListFromAny(value any) []map[string]any {
|
|
switch typed := value.(type) {
|
|
case []any:
|
|
out := make([]map[string]any, 0, len(typed))
|
|
for _, item := range typed {
|
|
if object := mapFromAny(item); object != nil {
|
|
out = append(out, object)
|
|
}
|
|
}
|
|
return out
|
|
case []map[string]any:
|
|
return typed
|
|
default:
|
|
return nil
|
|
}
|
|
}
|
|
|
|
func appendMultiShotTimeline(content *[]map[string]any) {
|
|
type shot struct {
|
|
index int
|
|
text string
|
|
duration float64
|
|
}
|
|
shots := make([]shot, 0)
|
|
items := *content
|
|
remaining := items[:0]
|
|
for index, item := range items {
|
|
if stringFromAny(item["type"]) != "text" {
|
|
remaining = append(remaining, item)
|
|
continue
|
|
}
|
|
role := stringFromAny(item["role"])
|
|
if role != "shot_prompt" && item["shot_index"] == nil {
|
|
remaining = append(remaining, item)
|
|
continue
|
|
}
|
|
text := strings.TrimSpace(stringFromAny(item["text"]))
|
|
if text == "" {
|
|
continue
|
|
}
|
|
shotIndex := numericValue(item["shot_index"], float64(index))
|
|
shots = append(shots, shot{index: int(math.Floor(shotIndex)), text: text, duration: numericValue(item["duration"], 5)})
|
|
}
|
|
if len(shots) == 0 {
|
|
return
|
|
}
|
|
*content = remaining
|
|
|
|
for i := 0; i < len(shots)-1; i++ {
|
|
for j := i + 1; j < len(shots); j++ {
|
|
if shots[j].index < shots[i].index {
|
|
shots[i], shots[j] = shots[j], shots[i]
|
|
}
|
|
}
|
|
}
|
|
cursor := 0.0
|
|
lines := make([]string, 0, len(shots))
|
|
for idx, shot := range shots {
|
|
start := cursor
|
|
duration := shot.duration
|
|
if duration <= 0 {
|
|
duration = 5
|
|
}
|
|
end := start + duration
|
|
cursor = end
|
|
shotNumber := shot.index + 1
|
|
if shotNumber <= 0 {
|
|
shotNumber = idx + 1
|
|
}
|
|
lines = append(lines, fmt.Sprintf("Shot %d, %gs~%gs: %s", shotNumber, start, end, shot.text))
|
|
}
|
|
|
|
textItem := ensureTextContent(content)
|
|
current := stringFromAny(textItem["text"])
|
|
const prefix = "Additional shot timeline (auto-generated):"
|
|
if strings.Contains(current, prefix) {
|
|
return
|
|
}
|
|
separator := ""
|
|
if strings.TrimSpace(current) != "" {
|
|
separator = "\n\n"
|
|
}
|
|
textItem["text"] = current + separator + prefix + "\n" + strings.Join(lines, "\n")
|
|
}
|
|
|
|
func ensureTextContent(content *[]map[string]any) map[string]any {
|
|
for _, item := range *content {
|
|
if stringFromAny(item["type"]) == "text" && item["shot_index"] == nil && stringFromAny(item["role"]) != "shot_prompt" {
|
|
return item
|
|
}
|
|
}
|
|
item := map[string]any{"type": "text", "text": ""}
|
|
*content = append([]map[string]any{item}, (*content)...)
|
|
return item
|
|
}
|
|
|
|
func supportsMultipleOutputs(request Request, capabilityName string) bool {
|
|
for _, key := range []string{capabilityName, request.ModelType, "image_generate", "image_edit"} {
|
|
if key == "" {
|
|
continue
|
|
}
|
|
capability, _ := request.Candidate.Capabilities[key].(map[string]any)
|
|
if boolFromAny(capability["output_multiple_images"]) {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
func widthHeightSize(body map[string]any) string {
|
|
width := numericValue(body["width"], 0)
|
|
height := numericValue(body["height"], 0)
|
|
if width <= 0 || height <= 0 {
|
|
return ""
|
|
}
|
|
return fmt.Sprintf("%dx%d", int(math.Round(width)), int(math.Round(height)))
|
|
}
|
|
|
|
func volcesTaskStatus(result map[string]any) string {
|
|
return strings.ToLower(strings.TrimSpace(stringFromAny(result["status"])))
|
|
}
|
|
|
|
func volcesTaskErrorCode(result map[string]any) string {
|
|
errorObj, _ := result["error"].(map[string]any)
|
|
if code := strings.TrimSpace(stringFromAny(errorObj["code"])); code != "" {
|
|
return code
|
|
}
|
|
status := volcesTaskStatus(result)
|
|
if status != "" {
|
|
return status
|
|
}
|
|
return "volces_task_failed"
|
|
}
|
|
|
|
func volcesTaskErrorMessage(result map[string]any) string {
|
|
errorObj, _ := result["error"].(map[string]any)
|
|
if message := strings.TrimSpace(stringFromAny(errorObj["message"])); message != "" {
|
|
return message
|
|
}
|
|
if status := volcesTaskStatus(result); status != "" {
|
|
return "volces video task " + status
|
|
}
|
|
return "volces video task failed"
|
|
}
|
|
|
|
func volcesVideoSuccessResult(request Request, upstreamTaskID string, raw map[string]any) map[string]any {
|
|
content, _ := raw["content"].(map[string]any)
|
|
videoURL := strings.TrimSpace(stringFromAny(content["video_url"]))
|
|
created := intFromAny(raw["created_at"])
|
|
if created == 0 {
|
|
created = int(nowUnix())
|
|
}
|
|
data := []any{}
|
|
if videoURL != "" {
|
|
data = append(data, map[string]any{"url": videoURL, "type": "video"})
|
|
}
|
|
return map[string]any{
|
|
"id": upstreamTaskID,
|
|
"object": "video.generation",
|
|
"created": created,
|
|
"model": upstreamModelName(request.Candidate),
|
|
"status": "succeeded",
|
|
"upstream_task_id": upstreamTaskID,
|
|
"data": data,
|
|
"raw": raw,
|
|
}
|
|
}
|
|
|
|
func volcesVideoUsage(raw map[string]any) Usage {
|
|
usage, _ := raw["usage"].(map[string]any)
|
|
output := intFromAny(usage["completion_tokens"])
|
|
total := intFromAny(usage["total_tokens"])
|
|
if total == 0 {
|
|
total = output
|
|
}
|
|
return Usage{OutputTokens: output, TotalTokens: total}
|
|
}
|
|
|
|
func volcesVideoProgress(request Request, upstreamTaskID string) []Progress {
|
|
progress := providerProgress(request)
|
|
progress = append(progress, Progress{
|
|
Phase: "polling_result",
|
|
Progress: 0.9,
|
|
Message: "volces video task completed",
|
|
Payload: map[string]any{"upstreamTaskId": upstreamTaskID},
|
|
})
|
|
return progress
|
|
}
|
|
|
|
func volcesPollInterval(request Request) time.Duration {
|
|
ms := numericValue(firstPresent(request.Candidate.PlatformConfig["volcesPollIntervalMs"], request.Body["pollIntervalMs"], request.Body["poll_interval_ms"]), 5000)
|
|
if ms < 100 {
|
|
ms = 100
|
|
}
|
|
return time.Duration(ms) * time.Millisecond
|
|
}
|
|
|
|
func volcesPollTimeout(request Request) time.Duration {
|
|
seconds := numericValue(firstPresent(request.Candidate.PlatformConfig["volcesPollTimeoutSeconds"], request.Body["pollTimeoutSeconds"], request.Body["poll_timeout_seconds"]), 600)
|
|
if seconds < 1 {
|
|
seconds = 600
|
|
}
|
|
return time.Duration(seconds) * time.Second
|
|
}
|
|
|
|
func firstNonEmpty(values ...string) string {
|
|
for _, value := range values {
|
|
if strings.TrimSpace(value) != "" {
|
|
return strings.TrimSpace(value)
|
|
}
|
|
}
|
|
return ""
|
|
}
|
|
|
|
func firstNonEmptyStringValue(body map[string]any, keys ...string) string {
|
|
for _, key := range keys {
|
|
if value := strings.TrimSpace(stringFromAny(body[key])); value != "" {
|
|
return value
|
|
}
|
|
}
|
|
return ""
|
|
}
|
|
|
|
func stringListFromAny(value any) []string {
|
|
switch typed := value.(type) {
|
|
case string:
|
|
if strings.TrimSpace(typed) == "" {
|
|
return nil
|
|
}
|
|
return []string{typed}
|
|
case []any:
|
|
out := make([]string, 0, len(typed))
|
|
for _, item := range typed {
|
|
if value := strings.TrimSpace(stringFromAny(item)); value != "" {
|
|
out = append(out, value)
|
|
}
|
|
}
|
|
return out
|
|
case []string:
|
|
out := make([]string, 0, len(typed))
|
|
for _, item := range typed {
|
|
if value := strings.TrimSpace(item); value != "" {
|
|
out = append(out, value)
|
|
}
|
|
}
|
|
return out
|
|
default:
|
|
return nil
|
|
}
|
|
}
|
|
|
|
func firstNonEmptyStringListFromAny(values ...any) []string {
|
|
for _, value := range values {
|
|
items := stringListFromAny(value)
|
|
if len(items) > 0 {
|
|
return items
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func numericValue(value any, fallback float64) float64 {
|
|
switch typed := value.(type) {
|
|
case int:
|
|
return float64(typed)
|
|
case int64:
|
|
return float64(typed)
|
|
case float64:
|
|
return typed
|
|
case string:
|
|
var parsed float64
|
|
if _, err := fmt.Sscanf(strings.TrimSpace(typed), "%f", &parsed); err == nil {
|
|
return parsed
|
|
}
|
|
return fallback
|
|
default:
|
|
return fallback
|
|
}
|
|
}
|
|
|
|
func boolFromAny(value any) bool {
|
|
switch typed := value.(type) {
|
|
case bool:
|
|
return typed
|
|
case string:
|
|
normalized := strings.ToLower(strings.TrimSpace(typed))
|
|
return normalized == "true" || normalized == "1"
|
|
case float64:
|
|
return typed == 1
|
|
case int:
|
|
return typed == 1
|
|
default:
|
|
return false
|
|
}
|
|
}
|