diff --git a/apps/api/internal/runner/param_processor.go b/apps/api/internal/runner/param_processor.go index 621a574..3bf9550 100644 --- a/apps/api/internal/runner/param_processor.go +++ b/apps/api/internal/runner/param_processor.go @@ -1478,7 +1478,7 @@ func isVideoResolution(modelType string, value string) bool { } func isVideoModelType(modelType string) bool { - return modelType == "video_generate" || modelType == "text_to_video" || modelType == "image_to_video" || modelType == "video_edit" || modelType == "omni_video" || modelType == "omni" + return modelType == "video_generate" || modelType == "text_to_video" || modelType == "image_to_video" || modelType == "video_edit" || modelType == "video_reference" || modelType == "video_first_last_frame" || modelType == "omni_video" || modelType == "omni" } func cloneMap(values map[string]any) map[string]any { diff --git a/apps/api/internal/runner/service.go b/apps/api/internal/runner/service.go index 288e169..116da1a 100644 --- a/apps/api/internal/runner/service.go +++ b/apps/api/internal/runner/service.go @@ -687,7 +687,7 @@ func requestedModelTypeFromBody(body map[string]any) string { func isKnownModelType(value string) bool { switch value { - case "text_generate", "image_generate", "image_edit", "video_generate", "image_to_video", "text_to_video", "video_edit", "omni_video", "omni": + case "text_generate", "image_generate", "image_edit", "video_generate", "image_to_video", "text_to_video", "video_edit", "video_reference", "video_first_last_frame", "omni_video", "omni": return true default: return false diff --git a/apps/web/src/api.ts b/apps/web/src/api.ts index 74e8f8f..664ac20 100644 --- a/apps/web/src/api.ts +++ b/apps/web/src/api.ts @@ -668,8 +668,12 @@ export async function createVideoGenerationTask( audio?: boolean; audioUrl?: string | string[]; audio_url?: string | string[]; + capabilityType?: string; content?: Array>; + firstFrame?: string; + first_frame?: string; model: string; + model_type?: string; prompt: string; aspect_ratio?: string; count?: number; @@ -681,6 +685,8 @@ export async function createVideoGenerationTask( image_url?: string | string[]; imageUrls?: string[]; image_urls?: string[]; + lastFrame?: string; + last_frame?: string; n?: number; output_audio?: boolean; referenceAudio?: string | string[]; diff --git a/apps/web/src/pages/PlaygroundPage.tsx b/apps/web/src/pages/PlaygroundPage.tsx index 0ab04b0..cd26a68 100644 --- a/apps/web/src/pages/PlaygroundPage.tsx +++ b/apps/web/src/pages/PlaygroundPage.tsx @@ -19,10 +19,15 @@ import { code } from '@streamdown/code'; import { math } from '@streamdown/math'; import { mermaid } from '@streamdown/mermaid'; import type { GatewayApiKey, GatewayTask, PlatformModel } from '@easyai-ai-gateway/contracts'; -import { Bot, ChevronDown, FileText, Image as ImageIcon, LoaderCircle, MessageSquarePlus, Music2, Paperclip, Send, Sparkles, Video, X } from 'lucide-react'; +import { Bot, ChevronDown, FileText, Image as ImageIcon, LoaderCircle, MessageSquarePlus, Music2, Paperclip, Plus, Repeat2, Send, Sparkles, Video, X } from 'lucide-react'; import { Badge, Button, Select, Textarea } from '../components/ui'; import { GatewayApiError, createImageEditTask, createImageGenerationTask, createVideoGenerationTask, pollTaskUntilSettled, streamChatCompletionText, taskIsPending, uploadFileToStorage } from '../api'; import type { PlaygroundMode } from '../types'; +import { + PlaygroundPromptMentionInput, + removeInvalidPlaygroundResourceTokens, + replacePlaygroundResourceTokens, +} from './playground-prompt-mention'; import { defaultMediaGenerationSettings, deriveMediaModelCapabilities, @@ -59,6 +64,7 @@ interface ModelOption { } type PlaygroundUploadKind = 'audio' | 'file' | 'image' | 'video'; +type PlaygroundUploadRole = 'first_frame' | 'last_frame'; interface PlaygroundUpload { contentType: string; @@ -66,6 +72,7 @@ interface PlaygroundUpload { kind: PlaygroundUploadKind; name: string; raw: Record; + role?: PlaygroundUploadRole; size: number; url: string; } @@ -95,6 +102,7 @@ const quickPrompts: Record = { }; const mediaUploadAccept = 'image/*,video/*,audio/*'; +const imageOnlyUploadAccept = 'image/*'; const chatUploadAccept = [ mediaUploadAccept, '.csv', @@ -167,7 +175,9 @@ export function PlaygroundPage(props: { const pendingMediaModelRef = useRef(''); const resumedTaskIdsRef = useRef(new Set()); const activeMode = useMemo(() => modeOptions.find((item) => item.value === props.mode) ?? modeOptions[0], [props.mode]); + const mediaUploadAcceptValue = mediaUploadAcceptForMode(props.mode, videoMode); const effectiveImageHasReference = imageHasReference || (props.mode === 'image' && mediaUploads.some((item) => item.kind === 'image')); + const mediaUploadSignature = useMemo(() => mediaUploads.map((item) => `${item.id}:${item.kind}:${item.role ?? ''}`).join('|'), [mediaUploads]); const modelOptions = useMemo( () => buildModelOptions(filterModelsForMode(props.models, props.mode, effectiveImageHasReference, videoMode)), [effectiveImageHasReference, props.mode, props.models, videoMode], @@ -215,7 +225,31 @@ export function PlaygroundPage(props: { writeStoredMediaRuns(mediaRuns); }, [mediaRuns]); - async function uploadMediaFiles(files: File[]) { + useEffect(() => { + if (props.mode === 'chat') return; + setPrompt((current) => removeInvalidPlaygroundResourceTokens(current, mediaUploads)); + }, [mediaUploadSignature, props.mode]); + + useEffect(() => { + if (props.mode === 'image') { + setMediaUploads((current) => current.some((item) => item.kind !== 'image') ? current.filter((item) => item.kind === 'image') : current); + return; + } + if (props.mode === 'video' && videoMode === 'first_last_frame') { + setMediaUploads((current) => normalizeFirstLastFrameUploads(current)); + } + }, [props.mode, videoMode]); + + useEffect(() => { + if (props.mode !== 'video') return; + setVideoMode((current) => { + if (mediaUploads.length > 0 && current === 'text_to_video') return 'omni_reference'; + if (mediaUploads.length === 0 && current === 'omni_reference') return 'text_to_video'; + return current; + }); + }, [mediaUploads.length, props.mode]); + + async function uploadMediaFiles(files: File[], targetRole?: PlaygroundUploadRole) { if (!files.length) return; const credential = activeApiKeySecret || props.token; if (!props.token) { @@ -231,15 +265,19 @@ export function PlaygroundPage(props: { try { const { items, warnings } = await uploadPlaygroundFiles(credential, files, { allowFiles: false, + allowedKinds: allowedMediaUploadKinds(props.mode, videoMode), source: `ai-gateway-playground-${props.mode}`, }); if (items.length) { - setMediaUploads((current) => [...current, ...items]); + setMediaUploads((current) => mergeMediaUploadsForMode(current, items, props.mode, videoMode, targetRole)); if (props.mode === 'image' && items.some((item) => item.kind === 'image')) { setImageHasReference(true); } + if (props.mode === 'video' && videoMode === 'text_to_video') { + setVideoMode('omni_reference'); + } } - setMediaUploadMessage(warnings[0] ?? (items.length ? `已上传 ${items.length} 个参考素材。` : '')); + setMediaUploadMessage(warnings[0] ?? ''); } catch (err) { setMediaUploadMessage(err instanceof Error ? err.message : '文件上传失败'); } finally { @@ -305,7 +343,8 @@ export function PlaygroundPage(props: { const localId = newLocalId(); const runUploads = overrides ? [] : mediaUploads; - const modelLabel = modelOptions.find((item) => item.value === runModel)?.label ?? runModel; + const runModelOption = modelOptions.find((item) => item.value === runModel); + const modelLabel = runModelOption?.label ?? runModel; const run: MediaGenerationRun = { createdAt: new Date().toISOString(), localId, @@ -320,11 +359,13 @@ export function PlaygroundPage(props: { setMediaRuns((current) => [...current, run]); setMediaMessage(''); try { - const uploadPayload = mediaUploadRequestPayload(runUploads, runMode); + const requestPrompt = replacePlaygroundResourceTokens(trimmedPrompt, runUploads, runMode); + const uploadPayload = mediaUploadRequestPayload(runUploads, runMode, videoMode); const requestPayload = { model: runModel, - prompt: promptWithUploadSummary(trimmedPrompt, runUploads), + prompt: requestPrompt, ...mediaRequestPayload(runSettings, runMode), + ...videoModeRequestPayload(runMode, videoMode, runUploads, runModelOption), ...uploadPayload, }; const response = runMode === 'video' @@ -415,7 +456,7 @@ export function PlaygroundPage(props: { imageHasReference={effectiveImageHasReference} mediaSettings={mediaSettings} mediaCapabilities={mediaCapabilities} - uploadAccept={mediaUploadAccept} + uploadAccept={mediaUploadAcceptValue} uploadMessage={mediaUploadMessage} uploads={mediaUploads} uploading={mediaUploading} @@ -434,8 +475,9 @@ export function PlaygroundPage(props: { } return next; })} + onSwapFrameUploads={() => setMediaUploads((current) => swapFirstLastFrameUploads(current))} onSubmit={() => void submitMediaTask()} - onUploadFiles={(files) => void uploadMediaFiles(files)} + onUploadFiles={(files, targetRole) => void uploadMediaFiles(files, targetRole)} onVideoModeChange={setVideoMode} /> ); @@ -1120,34 +1162,64 @@ function Composer(props: { onModelChange: (value: string) => void; onPromptChange: (value: string) => void; onRemoveUpload?: (id: string) => void; + onSwapFrameUploads?: () => void; onSubmit?: () => void; - onUploadFiles?: (files: File[]) => void; + onUploadFiles?: (files: File[], targetRole?: PlaygroundUploadRole) => void; onVideoModeChange?: (value: VideoCreateMode) => void; }) { const quickItems = quickPrompts[props.mode]; const apiKeyNotice = props.apiKeys && props.apiKeySecretsById ? apiKeyNoticeText(props.apiKeys, props.apiKeySecretsById) : ''; + const hasMediaReferencePicker = props.mode !== 'chat' && Boolean(props.onUploadFiles); + const mediaReferenceMessage = hasMediaReferencePicker + ? props.uploadMessage || mediaUploadSummaryMessage(props.uploads ?? [], props.mode, props.videoMode ?? 'text_to_video') + : props.uploadMessage; return (
-
- -
-