diff --git a/apps/web/src/api.ts b/apps/web/src/api.ts index 8e66cad..64578d6 100644 --- a/apps/web/src/api.ts +++ b/apps/web/src/api.ts @@ -552,12 +552,16 @@ export async function createImageEditTask( export async function createVideoGenerationTask( token: string, input: { + audio?: boolean; model: string; prompt: string; aspect_ratio?: string; count?: number; + duration?: number; + duration_seconds?: number; height?: number; n?: number; + output_audio?: boolean; resolution?: string; runMode?: string; simulation?: boolean; diff --git a/apps/web/src/pages/PlaygroundPage.tsx b/apps/web/src/pages/PlaygroundPage.tsx index 0762593..c132135 100644 --- a/apps/web/src/pages/PlaygroundPage.tsx +++ b/apps/web/src/pages/PlaygroundPage.tsx @@ -130,8 +130,10 @@ export function PlaygroundPage(props: { const mediaCapabilities = useMemo( () => props.mode === 'chat' ? undefined - : deriveMediaModelCapabilities(activeModelOption?.models, props.mode, videoMode), - [activeModelOption, props.mode, videoMode], + : activeModelOption + ? deriveMediaModelCapabilities(activeModelOption.models, props.mode, videoMode, mediaSettings.resolution) + : undefined, + [activeModelOption, mediaSettings.resolution, props.mode, videoMode], ); useEffect(() => { @@ -230,7 +232,7 @@ export function PlaygroundPage(props: { const requestPayload = { model: selectedModel, prompt: trimmedPrompt, - ...mediaRequestPayload(runSettings), + ...mediaRequestPayload(runSettings, runMode), }; const response = runMode === 'video' ? await createVideoGenerationTask(credential, requestPayload) @@ -1117,8 +1119,10 @@ function mediaSettingsFromStorage(value: unknown): MediaGenerationSettings { aspectRatio: stringFromUnknown(record.aspectRatio) || fallback.aspectRatio, countPreset: countPresetFromStorage(record.countPreset, fallback.countPreset), customCount: numberFromUnknown(record.customCount, fallback.customCount, 1, 20), + durationSeconds: numberFromUnknown(record.durationSeconds ?? record.duration_seconds ?? record.duration, fallback.durationSeconds, 1, 3600), height: numberFromUnknown(record.height, fallback.height, 128, 8192), outputMode: record.outputMode === 'group' ? 'group' : 'single', + outputAudio: booleanFromUnknown(record.outputAudio ?? record.output_audio ?? record.audio, fallback.outputAudio), resolution: stringFromUnknown(record.resolution) || fallback.resolution, width: numberFromUnknown(record.width, fallback.width, 128, 8192), }; @@ -1159,6 +1163,12 @@ function numberFromUnknown(value: unknown, fallback: number, min: number, max: n return Math.min(max, Math.max(min, Math.round(number))); } +function booleanFromUnknown(value: unknown, fallback: boolean) { + if (value === true || value === 'true') return true; + if (value === false || value === 'false') return false; + return fallback; +} + function delay(ms: number) { return new Promise((resolve) => window.setTimeout(resolve, ms)); } diff --git a/apps/web/src/pages/playground-media.tsx b/apps/web/src/pages/playground-media.tsx index fc85413..e478836 100644 --- a/apps/web/src/pages/playground-media.tsx +++ b/apps/web/src/pages/playground-media.tsx @@ -1,5 +1,7 @@ import { useEffect, useRef, type CSSProperties, type ReactNode } from 'react'; import type { GatewayTask, PlatformModel } from '@easyai-ai-gateway/contracts'; +import Segmented from 'antd/es/segmented'; +import Slider from 'antd/es/slider'; import { Download, Edit3, @@ -25,8 +27,10 @@ export interface MediaGenerationSettings { aspectRatio: string; countPreset: MediaCountPreset; customCount: number; + durationSeconds: number; height: number; outputMode: MediaOutputMode; + outputAudio: boolean; resolution: MediaResolution; width: number; } @@ -57,8 +61,12 @@ interface ResolutionOption { export interface MediaModelCapabilities { aspectRatios: string[]; + durationOptions: number[]; + durationRange: [number, number]; + durationStep: number; maxCount: number; resolutions: MediaResolution[]; + supportsAudio: boolean; supportsGroup: boolean; } @@ -83,7 +91,7 @@ const aspectRatioOptions: AspectRatioOption[] = [ const resolutionOptions: ResolutionOption[] = [ { value: '1K', label: '标准 1K', modes: ['image'] }, { value: '2K', label: '高清 2K', modes: ['image'] }, - { value: '4K', label: '超清 4K', modes: ['image', 'video'] }, + { value: '4K', label: '超清 4K', modes: ['image'] }, { value: '480p', label: '标清 480p', modes: ['video'] }, { value: '720p', label: '高清 720p', modes: ['video'] }, { value: '1080p', label: '全高清 1080p', modes: ['video'] }, @@ -103,8 +111,10 @@ export function defaultMediaGenerationSettings(): MediaGenerationSettings { aspectRatio: '1:1', countPreset: 1, customCount: 6, + durationSeconds: 5, height: 2048, outputMode: 'single', + outputAudio: true, resolution: '2K', width: 2048, }; @@ -116,7 +126,18 @@ export function mediaOutputCount(settings: MediaGenerationSettings) { return clampNumber(raw, 1, 20); } -export function mediaRequestPayload(settings: MediaGenerationSettings) { +export function mediaRequestPayload(settings: MediaGenerationSettings, mode: Exclude) { + if (mode === 'video') { + return { + aspect_ratio: settings.aspectRatio === 'auto' ? undefined : settings.aspectRatio, + audio: settings.outputAudio, + duration: settings.durationSeconds, + duration_seconds: settings.durationSeconds, + output_audio: settings.outputAudio, + resolution: settings.resolution, + }; + } + const count = mediaOutputCount(settings); const size = `${settings.width}x${settings.height}`; const highQuality = settings.resolution === '4K' || settings.resolution === '2160p'; @@ -136,14 +157,24 @@ export function deriveMediaModelCapabilities( models: PlatformModel[] | PlatformModel | undefined, mode: Exclude, contextKey?: string, + resolution?: string, ): MediaModelCapabilities { const modelList = (Array.isArray(models) ? models : models ? [models] : []).filter(Boolean); if (!modelList.length) return defaultMediaModelCapabilities(mode); - const derived = modelList.map((model) => deriveSingleMediaModelCapabilities(model, mode, contextKey)); + const derived = modelList.map((model) => deriveSingleMediaModelCapabilities(model, mode, contextKey, resolution)); + if (derived.length === 1) return derived[0]; + const durationOptions = intersectNumberValues( + derived.map((item) => durationValuesForCapabilities(item)), + durationValuesForCapabilities(defaultMediaModelCapabilities(mode)), + ); return { aspectRatios: intersectOptionValues(derived.map((item) => item.aspectRatios), aspectRatioOptions.map((item) => item.value)), + durationOptions, + durationRange: durationRangeFromValues(durationOptions), + durationStep: 1, maxCount: Math.max(1, Math.min(...derived.map((item) => item.maxCount))), resolutions: intersectOptionValues(derived.map((item) => item.resolutions), resolutionOptionsForMode(mode).map((item) => item.value)), + supportsAudio: derived.every((item) => item.supportsAudio), supportsGroup: derived.every((item) => item.supportsGroup), }; } @@ -155,6 +186,7 @@ export function normalizeMediaSettingsForCapabilities( ) { const aspectOptions = filterAspectRatioOptions(capabilities); const resolutionItems = filterResolutionOptions(capabilities, mode); + const durationOptions = durationValuesForCapabilities(capabilities); const maxCount = Math.max(1, Math.min(capabilities.maxCount, 20)); const supportsGroup = capabilities.supportsGroup && maxCount > 1; const next: MediaGenerationSettings = { @@ -163,6 +195,15 @@ export function normalizeMediaSettingsForCapabilities( resolution: resolutionItems.some((item) => item.value === settings.resolution) ? settings.resolution : resolutionItems[0]?.value ?? settings.resolution, }; + if (mode === 'video') { + next.countPreset = 1; + next.customCount = 1; + next.durationSeconds = closestDurationValue(settings.durationSeconds, durationOptions); + next.outputAudio = capabilities.supportsAudio ? settings.outputAudio : false; + next.outputMode = 'single'; + return mediaSettingsEqual(settings, next) ? settings : next; + } + if (!supportsGroup) { next.countPreset = 1; next.customCount = 1; @@ -191,13 +232,21 @@ export function MediaSettingsPopover(props: { }) { const capabilities = props.capabilities ?? defaultMediaModelCapabilities(props.mode); const aspectOptions = filterAspectRatioOptions(capabilities); - const resolutionItems = resolutionOptionsForMode(props.mode); - const enabledResolutions = new Set(filterResolutionOptions(capabilities, props.mode).map((item) => item.value)); + const resolutionItems = filterResolutionOptions(capabilities, props.mode); + const isImageMode = props.mode === 'image'; + const isVideoMode = props.mode === 'video'; + const durationOptions = durationValuesForCapabilities(capabilities); + const selectedDuration = closestDurationValue(props.settings.durationSeconds, durationOptions); + const durationUsesExplicitOptions = capabilities.durationOptions.length > 0; + const durationMarks = durationSliderMarks(durationOptions, durationUsesExplicitOptions); + const durationMin = durationOptions[0] ?? props.settings.durationSeconds; + const durationMax = durationOptions[durationOptions.length - 1] ?? props.settings.durationSeconds; + const durationStep = durationUsesExplicitOptions ? null : capabilities.durationStep; const maxCount = Math.max(1, Math.min(capabilities.maxCount, 20)); const supportsGroup = capabilities.supportsGroup && maxCount > 1; const countOptions = countPresetOptions.filter((item) => item.value === 'custom' ? maxCount > 4 : item.value <= Math.min(4, maxCount)); - const count = mediaOutputCount(props.settings); - const unit = props.mode === 'video' ? '条' : '张'; + const count = isImageMode ? mediaOutputCount(props.settings) : 1; + const unit = '张'; function patch(next: Partial) { props.onChange(normalizeMediaSettingsForCapabilities({ ...props.settings, ...next }, capabilities, props.mode)); @@ -247,8 +296,7 @@ export function MediaSettingsPopover(props: { type="button" key={item.value} data-active={props.settings.resolution === item.value} - disabled={!enabledResolutions.has(item.value)} - onClick={() => enabledResolutions.has(item.value) && patch({ resolution: item.value })} + onClick={() => patch({ resolution: item.value })} > {item.label} {item.value === '4K' && } @@ -257,85 +305,130 @@ export function MediaSettingsPopover(props: { -
- 尺寸 -
-
-
+ {!capabilities.supportsAudio && ( +

当前模型不支持输出音频,已自动关闭。

+ )} + -
- 生成数量 -
- - -
- {supportsGroup ? ( - <> -
- {countOptions.map((item) => ( - - ))} +
+
+ 视频时长 + {selectedDuration}s
- {props.settings.countPreset === 'custom' && props.settings.outputMode === 'group' && ( -
+ + )} + + {isImageMode && ( + <> +
+ 尺寸 +
+ + + + PX +
+
+ +
+ 生成数量 +
+ + +
+ {supportsGroup ? ( + <> +
+ {countOptions.map((item) => ( + + ))} +
+ {props.settings.countPreset === 'custom' && props.settings.outputMode === 'group' && ( + + )} + + ) : ( +

当前模型不支持组图输出。

)} - - ) : ( -

当前模型不支持组图输出。

- )} -

- - {count} / {unit} -

-
+

+ + {count} / {unit} +

+
+ + )} ); @@ -515,11 +608,11 @@ function mediaResultItemFromEntry(entry: unknown, mode: Exclude) { - const count = mediaOutputCount(settings); - const unit = mode === 'video' ? '条' : '张'; const resolutionLabel = resolutionOptionsForMode(mode).find((item) => item.value === settings.resolution)?.label ?? settings.resolution; + if (mode === 'video') return `${settings.aspectRatio} | ${resolutionLabel} | ${settings.durationSeconds}s | ${settings.outputAudio ? '有声音' : '无声音'}`; + const count = mediaOutputCount(settings); const modeLabel = settings.outputMode === 'single' ? '单图' : '组图'; - return `${settings.aspectRatio} | ${resolutionLabel} | ${settings.width}x${settings.height} | ${modeLabel} ${count}${unit}`; + return `${settings.aspectRatio} | ${resolutionLabel} | ${settings.width}x${settings.height} | ${modeLabel} ${count}张`; } function mediaStatusText(run: MediaGenerationRun) { @@ -568,10 +661,12 @@ function deriveSingleMediaModelCapabilities( model: PlatformModel, mode: Exclude, contextKey?: string, + resolution?: string, ): MediaModelCapabilities { const source = mergeCapabilityRecords(model.capabilities, model.capabilityOverride); const typeKeys = capabilityTypeKeys(model, source, mode, contextKey); const defaultCapabilities = defaultMediaModelCapabilities(mode); + const durationScopes = [resolution, contextKey]; const resolutionValues = normalizeResolutionValues(stringListFromCapability(firstCapabilityValue(source, typeKeys, ['output_resolutions']), [contextKey])); const allowedAspectValues = normalizeAspectRatioValues(stringListFromCapability(firstCapabilityValue(source, typeKeys, ['aspect_ratio_allowed']), [contextKey])); const ratioRange = ratioRangeFromValue(firstCapabilityValue(source, typeKeys, ['aspect_ratio_range'])); @@ -583,23 +678,36 @@ function deriveSingleMediaModelCapabilities( : allowedAspectValues.length ? allowedAspectValues : rangedAspectValues; const maxCountValue = numberFromUnknown(firstCapabilityValue(source, typeKeys, countCapabilityKeys(mode))); const explicitGroupSupport = boolFromUnknown(firstCapabilityValue(source, typeKeys, groupCapabilityKeys(mode))); + const durationRange = durationRangeFromValue(scopedCapabilityValue(firstCapabilityValue(source, typeKeys, ['duration_range']), durationScopes)) ?? defaultCapabilities.durationRange; + const durationStep = durationStepFromValue(scopedCapabilityValue(firstCapabilityValue(source, typeKeys, ['duration_step']), durationScopes), defaultCapabilities.durationStep); + const durationOptions = normalizeDurationValues(numberListFromCapability(scopedCapabilityValue(firstCapabilityValue(source, typeKeys, ['duration_options']), durationScopes))); + const explicitAudioSupport = boolFromUnknown(firstCapabilityValue(source, typeKeys, ['output_audio'])); const maxCount = explicitGroupSupport === false ? 1 : clampNumber(maxCountValue ?? defaultCapabilities.maxCount, 1, 20); const supportsGroup = explicitGroupSupport === false ? false : maxCount > 1; return { aspectRatios: aspectRatios.length ? aspectRatios : defaultCapabilities.aspectRatios, + durationOptions, + durationRange, + durationStep, maxCount, resolutions: resolutionValues.length ? resolutionValues : defaultCapabilities.resolutions, + supportsAudio: explicitAudioSupport ?? defaultCapabilities.supportsAudio, supportsGroup, }; } function defaultMediaModelCapabilities(mode: Exclude): MediaModelCapabilities { + const isVideo = mode === 'video'; return { aspectRatios: aspectRatioOptions.map((item) => item.value), + durationOptions: [], + durationRange: isVideo ? [5, 10] : [1, 1], + durationStep: 1, maxCount: 20, resolutions: resolutionOptionsForMode(mode).map((item) => item.value), - supportsGroup: true, + supportsAudio: false, + supportsGroup: mode === 'image', }; } @@ -616,6 +724,55 @@ function filterResolutionOptions(capabilities: MediaModelCapabilities, mode: Exc return items.length ? items : modeOptions; } +function durationValuesForCapabilities(capabilities: MediaModelCapabilities) { + const [min, max] = capabilities.durationRange; + const step = Math.max(1, capabilities.durationStep); + const options = capabilities.durationOptions.length + ? capabilities.durationOptions + : expandDurationRange(min, max, step); + const filtered = options.filter((value) => ( + value >= min + && value <= max + && durationMatchesStep(value, min, step) + )); + return filtered.length ? filtered : [min]; +} + +function expandDurationRange(min: number, max: number, step: number) { + const values: number[] = []; + for (let value = min; value <= max; value += step) { + values.push(value); + } + if (!values.includes(max)) values.push(max); + return normalizeDurationValues(values); +} + +function durationMatchesStep(value: number, min: number, step: number) { + if (step <= 1) return true; + const ratio = (value - min) / step; + return Math.abs(ratio - Math.round(ratio)) < 0.000001; +} + +function closestDurationValue(value: number, values: number[]) { + if (!values.length) return value; + return values.reduce((closest, item) => ( + Math.abs(item - value) < Math.abs(closest - value) ? item : closest + ), values[0]); +} + +function durationSliderMarks(values: number[], showAll: boolean) { + const markValues = showAll || values.length <= 16 + ? values + : uniqueNumberValues([values[0], values[values.length - 1]]); + return Object.fromEntries(markValues.map((value) => [value, `${value}s`])); +} + +function durationRangeFromValues(values: number[]): [number, number] { + const normalized = normalizeDurationValues(values); + if (!normalized.length) return [5, 10]; + return [normalized[0], normalized[normalized.length - 1]]; +} + function resolutionOptionsForMode(mode: Exclude) { return resolutionOptions.filter((item) => item.modes.includes(mode)); } @@ -627,6 +784,13 @@ function intersectOptionValues(values: string[][], fallback: string[]) { return intersection.length ? intersection : nonEmptyValues[0]; } +function intersectNumberValues(values: number[][], fallback: number[]) { + const nonEmptyValues = values.filter((items) => items.length > 0); + if (!nonEmptyValues.length) return fallback; + const intersection = fallback.filter((item) => nonEmptyValues.every((items) => items.includes(item))); + return intersection.length ? intersection : nonEmptyValues[0]; +} + function capabilityTypeKeys( model: PlatformModel, source: Record, @@ -664,6 +828,23 @@ function nestedCapabilityValue(source: Record, typeKeys: string return undefined; } +function scopedCapabilityValue(value: unknown, scopes: Array): unknown { + const record = recordFromUnknown(value); + if (!record) return value; + const scopeKeys = uniqueStrings(scopes.filter((item): item is string => Boolean(item))); + for (const scope of scopeKeys) { + const scoped = record[scope]; + if (hasCapabilityValue(scoped)) { + return scopedCapabilityValue(scoped, scopeKeys.filter((item) => item !== scope)); + } + } + for (const scope of ['default', 'all', '*']) { + const scoped = record[scope]; + if (hasCapabilityValue(scoped)) return scoped; + } + return value; +} + function groupCapabilityKeys(mode: Exclude) { return mode === 'image' ? ['output_multiple_images', 'multiple_images', 'support_multiple_images', 'supports_group'] @@ -700,6 +881,15 @@ function stringListFromCapability(value: unknown, preferredKeys: Array stringListFromCapability(item)); } +function numberListFromCapability(value: unknown): number[] { + if (Array.isArray(value)) return value.flatMap((item) => numberListFromCapability(item)); + if (typeof value === 'number') return [value]; + if (typeof value === 'string') return value.match(/-?\d+(?:\.\d+)?/g)?.map(Number).filter(Number.isFinite) ?? []; + const record = recordFromUnknown(value); + if (!record) return []; + return Object.values(record).flatMap((item) => numberListFromCapability(item)); +} + function normalizeAspectRatioValues(values: string[]) { const allowedValues = new Set(aspectRatioOptions.map((item) => item.value)); return uniqueStrings(values.map((value) => { @@ -743,6 +933,14 @@ function ratioRangeFromValue(value: unknown): [number, number] | undefined { return undefined; } +function durationRangeFromValue(value: unknown): [number, number] | undefined { + const range = ratioRangeFromValue(value); + if (!range) return undefined; + const min = Math.max(1, Math.round(range[0])); + const max = Math.max(1, Math.round(range[1])); + return [Math.min(min, max), Math.max(min, max)]; +} + function aspectRatioWithinRange(value: string, range: [number, number]) { const [width, height] = value.split(':').map(Number); if (!Number.isFinite(width) || !Number.isFinite(height) || height <= 0) return false; @@ -771,6 +969,23 @@ function numberFromUnknown(value: unknown) { return undefined; } +function durationStepFromValue(value: unknown, fallback: number) { + const parsed = numberFromUnknown(value); + if (!parsed || parsed <= 0) return fallback; + return Math.max(1, Math.round(parsed)); +} + +function normalizeDurationValues(values: number[]) { + return uniqueNumberValues(values + .filter((item) => Number.isFinite(item) && item > 0) + .map((item) => Math.round(item))) + .sort((left, right) => left - right); +} + +function uniqueNumberValues(values: number[]) { + return Array.from(new Set(values)); +} + function uniqueStrings(values: string[]) { return Array.from(new Set(values.map((item) => item.trim()).filter(Boolean))); } @@ -779,8 +994,10 @@ function mediaSettingsEqual(left: MediaGenerationSettings, right: MediaGeneratio return left.aspectRatio === right.aspectRatio && left.countPreset === right.countPreset && left.customCount === right.customCount + && left.durationSeconds === right.durationSeconds && left.height === right.height && left.outputMode === right.outputMode + && left.outputAudio === right.outputAudio && left.resolution === right.resolution && left.width === right.width; } diff --git a/apps/web/src/styles/playground.css b/apps/web/src/styles/playground.css index 557e07c..a188a01 100644 --- a/apps/web/src/styles/playground.css +++ b/apps/web/src/styles/playground.css @@ -920,6 +920,19 @@ font-weight: var(--font-weight-medium); } +.mediaSettingsTitleRow { + display: flex; + align-items: center; + justify-content: space-between; + gap: 8px; +} + +.mediaSettingsTitleRow strong { + color: var(--primary); + font-size: var(--font-size-sm); + font-weight: var(--font-weight-semibold); +} + .mediaAspectGrid { display: grid; grid-template-columns: repeat(auto-fill, 44px); @@ -1055,6 +1068,73 @@ color: #06a6bd; } +.mediaAudioSegment.ant-segmented { + width: 100%; + padding: 2px; + border-radius: var(--radius-md); + background: var(--muted); +} + +.mediaAudioSegment .ant-segmented-item { + min-height: 34px; + color: var(--muted-foreground); + font-weight: var(--font-weight-semibold); +} + +.mediaAudioSegment .ant-segmented-item-label { + min-height: 34px; + line-height: 34px; +} + +.mediaAudioSegment .ant-segmented-item-selected { + background: var(--surface); + color: var(--primary); +} + +.mediaAudioSegment .ant-segmented-item-disabled { + color: var(--muted-foreground); + opacity: 0.4; +} + +.mediaDurationSlider.ant-slider { + margin: 8px 8px 18px; +} + +.mediaDurationSlider .ant-slider-rail { + background: var(--border); +} + +.mediaDurationSlider .ant-slider-track { + background: var(--primary); +} + +.mediaDurationSlider .ant-slider-handle::after { + box-shadow: 0 0 0 2px color-mix(in srgb, var(--primary) 65%, transparent); +} + +.mediaDurationSlider .ant-slider-handle:hover::after, +.mediaDurationSlider .ant-slider-handle:focus::after { + box-shadow: 0 0 0 4px color-mix(in srgb, var(--primary) 22%, transparent); +} + +.mediaDurationSlider .ant-slider-dot { + border-color: var(--border); + background: var(--surface); +} + +.mediaDurationSlider .ant-slider-dot-active { + border-color: var(--primary); +} + +.mediaDurationSlider .ant-slider-mark-text { + color: var(--muted-foreground); + font-size: var(--font-size-xs); +} + +.mediaDurationSlider .ant-slider-mark-text-active { + color: var(--text-normal); +} + .mediaSizeRow { display: grid; grid-template-columns: minmax(0, 1fr) 24px minmax(0, 1fr) auto;