diff --git a/apps/api/internal/httpapi/openapi_models.go b/apps/api/internal/httpapi/openapi_models.go index 9871156..cc6d51e 100644 --- a/apps/api/internal/httpapi/openapi_models.go +++ b/apps/api/internal/httpapi/openapi_models.go @@ -214,7 +214,7 @@ type ImageGenerationRequest struct { Prompt string `json:"prompt" example:"A watercolor robot reading a book"` N int `json:"n,omitempty" example:"1"` Size string `json:"size,omitempty" example:"1024x1024"` - Quality string `json:"quality,omitempty" example:"standard"` + Quality string `json:"quality,omitempty" example:"auto"` ResponseFormat string `json:"response_format,omitempty" example:"url"` RunMode string `json:"runMode,omitempty" example:"simulation"` } @@ -226,6 +226,7 @@ type ImageEditRequest struct { Mask string `json:"mask,omitempty" example:"https://example.com/mask.png"` N int `json:"n,omitempty" example:"1"` Size string `json:"size,omitempty" example:"1024x1024"` + Quality string `json:"quality,omitempty" example:"auto"` ResponseFormat string `json:"response_format,omitempty" example:"url"` RunMode string `json:"runMode,omitempty" example:"simulation"` } diff --git a/apps/api/internal/runner/param_processor.go b/apps/api/internal/runner/param_processor.go index b366723..24349b3 100644 --- a/apps/api/internal/runner/param_processor.go +++ b/apps/api/internal/runner/param_processor.go @@ -63,6 +63,7 @@ func NewParamProcessorChain() ParamProcessorChain { durationProcessor{}, audioProcessor{}, imageCountProcessor{}, + imageQualityProcessor{}, }, } } diff --git a/apps/api/internal/runner/param_processor_media.go b/apps/api/internal/runner/param_processor_media.go index 10b94cd..7414098 100644 --- a/apps/api/internal/runner/param_processor_media.go +++ b/apps/api/internal/runner/param_processor_media.go @@ -378,3 +378,64 @@ func (imageCountProcessor) Process(params map[string]any, modelType string, cont params["n"] = count return true } + +type imageQualityProcessor struct{} + +func (imageQualityProcessor) Name() string { return "ImageQualityProcessor" } + +var openAICompatibleImageQualities = map[string]struct{}{ + "low": {}, + "medium": {}, + "high": {}, + "auto": {}, +} + +func (imageQualityProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool { + if modelType != "image_generate" && modelType != "image_edit" { + return false + } + _, ok := params["quality"] + return ok +} + +func (imageQualityProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool { + capability := capabilityForType(context.modelCapability, modelType) + quality := stringFromAny(params["quality"]) + if supportsImageQualityControl(capability) && isOpenAICompatibleImageQuality(quality) { + return true + } + + before := params["quality"] + delete(params, "quality") + context.recordChange( + "ImageQualityProcessor", + "remove", + "quality", + before, + nil, + "模型能力未开启生成质量控制,已移除 quality 参数。", + capabilityPath(modelType, "support_quality_control"), + capabilityValue(context.modelCapability, modelType, "support_quality_control"), + ) + return true +} + +func supportsImageQualityControl(capability map[string]any) bool { + if capability == nil { + return false + } + for _, key := range []string{"support_quality_control", "supportQualityControl", "quality_control", "qualityControl", "quality"} { + if boolFromAny(capability[key]) { + return true + } + } + return false +} + +func isOpenAICompatibleImageQuality(value string) bool { + if value == "" { + return false + } + _, ok := openAICompatibleImageQualities[value] + return ok +} diff --git a/apps/api/internal/runner/param_processor_test.go b/apps/api/internal/runner/param_processor_test.go index ffe873d..c0b7894 100644 --- a/apps/api/internal/runner/param_processor_test.go +++ b/apps/api/internal/runner/param_processor_test.go @@ -660,3 +660,56 @@ func TestParamProcessorImageResolutionAndOutputCount(t *testing.T) { t.Fatalf("image count should be capped to 4, got %+v", processed["n"]) } } + +func TestParamProcessorImageQualityControl(t *testing.T) { + body := map[string]any{ + "model": "mock-image", + "prompt": "draw", + "quality": "high", + } + + unsupported := preprocessRequestWithLog("images.generations", body, store.RuntimeModelCandidate{ + ModelType: "image_generate", + Capabilities: map[string]any{ + "image_generate": map[string]any{ + "output_resolutions": []any{"1K"}, + }, + }, + }) + if _, ok := unsupported.Body["quality"]; ok { + t.Fatalf("quality should be removed when capability does not support it: %+v", unsupported.Body) + } + if len(unsupported.Log.Changes) == 0 || unsupported.Log.Changes[len(unsupported.Log.Changes)-1].CapabilityPath != "capabilities.image_generate.support_quality_control" { + t.Fatalf("expected quality removal to be logged against support_quality_control, got %+v", unsupported.Log.Changes) + } + + supported := preprocessRequest("images.generations", body, store.RuntimeModelCandidate{ + ModelType: "image_generate", + Capabilities: map[string]any{ + "image_generate": map[string]any{ + "support_quality_control": true, + "output_resolutions": []any{"1K"}, + }, + }, + }) + if supported["quality"] != "high" { + t.Fatalf("quality should be retained when capability supports it: %+v", supported) + } + + incompatible := preprocessRequest("images.generations", map[string]any{ + "model": "mock-image", + "prompt": "draw", + "quality": "standard", + }, store.RuntimeModelCandidate{ + ModelType: "image_generate", + Capabilities: map[string]any{ + "image_generate": map[string]any{ + "support_quality_control": true, + "output_resolutions": []any{"1K"}, + }, + }, + }) + if _, ok := incompatible["quality"]; ok { + t.Fatalf("OpenAI-compatible GPT image quality should reject standard: %+v", incompatible) + } +} diff --git a/apps/api/migrations/0042_image_quality_control_capability.sql b/apps/api/migrations/0042_image_quality_control_capability.sql new file mode 100644 index 0000000..25161a0 --- /dev/null +++ b/apps/api/migrations/0042_image_quality_control_capability.sql @@ -0,0 +1,60 @@ +-- GPT Image 系列支持 OpenAI-compatible quality 参数;其他图像模型默认不声明, +-- runner 会在参数预处理时移除未支持模型上的 quality。 + +CREATE OR REPLACE FUNCTION pg_temp._tmp_enable_image_quality_control(capabilities jsonb) +RETURNS jsonb AS $$ +DECLARE + out jsonb := COALESCE(capabilities, '{}'::jsonb); +BEGIN + IF out ? 'image_generate' THEN + out := jsonb_set(out, '{image_generate,support_quality_control}', 'true'::jsonb, true); + END IF; + IF out ? 'image_edit' THEN + out := jsonb_set(out, '{image_edit,support_quality_control}', 'true'::jsonb, true); + END IF; + RETURN out; +END; +$$ LANGUAGE plpgsql; + +UPDATE base_model_catalog +SET capabilities = pg_temp._tmp_enable_image_quality_control(capabilities), + default_snapshot = CASE + WHEN COALESCE(default_snapshot, '{}'::jsonb) = '{}'::jsonb THEN default_snapshot + WHEN jsonb_typeof(default_snapshot->'metadata'->'rawModel'->'capabilities') = 'object' THEN jsonb_set( + jsonb_set( + default_snapshot, + '{capabilities}', + pg_temp._tmp_enable_image_quality_control(COALESCE(default_snapshot->'capabilities', '{}'::jsonb)), + true + ), + '{metadata,rawModel,capabilities}', + pg_temp._tmp_enable_image_quality_control(COALESCE(default_snapshot->'metadata'->'rawModel'->'capabilities', '{}'::jsonb)), + true + ) + ELSE jsonb_set( + default_snapshot, + '{capabilities}', + pg_temp._tmp_enable_image_quality_control(COALESCE(default_snapshot->'capabilities', '{}'::jsonb)), + true + ) + END, + metadata = CASE + WHEN jsonb_typeof(metadata->'rawModel'->'capabilities') = 'object' THEN jsonb_set( + metadata, + '{rawModel,capabilities}', + pg_temp._tmp_enable_image_quality_control(COALESCE(metadata->'rawModel'->'capabilities', '{}'::jsonb)), + true + ) + ELSE metadata + END, + updated_at = now() +WHERE provider_model_name IN ('gpt-image-1', 'gpt-image-1.5', 'gpt-image-2') + AND capabilities ?| ARRAY['image_generate', 'image_edit']; + +UPDATE platform_models +SET capabilities = pg_temp._tmp_enable_image_quality_control(capabilities), + updated_at = now() +WHERE COALESCE(NULLIF(provider_model_name, ''), model_name) IN ('gpt-image-1', 'gpt-image-1.5', 'gpt-image-2') + AND capabilities ?| ARRAY['image_generate', 'image_edit']; + +DROP FUNCTION pg_temp._tmp_enable_image_quality_control(jsonb); diff --git a/apps/web/src/pages/PlaygroundPage.tsx b/apps/web/src/pages/PlaygroundPage.tsx index 5a2b7e8..a9b8af1 100644 --- a/apps/web/src/pages/PlaygroundPage.tsx +++ b/apps/web/src/pages/PlaygroundPage.tsx @@ -132,7 +132,9 @@ export function PlaygroundPage(props: { const normalizedSettings = mediaCapabilities ? normalizeMediaSettingsForCapabilities(mediaSettings, mediaCapabilities, props.mode) : mediaSettings; - return buildMediaEstimatePayload(props.mode, selectedModel, prompt, normalizedSettings, mediaUploads, videoMode); + return buildMediaEstimatePayload(props.mode, selectedModel, prompt, normalizedSettings, mediaUploads, videoMode, { + supportsQualityControl: mediaCapabilities?.supportsQualityControl, + }); }, [mediaCapabilities, mediaSettings, mediaUploads, prompt, props.mode, selectedModel, videoMode]); useEffect(() => { @@ -348,11 +350,16 @@ export function PlaygroundPage(props: { ...mediaRequestPayload(runSettings, 'video'), }); } else { + const runMediaCapabilities = runModelOption + ? deriveMediaModelCapabilities(runModelOption.models, runMode, runVideoMode, runSettings.resolution) + : mediaCapabilities; const uploadPayload = sharedMediaUploadRequestPayload(runUploads, 'image'); const requestPayload = { model: runModel, prompt: requestPrompt, - ...mediaRequestPayload(runSettings, 'image'), + ...mediaRequestPayload(runSettings, 'image', { + supportsQualityControl: runMediaCapabilities?.supportsQualityControl, + }), ...uploadPayload, }; response = runUploads.some((item) => item.kind === 'image') @@ -799,6 +806,7 @@ function buildMediaEstimatePayload( settings: MediaGenerationSettings, uploads: PlaygroundUpload[], videoMode: VideoCreateMode, + options?: { supportsQualityControl?: boolean }, ): Record { const requestPrompt = replacePlaygroundResourceTokens(prompt.trim(), uploads, mode); if (mode === 'video') { @@ -815,7 +823,7 @@ function buildMediaEstimatePayload( kind: uploads.some((item) => item.kind === 'image') ? 'images.edits' : 'images.generations', model, prompt: requestPrompt, - ...mediaRequestPayload(settings, 'image'), + ...mediaRequestPayload(settings, 'image', options), ...uploadPayload, }; } @@ -1248,11 +1256,17 @@ function mediaSettingsFromStorage(value: unknown): MediaGenerationSettings { height: numberFromUnknown(record.height, fallback.height, 128, 8192), outputMode: record.outputMode === 'group' ? 'group' : 'single', outputAudio: booleanFromUnknown(record.outputAudio ?? record.output_audio ?? record.audio, fallback.outputAudio), + quality: imageQualityFromStorage(record.quality, fallback.quality), resolution: stringFromUnknown(record.resolution) || fallback.resolution, width: numberFromUnknown(record.width, fallback.width, 128, 8192), }; } +function imageQualityFromStorage(value: unknown, fallback: MediaGenerationSettings['quality']) { + if (value === 'low' || value === 'medium' || value === 'high' || value === 'auto') return value; + return fallback; +} + function videoModeFromStorage(value: unknown, uploads: PlaygroundUpload[]): VideoCreateMode { if (value === 'text_to_video' || value === 'first_last_frame' || value === 'omni_reference') return value; return inferVideoModeFromUploads(uploads); diff --git a/apps/web/src/pages/admin/BaseModelCapabilityEditor.tsx b/apps/web/src/pages/admin/BaseModelCapabilityEditor.tsx index e7c0740..36582d0 100644 --- a/apps/web/src/pages/admin/BaseModelCapabilityEditor.tsx +++ b/apps/web/src/pages/admin/BaseModelCapabilityEditor.tsx @@ -57,6 +57,7 @@ const embeddingFields: FieldDefinition[] = [ const imageFields: FieldDefinition[] = [ { key: 'support_base64_input', label: 'Base64 输入', type: 'boolean' }, { key: 'support_url_input', label: 'URL 输入', type: 'boolean' }, + { key: 'support_quality_control', label: '生成质量控制', hint: '支持请求中的 quality 参数', type: 'boolean' }, { key: 'input_multiple_images', label: '多图输入', type: 'boolean' }, { key: 'input_max_images_count', label: '最多输入图片', placeholder: '10', type: 'number' }, { key: 'output_multiple_images', label: '多图输出', type: 'boolean' }, @@ -778,6 +779,7 @@ function enabledBooleanLabels(config?: Record) { supportWebSearch: '联网搜索', support_base64_input: 'Base64 输入', support_url_input: 'URL 输入', + support_quality_control: '质量控制', input_multiple_images: '多图输入', output_multiple_images: '多图输出', input_audio: '音频输入', diff --git a/apps/web/src/pages/admin/base-model-capabilities.ts b/apps/web/src/pages/admin/base-model-capabilities.ts index 81e3bfe..bdd01f0 100644 --- a/apps/web/src/pages/admin/base-model-capabilities.ts +++ b/apps/web/src/pages/admin/base-model-capabilities.ts @@ -17,6 +17,7 @@ export type CapabilityFlagKey = | 'supportThinkingModeSwitch' | 'supportStructuredOutput' | 'supportWebSearch' + | 'supportQualityControl' | 'inputMultipleImages' | 'outputMultipleImages' | 'supportBase64Input' @@ -81,6 +82,7 @@ const flagKeys: CapabilityFlagKey[] = [ 'supportThinkingModeSwitch', 'supportStructuredOutput', 'supportWebSearch', + 'supportQualityControl', 'inputMultipleImages', 'outputMultipleImages', 'supportBase64Input', @@ -114,6 +116,7 @@ const managedRootKeys = new Set([ 'supportWebSearch', 'supportBase64Input', 'supportUrlInput', + 'supportQualityControl', 'maxContextTokens', 'maxInputTokens', 'maxOutputTokens', @@ -135,6 +138,7 @@ const managedNestedKeys = new Set([ 'dimensions', 'support_base64_input', 'support_url_input', + 'support_quality_control', 'input_multiple_images', 'input_max_images_count', 'output_multiple_images', @@ -204,6 +208,7 @@ export function capabilitiesToForm(value?: Record, modelType = state.flags.supportWebSearch = boolFrom(source.supportWebSearch ?? nestedValue(source, 'supportWebSearch')); state.flags.supportBase64Input = boolFrom(source.supportBase64Input ?? nestedValue(source, 'support_base64_input')); state.flags.supportUrlInput = boolFrom(source.supportUrlInput ?? nestedValue(source, 'support_url_input')); + state.flags.supportQualityControl = boolFrom(source.supportQualityControl ?? nestedValue(source, 'support_quality_control')); state.flags.inputMultipleImages = nestedBool(source, 'input_multiple_images'); state.flags.outputMultipleImages = nestedBool(source, 'output_multiple_images'); state.flags.outputAudio = nestedBool(source, 'output_audio'); @@ -278,10 +283,10 @@ export function defaultCapabilityConfig(type: string): Record { } if (type === 'text_embedding') return { dimensions: [] }; if (type === 'image_generate') { - return { output_resolutions: ['1K'], output_multiple_images: false }; + return { output_resolutions: ['1K'], output_multiple_images: false, support_quality_control: false }; } if (type === 'image_edit') { - return { input_multiple_images: false, output_resolutions: ['1K'], output_multiple_images: false }; + return { input_multiple_images: false, output_resolutions: ['1K'], output_multiple_images: false, support_quality_control: false }; } if (type === 'video_generate') { return { output_resolutions: ['720p'], duration_range: [5, 10], output_audio: false }; @@ -346,6 +351,7 @@ function rootCompatibilityConfig(source: Record) { 'supportWebSearch', 'supportBase64Input', 'supportUrlInput', + 'supportQualityControl', 'maxContextTokens', 'maxInputTokens', 'maxOutputTokens', @@ -361,6 +367,7 @@ function toCapabilityKey(key: string) { const map: Record = { supportBase64Input: 'support_base64_input', supportUrlInput: 'support_url_input', + supportQualityControl: 'support_quality_control', maxContextTokens: 'max_context_tokens', maxInputTokens: 'max_input_tokens', maxOutputTokens: 'max_output_tokens', diff --git a/apps/web/src/pages/playground-media.tsx b/apps/web/src/pages/playground-media.tsx index a7af89f..d86de9f 100644 --- a/apps/web/src/pages/playground-media.tsx +++ b/apps/web/src/pages/playground-media.tsx @@ -22,6 +22,7 @@ import type { PlaygroundUpload, PlaygroundUploadKind, PlaygroundVideoCreateMode export type MediaOutputMode = 'single' | 'group'; export type MediaCountPreset = 1 | 2 | 3 | 4 | 'custom'; export type MediaResolution = string; +export type ImageQuality = 'low' | 'medium' | 'high' | 'auto'; const mediaGridGap = 2; const mediaPreviewMaxHeight = 600; @@ -34,6 +35,7 @@ export interface MediaGenerationSettings { height: number; outputMode: MediaOutputMode; outputAudio: boolean; + quality: ImageQuality; resolution: MediaResolution; width: number; } @@ -100,6 +102,7 @@ export interface MediaModelCapabilities { resolutions: MediaResolution[]; supportsAudio: boolean; supportsGroup: boolean; + supportsQualityControl: boolean; } const aspectRatioOptions: AspectRatioOption[] = [ @@ -138,6 +141,13 @@ const countPresetOptions: Array<{ label: string; value: MediaCountPreset }> = [ { value: 'custom', label: '自定义' }, ]; +const imageQualityOptions: Array<{ label: string; value: ImageQuality }> = [ + { value: 'low', label: '低' }, + { value: 'medium', label: '标准' }, + { value: 'high', label: '高' }, + { value: 'auto', label: '自动' }, +]; + export function defaultMediaGenerationSettings(): MediaGenerationSettings { return { aspectRatio: '1:1', @@ -147,6 +157,7 @@ export function defaultMediaGenerationSettings(): MediaGenerationSettings { height: 2048, outputMode: 'single', outputAudio: true, + quality: 'auto', resolution: '2K', width: 2048, }; @@ -158,7 +169,11 @@ export function mediaOutputCount(settings: MediaGenerationSettings) { return clampNumber(raw, 1, 20); } -export function mediaRequestPayload(settings: MediaGenerationSettings, mode: Exclude) { +export function mediaRequestPayload( + settings: MediaGenerationSettings, + mode: Exclude, + options?: { supportsQualityControl?: boolean }, +) { if (mode === 'video') { return { aspect_ratio: settings.aspectRatio === 'auto' ? undefined : settings.aspectRatio, @@ -170,13 +185,12 @@ export function mediaRequestPayload(settings: MediaGenerationSettings, mode: Exc const count = mediaOutputCount(settings); const size = `${settings.width}x${settings.height}`; - const highQuality = settings.resolution === '4K' || settings.resolution === '2160p'; return { aspect_ratio: settings.aspectRatio === 'auto' ? undefined : settings.aspectRatio, count, height: settings.height, n: count, - quality: highQuality ? 'high' : 'medium', + quality: options?.supportsQualityControl ? settings.quality : undefined, resolution: settings.resolution, size, width: settings.width, @@ -206,6 +220,7 @@ export function deriveMediaModelCapabilities( resolutions: intersectOptionValues(derived.map((item) => item.resolutions), resolutionOptionsForMode(mode).map((item) => item.value)), supportsAudio: derived.every((item) => item.supportsAudio), supportsGroup: derived.every((item) => item.supportsGroup), + supportsQualityControl: derived.every((item) => item.supportsQualityControl), }; } @@ -250,6 +265,9 @@ export function normalizeMediaSettingsForCapabilities( } else { next.countPreset = 1; } + if (!capabilities.supportsQualityControl) { + next.quality = 'auto'; + } return mediaSettingsEqual(settings, next) ? settings : next; } @@ -335,6 +353,19 @@ export function MediaSettingsPopover(props: { + {isImageMode && capabilities.supportsQualityControl && ( +
+ 生成质量 + patch({ quality: value as ImageQuality })} + /> +
+ )} + {isVideoMode && ( <>
@@ -990,6 +1021,7 @@ function deriveSingleMediaModelCapabilities( const durationStep = durationStepFromValue(scopedCapabilityValue(firstCapabilityValue(source, typeKeys, ['duration_step']), durationScopes), defaultCapabilities.durationStep); const durationOptions = normalizeDurationValues(numberListFromCapability(scopedCapabilityValue(firstCapabilityValue(source, typeKeys, ['duration_options']), durationScopes))); const explicitAudioSupport = boolFromUnknown(firstCapabilityValue(source, typeKeys, ['output_audio'])); + const explicitQualitySupport = boolFromUnknown(firstCapabilityValue(source, typeKeys, ['support_quality_control', 'supportQualityControl', 'quality_control', 'qualityControl', 'quality'])); const maxCount = explicitGroupSupport === false ? 1 : clampNumber(maxCountValue ?? defaultCapabilities.maxCount, 1, 20); const supportsGroup = explicitGroupSupport === false ? false : maxCount > 1; @@ -1002,6 +1034,7 @@ function deriveSingleMediaModelCapabilities( resolutions: resolutionValues.length ? resolutionValues : defaultCapabilities.resolutions, supportsAudio: explicitAudioSupport ?? defaultCapabilities.supportsAudio, supportsGroup, + supportsQualityControl: explicitQualitySupport ?? defaultCapabilities.supportsQualityControl, }; } @@ -1016,6 +1049,7 @@ function defaultMediaModelCapabilities(mode: Exclude): M resolutions: resolutionOptionsForMode(mode).map((item) => item.value), supportsAudio: false, supportsGroup: mode === 'image', + supportsQualityControl: false, }; } @@ -1306,6 +1340,7 @@ function mediaSettingsEqual(left: MediaGenerationSettings, right: MediaGeneratio && left.height === right.height && left.outputMode === right.outputMode && left.outputAudio === right.outputAudio + && left.quality === right.quality && left.resolution === right.resolution && left.width === right.width; }