feat: support image quality control

This commit is contained in:
wangbo 2026-05-28 00:17:27 +08:00
parent 1d3a4f1da9
commit f5c69b9852
9 changed files with 243 additions and 9 deletions

View File

@ -214,7 +214,7 @@ type ImageGenerationRequest struct {
Prompt string `json:"prompt" example:"A watercolor robot reading a book"`
N int `json:"n,omitempty" example:"1"`
Size string `json:"size,omitempty" example:"1024x1024"`
Quality string `json:"quality,omitempty" example:"standard"`
Quality string `json:"quality,omitempty" example:"auto"`
ResponseFormat string `json:"response_format,omitempty" example:"url"`
RunMode string `json:"runMode,omitempty" example:"simulation"`
}
@ -226,6 +226,7 @@ type ImageEditRequest struct {
Mask string `json:"mask,omitempty" example:"https://example.com/mask.png"`
N int `json:"n,omitempty" example:"1"`
Size string `json:"size,omitempty" example:"1024x1024"`
Quality string `json:"quality,omitempty" example:"auto"`
ResponseFormat string `json:"response_format,omitempty" example:"url"`
RunMode string `json:"runMode,omitempty" example:"simulation"`
}

View File

@ -63,6 +63,7 @@ func NewParamProcessorChain() ParamProcessorChain {
durationProcessor{},
audioProcessor{},
imageCountProcessor{},
imageQualityProcessor{},
},
}
}

View File

@ -378,3 +378,64 @@ func (imageCountProcessor) Process(params map[string]any, modelType string, cont
params["n"] = count
return true
}
// imageQualityProcessor is the parameter processor responsible for the
// "quality" request parameter of image generation and image edit requests.
// It carries no state.
type imageQualityProcessor struct{}

// Name returns the identifier of this processor.
func (imageQualityProcessor) Name() string {
	const processorName = "ImageQualityProcessor"
	return processorName
}
// openAICompatibleImageQualities is the set of quality levels that
// isOpenAICompatibleImageQuality accepts. Any other value (for example
// "standard") is not a member.
var openAICompatibleImageQualities = map[string]struct{}{
	"auto":   {},
	"high":   {},
	"medium": {},
	"low":    {},
}
// ShouldProcess reports whether this processor applies to the request: the
// model type must be "image_generate" or "image_edit" and params must contain
// a "quality" key (whatever its value). The context argument is not consulted.
func (imageQualityProcessor) ShouldProcess(params map[string]any, modelType string, context *paramProcessContext) bool {
	switch modelType {
	case "image_generate", "image_edit":
		_, present := params["quality"]
		return present
	default:
		return false
	}
}
// Process keeps params["quality"] only when the capability resolved for
// modelType enables quality control and the value is one of the accepted
// quality levels. In every other case the parameter is deleted from params and
// the removal is recorded on context against the support_quality_control
// capability path.
//
// The recorded reason reflects the actual cause: the capability is not
// enabled, or the capability is enabled but the requested value is not an
// accepted level. Previously the "capability not enabled" reason was logged
// for both cases, which was misleading when the model did support quality
// control.
//
// Process always returns true.
func (imageQualityProcessor) Process(params map[string]any, modelType string, context *paramProcessContext) bool {
	capability := capabilityForType(context.modelCapability, modelType)
	supported := supportsImageQualityControl(capability)
	before := params["quality"]
	if supported && isOpenAICompatibleImageQuality(stringFromAny(before)) {
		return true
	}

	reason := "模型能力未开启生成质量控制,已移除 quality 参数。"
	if supported {
		// The model accepts quality, so the removal is caused by the value itself.
		reason = "quality 取值不是 low/medium/high/auto 之一,已移除 quality 参数。"
	}

	delete(params, "quality")
	context.recordChange(
		"ImageQualityProcessor",
		"remove",
		"quality",
		before,
		nil,
		reason,
		capabilityPath(modelType, "support_quality_control"),
		capabilityValue(context.modelCapability, modelType, "support_quality_control"),
	)
	return true
}
// supportsImageQualityControl reports whether a per-type capability map
// enables quality control. Several spellings of the flag are accepted; the
// first one that boolFromAny evaluates as true wins. A nil map yields false.
func supportsImageQualityControl(capability map[string]any) bool {
	if capability == nil {
		return false
	}
	aliases := [...]string{
		"support_quality_control",
		"supportQualityControl",
		"quality_control",
		"qualityControl",
		"quality",
	}
	for i := range aliases {
		if boolFromAny(capability[aliases[i]]) {
			return true
		}
	}
	return false
}
// isOpenAICompatibleImageQuality reports whether value is a non-empty member
// of openAICompatibleImageQualities. The comparison is exact (case-sensitive,
// no trimming).
func isOpenAICompatibleImageQuality(value string) bool {
	_, known := openAICompatibleImageQualities[value]
	return value != "" && known
}

View File

@ -660,3 +660,56 @@ func TestParamProcessorImageResolutionAndOutputCount(t *testing.T) {
t.Fatalf("image count should be capped to 4, got %+v", processed["n"])
}
}
func TestParamProcessorImageQualityControl(t *testing.T) {
body := map[string]any{
"model": "mock-image",
"prompt": "draw",
"quality": "high",
}
unsupported := preprocessRequestWithLog("images.generations", body, store.RuntimeModelCandidate{
ModelType: "image_generate",
Capabilities: map[string]any{
"image_generate": map[string]any{
"output_resolutions": []any{"1K"},
},
},
})
if _, ok := unsupported.Body["quality"]; ok {
t.Fatalf("quality should be removed when capability does not support it: %+v", unsupported.Body)
}
if len(unsupported.Log.Changes) == 0 || unsupported.Log.Changes[len(unsupported.Log.Changes)-1].CapabilityPath != "capabilities.image_generate.support_quality_control" {
t.Fatalf("expected quality removal to be logged against support_quality_control, got %+v", unsupported.Log.Changes)
}
supported := preprocessRequest("images.generations", body, store.RuntimeModelCandidate{
ModelType: "image_generate",
Capabilities: map[string]any{
"image_generate": map[string]any{
"support_quality_control": true,
"output_resolutions": []any{"1K"},
},
},
})
if supported["quality"] != "high" {
t.Fatalf("quality should be retained when capability supports it: %+v", supported)
}
incompatible := preprocessRequest("images.generations", map[string]any{
"model": "mock-image",
"prompt": "draw",
"quality": "standard",
}, store.RuntimeModelCandidate{
ModelType: "image_generate",
Capabilities: map[string]any{
"image_generate": map[string]any{
"support_quality_control": true,
"output_resolutions": []any{"1K"},
},
},
})
if _, ok := incompatible["quality"]; ok {
t.Fatalf("OpenAI-compatible GPT image quality should reject standard: %+v", incompatible)
}
}

View File

@ -0,0 +1,60 @@
-- The GPT Image series supports the OpenAI-compatible quality parameter; other
-- image models do not declare it by default, and the runner removes quality
-- from requests to unsupported models during parameter preprocessing.
--
-- Helper: takes a capabilities document (NULL is treated as '{}') and sets
-- support_quality_control = true under the image_generate and image_edit keys,
-- but only for the keys that already exist. It is created in pg_temp and
-- dropped again at the end of this script.
CREATE OR REPLACE FUNCTION pg_temp._tmp_enable_image_quality_control(capabilities jsonb)
RETURNS jsonb AS $$
DECLARE
out jsonb := COALESCE(capabilities, '{}'::jsonb);
BEGIN
IF out ? 'image_generate' THEN
out := jsonb_set(out, '{image_generate,support_quality_control}', 'true'::jsonb, true);
END IF;
IF out ? 'image_edit' THEN
out := jsonb_set(out, '{image_edit,support_quality_control}', 'true'::jsonb, true);
END IF;
RETURN out;
END;
$$ LANGUAGE plpgsql;
-- Enable the flag on the GPT Image rows of base_model_catalog, keeping the
-- copies of the capabilities stored in default_snapshot and metadata in sync.
UPDATE base_model_catalog
SET capabilities = pg_temp._tmp_enable_image_quality_control(capabilities),
-- default_snapshot: a NULL or empty snapshot is left untouched. Otherwise its
-- capabilities are updated, and metadata.rawModel.capabilities is updated as
-- well when that path holds a JSON object.
default_snapshot = CASE
WHEN COALESCE(default_snapshot, '{}'::jsonb) = '{}'::jsonb THEN default_snapshot
WHEN jsonb_typeof(default_snapshot->'metadata'->'rawModel'->'capabilities') = 'object' THEN jsonb_set(
jsonb_set(
default_snapshot,
'{capabilities}',
pg_temp._tmp_enable_image_quality_control(COALESCE(default_snapshot->'capabilities', '{}'::jsonb)),
true
),
'{metadata,rawModel,capabilities}',
pg_temp._tmp_enable_image_quality_control(COALESCE(default_snapshot->'metadata'->'rawModel'->'capabilities', '{}'::jsonb)),
true
)
ELSE jsonb_set(
default_snapshot,
'{capabilities}',
pg_temp._tmp_enable_image_quality_control(COALESCE(default_snapshot->'capabilities', '{}'::jsonb)),
true
)
END,
-- metadata: only rawModel.capabilities is rewritten, and only when it is a
-- JSON object; otherwise metadata is kept as is.
metadata = CASE
WHEN jsonb_typeof(metadata->'rawModel'->'capabilities') = 'object' THEN jsonb_set(
metadata,
'{rawModel,capabilities}',
pg_temp._tmp_enable_image_quality_control(COALESCE(metadata->'rawModel'->'capabilities', '{}'::jsonb)),
true
)
ELSE metadata
END,
updated_at = now()
-- Restrict to the listed GPT Image models whose capabilities contain at least
-- one of the image_generate / image_edit keys.
WHERE provider_model_name IN ('gpt-image-1', 'gpt-image-1.5', 'gpt-image-2')
AND capabilities ?| ARRAY['image_generate', 'image_edit'];
-- Apply the same flag to platform_models. The model is identified by
-- provider_model_name, falling back to model_name when the former is NULL or
-- an empty string.
UPDATE platform_models
SET capabilities = pg_temp._tmp_enable_image_quality_control(capabilities),
updated_at = now()
WHERE COALESCE(NULLIF(provider_model_name, ''), model_name) IN ('gpt-image-1', 'gpt-image-1.5', 'gpt-image-2')
AND capabilities ?| ARRAY['image_generate', 'image_edit'];
-- Remove the temporary helper now that both updates have run.
DROP FUNCTION pg_temp._tmp_enable_image_quality_control(jsonb);

View File

@ -132,7 +132,9 @@ export function PlaygroundPage(props: {
const normalizedSettings = mediaCapabilities
? normalizeMediaSettingsForCapabilities(mediaSettings, mediaCapabilities, props.mode)
: mediaSettings;
return buildMediaEstimatePayload(props.mode, selectedModel, prompt, normalizedSettings, mediaUploads, videoMode);
return buildMediaEstimatePayload(props.mode, selectedModel, prompt, normalizedSettings, mediaUploads, videoMode, {
supportsQualityControl: mediaCapabilities?.supportsQualityControl,
});
}, [mediaCapabilities, mediaSettings, mediaUploads, prompt, props.mode, selectedModel, videoMode]);
useEffect(() => {
@ -348,11 +350,16 @@ export function PlaygroundPage(props: {
...mediaRequestPayload(runSettings, 'video'),
});
} else {
const runMediaCapabilities = runModelOption
? deriveMediaModelCapabilities(runModelOption.models, runMode, runVideoMode, runSettings.resolution)
: mediaCapabilities;
const uploadPayload = sharedMediaUploadRequestPayload(runUploads, 'image');
const requestPayload = {
model: runModel,
prompt: requestPrompt,
...mediaRequestPayload(runSettings, 'image'),
...mediaRequestPayload(runSettings, 'image', {
supportsQualityControl: runMediaCapabilities?.supportsQualityControl,
}),
...uploadPayload,
};
response = runUploads.some((item) => item.kind === 'image')
@ -799,6 +806,7 @@ function buildMediaEstimatePayload(
settings: MediaGenerationSettings,
uploads: PlaygroundUpload[],
videoMode: VideoCreateMode,
options?: { supportsQualityControl?: boolean },
): Record<string, unknown> {
const requestPrompt = replacePlaygroundResourceTokens(prompt.trim(), uploads, mode);
if (mode === 'video') {
@ -815,7 +823,7 @@ function buildMediaEstimatePayload(
kind: uploads.some((item) => item.kind === 'image') ? 'images.edits' : 'images.generations',
model,
prompt: requestPrompt,
...mediaRequestPayload(settings, 'image'),
...mediaRequestPayload(settings, 'image', options),
...uploadPayload,
};
}
@ -1248,11 +1256,17 @@ function mediaSettingsFromStorage(value: unknown): MediaGenerationSettings {
height: numberFromUnknown(record.height, fallback.height, 128, 8192),
outputMode: record.outputMode === 'group' ? 'group' : 'single',
outputAudio: booleanFromUnknown(record.outputAudio ?? record.output_audio ?? record.audio, fallback.outputAudio),
quality: imageQualityFromStorage(record.quality, fallback.quality),
resolution: stringFromUnknown(record.resolution) || fallback.resolution,
width: numberFromUnknown(record.width, fallback.width, 128, 8192),
};
}
/**
 * Restores the image quality setting from a persisted value.
 *
 * Returns `value` when it is exactly one of 'low', 'medium', 'high' or 'auto';
 * any other value (including undefined or a non-string) yields `fallback`.
 */
function imageQualityFromStorage(value: unknown, fallback: MediaGenerationSettings['quality']) {
  return value === 'low' || value === 'medium' || value === 'high' || value === 'auto'
    ? value
    : fallback;
}
function videoModeFromStorage(value: unknown, uploads: PlaygroundUpload[]): VideoCreateMode {
if (value === 'text_to_video' || value === 'first_last_frame' || value === 'omni_reference') return value;
return inferVideoModeFromUploads(uploads);

View File

@ -57,6 +57,7 @@ const embeddingFields: FieldDefinition[] = [
const imageFields: FieldDefinition[] = [
{ key: 'support_base64_input', label: 'Base64 输入', type: 'boolean' },
{ key: 'support_url_input', label: 'URL 输入', type: 'boolean' },
{ key: 'support_quality_control', label: '生成质量控制', hint: '支持请求中的 quality 参数', type: 'boolean' },
{ key: 'input_multiple_images', label: '多图输入', type: 'boolean' },
{ key: 'input_max_images_count', label: '最多输入图片', placeholder: '10', type: 'number' },
{ key: 'output_multiple_images', label: '多图输出', type: 'boolean' },
@ -778,6 +779,7 @@ function enabledBooleanLabels(config?: Record<string, unknown>) {
supportWebSearch: '联网搜索',
support_base64_input: 'Base64 输入',
support_url_input: 'URL 输入',
support_quality_control: '质量控制',
input_multiple_images: '多图输入',
output_multiple_images: '多图输出',
input_audio: '音频输入',

View File

@ -17,6 +17,7 @@ export type CapabilityFlagKey =
| 'supportThinkingModeSwitch'
| 'supportStructuredOutput'
| 'supportWebSearch'
| 'supportQualityControl'
| 'inputMultipleImages'
| 'outputMultipleImages'
| 'supportBase64Input'
@ -81,6 +82,7 @@ const flagKeys: CapabilityFlagKey[] = [
'supportThinkingModeSwitch',
'supportStructuredOutput',
'supportWebSearch',
'supportQualityControl',
'inputMultipleImages',
'outputMultipleImages',
'supportBase64Input',
@ -114,6 +116,7 @@ const managedRootKeys = new Set<string>([
'supportWebSearch',
'supportBase64Input',
'supportUrlInput',
'supportQualityControl',
'maxContextTokens',
'maxInputTokens',
'maxOutputTokens',
@ -135,6 +138,7 @@ const managedNestedKeys = new Set<string>([
'dimensions',
'support_base64_input',
'support_url_input',
'support_quality_control',
'input_multiple_images',
'input_max_images_count',
'output_multiple_images',
@ -204,6 +208,7 @@ export function capabilitiesToForm(value?: Record<string, unknown>, modelType =
state.flags.supportWebSearch = boolFrom(source.supportWebSearch ?? nestedValue(source, 'supportWebSearch'));
state.flags.supportBase64Input = boolFrom(source.supportBase64Input ?? nestedValue(source, 'support_base64_input'));
state.flags.supportUrlInput = boolFrom(source.supportUrlInput ?? nestedValue(source, 'support_url_input'));
state.flags.supportQualityControl = boolFrom(source.supportQualityControl ?? nestedValue(source, 'support_quality_control'));
state.flags.inputMultipleImages = nestedBool(source, 'input_multiple_images');
state.flags.outputMultipleImages = nestedBool(source, 'output_multiple_images');
state.flags.outputAudio = nestedBool(source, 'output_audio');
@ -278,10 +283,10 @@ export function defaultCapabilityConfig(type: string): Record<string, unknown> {
}
if (type === 'text_embedding') return { dimensions: [] };
if (type === 'image_generate') {
return { output_resolutions: ['1K'], output_multiple_images: false };
return { output_resolutions: ['1K'], output_multiple_images: false, support_quality_control: false };
}
if (type === 'image_edit') {
return { input_multiple_images: false, output_resolutions: ['1K'], output_multiple_images: false };
return { input_multiple_images: false, output_resolutions: ['1K'], output_multiple_images: false, support_quality_control: false };
}
if (type === 'video_generate') {
return { output_resolutions: ['720p'], duration_range: [5, 10], output_audio: false };
@ -346,6 +351,7 @@ function rootCompatibilityConfig(source: Record<string, unknown>) {
'supportWebSearch',
'supportBase64Input',
'supportUrlInput',
'supportQualityControl',
'maxContextTokens',
'maxInputTokens',
'maxOutputTokens',
@ -361,6 +367,7 @@ function toCapabilityKey(key: string) {
const map: Record<string, string> = {
supportBase64Input: 'support_base64_input',
supportUrlInput: 'support_url_input',
supportQualityControl: 'support_quality_control',
maxContextTokens: 'max_context_tokens',
maxInputTokens: 'max_input_tokens',
maxOutputTokens: 'max_output_tokens',

View File

@ -22,6 +22,7 @@ import type { PlaygroundUpload, PlaygroundUploadKind, PlaygroundVideoCreateMode
export type MediaOutputMode = 'single' | 'group';
export type MediaCountPreset = 1 | 2 | 3 | 4 | 'custom';
export type MediaResolution = string;
export type ImageQuality = 'low' | 'medium' | 'high' | 'auto';
const mediaGridGap = 2;
const mediaPreviewMaxHeight = 600;
@ -34,6 +35,7 @@ export interface MediaGenerationSettings {
height: number;
outputMode: MediaOutputMode;
outputAudio: boolean;
quality: ImageQuality;
resolution: MediaResolution;
width: number;
}
@ -100,6 +102,7 @@ export interface MediaModelCapabilities {
resolutions: MediaResolution[];
supportsAudio: boolean;
supportsGroup: boolean;
supportsQualityControl: boolean;
}
const aspectRatioOptions: AspectRatioOption[] = [
@ -138,6 +141,13 @@ const countPresetOptions: Array<{ label: string; value: MediaCountPreset }> = [
{ value: 'custom', label: '自定义' },
];
// Choices offered by the image quality selector in the media settings popover.
// `value` is the ImageQuality level stored in the settings; `label` is the
// text displayed for that choice.
const imageQualityOptions: Array<{ label: string; value: ImageQuality }> = [
{ value: 'low', label: '低' },
{ value: 'medium', label: '标准' },
{ value: 'high', label: '高' },
{ value: 'auto', label: '自动' },
];
export function defaultMediaGenerationSettings(): MediaGenerationSettings {
return {
aspectRatio: '1:1',
@ -147,6 +157,7 @@ export function defaultMediaGenerationSettings(): MediaGenerationSettings {
height: 2048,
outputMode: 'single',
outputAudio: true,
quality: 'auto',
resolution: '2K',
width: 2048,
};
@ -158,7 +169,11 @@ export function mediaOutputCount(settings: MediaGenerationSettings) {
return clampNumber(raw, 1, 20);
}
export function mediaRequestPayload(settings: MediaGenerationSettings, mode: Exclude<PlaygroundMode, 'chat'>) {
export function mediaRequestPayload(
settings: MediaGenerationSettings,
mode: Exclude<PlaygroundMode, 'chat'>,
options?: { supportsQualityControl?: boolean },
) {
if (mode === 'video') {
return {
aspect_ratio: settings.aspectRatio === 'auto' ? undefined : settings.aspectRatio,
@ -170,13 +185,12 @@ export function mediaRequestPayload(settings: MediaGenerationSettings, mode: Exc
const count = mediaOutputCount(settings);
const size = `${settings.width}x${settings.height}`;
const highQuality = settings.resolution === '4K' || settings.resolution === '2160p';
return {
aspect_ratio: settings.aspectRatio === 'auto' ? undefined : settings.aspectRatio,
count,
height: settings.height,
n: count,
quality: highQuality ? 'high' : 'medium',
quality: options?.supportsQualityControl ? settings.quality : undefined,
resolution: settings.resolution,
size,
width: settings.width,
@ -206,6 +220,7 @@ export function deriveMediaModelCapabilities(
resolutions: intersectOptionValues(derived.map((item) => item.resolutions), resolutionOptionsForMode(mode).map((item) => item.value)),
supportsAudio: derived.every((item) => item.supportsAudio),
supportsGroup: derived.every((item) => item.supportsGroup),
supportsQualityControl: derived.every((item) => item.supportsQualityControl),
};
}
@ -250,6 +265,9 @@ export function normalizeMediaSettingsForCapabilities(
} else {
next.countPreset = 1;
}
if (!capabilities.supportsQualityControl) {
next.quality = 'auto';
}
return mediaSettingsEqual(settings, next) ? settings : next;
}
@ -335,6 +353,19 @@ export function MediaSettingsPopover(props: {
</div>
</section>
{isImageMode && capabilities.supportsQualityControl && (
<section className="mediaSettingsSection">
<span className="mediaSettingsLabel"></span>
<Segmented
block
className="mediaAudioSegment"
options={imageQualityOptions}
value={props.settings.quality}
onChange={(value) => patch({ quality: value as ImageQuality })}
/>
</section>
)}
{isVideoMode && (
<>
<section className="mediaSettingsSection">
@ -990,6 +1021,7 @@ function deriveSingleMediaModelCapabilities(
const durationStep = durationStepFromValue(scopedCapabilityValue(firstCapabilityValue(source, typeKeys, ['duration_step']), durationScopes), defaultCapabilities.durationStep);
const durationOptions = normalizeDurationValues(numberListFromCapability(scopedCapabilityValue(firstCapabilityValue(source, typeKeys, ['duration_options']), durationScopes)));
const explicitAudioSupport = boolFromUnknown(firstCapabilityValue(source, typeKeys, ['output_audio']));
const explicitQualitySupport = boolFromUnknown(firstCapabilityValue(source, typeKeys, ['support_quality_control', 'supportQualityControl', 'quality_control', 'qualityControl', 'quality']));
const maxCount = explicitGroupSupport === false ? 1 : clampNumber(maxCountValue ?? defaultCapabilities.maxCount, 1, 20);
const supportsGroup = explicitGroupSupport === false ? false : maxCount > 1;
@ -1002,6 +1034,7 @@ function deriveSingleMediaModelCapabilities(
resolutions: resolutionValues.length ? resolutionValues : defaultCapabilities.resolutions,
supportsAudio: explicitAudioSupport ?? defaultCapabilities.supportsAudio,
supportsGroup,
supportsQualityControl: explicitQualitySupport ?? defaultCapabilities.supportsQualityControl,
};
}
@ -1016,6 +1049,7 @@ function defaultMediaModelCapabilities(mode: Exclude<PlaygroundMode, 'chat'>): M
resolutions: resolutionOptionsForMode(mode).map((item) => item.value),
supportsAudio: false,
supportsGroup: mode === 'image',
supportsQualityControl: false,
};
}
@ -1306,6 +1340,7 @@ function mediaSettingsEqual(left: MediaGenerationSettings, right: MediaGeneratio
&& left.height === right.height
&& left.outputMode === right.outputMode
&& left.outputAudio === right.outputAudio
&& left.quality === right.quality
&& left.resolution === right.resolution
&& left.width === right.width;
}