-- File viewer header (not part of the migration): 74 lines, 3.6 KiB, SQL
-- Gateway runner policies: one row per named policy, addressed by policy_key.
-- The three *_policy columns and metadata are free-form JSON documents that
-- default to an empty object; their key layout is not enforced by the schema.
--
-- IF NOT EXISTS makes this a no-op when the table is already present, so the
-- statement can be re-run safely.
CREATE TABLE IF NOT EXISTS gateway_runner_policies (
    -- Surrogate key. gen_random_uuid() is built in from PostgreSQL 13;
    -- NOTE(review): older servers need the pgcrypto extension -- confirm target version.
    id                     uuid        PRIMARY KEY DEFAULT gen_random_uuid(),
    -- Stable natural key; the UNIQUE constraint is what the seed upsert's
    -- ON CONFLICT (policy_key) clause in this file relies on.
    policy_key             text        NOT NULL UNIQUE,
    name                   text        NOT NULL,
    -- Optional free text (the only nullable column).
    description            text,
    failover_policy        jsonb       NOT NULL DEFAULT '{}'::jsonb,
    hard_stop_policy       jsonb       NOT NULL DEFAULT '{}'::jsonb,
    priority_demote_policy jsonb       NOT NULL DEFAULT '{}'::jsonb,
    metadata               jsonb       NOT NULL DEFAULT '{}'::jsonb,
    -- Plain text with no CHECK constraint; 'active' is the only value used in this file.
    status                 text        NOT NULL DEFAULT 'active',
    created_at             timestamptz NOT NULL DEFAULT now(),
    -- Defaults to now() on insert only; writers must set it explicitly on update
    -- (no trigger is defined in this file).
    updated_at             timestamptz NOT NULL DEFAULT now()
);

-- Backfill the priority_demote_policy column on tables that already existed
-- before this script ran: CREATE TABLE IF NOT EXISTS does not alter an existing
-- table, so the column has to be added separately. Both IF EXISTS and
-- IF NOT EXISTS guards keep the statement safe to re-run.
-- Existing rows receive the '{}' default, which satisfies NOT NULL.
ALTER TABLE IF EXISTS gateway_runner_policies
    ADD COLUMN IF NOT EXISTS priority_demote_policy jsonb NOT NULL DEFAULT '{}'::jsonb;

-- Seed (or refresh) the built-in 'default-runner-v1' policy row.
--
-- The statement is an upsert keyed on policy_key, so re-running it updates the
-- existing row instead of failing on the UNIQUE constraint.
--
-- Merge semantics on conflict (see the SET list at the bottom):
--   * name and description are overwritten with the seed values.
--   * The jsonb columns use "stored || seed". jsonb concatenation is a SHALLOW
--     merge in which the right-hand operand wins on duplicate top-level keys,
--     so every top-level key listed below is reset to its seed value, while
--     top-level keys that exist only in the stored row are kept. Nested
--     objects (e.g. "actions") are replaced as a whole, not merged key by key.
--   * status is intentionally absent from the SET list, so the status of an
--     existing row is left unchanged.
INSERT INTO gateway_runner_policies (
    policy_key,
    name,
    description,
    failover_policy,
    hard_stop_policy,
    priority_demote_policy,
    metadata,
    status
)
VALUES (
    'default-runner-v1',
    -- name: "Default global scheduling policy"
    '默认全局调度策略',
    -- description: "Controls failover between multiple candidate platforms; a model
    -- runtime policy may only override failoverPolicy, not hardStopPolicy."
    '控制多个候选平台之间的故障切换;模型运行策略只可覆盖 failoverPolicy,不能覆盖 hardStopPolicy。',
    -- failover_policy
    '{
        "enabled": true,
        "maxPlatforms": 99,
        "maxDurationSeconds": 600,
        "allowCategories": ["network", "timeout", "stream_error", "rate_limit", "provider_5xx", "provider_overloaded", "auth_error"],
        "denyCategories": ["request_error", "unsupported_model", "user_permission", "insufficient_balance"],
        "allowCodes": ["auth_failed", "invalid_api_key", "missing_credentials"],
        "allowKeywords": ["timeout", "network", "rate_limit", "overloaded", "temporarily_unavailable", "server_error", "auth_failed", "invalid_api_key", "missing_credentials", "unauthorized", "forbidden", "429", "5xx"],
        "denyKeywords": ["invalid_parameter", "missing required", "bad request"],
        "allowStatusCodes": [401, 403, 408, 429, 500, 502, 503, 504],
        "denyStatusCodes": [],
        "actions": {
            "auth_error": "disable_and_next",
            "rate_limit": "cooldown_and_next",
            "provider_5xx": "next",
            "request_error": "stop"
        }
    }'::jsonb,
    -- hard_stop_policy
    '{
        "enabled": true,
        "categories": ["request_error", "unsupported_model", "user_permission", "insufficient_balance"],
        "codes": ["bad_request", "invalid_request", "invalid_parameter", "missing_required", "unsupported_kind", "unsupported_model", "insufficient_balance", "permission_denied"],
        "statusCodes": [],
        "keywords": ["invalid_parameter", "missing required", "bad request", "insufficient balance"]
    }'::jsonb,
    -- priority_demote_policy
    '{
        "enabled": true,
        "demoteStep": 100,
        "categories": ["network", "timeout", "stream_error", "rate_limit", "provider_5xx", "provider_overloaded"],
        "codes": ["network", "timeout", "stream_read_error", "rate_limit", "server_error", "overloaded"],
        "statusCodes": [408, 429, 500, 502, 503, 504],
        "keywords": ["timeout", "network", "rate_limit", "overloaded", "temporarily_unavailable", "server_error", "429", "5xx"]
    }'::jsonb,
    -- metadata: records which seed wrote the row
    '{"seed":"0026_runner_policies"}'::jsonb,
    'active'
)
ON CONFLICT (policy_key) DO UPDATE
SET name                   = EXCLUDED.name,
    description            = EXCLUDED.description,
    failover_policy        = gateway_runner_policies.failover_policy || EXCLUDED.failover_policy,
    hard_stop_policy       = gateway_runner_policies.hard_stop_policy || EXCLUDED.hard_stop_policy,
    priority_demote_policy = gateway_runner_policies.priority_demote_policy || EXCLUDED.priority_demote_policy,
    metadata               = gateway_runner_policies.metadata || EXCLUDED.metadata,
    updated_at             = now();

-- Reword the description of the 'default-runtime-v1' runtime policy set:
-- "失败重试" (retry on failure) becomes "平台内调用重试" (in-platform call retry).
--
-- The second predicate is a guard: the row is rewritten only while its
-- description is still exactly the old text, so a description that has been
-- edited to anything else is left untouched and re-running is a no-op.
-- NOTE(review): the old text is presumably what an earlier migration seeded -- confirm.
-- model_runtime_policy_sets is not created in this file and must already exist.
UPDATE model_runtime_policy_sets
SET description = '默认包含 TPM/RPM/并发、平台内调用重试、自动禁用和优先级降级关键词。'
WHERE policy_key = 'default-runtime-v1'
  AND description = '默认包含 TPM/RPM/并发、失败重试、自动禁用和优先级降级关键词。';