feat: add priority demotion controls
This commit is contained in:
parent
3917b84b5d
commit
98abd247d6
@ -44,6 +44,44 @@ func (s *Server) updateRunnerPolicy(w http.ResponseWriter, r *http.Request) {
|
||||
writeJSON(w, http.StatusOK, item)
|
||||
}
|
||||
|
||||
type updatePlatformDynamicPriorityRequest struct {
|
||||
DynamicPriority *int `json:"dynamicPriority"`
|
||||
Reset bool `json:"reset"`
|
||||
}
|
||||
|
||||
func (s *Server) updatePlatformDynamicPriority(w http.ResponseWriter, r *http.Request) {
|
||||
var input updatePlatformDynamicPriorityRequest
|
||||
if err := json.NewDecoder(r.Body).Decode(&input); err != nil {
|
||||
writeError(w, http.StatusBadRequest, "invalid json body")
|
||||
return
|
||||
}
|
||||
var dynamicPriority *int
|
||||
if input.Reset {
|
||||
dynamicPriority = nil
|
||||
} else {
|
||||
if input.DynamicPriority == nil {
|
||||
writeError(w, http.StatusBadRequest, "dynamicPriority is required unless reset is true")
|
||||
return
|
||||
}
|
||||
if *input.DynamicPriority < 0 {
|
||||
writeError(w, http.StatusBadRequest, "dynamicPriority must be greater than or equal to 0")
|
||||
return
|
||||
}
|
||||
dynamicPriority = input.DynamicPriority
|
||||
}
|
||||
item, err := s.store.UpdatePlatformDynamicPriority(r.Context(), r.PathValue("platformID"), dynamicPriority)
|
||||
if err != nil {
|
||||
if store.IsNotFound(err) {
|
||||
writeError(w, http.StatusNotFound, "platform not found")
|
||||
return
|
||||
}
|
||||
s.logger.Error("update platform dynamic priority failed", "error", err)
|
||||
writeError(w, http.StatusInternalServerError, "update platform dynamic priority failed")
|
||||
return
|
||||
}
|
||||
writeJSON(w, http.StatusOK, item)
|
||||
}
|
||||
|
||||
func (s *Server) createRuntimePolicySet(w http.ResponseWriter, r *http.Request) {
|
||||
var input store.RuntimePolicySetInput
|
||||
if err := json.NewDecoder(r.Body).Decode(&input); err != nil {
|
||||
|
||||
@ -105,6 +105,7 @@ func NewServerWithContext(ctx context.Context, cfg config.Config, db *store.Stor
|
||||
mux.Handle("GET /api/admin/platforms", server.requireAdmin(auth.PermissionPower, http.HandlerFunc(server.listPlatforms)))
|
||||
mux.Handle("POST /api/admin/platforms", server.requireAdmin(auth.PermissionManager, http.HandlerFunc(server.createPlatform)))
|
||||
mux.Handle("PATCH /api/admin/platforms/{platformID}", server.requireAdmin(auth.PermissionManager, http.HandlerFunc(server.updatePlatform)))
|
||||
mux.Handle("PATCH /api/admin/platforms/{platformID}/dynamic-priority", server.requireAdmin(auth.PermissionManager, http.HandlerFunc(server.updatePlatformDynamicPriority)))
|
||||
mux.Handle("DELETE /api/admin/platforms/{platformID}", server.requireAdmin(auth.PermissionManager, http.HandlerFunc(server.deletePlatform)))
|
||||
mux.Handle("PUT /api/admin/platforms/{platformID}/models", server.requireAdmin(auth.PermissionManager, http.HandlerFunc(server.replacePlatformModels)))
|
||||
mux.Handle("POST /api/admin/platforms/{platformID}/models", server.requireAdmin(auth.PermissionManager, http.HandlerFunc(server.createPlatformModel)))
|
||||
|
||||
@ -43,7 +43,6 @@ type failoverDecision struct {
|
||||
type priorityDemoteDecision struct {
|
||||
Demote bool
|
||||
Reason string
|
||||
Step int
|
||||
Match policyRuleMatch
|
||||
Info failureInfo
|
||||
}
|
||||
@ -110,9 +109,9 @@ func failoverDecisionForCandidate(runnerPolicy store.RunnerPolicy, candidate sto
|
||||
return failoverDecision{Retry: false, Action: "stop", Reason: "client_non_retryable", Match: policyRuleMatch{Source: "provider_client", Policy: "ClientError", Rule: "Retryable", Value: "false"}, Info: info}
|
||||
}
|
||||
|
||||
func shouldDemoteCandidatePriority(runnerPolicy store.RunnerPolicy, err error) (bool, int) {
|
||||
func shouldDemoteCandidatePriority(runnerPolicy store.RunnerPolicy, err error) bool {
|
||||
decision := priorityDemoteDecisionForCandidate(runnerPolicy, err)
|
||||
return decision.Demote, decision.Step
|
||||
return decision.Demote
|
||||
}
|
||||
|
||||
func priorityDemoteDecisionForCandidate(runnerPolicy store.RunnerPolicy, err error) priorityDemoteDecision {
|
||||
@ -128,11 +127,7 @@ func priorityDemoteDecisionForCandidate(runnerPolicy store.RunnerPolicy, err err
|
||||
return priorityDemoteDecision{Demote: false, Reason: "priority_demote_disabled", Info: info}
|
||||
}
|
||||
if match, ok := priorityDemotePolicyMatch(policy, info); ok {
|
||||
step := intFromPolicy(policy, "demoteStep")
|
||||
if step <= 0 {
|
||||
step = 100
|
||||
}
|
||||
return priorityDemoteDecision{Demote: true, Reason: "priority_demote_policy", Step: step, Match: match, Info: info}
|
||||
return priorityDemoteDecision{Demote: true, Reason: "priority_demote_policy", Match: match, Info: info}
|
||||
}
|
||||
return priorityDemoteDecision{Demote: false, Reason: "priority_demote_no_match", Info: info}
|
||||
}
|
||||
|
||||
@ -171,19 +171,16 @@ func TestPriorityDemotePolicyIsKeywordGatedAndHardStopSafe(t *testing.T) {
|
||||
"categories": []any{"request_error"},
|
||||
},
|
||||
PriorityDemotePolicy: map[string]any{
|
||||
"enabled": true,
|
||||
"demoteStep": 25,
|
||||
"keywords": []any{"rate_limit"},
|
||||
"enabled": true,
|
||||
"keywords": []any{"rate_limit"},
|
||||
},
|
||||
}
|
||||
|
||||
shouldDemote, step := shouldDemoteCandidatePriority(runnerPolicy, &clients.ClientError{Code: "rate_limit", Message: "rate_limit from upstream", Retryable: true})
|
||||
if !shouldDemote || step != 25 {
|
||||
t.Fatalf("priority demotion should be enabled only by matched policy, got shouldDemote=%v step=%d", shouldDemote, step)
|
||||
if !shouldDemoteCandidatePriority(runnerPolicy, &clients.ClientError{Code: "rate_limit", Message: "rate_limit from upstream", Retryable: true}) {
|
||||
t.Fatal("priority demotion should be enabled only by matched policy")
|
||||
}
|
||||
|
||||
shouldDemote, _ = shouldDemoteCandidatePriority(runnerPolicy, &clients.ClientError{Code: "bad_request", StatusCode: 400, Retryable: true})
|
||||
if shouldDemote {
|
||||
if shouldDemoteCandidatePriority(runnerPolicy, &clients.ClientError{Code: "bad_request", StatusCode: 400, Retryable: true}) {
|
||||
t.Fatal("priority demotion should not run for hard-stop request errors")
|
||||
}
|
||||
}
|
||||
|
||||
@ -73,17 +73,26 @@ func (s *Service) applyPriorityDemotePolicy(ctx context.Context, taskID string,
|
||||
if !decision.Demote {
|
||||
return
|
||||
}
|
||||
if err := s.store.DemoteCandidatePlatformPriority(ctx, candidate.PlatformID, decision.Step); err == nil {
|
||||
s.recordAttemptTrace(ctx, taskID, attemptNo, priorityDemoteTraceEntry(decision, candidate.PlatformID, candidate.PlatformModelID))
|
||||
if dynamicPriority, err := s.store.DemoteCandidatePlatformPriority(ctx, candidate.PlatformID); err == nil {
|
||||
s.recordAttemptTrace(ctx, taskID, attemptNo, priorityDemoteTraceEntry(decision, candidate.PlatformID, candidate.PlatformModelID, dynamicPriority))
|
||||
_ = s.emit(ctx, taskID, "task.policy.priority_demoted", "running", "priority_demote", 0.52, "candidate platform priority demoted by runner policy", addPolicyTracePayload(map[string]any{
|
||||
"platformId": candidate.PlatformID,
|
||||
"platformModelId": candidate.PlatformModelID,
|
||||
"demoteStep": decision.Step,
|
||||
"dynamicPriority": dynamicPriority,
|
||||
"code": clients.ErrorCode(cause),
|
||||
"reason": decision.Reason,
|
||||
"errorMessage": messageFromError(cause),
|
||||
}, decision.Match, decision.Info), simulated)
|
||||
}
|
||||
}
|
||||
|
||||
func messageFromError(err error) string {
|
||||
if err == nil {
|
||||
return ""
|
||||
}
|
||||
return err.Error()
|
||||
}
|
||||
|
||||
func effectiveRuntimePolicy(base map[string]any, override map[string]any, key string) map[string]any {
|
||||
policy := base
|
||||
if nested, ok := override[key].(map[string]any); ok && len(nested) > 0 {
|
||||
|
||||
@ -34,10 +34,12 @@ func failoverTraceEntry(decision failoverDecision) map[string]any {
|
||||
return entry
|
||||
}
|
||||
|
||||
func priorityDemoteTraceEntry(decision priorityDemoteDecision, platformID string, platformModelID string) map[string]any {
|
||||
func priorityDemoteTraceEntry(decision priorityDemoteDecision, platformID string, platformModelID string, dynamicPriority int) map[string]any {
|
||||
entry := policyTraceEntry("priority_demoted", "priority_demote", "demote", decision.Reason, decision.Match, decision.Info)
|
||||
entry["demote"] = decision.Demote
|
||||
entry["demoteStep"] = decision.Step
|
||||
if dynamicPriority > 0 {
|
||||
entry["dynamicPriority"] = dynamicPriority
|
||||
}
|
||||
entry["platformId"] = platformID
|
||||
entry["platformModelId"] = platformModelID
|
||||
return entry
|
||||
|
||||
@ -59,3 +59,14 @@ func TestRuntimeCandidateSortingAvoidsFullCandidatesButKeepsFallback(t *testing.
|
||||
t.Fatalf("expected full high-priority candidate to remain as avoided fallback, got %+v", candidates)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDefaultRunnerPriorityDemotePolicyUsesAutoMode(t *testing.T) {
|
||||
policy := defaultRunnerPriorityDemotePolicy()
|
||||
|
||||
if _, ok := policy["demoteStep"]; ok {
|
||||
t.Fatal("priority demotion should be automatic and must not expose a demoteStep policy")
|
||||
}
|
||||
if policy["enabled"] != true {
|
||||
t.Fatalf("expected default priority demotion to stay enabled, got %+v", policy["enabled"])
|
||||
}
|
||||
}
|
||||
|
||||
@ -3,6 +3,7 @@ package store
|
||||
import (
|
||||
"context"
|
||||
"crypto/rand"
|
||||
"database/sql"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
@ -67,6 +68,8 @@ type Platform struct {
|
||||
AuthType string `json:"authType"`
|
||||
Status string `json:"status"`
|
||||
Priority int `json:"priority"`
|
||||
DynamicPriority *int `json:"dynamicPriority,omitempty"`
|
||||
EffectivePriority int `json:"effectivePriority"`
|
||||
DefaultPricingMode string `json:"defaultPricingMode"`
|
||||
DefaultDiscountFactor float64 `json:"defaultDiscountFactor"`
|
||||
PricingRuleSetID string `json:"pricingRuleSetId,omitempty"`
|
||||
@ -508,13 +511,14 @@ type TaskParamPreprocessingLog struct {
|
||||
|
||||
func (s *Store) ListPlatforms(ctx context.Context) ([]Platform, error) {
|
||||
rows, err := s.pool.Query(ctx, `
|
||||
SELECT id::text, provider, platform_key, name, COALESCE(internal_name, ''), COALESCE(base_url, ''), auth_type, status, priority,
|
||||
SELECT id::text, provider, platform_key, name, COALESCE(internal_name, ''), COALESCE(base_url, ''), auth_type, status,
|
||||
priority, dynamic_priority, COALESCE(dynamic_priority, priority),
|
||||
default_pricing_mode, default_discount_factor::float8, COALESCE(pricing_rule_set_id::text, ''),
|
||||
config, credentials, retry_policy, rate_limit_policy,
|
||||
COALESCE(to_char(cooldown_until AT TIME ZONE 'UTC', 'YYYY-MM-DD"T"HH24:MI:SS.MS"Z"'), ''),
|
||||
created_at, updated_at
|
||||
FROM integration_platforms
|
||||
ORDER BY priority ASC, created_at DESC`)
|
||||
ORDER BY COALESCE(dynamic_priority, priority) ASC, priority ASC, created_at DESC`)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -527,6 +531,7 @@ ORDER BY priority ASC, created_at DESC`)
|
||||
var credentialsBytes []byte
|
||||
var retryPolicyBytes []byte
|
||||
var rateLimitPolicyBytes []byte
|
||||
var dynamicPriority sql.NullInt64
|
||||
if err := rows.Scan(
|
||||
&platform.ID,
|
||||
&platform.Provider,
|
||||
@ -537,6 +542,8 @@ ORDER BY priority ASC, created_at DESC`)
|
||||
&platform.AuthType,
|
||||
&platform.Status,
|
||||
&platform.Priority,
|
||||
&dynamicPriority,
|
||||
&platform.EffectivePriority,
|
||||
&platform.DefaultPricingMode,
|
||||
&platform.DefaultDiscountFactor,
|
||||
&platform.PricingRuleSetID,
|
||||
@ -550,6 +557,7 @@ ORDER BY priority ASC, created_at DESC`)
|
||||
); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
platform.DynamicPriority = intPointerFromNull(dynamicPriority)
|
||||
platform.Config = decodeObject(configBytes)
|
||||
platform.CredentialsPreview = maskCredentialsPreview(credentialsBytes)
|
||||
platform.RetryPolicy = decodeObject(retryPolicyBytes)
|
||||
@ -578,6 +586,7 @@ func (s *Store) CreatePlatform(ctx context.Context, input CreatePlatformInput) (
|
||||
var credentialsResultBytes []byte
|
||||
var retryPolicyBytes []byte
|
||||
var rateLimitPolicyBytes []byte
|
||||
var dynamicPriority sql.NullInt64
|
||||
err := s.pool.QueryRow(ctx, `
|
||||
INSERT INTO integration_platforms (
|
||||
provider, platform_key, name, internal_name, base_url, auth_type, credentials, config,
|
||||
@ -588,7 +597,8 @@ VALUES (
|
||||
$1, COALESCE(NULLIF($2, ''), 'platform_' || replace(gen_random_uuid()::text, '-', '')), $3, NULLIF($4, ''), $5, $6, $7, $8,
|
||||
$9, $10, NULLIF($11, '')::uuid, $12, $13, $14
|
||||
)
|
||||
RETURNING id::text, provider, platform_key, name, COALESCE(internal_name, ''), COALESCE(base_url, ''), auth_type, status, priority,
|
||||
RETURNING id::text, provider, platform_key, name, COALESCE(internal_name, ''), COALESCE(base_url, ''), auth_type, status,
|
||||
priority, dynamic_priority, COALESCE(dynamic_priority, priority),
|
||||
default_pricing_mode, default_discount_factor::float8, COALESCE(pricing_rule_set_id::text, ''),
|
||||
config, credentials, retry_policy, rate_limit_policy,
|
||||
COALESCE(to_char(cooldown_until AT TIME ZONE 'UTC', 'YYYY-MM-DD"T"HH24:MI:SS.MS"Z"'), ''),
|
||||
@ -606,6 +616,8 @@ RETURNING id::text, provider, platform_key, name, COALESCE(internal_name, ''), C
|
||||
&platform.AuthType,
|
||||
&platform.Status,
|
||||
&platform.Priority,
|
||||
&dynamicPriority,
|
||||
&platform.EffectivePriority,
|
||||
&platform.DefaultPricingMode,
|
||||
&platform.DefaultDiscountFactor,
|
||||
&platform.PricingRuleSetID,
|
||||
@ -620,6 +632,7 @@ RETURNING id::text, provider, platform_key, name, COALESCE(internal_name, ''), C
|
||||
if err != nil {
|
||||
return Platform{}, err
|
||||
}
|
||||
platform.DynamicPriority = intPointerFromNull(dynamicPriority)
|
||||
platform.Config = decodeObject(configBytes)
|
||||
platform.CredentialsPreview = maskCredentialsPreview(credentialsResultBytes)
|
||||
platform.RetryPolicy = decodeObject(retryPolicyBytes)
|
||||
@ -650,6 +663,7 @@ func (s *Store) UpdatePlatform(ctx context.Context, id string, input CreatePlatf
|
||||
var credentialsResultBytes []byte
|
||||
var retryPolicyBytes []byte
|
||||
var rateLimitPolicyBytes []byte
|
||||
var dynamicPriority sql.NullInt64
|
||||
err := s.pool.QueryRow(ctx, `
|
||||
UPDATE integration_platforms
|
||||
SET provider = $2,
|
||||
@ -672,7 +686,8 @@ SET provider = $2,
|
||||
rate_limit_policy = $15,
|
||||
updated_at = now()
|
||||
WHERE id = $1::uuid
|
||||
RETURNING id::text, provider, platform_key, name, COALESCE(internal_name, ''), COALESCE(base_url, ''), auth_type, status, priority,
|
||||
RETURNING id::text, provider, platform_key, name, COALESCE(internal_name, ''), COALESCE(base_url, ''), auth_type, status,
|
||||
priority, dynamic_priority, COALESCE(dynamic_priority, priority),
|
||||
default_pricing_mode, default_discount_factor::float8, COALESCE(pricing_rule_set_id::text, ''),
|
||||
config, credentials, retry_policy, rate_limit_policy,
|
||||
COALESCE(to_char(cooldown_until AT TIME ZONE 'UTC', 'YYYY-MM-DD"T"HH24:MI:SS.MS"Z"'), ''),
|
||||
@ -702,6 +717,8 @@ RETURNING id::text, provider, platform_key, name, COALESCE(internal_name, ''), C
|
||||
&platform.AuthType,
|
||||
&platform.Status,
|
||||
&platform.Priority,
|
||||
&dynamicPriority,
|
||||
&platform.EffectivePriority,
|
||||
&platform.DefaultPricingMode,
|
||||
&platform.DefaultDiscountFactor,
|
||||
&platform.PricingRuleSetID,
|
||||
@ -716,6 +733,7 @@ RETURNING id::text, provider, platform_key, name, COALESCE(internal_name, ''), C
|
||||
if err != nil {
|
||||
return Platform{}, err
|
||||
}
|
||||
platform.DynamicPriority = intPointerFromNull(dynamicPriority)
|
||||
platform.Config = decodeObject(configBytes)
|
||||
platform.CredentialsPreview = maskCredentialsPreview(credentialsResultBytes)
|
||||
platform.RetryPolicy = decodeObject(retryPolicyBytes)
|
||||
|
||||
@ -2,8 +2,11 @@ package store
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
type RateLimitMetricStatus struct {
|
||||
@ -17,30 +20,53 @@ type RateLimitMetricStatus struct {
|
||||
}
|
||||
|
||||
type ModelRateLimitStatus struct {
|
||||
PlatformModelID string `json:"platformModelId"`
|
||||
PlatformID string `json:"platformId"`
|
||||
PlatformName string `json:"platformName"`
|
||||
Provider string `json:"provider"`
|
||||
ModelName string `json:"modelName"`
|
||||
ProviderModelName string `json:"providerModelName,omitempty"`
|
||||
ModelAlias string `json:"modelAlias,omitempty"`
|
||||
DisplayName string `json:"displayName"`
|
||||
ModelType []string `json:"modelType"`
|
||||
Enabled bool `json:"enabled"`
|
||||
RateLimitPolicy map[string]any `json:"rateLimitPolicy,omitempty"`
|
||||
PlatformCooldownUntil string `json:"platformCooldownUntil,omitempty"`
|
||||
ModelCooldownUntil string `json:"modelCooldownUntil,omitempty"`
|
||||
Concurrent RateLimitMetricStatus `json:"concurrent"`
|
||||
QueuedTasks float64 `json:"queuedTasks"`
|
||||
RPM RateLimitMetricStatus `json:"rpm"`
|
||||
TPM RateLimitMetricStatus `json:"tpm"`
|
||||
LoadRatio float64 `json:"loadRatio"`
|
||||
PlatformModelID string `json:"platformModelId"`
|
||||
PlatformID string `json:"platformId"`
|
||||
PlatformName string `json:"platformName"`
|
||||
Provider string `json:"provider"`
|
||||
PlatformPriority int `json:"platformPriority"`
|
||||
PlatformDynamicPriority *int `json:"platformDynamicPriority,omitempty"`
|
||||
PlatformEffectivePriority int `json:"platformEffectivePriority"`
|
||||
ModelName string `json:"modelName"`
|
||||
ProviderModelName string `json:"providerModelName,omitempty"`
|
||||
ModelAlias string `json:"modelAlias,omitempty"`
|
||||
DisplayName string `json:"displayName"`
|
||||
ModelType []string `json:"modelType"`
|
||||
Enabled bool `json:"enabled"`
|
||||
RateLimitPolicy map[string]any `json:"rateLimitPolicy,omitempty"`
|
||||
PlatformCooldownUntil string `json:"platformCooldownUntil,omitempty"`
|
||||
ModelCooldownUntil string `json:"modelCooldownUntil,omitempty"`
|
||||
Concurrent RateLimitMetricStatus `json:"concurrent"`
|
||||
QueuedTasks float64 `json:"queuedTasks"`
|
||||
RPM RateLimitMetricStatus `json:"rpm"`
|
||||
TPM RateLimitMetricStatus `json:"tpm"`
|
||||
LoadRatio float64 `json:"loadRatio"`
|
||||
RecentPriorityDemotions []PriorityDemotionRecord `json:"recentPriorityDemotions,omitempty"`
|
||||
}
|
||||
|
||||
type PriorityDemotionRecord struct {
|
||||
ID string `json:"id"`
|
||||
TaskID string `json:"taskId"`
|
||||
PlatformID string `json:"platformId"`
|
||||
PlatformModelID string `json:"platformModelId,omitempty"`
|
||||
Reason string `json:"reason,omitempty"`
|
||||
ErrorCode string `json:"errorCode,omitempty"`
|
||||
ErrorMessage string `json:"errorMessage,omitempty"`
|
||||
Category string `json:"category,omitempty"`
|
||||
StatusCode int `json:"statusCode,omitempty"`
|
||||
PolicySource string `json:"policySource,omitempty"`
|
||||
Policy string `json:"policy,omitempty"`
|
||||
PolicyRule string `json:"policyRule,omitempty"`
|
||||
MatchedValue string `json:"matchedValue,omitempty"`
|
||||
DynamicPriority int `json:"dynamicPriority,omitempty"`
|
||||
CreatedAt time.Time `json:"createdAt"`
|
||||
}
|
||||
|
||||
func (s *Store) ListModelRateLimitStatuses(ctx context.Context) ([]ModelRateLimitStatus, error) {
|
||||
rows, err := s.pool.Query(ctx, `
|
||||
SELECT m.id::text, m.platform_id::text, p.name, p.provider,
|
||||
m.model_name, COALESCE(NULLIF(m.provider_model_name, ''), m.model_name), COALESCE(m.model_alias, ''),
|
||||
SELECT m.id::text, m.platform_id::text, p.name, p.provider,
|
||||
p.priority, p.dynamic_priority, COALESCE(p.dynamic_priority, p.priority),
|
||||
m.model_name, COALESCE(NULLIF(m.provider_model_name, ''), m.model_name), COALESCE(m.model_alias, ''),
|
||||
m.model_type, m.display_name, m.enabled,
|
||||
p.rate_limit_policy, COALESCE(rp.rate_limit_policy, '{}'::jsonb), COALESCE(NULLIF(m.runtime_policy_override, '{}'::jsonb), b.runtime_policy_override, '{}'::jsonb), m.rate_limit_policy,
|
||||
COALESCE(to_char(p.cooldown_until AT TIME ZONE 'UTC', 'YYYY-MM-DD"T"HH24:MI:SS.MS"Z"'), ''),
|
||||
@ -119,6 +145,7 @@ ORDER BY p.priority ASC, m.model_name ASC`)
|
||||
var runtimePolicyBytes []byte
|
||||
var runtimeOverrideBytes []byte
|
||||
var modelPolicyBytes []byte
|
||||
var platformDynamicPriority sql.NullInt64
|
||||
var platformCooldownUntil string
|
||||
var modelCooldownUntil string
|
||||
var concurrentCurrent float64
|
||||
@ -134,6 +161,9 @@ ORDER BY p.priority ASC, m.model_name ASC`)
|
||||
&item.PlatformID,
|
||||
&item.PlatformName,
|
||||
&item.Provider,
|
||||
&item.PlatformPriority,
|
||||
&platformDynamicPriority,
|
||||
&item.PlatformEffectivePriority,
|
||||
&item.ModelName,
|
||||
&item.ProviderModelName,
|
||||
&item.ModelAlias,
|
||||
@ -157,6 +187,7 @@ ORDER BY p.priority ASC, m.model_name ASC`)
|
||||
); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
item.PlatformDynamicPriority = intPointerFromNull(platformDynamicPriority)
|
||||
item.ModelType = decodeStringArray(modelTypeBytes)
|
||||
policy := effectiveModelRateLimitPolicy(
|
||||
decodeObject(platformPolicyBytes),
|
||||
@ -177,6 +208,13 @@ ORDER BY p.priority ASC, m.model_name ASC`)
|
||||
if err := rows.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
demotions, err := s.listRecentPriorityDemotionsByPlatform(ctx, items, 10)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for index := range items {
|
||||
items[index].RecentPriorityDemotions = demotions[items[index].PlatformID]
|
||||
}
|
||||
sort.SliceStable(items, func(i, j int) bool {
|
||||
if items[i].LoadRatio == items[j].LoadRatio {
|
||||
return strings.ToLower(items[i].DisplayName) < strings.ToLower(items[j].DisplayName)
|
||||
@ -186,6 +224,91 @@ ORDER BY p.priority ASC, m.model_name ASC`)
|
||||
return items, nil
|
||||
}
|
||||
|
||||
func (s *Store) listRecentPriorityDemotionsByPlatform(ctx context.Context, statuses []ModelRateLimitStatus, limit int) (map[string][]PriorityDemotionRecord, error) {
|
||||
out := map[string][]PriorityDemotionRecord{}
|
||||
if limit <= 0 || len(statuses) == 0 {
|
||||
return out, nil
|
||||
}
|
||||
seen := map[string]bool{}
|
||||
platformIDs := make([]string, 0, len(statuses))
|
||||
for _, status := range statuses {
|
||||
platformID := strings.TrimSpace(status.PlatformID)
|
||||
if platformID == "" || seen[platformID] {
|
||||
continue
|
||||
}
|
||||
seen[platformID] = true
|
||||
platformIDs = append(platformIDs, platformID)
|
||||
}
|
||||
if len(platformIDs) == 0 {
|
||||
return out, nil
|
||||
}
|
||||
rows, err := s.pool.Query(ctx, `
|
||||
SELECT id::text, task_id::text, COALESCE(message, ''), payload, created_at
|
||||
FROM (
|
||||
SELECT e.*,
|
||||
row_number() OVER (
|
||||
PARTITION BY e.payload->>'platformId'
|
||||
ORDER BY e.created_at DESC, e.seq DESC
|
||||
) AS demotion_rank
|
||||
FROM gateway_task_events e
|
||||
WHERE e.event_type = 'task.policy.priority_demoted'
|
||||
AND e.payload->>'platformId' = ANY($1::text[])
|
||||
) ranked
|
||||
WHERE demotion_rank <= $2
|
||||
ORDER BY payload->>'platformId' ASC, created_at DESC`, platformIDs, limit)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
for rows.Next() {
|
||||
var id string
|
||||
var taskID string
|
||||
var message string
|
||||
var payloadBytes []byte
|
||||
var createdAt time.Time
|
||||
if err := rows.Scan(&id, &taskID, &message, &payloadBytes, &createdAt); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
record := priorityDemotionRecordFromEventPayload(id, taskID, message, decodeObject(payloadBytes), createdAt)
|
||||
if record.PlatformID == "" {
|
||||
continue
|
||||
}
|
||||
out[record.PlatformID] = append(out[record.PlatformID], record)
|
||||
}
|
||||
return out, rows.Err()
|
||||
}
|
||||
|
||||
func priorityDemotionRecordFromEventPayload(id string, taskID string, message string, payload map[string]any, createdAt time.Time) PriorityDemotionRecord {
|
||||
errorMessage := stringValue(payload["errorMessage"])
|
||||
if errorMessage == "" {
|
||||
errorMessage = stringValue(payload["message"])
|
||||
}
|
||||
if errorMessage == "" {
|
||||
errorMessage = strings.TrimSpace(message)
|
||||
}
|
||||
errorCode := stringValue(payload["errorCode"])
|
||||
if errorCode == "" {
|
||||
errorCode = stringValue(payload["code"])
|
||||
}
|
||||
return PriorityDemotionRecord{
|
||||
ID: id,
|
||||
TaskID: taskID,
|
||||
PlatformID: stringValue(payload["platformId"]),
|
||||
PlatformModelID: stringValue(payload["platformModelId"]),
|
||||
Reason: stringValue(payload["reason"]),
|
||||
ErrorCode: errorCode,
|
||||
ErrorMessage: errorMessage,
|
||||
Category: stringValue(payload["category"]),
|
||||
StatusCode: intValue(payload["statusCode"]),
|
||||
PolicySource: stringValue(payload["policySource"]),
|
||||
Policy: stringValue(payload["policy"]),
|
||||
PolicyRule: stringValue(payload["policyRule"]),
|
||||
MatchedValue: stringValue(payload["matchedValue"]),
|
||||
DynamicPriority: intValue(payload["dynamicPriority"]),
|
||||
CreatedAt: createdAt,
|
||||
}
|
||||
}
|
||||
|
||||
func effectiveModelRateLimitPolicy(platformPolicy map[string]any, runtimePolicy map[string]any, runtimeOverride map[string]any, modelPolicy map[string]any) map[string]any {
|
||||
policy := platformPolicy
|
||||
if hasRateLimitRules(runtimePolicy) {
|
||||
@ -279,3 +402,19 @@ func floatValue(value any) float64 {
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
func intValue(value any) int {
|
||||
switch typed := value.(type) {
|
||||
case int:
|
||||
return typed
|
||||
case int64:
|
||||
return int(typed)
|
||||
case float64:
|
||||
return int(typed)
|
||||
case string:
|
||||
parsed, _ := strconv.Atoi(strings.TrimSpace(typed))
|
||||
return parsed
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,6 +1,9 @@
|
||||
package store
|
||||
|
||||
import "testing"
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestEffectiveModelRateLimitPolicyTreatsModelRulesAsAuthoritative(t *testing.T) {
|
||||
policy := effectiveModelRateLimitPolicy(
|
||||
@ -30,3 +33,31 @@ func TestEffectiveModelRateLimitPolicyTreatsModelRulesAsAuthoritative(t *testing
|
||||
t.Fatalf("expected missing model tpm limit to mean unlimited, got %v", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPriorityDemotionRecordFromEventPayloadKeepsReason(t *testing.T) {
|
||||
createdAt := time.Date(2026, 5, 12, 9, 30, 0, 0, time.UTC)
|
||||
record := priorityDemotionRecordFromEventPayload("event-1", "task-1", "fallback message", map[string]any{
|
||||
"platformId": "platform-1",
|
||||
"platformModelId": "platform-model-1",
|
||||
"reason": "priority_demote_policy",
|
||||
"errorCode": "rate_limit",
|
||||
"errorMessage": "upstream 429 rate limit",
|
||||
"category": "rate_limit",
|
||||
"statusCode": float64(429),
|
||||
"policySource": "gateway_runner_policies.priority_demote_policy",
|
||||
"policy": "priorityDemotePolicy",
|
||||
"policyRule": "categories",
|
||||
"matchedValue": "rate_limit",
|
||||
"dynamicPriority": float64(1511),
|
||||
}, createdAt)
|
||||
|
||||
if record.Reason != "priority_demote_policy" || record.ErrorMessage != "upstream 429 rate limit" {
|
||||
t.Fatalf("expected demotion reason and error message to survive, got %+v", record)
|
||||
}
|
||||
if record.StatusCode != 429 || record.DynamicPriority != 1511 {
|
||||
t.Fatalf("expected numeric demotion metadata, got %+v", record)
|
||||
}
|
||||
if !record.CreatedAt.Equal(createdAt) {
|
||||
t.Fatalf("expected createdAt %s, got %s", createdAt, record.CreatedAt)
|
||||
}
|
||||
}
|
||||
|
||||
@ -134,7 +134,6 @@ func defaultRunnerPolicy() RunnerPolicy {
|
||||
func defaultRunnerPriorityDemotePolicy() map[string]any {
|
||||
return map[string]any{
|
||||
"enabled": true,
|
||||
"demoteStep": 100,
|
||||
"categories": []any{"network", "timeout", "stream_error", "rate_limit", "provider_5xx", "provider_overloaded"},
|
||||
"codes": []any{"network", "timeout", "stream_read_error", "rate_limit", "server_error", "overloaded"},
|
||||
"statusCodes": []any{408, 429, 500, 502, 503, 504},
|
||||
|
||||
@ -2,8 +2,10 @@ package store
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/jackc/pgx/v5"
|
||||
)
|
||||
@ -28,6 +30,14 @@ type runtimePolicyScanner interface {
|
||||
Scan(dest ...any) error
|
||||
}
|
||||
|
||||
type PlatformDynamicPriorityState struct {
|
||||
PlatformID string `json:"platformId"`
|
||||
Priority int `json:"priority"`
|
||||
DynamicPriority *int `json:"dynamicPriority,omitempty"`
|
||||
EffectivePriority int `json:"effectivePriority"`
|
||||
UpdatedAt time.Time `json:"updatedAt"`
|
||||
}
|
||||
|
||||
func (s *Store) ListRuntimePolicySets(ctx context.Context) ([]RuntimePolicySet, error) {
|
||||
rows, err := s.pool.Query(ctx, `SELECT `+runtimePolicyColumns+` FROM model_runtime_policy_sets ORDER BY policy_key ASC`)
|
||||
if err != nil {
|
||||
@ -149,26 +159,64 @@ WHERE id = $1::uuid`, platformModelID, cooldownSeconds)
|
||||
return err
|
||||
}
|
||||
|
||||
func (s *Store) DemoteCandidatePlatformPriority(ctx context.Context, platformID string, demoteStep int) error {
|
||||
func (s *Store) DemoteCandidatePlatformPriority(ctx context.Context, platformID string) (int, error) {
|
||||
if strings.TrimSpace(platformID) == "" {
|
||||
return 0, nil
|
||||
}
|
||||
var dynamicPriority int
|
||||
err := s.pool.QueryRow(ctx, `
|
||||
UPDATE integration_platforms target
|
||||
SET dynamic_priority = COALESCE((
|
||||
SELECT MAX(COALESCE(peer.dynamic_priority, peer.priority))
|
||||
FROM integration_platforms peer
|
||||
WHERE peer.deleted_at IS NULL
|
||||
), target.priority) + 1,
|
||||
updated_at = now()
|
||||
WHERE target.id = $1::uuid
|
||||
RETURNING dynamic_priority`, platformID).Scan(&dynamicPriority)
|
||||
return dynamicPriority, err
|
||||
}
|
||||
|
||||
func (s *Store) UpdatePlatformDynamicPriority(ctx context.Context, platformID string, dynamicPriority *int) (PlatformDynamicPriorityState, error) {
|
||||
if strings.TrimSpace(platformID) == "" {
|
||||
return PlatformDynamicPriorityState{}, pgx.ErrNoRows
|
||||
}
|
||||
value := 0
|
||||
reset := dynamicPriority == nil
|
||||
if dynamicPriority != nil {
|
||||
value = *dynamicPriority
|
||||
}
|
||||
return scanPlatformDynamicPriorityState(s.pool.QueryRow(ctx, `
|
||||
UPDATE integration_platforms
|
||||
SET dynamic_priority = CASE WHEN $2::boolean THEN priority ELSE $3::int END,
|
||||
updated_at = now()
|
||||
WHERE id = $1::uuid
|
||||
AND deleted_at IS NULL
|
||||
RETURNING id::text, priority, dynamic_priority, COALESCE(dynamic_priority, priority), updated_at`, platformID, reset, value))
|
||||
}
|
||||
|
||||
func scanPlatformDynamicPriorityState(scanner runtimePolicyScanner) (PlatformDynamicPriorityState, error) {
|
||||
var item PlatformDynamicPriorityState
|
||||
var dynamicPriority sql.NullInt64
|
||||
if err := scanner.Scan(
|
||||
&item.PlatformID,
|
||||
&item.Priority,
|
||||
&dynamicPriority,
|
||||
&item.EffectivePriority,
|
||||
&item.UpdatedAt,
|
||||
); err != nil {
|
||||
return PlatformDynamicPriorityState{}, err
|
||||
}
|
||||
item.DynamicPriority = intPointerFromNull(dynamicPriority)
|
||||
return item, nil
|
||||
}
|
||||
|
||||
func intPointerFromNull(value sql.NullInt64) *int {
|
||||
if !value.Valid {
|
||||
return nil
|
||||
}
|
||||
if demoteStep <= 0 {
|
||||
demoteStep = 100
|
||||
}
|
||||
_, err := s.pool.Exec(ctx, `
|
||||
UPDATE integration_platforms target
|
||||
SET dynamic_priority = GREATEST(
|
||||
COALESCE(target.dynamic_priority, target.priority),
|
||||
COALESCE((
|
||||
SELECT MAX(COALESCE(peer.dynamic_priority, peer.priority))
|
||||
FROM integration_platforms peer
|
||||
WHERE peer.deleted_at IS NULL
|
||||
), target.priority) + $2::int
|
||||
),
|
||||
updated_at = now()
|
||||
WHERE target.id = $1::uuid`, platformID, demoteStep)
|
||||
return err
|
||||
converted := int(value.Int64)
|
||||
return &converted
|
||||
}
|
||||
|
||||
func scanRuntimePolicySet(scanner runtimePolicyScanner) (RuntimePolicySet, error) {
|
||||
|
||||
@ -49,7 +49,6 @@ VALUES (
|
||||
}'::jsonb,
|
||||
'{
|
||||
"enabled": true,
|
||||
"demoteStep": 100,
|
||||
"categories": ["network", "timeout", "stream_error", "rate_limit", "provider_5xx", "provider_overloaded"],
|
||||
"codes": ["network", "timeout", "stream_read_error", "rate_limit", "server_error", "overloaded"],
|
||||
"statusCodes": [408, 429, 500, 502, 503, 504],
|
||||
|
||||
@ -0,0 +1,18 @@
|
||||
ALTER TABLE IF EXISTS gateway_runner_policies
|
||||
ADD COLUMN IF NOT EXISTS priority_demote_policy jsonb NOT NULL DEFAULT '{}'::jsonb;
|
||||
|
||||
UPDATE gateway_runner_policies
|
||||
SET priority_demote_policy = CASE
|
||||
WHEN COALESCE(priority_demote_policy, '{}'::jsonb) = '{}'::jsonb THEN
|
||||
'{
|
||||
"enabled": true,
|
||||
"categories": ["network", "timeout", "stream_error", "rate_limit", "provider_5xx", "provider_overloaded"],
|
||||
"codes": ["network", "timeout", "stream_read_error", "rate_limit", "server_error", "overloaded"],
|
||||
"statusCodes": [408, 429, 500, 502, 503, 504],
|
||||
"keywords": ["timeout", "network", "rate_limit", "overloaded", "temporarily_unavailable", "server_error", "429", "5xx"]
|
||||
}'::jsonb
|
||||
ELSE priority_demote_policy
|
||||
END,
|
||||
metadata = metadata || jsonb_build_object('priorityDemotePolicyRepair', '0034_runner_priority_demote_policy_repair'),
|
||||
updated_at = now()
|
||||
WHERE policy_key = 'default-runner-v1';
|
||||
@ -0,0 +1,5 @@
|
||||
UPDATE gateway_runner_policies
|
||||
SET priority_demote_policy = priority_demote_policy - 'demoteStep',
|
||||
metadata = metadata || jsonb_build_object('priorityDemoteAutoMode', '0035_runner_priority_demote_auto_mode'),
|
||||
updated_at = now()
|
||||
WHERE policy_key = 'default-runner-v1';
|
||||
@ -19,6 +19,7 @@ import type {
|
||||
IntegrationPlatform,
|
||||
ModelCatalogResponse,
|
||||
ModelRateLimitStatus,
|
||||
PlatformDynamicPriorityUpdateRequest,
|
||||
PlatformModel,
|
||||
PricingRule,
|
||||
PricingRuleSet,
|
||||
@ -79,6 +80,7 @@ import {
|
||||
updateAccessRule,
|
||||
updateGatewayUser,
|
||||
updatePlatform,
|
||||
updatePlatformDynamicPriority,
|
||||
updateTenant,
|
||||
updateUserGroup,
|
||||
} from './api';
|
||||
@ -558,6 +560,38 @@ export function App() {
|
||||
}
|
||||
}
|
||||
|
||||
async function savePlatformDynamicPriority(platformId: string, input: PlatformDynamicPriorityUpdateRequest) {
|
||||
setCoreState('loading');
|
||||
setCoreMessage('');
|
||||
try {
|
||||
const state = await updatePlatformDynamicPriority(token, platformId, input);
|
||||
setPlatforms((current) => current.map((platform) => platform.id === platformId
|
||||
? {
|
||||
...platform,
|
||||
dynamicPriority: state.dynamicPriority,
|
||||
effectivePriority: state.effectivePriority,
|
||||
priority: state.priority,
|
||||
updatedAt: state.updatedAt,
|
||||
}
|
||||
: platform));
|
||||
setModelRateLimits((current) => current.map((status) => status.platformId === platformId
|
||||
? {
|
||||
...status,
|
||||
platformDynamicPriority: state.dynamicPriority,
|
||||
platformEffectivePriority: state.effectivePriority,
|
||||
platformPriority: state.priority,
|
||||
}
|
||||
: status));
|
||||
invalidateDataKeys('modelCatalog', 'modelRateLimits', 'platforms', 'playgroundModels');
|
||||
setCoreState('ready');
|
||||
setCoreMessage(input.reset ? '平台动态优先级已重置。' : '平台动态优先级已更新。');
|
||||
} catch (err) {
|
||||
setCoreState('error');
|
||||
setCoreMessage(err instanceof Error ? err.message : '更新平台动态优先级失败');
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
||||
async function removePlatform(platformId: string) {
|
||||
setCoreState('loading');
|
||||
setCoreMessage('');
|
||||
@ -994,6 +1028,7 @@ export function App() {
|
||||
onResetAllBaseModels={resetAllBaseModelsToDefault}
|
||||
onResetBaseModel={resetBaseModelToDefault}
|
||||
onSavePlatform={savePlatformWithModels}
|
||||
onSavePlatformDynamicPriority={savePlatformDynamicPriority}
|
||||
onSaveProvider={saveProvider}
|
||||
onSavePricingRuleSet={savePricingRuleSet}
|
||||
onSaveRunnerPolicy={saveRunnerPolicy}
|
||||
|
||||
@ -24,6 +24,8 @@ import type {
|
||||
ListResponse,
|
||||
ModelCatalogResponse,
|
||||
ModelRateLimitStatus,
|
||||
PlatformDynamicPriorityState,
|
||||
PlatformDynamicPriorityUpdateRequest,
|
||||
PlatformModel,
|
||||
PlayableGatewayApiKey,
|
||||
PricingRule,
|
||||
@ -102,6 +104,18 @@ export async function listPlatforms(token: string): Promise<ListResponse<Integra
|
||||
return request<ListResponse<IntegrationPlatform>>('/api/admin/platforms', { token });
|
||||
}
|
||||
|
||||
export async function updatePlatformDynamicPriority(
|
||||
token: string,
|
||||
platformId: string,
|
||||
input: PlatformDynamicPriorityUpdateRequest,
|
||||
): Promise<PlatformDynamicPriorityState> {
|
||||
return request<PlatformDynamicPriorityState>(`/api/admin/platforms/${platformId}/dynamic-priority`, {
|
||||
body: input,
|
||||
method: 'PATCH',
|
||||
token,
|
||||
});
|
||||
}
|
||||
|
||||
export async function listModels(token: string): Promise<ListResponse<PlatformModel>> {
|
||||
return request<ListResponse<PlatformModel>>('/api/admin/models', { token });
|
||||
}
|
||||
|
||||
@ -8,6 +8,7 @@ import type {
|
||||
GatewayTenantUpsertRequest,
|
||||
GatewayRunnerPolicyUpsertRequest,
|
||||
GatewayUserUpsertRequest,
|
||||
PlatformDynamicPriorityUpdateRequest,
|
||||
PricingRuleSetUpsertRequest,
|
||||
RuntimePolicySetUpsertRequest,
|
||||
UserGroupUpsertRequest,
|
||||
@ -63,6 +64,7 @@ export function AdminPage(props: {
|
||||
onResetBaseModel: (baseModelId: string) => Promise<void>;
|
||||
onBatchAccessRules: (input: GatewayAccessRuleBatchRequest) => Promise<void>;
|
||||
onSavePlatform: (input: PlatformWithModelsInput) => Promise<void>;
|
||||
onSavePlatformDynamicPriority: (platformId: string, input: PlatformDynamicPriorityUpdateRequest) => Promise<void>;
|
||||
onSaveProvider: (input: CatalogProviderUpsertRequest, providerId?: string) => Promise<void>;
|
||||
onSavePricingRuleSet: (input: PricingRuleSetUpsertRequest, ruleSetId?: string) => Promise<void>;
|
||||
onSaveRunnerPolicy: (input: GatewayRunnerPolicyUpsertRequest) => Promise<void>;
|
||||
@ -154,6 +156,7 @@ export function AdminPage(props: {
|
||||
modelRateLimits={props.data.modelRateLimits}
|
||||
modelRateLimitsUpdatedAt={props.data.modelRateLimitsUpdatedAt}
|
||||
platforms={props.data.platforms}
|
||||
onSavePlatformDynamicPriority={props.onSavePlatformDynamicPriority}
|
||||
/>
|
||||
)}
|
||||
{props.section === 'tenants' && <TenantsPanel {...identityPanelProps(props)} />}
|
||||
|
||||
@ -1,14 +1,19 @@
|
||||
import { useEffect, useMemo, useState } from 'react';
|
||||
import { Gauge } from 'lucide-react';
|
||||
import type { IntegrationPlatform, ModelRateLimitStatus } from '@easyai-ai-gateway/contracts';
|
||||
import { Badge, Card, CardContent, CardHeader, CardTitle, EmptyState, Table, TableCell, TableHead, TableRow } from '../../components/ui';
|
||||
import { useEffect, useMemo, useState, type FormEvent } from 'react';
|
||||
import { Popover as AntPopover } from 'antd';
|
||||
import { CheckCircle2, Gauge, History, RotateCcw, SlidersHorizontal } from 'lucide-react';
|
||||
import type { IntegrationPlatform, ModelRateLimitStatus, PlatformDynamicPriorityUpdateRequest, PriorityDemotionRecord } from '@easyai-ai-gateway/contracts';
|
||||
import { Badge, Button, Card, CardContent, CardHeader, CardTitle, EmptyState, FormDialog, Input, Label, Table, TableCell, TableHead, TableRow } from '../../components/ui';
|
||||
|
||||
export function RealtimeLoadPanel(props: {
|
||||
modelRateLimits: ModelRateLimitStatus[];
|
||||
modelRateLimitsUpdatedAt: number | null;
|
||||
platforms: IntegrationPlatform[];
|
||||
onSavePlatformDynamicPriority: (platformId: string, input: PlatformDynamicPriorityUpdateRequest) => Promise<void>;
|
||||
}) {
|
||||
const [now, setNow] = useState(() => Date.now());
|
||||
const [priorityDialog, setPriorityDialog] = useState<PriorityDialogState | null>(null);
|
||||
const [priorityError, setPriorityError] = useState('');
|
||||
const [prioritySaving, setPrioritySaving] = useState(false);
|
||||
const platformMap = useMemo(() => new Map(props.platforms.map((item) => [item.id, item])), [props.platforms]);
|
||||
|
||||
useEffect(() => {
|
||||
@ -16,6 +21,50 @@ export function RealtimeLoadPanel(props: {
|
||||
return () => window.clearInterval(timer);
|
||||
}, []);
|
||||
|
||||
function openPriorityDialog(status: ModelRateLimitStatus, platform: IntegrationPlatform | undefined) {
|
||||
setPriorityError('');
|
||||
setPriorityDialog({
|
||||
platform,
|
||||
status,
|
||||
value: formatPriority(platformEffectivePriority(status, platform)),
|
||||
});
|
||||
}
|
||||
|
||||
function closePriorityDialog() {
|
||||
if (prioritySaving) return;
|
||||
setPriorityDialog(null);
|
||||
setPriorityError('');
|
||||
}
|
||||
|
||||
async function submitPriorityForm(event: FormEvent<HTMLFormElement>) {
|
||||
event.preventDefault();
|
||||
if (!priorityDialog) return;
|
||||
const dynamicPriority = parsePriorityInput(priorityDialog.value);
|
||||
if (dynamicPriority === null) {
|
||||
setPriorityError('请输入大于等于 0 的整数。');
|
||||
return;
|
||||
}
|
||||
await savePriority(priorityDialog.status.platformId, { dynamicPriority });
|
||||
}
|
||||
|
||||
async function resetPriority() {
|
||||
if (!priorityDialog) return;
|
||||
await savePriority(priorityDialog.status.platformId, { reset: true });
|
||||
}
|
||||
|
||||
async function savePriority(platformId: string, input: PlatformDynamicPriorityUpdateRequest) {
|
||||
setPrioritySaving(true);
|
||||
setPriorityError('');
|
||||
try {
|
||||
await props.onSavePlatformDynamicPriority(platformId, input);
|
||||
setPriorityDialog(null);
|
||||
} catch (err) {
|
||||
setPriorityError(err instanceof Error ? err.message : '更新平台动态优先级失败');
|
||||
} finally {
|
||||
setPrioritySaving(false);
|
||||
}
|
||||
}
|
||||
|
||||
return (
|
||||
<section className="pageStack">
|
||||
<Card>
|
||||
@ -31,14 +80,36 @@ export function RealtimeLoadPanel(props: {
|
||||
platformMap={platformMap}
|
||||
statuses={props.modelRateLimits}
|
||||
updatedAt={props.modelRateLimitsUpdatedAt}
|
||||
onAdjustPriority={openPriorityDialog}
|
||||
/>
|
||||
</CardContent>
|
||||
</Card>
|
||||
<PlatformPriorityDialog
|
||||
dialog={priorityDialog}
|
||||
error={priorityError}
|
||||
saving={prioritySaving}
|
||||
onClose={closePriorityDialog}
|
||||
onReset={resetPriority}
|
||||
onSubmit={submitPriorityForm}
|
||||
onValueChange={(value) => setPriorityDialog((current) => current ? { ...current, value } : current)}
|
||||
/>
|
||||
</section>
|
||||
);
|
||||
}
|
||||
|
||||
function RateLimitStatusTable(props: { statuses: ModelRateLimitStatus[]; platformMap: Map<string, IntegrationPlatform>; now: number; updatedAt: number | null }) {
|
||||
type PriorityDialogState = {
|
||||
platform: IntegrationPlatform | undefined;
|
||||
status: ModelRateLimitStatus;
|
||||
value: string;
|
||||
};
|
||||
|
||||
function RateLimitStatusTable(props: {
|
||||
statuses: ModelRateLimitStatus[];
|
||||
platformMap: Map<string, IntegrationPlatform>;
|
||||
now: number;
|
||||
updatedAt: number | null;
|
||||
onAdjustPriority: (status: ModelRateLimitStatus, platform: IntegrationPlatform | undefined) => void;
|
||||
}) {
|
||||
if (!props.statuses.length) {
|
||||
return <EmptyState title="暂无实时负载" description="模型产生请求后会在这里显示实时 RPM、TPM 和并发窗口。" />;
|
||||
}
|
||||
@ -53,6 +124,8 @@ function RateLimitStatusTable(props: { statuses: ModelRateLimitStatus[]; platfor
|
||||
<TableRow className="shTableHeader">
|
||||
<TableHead>模型</TableHead>
|
||||
<TableHead>平台</TableHead>
|
||||
<TableHead className="platformLimitNumberHead">平台优先级</TableHead>
|
||||
<TableHead className="platformLimitNumberHead">满载率</TableHead>
|
||||
<TableHead className="platformLimitMetricHead platformLimitNumberHead" title="正在执行 / 并发上限 / 排队任务">
|
||||
<span>并发</span>
|
||||
<small>正在执行 / 并发 / 排队</small>
|
||||
@ -60,7 +133,6 @@ function RateLimitStatusTable(props: { statuses: ModelRateLimitStatus[]; platfor
|
||||
<TableHead className="platformLimitNumberHead">TPM</TableHead>
|
||||
<TableHead className="platformLimitNumberHead">RPM</TableHead>
|
||||
<TableHead className="platformLimitStatusHead">状态</TableHead>
|
||||
<TableHead className="platformLimitNumberHead">满载率</TableHead>
|
||||
</TableRow>
|
||||
{props.statuses.map((status) => {
|
||||
const platform = props.platformMap.get(status.platformId);
|
||||
@ -78,16 +150,17 @@ function RateLimitStatusTable(props: { statuses: ModelRateLimitStatus[]; platfor
|
||||
<small>{status.provider}</small>
|
||||
</span>
|
||||
</TableCell>
|
||||
<TableCell className="platformLimitNumberCell">{concurrencyMetricCell(status)}</TableCell>
|
||||
<TableCell className="platformLimitNumberCell">{metricCell(status.tpm, true)}</TableCell>
|
||||
<TableCell className="platformLimitNumberCell">{metricCell(status.rpm)}</TableCell>
|
||||
<TableCell className="platformLimitStatusCell">{modelRuntimeStatusCell(status, props.now)}</TableCell>
|
||||
<TableCell className="platformLimitNumberCell">{platformPriorityCell(status, platform, props.onAdjustPriority)}</TableCell>
|
||||
<TableCell className="platformLimitNumberCell">
|
||||
<span className="rateLoadCell" data-overloaded={status.loadRatio > 0.8 ? 'true' : undefined}>
|
||||
<strong>{formatPercent(status.loadRatio)}</strong>
|
||||
<span className="rateLoadTrack"><i style={{ width: `${Math.min(status.loadRatio * 100, 100)}%` }} /></span>
|
||||
</span>
|
||||
</TableCell>
|
||||
<TableCell className="platformLimitNumberCell">{concurrencyMetricCell(status)}</TableCell>
|
||||
<TableCell className="platformLimitNumberCell">{metricCell(status.tpm, true)}</TableCell>
|
||||
<TableCell className="platformLimitNumberCell">{metricCell(status.rpm)}</TableCell>
|
||||
<TableCell className="platformLimitStatusCell">{modelRuntimeStatusCell(status, props.now)}</TableCell>
|
||||
</TableRow>
|
||||
);
|
||||
})}
|
||||
@ -101,6 +174,176 @@ function platformDisplayName(platform: IntegrationPlatform) {
|
||||
return platform.internalName?.trim() || platform.name;
|
||||
}
|
||||
|
||||
function platformPriorityCell(
|
||||
status: ModelRateLimitStatus,
|
||||
platform: IntegrationPlatform | undefined,
|
||||
onAdjustPriority: (status: ModelRateLimitStatus, platform: IntegrationPlatform | undefined) => void,
|
||||
) {
|
||||
const records = status.recentPriorityDemotions ?? [];
|
||||
const content = <PriorityDemotionPopover records={records} />;
|
||||
return (
|
||||
<span className="platformPriorityCell">
|
||||
<AntPopover
|
||||
align={{ offset: [0, 8] }}
|
||||
content={content}
|
||||
overlayClassName="priorityDemotionAntPopover"
|
||||
placement="bottomLeft"
|
||||
trigger={['hover', 'focus']}
|
||||
>
|
||||
<button className="priorityDemotionTrigger" type="button" aria-label={priorityDemotionAriaLabel(status, platform)}>
|
||||
<span className="rateMetricCell">
|
||||
<strong>{formatPriority(platformEffectivePriority(status, platform))}</strong>
|
||||
<small>{platformPrioritySubtitle(status, platform, records.length)}</small>
|
||||
</span>
|
||||
</button>
|
||||
</AntPopover>
|
||||
<Button className="priorityAdjustButton" type="button" variant="outline" size="xs" onClick={() => onAdjustPriority(status, platform)}>
|
||||
<SlidersHorizontal size={13} />
|
||||
调整
|
||||
</Button>
|
||||
</span>
|
||||
);
|
||||
}
|
||||
|
||||
function PlatformPriorityDialog(props: {
|
||||
dialog: PriorityDialogState | null;
|
||||
error: string;
|
||||
saving: boolean;
|
||||
onClose: () => void;
|
||||
onReset: () => void;
|
||||
onSubmit: (event: FormEvent<HTMLFormElement>) => void;
|
||||
onValueChange: (value: string) => void;
|
||||
}) {
|
||||
const dialog = props.dialog;
|
||||
const platform = dialog?.platform;
|
||||
const status = dialog?.status;
|
||||
return (
|
||||
<FormDialog
|
||||
bodyClassName="platformPriorityDialogBody"
|
||||
className="platformPriorityDialog"
|
||||
closeLabel="关闭"
|
||||
eyebrow="Runtime Priority"
|
||||
footer={(
|
||||
<>
|
||||
<Button type="button" variant="outline" disabled={props.saving} onClick={props.onClose}>
|
||||
取消
|
||||
</Button>
|
||||
<Button type="button" variant="secondary" disabled={props.saving || !dialog} onClick={props.onReset}>
|
||||
<RotateCcw size={15} />
|
||||
重置为静态优先级
|
||||
</Button>
|
||||
<Button type="submit" disabled={props.saving || !dialog}>
|
||||
<CheckCircle2 size={15} />
|
||||
保存动态优先级
|
||||
</Button>
|
||||
</>
|
||||
)}
|
||||
open={Boolean(dialog)}
|
||||
title="调整平台动态优先级"
|
||||
onClose={props.onClose}
|
||||
onSubmit={props.onSubmit}
|
||||
>
|
||||
<div className="platformPriorityDialogSummary">
|
||||
<span>
|
||||
<strong>{platform ? platformDisplayName(platform) : status?.platformName}</strong>
|
||||
<small>{status?.provider}</small>
|
||||
</span>
|
||||
<span>
|
||||
<strong>{formatPriority(platformEffectivePriority(status, platform))}</strong>
|
||||
<small>当前生效</small>
|
||||
</span>
|
||||
</div>
|
||||
<div className="platformPriorityDialogMetrics">
|
||||
<span>静态 {formatPriority(platformStaticPriority(status, platform))}</span>
|
||||
<span>动态 {formatPriority(platformDynamicPriority(status, platform))}</span>
|
||||
</div>
|
||||
<Label>
|
||||
动态优先级
|
||||
<Input
|
||||
value={dialog?.value ?? ''}
|
||||
inputMode="numeric"
|
||||
min={0}
|
||||
placeholder="例如 1511"
|
||||
onChange={(event) => props.onValueChange(event.target.value)}
|
||||
/>
|
||||
</Label>
|
||||
{props.error && <p className="formMessage error">{props.error}</p>}
|
||||
</FormDialog>
|
||||
);
|
||||
}
|
||||
|
||||
function PriorityDemotionPopover(props: { records: PriorityDemotionRecord[] }) {
|
||||
if (!props.records.length) {
|
||||
return (
|
||||
<span className="priorityDemotionPopover" role="tooltip">
|
||||
<span className="priorityDemotionEmpty">暂无优先级降级记录</span>
|
||||
</span>
|
||||
);
|
||||
}
|
||||
return (
|
||||
<span className="priorityDemotionPopover" role="tooltip">
|
||||
<span className="priorityDemotionHeader">
|
||||
<History size={14} />
|
||||
<strong>最近 10 条优先级降级</strong>
|
||||
</span>
|
||||
{props.records.map((record) => (
|
||||
<span key={record.id} className="priorityDemotionItem">
|
||||
<span className="priorityDemotionItemHeader">
|
||||
<strong>{priorityDemotionReasonText(record)}</strong>
|
||||
{priorityIsSet(record.dynamicPriority) ? <Badge variant="secondary">队尾 {formatPriority(record.dynamicPriority)}</Badge> : null}
|
||||
</span>
|
||||
<small>{priorityDemotionMetaText(record)}</small>
|
||||
{record.errorMessage && <span className="priorityDemotionError">{record.errorMessage}</span>}
|
||||
</span>
|
||||
))}
|
||||
</span>
|
||||
);
|
||||
}
|
||||
|
||||
function platformPrioritySubtitle(status: ModelRateLimitStatus, platform: IntegrationPlatform | undefined, demotionCount: number) {
|
||||
const staticPriority = platformStaticPriority(status, platform);
|
||||
const dynamicPriority = platformDynamicPriority(status, platform);
|
||||
const base = priorityIsSet(dynamicPriority) ? `静态 ${formatPriority(staticPriority)} · 动态 ${formatPriority(dynamicPriority)}` : `静态 ${formatPriority(staticPriority)}`;
|
||||
return demotionCount ? `${base} · 降级 ${demotionCount}` : base;
|
||||
}
|
||||
|
||||
function priorityDemotionAriaLabel(status: ModelRateLimitStatus, platform: IntegrationPlatform | undefined) {
|
||||
const count = status.recentPriorityDemotions?.length ?? 0;
|
||||
return `平台优先级 ${formatPriority(platformEffectivePriority(status, platform))},最近 ${count} 条优先级降级记录`;
|
||||
}
|
||||
|
||||
function priorityDemotionReasonText(record: PriorityDemotionRecord) {
|
||||
const category = record.category ? `错误分类 ${record.category}` : '';
|
||||
const code = record.errorCode ? `错误 ${record.errorCode}` : '';
|
||||
const statusCode = record.statusCode ? `状态码 ${record.statusCode}` : '';
|
||||
return [priorityDemotionReasonLabel(record.reason), statusCode, code, category].filter(Boolean).join(' · ');
|
||||
}
|
||||
|
||||
function priorityDemotionMetaText(record: PriorityDemotionRecord) {
|
||||
const policy = priorityDemotionPolicyText(record);
|
||||
const values = [
|
||||
formatDateTime(record.createdAt),
|
||||
shortId(record.taskId) ? `任务 ${shortId(record.taskId)}` : '',
|
||||
policy,
|
||||
].filter(Boolean);
|
||||
return values.join(' · ') || '-';
|
||||
}
|
||||
|
||||
function priorityDemotionPolicyText(record: PriorityDemotionRecord) {
|
||||
const policyPath = [record.policySource || record.policy, record.policyRule].filter(Boolean).join('.');
|
||||
if (!policyPath) return '';
|
||||
return record.matchedValue ? `策略 ${policyPath}=${record.matchedValue}` : `策略 ${policyPath}`;
|
||||
}
|
||||
|
||||
function priorityDemotionReasonLabel(reason: string | undefined) {
|
||||
const labels: Record<string, string> = {
|
||||
priority_demote_policy: '命中优先级降级规则',
|
||||
hard_stop_policy: '命中硬拒绝规则',
|
||||
runner_policy_disabled: '全局调度策略停用',
|
||||
};
|
||||
return reason ? labels[reason] ?? reason : '优先级降级';
|
||||
}
|
||||
|
||||
function metricCell(metric: ModelRateLimitStatus['rpm'], includeReserved = false) {
|
||||
if (!metric.limited) return <span className="rateMetricCell"><strong>{formatLimit(metric.currentValue)} / 不限</strong><small>{includeReserved ? reservedMetricText(metric) : '未配置上限'}</small></span>;
|
||||
return (
|
||||
@ -132,6 +375,19 @@ function formatTimeOfDay(timestamp: number | null) {
|
||||
return `${pad(date.getHours())}:${pad(date.getMinutes())}:${pad(date.getSeconds())}`;
|
||||
}
|
||||
|
||||
function formatDateTime(value: string | undefined) {
|
||||
if (!value) return '';
|
||||
const date = new Date(value);
|
||||
if (Number.isNaN(date.getTime())) return value;
|
||||
const pad = (item: number) => String(item).padStart(2, '0');
|
||||
return `${pad(date.getMonth() + 1)}-${pad(date.getDate())} ${pad(date.getHours())}:${pad(date.getMinutes())}:${pad(date.getSeconds())}`;
|
||||
}
|
||||
|
||||
function shortId(value: string | undefined) {
|
||||
if (!value) return '';
|
||||
return value.length > 8 ? value.slice(0, 8) : value;
|
||||
}
|
||||
|
||||
function modelRuntimeStatusCell(status: ModelRateLimitStatus, now: number) {
|
||||
const modelCooldownMs = cooldownRemainingMs(status.modelCooldownUntil, now);
|
||||
const platformCooldownMs = cooldownRemainingMs(status.platformCooldownUntil, now);
|
||||
@ -184,6 +440,53 @@ function formatLimit(value: number) {
|
||||
return trimNumber(value);
|
||||
}
|
||||
|
||||
function platformEffectivePriority(status: ModelRateLimitStatus | undefined, platform: IntegrationPlatform | undefined) {
|
||||
return firstPriority(
|
||||
status?.platformEffectivePriority,
|
||||
platform?.effectivePriority,
|
||||
status?.platformDynamicPriority,
|
||||
platform?.dynamicPriority,
|
||||
status?.platformPriority,
|
||||
platform?.priority,
|
||||
);
|
||||
}
|
||||
|
||||
function platformStaticPriority(status: ModelRateLimitStatus | undefined, platform: IntegrationPlatform | undefined) {
|
||||
return firstPriority(status?.platformPriority, platform?.priority);
|
||||
}
|
||||
|
||||
function platformDynamicPriority(status: ModelRateLimitStatus | undefined, platform: IntegrationPlatform | undefined) {
|
||||
return firstPriority(
|
||||
status?.platformDynamicPriority,
|
||||
platform?.dynamicPriority,
|
||||
status?.platformEffectivePriority,
|
||||
platform?.effectivePriority,
|
||||
status?.platformPriority,
|
||||
platform?.priority,
|
||||
);
|
||||
}
|
||||
|
||||
function firstPriority(...values: Array<number | undefined>) {
|
||||
return values.find(priorityIsSet);
|
||||
}
|
||||
|
||||
function priorityIsSet(value: number | undefined): value is number {
|
||||
return typeof value === 'number' && Number.isFinite(value);
|
||||
}
|
||||
|
||||
function formatPriority(value: number | undefined) {
|
||||
if (!Number.isFinite(value)) return '-';
|
||||
return String(Math.trunc(value ?? 0));
|
||||
}
|
||||
|
||||
function parsePriorityInput(value: string) {
|
||||
const trimmed = value.trim();
|
||||
if (!/^\d+$/.test(trimmed)) return null;
|
||||
const parsed = Number(trimmed);
|
||||
if (!Number.isSafeInteger(parsed)) return null;
|
||||
return parsed;
|
||||
}
|
||||
|
||||
function trimNumber(value: number) {
|
||||
return Number.isInteger(value) ? String(value) : value.toFixed(2).replace(/\.?0+$/, '');
|
||||
}
|
||||
|
||||
@ -50,7 +50,6 @@ type RunnerPolicyForm = {
|
||||
hardStopStatusCodes: string[];
|
||||
hardStopKeywords: string[];
|
||||
priorityDemoteEnabled: boolean;
|
||||
priorityDemoteStep: string;
|
||||
priorityDemoteCategories: string[];
|
||||
priorityDemoteCodes: string[];
|
||||
priorityDemoteStatusCodes: string[];
|
||||
@ -406,11 +405,7 @@ function RunnerPolicyEditor(props: {
|
||||
{activeStrategy === 'priorityDemote' && (
|
||||
<div className="runtimePolicyRows runnerPolicyDetailRows">
|
||||
<Toggle checked={props.form.priorityDemoteEnabled} label="启用优先级降级" onChange={(checked) => patch({ priorityDemoteEnabled: checked })} />
|
||||
<Label>
|
||||
降级步长
|
||||
<Input value={props.form.priorityDemoteStep} inputMode="numeric" onChange={(event) => patch({ priorityDemoteStep: event.target.value })} />
|
||||
<span className="runtimeFieldHint">命中降级规则后,失败平台动态优先级向后调整的幅度,默认 100。</span>
|
||||
</Label>
|
||||
<span className="runtimeFieldHint spanTwo">命中降级规则后,失败平台会自动调整到当前所有平台的优先级队尾。</span>
|
||||
<KeywordField label="降级分类" value={props.form.priorityDemoteCategories} onChange={(value) => patch({ priorityDemoteCategories: value })} />
|
||||
<KeywordField label="降级错误码" value={props.form.priorityDemoteCodes} onChange={(value) => patch({ priorityDemoteCodes: value })} />
|
||||
<KeywordField label="降级状态码" value={props.form.priorityDemoteStatusCodes} onChange={(value) => patch({ priorityDemoteStatusCodes: value })} />
|
||||
@ -478,7 +473,6 @@ function runnerPolicyToForm(policy: GatewayRunnerPolicy | null): RunnerPolicyFor
|
||||
hardStopStatusCodes: tagsFromValue(hardStop.statusCodes ?? []),
|
||||
hardStopKeywords: tagsFromValue(hardStop.keywords ?? ['invalid_parameter', 'missing required', 'bad request', 'insufficient balance']),
|
||||
priorityDemoteEnabled: readBool(priorityDemote.enabled, true),
|
||||
priorityDemoteStep: String(readNumber(priorityDemote.demoteStep, 100)),
|
||||
priorityDemoteCategories: tagsFromValue(priorityDemote.categories ?? ['network', 'timeout', 'stream_error', 'rate_limit', 'provider_5xx', 'provider_overloaded']),
|
||||
priorityDemoteCodes: tagsFromValue(priorityDemote.codes ?? ['network', 'timeout', 'stream_read_error', 'rate_limit', 'server_error', 'overloaded']),
|
||||
priorityDemoteStatusCodes: tagsFromValue(priorityDemote.statusCodes ?? [408, 429, 500, 502, 503, 504]),
|
||||
@ -516,7 +510,6 @@ function runnerFormToPayload(form: RunnerPolicyForm): GatewayRunnerPolicyUpsertR
|
||||
},
|
||||
priorityDemotePolicy: {
|
||||
enabled: form.priorityDemoteEnabled,
|
||||
demoteStep: positiveInt(form.priorityDemoteStep, 100),
|
||||
categories: cleanTags(form.priorityDemoteCategories),
|
||||
codes: cleanTags(form.priorityDemoteCodes),
|
||||
statusCodes: parseNumberTags(form.priorityDemoteStatusCodes),
|
||||
|
||||
@ -1016,8 +1016,8 @@
|
||||
}
|
||||
|
||||
.platformLimitTable .shTableRow {
|
||||
grid-template-columns: minmax(180px, 1.15fr) minmax(160px, 0.95fr) 150px 170px 140px 132px 132px;
|
||||
min-width: 1064px;
|
||||
grid-template-columns: minmax(180px, 1.1fr) minmax(160px, 0.9fr) 160px 132px 150px 170px 140px 132px;
|
||||
min-width: 1224px;
|
||||
}
|
||||
|
||||
.platformLimitTable .shTableHead,
|
||||
@ -1113,6 +1113,143 @@
|
||||
background: var(--destructive);
|
||||
}
|
||||
|
||||
.platformPriorityCell {
|
||||
display: grid;
|
||||
width: 100%;
|
||||
min-width: 0;
|
||||
gap: 7px;
|
||||
align-content: start;
|
||||
}
|
||||
|
||||
.platformPriorityCell .rateMetricCell small {
|
||||
overflow: visible;
|
||||
line-height: 1.35;
|
||||
text-overflow: clip;
|
||||
white-space: normal;
|
||||
}
|
||||
|
||||
.priorityDemotionTrigger {
|
||||
display: grid;
|
||||
width: 100%;
|
||||
padding: 0;
|
||||
border: 0;
|
||||
background: transparent;
|
||||
color: inherit;
|
||||
cursor: default;
|
||||
text-align: left;
|
||||
font: inherit;
|
||||
}
|
||||
|
||||
.priorityAdjustButton {
|
||||
justify-self: start;
|
||||
min-height: 22px;
|
||||
padding-inline: 8px;
|
||||
border-color: var(--border);
|
||||
color: var(--text-normal);
|
||||
background: #fff;
|
||||
}
|
||||
|
||||
.priorityDemotionAntPopover {
|
||||
z-index: 1200;
|
||||
}
|
||||
|
||||
.priorityDemotionPopover {
|
||||
display: grid;
|
||||
width: min(36rem, calc(100vw - 2rem));
|
||||
gap: 0.6rem;
|
||||
}
|
||||
|
||||
.priorityDemotionHeader,
|
||||
.priorityDemotionItemHeader {
|
||||
display: flex;
|
||||
min-width: 0;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
gap: 0.5rem;
|
||||
}
|
||||
|
||||
.priorityDemotionHeader {
|
||||
color: var(--text-strong);
|
||||
}
|
||||
|
||||
.priorityDemotionHeader strong,
|
||||
.priorityDemotionItemHeader strong {
|
||||
min-width: 0;
|
||||
overflow-wrap: anywhere;
|
||||
}
|
||||
|
||||
.priorityDemotionItem {
|
||||
display: grid;
|
||||
gap: 0.35rem;
|
||||
padding-bottom: 0.6rem;
|
||||
border-bottom: 1px solid var(--border);
|
||||
}
|
||||
|
||||
.priorityDemotionItem:last-child {
|
||||
padding-bottom: 0;
|
||||
border-bottom: 0;
|
||||
}
|
||||
|
||||
.priorityDemotionItem small,
|
||||
.priorityDemotionEmpty {
|
||||
color: var(--text-soft);
|
||||
font-size: var(--font-size-xs);
|
||||
line-height: 1.4;
|
||||
}
|
||||
|
||||
.priorityDemotionError {
|
||||
color: var(--destructive);
|
||||
font-size: var(--font-size-xs);
|
||||
line-height: 1.45;
|
||||
overflow-wrap: anywhere;
|
||||
}
|
||||
|
||||
.platformPriorityDialog {
|
||||
width: min(30rem, calc(100vw - 2rem));
|
||||
}
|
||||
|
||||
.platformPriorityDialogBody {
|
||||
display: grid;
|
||||
gap: 1rem;
|
||||
}
|
||||
|
||||
.platformPriorityDialogSummary,
|
||||
.platformPriorityDialogMetrics {
|
||||
display: flex;
|
||||
min-width: 0;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
gap: 1rem;
|
||||
}
|
||||
|
||||
.platformPriorityDialogSummary {
|
||||
padding: 0.75rem;
|
||||
border: 1px solid var(--border-subtle);
|
||||
border-radius: 8px;
|
||||
background: #f8fafc;
|
||||
}
|
||||
|
||||
.platformPriorityDialogSummary span {
|
||||
display: grid;
|
||||
min-width: 0;
|
||||
gap: 3px;
|
||||
}
|
||||
|
||||
.platformPriorityDialogSummary span:last-child {
|
||||
justify-items: end;
|
||||
font-variant-numeric: tabular-nums;
|
||||
}
|
||||
|
||||
.platformPriorityDialogSummary strong {
|
||||
color: var(--text-strong);
|
||||
}
|
||||
|
||||
.platformPriorityDialogSummary small,
|
||||
.platformPriorityDialogMetrics {
|
||||
color: var(--muted-foreground);
|
||||
font-size: var(--font-size-xs);
|
||||
}
|
||||
|
||||
.platformModelToolbar {
|
||||
display: grid;
|
||||
grid-template-columns: minmax(220px, 0.6fr) minmax(260px, 1fr);
|
||||
|
||||
@ -62,6 +62,8 @@ export interface IntegrationPlatform {
|
||||
authType: string;
|
||||
status: 'enabled' | 'disabled' | string;
|
||||
priority: number;
|
||||
dynamicPriority?: number;
|
||||
effectivePriority?: number;
|
||||
defaultPricingMode: PricingMode;
|
||||
defaultDiscountFactor: number;
|
||||
pricingRuleSetId?: string;
|
||||
@ -760,11 +762,45 @@ export interface RateLimitMetricStatus {
|
||||
resetAt?: string;
|
||||
}
|
||||
|
||||
export interface PriorityDemotionRecord {
|
||||
id: string;
|
||||
taskId: string;
|
||||
platformId: string;
|
||||
platformModelId?: string;
|
||||
reason?: string;
|
||||
errorCode?: string;
|
||||
errorMessage?: string;
|
||||
category?: string;
|
||||
statusCode?: number;
|
||||
policySource?: string;
|
||||
policy?: string;
|
||||
policyRule?: string;
|
||||
matchedValue?: string;
|
||||
dynamicPriority?: number;
|
||||
createdAt: string;
|
||||
}
|
||||
|
||||
export interface PlatformDynamicPriorityUpdateRequest {
|
||||
dynamicPriority?: number;
|
||||
reset?: boolean;
|
||||
}
|
||||
|
||||
export interface PlatformDynamicPriorityState {
|
||||
platformId: string;
|
||||
priority: number;
|
||||
dynamicPriority?: number;
|
||||
effectivePriority: number;
|
||||
updatedAt: string;
|
||||
}
|
||||
|
||||
export interface ModelRateLimitStatus {
|
||||
platformModelId: string;
|
||||
platformId: string;
|
||||
platformName: string;
|
||||
provider: string;
|
||||
platformPriority: number;
|
||||
platformDynamicPriority?: number;
|
||||
platformEffectivePriority: number;
|
||||
modelName: string;
|
||||
providerModelName?: string;
|
||||
modelAlias?: string;
|
||||
@ -779,6 +815,7 @@ export interface ModelRateLimitStatus {
|
||||
rpm: RateLimitMetricStatus;
|
||||
tpm: RateLimitMetricStatus;
|
||||
loadRatio: number;
|
||||
recentPriorityDemotions?: PriorityDemotionRecord[];
|
||||
}
|
||||
|
||||
export interface GatewayNetworkProxyConfig {
|
||||
|
||||
Loading…
Reference in New Issue
Block a user