feat: improve model rate limit tracking

This commit is contained in:
wangbo 2026-05-12 03:22:29 +08:00
parent 05632172d0
commit ba850a06c6
25 changed files with 1223 additions and 96 deletions

View File

@ -30,6 +30,17 @@ func main() {
os.Exit(1)
}
defer db.Close()
if recovery, err := db.RecoverInterruptedRuntimeState(ctx); err != nil {
logger.Error("recover interrupted runtime state failed", "error", err)
os.Exit(1)
} else if recovery.ReleasedConcurrencyLeases > 0 || recovery.ReleasedRateReservations > 0 || recovery.FailedAttempts > 0 || recovery.FailedTasks > 0 {
logger.Warn("interrupted runtime state recovered",
"releasedConcurrencyLeases", recovery.ReleasedConcurrencyLeases,
"releasedRateReservations", recovery.ReleasedRateReservations,
"failedAttempts", recovery.FailedAttempts,
"failedTasks", recovery.FailedTasks,
)
}
server := &http.Server{
Addr: cfg.HTTPAddr,

View File

@ -665,6 +665,23 @@ WHERE reference_type = 'gateway_task'
"runtimePolicySetId": rateLimitPolicySet.ID,
"runtimePolicyOverride": map[string]any{},
}, http.StatusCreated, &rateLimitPlatformModel)
var rateLimitFailedTask struct {
Task struct {
Status string `json:"status"`
ErrorCode string `json:"errorCode"`
} `json:"task"`
}
doJSON(t, server.URL, http.MethodPost, "/api/v1/chat/completions", apiKeyResponse.Secret, map[string]any{
"model": rateLimitedModel,
"runMode": "simulation",
"simulation": true,
"simulationDurationMs": 5,
"simulationProfile": "non_retryable_failure",
"messages": []map[string]any{{"role": "user", "content": "failed first"}},
}, http.StatusAccepted, &rateLimitFailedTask)
if rateLimitFailedTask.Task.Status != "failed" || rateLimitFailedTask.Task.ErrorCode != "bad_request" {
t.Fatalf("failed rate-limited task should fail before consuming rpm: %+v", rateLimitFailedTask.Task)
}
var rateLimitTaskOne struct {
Task struct {
Status string `json:"status"`
@ -863,7 +880,9 @@ WHERE reference_type = 'gateway_task'
{platformID: degradedPlatform.ID, runtimePolicySetID: degradePolicySet.ID},
{platformID: degradeSuccessPlatform.ID},
} {
var platformModel map[string]any
var platformModel struct {
ID string `json:"id"`
}
doJSON(t, server.URL, http.MethodPost, "/api/admin/platforms/"+item.platformID+"/models", loginResponse.AccessToken, map[string]any{
"canonicalModelKey": "openai:gpt-4o-mini",
"modelName": degradeModel,
@ -887,14 +906,23 @@ WHERE reference_type = 'gateway_task'
"messages": []map[string]any{{"role": "user", "content": "degrade please"}},
}, http.StatusAccepted, &degradeTask)
if degradeTask.Task.Status != "succeeded" {
t.Fatalf("degrade task should fail over after cooling down failed platform: %+v", degradeTask.Task)
t.Fatalf("degrade task should fail over after cooling down failed model: %+v", degradeTask.Task)
}
var cooledDown bool
if err := testPool.QueryRow(ctx, `SELECT COALESCE(cooldown_until > now(), false) FROM integration_platforms WHERE id = $1::uuid`, degradedPlatform.ID).Scan(&cooledDown); err != nil {
t.Fatalf("read degraded platform cooldown: %v", err)
var cooledDownModel bool
var cooledDownPlatform bool
if err := testPool.QueryRow(ctx, `
SELECT COALESCE(m.cooldown_until > now(), false), COALESCE(p.cooldown_until > now(), false)
FROM platform_models m
JOIN integration_platforms p ON p.id = m.platform_id
WHERE m.platform_id = $1::uuid
AND m.model_name = $2`, degradedPlatform.ID, degradeModel).Scan(&cooledDownModel, &cooledDownPlatform); err != nil {
t.Fatalf("read degraded model cooldown: %v", err)
}
if !cooledDown {
t.Fatal("degrade policy should set platform cooldown_until")
if !cooledDownModel {
t.Fatal("degrade policy should set platform model cooldown_until")
}
if cooledDownPlatform {
t.Fatal("degrade policy should not cool down the entire platform")
}
var autoDisablePolicySet struct {

View File

@ -490,7 +490,7 @@ func (s *Server) estimatePricing(w http.ResponseWriter, r *http.Request) {
estimate, err := s.runner.Estimate(r.Context(), kind, model, body, user)
if err != nil {
if errors.Is(err, store.ErrNoModelCandidate) {
writeError(w, http.StatusNotFound, "no enabled platform model matches request")
writeError(w, statusFromRunError(err), err.Error(), store.ModelCandidateErrorCode(err))
return
}
s.logger.Error("estimate pricing failed", "error", err)
@ -510,6 +510,16 @@ func (s *Server) listRateLimitWindows(w http.ResponseWriter, r *http.Request) {
writeJSON(w, http.StatusOK, map[string]any{"items": items})
}
// listModelRateLimitStatuses handles the admin endpoint that reports the
// live per-model rate-limit usage snapshot. It delegates to the store and
// wraps the result in an {"items": [...]} envelope.
func (s *Server) listModelRateLimitStatuses(w http.ResponseWriter, r *http.Request) {
	statuses, err := s.store.ListModelRateLimitStatuses(r.Context())
	if err != nil {
		s.logger.Error("list model rate limit statuses failed", "error", err)
		writeError(w, http.StatusInternalServerError, "list model rate limit statuses failed")
		return
	}
	writeJSON(w, http.StatusOK, map[string]any{"items": statuses})
}
func (s *Server) createTask(kind string, compatible bool) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
user, ok := auth.UserFromContext(r.Context())
@ -557,7 +567,7 @@ func (s *Server) createTask(kind string, compatible bool) http.Handler {
if runErr != nil {
status := statusFromRunError(runErr)
errorPayload := map[string]any{
"code": clients.ErrorCode(runErr),
"code": runErrorCode(runErr),
"message": runErr.Error(),
"status": status,
}
@ -581,7 +591,7 @@ func (s *Server) createTask(kind string, compatible bool) http.Handler {
}
result, runErr := s.runner.Execute(r.Context(), task, user)
if runErr != nil {
writeError(w, statusFromRunError(runErr), runErr.Error(), clients.ErrorCode(runErr))
writeError(w, statusFromRunError(runErr), runErr.Error(), runErrorCode(runErr))
return
}
writeJSON(w, http.StatusOK, result.Output)
@ -634,10 +644,14 @@ func scopeForTaskKind(kind string) string {
func statusFromRunError(err error) int {
switch {
case store.ModelCandidateErrorCode(err) == "platform_cooling_down" || store.ModelCandidateErrorCode(err) == "model_cooling_down":
return http.StatusTooManyRequests
case errors.Is(err, store.ErrNoModelCandidate):
return http.StatusNotFound
case errors.Is(err, store.ErrRateLimited):
return http.StatusTooManyRequests
case clients.ErrorCode(err) == "rate_limit":
return http.StatusTooManyRequests
case errors.Is(err, store.ErrInsufficientWalletBalance):
return http.StatusPaymentRequired
default:
@ -645,6 +659,13 @@ func statusFromRunError(err error) int {
}
}
// runErrorCode maps a task-run error to the API error code exposed to
// clients: candidate-selection failures carry the store-provided code
// (e.g. model_cooling_down), everything else uses the client-layer code.
func runErrorCode(err error) string {
	if !errors.Is(err, store.ErrNoModelCandidate) {
		return clients.ErrorCode(err)
	}
	return store.ModelCandidateErrorCode(err)
}
func (s *Server) listTasks(w http.ResponseWriter, r *http.Request) {
user, ok := auth.UserFromContext(r.Context())
if !ok {

View File

@ -107,6 +107,7 @@ func NewServer(cfg config.Config, db *store.Store, logger *slog.Logger) http.Han
mux.Handle("GET /api/v1/models", server.auth.Require(auth.PermissionBasic, http.HandlerFunc(server.listPlayableModels)))
mux.Handle("GET /api/v1/playground/models", server.auth.Require(auth.PermissionBasic, http.HandlerFunc(server.listPlayableModels)))
mux.Handle("GET /api/admin/runtime/rate-limit-windows", server.requireAdmin(auth.PermissionPower, http.HandlerFunc(server.listRateLimitWindows)))
mux.Handle("GET /api/admin/runtime/model-rate-limits", server.requireAdmin(auth.PermissionPower, http.HandlerFunc(server.listModelRateLimitStatuses)))
mux.Handle("POST /api/v1/chat/completions", server.auth.Require(auth.PermissionBasic, server.createTask("chat.completions", false)))
mux.Handle("POST /api/v1/responses", server.auth.Require(auth.PermissionBasic, server.createTask("responses", false)))
mux.Handle("POST /api/v1/images/generations", server.auth.Require(auth.PermissionBasic, server.createTask("images.generations", false)))

View File

@ -5,6 +5,7 @@ import (
"strings"
"github.com/easyai/easyai-ai-gateway/apps/api/internal/auth"
"github.com/easyai/easyai-ai-gateway/apps/api/internal/clients"
"github.com/easyai/easyai-ai-gateway/apps/api/internal/store"
)
@ -108,3 +109,21 @@ func estimateRequestTokens(body map[string]any) int {
}
return len([]rune(text))/4 + 1
}
// tokenUsageAmounts converts a client usage report into the rate-limit
// metric amounts consumed by one request. Only positive counts are emitted;
// when the provider omits TotalTokens, the total falls back to
// InputTokens + OutputTokens.
func tokenUsageAmounts(usage clients.Usage) map[string]float64 {
	amounts := make(map[string]float64, 3)
	if usage.InputTokens > 0 {
		amounts["tpm_input"] = float64(usage.InputTokens)
	}
	if usage.OutputTokens > 0 {
		amounts["tpm_output"] = float64(usage.OutputTokens)
	}
	totalTokens := usage.TotalTokens
	if totalTokens <= 0 {
		totalTokens = usage.InputTokens + usage.OutputTokens
	}
	if totalTokens > 0 {
		amounts["tpm_total"] = float64(totalTokens)
	}
	return amounts
}

View File

@ -0,0 +1,29 @@
package runner
import (
"testing"
"github.com/easyai/easyai-ai-gateway/apps/api/internal/clients"
)
// Verifies that reported provider usage maps onto the tpm_* metrics,
// including a provider-supplied total that differs from input+output.
func TestTokenUsageAmountsUsesActualUsageForTPM(t *testing.T) {
	amounts := tokenUsageAmounts(clients.Usage{InputTokens: 12, OutputTokens: 8, TotalTokens: 21})
	switch {
	case amounts["tpm_input"] != 12:
		t.Fatalf("expected input token amount 12, got %v", amounts["tpm_input"])
	case amounts["tpm_output"] != 8:
		t.Fatalf("expected output token amount 8, got %v", amounts["tpm_output"])
	case amounts["tpm_total"] != 21:
		t.Fatalf("expected total token amount 21, got %v", amounts["tpm_total"])
	}
}
// Verifies the tpm_total fallback: with no provider TotalTokens, the total
// is derived from InputTokens + OutputTokens.
func TestTokenUsageAmountsFallsBackToInputOutputTotal(t *testing.T) {
	amounts := tokenUsageAmounts(clients.Usage{InputTokens: 3, OutputTokens: 5})
	if amounts["tpm_total"] != 8 {
		t.Fatalf("expected total token fallback 8, got %v", amounts["tpm_total"])
	}
}

View File

@ -29,8 +29,8 @@ func (s *Service) applyCandidateFailurePolicies(ctx context.Context, taskID stri
degradePolicy := effectiveRuntimePolicy(candidate.DegradePolicy, candidate.RuntimePolicyOverride, "degradePolicy")
if failurePolicyMatches(degradePolicy, code, message) {
cooldownSeconds := intFromPolicy(degradePolicy, "cooldownSeconds")
if err := s.store.CooldownCandidatePlatform(ctx, candidate.PlatformID, cooldownSeconds); err == nil {
_ = s.emit(ctx, taskID, "task.policy.degraded", "running", "degrade", 0.5, "candidate platform cooled down by failure policy", map[string]any{
if err := s.store.CooldownCandidatePlatformModel(ctx, candidate.PlatformModelID, cooldownSeconds); err == nil {
_ = s.emit(ctx, taskID, "task.policy.degraded", "running", "degrade", 0.5, "candidate model cooled down by failure policy", map[string]any{
"platformId": candidate.PlatformID,
"platformModelId": candidate.PlatformModelID,
"cooldownSeconds": cooldownSeconds,
@ -52,8 +52,8 @@ func (s *Service) applyFailoverAction(ctx context.Context, taskID string, candid
}, decision.Match, decision.Info), simulated)
}
case "cooldown_and_next":
if err := s.store.CooldownCandidatePlatform(ctx, candidate.PlatformID, decision.CooldownSeconds); err == nil {
_ = s.emit(ctx, taskID, "task.policy.failover_cooled_down", "running", "failover", 0.51, "candidate platform cooled down by runner failover policy", addPolicyTracePayload(map[string]any{
if err := s.store.CooldownCandidatePlatformModel(ctx, candidate.PlatformModelID, decision.CooldownSeconds); err == nil {
_ = s.emit(ctx, taskID, "task.policy.failover_cooled_down", "running", "failover", 0.51, "candidate model cooled down by runner failover policy", addPolicyTracePayload(map[string]any{
"platformId": candidate.PlatformID,
"platformModelId": candidate.PlatformModelID,
"cooldownSeconds": decision.CooldownSeconds,

View File

@ -64,7 +64,7 @@ func (s *Service) execute(ctx context.Context, task store.GatewayTask, user *aut
}
candidates, err := s.store.ListModelCandidates(ctx, task.Model, modelType, user)
if err != nil {
failed, finishErr := s.failTask(ctx, task.ID, "no_model_candidate", err.Error(), task.RunMode == "simulation", err)
failed, finishErr := s.failTask(ctx, task.ID, store.ModelCandidateErrorCode(err), err.Error(), task.RunMode == "simulation", err)
if finishErr != nil {
return Result{}, finishErr
}
@ -279,7 +279,7 @@ func (s *Service) runCandidate(ctx context.Context, task store.GatewayTask, user
return clients.Response{}, err
}
reservations := s.rateLimitReservations(ctx, user, candidate, body)
limitResult, err := s.store.ReserveRateLimits(ctx, task.ID, reservations)
limitResult, err := s.store.ReserveRateLimits(ctx, task.ID, attemptID, reservations)
if err != nil {
clientErr := &clients.ClientError{Code: "rate_limit", Message: err.Error(), Retryable: false}
_ = s.store.FinishTaskAttempt(ctx, store.FinishTaskAttemptInput{
@ -292,6 +292,12 @@ func (s *Service) runCandidate(ctx context.Context, task store.GatewayTask, user
})
return clients.Response{}, clientErr
}
rateReservationsFinalized := false
defer func() {
if !rateReservationsFinalized {
_ = s.store.ReleaseRateLimitReservations(context.WithoutCancel(ctx), limitResult.Reservations, "attempt_failed")
}
}()
defer s.store.ReleaseConcurrencyLeases(context.WithoutCancel(ctx), limitResult.LeaseIDs)
if err := s.store.RecordClientAssignment(ctx, candidate); err != nil {
@ -397,6 +403,10 @@ func (s *Service) runCandidate(ctx context.Context, task store.GatewayTask, user
return clients.Response{}, err
}
}
if err := s.store.CommitRateLimitReservations(ctx, limitResult.Reservations, tokenUsageAmounts(response.Usage)); err != nil {
return clients.Response{}, err
}
rateReservationsFinalized = true
if err := s.store.FinishTaskAttempt(ctx, store.FinishTaskAttemptInput{
AttemptID: attemptID,
Status: "succeeded",

View File

@ -3,6 +3,7 @@ package store
import (
"context"
"fmt"
"strings"
"github.com/easyai/easyai-ai-gateway/apps/api/internal/auth"
)
@ -38,6 +39,7 @@ WHERE p.status = 'enabled'
AND m.enabled = true
AND m.model_type @> jsonb_build_array($2)
AND (p.cooldown_until IS NULL OR p.cooldown_until <= now())
AND (m.cooldown_until IS NULL OR m.cooldown_until <= now())
AND (
(COALESCE(m.model_alias, '') <> '' AND m.model_alias = $1)
OR (
@ -151,6 +153,11 @@ ORDER BY effective_priority ASC,
return nil, err
}
if len(items) == 0 {
if unavailableErr, err := s.modelCandidateCooldownError(ctx, model, modelType); err != nil {
return nil, err
} else if unavailableErr != nil {
return nil, unavailableErr
}
return nil, ErrNoModelCandidate
}
items, err = s.filterCandidatesByAccessRules(ctx, user, items)
@ -162,3 +169,89 @@ ORDER BY effective_priority ASC,
}
return items, nil
}
// modelCandidateCooldownError explains why no candidate matched when the only
// reason is an active cooldown. It re-runs the candidate-matching query
// WITHOUT the cooldown_until filters (the matching predicates mirror
// ListModelCandidates: alias match first, otherwise model name / canonical
// key / provider model name) and scans rows most-recently-cooled first.
// The first return value is a user-facing *ModelCandidateUnavailableError
// (model-level cooldown takes precedence over platform-level), or nil when no
// matching row is cooling down so the caller can fall back to the generic
// ErrNoModelCandidate. The second return value is an infrastructure error.
func (s *Store) modelCandidateCooldownError(ctx context.Context, model string, modelType string) (error, error) {
	// Columns: platform name, best display name for the model, then the
	// platform cooldown (ISO-8601 UTC string + remaining seconds clamped at
	// 0) followed by the same pair for the model-level cooldown.
	rows, err := s.pool.Query(ctx, `
SELECT p.name,
       COALESCE(NULLIF(m.display_name, ''), NULLIF(m.model_alias, ''), m.model_name),
       COALESCE(to_char(p.cooldown_until AT TIME ZONE 'UTC', 'YYYY-MM-DD"T"HH24:MI:SS.MS"Z"'), ''),
       GREATEST(COALESCE(EXTRACT(EPOCH FROM p.cooldown_until - now()), 0), 0)::float8,
       COALESCE(to_char(m.cooldown_until AT TIME ZONE 'UTC', 'YYYY-MM-DD"T"HH24:MI:SS.MS"Z"'), ''),
       GREATEST(COALESCE(EXTRACT(EPOCH FROM m.cooldown_until - now()), 0), 0)::float8
FROM platform_models m
JOIN integration_platforms p ON p.id = m.platform_id
LEFT JOIN base_model_catalog b ON b.id = m.base_model_id
WHERE p.status = 'enabled'
AND p.deleted_at IS NULL
AND m.enabled = true
AND m.model_type @> jsonb_build_array($2)
AND (
(COALESCE(m.model_alias, '') <> '' AND m.model_alias = $1)
OR (
COALESCE(m.model_alias, '') = ''
AND (
m.model_name = $1
OR b.canonical_model_key = $1
OR b.provider_model_name = $1
)
)
)
ORDER BY GREATEST(COALESCE(p.cooldown_until, to_timestamp(0)), COALESCE(m.cooldown_until, to_timestamp(0))) DESC,
p.priority ASC,
m.created_at ASC`, model, modelType)
	if err != nil {
		return nil, err
	}
	defer rows.Close()
	for rows.Next() {
		var platformName string
		var displayName string
		var platformCooldownUntil string
		var platformRemainingSeconds float64
		var modelCooldownUntil string
		var modelRemainingSeconds float64
		if err := rows.Scan(
			&platformName,
			&displayName,
			&platformCooldownUntil,
			&platformRemainingSeconds,
			&modelCooldownUntil,
			&modelRemainingSeconds,
		); err != nil {
			return nil, err
		}
		// Model-level cooldown is reported before platform-level so the
		// message points at the narrowest cooled-down scope.
		if modelRemainingSeconds > 0 {
			return &ModelCandidateUnavailableError{
				Code:    "model_cooling_down",
				Message: cooldownErrorMessage("模型", displayName, modelRemainingSeconds, modelCooldownUntil),
			}, nil
		}
		if platformRemainingSeconds > 0 {
			return &ModelCandidateUnavailableError{
				Code:    "platform_cooling_down",
				Message: cooldownErrorMessage("平台", platformName, platformRemainingSeconds, platformCooldownUntil),
			}, nil
		}
	}
	if err := rows.Err(); err != nil {
		return nil, err
	}
	// No matching row is cooling down: not a cooldown situation.
	return nil, nil
}
// cooldownErrorMessage builds the user-facing (Chinese) cooldown notice for a
// scope label (e.g. "模型"/"平台") and candidate name. The remaining time is
// rendered in minutes with one decimal, floored at 0.1 so the message never
// claims zero wait; the expected recovery timestamp is appended when known.
func cooldownErrorMessage(scope string, name string, remainingSeconds float64, cooldownUntil string) string {
	display := strings.TrimSpace(name)
	if display == "" {
		// Fall back to a generic "candidate" label.
		display = "候选"
	}
	minutes := remainingSeconds / 60
	if minutes < 0.1 {
		minutes = 0.1
	}
	var b strings.Builder
	fmt.Fprintf(&b, "%s %s 冷却中,剩余 %.1f 分钟", scope, display, minutes)
	if strings.TrimSpace(cooldownUntil) != "" {
		b.WriteString(",预计恢复时间 ")
		b.WriteString(cooldownUntil)
	}
	return b.String()
}

View File

@ -193,6 +193,7 @@ RETURNING id::text, platform_id::text, COALESCE(base_model_id::text, ''), model_
capabilities, pricing_mode, COALESCE(discount_factor, 0)::float8,
COALESCE(pricing_rule_set_id::text, ''), billing_config_override, billing_config,
permission_config, retry_policy, rate_limit_policy, COALESCE(runtime_policy_set_id::text, ''), runtime_policy_override,
COALESCE(to_char(cooldown_until AT TIME ZONE 'UTC', 'YYYY-MM-DD"T"HH24:MI:SS.MS"Z"'), ''),
enabled, created_at, updated_at`,
input.PlatformID,
baseID,
@ -234,6 +235,7 @@ RETURNING id::text, platform_id::text, COALESCE(base_model_id::text, ''), model_
&rateLimitPolicyBytes,
&model.RuntimePolicySetID,
&runtimePolicyOverrideBytes,
&model.CooldownUntil,
&model.Enabled,
&model.CreatedAt,
&model.UpdatedAt,

View File

@ -70,6 +70,7 @@ type Platform struct {
CredentialsPreview map[string]any `json:"credentialsPreview,omitempty"`
RetryPolicy map[string]any `json:"retryPolicy,omitempty"`
RateLimitPolicy map[string]any `json:"rateLimitPolicy,omitempty"`
CooldownUntil string `json:"cooldownUntil,omitempty"`
CreatedAt time.Time `json:"createdAt"`
UpdatedAt time.Time `json:"updatedAt"`
}
@ -151,6 +152,7 @@ type PlatformModel struct {
RateLimitPolicy map[string]any `json:"rateLimitPolicy,omitempty"`
RuntimePolicySetID string `json:"runtimePolicySetId,omitempty"`
RuntimePolicyOverride map[string]any `json:"runtimePolicyOverride,omitempty"`
CooldownUntil string `json:"cooldownUntil,omitempty"`
Enabled bool `json:"enabled"`
CreatedAt time.Time `json:"createdAt"`
UpdatedAt time.Time `json:"updatedAt"`
@ -478,7 +480,9 @@ func (s *Store) ListPlatforms(ctx context.Context) ([]Platform, error) {
rows, err := s.pool.Query(ctx, `
SELECT id::text, provider, platform_key, name, COALESCE(internal_name, ''), COALESCE(base_url, ''), auth_type, status, priority,
default_pricing_mode, default_discount_factor::float8, COALESCE(pricing_rule_set_id::text, ''),
config, credentials, retry_policy, rate_limit_policy, created_at, updated_at
config, credentials, retry_policy, rate_limit_policy,
COALESCE(to_char(cooldown_until AT TIME ZONE 'UTC', 'YYYY-MM-DD"T"HH24:MI:SS.MS"Z"'), ''),
created_at, updated_at
FROM integration_platforms
ORDER BY priority ASC, created_at DESC`)
if err != nil {
@ -510,6 +514,7 @@ ORDER BY priority ASC, created_at DESC`)
&credentialsBytes,
&retryPolicyBytes,
&rateLimitPolicyBytes,
&platform.CooldownUntil,
&platform.CreatedAt,
&platform.UpdatedAt,
); err != nil {
@ -555,7 +560,9 @@ VALUES (
)
RETURNING id::text, provider, platform_key, name, COALESCE(internal_name, ''), COALESCE(base_url, ''), auth_type, status, priority,
default_pricing_mode, default_discount_factor::float8, COALESCE(pricing_rule_set_id::text, ''),
config, credentials, retry_policy, rate_limit_policy, created_at, updated_at`,
config, credentials, retry_policy, rate_limit_policy,
COALESCE(to_char(cooldown_until AT TIME ZONE 'UTC', 'YYYY-MM-DD"T"HH24:MI:SS.MS"Z"'), ''),
created_at, updated_at`,
input.Provider, input.PlatformKey, input.Name, strings.TrimSpace(input.InternalName), input.BaseURL, input.AuthType, credentials, config,
input.DefaultPricingMode, input.DefaultDiscountFactor, input.PricingRuleSetID, input.Priority,
string(retryPolicy), string(rateLimitPolicy),
@ -576,6 +583,7 @@ RETURNING id::text, provider, platform_key, name, COALESCE(internal_name, ''), C
&credentialsResultBytes,
&retryPolicyBytes,
&rateLimitPolicyBytes,
&platform.CooldownUntil,
&platform.CreatedAt,
&platform.UpdatedAt,
)
@ -636,7 +644,9 @@ SET provider = $2,
WHERE id = $1::uuid
RETURNING id::text, provider, platform_key, name, COALESCE(internal_name, ''), COALESCE(base_url, ''), auth_type, status, priority,
default_pricing_mode, default_discount_factor::float8, COALESCE(pricing_rule_set_id::text, ''),
config, credentials, retry_policy, rate_limit_policy, created_at, updated_at`,
config, credentials, retry_policy, rate_limit_policy,
COALESCE(to_char(cooldown_until AT TIME ZONE 'UTC', 'YYYY-MM-DD"T"HH24:MI:SS.MS"Z"'), ''),
created_at, updated_at`,
id,
input.Provider,
input.PlatformKey,
@ -669,6 +679,7 @@ RETURNING id::text, provider, platform_key, name, COALESCE(internal_name, ''), C
&credentialsResultBytes,
&retryPolicyBytes,
&rateLimitPolicyBytes,
&platform.CooldownUntil,
&platform.CreatedAt,
&platform.UpdatedAt,
)
@ -746,6 +757,7 @@ SELECT m.id::text, m.platform_id::text, COALESCE(m.base_model_id::text, ''), p.p
m.capability_override, m.capabilities, m.pricing_mode, COALESCE(m.discount_factor, 0)::float8,
COALESCE(m.pricing_rule_set_id::text, ''), m.billing_config_override, m.billing_config,
m.permission_config, m.retry_policy, m.rate_limit_policy, COALESCE(m.runtime_policy_set_id::text, ''), m.runtime_policy_override,
COALESCE(to_char(m.cooldown_until AT TIME ZONE 'UTC', 'YYYY-MM-DD"T"HH24:MI:SS.MS"Z"'), ''),
m.enabled, m.created_at, m.updated_at
FROM platform_models m
JOIN integration_platforms p ON p.id = m.platform_id
@ -791,6 +803,7 @@ ORDER BY m.model_type ASC, m.model_name ASC`, args...)
&rateLimitPolicy,
&model.RuntimePolicySetID,
&runtimePolicyOverride,
&model.CooldownUntil,
&model.Enabled,
&model.CreatedAt,
&model.UpdatedAt,

View File

@ -0,0 +1,249 @@
package store
import (
"context"
"sort"
"strings"
)
// RateLimitMetricStatus is the usage snapshot for one rate-limit metric
// (concurrent, rpm, or tpm) of a platform model.
type RateLimitMetricStatus struct {
	CurrentValue float64 `json:"currentValue"` // used + reserved (for concurrency: active lease total)
	UsedValue    float64 `json:"usedValue"`    // committed consumption in the active window
	ReservedValue float64 `json:"reservedValue"` // in-flight reservations not yet committed
	LimitValue   float64 `json:"limitValue"`   // configured cap; 0 means no limit configured
	Limited      bool    `json:"limited"`      // true when LimitValue > 0
	Ratio        float64 `json:"ratio"`        // CurrentValue / LimitValue when limited, else 0
	ResetAt      string  `json:"resetAt,omitempty"` // window reset time; empty for concurrency
}
// ModelRateLimitStatus is one row of the admin model-rate-limit dashboard:
// identity and policy of a platform model plus its per-metric usage.
type ModelRateLimitStatus struct {
	PlatformModelID string `json:"platformModelId"`
	PlatformID      string `json:"platformId"`
	PlatformName    string `json:"platformName"`
	Provider        string `json:"provider"`
	ModelName       string `json:"modelName"`
	ProviderModelName string `json:"providerModelName,omitempty"`
	ModelAlias      string `json:"modelAlias,omitempty"`
	DisplayName     string `json:"displayName"`
	ModelType       []string `json:"modelType"`
	Enabled         bool `json:"enabled"`
	// RateLimitPolicy is the effective merged policy (platform defaults,
	// runtime policy set, runtime override, model policy); nil when no rules.
	RateLimitPolicy map[string]any `json:"rateLimitPolicy,omitempty"`
	PlatformCooldownUntil string `json:"platformCooldownUntil,omitempty"` // ISO-8601 UTC, empty when not cooling down
	ModelCooldownUntil    string `json:"modelCooldownUntil,omitempty"`    // ISO-8601 UTC, empty when not cooling down
	Concurrent RateLimitMetricStatus `json:"concurrent"`
	RPM        RateLimitMetricStatus `json:"rpm"`
	TPM        RateLimitMetricStatus `json:"tpm"`
	// LoadRatio is the max of the three metric ratios; used as the sort key.
	LoadRatio float64 `json:"loadRatio"`
}
// ListModelRateLimitStatuses returns the current rate-limit usage snapshot
// for every platform model on a non-deleted platform. For each model it
// aggregates: active concurrency leases, the newest still-open rpm window,
// and the sum of all open tpm* windows, then computes the effective policy
// and per-metric ratios. Results are sorted by LoadRatio descending (ties
// broken by case-insensitive display name).
func (s *Store) ListModelRateLimitStatuses(ctx context.Context) ([]ModelRateLimitStatus, error) {
	// The three LEFT JOIN subqueries pre-aggregate runtime counters keyed by
	// platform-model id:
	//   con — sum of unreleased, unexpired concurrency lease values
	//   rpm — latest open per-minute window (DISTINCT ON newest window_start)
	//   tpm — sum across all open tpm_* metric windows (input/output/total)
	rows, err := s.pool.Query(ctx, `
SELECT m.id::text, m.platform_id::text, p.name, p.provider,
m.model_name, COALESCE(NULLIF(m.provider_model_name, ''), m.model_name), COALESCE(m.model_alias, ''),
m.model_type, m.display_name, m.enabled,
p.rate_limit_policy, COALESCE(rp.rate_limit_policy, '{}'::jsonb), COALESCE(NULLIF(m.runtime_policy_override, '{}'::jsonb), b.runtime_policy_override, '{}'::jsonb), m.rate_limit_policy,
COALESCE(to_char(p.cooldown_until AT TIME ZONE 'UTC', 'YYYY-MM-DD"T"HH24:MI:SS.MS"Z"'), ''),
COALESCE(to_char(m.cooldown_until AT TIME ZONE 'UTC', 'YYYY-MM-DD"T"HH24:MI:SS.MS"Z"'), ''),
COALESCE(con.active, 0)::float8,
COALESCE(rpm.used_value, 0)::float8, COALESCE(rpm.reserved_value, 0)::float8, COALESCE(rpm.reset_at::text, ''),
COALESCE(tpm.used_value, 0)::float8, COALESCE(tpm.reserved_value, 0)::float8, COALESCE(tpm.reset_at::text, '')
FROM platform_models m
JOIN integration_platforms p ON p.id = m.platform_id
LEFT JOIN base_model_catalog b ON b.id = m.base_model_id
LEFT JOIN model_runtime_policy_sets rp ON rp.id = COALESCE(m.runtime_policy_set_id, b.runtime_policy_set_id)
LEFT JOIN (
SELECT scope_key, SUM(lease_value) AS active
FROM gateway_concurrency_leases
WHERE scope_type = 'platform_model'
AND released_at IS NULL
AND expires_at > now()
GROUP BY scope_key
) con ON con.scope_key = m.id::text
LEFT JOIN (
SELECT DISTINCT ON (scope_key) scope_key, used_value, reserved_value, reset_at
FROM gateway_rate_limit_counters
WHERE scope_type = 'platform_model'
AND metric = 'rpm'
AND reset_at > now()
ORDER BY scope_key, window_start DESC
) rpm ON rpm.scope_key = m.id::text
LEFT JOIN (
SELECT scope_key, SUM(used_value) AS used_value, SUM(reserved_value) AS reserved_value, MAX(reset_at) AS reset_at
FROM gateway_rate_limit_counters
WHERE scope_type = 'platform_model'
AND metric LIKE 'tpm%'
AND reset_at > now()
GROUP BY scope_key
) tpm ON tpm.scope_key = m.id::text
WHERE p.deleted_at IS NULL
ORDER BY p.priority ASC, m.model_name ASC`)
	if err != nil {
		return nil, err
	}
	defer rows.Close()
	items := make([]ModelRateLimitStatus, 0)
	for rows.Next() {
		var item ModelRateLimitStatus
		var modelTypeBytes []byte
		var platformPolicyBytes []byte
		var runtimePolicyBytes []byte
		var runtimeOverrideBytes []byte
		var modelPolicyBytes []byte
		var platformCooldownUntil string
		var modelCooldownUntil string
		var concurrentCurrent float64
		var rpmUsed float64
		var rpmReserved float64
		var rpmResetAt string
		var tpmUsed float64
		var tpmReserved float64
		var tpmResetAt string
		if err := rows.Scan(
			&item.PlatformModelID,
			&item.PlatformID,
			&item.PlatformName,
			&item.Provider,
			&item.ModelName,
			&item.ProviderModelName,
			&item.ModelAlias,
			&modelTypeBytes,
			&item.DisplayName,
			&item.Enabled,
			&platformPolicyBytes,
			&runtimePolicyBytes,
			&runtimeOverrideBytes,
			&modelPolicyBytes,
			&platformCooldownUntil,
			&modelCooldownUntil,
			&concurrentCurrent,
			&rpmUsed,
			&rpmReserved,
			&rpmResetAt,
			&tpmUsed,
			&tpmReserved,
			&tpmResetAt,
		); err != nil {
			return nil, err
		}
		item.ModelType = decodeStringArray(modelTypeBytes)
		// Layer the four policy sources (lowest to highest precedence) into
		// the effective rate-limit policy for this model.
		policy := effectiveModelRateLimitPolicy(
			decodeObject(platformPolicyBytes),
			decodeObject(runtimePolicyBytes),
			decodeObject(runtimeOverrideBytes),
			decodeObject(modelPolicyBytes),
		)
		item.PlatformCooldownUntil = platformCooldownUntil
		item.ModelCooldownUntil = modelCooldownUntil
		item.RateLimitPolicy = policy
		// Concurrency has no window, so current==used and there is no resetAt.
		item.Concurrent = metricStatus(concurrentCurrent, concurrentCurrent, 0, rateLimitForMetric(policy, "concurrent"), "")
		item.RPM = metricStatus(rpmUsed+rpmReserved, rpmUsed, rpmReserved, rateLimitForMetric(policy, "rpm"), rpmResetAt)
		item.TPM = metricStatus(tpmUsed+tpmReserved, tpmUsed, tpmReserved, tpmLimit(policy), tpmResetAt)
		item.LoadRatio = maxFloat(item.Concurrent.Ratio, item.RPM.Ratio, item.TPM.Ratio)
		items = append(items, item)
	}
	if err := rows.Err(); err != nil {
		return nil, err
	}
	// Busiest models first; stable so the SQL ordering (priority, name)
	// still breaks ties among equal ratios with equal display names.
	sort.SliceStable(items, func(i, j int) bool {
		if items[i].LoadRatio == items[j].LoadRatio {
			return strings.ToLower(items[i].DisplayName) < strings.ToLower(items[j].DisplayName)
		}
		return items[i].LoadRatio > items[j].LoadRatio
	})
	return items, nil
}
// effectiveModelRateLimitPolicy layers the rate-limit policy sources from
// lowest to highest precedence: platform defaults, runtime policy set, the
// nested rateLimitPolicy object of the runtime override, and finally the
// model's own policy. A layer is applied only when it carries rules (for the
// override: a non-empty nested object); later layers shadow earlier keys via
// a shallow merge. Returns nil when the merged result holds no rules.
func effectiveModelRateLimitPolicy(platformPolicy map[string]any, runtimePolicy map[string]any, runtimeOverride map[string]any, modelPolicy map[string]any) map[string]any {
	merged := platformPolicy
	if hasRateLimitRules(runtimePolicy) {
		merged = shallowMergeMap(merged, runtimePolicy)
	}
	nested, ok := runtimeOverride["rateLimitPolicy"].(map[string]any)
	if ok && len(nested) > 0 {
		merged = shallowMergeMap(merged, nested)
	}
	if hasRateLimitRules(modelPolicy) {
		merged = shallowMergeMap(merged, modelPolicy)
	}
	if !hasRateLimitRules(merged) {
		return nil
	}
	return merged
}
// hasRateLimitRules reports whether the policy carries at least one entry in
// its "rules" list.
func hasRateLimitRules(policy map[string]any) bool {
	if rules, ok := policy["rules"].([]any); ok {
		return len(rules) > 0
	}
	return false
}
// shallowMergeMap returns a new map holding base's entries with override's
// entries layered on top (override wins on key collisions). Neither input is
// mutated; the result is never nil.
func shallowMergeMap(base map[string]any, override map[string]any) map[string]any {
	merged := make(map[string]any, len(base)+len(override))
	for key, value := range base {
		merged[key] = value
	}
	for key, value := range override {
		merged[key] = value
	}
	return merged
}
// rateLimitForMetric returns the limit of the first rule in the policy's
// "rules" list whose metric matches, or 0 when no rule matches (unlimited).
func rateLimitForMetric(policy map[string]any, metric string) float64 {
	rules, _ := policy["rules"].([]any)
	for _, raw := range rules {
		rule, _ := raw.(map[string]any)
		if strings.TrimSpace(stringValue(rule["metric"])) != metric {
			continue
		}
		return floatValue(rule["limit"])
	}
	return 0
}
// tpmLimit derives the effective tokens-per-minute cap: an explicit
// tpm_total rule wins; otherwise the input and output caps are summed.
func tpmLimit(policy map[string]any) float64 {
	total := rateLimitForMetric(policy, "tpm_total")
	if total > 0 {
		return total
	}
	return rateLimitForMetric(policy, "tpm_input") + rateLimitForMetric(policy, "tpm_output")
}
// metricStatus assembles the reported snapshot for one metric. A limit of 0
// means the metric is unlimited: Limited stays false and Ratio stays 0.
func metricStatus(current float64, used float64, reserved float64, limit float64, resetAt string) RateLimitMetricStatus {
	limited := limit > 0
	ratio := 0.0
	if limited {
		ratio = current / limit
	}
	return RateLimitMetricStatus{
		CurrentValue:  current,
		UsedValue:     used,
		ReservedValue: reserved,
		LimitValue:    limit,
		Limited:       limited,
		Ratio:         ratio,
		ResetAt:       resetAt,
	}
}
// maxFloat returns the largest of the given values, floored at 0 (so with no
// arguments, or all-negative arguments, the result is 0). Load ratios are
// non-negative, which is why 0 is a safe floor here.
func maxFloat(values ...float64) float64 {
	var largest float64
	for _, candidate := range values {
		if candidate > largest {
			largest = candidate
		}
	}
	return largest
}
// stringValue extracts a trimmed string from a decoded JSON value; non-string
// values (including nil) yield "".
func stringValue(value any) string {
	if text, ok := value.(string); ok {
		return strings.TrimSpace(text)
	}
	return ""
}
// floatValue coerces a decoded policy value to float64. JSON decoding of the
// jsonb policies yields float64, but limits set programmatically may arrive
// as other numeric kinds, so the common integer and float widths are all
// accepted. Anything non-numeric (strings, nil, objects) yields 0, which the
// callers treat as "no limit configured".
func floatValue(value any) float64 {
	switch typed := value.(type) {
	case int:
		return float64(typed)
	case int32:
		return float64(typed)
	case int64:
		return float64(typed)
	case float32:
		return float64(typed)
	case float64:
		return typed
	default:
		return 0
	}
}

View File

@ -0,0 +1,32 @@
package store
import "testing"
// Confirms precedence: when the model-level policy defines a rule for a
// metric it overrides both the platform policy and the runtime policy set,
// and metrics the model policy omits (tpm_total here) become unlimited
// because the highest-precedence layer replaces the whole "rules" list.
func TestEffectiveModelRateLimitPolicyTreatsModelRulesAsAuthoritative(t *testing.T) {
	platformRules := map[string]any{"rules": []any{
		map[string]any{"metric": "rpm", "limit": 500},
		map[string]any{"metric": "tpm_total", "limit": 100000},
	}}
	runtimeRules := map[string]any{"rules": []any{
		map[string]any{"metric": "rpm", "limit": 120},
		map[string]any{"metric": "tpm_total", "limit": 240000},
		map[string]any{"metric": "concurrent", "limit": 6},
	}}
	modelRules := map[string]any{"rules": []any{
		map[string]any{"metric": "rpm", "limit": 30},
		map[string]any{"metric": "concurrent", "limit": 2},
	}}
	effective := effectiveModelRateLimitPolicy(platformRules, runtimeRules, map[string]any{}, modelRules)
	if got := rateLimitForMetric(effective, "rpm"); got != 30 {
		t.Fatalf("expected model rpm limit to win, got %v", got)
	}
	if got := rateLimitForMetric(effective, "concurrent"); got != 2 {
		t.Fatalf("expected model concurrent limit to win, got %v", got)
	}
	if got := rateLimitForMetric(effective, "tpm_total"); got != 0 {
		t.Fatalf("expected missing model tpm limit to mean unlimited, got %v", got)
	}
}

View File

@ -3,9 +3,19 @@ package store
import (
"context"
"errors"
"time"
"github.com/jackc/pgx/v5"
)
func (s *Store) ReserveRateLimits(ctx context.Context, taskID string, reservations []RateLimitReservation) (RateLimitResult, error) {
type RuntimeRecoveryResult struct {
ReleasedConcurrencyLeases int64 `json:"releasedConcurrencyLeases"`
ReleasedRateReservations int64 `json:"releasedRateReservations"`
FailedAttempts int64 `json:"failedAttempts"`
FailedTasks int64 `json:"failedTasks"`
}
func (s *Store) ReserveRateLimits(ctx context.Context, taskID string, attemptID string, reservations []RateLimitReservation) (RateLimitResult, error) {
tx, err := s.pool.Begin(ctx)
if err != nil {
return RateLimitResult{}, err
@ -24,70 +34,122 @@ func (s *Store) ReserveRateLimits(ctx context.Context, taskID string, reservatio
reservation.WindowSeconds = 60
}
if reservation.Metric == "concurrent" {
if reservation.LeaseTTLSeconds <= 0 {
reservation.LeaseTTLSeconds = 120
leaseID, err := reserveConcurrencyLease(ctx, tx, taskID, attemptID, reservation)
if err != nil {
return RateLimitResult{}, err
}
var active float64
if err := tx.QueryRow(ctx, `
result.LeaseIDs = append(result.LeaseIDs, leaseID)
continue
}
normalized, err := reserveCounterWindow(ctx, tx, taskID, attemptID, reservation)
if err != nil {
return RateLimitResult{}, err
}
result.Reservations = append(result.Reservations, normalized)
}
return result, tx.Commit(ctx)
}
func reserveConcurrencyLease(ctx context.Context, tx pgx.Tx, taskID string, attemptID string, reservation RateLimitReservation) (string, error) {
if reservation.LeaseTTLSeconds <= 0 {
reservation.LeaseTTLSeconds = 120
}
var active float64
if err := tx.QueryRow(ctx, `
SELECT COALESCE(SUM(lease_value), 0)::float8
FROM gateway_concurrency_leases
WHERE scope_type = $1
AND scope_key = $2
AND released_at IS NULL
AND expires_at > now()`,
reservation.ScopeType,
reservation.ScopeKey,
).Scan(&active); err != nil {
return RateLimitResult{}, err
}
if active+reservation.Amount > reservation.Limit {
return RateLimitResult{}, ErrRateLimited
}
var leaseID string
if err := tx.QueryRow(ctx, `
INSERT INTO gateway_concurrency_leases (task_id, scope_type, scope_key, lease_value, expires_at)
VALUES ($1::uuid, $2, $3, $4, now() + ($5::int * interval '1 second'))
reservation.ScopeType,
reservation.ScopeKey,
).Scan(&active); err != nil {
return "", err
}
if active+reservation.Amount > reservation.Limit {
return "", ErrRateLimited
}
var leaseID string
if err := tx.QueryRow(ctx, `
INSERT INTO gateway_concurrency_leases (task_id, attempt_id, scope_type, scope_key, lease_value, expires_at)
VALUES ($1::uuid, NULLIF($2, '')::uuid, $3, $4, $5, now() + ($6::int * interval '1 second'))
RETURNING id::text`,
taskID,
reservation.ScopeType,
reservation.ScopeKey,
reservation.Amount,
reservation.LeaseTTLSeconds,
).Scan(&leaseID); err != nil {
return RateLimitResult{}, err
}
result.LeaseIDs = append(result.LeaseIDs, leaseID)
continue
}
tag, err := tx.Exec(ctx, `
taskID,
attemptID,
reservation.ScopeType,
reservation.ScopeKey,
reservation.Amount,
reservation.LeaseTTLSeconds,
).Scan(&leaseID); err != nil {
return "", err
}
return leaseID, nil
}
func reserveCounterWindow(ctx context.Context, tx pgx.Tx, taskID string, attemptID string, reservation RateLimitReservation) (RateLimitReservation, error) {
usedAmount := 0.0
reservedAmount := reservation.Amount
var windowStart time.Time
err := tx.QueryRow(ctx, `
INSERT INTO gateway_rate_limit_counters (
scope_type, scope_key, metric, window_start, limit_value, used_value, reserved_value, reset_at
)
VALUES (
$1, $2, $3, date_trunc('minute', now()), $4, $5, 0,
date_trunc('minute', now()) + ($6::int * interval '1 second')
$1, $2, $3, date_trunc('minute', now()), $4, $5, $6,
date_trunc('minute', now()) + ($7::int * interval '1 second')
)
ON CONFLICT (scope_type, scope_key, metric, window_start) DO UPDATE
SET limit_value = EXCLUDED.limit_value,
used_value = gateway_rate_limit_counters.used_value + EXCLUDED.used_value,
reserved_value = gateway_rate_limit_counters.reserved_value + EXCLUDED.reserved_value,
reset_at = EXCLUDED.reset_at,
updated_at = now()
WHERE gateway_rate_limit_counters.used_value + EXCLUDED.used_value <= EXCLUDED.limit_value`,
reservation.ScopeType,
reservation.ScopeKey,
reservation.Metric,
reservation.Limit,
reservation.Amount,
reservation.WindowSeconds,
)
if err != nil {
return RateLimitResult{}, err
}
if tag.RowsAffected() == 0 {
return RateLimitResult{}, ErrRateLimited
WHERE gateway_rate_limit_counters.used_value + gateway_rate_limit_counters.reserved_value + EXCLUDED.used_value + EXCLUDED.reserved_value <= EXCLUDED.limit_value
RETURNING window_start`,
reservation.ScopeType,
reservation.ScopeKey,
reservation.Metric,
reservation.Limit,
usedAmount,
reservedAmount,
reservation.WindowSeconds,
).Scan(&windowStart)
if err != nil {
if errors.Is(err, pgx.ErrNoRows) {
return RateLimitReservation{}, ErrRateLimited
}
return RateLimitReservation{}, err
}
return result, tx.Commit(ctx)
reservation.WindowStart = windowStart
if err := tx.QueryRow(ctx, `
INSERT INTO gateway_rate_limit_reservations (
task_id, attempt_id, scope_type, scope_key, metric, window_start, limit_value, reserved_amount, status
)
VALUES (
$1::uuid, NULLIF($2, '')::uuid, $3, $4, $5, $6, $7, $8, 'reserved'
)
RETURNING id::text`,
taskID,
attemptID,
reservation.ScopeType,
reservation.ScopeKey,
reservation.Metric,
windowStart,
reservation.Limit,
reservedAmount,
).Scan(&reservation.ReservationID); err != nil {
return RateLimitReservation{}, err
}
return reservation, nil
}
func (s *Store) CommitRateLimitReservations(ctx context.Context, reservations []RateLimitReservation, actualByMetric map[string]float64) error {
return s.finishRateLimitReservations(ctx, reservations, actualByMetric, "committed", "success")
}
func (s *Store) ReleaseRateLimitReservations(ctx context.Context, reservations []RateLimitReservation, reason string) error {
return s.finishRateLimitReservations(ctx, reservations, nil, "released", reason)
}
func (s *Store) ReleaseConcurrencyLeases(ctx context.Context, leaseIDs []string) error {
@ -107,3 +169,193 @@ WHERE id = $1::uuid AND released_at IS NULL`, leaseID); err != nil && !errors.Is
}
return nil
}
// RecoverInterruptedRuntimeState cleans up runtime state orphaned by an
// unclean shutdown; it is intended to run once at startup before the server
// accepts traffic. In a single transaction it:
//  1. releases every still-'reserved' rate-limit reservation and returns the
//     held amount to its counter window,
//  2. force-releases live concurrency leases,
//  3. fails any attempt still marked 'running', and
//  4. fails any task still 'queued'/'running', appending a terminal
//     task.recovered event to each task's event stream.
// The returned RuntimeRecoveryResult reports how many rows each step touched.
func (s *Store) RecoverInterruptedRuntimeState(ctx context.Context) (RuntimeRecoveryResult, error) {
	tx, err := s.pool.Begin(ctx)
	if err != nil {
		return RuntimeRecoveryResult{}, err
	}
	defer tx.Rollback(ctx)
	result := RuntimeRecoveryResult{}
	rows, err := tx.Query(ctx, `
UPDATE gateway_rate_limit_reservations
SET status = 'released',
reason = 'server_restarted',
finalized_at = now(),
updated_at = now()
WHERE status = 'reserved'
RETURNING scope_type, scope_key, metric, window_start, reserved_amount::float8`)
	if err != nil {
		return RuntimeRecoveryResult{}, err
	}
	// Drain the cursor before issuing any further statement on the
	// transaction: pgx cannot execute another query on the same connection
	// while Rows is open (it fails with "conn busy"). This mirrors the
	// collect-then-execute pattern the task recovery below already uses.
	releasedReservations := make([]RateLimitReservation, 0)
	for rows.Next() {
		var reservation RateLimitReservation
		if err := rows.Scan(&reservation.ScopeType, &reservation.ScopeKey, &reservation.Metric, &reservation.WindowStart, &reservation.Amount); err != nil {
			rows.Close()
			return RuntimeRecoveryResult{}, err
		}
		releasedReservations = append(releasedReservations, reservation)
	}
	if err := rows.Err(); err != nil {
		rows.Close()
		return RuntimeRecoveryResult{}, err
	}
	rows.Close()
	for _, reservation := range releasedReservations {
		// Return the reserved amount to the counter window so the capacity is
		// immediately usable again.
		if err := releaseCounterReservation(ctx, tx, reservation.ScopeType, reservation.ScopeKey, reservation.Metric, reservation.WindowStart, reservation.Amount); err != nil {
			return RuntimeRecoveryResult{}, err
		}
		result.ReleasedRateReservations++
	}
	// NOTE(review): only unexpired leases are force-released here; leases that
	// already expired stay unreleased, but the reservation path filters on
	// expires_at > now(), so they no longer count against capacity.
	tag, err := tx.Exec(ctx, `
UPDATE gateway_concurrency_leases
SET released_at = now()
WHERE released_at IS NULL
AND expires_at > now()`)
	if err != nil {
		return RuntimeRecoveryResult{}, err
	}
	result.ReleasedConcurrencyLeases = tag.RowsAffected()
	// In-flight attempts cannot survive a restart: mark them failed and
	// non-retryable with a dedicated error code.
	tag, err = tx.Exec(ctx, `
UPDATE gateway_task_attempts
SET status = 'failed',
retryable = false,
error_code = 'server_restarted',
error_message = 'attempt interrupted by service restart',
finished_at = now()
WHERE status = 'running'`)
	if err != nil {
		return RuntimeRecoveryResult{}, err
	}
	result.FailedAttempts = tag.RowsAffected()
	taskRows, err := tx.Query(ctx, `
UPDATE gateway_tasks
SET status = 'failed',
error = 'task interrupted by service restart',
error_code = 'server_restarted',
error_message = 'task interrupted by service restart',
finished_at = now(),
updated_at = now()
WHERE status IN ('queued', 'running')
RETURNING id::text`)
	if err != nil {
		return RuntimeRecoveryResult{}, err
	}
	// Same buffering rule as above: collect the IDs, close the cursor, then
	// insert the recovery events.
	taskIDs := make([]string, 0)
	for taskRows.Next() {
		var taskID string
		if err := taskRows.Scan(&taskID); err != nil {
			taskRows.Close()
			return RuntimeRecoveryResult{}, err
		}
		taskIDs = append(taskIDs, taskID)
	}
	if err := taskRows.Err(); err != nil {
		taskRows.Close()
		return RuntimeRecoveryResult{}, err
	}
	taskRows.Close()
	// Emit one terminal event per recovered task so event-stream consumers see
	// a consistent final state; seq continues from the task's last event.
	for _, taskID := range taskIDs {
		if _, err := tx.Exec(ctx, `
INSERT INTO gateway_task_events (task_id, seq, event_type, status, phase, progress, message, payload, simulated)
VALUES (
$1::uuid,
COALESCE((SELECT MAX(seq) + 1 FROM gateway_task_events WHERE task_id = $1::uuid), 1),
'task.recovered',
'failed',
'recovered',
1,
'task interrupted by service restart',
'{"code":"server_restarted"}'::jsonb,
false
)`, taskID); err != nil {
			return RuntimeRecoveryResult{}, err
		}
	}
	result.FailedTasks = int64(len(taskIDs))
	return result, tx.Commit(ctx)
}
// finishRateLimitReservations transitions a batch of reservations out of the
// 'reserved' state inside a single transaction. status is either "committed"
// (actual usage is recorded into the counter window) or "released" (the hold
// is simply returned); reason is persisted alongside the final state.
// Reservations already finalized elsewhere are skipped silently.
func (s *Store) finishRateLimitReservations(ctx context.Context, reservations []RateLimitReservation, actualByMetric map[string]float64, status string, reason string) error {
	if len(reservations) == 0 {
		return nil
	}
	tx, err := s.pool.Begin(ctx)
	if err != nil {
		return err
	}
	defer tx.Rollback(ctx)
	committing := status == "committed"
	for _, res := range reservations {
		// Entries without a reservation row (e.g. concurrency leases) carry
		// nothing to finalize here.
		if res.ReservationID == "" {
			continue
		}
		actual := actualByMetric[res.Metric]
		if committing && actual <= 0 {
			// No actual usage reported: fall back to the reserved amount.
			actual = res.Amount
		}
		var prior RateLimitReservation
		scanErr := tx.QueryRow(ctx, `
UPDATE gateway_rate_limit_reservations
SET status = $2,
reason = NULLIF($3, ''),
actual_amount = CASE WHEN $2 = 'committed' THEN $4 ELSE actual_amount END,
finalized_at = now(),
updated_at = now()
WHERE id = $1::uuid
AND status = 'reserved'
RETURNING scope_type, scope_key, metric, window_start, reserved_amount::float8`,
			res.ReservationID,
			status,
			reason,
			actual,
		).Scan(&prior.ScopeType, &prior.ScopeKey, &prior.Metric, &prior.WindowStart, &prior.Amount)
		if scanErr != nil {
			// Already finalized by another path (retry, restart recovery): skip.
			if errors.Is(scanErr, pgx.ErrNoRows) {
				continue
			}
			return scanErr
		}
		if committing {
			if err := commitCounterReservation(ctx, tx, prior.ScopeType, prior.ScopeKey, prior.Metric, prior.WindowStart, prior.Amount, actual); err != nil {
				return err
			}
		} else if err := releaseCounterReservation(ctx, tx, prior.ScopeType, prior.ScopeKey, prior.Metric, prior.WindowStart, prior.Amount); err != nil {
			return err
		}
	}
	return tx.Commit(ctx)
}
// commitCounterReservation finalizes a reservation against its counter window:
// the held amount leaves reserved_value and the actual consumption is added to
// used_value. reserved_value is clamped at zero to guard against double frees.
func commitCounterReservation(ctx context.Context, tx pgx.Tx, scopeType string, scopeKey string, metric string, windowStart time.Time, reservedAmount float64, actualAmount float64) error {
	const query = `
UPDATE gateway_rate_limit_counters
SET reserved_value = GREATEST(reserved_value - $5, 0),
used_value = used_value + $6,
updated_at = now()
WHERE scope_type = $1
AND scope_key = $2
AND metric = $3
AND window_start = $4`
	if _, err := tx.Exec(ctx, query, scopeType, scopeKey, metric, windowStart, reservedAmount, actualAmount); err != nil {
		return err
	}
	return nil
}
// releaseCounterReservation returns a held amount to its counter window
// without recording any usage (the reservation was abandoned or failed).
// reserved_value is clamped at zero to guard against double frees.
func releaseCounterReservation(ctx context.Context, tx pgx.Tx, scopeType string, scopeKey string, metric string, windowStart time.Time, reservedAmount float64) error {
	const query = `
UPDATE gateway_rate_limit_counters
SET reserved_value = GREATEST(reserved_value - $5, 0),
updated_at = now()
WHERE scope_type = $1
AND scope_key = $2
AND metric = $3
AND window_start = $4`
	if _, err := tx.Exec(ctx, query, scopeType, scopeKey, metric, windowStart, reservedAmount); err != nil {
		return err
	}
	return nil
}

View File

@ -134,6 +134,21 @@ WHERE id = $1::uuid`, platformID, cooldownSeconds)
return err
}
// CooldownCandidatePlatformModel puts a single platform model into cooldown so
// routing skips it until the window elapses. A blank id is a no-op; a
// non-positive cooldown falls back to 300 seconds.
func (s *Store) CooldownCandidatePlatformModel(ctx context.Context, platformModelID string, cooldownSeconds int) error {
	if strings.TrimSpace(platformModelID) == "" {
		return nil
	}
	seconds := cooldownSeconds
	if seconds <= 0 {
		seconds = 300
	}
	const query = `
UPDATE platform_models
SET cooldown_until = now() + ($2::int * interval '1 second'),
updated_at = now()
WHERE id = $1::uuid`
	_, err := s.pool.Exec(ctx, query, platformModelID, seconds)
	return err
}
func (s *Store) DemoteCandidatePlatformPriority(ctx context.Context, platformID string, demoteStep int) error {
if strings.TrimSpace(platformID) == "" {
return nil

View File

@ -2,6 +2,7 @@ package store
import (
"errors"
"strings"
"time"
)
@ -10,6 +11,27 @@ var (
ErrRateLimited = errors.New("rate limit exceeded")
)
// ModelCandidateUnavailableError reports why no platform model candidate could
// be selected, while still matching errors.Is(err, ErrNoModelCandidate)
// through Unwrap.
type ModelCandidateUnavailableError struct {
	Code    string
	Message string
}

// Error returns the human-readable reason.
func (e *ModelCandidateUnavailableError) Error() string { return e.Message }

// Unwrap links the error to the ErrNoModelCandidate sentinel so existing
// errors.Is checks keep working.
func (e *ModelCandidateUnavailableError) Unwrap() error { return ErrNoModelCandidate }

// ModelCandidateErrorCode extracts the machine-readable code from err, falling
// back to "no_model_candidate" when err carries no non-blank code.
func ModelCandidateErrorCode(err error) string {
	var candidateErr *ModelCandidateUnavailableError
	if !errors.As(err, &candidateErr) {
		return "no_model_candidate"
	}
	if strings.TrimSpace(candidateErr.Code) == "" {
		return "no_model_candidate"
	}
	return candidateErr.Code
}
type CreatePlatformModelInput struct {
PlatformID string `json:"platformId"`
BaseModelID string `json:"baseModelId"`
@ -81,6 +103,7 @@ type RuntimeModelCandidate struct {
}
type RateLimitReservation struct {
ReservationID string
ScopeType string
ScopeKey string
Metric string
@ -88,10 +111,12 @@ type RateLimitReservation struct {
Amount float64
WindowSeconds int
LeaseTTLSeconds int
WindowStart time.Time
}
type RateLimitResult struct {
LeaseIDs []string
LeaseIDs []string
Reservations []RateLimitReservation
}
type CreateTaskAttemptInput struct {

View File

@ -0,0 +1,23 @@
-- Durable record of each rate-limit hold taken for a task/attempt.
-- Rows start in status 'reserved' and are finalized to 'committed' (with
-- actual_amount recorded) or 'released' (with an optional reason); startup
-- recovery releases any rows still 'reserved'.
CREATE TABLE IF NOT EXISTS gateway_rate_limit_reservations (
id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
task_id uuid NOT NULL REFERENCES gateway_tasks(id) ON DELETE CASCADE,
attempt_id uuid REFERENCES gateway_task_attempts(id) ON DELETE SET NULL,
scope_type text NOT NULL,
scope_key text NOT NULL,
metric text NOT NULL,
window_start timestamptz NOT NULL,
limit_value numeric NOT NULL,
reserved_amount numeric NOT NULL DEFAULT 0,
actual_amount numeric NOT NULL DEFAULT 0,
status text NOT NULL DEFAULT 'reserved',
reason text,
created_at timestamptz NOT NULL DEFAULT now(),
updated_at timestamptz NOT NULL DEFAULT now(),
finalized_at timestamptz
);
-- Supports lookups of active reservations per counter window.
CREATE INDEX IF NOT EXISTS idx_rate_limit_reservations_active
ON gateway_rate_limit_reservations(status, scope_type, scope_key, metric, window_start);
-- Supports finalizing all reservations belonging to a task/attempt.
CREATE INDEX IF NOT EXISTS idx_rate_limit_reservations_task
ON gateway_rate_limit_reservations(task_id, attempt_id);

View File

@ -0,0 +1,5 @@
-- Per-model cooldown: a platform model with cooldown_until in the future is
-- skipped during candidate routing.
ALTER TABLE IF EXISTS platform_models
ADD COLUMN IF NOT EXISTS cooldown_until timestamptz;
-- Supports filtering candidates by cooldown expiry.
CREATE INDEX IF NOT EXISTS idx_platform_models_cooldown
ON platform_models(cooldown_until);

View File

@ -18,6 +18,7 @@ import type {
GatewayWalletTransaction,
IntegrationPlatform,
ModelCatalogResponse,
ModelRateLimitStatus,
PlatformModel,
PricingRule,
PricingRuleSet,
@ -54,6 +55,7 @@ import {
listBaseModels,
listCatalogProviders,
listModelCatalog,
listModelRateLimitStatuses,
listModels,
listPlayableApiKeys,
listPlayableModels,
@ -140,6 +142,7 @@ type DataKey =
| 'runnerPolicy'
| 'runtimePolicySets'
| 'rateLimitWindows'
| 'modelRateLimits'
| 'tenants'
| 'users'
| 'userGroups'
@ -183,6 +186,7 @@ export function App() {
const [accessRules, setAccessRules] = useState<GatewayAccessRule[]>([]);
const [auditLogs, setAuditLogs] = useState<GatewayAuditLog[]>([]);
const [rateLimitWindows, setRateLimitWindows] = useState<RateLimitWindow[]>([]);
const [modelRateLimits, setModelRateLimits] = useState<ModelRateLimitStatus[]>([]);
const [tenants, setTenants] = useState<GatewayTenant[]>([]);
const [users, setUsers] = useState<GatewayUser[]>([]);
const [userGroups, setUserGroups] = useState<UserGroup[]>([]);
@ -239,6 +243,23 @@ export function App() {
useEffect(() => {
void ensureRouteData(token);
}, [activePage, adminSection, taskListRequestKey, transactionListRequestKey, workspaceSection, token]);
useEffect(() => {
if (!token || activePage !== 'admin' || adminSection !== 'platforms') return undefined;
const timer = window.setInterval(() => {
void Promise.all([listModelRateLimitStatuses(token), listPlatforms(token)])
.then(([rateLimitResponse, platformResponse]) => {
setModelRateLimits(rateLimitResponse.items);
setPlatforms(platformResponse.items);
loadedDataKeysRef.current.add('modelRateLimits');
loadedDataKeysRef.current.add('platforms');
})
.catch(() => {
loadedDataKeysRef.current.delete('modelRateLimits');
loadedDataKeysRef.current.delete('platforms');
});
}, 3000);
return () => window.clearInterval(timer);
}, [activePage, adminSection, token]);
useEffect(() => {
function handlePopState() {
applyRoute(parseAppRoute());
@ -278,9 +299,10 @@ export function App() {
platforms,
pricingRules,
pricingRuleSets,
providers,
rateLimitWindows,
runtimePolicySets,
providers,
rateLimitWindows,
modelRateLimits,
runtimePolicySets,
taskResult,
tasks,
tenants,
@ -288,7 +310,7 @@ export function App() {
users,
walletAccounts,
walletTransactions,
}), [accessRules, apiKeys, auditLogs, baseModels, modelCatalog, models, networkProxyConfig, platforms, pricingRuleSets, pricingRules, providers, rateLimitWindows, runnerPolicy, runtimePolicySets, taskResult, tasks, tenants, userGroups, users, walletAccounts, walletTransactions]);
}), [accessRules, apiKeys, auditLogs, baseModels, modelCatalog, modelRateLimits, models, networkProxyConfig, platforms, pricingRuleSets, pricingRules, providers, rateLimitWindows, runnerPolicy, runtimePolicySets, taskResult, tasks, tenants, userGroups, users, walletAccounts, walletTransactions]);
async function refresh(nextToken = token) {
await ensureRouteData(nextToken, true);
@ -392,6 +414,9 @@ export function App() {
case 'rateLimitWindows':
setRateLimitWindows((await listRateLimitWindows(nextToken)).items);
return;
case 'modelRateLimits':
setModelRateLimits((await listModelRateLimitStatuses(nextToken)).items);
return;
case 'tenants':
setTenants((await listTenants(nextToken)).items);
return;
@ -514,7 +539,7 @@ export function App() {
const modelsResponse = await replacePlatformModels(token, platform.id, modelBindings);
setPlatforms((current) => [platformForState, ...current.filter((item) => item.id !== platform.id)]);
setModels((current) => [...current.filter((model) => model.platformId !== platform.id), ...modelsResponse.items]);
invalidateDataKeys('modelCatalog');
invalidateDataKeys('modelCatalog', 'modelRateLimits');
setCoreState('ready');
setCoreMessage(input.platformId
? `平台已更新,当前绑定 ${input.models.length} 个模型。`
@ -533,7 +558,7 @@ export function App() {
await deletePlatform(token, platformId);
setPlatforms((current) => current.filter((item) => item.id !== platformId));
setModels((current) => current.filter((item) => item.platformId !== platformId));
invalidateDataKeys('modelCatalog');
invalidateDataKeys('modelCatalog', 'modelRateLimits');
setCoreState('ready');
setCoreMessage('平台已删除。');
} catch (err) {
@ -788,6 +813,7 @@ export function App() {
setAccessRules([]);
setAuditLogs([]);
setRateLimitWindows([]);
setModelRateLimits([]);
setTenants([]);
setUsers([]);
setUserGroups([]);
@ -1132,7 +1158,7 @@ function dataKeysForRoute(
if (activePage !== 'admin') return [];
switch (adminSection) {
case 'overview':
return ['platforms', 'models', 'providers', 'pricingRules', 'runtimePolicySets', 'rateLimitWindows', 'tenants', 'users', 'userGroups', 'accessRules'];
return ['platforms', 'models', 'providers', 'pricingRules', 'runtimePolicySets', 'rateLimitWindows', 'modelRateLimits', 'tenants', 'users', 'userGroups', 'accessRules'];
case 'globalModels':
return ['providers'];
case 'pricing':
@ -1142,7 +1168,7 @@ function dataKeysForRoute(
case 'baseModels':
return ['baseModels', 'providers', 'pricingRuleSets', 'runtimePolicySets'];
case 'platforms':
return ['platforms', 'models', 'providers', 'baseModels', 'pricingRuleSets', 'networkProxyConfig'];
return ['platforms', 'models', 'modelRateLimits', 'providers', 'baseModels', 'pricingRuleSets', 'networkProxyConfig'];
case 'tenants':
return ['tenants', 'userGroups'];
case 'users':

View File

@ -22,6 +22,7 @@ import type {
IntegrationPlatform,
ListResponse,
ModelCatalogResponse,
ModelRateLimitStatus,
PlatformModel,
PlayableGatewayApiKey,
PricingRule,
@ -696,6 +697,10 @@ export async function listRateLimitWindows(token: string): Promise<ListResponse<
return request<ListResponse<RateLimitWindow>>('/api/admin/runtime/rate-limit-windows', { token });
}
/** Fetches the live per-model rate-limit usage snapshot for the admin console. */
export async function listModelRateLimitStatuses(token: string): Promise<ListResponse<ModelRateLimitStatus>> {
  const path = '/api/admin/runtime/model-rate-limits';
  return request<ListResponse<ModelRateLimitStatus>>(path, { token });
}
export async function getNetworkProxyConfig(token: string): Promise<GatewayNetworkProxyConfig> {
return request<GatewayNetworkProxyConfig>('/api/admin/config/network-proxy', { token });
}

View File

@ -13,6 +13,7 @@ import type {
GatewayWalletTransaction,
IntegrationPlatform,
ModelCatalogResponse,
ModelRateLimitStatus,
PlatformModel,
PricingRule,
PricingRuleSet,
@ -35,6 +36,7 @@ export interface ConsoleData {
pricingRuleSets: PricingRuleSet[];
providers: CatalogProvider[];
rateLimitWindows: RateLimitWindow[];
modelRateLimits: ModelRateLimitStatus[];
runtimePolicySets: RuntimePolicySet[];
taskResult: GatewayTask | null;
tasks: GatewayTask[];

View File

@ -137,8 +137,9 @@ export function AdminPage(props: {
<PlatformManagementPanel
baseModels={props.data.baseModels}
message={props.operationMessage}
networkProxyConfig={props.data.networkProxyConfig}
platformModels={props.data.models}
networkProxyConfig={props.data.networkProxyConfig}
modelRateLimits={props.data.modelRateLimits}
platformModels={props.data.models}
platforms={props.data.platforms}
pricingRuleSets={props.data.pricingRuleSets}
providers={props.data.providers}

View File

@ -1,6 +1,6 @@
import { useEffect, useMemo, useState, type FormEvent, type ReactNode } from 'react';
import { Boxes, CheckCircle2, Globe2, KeyRound, Pencil, Plus, RotateCcw, Search, ServerCog, ShieldCheck, SlidersHorizontal, Trash2, X } from 'lucide-react';
import type { BaseModelCatalogItem, CatalogProvider, IntegrationPlatform, PlatformModel, PricingRuleSet } from '@easyai-ai-gateway/contracts';
import { Boxes, CheckCircle2, Gauge, Globe2, KeyRound, Pencil, Plus, RotateCcw, Search, ServerCog, ShieldCheck, SlidersHorizontal, Trash2, X } from 'lucide-react';
import type { BaseModelCatalogItem, CatalogProvider, IntegrationPlatform, ModelRateLimitStatus, PlatformModel, PricingRuleSet } from '@easyai-ai-gateway/contracts';
import { Badge, Button, Card, CardContent, CardHeader, CardTitle, ConfirmDialog, EmptyState, FormDialog, Input, Label, ScreenMessage, Select, Table, TableCell, TableHead, TableRow } from '../../components/ui';
import type { LoadState, PlatformWithModelsInput } from '../../types';
import {
@ -22,8 +22,9 @@ import { ModelCatalogCard } from './ModelCatalogCard';
export function PlatformManagementPanel(props: {
baseModels: BaseModelCatalogItem[];
message: string;
networkProxyConfig: { globalHttpProxy?: string; globalHttpProxySet: boolean; globalHttpProxySource?: string } | null;
message: string;
modelRateLimits: ModelRateLimitStatus[];
networkProxyConfig: { globalHttpProxy?: string; globalHttpProxySet: boolean; globalHttpProxySource?: string } | null;
platforms: IntegrationPlatform[];
platformModels: PlatformModel[];
pricingRuleSets: PricingRuleSet[];
@ -33,8 +34,9 @@ export function PlatformManagementPanel(props: {
onSavePlatform: (input: PlatformWithModelsInput) => Promise<void>;
}) {
const defaultProvider = props.providers[0]?.providerKey ?? props.baseModels[0]?.providerKey ?? '';
const [now, setNow] = useState(() => Date.now());
const [dialogOpen, setDialogOpen] = useState(false);
const [viewMode, setViewMode] = useState<'platforms' | 'models'>('platforms');
const [viewMode, setViewMode] = useState<'platforms' | 'models' | 'limits'>('platforms');
const [modelQuery, setModelQuery] = useState('');
const [selectedPlatformId, setSelectedPlatformId] = useState('');
const [validationMessage, setValidationMessage] = useState('');
@ -72,6 +74,11 @@ export function PlatformManagementPanel(props: {
});
}, [modelQuery, platformMap, props.platformModels, selectedModelPlatformId]);
useEffect(() => {
const timer = window.setInterval(() => setNow(Date.now()), 1000);
return () => window.clearInterval(timer);
}, []);
function openCreateDialog() {
const provider = providerOptions[0] ?? '';
const providerName = providerDisplayName(provider, providerMap);
@ -166,12 +173,14 @@ export function PlatformManagementPanel(props: {
</Button>
</CardHeader>
<CardContent>
<div className="platformViewTabs">
<button type="button" data-active={viewMode === 'platforms'} onClick={() => setViewMode('platforms')}></button>
<button type="button" data-active={viewMode === 'models'} onClick={() => setViewMode('models')}></button>
</div>
{viewMode === 'platforms' ? (
<PlatformTable
<div className="platformViewTabs">
<button type="button" data-active={viewMode === 'platforms'} onClick={() => setViewMode('platforms')}></button>
<button type="button" data-active={viewMode === 'models'} onClick={() => setViewMode('models')}></button>
<button type="button" data-active={viewMode === 'limits'} onClick={() => setViewMode('limits')}></button>
</div>
{viewMode === 'platforms' ? (
<PlatformTable
now={now}
platformModelCount={platformModelCount}
platforms={props.platforms}
providerMap={providerMap}
@ -180,8 +189,8 @@ export function PlatformManagementPanel(props: {
onCreate={openCreateDialog}
onEdit={openEditDialog}
/>
) : (
<PlatformModelTable
) : viewMode === 'models' ? (
<PlatformModelTable
baseModels={props.baseModels}
modelQuery={modelQuery}
models={filteredPlatformModels}
@ -190,9 +199,12 @@ export function PlatformManagementPanel(props: {
platforms={props.platforms}
providerMap={providerMap}
onModelQueryChange={setModelQuery}
onPlatformChange={setSelectedPlatformId}
/>
)}
now={now}
onPlatformChange={setSelectedPlatformId}
/>
) : (
<RateLimitStatusTable statuses={props.modelRateLimits} platformMap={platformMap} now={now} />
)}
{props.message && <p className="formMessage">{props.message}</p>}
</CardContent>
</Card>
@ -391,6 +403,7 @@ function ModelBindingPolicy(props: { form: PlatformWizardForm; onChange: (value:
}
function PlatformTable(props: {
now: number;
platformModelCount: Map<string, number>;
platforms: IntegrationPlatform[];
providerMap: Map<string, CatalogProvider>;
@ -428,6 +441,7 @@ function PlatformTable(props: {
const pricing = platformPricingSummary(platform, props.pricingRuleSets);
const rateLimit = platformRateLimitSummary(platform.rateLimitPolicy);
const runtime = platformRuntimeSummary(platform);
const platformCooldownMs = cooldownRemainingMs(platform.cooldownUntil, props.now);
return (
<TableRow key={platform.id}>
<TableCell>
@ -458,8 +472,14 @@ function PlatformTable(props: {
<TableCell>{props.platformModelCount.get(platform.id) ?? 0}</TableCell>
<TableCell>
<span className="platformTableName">
<strong><Badge variant={platform.status === 'enabled' ? 'success' : 'secondary'}>{platform.status}</Badge></strong>
<small>{runtime}</small>
<strong>
{platformCooldownMs > 0 ? (
<Badge variant="warning"></Badge>
) : (
<Badge variant={platform.status === 'enabled' ? 'success' : 'secondary'}>{platform.status}</Badge>
)}
</strong>
<small>{platformCooldownMs > 0 ? `剩余 ${formatCooldownRemaining(platformCooldownMs)}` : runtime}</small>
</span>
</TableCell>
<TableCell>
@ -487,6 +507,7 @@ function PlatformModelTable(props: {
platformMap: Map<string, IntegrationPlatform>;
platforms: IntegrationPlatform[];
providerMap: Map<string, CatalogProvider>;
now: number;
onModelQueryChange: (value: string) => void;
onPlatformChange: (value: string) => void;
}) {
@ -520,12 +541,14 @@ function PlatformModelTable(props: {
const provider = platform ? props.providerMap.get(platform.provider) : undefined;
const baseModel = findBaseModelForPlatformModel(platform, props.baseModels, model);
const modelIconPath = readPlatformModelIconPath(model, baseModel);
const modelCooldownMs = cooldownRemainingMs(model.cooldownUntil, props.now);
return (
<ModelCatalogCard
key={model.id}
badges={[
<Badge variant="outline">{model.modelType.join(', ')}</Badge>,
<Badge variant={model.enabled ? 'success' : 'secondary'}>{model.enabled ? 'enabled' : 'disabled'}</Badge>,
<Badge key="type" variant="outline">{model.modelType.join(', ')}</Badge>,
...(modelCooldownMs > 0 ? [<Badge key="cooldown" variant="warning"> · {formatCooldownRemaining(modelCooldownMs)}</Badge>] : []),
<Badge key="enabled" variant={model.enabled ? 'success' : 'secondary'}>{model.enabled ? 'enabled' : 'disabled'}</Badge>,
]}
chips={platformModelChips(model)}
iconPath={modelIconPath || provider?.iconPath}
@ -545,6 +568,62 @@ function PlatformModelTable(props: {
<EmptyState title="没有匹配的模型" description="换个平台或搜索关键词试试。" />
)
)}
</section>
);
}
// Read-only table of live rate-limit state per platform model: concurrency,
// TPM, RPM, runtime status (cooldown/enabled) and an overall load bar.
// `now` is a ticking timestamp supplied by the parent so cooldown countdowns
// re-render each second without this component owning a timer.
// NOTE(review): several JSX text nodes (headers, labels) appear to have lost
// their CJK text in extraction — confirm against the original file.
function RateLimitStatusTable(props: { statuses: ModelRateLimitStatus[]; platformMap: Map<string, IntegrationPlatform>; now: number }) {
// No data yet: models only appear here after producing traffic.
if (!props.statuses.length) {
return <EmptyState title="暂无限流状态" description="模型产生请求后会在这里显示实时 RPM、TPM 和并发窗口。" />;
}
return (
<section className="platformLimitView">
<div className="platformLimitHeader">
<span><Gauge size={15} /></span>
<small> 3 TPM + </small>
</div>
<div className="platformLimitTableViewport">
<Table className="platformDataTable platformLimitTable">
<TableRow className="shTableHeader">
<TableHead></TableHead>
<TableHead></TableHead>
<TableHead></TableHead>
<TableHead>TPM</TableHead>
<TableHead>RPM</TableHead>
<TableHead></TableHead>
<TableHead></TableHead>
</TableRow>
{props.statuses.map((status) => {
// Resolve the owning platform for display; falls back to the
// denormalized name carried on the status row.
const platform = props.platformMap.get(status.platformId);
return (
<TableRow key={status.platformModelId}>
<TableCell>
<span className="platformTableName">
<strong>{status.displayName || status.modelAlias || status.modelName}</strong>
<small>{status.providerModelName || status.modelName}</small>
</span>
</TableCell>
<TableCell>
<span className="platformTableName">
<strong>{platform ? platformDisplayName(platform) : status.platformName}</strong>
<small>{status.provider}</small>
</span>
</TableCell>
<TableCell>{metricCell(status.concurrent)}</TableCell>
<TableCell>{metricCell(status.tpm, true)}</TableCell>
<TableCell>{metricCell(status.rpm)}</TableCell>
<TableCell>{modelRuntimeStatusCell(status, props.now)}</TableCell>
<TableCell>
<span className="rateLoadCell">
<strong>{formatPercent(status.loadRatio)}</strong>
<span className="rateLoadTrack"><i style={{ width: `${Math.min(status.loadRatio * 100, 100)}%` }} /></span>
</span>
</TableCell>
</TableRow>
);
})}
</Table>
</div>
</section>
);
}
@ -1112,7 +1191,63 @@ function rateLimitMetricText(metric: string) {
concurrent: '并发',
queue_size: '队列',
};
return labels[metric] ?? metric;
return labels[metric] ?? metric;
}
// Renders one usage metric as "current / limit" for the rate-limit table.
// `metric` is any of the per-metric windows on ModelRateLimitStatus (the
// 'rpm' field type stands in for all of them). When `includeReserved` is set
// and a reservation is pending, the sub-line shows used vs reserved amounts
// instead of the window ratio (used for TPM, which pre-charges reservations).
// NOTE(review): the unlimited branch's <small> label appears to have lost its
// text in extraction — confirm against the original file.
function metricCell(metric: ModelRateLimitStatus['rpm'], includeReserved = false) {
// No configured limit: show the current value only.
if (!metric.limited) return <span className="rateMetricCell"><strong>{formatLimit(metric.currentValue)} / </strong><small></small></span>;
return (
<span className="rateMetricCell">
<strong>{formatLimit(metric.currentValue)} / {formatLimit(metric.limitValue)}</strong>
<small>{includeReserved && metric.reservedValue > 0 ? `已用 ${formatLimit(metric.usedValue)} · 预占 ${formatLimit(metric.reservedValue)}` : `窗口 ${formatPercent(metric.ratio)}`}</small>
</span>
);
}
// Renders the runtime-status column for one model row. Priority order:
// model-level cooldown, then platform-level cooldown, then the enabled flag.
// NOTE(review): the Badge labels render empty here — their CJK text appears
// to have been stripped in extraction; confirm against the original file.
function modelRuntimeStatusCell(status: ModelRateLimitStatus, now: number) {
const modelCooldownMs = cooldownRemainingMs(status.modelCooldownUntil, now);
const platformCooldownMs = cooldownRemainingMs(status.platformCooldownUntil, now);
// Model-level cooldown takes precedence over everything else.
if (modelCooldownMs > 0) {
return (
<span className="platformTableName">
<strong><Badge variant="warning"></Badge></strong>
<small> {formatCooldownRemaining(modelCooldownMs)}</small>
</span>
);
}
// Platform-level cooldown affects every model on the platform.
if (platformCooldownMs > 0) {
return (
<span className="platformTableName">
<strong><Badge variant="warning"></Badge></strong>
<small> {formatCooldownRemaining(platformCooldownMs)}</small>
</span>
);
}
// No cooldown: reflect whether the model participates in routing.
return (
<span className="platformTableName">
<strong><Badge variant={status.enabled ? 'success' : 'secondary'}>{status.enabled ? '可用' : '已停用'}</Badge></strong>
<small>{status.enabled ? '参与路由' : '不参与路由'}</small>
</span>
);
}
/**
 * Milliseconds of cooldown remaining at `now`, given an ISO timestamp string.
 * Returns 0 when the timestamp is missing, unparseable, or already elapsed.
 */
function cooldownRemainingMs(cooldownUntil: string | undefined, now: number) {
  if (!cooldownUntil) return 0;
  const untilMs = Date.parse(cooldownUntil);
  return Number.isFinite(untilMs) ? Math.max(untilMs - now, 0) : 0;
}
/**
 * Human-readable label for remaining cooldown time: minutes (one decimal,
 * rounded up) at or above one minute, otherwise whole seconds (minimum 1).
 */
function formatCooldownRemaining(milliseconds: number) {
  const minutes = milliseconds / 60000;
  if (minutes >= 1) {
    const roundedUp = Math.ceil(minutes * 10) / 10;
    return `${trimNumber(roundedUp)} 分钟`;
  }
  const seconds = Math.ceil(milliseconds / 1000);
  return `${Math.max(seconds, 1)}`;
}
/** Formats a 0-1 ratio as a percentage string; non-finite or non-positive values become '0%'. */
function formatPercent(value: number) {
  const positive = Number.isFinite(value) && value > 0;
  return positive ? `${trimNumber(value * 100)}%` : '0%';
}
function platformRuntimeSummary(platform: IntegrationPlatform) {

View File

@ -964,6 +964,104 @@
gap: 12px;
}
/* Rate-limit panel: header row above a scrollable table region, capped to
   the viewport height so the page itself never scrolls for this view. */
.platformLimitView {
display: grid;
grid-template-rows: auto minmax(0, 1fr);
gap: 10px;
min-height: 0;
max-height: calc(100dvh - 220px);
overflow: hidden;
}
/* Panel header: title on the left, auxiliary info on the right. */
.platformLimitHeader {
display: flex;
align-items: center;
justify-content: space-between;
gap: 12px;
color: var(--muted-foreground);
font-size: var(--font-size-xs);
}
/* Inline header labels (icon + text pairs) are emphasized over the muted header. */
.platformLimitHeader span {
display: inline-flex;
align-items: center;
gap: 6px;
color: var(--text-strong);
font-weight: var(--font-weight-semibold);
}
/* Scroll container for the table; both axes scroll inside this box and
   overscroll is contained so it does not chain to the page. */
.platformLimitTableViewport {
min-width: 0;
min-height: 0;
max-height: calc(100dvh - 270px);
overflow: auto;
border: 1px solid var(--border-subtle);
border-radius: 10px;
background: #fff;
overscroll-behavior: contain;
}
/* The table grows to its content width (horizontal scroll lives on the
   viewport wrapper); border/radius are moved to the wrapper. */
.platformLimitTable {
width: max-content;
min-width: 100%;
overflow: visible;
border: 0;
border-radius: 0;
}
/* Keep the header row visible while the viewport scrolls vertically. */
.platformLimitTable .shTableHeader {
position: sticky;
top: 0;
z-index: 2;
}
/* Seven-column layout: name, then metric/status columns sized to content. */
.platformLimitTable .shTableRow {
grid-template-columns: clamp(150px, 16vw, 220px) minmax(148px, 1fr) minmax(104px, max-content) minmax(136px, max-content) minmax(122px, max-content) minmax(132px, max-content) minmax(128px, max-content);
min-width: 920px;
}
.platformLimitTable .shTableHead,
.platformLimitTable .shTableCell {
padding-right: 10px;
padding-left: 10px;
}
/* Two-line metric cells: primary value on top, detail line underneath. */
.rateMetricCell,
.rateLoadCell {
display: grid;
min-width: 0;
gap: 4px;
}
.rateMetricCell strong,
.rateLoadCell strong {
color: var(--text-strong);
font-size: var(--font-size-sm);
}
/* Detail line truncates with an ellipsis instead of wrapping. */
.rateMetricCell small {
overflow: hidden;
color: var(--muted-foreground);
font-size: var(--font-size-xs);
text-overflow: ellipsis;
white-space: nowrap;
}
/* Thin load-ratio bar: the track… */
.rateLoadTrack {
display: block;
height: 6px;
overflow: hidden;
border-radius: 999px;
background: #eef2f6;
}
/* …and its fill, whose width is presumably set inline per row. */
.rateLoadTrack i {
display: block;
height: 100%;
border-radius: inherit;
background: #0f766e;
}
.platformModelToolbar {
display: grid;
grid-template-columns: minmax(220px, 0.6fr) minmax(260px, 1fr);

View File

@ -69,6 +69,7 @@ export interface IntegrationPlatform {
credentialsPreview?: Record<string, unknown>;
retryPolicy?: RateLimitPolicy | Record<string, unknown>;
rateLimitPolicy?: RateLimitPolicy;
cooldownUntil?: string;
createdAt: string;
updatedAt: string;
}
@ -644,6 +645,7 @@ export interface PlatformModel {
rateLimitPolicy?: RateLimitPolicy;
runtimePolicySetId?: string;
runtimePolicyOverride?: RuntimePolicyOverride;
cooldownUntil?: string;
enabled: boolean;
createdAt: string;
updatedAt: string;
@ -748,6 +750,36 @@ export interface RateLimitWindow {
updatedAt: string;
}
/**
 * Snapshot of a single rate-limit metric (e.g. concurrent/rpm/tpm) for a model.
 * Rendered as `currentValue / limitValue` with either a used/reserved breakdown
 * or a window percentage derived from `ratio`.
 */
export interface RateLimitMetricStatus {
// Value currently counted against the limit.
currentValue: number;
// Portion of currentValue that is already consumed.
usedValue: number;
// Portion held by in-flight reservations; shown separately when > 0.
reservedValue: number;
// Configured ceiling for this metric; meaningful only when `limited` is true.
limitValue: number;
// Whether a limit is configured at all (unlimited metrics render without a cap).
limited: boolean;
// currentValue relative to the limit window, formatted as a percentage in the UI.
ratio: number;
// When the current window resets, if known. Presumably an ISO timestamp — TODO confirm against the API.
resetAt?: string;
}
/**
 * Runtime rate-limit status for one platform model, combining identity,
 * availability, cooldown timestamps, and per-metric counters.
 */
export interface ModelRateLimitStatus {
platformModelId: string;
platformId: string;
platformName: string;
provider: string;
modelName: string;
// Provider-side model identifier, when it differs from modelName.
providerModelName?: string;
modelAlias?: string;
displayName: string;
modelType: string[];
// Whether the model participates in routing (drives the 可用/已停用 badge).
enabled: boolean;
// Effective policy; may be an untyped override object.
rateLimitPolicy?: RateLimitPolicy | Record<string, unknown>;
// Cooldown end timestamps parsed with Date.parse; the model-level one takes
// precedence over the platform-level one in the status cell.
platformCooldownUntil?: string;
modelCooldownUntil?: string;
// Per-metric snapshots: concurrency, requests/min, tokens/min.
concurrent: RateLimitMetricStatus;
rpm: RateLimitMetricStatus;
tpm: RateLimitMetricStatus;
// Aggregate load indicator rendered as the load bar.
loadRatio: number;
}
export interface GatewayNetworkProxyConfig {
globalHttpProxy?: string;
globalHttpProxySet: boolean;