feat: improve model rate limit tracking
This commit is contained in:
parent
05632172d0
commit
ba850a06c6
@ -30,6 +30,17 @@ func main() {
|
||||
os.Exit(1)
|
||||
}
|
||||
defer db.Close()
|
||||
if recovery, err := db.RecoverInterruptedRuntimeState(ctx); err != nil {
|
||||
logger.Error("recover interrupted runtime state failed", "error", err)
|
||||
os.Exit(1)
|
||||
} else if recovery.ReleasedConcurrencyLeases > 0 || recovery.ReleasedRateReservations > 0 || recovery.FailedAttempts > 0 || recovery.FailedTasks > 0 {
|
||||
logger.Warn("interrupted runtime state recovered",
|
||||
"releasedConcurrencyLeases", recovery.ReleasedConcurrencyLeases,
|
||||
"releasedRateReservations", recovery.ReleasedRateReservations,
|
||||
"failedAttempts", recovery.FailedAttempts,
|
||||
"failedTasks", recovery.FailedTasks,
|
||||
)
|
||||
}
|
||||
|
||||
server := &http.Server{
|
||||
Addr: cfg.HTTPAddr,
|
||||
|
||||
@ -665,6 +665,23 @@ WHERE reference_type = 'gateway_task'
|
||||
"runtimePolicySetId": rateLimitPolicySet.ID,
|
||||
"runtimePolicyOverride": map[string]any{},
|
||||
}, http.StatusCreated, &rateLimitPlatformModel)
|
||||
var rateLimitFailedTask struct {
|
||||
Task struct {
|
||||
Status string `json:"status"`
|
||||
ErrorCode string `json:"errorCode"`
|
||||
} `json:"task"`
|
||||
}
|
||||
doJSON(t, server.URL, http.MethodPost, "/api/v1/chat/completions", apiKeyResponse.Secret, map[string]any{
|
||||
"model": rateLimitedModel,
|
||||
"runMode": "simulation",
|
||||
"simulation": true,
|
||||
"simulationDurationMs": 5,
|
||||
"simulationProfile": "non_retryable_failure",
|
||||
"messages": []map[string]any{{"role": "user", "content": "failed first"}},
|
||||
}, http.StatusAccepted, &rateLimitFailedTask)
|
||||
if rateLimitFailedTask.Task.Status != "failed" || rateLimitFailedTask.Task.ErrorCode != "bad_request" {
|
||||
t.Fatalf("failed rate-limited task should fail before consuming rpm: %+v", rateLimitFailedTask.Task)
|
||||
}
|
||||
var rateLimitTaskOne struct {
|
||||
Task struct {
|
||||
Status string `json:"status"`
|
||||
@ -863,7 +880,9 @@ WHERE reference_type = 'gateway_task'
|
||||
{platformID: degradedPlatform.ID, runtimePolicySetID: degradePolicySet.ID},
|
||||
{platformID: degradeSuccessPlatform.ID},
|
||||
} {
|
||||
var platformModel map[string]any
|
||||
var platformModel struct {
|
||||
ID string `json:"id"`
|
||||
}
|
||||
doJSON(t, server.URL, http.MethodPost, "/api/admin/platforms/"+item.platformID+"/models", loginResponse.AccessToken, map[string]any{
|
||||
"canonicalModelKey": "openai:gpt-4o-mini",
|
||||
"modelName": degradeModel,
|
||||
@ -887,14 +906,23 @@ WHERE reference_type = 'gateway_task'
|
||||
"messages": []map[string]any{{"role": "user", "content": "degrade please"}},
|
||||
}, http.StatusAccepted, °radeTask)
|
||||
if degradeTask.Task.Status != "succeeded" {
|
||||
t.Fatalf("degrade task should fail over after cooling down failed platform: %+v", degradeTask.Task)
|
||||
t.Fatalf("degrade task should fail over after cooling down failed model: %+v", degradeTask.Task)
|
||||
}
|
||||
var cooledDown bool
|
||||
if err := testPool.QueryRow(ctx, `SELECT COALESCE(cooldown_until > now(), false) FROM integration_platforms WHERE id = $1::uuid`, degradedPlatform.ID).Scan(&cooledDown); err != nil {
|
||||
t.Fatalf("read degraded platform cooldown: %v", err)
|
||||
var cooledDownModel bool
|
||||
var cooledDownPlatform bool
|
||||
if err := testPool.QueryRow(ctx, `
|
||||
SELECT COALESCE(m.cooldown_until > now(), false), COALESCE(p.cooldown_until > now(), false)
|
||||
FROM platform_models m
|
||||
JOIN integration_platforms p ON p.id = m.platform_id
|
||||
WHERE m.platform_id = $1::uuid
|
||||
AND m.model_name = $2`, degradedPlatform.ID, degradeModel).Scan(&cooledDownModel, &cooledDownPlatform); err != nil {
|
||||
t.Fatalf("read degraded model cooldown: %v", err)
|
||||
}
|
||||
if !cooledDown {
|
||||
t.Fatal("degrade policy should set platform cooldown_until")
|
||||
if !cooledDownModel {
|
||||
t.Fatal("degrade policy should set platform model cooldown_until")
|
||||
}
|
||||
if cooledDownPlatform {
|
||||
t.Fatal("degrade policy should not cool down the entire platform")
|
||||
}
|
||||
|
||||
var autoDisablePolicySet struct {
|
||||
|
||||
@ -490,7 +490,7 @@ func (s *Server) estimatePricing(w http.ResponseWriter, r *http.Request) {
|
||||
estimate, err := s.runner.Estimate(r.Context(), kind, model, body, user)
|
||||
if err != nil {
|
||||
if errors.Is(err, store.ErrNoModelCandidate) {
|
||||
writeError(w, http.StatusNotFound, "no enabled platform model matches request")
|
||||
writeError(w, statusFromRunError(err), err.Error(), store.ModelCandidateErrorCode(err))
|
||||
return
|
||||
}
|
||||
s.logger.Error("estimate pricing failed", "error", err)
|
||||
@ -510,6 +510,16 @@ func (s *Server) listRateLimitWindows(w http.ResponseWriter, r *http.Request) {
|
||||
writeJSON(w, http.StatusOK, map[string]any{"items": items})
|
||||
}
|
||||
|
||||
func (s *Server) listModelRateLimitStatuses(w http.ResponseWriter, r *http.Request) {
|
||||
items, err := s.store.ListModelRateLimitStatuses(r.Context())
|
||||
if err != nil {
|
||||
s.logger.Error("list model rate limit statuses failed", "error", err)
|
||||
writeError(w, http.StatusInternalServerError, "list model rate limit statuses failed")
|
||||
return
|
||||
}
|
||||
writeJSON(w, http.StatusOK, map[string]any{"items": items})
|
||||
}
|
||||
|
||||
func (s *Server) createTask(kind string, compatible bool) http.Handler {
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
user, ok := auth.UserFromContext(r.Context())
|
||||
@ -557,7 +567,7 @@ func (s *Server) createTask(kind string, compatible bool) http.Handler {
|
||||
if runErr != nil {
|
||||
status := statusFromRunError(runErr)
|
||||
errorPayload := map[string]any{
|
||||
"code": clients.ErrorCode(runErr),
|
||||
"code": runErrorCode(runErr),
|
||||
"message": runErr.Error(),
|
||||
"status": status,
|
||||
}
|
||||
@ -581,7 +591,7 @@ func (s *Server) createTask(kind string, compatible bool) http.Handler {
|
||||
}
|
||||
result, runErr := s.runner.Execute(r.Context(), task, user)
|
||||
if runErr != nil {
|
||||
writeError(w, statusFromRunError(runErr), runErr.Error(), clients.ErrorCode(runErr))
|
||||
writeError(w, statusFromRunError(runErr), runErr.Error(), runErrorCode(runErr))
|
||||
return
|
||||
}
|
||||
writeJSON(w, http.StatusOK, result.Output)
|
||||
@ -634,10 +644,14 @@ func scopeForTaskKind(kind string) string {
|
||||
|
||||
func statusFromRunError(err error) int {
|
||||
switch {
|
||||
case store.ModelCandidateErrorCode(err) == "platform_cooling_down" || store.ModelCandidateErrorCode(err) == "model_cooling_down":
|
||||
return http.StatusTooManyRequests
|
||||
case errors.Is(err, store.ErrNoModelCandidate):
|
||||
return http.StatusNotFound
|
||||
case errors.Is(err, store.ErrRateLimited):
|
||||
return http.StatusTooManyRequests
|
||||
case clients.ErrorCode(err) == "rate_limit":
|
||||
return http.StatusTooManyRequests
|
||||
case errors.Is(err, store.ErrInsufficientWalletBalance):
|
||||
return http.StatusPaymentRequired
|
||||
default:
|
||||
@ -645,6 +659,13 @@ func statusFromRunError(err error) int {
|
||||
}
|
||||
}
|
||||
|
||||
func runErrorCode(err error) string {
|
||||
if errors.Is(err, store.ErrNoModelCandidate) {
|
||||
return store.ModelCandidateErrorCode(err)
|
||||
}
|
||||
return clients.ErrorCode(err)
|
||||
}
|
||||
|
||||
func (s *Server) listTasks(w http.ResponseWriter, r *http.Request) {
|
||||
user, ok := auth.UserFromContext(r.Context())
|
||||
if !ok {
|
||||
|
||||
@ -107,6 +107,7 @@ func NewServer(cfg config.Config, db *store.Store, logger *slog.Logger) http.Han
|
||||
mux.Handle("GET /api/v1/models", server.auth.Require(auth.PermissionBasic, http.HandlerFunc(server.listPlayableModels)))
|
||||
mux.Handle("GET /api/v1/playground/models", server.auth.Require(auth.PermissionBasic, http.HandlerFunc(server.listPlayableModels)))
|
||||
mux.Handle("GET /api/admin/runtime/rate-limit-windows", server.requireAdmin(auth.PermissionPower, http.HandlerFunc(server.listRateLimitWindows)))
|
||||
mux.Handle("GET /api/admin/runtime/model-rate-limits", server.requireAdmin(auth.PermissionPower, http.HandlerFunc(server.listModelRateLimitStatuses)))
|
||||
mux.Handle("POST /api/v1/chat/completions", server.auth.Require(auth.PermissionBasic, server.createTask("chat.completions", false)))
|
||||
mux.Handle("POST /api/v1/responses", server.auth.Require(auth.PermissionBasic, server.createTask("responses", false)))
|
||||
mux.Handle("POST /api/v1/images/generations", server.auth.Require(auth.PermissionBasic, server.createTask("images.generations", false)))
|
||||
|
||||
@ -5,6 +5,7 @@ import (
|
||||
"strings"
|
||||
|
||||
"github.com/easyai/easyai-ai-gateway/apps/api/internal/auth"
|
||||
"github.com/easyai/easyai-ai-gateway/apps/api/internal/clients"
|
||||
"github.com/easyai/easyai-ai-gateway/apps/api/internal/store"
|
||||
)
|
||||
|
||||
@ -108,3 +109,21 @@ func estimateRequestTokens(body map[string]any) int {
|
||||
}
|
||||
return len([]rune(text))/4 + 1
|
||||
}
|
||||
|
||||
func tokenUsageAmounts(usage clients.Usage) map[string]float64 {
|
||||
out := map[string]float64{}
|
||||
if usage.InputTokens > 0 {
|
||||
out["tpm_input"] = float64(usage.InputTokens)
|
||||
}
|
||||
if usage.OutputTokens > 0 {
|
||||
out["tpm_output"] = float64(usage.OutputTokens)
|
||||
}
|
||||
total := usage.TotalTokens
|
||||
if total <= 0 {
|
||||
total = usage.InputTokens + usage.OutputTokens
|
||||
}
|
||||
if total > 0 {
|
||||
out["tpm_total"] = float64(total)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
29
apps/api/internal/runner/limits_test.go
Normal file
29
apps/api/internal/runner/limits_test.go
Normal file
@ -0,0 +1,29 @@
|
||||
package runner
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/easyai/easyai-ai-gateway/apps/api/internal/clients"
|
||||
)
|
||||
|
||||
func TestTokenUsageAmountsUsesActualUsageForTPM(t *testing.T) {
|
||||
got := tokenUsageAmounts(clients.Usage{InputTokens: 12, OutputTokens: 8, TotalTokens: 21})
|
||||
|
||||
if got["tpm_input"] != 12 {
|
||||
t.Fatalf("expected input token amount 12, got %v", got["tpm_input"])
|
||||
}
|
||||
if got["tpm_output"] != 8 {
|
||||
t.Fatalf("expected output token amount 8, got %v", got["tpm_output"])
|
||||
}
|
||||
if got["tpm_total"] != 21 {
|
||||
t.Fatalf("expected total token amount 21, got %v", got["tpm_total"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestTokenUsageAmountsFallsBackToInputOutputTotal(t *testing.T) {
|
||||
got := tokenUsageAmounts(clients.Usage{InputTokens: 3, OutputTokens: 5})
|
||||
|
||||
if got["tpm_total"] != 8 {
|
||||
t.Fatalf("expected total token fallback 8, got %v", got["tpm_total"])
|
||||
}
|
||||
}
|
||||
@ -29,8 +29,8 @@ func (s *Service) applyCandidateFailurePolicies(ctx context.Context, taskID stri
|
||||
degradePolicy := effectiveRuntimePolicy(candidate.DegradePolicy, candidate.RuntimePolicyOverride, "degradePolicy")
|
||||
if failurePolicyMatches(degradePolicy, code, message) {
|
||||
cooldownSeconds := intFromPolicy(degradePolicy, "cooldownSeconds")
|
||||
if err := s.store.CooldownCandidatePlatform(ctx, candidate.PlatformID, cooldownSeconds); err == nil {
|
||||
_ = s.emit(ctx, taskID, "task.policy.degraded", "running", "degrade", 0.5, "candidate platform cooled down by failure policy", map[string]any{
|
||||
if err := s.store.CooldownCandidatePlatformModel(ctx, candidate.PlatformModelID, cooldownSeconds); err == nil {
|
||||
_ = s.emit(ctx, taskID, "task.policy.degraded", "running", "degrade", 0.5, "candidate model cooled down by failure policy", map[string]any{
|
||||
"platformId": candidate.PlatformID,
|
||||
"platformModelId": candidate.PlatformModelID,
|
||||
"cooldownSeconds": cooldownSeconds,
|
||||
@ -52,8 +52,8 @@ func (s *Service) applyFailoverAction(ctx context.Context, taskID string, candid
|
||||
}, decision.Match, decision.Info), simulated)
|
||||
}
|
||||
case "cooldown_and_next":
|
||||
if err := s.store.CooldownCandidatePlatform(ctx, candidate.PlatformID, decision.CooldownSeconds); err == nil {
|
||||
_ = s.emit(ctx, taskID, "task.policy.failover_cooled_down", "running", "failover", 0.51, "candidate platform cooled down by runner failover policy", addPolicyTracePayload(map[string]any{
|
||||
if err := s.store.CooldownCandidatePlatformModel(ctx, candidate.PlatformModelID, decision.CooldownSeconds); err == nil {
|
||||
_ = s.emit(ctx, taskID, "task.policy.failover_cooled_down", "running", "failover", 0.51, "candidate model cooled down by runner failover policy", addPolicyTracePayload(map[string]any{
|
||||
"platformId": candidate.PlatformID,
|
||||
"platformModelId": candidate.PlatformModelID,
|
||||
"cooldownSeconds": decision.CooldownSeconds,
|
||||
|
||||
@ -64,7 +64,7 @@ func (s *Service) execute(ctx context.Context, task store.GatewayTask, user *aut
|
||||
}
|
||||
candidates, err := s.store.ListModelCandidates(ctx, task.Model, modelType, user)
|
||||
if err != nil {
|
||||
failed, finishErr := s.failTask(ctx, task.ID, "no_model_candidate", err.Error(), task.RunMode == "simulation", err)
|
||||
failed, finishErr := s.failTask(ctx, task.ID, store.ModelCandidateErrorCode(err), err.Error(), task.RunMode == "simulation", err)
|
||||
if finishErr != nil {
|
||||
return Result{}, finishErr
|
||||
}
|
||||
@ -279,7 +279,7 @@ func (s *Service) runCandidate(ctx context.Context, task store.GatewayTask, user
|
||||
return clients.Response{}, err
|
||||
}
|
||||
reservations := s.rateLimitReservations(ctx, user, candidate, body)
|
||||
limitResult, err := s.store.ReserveRateLimits(ctx, task.ID, reservations)
|
||||
limitResult, err := s.store.ReserveRateLimits(ctx, task.ID, attemptID, reservations)
|
||||
if err != nil {
|
||||
clientErr := &clients.ClientError{Code: "rate_limit", Message: err.Error(), Retryable: false}
|
||||
_ = s.store.FinishTaskAttempt(ctx, store.FinishTaskAttemptInput{
|
||||
@ -292,6 +292,12 @@ func (s *Service) runCandidate(ctx context.Context, task store.GatewayTask, user
|
||||
})
|
||||
return clients.Response{}, clientErr
|
||||
}
|
||||
rateReservationsFinalized := false
|
||||
defer func() {
|
||||
if !rateReservationsFinalized {
|
||||
_ = s.store.ReleaseRateLimitReservations(context.WithoutCancel(ctx), limitResult.Reservations, "attempt_failed")
|
||||
}
|
||||
}()
|
||||
defer s.store.ReleaseConcurrencyLeases(context.WithoutCancel(ctx), limitResult.LeaseIDs)
|
||||
|
||||
if err := s.store.RecordClientAssignment(ctx, candidate); err != nil {
|
||||
@ -397,6 +403,10 @@ func (s *Service) runCandidate(ctx context.Context, task store.GatewayTask, user
|
||||
return clients.Response{}, err
|
||||
}
|
||||
}
|
||||
if err := s.store.CommitRateLimitReservations(ctx, limitResult.Reservations, tokenUsageAmounts(response.Usage)); err != nil {
|
||||
return clients.Response{}, err
|
||||
}
|
||||
rateReservationsFinalized = true
|
||||
if err := s.store.FinishTaskAttempt(ctx, store.FinishTaskAttemptInput{
|
||||
AttemptID: attemptID,
|
||||
Status: "succeeded",
|
||||
|
||||
@ -3,6 +3,7 @@ package store
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/easyai/easyai-ai-gateway/apps/api/internal/auth"
|
||||
)
|
||||
@ -38,6 +39,7 @@ WHERE p.status = 'enabled'
|
||||
AND m.enabled = true
|
||||
AND m.model_type @> jsonb_build_array($2)
|
||||
AND (p.cooldown_until IS NULL OR p.cooldown_until <= now())
|
||||
AND (m.cooldown_until IS NULL OR m.cooldown_until <= now())
|
||||
AND (
|
||||
(COALESCE(m.model_alias, '') <> '' AND m.model_alias = $1)
|
||||
OR (
|
||||
@ -151,6 +153,11 @@ ORDER BY effective_priority ASC,
|
||||
return nil, err
|
||||
}
|
||||
if len(items) == 0 {
|
||||
if unavailableErr, err := s.modelCandidateCooldownError(ctx, model, modelType); err != nil {
|
||||
return nil, err
|
||||
} else if unavailableErr != nil {
|
||||
return nil, unavailableErr
|
||||
}
|
||||
return nil, ErrNoModelCandidate
|
||||
}
|
||||
items, err = s.filterCandidatesByAccessRules(ctx, user, items)
|
||||
@ -162,3 +169,89 @@ ORDER BY effective_priority ASC,
|
||||
}
|
||||
return items, nil
|
||||
}
|
||||
|
||||
func (s *Store) modelCandidateCooldownError(ctx context.Context, model string, modelType string) (error, error) {
|
||||
rows, err := s.pool.Query(ctx, `
|
||||
SELECT p.name,
|
||||
COALESCE(NULLIF(m.display_name, ''), NULLIF(m.model_alias, ''), m.model_name),
|
||||
COALESCE(to_char(p.cooldown_until AT TIME ZONE 'UTC', 'YYYY-MM-DD"T"HH24:MI:SS.MS"Z"'), ''),
|
||||
GREATEST(COALESCE(EXTRACT(EPOCH FROM p.cooldown_until - now()), 0), 0)::float8,
|
||||
COALESCE(to_char(m.cooldown_until AT TIME ZONE 'UTC', 'YYYY-MM-DD"T"HH24:MI:SS.MS"Z"'), ''),
|
||||
GREATEST(COALESCE(EXTRACT(EPOCH FROM m.cooldown_until - now()), 0), 0)::float8
|
||||
FROM platform_models m
|
||||
JOIN integration_platforms p ON p.id = m.platform_id
|
||||
LEFT JOIN base_model_catalog b ON b.id = m.base_model_id
|
||||
WHERE p.status = 'enabled'
|
||||
AND p.deleted_at IS NULL
|
||||
AND m.enabled = true
|
||||
AND m.model_type @> jsonb_build_array($2)
|
||||
AND (
|
||||
(COALESCE(m.model_alias, '') <> '' AND m.model_alias = $1)
|
||||
OR (
|
||||
COALESCE(m.model_alias, '') = ''
|
||||
AND (
|
||||
m.model_name = $1
|
||||
OR b.canonical_model_key = $1
|
||||
OR b.provider_model_name = $1
|
||||
)
|
||||
)
|
||||
)
|
||||
ORDER BY GREATEST(COALESCE(p.cooldown_until, to_timestamp(0)), COALESCE(m.cooldown_until, to_timestamp(0))) DESC,
|
||||
p.priority ASC,
|
||||
m.created_at ASC`, model, modelType)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
for rows.Next() {
|
||||
var platformName string
|
||||
var displayName string
|
||||
var platformCooldownUntil string
|
||||
var platformRemainingSeconds float64
|
||||
var modelCooldownUntil string
|
||||
var modelRemainingSeconds float64
|
||||
if err := rows.Scan(
|
||||
&platformName,
|
||||
&displayName,
|
||||
&platformCooldownUntil,
|
||||
&platformRemainingSeconds,
|
||||
&modelCooldownUntil,
|
||||
&modelRemainingSeconds,
|
||||
); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if modelRemainingSeconds > 0 {
|
||||
return &ModelCandidateUnavailableError{
|
||||
Code: "model_cooling_down",
|
||||
Message: cooldownErrorMessage("模型", displayName, modelRemainingSeconds, modelCooldownUntil),
|
||||
}, nil
|
||||
}
|
||||
if platformRemainingSeconds > 0 {
|
||||
return &ModelCandidateUnavailableError{
|
||||
Code: "platform_cooling_down",
|
||||
Message: cooldownErrorMessage("平台", platformName, platformRemainingSeconds, platformCooldownUntil),
|
||||
}, nil
|
||||
}
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func cooldownErrorMessage(scope string, name string, remainingSeconds float64, cooldownUntil string) string {
|
||||
name = strings.TrimSpace(name)
|
||||
if name == "" {
|
||||
name = "候选"
|
||||
}
|
||||
remainingMinutes := remainingSeconds / 60
|
||||
if remainingMinutes < 0.1 {
|
||||
remainingMinutes = 0.1
|
||||
}
|
||||
message := fmt.Sprintf("%s %s 冷却中,剩余 %.1f 分钟", scope, name, remainingMinutes)
|
||||
if strings.TrimSpace(cooldownUntil) != "" {
|
||||
message += ",预计恢复时间 " + cooldownUntil
|
||||
}
|
||||
return message
|
||||
}
|
||||
|
||||
@ -193,6 +193,7 @@ RETURNING id::text, platform_id::text, COALESCE(base_model_id::text, ''), model_
|
||||
capabilities, pricing_mode, COALESCE(discount_factor, 0)::float8,
|
||||
COALESCE(pricing_rule_set_id::text, ''), billing_config_override, billing_config,
|
||||
permission_config, retry_policy, rate_limit_policy, COALESCE(runtime_policy_set_id::text, ''), runtime_policy_override,
|
||||
COALESCE(to_char(cooldown_until AT TIME ZONE 'UTC', 'YYYY-MM-DD"T"HH24:MI:SS.MS"Z"'), ''),
|
||||
enabled, created_at, updated_at`,
|
||||
input.PlatformID,
|
||||
baseID,
|
||||
@ -234,6 +235,7 @@ RETURNING id::text, platform_id::text, COALESCE(base_model_id::text, ''), model_
|
||||
&rateLimitPolicyBytes,
|
||||
&model.RuntimePolicySetID,
|
||||
&runtimePolicyOverrideBytes,
|
||||
&model.CooldownUntil,
|
||||
&model.Enabled,
|
||||
&model.CreatedAt,
|
||||
&model.UpdatedAt,
|
||||
|
||||
@ -70,6 +70,7 @@ type Platform struct {
|
||||
CredentialsPreview map[string]any `json:"credentialsPreview,omitempty"`
|
||||
RetryPolicy map[string]any `json:"retryPolicy,omitempty"`
|
||||
RateLimitPolicy map[string]any `json:"rateLimitPolicy,omitempty"`
|
||||
CooldownUntil string `json:"cooldownUntil,omitempty"`
|
||||
CreatedAt time.Time `json:"createdAt"`
|
||||
UpdatedAt time.Time `json:"updatedAt"`
|
||||
}
|
||||
@ -151,6 +152,7 @@ type PlatformModel struct {
|
||||
RateLimitPolicy map[string]any `json:"rateLimitPolicy,omitempty"`
|
||||
RuntimePolicySetID string `json:"runtimePolicySetId,omitempty"`
|
||||
RuntimePolicyOverride map[string]any `json:"runtimePolicyOverride,omitempty"`
|
||||
CooldownUntil string `json:"cooldownUntil,omitempty"`
|
||||
Enabled bool `json:"enabled"`
|
||||
CreatedAt time.Time `json:"createdAt"`
|
||||
UpdatedAt time.Time `json:"updatedAt"`
|
||||
@ -478,7 +480,9 @@ func (s *Store) ListPlatforms(ctx context.Context) ([]Platform, error) {
|
||||
rows, err := s.pool.Query(ctx, `
|
||||
SELECT id::text, provider, platform_key, name, COALESCE(internal_name, ''), COALESCE(base_url, ''), auth_type, status, priority,
|
||||
default_pricing_mode, default_discount_factor::float8, COALESCE(pricing_rule_set_id::text, ''),
|
||||
config, credentials, retry_policy, rate_limit_policy, created_at, updated_at
|
||||
config, credentials, retry_policy, rate_limit_policy,
|
||||
COALESCE(to_char(cooldown_until AT TIME ZONE 'UTC', 'YYYY-MM-DD"T"HH24:MI:SS.MS"Z"'), ''),
|
||||
created_at, updated_at
|
||||
FROM integration_platforms
|
||||
ORDER BY priority ASC, created_at DESC`)
|
||||
if err != nil {
|
||||
@ -510,6 +514,7 @@ ORDER BY priority ASC, created_at DESC`)
|
||||
&credentialsBytes,
|
||||
&retryPolicyBytes,
|
||||
&rateLimitPolicyBytes,
|
||||
&platform.CooldownUntil,
|
||||
&platform.CreatedAt,
|
||||
&platform.UpdatedAt,
|
||||
); err != nil {
|
||||
@ -555,7 +560,9 @@ VALUES (
|
||||
)
|
||||
RETURNING id::text, provider, platform_key, name, COALESCE(internal_name, ''), COALESCE(base_url, ''), auth_type, status, priority,
|
||||
default_pricing_mode, default_discount_factor::float8, COALESCE(pricing_rule_set_id::text, ''),
|
||||
config, credentials, retry_policy, rate_limit_policy, created_at, updated_at`,
|
||||
config, credentials, retry_policy, rate_limit_policy,
|
||||
COALESCE(to_char(cooldown_until AT TIME ZONE 'UTC', 'YYYY-MM-DD"T"HH24:MI:SS.MS"Z"'), ''),
|
||||
created_at, updated_at`,
|
||||
input.Provider, input.PlatformKey, input.Name, strings.TrimSpace(input.InternalName), input.BaseURL, input.AuthType, credentials, config,
|
||||
input.DefaultPricingMode, input.DefaultDiscountFactor, input.PricingRuleSetID, input.Priority,
|
||||
string(retryPolicy), string(rateLimitPolicy),
|
||||
@ -576,6 +583,7 @@ RETURNING id::text, provider, platform_key, name, COALESCE(internal_name, ''), C
|
||||
&credentialsResultBytes,
|
||||
&retryPolicyBytes,
|
||||
&rateLimitPolicyBytes,
|
||||
&platform.CooldownUntil,
|
||||
&platform.CreatedAt,
|
||||
&platform.UpdatedAt,
|
||||
)
|
||||
@ -636,7 +644,9 @@ SET provider = $2,
|
||||
WHERE id = $1::uuid
|
||||
RETURNING id::text, provider, platform_key, name, COALESCE(internal_name, ''), COALESCE(base_url, ''), auth_type, status, priority,
|
||||
default_pricing_mode, default_discount_factor::float8, COALESCE(pricing_rule_set_id::text, ''),
|
||||
config, credentials, retry_policy, rate_limit_policy, created_at, updated_at`,
|
||||
config, credentials, retry_policy, rate_limit_policy,
|
||||
COALESCE(to_char(cooldown_until AT TIME ZONE 'UTC', 'YYYY-MM-DD"T"HH24:MI:SS.MS"Z"'), ''),
|
||||
created_at, updated_at`,
|
||||
id,
|
||||
input.Provider,
|
||||
input.PlatformKey,
|
||||
@ -669,6 +679,7 @@ RETURNING id::text, provider, platform_key, name, COALESCE(internal_name, ''), C
|
||||
&credentialsResultBytes,
|
||||
&retryPolicyBytes,
|
||||
&rateLimitPolicyBytes,
|
||||
&platform.CooldownUntil,
|
||||
&platform.CreatedAt,
|
||||
&platform.UpdatedAt,
|
||||
)
|
||||
@ -746,6 +757,7 @@ SELECT m.id::text, m.platform_id::text, COALESCE(m.base_model_id::text, ''), p.p
|
||||
m.capability_override, m.capabilities, m.pricing_mode, COALESCE(m.discount_factor, 0)::float8,
|
||||
COALESCE(m.pricing_rule_set_id::text, ''), m.billing_config_override, m.billing_config,
|
||||
m.permission_config, m.retry_policy, m.rate_limit_policy, COALESCE(m.runtime_policy_set_id::text, ''), m.runtime_policy_override,
|
||||
COALESCE(to_char(m.cooldown_until AT TIME ZONE 'UTC', 'YYYY-MM-DD"T"HH24:MI:SS.MS"Z"'), ''),
|
||||
m.enabled, m.created_at, m.updated_at
|
||||
FROM platform_models m
|
||||
JOIN integration_platforms p ON p.id = m.platform_id
|
||||
@ -791,6 +803,7 @@ ORDER BY m.model_type ASC, m.model_name ASC`, args...)
|
||||
&rateLimitPolicy,
|
||||
&model.RuntimePolicySetID,
|
||||
&runtimePolicyOverride,
|
||||
&model.CooldownUntil,
|
||||
&model.Enabled,
|
||||
&model.CreatedAt,
|
||||
&model.UpdatedAt,
|
||||
|
||||
249
apps/api/internal/store/rate_limit_status.go
Normal file
249
apps/api/internal/store/rate_limit_status.go
Normal file
@ -0,0 +1,249 @@
|
||||
package store
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type RateLimitMetricStatus struct {
|
||||
CurrentValue float64 `json:"currentValue"`
|
||||
UsedValue float64 `json:"usedValue"`
|
||||
ReservedValue float64 `json:"reservedValue"`
|
||||
LimitValue float64 `json:"limitValue"`
|
||||
Limited bool `json:"limited"`
|
||||
Ratio float64 `json:"ratio"`
|
||||
ResetAt string `json:"resetAt,omitempty"`
|
||||
}
|
||||
|
||||
type ModelRateLimitStatus struct {
|
||||
PlatformModelID string `json:"platformModelId"`
|
||||
PlatformID string `json:"platformId"`
|
||||
PlatformName string `json:"platformName"`
|
||||
Provider string `json:"provider"`
|
||||
ModelName string `json:"modelName"`
|
||||
ProviderModelName string `json:"providerModelName,omitempty"`
|
||||
ModelAlias string `json:"modelAlias,omitempty"`
|
||||
DisplayName string `json:"displayName"`
|
||||
ModelType []string `json:"modelType"`
|
||||
Enabled bool `json:"enabled"`
|
||||
RateLimitPolicy map[string]any `json:"rateLimitPolicy,omitempty"`
|
||||
PlatformCooldownUntil string `json:"platformCooldownUntil,omitempty"`
|
||||
ModelCooldownUntil string `json:"modelCooldownUntil,omitempty"`
|
||||
Concurrent RateLimitMetricStatus `json:"concurrent"`
|
||||
RPM RateLimitMetricStatus `json:"rpm"`
|
||||
TPM RateLimitMetricStatus `json:"tpm"`
|
||||
LoadRatio float64 `json:"loadRatio"`
|
||||
}
|
||||
|
||||
func (s *Store) ListModelRateLimitStatuses(ctx context.Context) ([]ModelRateLimitStatus, error) {
|
||||
rows, err := s.pool.Query(ctx, `
|
||||
SELECT m.id::text, m.platform_id::text, p.name, p.provider,
|
||||
m.model_name, COALESCE(NULLIF(m.provider_model_name, ''), m.model_name), COALESCE(m.model_alias, ''),
|
||||
m.model_type, m.display_name, m.enabled,
|
||||
p.rate_limit_policy, COALESCE(rp.rate_limit_policy, '{}'::jsonb), COALESCE(NULLIF(m.runtime_policy_override, '{}'::jsonb), b.runtime_policy_override, '{}'::jsonb), m.rate_limit_policy,
|
||||
COALESCE(to_char(p.cooldown_until AT TIME ZONE 'UTC', 'YYYY-MM-DD"T"HH24:MI:SS.MS"Z"'), ''),
|
||||
COALESCE(to_char(m.cooldown_until AT TIME ZONE 'UTC', 'YYYY-MM-DD"T"HH24:MI:SS.MS"Z"'), ''),
|
||||
COALESCE(con.active, 0)::float8,
|
||||
COALESCE(rpm.used_value, 0)::float8, COALESCE(rpm.reserved_value, 0)::float8, COALESCE(rpm.reset_at::text, ''),
|
||||
COALESCE(tpm.used_value, 0)::float8, COALESCE(tpm.reserved_value, 0)::float8, COALESCE(tpm.reset_at::text, '')
|
||||
FROM platform_models m
|
||||
JOIN integration_platforms p ON p.id = m.platform_id
|
||||
LEFT JOIN base_model_catalog b ON b.id = m.base_model_id
|
||||
LEFT JOIN model_runtime_policy_sets rp ON rp.id = COALESCE(m.runtime_policy_set_id, b.runtime_policy_set_id)
|
||||
LEFT JOIN (
|
||||
SELECT scope_key, SUM(lease_value) AS active
|
||||
FROM gateway_concurrency_leases
|
||||
WHERE scope_type = 'platform_model'
|
||||
AND released_at IS NULL
|
||||
AND expires_at > now()
|
||||
GROUP BY scope_key
|
||||
) con ON con.scope_key = m.id::text
|
||||
LEFT JOIN (
|
||||
SELECT DISTINCT ON (scope_key) scope_key, used_value, reserved_value, reset_at
|
||||
FROM gateway_rate_limit_counters
|
||||
WHERE scope_type = 'platform_model'
|
||||
AND metric = 'rpm'
|
||||
AND reset_at > now()
|
||||
ORDER BY scope_key, window_start DESC
|
||||
) rpm ON rpm.scope_key = m.id::text
|
||||
LEFT JOIN (
|
||||
SELECT scope_key, SUM(used_value) AS used_value, SUM(reserved_value) AS reserved_value, MAX(reset_at) AS reset_at
|
||||
FROM gateway_rate_limit_counters
|
||||
WHERE scope_type = 'platform_model'
|
||||
AND metric LIKE 'tpm%'
|
||||
AND reset_at > now()
|
||||
GROUP BY scope_key
|
||||
) tpm ON tpm.scope_key = m.id::text
|
||||
WHERE p.deleted_at IS NULL
|
||||
ORDER BY p.priority ASC, m.model_name ASC`)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
items := make([]ModelRateLimitStatus, 0)
|
||||
for rows.Next() {
|
||||
var item ModelRateLimitStatus
|
||||
var modelTypeBytes []byte
|
||||
var platformPolicyBytes []byte
|
||||
var runtimePolicyBytes []byte
|
||||
var runtimeOverrideBytes []byte
|
||||
var modelPolicyBytes []byte
|
||||
var platformCooldownUntil string
|
||||
var modelCooldownUntil string
|
||||
var concurrentCurrent float64
|
||||
var rpmUsed float64
|
||||
var rpmReserved float64
|
||||
var rpmResetAt string
|
||||
var tpmUsed float64
|
||||
var tpmReserved float64
|
||||
var tpmResetAt string
|
||||
if err := rows.Scan(
|
||||
&item.PlatformModelID,
|
||||
&item.PlatformID,
|
||||
&item.PlatformName,
|
||||
&item.Provider,
|
||||
&item.ModelName,
|
||||
&item.ProviderModelName,
|
||||
&item.ModelAlias,
|
||||
&modelTypeBytes,
|
||||
&item.DisplayName,
|
||||
&item.Enabled,
|
||||
&platformPolicyBytes,
|
||||
&runtimePolicyBytes,
|
||||
&runtimeOverrideBytes,
|
||||
&modelPolicyBytes,
|
||||
&platformCooldownUntil,
|
||||
&modelCooldownUntil,
|
||||
&concurrentCurrent,
|
||||
&rpmUsed,
|
||||
&rpmReserved,
|
||||
&rpmResetAt,
|
||||
&tpmUsed,
|
||||
&tpmReserved,
|
||||
&tpmResetAt,
|
||||
); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
item.ModelType = decodeStringArray(modelTypeBytes)
|
||||
policy := effectiveModelRateLimitPolicy(
|
||||
decodeObject(platformPolicyBytes),
|
||||
decodeObject(runtimePolicyBytes),
|
||||
decodeObject(runtimeOverrideBytes),
|
||||
decodeObject(modelPolicyBytes),
|
||||
)
|
||||
item.PlatformCooldownUntil = platformCooldownUntil
|
||||
item.ModelCooldownUntil = modelCooldownUntil
|
||||
item.RateLimitPolicy = policy
|
||||
item.Concurrent = metricStatus(concurrentCurrent, concurrentCurrent, 0, rateLimitForMetric(policy, "concurrent"), "")
|
||||
item.RPM = metricStatus(rpmUsed+rpmReserved, rpmUsed, rpmReserved, rateLimitForMetric(policy, "rpm"), rpmResetAt)
|
||||
item.TPM = metricStatus(tpmUsed+tpmReserved, tpmUsed, tpmReserved, tpmLimit(policy), tpmResetAt)
|
||||
item.LoadRatio = maxFloat(item.Concurrent.Ratio, item.RPM.Ratio, item.TPM.Ratio)
|
||||
items = append(items, item)
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
sort.SliceStable(items, func(i, j int) bool {
|
||||
if items[i].LoadRatio == items[j].LoadRatio {
|
||||
return strings.ToLower(items[i].DisplayName) < strings.ToLower(items[j].DisplayName)
|
||||
}
|
||||
return items[i].LoadRatio > items[j].LoadRatio
|
||||
})
|
||||
return items, nil
|
||||
}
|
||||
|
||||
func effectiveModelRateLimitPolicy(platformPolicy map[string]any, runtimePolicy map[string]any, runtimeOverride map[string]any, modelPolicy map[string]any) map[string]any {
|
||||
policy := platformPolicy
|
||||
if hasRateLimitRules(runtimePolicy) {
|
||||
policy = shallowMergeMap(policy, runtimePolicy)
|
||||
}
|
||||
if nested, ok := runtimeOverride["rateLimitPolicy"].(map[string]any); ok && len(nested) > 0 {
|
||||
policy = shallowMergeMap(policy, nested)
|
||||
}
|
||||
if hasRateLimitRules(modelPolicy) {
|
||||
policy = shallowMergeMap(policy, modelPolicy)
|
||||
}
|
||||
if hasRateLimitRules(policy) {
|
||||
return policy
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// hasRateLimitRules reports whether policy carries at least one entry in its
// "rules" array. A nil policy, a missing key, or a non-array value all count
// as "no rules".
func hasRateLimitRules(policy map[string]any) bool {
	if rules, ok := policy["rules"].([]any); ok {
		return len(rules) > 0
	}
	return false
}
|
||||
|
||||
// shallowMergeMap returns a fresh map holding every key of base overlaid with
// every key of override; override wins on conflicts. Neither input is mutated,
// and the merge is shallow: nested maps are shared, not copied.
func shallowMergeMap(base map[string]any, override map[string]any) map[string]any {
	merged := make(map[string]any, len(base)+len(override))
	for _, source := range []map[string]any{base, override} {
		for key, value := range source {
			merged[key] = value
		}
	}
	return merged
}
|
||||
|
||||
func rateLimitForMetric(policy map[string]any, metric string) float64 {
|
||||
rules, _ := policy["rules"].([]any)
|
||||
for _, rawRule := range rules {
|
||||
rule, _ := rawRule.(map[string]any)
|
||||
if strings.TrimSpace(stringValue(rule["metric"])) == metric {
|
||||
return floatValue(rule["limit"])
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func tpmLimit(policy map[string]any) float64 {
|
||||
if limit := rateLimitForMetric(policy, "tpm_total"); limit > 0 {
|
||||
return limit
|
||||
}
|
||||
return rateLimitForMetric(policy, "tpm_input") + rateLimitForMetric(policy, "tpm_output")
|
||||
}
|
||||
|
||||
func metricStatus(current float64, used float64, reserved float64, limit float64, resetAt string) RateLimitMetricStatus {
|
||||
status := RateLimitMetricStatus{
|
||||
CurrentValue: current,
|
||||
UsedValue: used,
|
||||
ReservedValue: reserved,
|
||||
LimitValue: limit,
|
||||
Limited: limit > 0,
|
||||
ResetAt: resetAt,
|
||||
}
|
||||
if status.Limited {
|
||||
status.Ratio = current / limit
|
||||
}
|
||||
return status
|
||||
}
|
||||
|
||||
// maxFloat returns the largest of the supplied values, floored at zero: with
// no arguments, or with only negative values, the result is 0.
func maxFloat(values ...float64) float64 {
	highest := 0.0
	for _, candidate := range values {
		if candidate > highest {
			highest = candidate
		}
	}
	return highest
}
|
||||
|
||||
// stringValue extracts a string from an arbitrary value and trims surrounding
// whitespace; any non-string value (including nil) yields "".
func stringValue(value any) string {
	if text, ok := value.(string); ok {
		return strings.TrimSpace(text)
	}
	return ""
}
|
||||
|
||||
// floatValue coerces a dynamically-typed numeric value to float64.
// The original handled only int, int64, and float64 (the types produced by
// JSON decoding and Go literals); this generalizes to the remaining common
// machine integer and float widths while staying backward compatible.
// Unrecognized types — including nil and strings — yield 0.
func floatValue(value any) float64 {
	switch typed := value.(type) {
	case int:
		return float64(typed)
	case int32:
		return float64(typed)
	case int64:
		return float64(typed)
	case uint:
		return float64(typed)
	case uint32:
		return float64(typed)
	case uint64:
		return float64(typed)
	case float32:
		return float64(typed)
	case float64:
		return typed
	default:
		return 0
	}
}
|
||||
32
apps/api/internal/store/rate_limit_status_test.go
Normal file
32
apps/api/internal/store/rate_limit_status_test.go
Normal file
@ -0,0 +1,32 @@
|
||||
package store
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestEffectiveModelRateLimitPolicyTreatsModelRulesAsAuthoritative(t *testing.T) {
|
||||
policy := effectiveModelRateLimitPolicy(
|
||||
map[string]any{"rules": []any{
|
||||
map[string]any{"metric": "rpm", "limit": 500},
|
||||
map[string]any{"metric": "tpm_total", "limit": 100000},
|
||||
}},
|
||||
map[string]any{"rules": []any{
|
||||
map[string]any{"metric": "rpm", "limit": 120},
|
||||
map[string]any{"metric": "tpm_total", "limit": 240000},
|
||||
map[string]any{"metric": "concurrent", "limit": 6},
|
||||
}},
|
||||
map[string]any{},
|
||||
map[string]any{"rules": []any{
|
||||
map[string]any{"metric": "rpm", "limit": 30},
|
||||
map[string]any{"metric": "concurrent", "limit": 2},
|
||||
}},
|
||||
)
|
||||
|
||||
if got := rateLimitForMetric(policy, "rpm"); got != 30 {
|
||||
t.Fatalf("expected model rpm limit to win, got %v", got)
|
||||
}
|
||||
if got := rateLimitForMetric(policy, "concurrent"); got != 2 {
|
||||
t.Fatalf("expected model concurrent limit to win, got %v", got)
|
||||
}
|
||||
if got := rateLimitForMetric(policy, "tpm_total"); got != 0 {
|
||||
t.Fatalf("expected missing model tpm limit to mean unlimited, got %v", got)
|
||||
}
|
||||
}
|
||||
@ -3,9 +3,19 @@ package store
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"time"
|
||||
|
||||
"github.com/jackc/pgx/v5"
|
||||
)
|
||||
|
||||
func (s *Store) ReserveRateLimits(ctx context.Context, taskID string, reservations []RateLimitReservation) (RateLimitResult, error) {
|
||||
// RuntimeRecoveryResult summarizes what RecoverInterruptedRuntimeState cleaned
// up after an unclean service shutdown.
type RuntimeRecoveryResult struct {
	// ReleasedConcurrencyLeases counts still-active concurrency leases that
	// were force-released.
	ReleasedConcurrencyLeases int64 `json:"releasedConcurrencyLeases"`
	// ReleasedRateReservations counts rate-limit reservations still in
	// 'reserved' state whose capacity was handed back to the counters.
	ReleasedRateReservations int64 `json:"releasedRateReservations"`
	// FailedAttempts counts task attempts that were still 'running' and were
	// marked failed.
	FailedAttempts int64 `json:"failedAttempts"`
	// FailedTasks counts queued/running tasks that were marked failed.
	FailedTasks int64 `json:"failedTasks"`
}
|
||||
|
||||
func (s *Store) ReserveRateLimits(ctx context.Context, taskID string, attemptID string, reservations []RateLimitReservation) (RateLimitResult, error) {
|
||||
tx, err := s.pool.Begin(ctx)
|
||||
if err != nil {
|
||||
return RateLimitResult{}, err
|
||||
@ -24,70 +34,122 @@ func (s *Store) ReserveRateLimits(ctx context.Context, taskID string, reservatio
|
||||
reservation.WindowSeconds = 60
|
||||
}
|
||||
if reservation.Metric == "concurrent" {
|
||||
if reservation.LeaseTTLSeconds <= 0 {
|
||||
reservation.LeaseTTLSeconds = 120
|
||||
leaseID, err := reserveConcurrencyLease(ctx, tx, taskID, attemptID, reservation)
|
||||
if err != nil {
|
||||
return RateLimitResult{}, err
|
||||
}
|
||||
var active float64
|
||||
if err := tx.QueryRow(ctx, `
|
||||
result.LeaseIDs = append(result.LeaseIDs, leaseID)
|
||||
continue
|
||||
}
|
||||
normalized, err := reserveCounterWindow(ctx, tx, taskID, attemptID, reservation)
|
||||
if err != nil {
|
||||
return RateLimitResult{}, err
|
||||
}
|
||||
result.Reservations = append(result.Reservations, normalized)
|
||||
}
|
||||
return result, tx.Commit(ctx)
|
||||
}
|
||||
|
||||
// reserveConcurrencyLease grants one concurrency slot for the given scope by
// inserting a time-bounded lease row within the caller's transaction. It sums
// the still-active leases (not released, not expired) for the scope and
// refuses with ErrRateLimited when adding this reservation would exceed the
// limit. The lease TTL defaults to 120 seconds when the caller did not set
// one. Returns the new lease's ID.
func reserveConcurrencyLease(ctx context.Context, tx pgx.Tx, taskID string, attemptID string, reservation RateLimitReservation) (string, error) {
	if reservation.LeaseTTLSeconds <= 0 {
		reservation.LeaseTTLSeconds = 120
	}
	var active float64
	if err := tx.QueryRow(ctx, `
SELECT COALESCE(SUM(lease_value), 0)::float8
FROM gateway_concurrency_leases
WHERE scope_type = $1
  AND scope_key = $2
  AND released_at IS NULL
  AND expires_at > now()`,
		reservation.ScopeType,
		reservation.ScopeKey,
	).Scan(&active); err != nil {
		return "", err
	}
	if active+reservation.Amount > reservation.Limit {
		return "", ErrRateLimited
	}
	var leaseID string
	// attemptID may be empty; NULLIF turns that into a NULL attempt reference.
	if err := tx.QueryRow(ctx, `
INSERT INTO gateway_concurrency_leases (task_id, attempt_id, scope_type, scope_key, lease_value, expires_at)
VALUES ($1::uuid, NULLIF($2, '')::uuid, $3, $4, $5, now() + ($6::int * interval '1 second'))
RETURNING id::text`,
		taskID,
		attemptID,
		reservation.ScopeType,
		reservation.ScopeKey,
		reservation.Amount,
		reservation.LeaseTTLSeconds,
	).Scan(&leaseID); err != nil {
		return "", err
	}
	return leaseID, nil
}
|
||||
|
||||
// reserveCounterWindow reserves counter capacity inside the current minute
// window. The upsert only succeeds while used + reserved (including this
// reservation) stays within the limit; a conditional-update miss surfaces as
// pgx.ErrNoRows and is translated into ErrRateLimited. On success a matching
// 'reserved' row is written to gateway_rate_limit_reservations and the input
// reservation is returned with WindowStart and ReservationID populated.
func reserveCounterWindow(ctx context.Context, tx pgx.Tx, taskID string, attemptID string, reservation RateLimitReservation) (RateLimitReservation, error) {
	// A reservation only pre-books capacity: nothing is counted as used until
	// the reservation is committed.
	usedAmount := 0.0
	reservedAmount := reservation.Amount
	var windowStart time.Time
	err := tx.QueryRow(ctx, `
INSERT INTO gateway_rate_limit_counters (
  scope_type, scope_key, metric, window_start, limit_value, used_value, reserved_value, reset_at
)
VALUES (
  $1, $2, $3, date_trunc('minute', now()), $4, $5, $6,
  date_trunc('minute', now()) + ($7::int * interval '1 second')
)
ON CONFLICT (scope_type, scope_key, metric, window_start) DO UPDATE
SET limit_value = EXCLUDED.limit_value,
    used_value = gateway_rate_limit_counters.used_value + EXCLUDED.used_value,
    reserved_value = gateway_rate_limit_counters.reserved_value + EXCLUDED.reserved_value,
    reset_at = EXCLUDED.reset_at,
    updated_at = now()
WHERE gateway_rate_limit_counters.used_value + gateway_rate_limit_counters.reserved_value + EXCLUDED.used_value + EXCLUDED.reserved_value <= EXCLUDED.limit_value
RETURNING window_start`,
		reservation.ScopeType,
		reservation.ScopeKey,
		reservation.Metric,
		reservation.Limit,
		usedAmount,
		reservedAmount,
		reservation.WindowSeconds,
	).Scan(&windowStart)
	if err != nil {
		// The guarded DO UPDATE matched no row: the window is full.
		if errors.Is(err, pgx.ErrNoRows) {
			return RateLimitReservation{}, ErrRateLimited
		}
		return RateLimitReservation{}, err
	}
	reservation.WindowStart = windowStart
	if err := tx.QueryRow(ctx, `
INSERT INTO gateway_rate_limit_reservations (
  task_id, attempt_id, scope_type, scope_key, metric, window_start, limit_value, reserved_amount, status
)
VALUES (
  $1::uuid, NULLIF($2, '')::uuid, $3, $4, $5, $6, $7, $8, 'reserved'
)
RETURNING id::text`,
		taskID,
		attemptID,
		reservation.ScopeType,
		reservation.ScopeKey,
		reservation.Metric,
		windowStart,
		reservation.Limit,
		reservedAmount,
	).Scan(&reservation.ReservationID); err != nil {
		return RateLimitReservation{}, err
	}
	return reservation, nil
}
|
||||
|
||||
// CommitRateLimitReservations finalizes the given reservations as consumed:
// each reserved amount is settled into the counter's used value, preferring
// the caller-reported actual amount per metric when one is provided.
func (s *Store) CommitRateLimitReservations(ctx context.Context, reservations []RateLimitReservation, actualByMetric map[string]float64) error {
	return s.finishRateLimitReservations(ctx, reservations, actualByMetric, "committed", "success")
}
|
||||
|
||||
// ReleaseRateLimitReservations abandons the given reservations and returns
// their reserved amounts to the counter windows; reason is recorded on each
// reservation row for auditing.
func (s *Store) ReleaseRateLimitReservations(ctx context.Context, reservations []RateLimitReservation, reason string) error {
	return s.finishRateLimitReservations(ctx, reservations, nil, "released", reason)
}
|
||||
|
||||
func (s *Store) ReleaseConcurrencyLeases(ctx context.Context, leaseIDs []string) error {
|
||||
@ -107,3 +169,193 @@ WHERE id = $1::uuid AND released_at IS NULL`, leaseID); err != nil && !errors.Is
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Store) RecoverInterruptedRuntimeState(ctx context.Context) (RuntimeRecoveryResult, error) {
|
||||
tx, err := s.pool.Begin(ctx)
|
||||
if err != nil {
|
||||
return RuntimeRecoveryResult{}, err
|
||||
}
|
||||
defer tx.Rollback(ctx)
|
||||
|
||||
result := RuntimeRecoveryResult{}
|
||||
rows, err := tx.Query(ctx, `
|
||||
UPDATE gateway_rate_limit_reservations
|
||||
SET status = 'released',
|
||||
reason = 'server_restarted',
|
||||
finalized_at = now(),
|
||||
updated_at = now()
|
||||
WHERE status = 'reserved'
|
||||
RETURNING scope_type, scope_key, metric, window_start, reserved_amount::float8`)
|
||||
if err != nil {
|
||||
return RuntimeRecoveryResult{}, err
|
||||
}
|
||||
for rows.Next() {
|
||||
var reservation RateLimitReservation
|
||||
if err := rows.Scan(&reservation.ScopeType, &reservation.ScopeKey, &reservation.Metric, &reservation.WindowStart, &reservation.Amount); err != nil {
|
||||
rows.Close()
|
||||
return RuntimeRecoveryResult{}, err
|
||||
}
|
||||
if err := releaseCounterReservation(ctx, tx, reservation.ScopeType, reservation.ScopeKey, reservation.Metric, reservation.WindowStart, reservation.Amount); err != nil {
|
||||
rows.Close()
|
||||
return RuntimeRecoveryResult{}, err
|
||||
}
|
||||
result.ReleasedRateReservations++
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
rows.Close()
|
||||
return RuntimeRecoveryResult{}, err
|
||||
}
|
||||
rows.Close()
|
||||
|
||||
tag, err := tx.Exec(ctx, `
|
||||
UPDATE gateway_concurrency_leases
|
||||
SET released_at = now()
|
||||
WHERE released_at IS NULL
|
||||
AND expires_at > now()`)
|
||||
if err != nil {
|
||||
return RuntimeRecoveryResult{}, err
|
||||
}
|
||||
result.ReleasedConcurrencyLeases = tag.RowsAffected()
|
||||
|
||||
tag, err = tx.Exec(ctx, `
|
||||
UPDATE gateway_task_attempts
|
||||
SET status = 'failed',
|
||||
retryable = false,
|
||||
error_code = 'server_restarted',
|
||||
error_message = 'attempt interrupted by service restart',
|
||||
finished_at = now()
|
||||
WHERE status = 'running'`)
|
||||
if err != nil {
|
||||
return RuntimeRecoveryResult{}, err
|
||||
}
|
||||
result.FailedAttempts = tag.RowsAffected()
|
||||
|
||||
taskRows, err := tx.Query(ctx, `
|
||||
UPDATE gateway_tasks
|
||||
SET status = 'failed',
|
||||
error = 'task interrupted by service restart',
|
||||
error_code = 'server_restarted',
|
||||
error_message = 'task interrupted by service restart',
|
||||
finished_at = now(),
|
||||
updated_at = now()
|
||||
WHERE status IN ('queued', 'running')
|
||||
RETURNING id::text`)
|
||||
if err != nil {
|
||||
return RuntimeRecoveryResult{}, err
|
||||
}
|
||||
taskIDs := make([]string, 0)
|
||||
for taskRows.Next() {
|
||||
var taskID string
|
||||
if err := taskRows.Scan(&taskID); err != nil {
|
||||
taskRows.Close()
|
||||
return RuntimeRecoveryResult{}, err
|
||||
}
|
||||
taskIDs = append(taskIDs, taskID)
|
||||
}
|
||||
if err := taskRows.Err(); err != nil {
|
||||
taskRows.Close()
|
||||
return RuntimeRecoveryResult{}, err
|
||||
}
|
||||
taskRows.Close()
|
||||
|
||||
for _, taskID := range taskIDs {
|
||||
if _, err := tx.Exec(ctx, `
|
||||
INSERT INTO gateway_task_events (task_id, seq, event_type, status, phase, progress, message, payload, simulated)
|
||||
VALUES (
|
||||
$1::uuid,
|
||||
COALESCE((SELECT MAX(seq) + 1 FROM gateway_task_events WHERE task_id = $1::uuid), 1),
|
||||
'task.recovered',
|
||||
'failed',
|
||||
'recovered',
|
||||
1,
|
||||
'task interrupted by service restart',
|
||||
'{"code":"server_restarted"}'::jsonb,
|
||||
false
|
||||
)`, taskID); err != nil {
|
||||
return RuntimeRecoveryResult{}, err
|
||||
}
|
||||
}
|
||||
result.FailedTasks = int64(len(taskIDs))
|
||||
|
||||
return result, tx.Commit(ctx)
|
||||
}
|
||||
|
||||
// finishRateLimitReservations transitions reservations out of the 'reserved'
// state and settles the matching counter windows in one transaction.
// status is either "committed" (usage happened: the reserved amount becomes
// used, re-scaled to the caller-reported actual amount when provided) or
// "released" (the reserved amount is simply handed back to the window).
// Reservations without an ID, or already finalized elsewhere, are skipped
// silently, which makes the call idempotent.
func (s *Store) finishRateLimitReservations(ctx context.Context, reservations []RateLimitReservation, actualByMetric map[string]float64, status string, reason string) error {
	if len(reservations) == 0 {
		return nil
	}
	tx, err := s.pool.Begin(ctx)
	if err != nil {
		return err
	}
	defer tx.Rollback(ctx)

	for _, reservation := range reservations {
		if reservation.ReservationID == "" {
			continue
		}
		actualAmount := actualByMetric[reservation.Metric]
		// Fall back to the reserved amount when the caller reported no usage
		// for this metric.
		if status == "committed" && actualAmount <= 0 {
			actualAmount = reservation.Amount
		}
		var stored RateLimitReservation
		// Conditional update guards against double finalization; RETURNING
		// gives back the window coordinates needed to settle the counter.
		err := tx.QueryRow(ctx, `
UPDATE gateway_rate_limit_reservations
SET status = $2,
    reason = NULLIF($3, ''),
    actual_amount = CASE WHEN $2 = 'committed' THEN $4 ELSE actual_amount END,
    finalized_at = now(),
    updated_at = now()
WHERE id = $1::uuid
  AND status = 'reserved'
RETURNING scope_type, scope_key, metric, window_start, reserved_amount::float8`,
			reservation.ReservationID,
			status,
			reason,
			actualAmount,
		).Scan(&stored.ScopeType, &stored.ScopeKey, &stored.Metric, &stored.WindowStart, &stored.Amount)
		if err != nil {
			// No row matched: the reservation was already finalized by someone
			// else; nothing to settle for it.
			if errors.Is(err, pgx.ErrNoRows) {
				continue
			}
			return err
		}
		if status == "committed" {
			if err := commitCounterReservation(ctx, tx, stored.ScopeType, stored.ScopeKey, stored.Metric, stored.WindowStart, stored.Amount, actualAmount); err != nil {
				return err
			}
			continue
		}
		if err := releaseCounterReservation(ctx, tx, stored.ScopeType, stored.ScopeKey, stored.Metric, stored.WindowStart, stored.Amount); err != nil {
			return err
		}
	}
	return tx.Commit(ctx)
}
|
||||
|
||||
// commitCounterReservation converts a fulfilled reservation into real usage on
// its counter window: the reserved amount is removed (clamped at zero via
// GREATEST to stay safe against double settlement) and the actual amount is
// added to used_value.
func commitCounterReservation(ctx context.Context, tx pgx.Tx, scopeType string, scopeKey string, metric string, windowStart time.Time, reservedAmount float64, actualAmount float64) error {
	_, err := tx.Exec(ctx, `
UPDATE gateway_rate_limit_counters
SET reserved_value = GREATEST(reserved_value - $5, 0),
    used_value = used_value + $6,
    updated_at = now()
WHERE scope_type = $1
  AND scope_key = $2
  AND metric = $3
  AND window_start = $4`,
		scopeType, scopeKey, metric, windowStart, reservedAmount, actualAmount)
	return err
}
|
||||
|
||||
// releaseCounterReservation hands a reservation's capacity back to its counter
// window without recording any usage; GREATEST clamps at zero so a repeated
// release cannot drive reserved_value negative.
func releaseCounterReservation(ctx context.Context, tx pgx.Tx, scopeType string, scopeKey string, metric string, windowStart time.Time, reservedAmount float64) error {
	_, err := tx.Exec(ctx, `
UPDATE gateway_rate_limit_counters
SET reserved_value = GREATEST(reserved_value - $5, 0),
    updated_at = now()
WHERE scope_type = $1
  AND scope_key = $2
  AND metric = $3
  AND window_start = $4`,
		scopeType, scopeKey, metric, windowStart, reservedAmount)
	return err
}
|
||||
|
||||
@ -134,6 +134,21 @@ WHERE id = $1::uuid`, platformID, cooldownSeconds)
|
||||
return err
|
||||
}
|
||||
|
||||
func (s *Store) CooldownCandidatePlatformModel(ctx context.Context, platformModelID string, cooldownSeconds int) error {
|
||||
if strings.TrimSpace(platformModelID) == "" {
|
||||
return nil
|
||||
}
|
||||
if cooldownSeconds <= 0 {
|
||||
cooldownSeconds = 300
|
||||
}
|
||||
_, err := s.pool.Exec(ctx, `
|
||||
UPDATE platform_models
|
||||
SET cooldown_until = now() + ($2::int * interval '1 second'),
|
||||
updated_at = now()
|
||||
WHERE id = $1::uuid`, platformModelID, cooldownSeconds)
|
||||
return err
|
||||
}
|
||||
|
||||
func (s *Store) DemoteCandidatePlatformPriority(ctx context.Context, platformID string, demoteStep int) error {
|
||||
if strings.TrimSpace(platformID) == "" {
|
||||
return nil
|
||||
|
||||
@ -2,6 +2,7 @@ package store
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
@ -10,6 +11,27 @@ var (
|
||||
ErrRateLimited = errors.New("rate limit exceeded")
|
||||
)
|
||||
|
||||
// ModelCandidateUnavailableError describes why no model candidate could be
// selected, carrying a machine-readable code alongside a human-readable
// message.
type ModelCandidateUnavailableError struct {
	// Code is a stable error code (surfaced via ModelCandidateErrorCode).
	Code string
	// Message is the human-readable description returned by Error().
	Message string
}
|
||||
|
||||
// Error returns the human-readable message, satisfying the error interface.
func (e *ModelCandidateUnavailableError) Error() string {
	return e.Message
}
|
||||
|
||||
// Unwrap reports ErrNoModelCandidate as the underlying sentinel so existing
// errors.Is(err, ErrNoModelCandidate) checks keep matching.
func (e *ModelCandidateUnavailableError) Unwrap() error {
	return ErrNoModelCandidate
}
|
||||
|
||||
func ModelCandidateErrorCode(err error) string {
|
||||
var candidateErr *ModelCandidateUnavailableError
|
||||
if errors.As(err, &candidateErr) && strings.TrimSpace(candidateErr.Code) != "" {
|
||||
return candidateErr.Code
|
||||
}
|
||||
return "no_model_candidate"
|
||||
}
|
||||
|
||||
type CreatePlatformModelInput struct {
|
||||
PlatformID string `json:"platformId"`
|
||||
BaseModelID string `json:"baseModelId"`
|
||||
@ -81,6 +103,7 @@ type RuntimeModelCandidate struct {
|
||||
}
|
||||
|
||||
type RateLimitReservation struct {
|
||||
ReservationID string
|
||||
ScopeType string
|
||||
ScopeKey string
|
||||
Metric string
|
||||
@ -88,10 +111,12 @@ type RateLimitReservation struct {
|
||||
Amount float64
|
||||
WindowSeconds int
|
||||
LeaseTTLSeconds int
|
||||
WindowStart time.Time
|
||||
}
|
||||
|
||||
type RateLimitResult struct {
|
||||
LeaseIDs []string
|
||||
LeaseIDs []string
|
||||
Reservations []RateLimitReservation
|
||||
}
|
||||
|
||||
type CreateTaskAttemptInput struct {
|
||||
|
||||
23
apps/api/migrations/0029_rate_limit_reservations.sql
Normal file
23
apps/api/migrations/0029_rate_limit_reservations.sql
Normal file
@ -0,0 +1,23 @@
|
||||
-- Records every rate-limit reservation taken against a counter window so that
-- usage can later be committed (counted as real usage) or released (handed
-- back), including during recovery after a service restart.
CREATE TABLE IF NOT EXISTS gateway_rate_limit_reservations (
    id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
    task_id uuid NOT NULL REFERENCES gateway_tasks(id) ON DELETE CASCADE,
    attempt_id uuid REFERENCES gateway_task_attempts(id) ON DELETE SET NULL,
    scope_type text NOT NULL,
    scope_key text NOT NULL,
    metric text NOT NULL,
    window_start timestamptz NOT NULL,
    limit_value numeric NOT NULL,
    reserved_amount numeric NOT NULL DEFAULT 0,
    actual_amount numeric NOT NULL DEFAULT 0,
    -- Lifecycle: 'reserved' -> 'committed' or 'released'.
    status text NOT NULL DEFAULT 'reserved',
    reason text,
    created_at timestamptz NOT NULL DEFAULT now(),
    updated_at timestamptz NOT NULL DEFAULT now(),
    finalized_at timestamptz
);

-- Supports lookups of pending reservations per counter window.
CREATE INDEX IF NOT EXISTS idx_rate_limit_reservations_active
ON gateway_rate_limit_reservations(status, scope_type, scope_key, metric, window_start);

-- Supports per-task / per-attempt settlement queries.
CREATE INDEX IF NOT EXISTS idx_rate_limit_reservations_task
ON gateway_rate_limit_reservations(task_id, attempt_id);
|
||||
5
apps/api/migrations/0030_platform_model_cooldown.sql
Normal file
5
apps/api/migrations/0030_platform_model_cooldown.sql
Normal file
@ -0,0 +1,5 @@
|
||||
-- Adds a per-model cooldown window, written by CooldownCandidatePlatformModel;
-- presumably consulted when selecting model candidates — verify against the
-- candidate-selection query.
ALTER TABLE IF EXISTS platform_models
ADD COLUMN IF NOT EXISTS cooldown_until timestamptz;

CREATE INDEX IF NOT EXISTS idx_platform_models_cooldown
ON platform_models(cooldown_until);
|
||||
@ -18,6 +18,7 @@ import type {
|
||||
GatewayWalletTransaction,
|
||||
IntegrationPlatform,
|
||||
ModelCatalogResponse,
|
||||
ModelRateLimitStatus,
|
||||
PlatformModel,
|
||||
PricingRule,
|
||||
PricingRuleSet,
|
||||
@ -54,6 +55,7 @@ import {
|
||||
listBaseModels,
|
||||
listCatalogProviders,
|
||||
listModelCatalog,
|
||||
listModelRateLimitStatuses,
|
||||
listModels,
|
||||
listPlayableApiKeys,
|
||||
listPlayableModels,
|
||||
@ -140,6 +142,7 @@ type DataKey =
|
||||
| 'runnerPolicy'
|
||||
| 'runtimePolicySets'
|
||||
| 'rateLimitWindows'
|
||||
| 'modelRateLimits'
|
||||
| 'tenants'
|
||||
| 'users'
|
||||
| 'userGroups'
|
||||
@ -183,6 +186,7 @@ export function App() {
|
||||
const [accessRules, setAccessRules] = useState<GatewayAccessRule[]>([]);
|
||||
const [auditLogs, setAuditLogs] = useState<GatewayAuditLog[]>([]);
|
||||
const [rateLimitWindows, setRateLimitWindows] = useState<RateLimitWindow[]>([]);
|
||||
const [modelRateLimits, setModelRateLimits] = useState<ModelRateLimitStatus[]>([]);
|
||||
const [tenants, setTenants] = useState<GatewayTenant[]>([]);
|
||||
const [users, setUsers] = useState<GatewayUser[]>([]);
|
||||
const [userGroups, setUserGroups] = useState<UserGroup[]>([]);
|
||||
@ -239,6 +243,23 @@ export function App() {
|
||||
useEffect(() => {
|
||||
void ensureRouteData(token);
|
||||
}, [activePage, adminSection, taskListRequestKey, transactionListRequestKey, workspaceSection, token]);
|
||||
useEffect(() => {
|
||||
if (!token || activePage !== 'admin' || adminSection !== 'platforms') return undefined;
|
||||
const timer = window.setInterval(() => {
|
||||
void Promise.all([listModelRateLimitStatuses(token), listPlatforms(token)])
|
||||
.then(([rateLimitResponse, platformResponse]) => {
|
||||
setModelRateLimits(rateLimitResponse.items);
|
||||
setPlatforms(platformResponse.items);
|
||||
loadedDataKeysRef.current.add('modelRateLimits');
|
||||
loadedDataKeysRef.current.add('platforms');
|
||||
})
|
||||
.catch(() => {
|
||||
loadedDataKeysRef.current.delete('modelRateLimits');
|
||||
loadedDataKeysRef.current.delete('platforms');
|
||||
});
|
||||
}, 3000);
|
||||
return () => window.clearInterval(timer);
|
||||
}, [activePage, adminSection, token]);
|
||||
useEffect(() => {
|
||||
function handlePopState() {
|
||||
applyRoute(parseAppRoute());
|
||||
@ -278,9 +299,10 @@ export function App() {
|
||||
platforms,
|
||||
pricingRules,
|
||||
pricingRuleSets,
|
||||
providers,
|
||||
rateLimitWindows,
|
||||
runtimePolicySets,
|
||||
providers,
|
||||
rateLimitWindows,
|
||||
modelRateLimits,
|
||||
runtimePolicySets,
|
||||
taskResult,
|
||||
tasks,
|
||||
tenants,
|
||||
@ -288,7 +310,7 @@ export function App() {
|
||||
users,
|
||||
walletAccounts,
|
||||
walletTransactions,
|
||||
}), [accessRules, apiKeys, auditLogs, baseModels, modelCatalog, models, networkProxyConfig, platforms, pricingRuleSets, pricingRules, providers, rateLimitWindows, runnerPolicy, runtimePolicySets, taskResult, tasks, tenants, userGroups, users, walletAccounts, walletTransactions]);
|
||||
}), [accessRules, apiKeys, auditLogs, baseModels, modelCatalog, modelRateLimits, models, networkProxyConfig, platforms, pricingRuleSets, pricingRules, providers, rateLimitWindows, runnerPolicy, runtimePolicySets, taskResult, tasks, tenants, userGroups, users, walletAccounts, walletTransactions]);
|
||||
|
||||
async function refresh(nextToken = token) {
|
||||
await ensureRouteData(nextToken, true);
|
||||
@ -392,6 +414,9 @@ export function App() {
|
||||
case 'rateLimitWindows':
|
||||
setRateLimitWindows((await listRateLimitWindows(nextToken)).items);
|
||||
return;
|
||||
case 'modelRateLimits':
|
||||
setModelRateLimits((await listModelRateLimitStatuses(nextToken)).items);
|
||||
return;
|
||||
case 'tenants':
|
||||
setTenants((await listTenants(nextToken)).items);
|
||||
return;
|
||||
@ -514,7 +539,7 @@ export function App() {
|
||||
const modelsResponse = await replacePlatformModels(token, platform.id, modelBindings);
|
||||
setPlatforms((current) => [platformForState, ...current.filter((item) => item.id !== platform.id)]);
|
||||
setModels((current) => [...current.filter((model) => model.platformId !== platform.id), ...modelsResponse.items]);
|
||||
invalidateDataKeys('modelCatalog');
|
||||
invalidateDataKeys('modelCatalog', 'modelRateLimits');
|
||||
setCoreState('ready');
|
||||
setCoreMessage(input.platformId
|
||||
? `平台已更新,当前绑定 ${input.models.length} 个模型。`
|
||||
@ -533,7 +558,7 @@ export function App() {
|
||||
await deletePlatform(token, platformId);
|
||||
setPlatforms((current) => current.filter((item) => item.id !== platformId));
|
||||
setModels((current) => current.filter((item) => item.platformId !== platformId));
|
||||
invalidateDataKeys('modelCatalog');
|
||||
invalidateDataKeys('modelCatalog', 'modelRateLimits');
|
||||
setCoreState('ready');
|
||||
setCoreMessage('平台已删除。');
|
||||
} catch (err) {
|
||||
@ -788,6 +813,7 @@ export function App() {
|
||||
setAccessRules([]);
|
||||
setAuditLogs([]);
|
||||
setRateLimitWindows([]);
|
||||
setModelRateLimits([]);
|
||||
setTenants([]);
|
||||
setUsers([]);
|
||||
setUserGroups([]);
|
||||
@ -1132,7 +1158,7 @@ function dataKeysForRoute(
|
||||
if (activePage !== 'admin') return [];
|
||||
switch (adminSection) {
|
||||
case 'overview':
|
||||
return ['platforms', 'models', 'providers', 'pricingRules', 'runtimePolicySets', 'rateLimitWindows', 'tenants', 'users', 'userGroups', 'accessRules'];
|
||||
return ['platforms', 'models', 'providers', 'pricingRules', 'runtimePolicySets', 'rateLimitWindows', 'modelRateLimits', 'tenants', 'users', 'userGroups', 'accessRules'];
|
||||
case 'globalModels':
|
||||
return ['providers'];
|
||||
case 'pricing':
|
||||
@ -1142,7 +1168,7 @@ function dataKeysForRoute(
|
||||
case 'baseModels':
|
||||
return ['baseModels', 'providers', 'pricingRuleSets', 'runtimePolicySets'];
|
||||
case 'platforms':
|
||||
return ['platforms', 'models', 'providers', 'baseModels', 'pricingRuleSets', 'networkProxyConfig'];
|
||||
return ['platforms', 'models', 'modelRateLimits', 'providers', 'baseModels', 'pricingRuleSets', 'networkProxyConfig'];
|
||||
case 'tenants':
|
||||
return ['tenants', 'userGroups'];
|
||||
case 'users':
|
||||
|
||||
@ -22,6 +22,7 @@ import type {
|
||||
IntegrationPlatform,
|
||||
ListResponse,
|
||||
ModelCatalogResponse,
|
||||
ModelRateLimitStatus,
|
||||
PlatformModel,
|
||||
PlayableGatewayApiKey,
|
||||
PricingRule,
|
||||
@ -696,6 +697,10 @@ export async function listRateLimitWindows(token: string): Promise<ListResponse<
|
||||
return request<ListResponse<RateLimitWindow>>('/api/admin/runtime/rate-limit-windows', { token });
|
||||
}
|
||||
|
||||
export async function listModelRateLimitStatuses(token: string): Promise<ListResponse<ModelRateLimitStatus>> {
|
||||
return request<ListResponse<ModelRateLimitStatus>>('/api/admin/runtime/model-rate-limits', { token });
|
||||
}
|
||||
|
||||
export async function getNetworkProxyConfig(token: string): Promise<GatewayNetworkProxyConfig> {
|
||||
return request<GatewayNetworkProxyConfig>('/api/admin/config/network-proxy', { token });
|
||||
}
|
||||
|
||||
@ -13,6 +13,7 @@ import type {
|
||||
GatewayWalletTransaction,
|
||||
IntegrationPlatform,
|
||||
ModelCatalogResponse,
|
||||
ModelRateLimitStatus,
|
||||
PlatformModel,
|
||||
PricingRule,
|
||||
PricingRuleSet,
|
||||
@ -35,6 +36,7 @@ export interface ConsoleData {
|
||||
pricingRuleSets: PricingRuleSet[];
|
||||
providers: CatalogProvider[];
|
||||
rateLimitWindows: RateLimitWindow[];
|
||||
modelRateLimits: ModelRateLimitStatus[];
|
||||
runtimePolicySets: RuntimePolicySet[];
|
||||
taskResult: GatewayTask | null;
|
||||
tasks: GatewayTask[];
|
||||
|
||||
@ -137,8 +137,9 @@ export function AdminPage(props: {
|
||||
<PlatformManagementPanel
|
||||
baseModels={props.data.baseModels}
|
||||
message={props.operationMessage}
|
||||
networkProxyConfig={props.data.networkProxyConfig}
|
||||
platformModels={props.data.models}
|
||||
networkProxyConfig={props.data.networkProxyConfig}
|
||||
modelRateLimits={props.data.modelRateLimits}
|
||||
platformModels={props.data.models}
|
||||
platforms={props.data.platforms}
|
||||
pricingRuleSets={props.data.pricingRuleSets}
|
||||
providers={props.data.providers}
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
import { useEffect, useMemo, useState, type FormEvent, type ReactNode } from 'react';
|
||||
import { Boxes, CheckCircle2, Globe2, KeyRound, Pencil, Plus, RotateCcw, Search, ServerCog, ShieldCheck, SlidersHorizontal, Trash2, X } from 'lucide-react';
|
||||
import type { BaseModelCatalogItem, CatalogProvider, IntegrationPlatform, PlatformModel, PricingRuleSet } from '@easyai-ai-gateway/contracts';
|
||||
import { Boxes, CheckCircle2, Gauge, Globe2, KeyRound, Pencil, Plus, RotateCcw, Search, ServerCog, ShieldCheck, SlidersHorizontal, Trash2, X } from 'lucide-react';
|
||||
import type { BaseModelCatalogItem, CatalogProvider, IntegrationPlatform, ModelRateLimitStatus, PlatformModel, PricingRuleSet } from '@easyai-ai-gateway/contracts';
|
||||
import { Badge, Button, Card, CardContent, CardHeader, CardTitle, ConfirmDialog, EmptyState, FormDialog, Input, Label, ScreenMessage, Select, Table, TableCell, TableHead, TableRow } from '../../components/ui';
|
||||
import type { LoadState, PlatformWithModelsInput } from '../../types';
|
||||
import {
|
||||
@ -22,8 +22,9 @@ import { ModelCatalogCard } from './ModelCatalogCard';
|
||||
|
||||
export function PlatformManagementPanel(props: {
|
||||
baseModels: BaseModelCatalogItem[];
|
||||
message: string;
|
||||
networkProxyConfig: { globalHttpProxy?: string; globalHttpProxySet: boolean; globalHttpProxySource?: string } | null;
|
||||
message: string;
|
||||
modelRateLimits: ModelRateLimitStatus[];
|
||||
networkProxyConfig: { globalHttpProxy?: string; globalHttpProxySet: boolean; globalHttpProxySource?: string } | null;
|
||||
platforms: IntegrationPlatform[];
|
||||
platformModels: PlatformModel[];
|
||||
pricingRuleSets: PricingRuleSet[];
|
||||
@ -33,8 +34,9 @@ export function PlatformManagementPanel(props: {
|
||||
onSavePlatform: (input: PlatformWithModelsInput) => Promise<void>;
|
||||
}) {
|
||||
const defaultProvider = props.providers[0]?.providerKey ?? props.baseModels[0]?.providerKey ?? '';
|
||||
const [now, setNow] = useState(() => Date.now());
|
||||
const [dialogOpen, setDialogOpen] = useState(false);
|
||||
const [viewMode, setViewMode] = useState<'platforms' | 'models'>('platforms');
|
||||
const [viewMode, setViewMode] = useState<'platforms' | 'models' | 'limits'>('platforms');
|
||||
const [modelQuery, setModelQuery] = useState('');
|
||||
const [selectedPlatformId, setSelectedPlatformId] = useState('');
|
||||
const [validationMessage, setValidationMessage] = useState('');
|
||||
@ -72,6 +74,11 @@ export function PlatformManagementPanel(props: {
|
||||
});
|
||||
}, [modelQuery, platformMap, props.platformModels, selectedModelPlatformId]);
|
||||
|
||||
useEffect(() => {
|
||||
const timer = window.setInterval(() => setNow(Date.now()), 1000);
|
||||
return () => window.clearInterval(timer);
|
||||
}, []);
|
||||
|
||||
function openCreateDialog() {
|
||||
const provider = providerOptions[0] ?? '';
|
||||
const providerName = providerDisplayName(provider, providerMap);
|
||||
@ -166,12 +173,14 @@ export function PlatformManagementPanel(props: {
|
||||
</Button>
|
||||
</CardHeader>
|
||||
<CardContent>
|
||||
<div className="platformViewTabs">
|
||||
<button type="button" data-active={viewMode === 'platforms'} onClick={() => setViewMode('platforms')}>平台视图</button>
|
||||
<button type="button" data-active={viewMode === 'models'} onClick={() => setViewMode('models')}>模型视图</button>
|
||||
</div>
|
||||
{viewMode === 'platforms' ? (
|
||||
<PlatformTable
|
||||
<div className="platformViewTabs">
|
||||
<button type="button" data-active={viewMode === 'platforms'} onClick={() => setViewMode('platforms')}>平台视图</button>
|
||||
<button type="button" data-active={viewMode === 'models'} onClick={() => setViewMode('models')}>模型视图</button>
|
||||
<button type="button" data-active={viewMode === 'limits'} onClick={() => setViewMode('limits')}>实时限流</button>
|
||||
</div>
|
||||
{viewMode === 'platforms' ? (
|
||||
<PlatformTable
|
||||
now={now}
|
||||
platformModelCount={platformModelCount}
|
||||
platforms={props.platforms}
|
||||
providerMap={providerMap}
|
||||
@ -180,8 +189,8 @@ export function PlatformManagementPanel(props: {
|
||||
onCreate={openCreateDialog}
|
||||
onEdit={openEditDialog}
|
||||
/>
|
||||
) : (
|
||||
<PlatformModelTable
|
||||
) : viewMode === 'models' ? (
|
||||
<PlatformModelTable
|
||||
baseModels={props.baseModels}
|
||||
modelQuery={modelQuery}
|
||||
models={filteredPlatformModels}
|
||||
@ -190,9 +199,12 @@ export function PlatformManagementPanel(props: {
|
||||
platforms={props.platforms}
|
||||
providerMap={providerMap}
|
||||
onModelQueryChange={setModelQuery}
|
||||
onPlatformChange={setSelectedPlatformId}
|
||||
/>
|
||||
)}
|
||||
now={now}
|
||||
onPlatformChange={setSelectedPlatformId}
|
||||
/>
|
||||
) : (
|
||||
<RateLimitStatusTable statuses={props.modelRateLimits} platformMap={platformMap} now={now} />
|
||||
)}
|
||||
{props.message && <p className="formMessage">{props.message}</p>}
|
||||
</CardContent>
|
||||
</Card>
|
||||
@ -391,6 +403,7 @@ function ModelBindingPolicy(props: { form: PlatformWizardForm; onChange: (value:
|
||||
}
|
||||
|
||||
function PlatformTable(props: {
|
||||
now: number;
|
||||
platformModelCount: Map<string, number>;
|
||||
platforms: IntegrationPlatform[];
|
||||
providerMap: Map<string, CatalogProvider>;
|
||||
@ -428,6 +441,7 @@ function PlatformTable(props: {
|
||||
const pricing = platformPricingSummary(platform, props.pricingRuleSets);
|
||||
const rateLimit = platformRateLimitSummary(platform.rateLimitPolicy);
|
||||
const runtime = platformRuntimeSummary(platform);
|
||||
const platformCooldownMs = cooldownRemainingMs(platform.cooldownUntil, props.now);
|
||||
return (
|
||||
<TableRow key={platform.id}>
|
||||
<TableCell>
|
||||
@ -458,8 +472,14 @@ function PlatformTable(props: {
|
||||
<TableCell>{props.platformModelCount.get(platform.id) ?? 0}</TableCell>
|
||||
<TableCell>
|
||||
<span className="platformTableName">
|
||||
<strong><Badge variant={platform.status === 'enabled' ? 'success' : 'secondary'}>{platform.status}</Badge></strong>
|
||||
<small>{runtime}</small>
|
||||
<strong>
|
||||
{platformCooldownMs > 0 ? (
|
||||
<Badge variant="warning">冷却中</Badge>
|
||||
) : (
|
||||
<Badge variant={platform.status === 'enabled' ? 'success' : 'secondary'}>{platform.status}</Badge>
|
||||
)}
|
||||
</strong>
|
||||
<small>{platformCooldownMs > 0 ? `剩余 ${formatCooldownRemaining(platformCooldownMs)}` : runtime}</small>
|
||||
</span>
|
||||
</TableCell>
|
||||
<TableCell>
|
||||
@ -487,6 +507,7 @@ function PlatformModelTable(props: {
|
||||
platformMap: Map<string, IntegrationPlatform>;
|
||||
platforms: IntegrationPlatform[];
|
||||
providerMap: Map<string, CatalogProvider>;
|
||||
now: number;
|
||||
onModelQueryChange: (value: string) => void;
|
||||
onPlatformChange: (value: string) => void;
|
||||
}) {
|
||||
@ -520,12 +541,14 @@ function PlatformModelTable(props: {
|
||||
const provider = platform ? props.providerMap.get(platform.provider) : undefined;
|
||||
const baseModel = findBaseModelForPlatformModel(platform, props.baseModels, model);
|
||||
const modelIconPath = readPlatformModelIconPath(model, baseModel);
|
||||
const modelCooldownMs = cooldownRemainingMs(model.cooldownUntil, props.now);
|
||||
return (
|
||||
<ModelCatalogCard
|
||||
key={model.id}
|
||||
badges={[
|
||||
<Badge variant="outline">{model.modelType.join(', ')}</Badge>,
|
||||
<Badge variant={model.enabled ? 'success' : 'secondary'}>{model.enabled ? 'enabled' : 'disabled'}</Badge>,
|
||||
<Badge key="type" variant="outline">{model.modelType.join(', ')}</Badge>,
|
||||
...(modelCooldownMs > 0 ? [<Badge key="cooldown" variant="warning">冷却中 · {formatCooldownRemaining(modelCooldownMs)}</Badge>] : []),
|
||||
<Badge key="enabled" variant={model.enabled ? 'success' : 'secondary'}>{model.enabled ? 'enabled' : 'disabled'}</Badge>,
|
||||
]}
|
||||
chips={platformModelChips(model)}
|
||||
iconPath={modelIconPath || provider?.iconPath}
|
||||
@ -545,6 +568,62 @@ function PlatformModelTable(props: {
|
||||
<EmptyState title="没有匹配的模型" description="换个平台或搜索关键词试试。" />
|
||||
)
|
||||
)}
|
||||
</section>
|
||||
);
|
||||
}
|
||||
|
||||
function RateLimitStatusTable(props: { statuses: ModelRateLimitStatus[]; platformMap: Map<string, IntegrationPlatform>; now: number }) {
|
||||
if (!props.statuses.length) {
|
||||
return <EmptyState title="暂无限流状态" description="模型产生请求后会在这里显示实时 RPM、TPM 和并发窗口。" />;
|
||||
}
|
||||
return (
|
||||
<section className="platformLimitView">
|
||||
<div className="platformLimitHeader">
|
||||
<span><Gauge size={15} />按综合满载率从高到低排序</span>
|
||||
<small>每 3 秒刷新一次;TPM 显示已结算 + 预占。</small>
|
||||
</div>
|
||||
<div className="platformLimitTableViewport">
|
||||
<Table className="platformDataTable platformLimitTable">
|
||||
<TableRow className="shTableHeader">
|
||||
<TableHead>模型</TableHead>
|
||||
<TableHead>平台</TableHead>
|
||||
<TableHead>并发</TableHead>
|
||||
<TableHead>TPM</TableHead>
|
||||
<TableHead>RPM</TableHead>
|
||||
<TableHead>状态</TableHead>
|
||||
<TableHead>满载率</TableHead>
|
||||
</TableRow>
|
||||
{props.statuses.map((status) => {
|
||||
const platform = props.platformMap.get(status.platformId);
|
||||
return (
|
||||
<TableRow key={status.platformModelId}>
|
||||
<TableCell>
|
||||
<span className="platformTableName">
|
||||
<strong>{status.displayName || status.modelAlias || status.modelName}</strong>
|
||||
<small>{status.providerModelName || status.modelName}</small>
|
||||
</span>
|
||||
</TableCell>
|
||||
<TableCell>
|
||||
<span className="platformTableName">
|
||||
<strong>{platform ? platformDisplayName(platform) : status.platformName}</strong>
|
||||
<small>{status.provider}</small>
|
||||
</span>
|
||||
</TableCell>
|
||||
<TableCell>{metricCell(status.concurrent)}</TableCell>
|
||||
<TableCell>{metricCell(status.tpm, true)}</TableCell>
|
||||
<TableCell>{metricCell(status.rpm)}</TableCell>
|
||||
<TableCell>{modelRuntimeStatusCell(status, props.now)}</TableCell>
|
||||
<TableCell>
|
||||
<span className="rateLoadCell">
|
||||
<strong>{formatPercent(status.loadRatio)}</strong>
|
||||
<span className="rateLoadTrack"><i style={{ width: `${Math.min(status.loadRatio * 100, 100)}%` }} /></span>
|
||||
</span>
|
||||
</TableCell>
|
||||
</TableRow>
|
||||
);
|
||||
})}
|
||||
</Table>
|
||||
</div>
|
||||
</section>
|
||||
);
|
||||
}
|
||||
@ -1112,7 +1191,63 @@ function rateLimitMetricText(metric: string) {
|
||||
concurrent: '并发',
|
||||
queue_size: '队列',
|
||||
};
|
||||
return labels[metric] ?? metric;
|
||||
return labels[metric] ?? metric;
|
||||
}
|
||||
|
||||
function metricCell(metric: ModelRateLimitStatus['rpm'], includeReserved = false) {
|
||||
if (!metric.limited) return <span className="rateMetricCell"><strong>{formatLimit(metric.currentValue)} / 不限</strong><small>未配置上限</small></span>;
|
||||
return (
|
||||
<span className="rateMetricCell">
|
||||
<strong>{formatLimit(metric.currentValue)} / {formatLimit(metric.limitValue)}</strong>
|
||||
<small>{includeReserved && metric.reservedValue > 0 ? `已用 ${formatLimit(metric.usedValue)} · 预占 ${formatLimit(metric.reservedValue)}` : `窗口 ${formatPercent(metric.ratio)}`}</small>
|
||||
</span>
|
||||
);
|
||||
}
|
||||
|
||||
function modelRuntimeStatusCell(status: ModelRateLimitStatus, now: number) {
|
||||
const modelCooldownMs = cooldownRemainingMs(status.modelCooldownUntil, now);
|
||||
const platformCooldownMs = cooldownRemainingMs(status.platformCooldownUntil, now);
|
||||
if (modelCooldownMs > 0) {
|
||||
return (
|
||||
<span className="platformTableName">
|
||||
<strong><Badge variant="warning">模型冷却中</Badge></strong>
|
||||
<small>剩余 {formatCooldownRemaining(modelCooldownMs)}</small>
|
||||
</span>
|
||||
);
|
||||
}
|
||||
if (platformCooldownMs > 0) {
|
||||
return (
|
||||
<span className="platformTableName">
|
||||
<strong><Badge variant="warning">平台冷却中</Badge></strong>
|
||||
<small>剩余 {formatCooldownRemaining(platformCooldownMs)}</small>
|
||||
</span>
|
||||
);
|
||||
}
|
||||
return (
|
||||
<span className="platformTableName">
|
||||
<strong><Badge variant={status.enabled ? 'success' : 'secondary'}>{status.enabled ? '可用' : '已停用'}</Badge></strong>
|
||||
<small>{status.enabled ? '参与路由' : '不参与路由'}</small>
|
||||
</span>
|
||||
);
|
||||
}
|
||||
|
||||
function cooldownRemainingMs(cooldownUntil: string | undefined, now: number) {
|
||||
if (!cooldownUntil) return 0;
|
||||
const until = Date.parse(cooldownUntil);
|
||||
if (!Number.isFinite(until)) return 0;
|
||||
return Math.max(until - now, 0);
|
||||
}
|
||||
|
||||
function formatCooldownRemaining(milliseconds: number) {
|
||||
const minutes = milliseconds / 60000;
|
||||
if (minutes >= 1) return `${trimNumber(Math.ceil(minutes * 10) / 10)} 分钟`;
|
||||
const seconds = Math.ceil(milliseconds / 1000);
|
||||
return `${Math.max(seconds, 1)} 秒`;
|
||||
}
|
||||
|
||||
function formatPercent(value: number) {
|
||||
if (!Number.isFinite(value) || value <= 0) return '0%';
|
||||
return `${trimNumber(value * 100)}%`;
|
||||
}
|
||||
|
||||
function platformRuntimeSummary(platform: IntegrationPlatform) {
|
||||
|
||||
@ -964,6 +964,104 @@
|
||||
gap: 12px;
|
||||
}
|
||||
|
||||
.platformLimitView {
|
||||
display: grid;
|
||||
grid-template-rows: auto minmax(0, 1fr);
|
||||
gap: 10px;
|
||||
min-height: 0;
|
||||
max-height: calc(100dvh - 220px);
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.platformLimitHeader {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
gap: 12px;
|
||||
color: var(--muted-foreground);
|
||||
font-size: var(--font-size-xs);
|
||||
}
|
||||
|
||||
.platformLimitHeader span {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
gap: 6px;
|
||||
color: var(--text-strong);
|
||||
font-weight: var(--font-weight-semibold);
|
||||
}
|
||||
|
||||
.platformLimitTableViewport {
|
||||
min-width: 0;
|
||||
min-height: 0;
|
||||
max-height: calc(100dvh - 270px);
|
||||
overflow: auto;
|
||||
border: 1px solid var(--border-subtle);
|
||||
border-radius: 10px;
|
||||
background: #fff;
|
||||
overscroll-behavior: contain;
|
||||
}
|
||||
|
||||
.platformLimitTable {
|
||||
width: max-content;
|
||||
min-width: 100%;
|
||||
overflow: visible;
|
||||
border: 0;
|
||||
border-radius: 0;
|
||||
}
|
||||
|
||||
.platformLimitTable .shTableHeader {
|
||||
position: sticky;
|
||||
top: 0;
|
||||
z-index: 2;
|
||||
}
|
||||
|
||||
.platformLimitTable .shTableRow {
|
||||
grid-template-columns: clamp(150px, 16vw, 220px) minmax(148px, 1fr) minmax(104px, max-content) minmax(136px, max-content) minmax(122px, max-content) minmax(132px, max-content) minmax(128px, max-content);
|
||||
min-width: 920px;
|
||||
}
|
||||
|
||||
.platformLimitTable .shTableHead,
|
||||
.platformLimitTable .shTableCell {
|
||||
padding-right: 10px;
|
||||
padding-left: 10px;
|
||||
}
|
||||
|
||||
.rateMetricCell,
|
||||
.rateLoadCell {
|
||||
display: grid;
|
||||
min-width: 0;
|
||||
gap: 4px;
|
||||
}
|
||||
|
||||
.rateMetricCell strong,
|
||||
.rateLoadCell strong {
|
||||
color: var(--text-strong);
|
||||
font-size: var(--font-size-sm);
|
||||
}
|
||||
|
||||
.rateMetricCell small {
|
||||
overflow: hidden;
|
||||
color: var(--muted-foreground);
|
||||
font-size: var(--font-size-xs);
|
||||
text-overflow: ellipsis;
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
.rateLoadTrack {
|
||||
display: block;
|
||||
height: 6px;
|
||||
overflow: hidden;
|
||||
border-radius: 999px;
|
||||
background: #eef2f6;
|
||||
}
|
||||
|
||||
.rateLoadTrack i {
|
||||
display: block;
|
||||
height: 100%;
|
||||
border-radius: inherit;
|
||||
background: #0f766e;
|
||||
}
|
||||
|
||||
.platformModelToolbar {
|
||||
display: grid;
|
||||
grid-template-columns: minmax(220px, 0.6fr) minmax(260px, 1fr);
|
||||
|
||||
@ -69,6 +69,7 @@ export interface IntegrationPlatform {
|
||||
credentialsPreview?: Record<string, unknown>;
|
||||
retryPolicy?: RateLimitPolicy | Record<string, unknown>;
|
||||
rateLimitPolicy?: RateLimitPolicy;
|
||||
cooldownUntil?: string;
|
||||
createdAt: string;
|
||||
updatedAt: string;
|
||||
}
|
||||
@ -644,6 +645,7 @@ export interface PlatformModel {
|
||||
rateLimitPolicy?: RateLimitPolicy;
|
||||
runtimePolicySetId?: string;
|
||||
runtimePolicyOverride?: RuntimePolicyOverride;
|
||||
cooldownUntil?: string;
|
||||
enabled: boolean;
|
||||
createdAt: string;
|
||||
updatedAt: string;
|
||||
@ -748,6 +750,36 @@ export interface RateLimitWindow {
|
||||
updatedAt: string;
|
||||
}
|
||||
|
||||
export interface RateLimitMetricStatus {
|
||||
currentValue: number;
|
||||
usedValue: number;
|
||||
reservedValue: number;
|
||||
limitValue: number;
|
||||
limited: boolean;
|
||||
ratio: number;
|
||||
resetAt?: string;
|
||||
}
|
||||
|
||||
export interface ModelRateLimitStatus {
|
||||
platformModelId: string;
|
||||
platformId: string;
|
||||
platformName: string;
|
||||
provider: string;
|
||||
modelName: string;
|
||||
providerModelName?: string;
|
||||
modelAlias?: string;
|
||||
displayName: string;
|
||||
modelType: string[];
|
||||
enabled: boolean;
|
||||
rateLimitPolicy?: RateLimitPolicy | Record<string, unknown>;
|
||||
platformCooldownUntil?: string;
|
||||
modelCooldownUntil?: string;
|
||||
concurrent: RateLimitMetricStatus;
|
||||
rpm: RateLimitMetricStatus;
|
||||
tpm: RateLimitMetricStatus;
|
||||
loadRatio: number;
|
||||
}
|
||||
|
||||
export interface GatewayNetworkProxyConfig {
|
||||
globalHttpProxy?: string;
|
||||
globalHttpProxySet: boolean;
|
||||
|
||||
Loading…
Reference in New Issue
Block a user