easyai-ai-gateway/apps/api/internal/store/rate_limit_status.go

250 lines
8.1 KiB
Go

package store
import (
"context"
"sort"
"strings"
)
// RateLimitMetricStatus is a point-in-time reading of one rate-limit metric
// (concurrency, RPM or TPM) together with the limit it is measured against.
// Values are produced by metricStatus.
type RateLimitMetricStatus struct {
	// Consumption figures.

	// CurrentValue is the figure compared against LimitValue. For RPM and TPM
	// it is used + reserved; for concurrency it is the active lease total.
	CurrentValue float64 `json:"currentValue"`
	// UsedValue is the already-consumed part of the counter.
	UsedValue float64 `json:"usedValue"`
	// ReservedValue is the reserved-but-not-yet-consumed part of the counter.
	ReservedValue float64 `json:"reservedValue"`

	// Limit figures.

	// LimitValue is the configured limit; 0 means no limit is configured.
	LimitValue float64 `json:"limitValue"`
	// Limited reports whether LimitValue is greater than zero.
	Limited bool `json:"limited"`
	// Ratio is CurrentValue / LimitValue when Limited, otherwise 0.
	Ratio float64 `json:"ratio"`

	// ResetAt is the counter window reset time as text; it is omitted from
	// JSON when empty.
	ResetAt string `json:"resetAt,omitempty"`
}
// ModelRateLimitStatus describes one platform model together with its
// effective rate-limit policy and the current load on each limited metric.
// Instances are built by ListModelRateLimitStatuses.
type ModelRateLimitStatus struct {
	// Identity of the model and the platform it belongs to.

	PlatformModelID string `json:"platformModelId"`
	PlatformID      string `json:"platformId"`
	PlatformName    string `json:"platformName"`
	Provider        string `json:"provider"`
	ModelName       string `json:"modelName"`
	// ProviderModelName falls back to ModelName when the stored value is
	// NULL or empty.
	ProviderModelName string   `json:"providerModelName,omitempty"`
	ModelAlias        string   `json:"modelAlias,omitempty"`
	DisplayName       string   `json:"displayName"`
	ModelType         []string `json:"modelType"`
	Enabled           bool     `json:"enabled"`

	// Policy and cooldown state.

	// RateLimitPolicy is the merged policy returned by
	// effectiveModelRateLimitPolicy; it is nil (and omitted from JSON) when
	// the merged policy contains no rules.
	RateLimitPolicy map[string]any `json:"rateLimitPolicy,omitempty"`
	// PlatformCooldownUntil and ModelCooldownUntil are UTC timestamps
	// formatted as YYYY-MM-DDTHH:MI:SS.MSZ, or empty when no cooldown is set.
	PlatformCooldownUntil string `json:"platformCooldownUntil,omitempty"`
	ModelCooldownUntil    string `json:"modelCooldownUntil,omitempty"`

	// Per-metric load.

	Concurrent RateLimitMetricStatus `json:"concurrent"`
	RPM        RateLimitMetricStatus `json:"rpm"`
	TPM        RateLimitMetricStatus `json:"tpm"`
	// LoadRatio is the largest of the three metric ratios, never below 0.
	LoadRatio float64 `json:"loadRatio"`
}
// ListModelRateLimitStatuses returns one status entry for every platform model
// whose platform has not been deleted.
//
// For each model the query gathers:
//   - concurrency: the sum of lease values for unreleased, unexpired leases;
//   - RPM: the counter of the most recent window whose reset time is still in
//     the future;
//   - TPM: the sum over every counter whose metric starts with "tpm" and whose
//     reset time is still in the future, with the latest reset time.
//
// The effective policy is computed by effectiveModelRateLimitPolicy and each
// metric is compared against the limit that policy defines. The result is
// ordered by LoadRatio descending, then by case-insensitive DisplayName; the
// sort is stable, so remaining ties keep the query order (platform priority,
// model name). The returned slice is non-nil even when it is empty.
func (s *Store) ListModelRateLimitStatuses(ctx context.Context) ([]ModelRateLimitStatus, error) {
	const query = `
SELECT m.id::text, m.platform_id::text, p.name, p.provider,
m.model_name, COALESCE(NULLIF(m.provider_model_name, ''), m.model_name), COALESCE(m.model_alias, ''),
m.model_type, m.display_name, m.enabled,
p.rate_limit_policy, COALESCE(rp.rate_limit_policy, '{}'::jsonb), COALESCE(NULLIF(m.runtime_policy_override, '{}'::jsonb), b.runtime_policy_override, '{}'::jsonb), m.rate_limit_policy,
COALESCE(to_char(p.cooldown_until AT TIME ZONE 'UTC', 'YYYY-MM-DD"T"HH24:MI:SS.MS"Z"'), ''),
COALESCE(to_char(m.cooldown_until AT TIME ZONE 'UTC', 'YYYY-MM-DD"T"HH24:MI:SS.MS"Z"'), ''),
COALESCE(con.active, 0)::float8,
COALESCE(rpm.used_value, 0)::float8, COALESCE(rpm.reserved_value, 0)::float8, COALESCE(rpm.reset_at::text, ''),
COALESCE(tpm.used_value, 0)::float8, COALESCE(tpm.reserved_value, 0)::float8, COALESCE(tpm.reset_at::text, '')
FROM platform_models m
JOIN integration_platforms p ON p.id = m.platform_id
LEFT JOIN base_model_catalog b ON b.id = m.base_model_id
LEFT JOIN model_runtime_policy_sets rp ON rp.id = COALESCE(m.runtime_policy_set_id, b.runtime_policy_set_id)
LEFT JOIN (
SELECT scope_key, SUM(lease_value) AS active
FROM gateway_concurrency_leases
WHERE scope_type = 'platform_model'
AND released_at IS NULL
AND expires_at > now()
GROUP BY scope_key
) con ON con.scope_key = m.id::text
LEFT JOIN (
SELECT DISTINCT ON (scope_key) scope_key, used_value, reserved_value, reset_at
FROM gateway_rate_limit_counters
WHERE scope_type = 'platform_model'
AND metric = 'rpm'
AND reset_at > now()
ORDER BY scope_key, window_start DESC
) rpm ON rpm.scope_key = m.id::text
LEFT JOIN (
SELECT scope_key, SUM(used_value) AS used_value, SUM(reserved_value) AS reserved_value, MAX(reset_at) AS reset_at
FROM gateway_rate_limit_counters
WHERE scope_type = 'platform_model'
AND metric LIKE 'tpm%'
AND reset_at > now()
GROUP BY scope_key
) tpm ON tpm.scope_key = m.id::text
WHERE p.deleted_at IS NULL
ORDER BY p.priority ASC, m.model_name ASC`

	rows, err := s.pool.Query(ctx, query)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	// Start from an empty, non-nil slice so the result is never nil.
	statuses := []ModelRateLimitStatus{}
	for rows.Next() {
		var (
			status ModelRateLimitStatus

			// Raw JSON columns, decoded after the scan.
			modelTypeJSON       []byte
			platformPolicyJSON  []byte
			runtimePolicyJSON   []byte
			runtimeOverrideJSON []byte
			modelPolicyJSON     []byte

			activeLeases         float64
			rpmUsed, rpmReserved float64
			rpmResetAt           string
			tpmUsed, tpmReserved float64
			tpmResetAt           string
		)

		// Destinations must stay in the same order as the SELECT list.
		scanErr := rows.Scan(
			&status.PlatformModelID,
			&status.PlatformID,
			&status.PlatformName,
			&status.Provider,
			&status.ModelName,
			&status.ProviderModelName,
			&status.ModelAlias,
			&modelTypeJSON,
			&status.DisplayName,
			&status.Enabled,
			&platformPolicyJSON,
			&runtimePolicyJSON,
			&runtimeOverrideJSON,
			&modelPolicyJSON,
			&status.PlatformCooldownUntil,
			&status.ModelCooldownUntil,
			&activeLeases,
			&rpmUsed,
			&rpmReserved,
			&rpmResetAt,
			&tpmUsed,
			&tpmReserved,
			&tpmResetAt,
		)
		if scanErr != nil {
			return nil, scanErr
		}

		status.ModelType = decodeStringArray(modelTypeJSON)

		platformPolicy := decodeObject(platformPolicyJSON)
		runtimePolicy := decodeObject(runtimePolicyJSON)
		runtimeOverride := decodeObject(runtimeOverrideJSON)
		modelPolicy := decodeObject(modelPolicyJSON)
		policy := effectiveModelRateLimitPolicy(platformPolicy, runtimePolicy, runtimeOverride, modelPolicy)
		status.RateLimitPolicy = policy

		// Concurrency has no reserved part and no reset time.
		concurrentLimit := rateLimitForMetric(policy, "concurrent")
		status.Concurrent = metricStatus(activeLeases, activeLeases, 0, concurrentLimit, "")

		rpmLimit := rateLimitForMetric(policy, "rpm")
		status.RPM = metricStatus(rpmUsed+rpmReserved, rpmUsed, rpmReserved, rpmLimit, rpmResetAt)

		status.TPM = metricStatus(tpmUsed+tpmReserved, tpmUsed, tpmReserved, tpmLimit(policy), tpmResetAt)

		status.LoadRatio = maxFloat(status.Concurrent.Ratio, status.RPM.Ratio, status.TPM.Ratio)
		statuses = append(statuses, status)
	}
	if iterErr := rows.Err(); iterErr != nil {
		return nil, iterErr
	}

	// Busiest models first; equal loads fall back to display name.
	sort.SliceStable(statuses, func(i, j int) bool {
		left, right := statuses[i], statuses[j]
		if left.LoadRatio != right.LoadRatio {
			return left.LoadRatio > right.LoadRatio
		}
		return strings.ToLower(left.DisplayName) < strings.ToLower(right.DisplayName)
	})
	return statuses, nil
}
// effectiveModelRateLimitPolicy layers the rate-limit policies that apply to a
// model and returns the merged result, or nil when the result has no rules.
//
// Layers are applied in this order, each one shallowly overriding the keys of
// the previous ones:
//  1. platformPolicy (always the base);
//  2. runtimePolicy, only when it contains rules;
//  3. the "rateLimitPolicy" object nested in runtimeOverride, when non-empty;
//  4. modelPolicy, only when it contains rules.
func effectiveModelRateLimitPolicy(platformPolicy, runtimePolicy, runtimeOverride, modelPolicy map[string]any) map[string]any {
	merged := platformPolicy

	if hasRateLimitRules(runtimePolicy) {
		merged = shallowMergeMap(merged, runtimePolicy)
	}

	// A missing or non-object entry yields a nil map, which has length 0.
	overridePolicy, _ := runtimeOverride["rateLimitPolicy"].(map[string]any)
	if len(overridePolicy) > 0 {
		merged = shallowMergeMap(merged, overridePolicy)
	}

	if hasRateLimitRules(modelPolicy) {
		merged = shallowMergeMap(merged, modelPolicy)
	}

	if !hasRateLimitRules(merged) {
		return nil
	}
	return merged
}
// hasRateLimitRules reports whether policy carries a non-empty "rules" list.
// A nil policy, a missing key, or a "rules" value that is not a []any all
// count as having no rules.
func hasRateLimitRules(policy map[string]any) bool {
	if rules, ok := policy["rules"].([]any); ok {
		return len(rules) != 0
	}
	return false
}
// shallowMergeMap returns a new map holding every entry of base overlaid with
// every entry of override; on a key conflict the override value wins. Values
// are copied by assignment only (nested maps are shared, not cloned). Neither
// input is modified, and the result is non-nil even when both inputs are nil.
func shallowMergeMap(base map[string]any, override map[string]any) map[string]any {
	merged := make(map[string]any, len(base)+len(override))
	// Apply base first so that override entries replace it afterwards.
	for _, layer := range [...]map[string]any{base, override} {
		for k, v := range layer {
			merged[k] = v
		}
	}
	return merged
}
func rateLimitForMetric(policy map[string]any, metric string) float64 {
rules, _ := policy["rules"].([]any)
for _, rawRule := range rules {
rule, _ := rawRule.(map[string]any)
if strings.TrimSpace(stringValue(rule["metric"])) == metric {
return floatValue(rule["limit"])
}
}
return 0
}
// tpmLimit returns the tokens-per-minute limit defined by policy. A positive
// "tpm_total" limit takes precedence; otherwise the result is the sum of the
// "tpm_input" and "tpm_output" limits (0 when neither is defined).
func tpmLimit(policy map[string]any) float64 {
	total := rateLimitForMetric(policy, "tpm_total")
	if total > 0 {
		return total
	}
	input := rateLimitForMetric(policy, "tpm_input")
	output := rateLimitForMetric(policy, "tpm_output")
	return input + output
}
// metricStatus assembles a RateLimitMetricStatus from raw counter values.
// The metric counts as limited only when limit is greater than zero; in that
// case Ratio is current / limit, otherwise Ratio stays 0.
func metricStatus(current float64, used float64, reserved float64, limit float64, resetAt string) RateLimitMetricStatus {
	limited := limit > 0

	// Only divide when a positive limit exists.
	ratio := 0.0
	if limited {
		ratio = current / limit
	}

	return RateLimitMetricStatus{
		CurrentValue:  current,
		UsedValue:     used,
		ReservedValue: reserved,
		LimitValue:    limit,
		Limited:       limited,
		Ratio:         ratio,
		ResetAt:       resetAt,
	}
}
// maxFloat returns the largest of values, but never less than 0: with no
// arguments, or when every argument is negative, the result is 0. NaN
// arguments are ignored because they never compare greater than the running
// maximum.
func maxFloat(values ...float64) float64 {
	largest := 0.0
	for i := 0; i < len(values); i++ {
		if candidate := values[i]; candidate > largest {
			largest = candidate
		}
	}
	return largest
}
// stringValue returns value as a string with surrounding whitespace removed.
// Anything that is not a string (including nil) yields "".
func stringValue(value any) string {
	if text, isString := value.(string); isString {
		return strings.TrimSpace(text)
	}
	return ""
}
// floatValue converts a loosely typed numeric value to float64.
//
// Every built-in integer and floating-point type is accepted, as is any value
// that exposes a Float64() (float64, error) method (encoding/json.Number is
// one such type). Anything else, including nil, strings and values whose
// Float64 method reports an error, yields 0.
func floatValue(value any) float64 {
	switch typed := value.(type) {
	case float64:
		return typed
	case float32:
		return float64(typed)
	case int:
		return float64(typed)
	case int8:
		return float64(typed)
	case int16:
		return float64(typed)
	case int32:
		return float64(typed)
	case int64:
		return float64(typed)
	case uint:
		return float64(typed)
	case uint8:
		return float64(typed)
	case uint16:
		return float64(typed)
	case uint32:
		return float64(typed)
	case uint64:
		return float64(typed)
	case interface{ Float64() (float64, error) }:
		// Matched structurally so this file does not need to import
		// encoding/json just to recognize json.Number.
		parsed, err := typed.Float64()
		if err != nil {
			return 0
		}
		return parsed
	default:
		return 0
	}
}