// Source: easyai-ai-gateway/apps/api/internal/store/rate_limit_status.go

package store
import (
"context"
"sort"
"strings"
)
// RateLimitMetricStatus is the JSON-serializable snapshot of a single
// rate-limit metric (concurrency, RPM, or TPM) for one platform model.
type RateLimitMetricStatus struct {
	CurrentValue  float64 `json:"currentValue"`      // used + reserved (for concurrency: live lease total)
	UsedValue     float64 `json:"usedValue"`         // consumed portion of the current window
	ReservedValue float64 `json:"reservedValue"`     // in-flight/held portion not yet consumed
	LimitValue    float64 `json:"limitValue"`        // configured ceiling; 0 means unlimited
	Limited       bool    `json:"limited"`           // true when LimitValue > 0
	Ratio         float64 `json:"ratio"`             // CurrentValue / LimitValue; 0 when unlimited
	ResetAt       string  `json:"resetAt,omitempty"` // window reset time as text; empty when unknown
}
// ModelRateLimitStatus aggregates identity, effective policy, cooldown, and
// live usage information for one platform model, as returned by
// ListModelRateLimitStatuses.
type ModelRateLimitStatus struct {
	PlatformModelID       string                `json:"platformModelId"`
	PlatformID            string                `json:"platformId"`
	PlatformName          string                `json:"platformName"`
	Provider              string                `json:"provider"`
	ModelName             string                `json:"modelName"`
	ProviderModelName     string                `json:"providerModelName,omitempty"` // falls back to ModelName when blank in the DB
	ModelAlias            string                `json:"modelAlias,omitempty"`
	DisplayName           string                `json:"displayName"`
	ModelType             []string              `json:"modelType"`
	Enabled               bool                  `json:"enabled"`
	RateLimitPolicy       map[string]any        `json:"rateLimitPolicy,omitempty"`       // merged effective policy; nil when no rules apply
	PlatformCooldownUntil string                `json:"platformCooldownUntil,omitempty"` // UTC timestamp text; empty when not cooling down
	ModelCooldownUntil    string                `json:"modelCooldownUntil,omitempty"`    // UTC timestamp text; empty when not cooling down
	Concurrent            RateLimitMetricStatus `json:"concurrent"`
	QueuedTasks           float64               `json:"queuedTasks"` // queued async tasks whose latest attempt targets this model
	RPM                   RateLimitMetricStatus `json:"rpm"`
	TPM                   RateLimitMetricStatus `json:"tpm"`
	LoadRatio             float64               `json:"loadRatio"` // max of Concurrent/RPM/TPM ratios; drives sort order
}
// ListModelRateLimitStatuses returns a live rate-limit snapshot for every
// platform model on non-deleted platforms: effective rate-limit policy,
// platform/model cooldowns, active concurrency, queued async tasks, and the
// current RPM/TPM window usage. Results are ordered by descending LoadRatio,
// ties broken by case-insensitive DisplayName.
func (s *Store) ListModelRateLimitStatuses(ctx context.Context) ([]ModelRateLimitStatus, error) {
	// A single query joins model/platform rows with four derived tables:
	//   con    - unreleased, unexpired concurrency leases summed per model
	//   queued - queued async tasks, attributed via each task's latest attempt
	//            (DISTINCT ON task_id, ordered by attempt_no/started_at DESC)
	//   rpm    - the most recent still-open RPM counter window per model
	//   tpm    - all still-open TPM windows summed (metric LIKE 'tpm%')
	rows, err := s.pool.Query(ctx, `
SELECT m.id::text, m.platform_id::text, p.name, p.provider,
m.model_name, COALESCE(NULLIF(m.provider_model_name, ''), m.model_name), COALESCE(m.model_alias, ''),
m.model_type, m.display_name, m.enabled,
p.rate_limit_policy, COALESCE(rp.rate_limit_policy, '{}'::jsonb), COALESCE(NULLIF(m.runtime_policy_override, '{}'::jsonb), b.runtime_policy_override, '{}'::jsonb), m.rate_limit_policy,
COALESCE(to_char(p.cooldown_until AT TIME ZONE 'UTC', 'YYYY-MM-DD"T"HH24:MI:SS.MS"Z"'), ''),
COALESCE(to_char(m.cooldown_until AT TIME ZONE 'UTC', 'YYYY-MM-DD"T"HH24:MI:SS.MS"Z"'), ''),
COALESCE(con.active, 0)::float8,
COALESCE(queued.waiting, 0)::float8,
COALESCE(rpm.used_value, 0)::float8, COALESCE(rpm.reserved_value, 0)::float8, COALESCE(rpm.reset_at::text, ''),
COALESCE(tpm.used_value, 0)::float8, COALESCE(tpm.reserved_value, 0)::float8, COALESCE(tpm.reset_at::text, '')
FROM platform_models m
JOIN integration_platforms p ON p.id = m.platform_id
LEFT JOIN base_model_catalog b ON b.id = m.base_model_id
LEFT JOIN model_runtime_policy_sets rp ON rp.id = COALESCE(m.runtime_policy_set_id, b.runtime_policy_set_id)
LEFT JOIN (
SELECT scope_key, SUM(lease_value) AS active
FROM gateway_concurrency_leases
WHERE scope_type = 'platform_model'
AND released_at IS NULL
AND expires_at > now()
GROUP BY scope_key
) con ON con.scope_key = m.id::text
LEFT JOIN (
SELECT latest.platform_model_id, COUNT(*) AS waiting
FROM (
SELECT DISTINCT ON (a.task_id) a.task_id, a.platform_model_id::text AS platform_model_id
FROM gateway_tasks t
JOIN gateway_task_attempts a ON a.task_id = t.id
WHERE t.async_mode = true
AND t.status = 'queued'
AND a.platform_model_id IS NOT NULL
ORDER BY a.task_id, a.attempt_no DESC, a.started_at DESC
) latest
GROUP BY latest.platform_model_id
) queued ON queued.platform_model_id = m.id::text
LEFT JOIN (
SELECT DISTINCT ON (scope_key) scope_key, used_value, reserved_value, reset_at
FROM gateway_rate_limit_counters
WHERE scope_type = 'platform_model'
AND metric = 'rpm'
AND reset_at > now()
ORDER BY scope_key, window_start DESC
) rpm ON rpm.scope_key = m.id::text
LEFT JOIN (
SELECT scope_key, SUM(used_value) AS used_value, SUM(reserved_value) AS reserved_value, MAX(reset_at) AS reset_at
FROM gateway_rate_limit_counters
WHERE scope_type = 'platform_model'
AND metric LIKE 'tpm%'
AND reset_at > now()
GROUP BY scope_key
) tpm ON tpm.scope_key = m.id::text
WHERE p.deleted_at IS NULL
ORDER BY p.priority ASC, m.model_name ASC`)
	if err != nil {
		return nil, err
	}
	defer rows.Close()
	items := make([]ModelRateLimitStatus, 0)
	for rows.Next() {
		var item ModelRateLimitStatus
		// Raw jsonb/array payloads; decoded after Scan via decodeStringArray /
		// decodeObject (defined elsewhere in this package).
		var modelTypeBytes []byte
		var platformPolicyBytes []byte
		var runtimePolicyBytes []byte
		var runtimeOverrideBytes []byte
		var modelPolicyBytes []byte
		var platformCooldownUntil string
		var modelCooldownUntil string
		var concurrentCurrent float64
		var queuedTasks float64
		var rpmUsed float64
		var rpmReserved float64
		var rpmResetAt string
		var tpmUsed float64
		var tpmReserved float64
		var tpmResetAt string
		// Scan order must match the SELECT column order above exactly.
		if err := rows.Scan(
			&item.PlatformModelID,
			&item.PlatformID,
			&item.PlatformName,
			&item.Provider,
			&item.ModelName,
			&item.ProviderModelName,
			&item.ModelAlias,
			&modelTypeBytes,
			&item.DisplayName,
			&item.Enabled,
			&platformPolicyBytes,
			&runtimePolicyBytes,
			&runtimeOverrideBytes,
			&modelPolicyBytes,
			&platformCooldownUntil,
			&modelCooldownUntil,
			&concurrentCurrent,
			&queuedTasks,
			&rpmUsed,
			&rpmReserved,
			&rpmResetAt,
			&tpmUsed,
			&tpmReserved,
			&tpmResetAt,
		); err != nil {
			return nil, err
		}
		item.ModelType = decodeStringArray(modelTypeBytes)
		// Merge policy sources in precedence order:
		// platform < runtime policy set < runtime override < model policy.
		policy := effectiveModelRateLimitPolicy(
			decodeObject(platformPolicyBytes),
			decodeObject(runtimePolicyBytes),
			decodeObject(runtimeOverrideBytes),
			decodeObject(modelPolicyBytes),
		)
		item.PlatformCooldownUntil = platformCooldownUntil
		item.ModelCooldownUntil = modelCooldownUntil
		item.RateLimitPolicy = policy
		item.QueuedTasks = queuedTasks
		// Concurrency has no reserved component and no reset timestamp.
		item.Concurrent = metricStatus(concurrentCurrent, concurrentCurrent, 0, rateLimitForMetric(policy, "concurrent"), "")
		item.RPM = metricStatus(rpmUsed+rpmReserved, rpmUsed, rpmReserved, rateLimitForMetric(policy, "rpm"), rpmResetAt)
		item.TPM = metricStatus(tpmUsed+tpmReserved, tpmUsed, tpmReserved, tpmLimit(policy), tpmResetAt)
		item.LoadRatio = maxFloat(item.Concurrent.Ratio, item.RPM.Ratio, item.TPM.Ratio)
		items = append(items, item)
	}
	if err := rows.Err(); err != nil {
		return nil, err
	}
	// Busiest models first; stable sort keeps the SQL priority order for ties
	// beyond the display-name tiebreaker.
	sort.SliceStable(items, func(i, j int) bool {
		if items[i].LoadRatio == items[j].LoadRatio {
			return strings.ToLower(items[i].DisplayName) < strings.ToLower(items[j].DisplayName)
		}
		return items[i].LoadRatio > items[j].LoadRatio
	})
	return items, nil
}
// effectiveModelRateLimitPolicy layers the four policy sources, lowest to
// highest precedence: platform policy, runtime policy set (only if it has
// rules), the nested "rateLimitPolicy" object of the runtime override (only
// if non-empty), and finally the model's own policy (only if it has rules).
// Returns nil when the merged result contains no rules.
func effectiveModelRateLimitPolicy(platformPolicy map[string]any, runtimePolicy map[string]any, runtimeOverride map[string]any, modelPolicy map[string]any) map[string]any {
	merged := platformPolicy
	if hasRateLimitRules(runtimePolicy) {
		merged = shallowMergeMap(merged, runtimePolicy)
	}
	nested, ok := runtimeOverride["rateLimitPolicy"].(map[string]any)
	if ok && len(nested) > 0 {
		merged = shallowMergeMap(merged, nested)
	}
	if hasRateLimitRules(modelPolicy) {
		merged = shallowMergeMap(merged, modelPolicy)
	}
	if !hasRateLimitRules(merged) {
		return nil
	}
	return merged
}
// hasRateLimitRules reports whether policy carries a non-empty "rules" list.
// A nil map, a missing key, or a value of the wrong type all yield false.
func hasRateLimitRules(policy map[string]any) bool {
	if rules, ok := policy["rules"].([]any); ok {
		return len(rules) > 0
	}
	return false
}
// shallowMergeMap returns a new map holding base's entries with override's
// entries copied on top (override wins on key collisions). Neither input is
// mutated; nil inputs are treated as empty.
func shallowMergeMap(base map[string]any, override map[string]any) map[string]any {
	merged := make(map[string]any, len(base)+len(override))
	for _, source := range []map[string]any{base, override} {
		for key, value := range source {
			merged[key] = value
		}
	}
	return merged
}
func rateLimitForMetric(policy map[string]any, metric string) float64 {
rules, _ := policy["rules"].([]any)
for _, rawRule := range rules {
rule, _ := rawRule.(map[string]any)
if strings.TrimSpace(stringValue(rule["metric"])) == metric {
return floatValue(rule["limit"])
}
}
return 0
}
// tpmLimit derives the effective tokens-per-minute ceiling: an explicit
// tpm_total rule wins outright; otherwise the tpm_input and tpm_output
// limits are summed (each contributing 0 when absent).
func tpmLimit(policy map[string]any) float64 {
	total := rateLimitForMetric(policy, "tpm_total")
	if total > 0 {
		return total
	}
	inputLimit := rateLimitForMetric(policy, "tpm_input")
	outputLimit := rateLimitForMetric(policy, "tpm_output")
	return inputLimit + outputLimit
}
// metricStatus assembles a RateLimitMetricStatus snapshot. A positive limit
// marks the metric as limited and sets Ratio = current / limit; otherwise
// Ratio stays 0 (unlimited metrics never contribute load).
func metricStatus(current float64, used float64, reserved float64, limit float64, resetAt string) RateLimitMetricStatus {
	limited := limit > 0
	out := RateLimitMetricStatus{
		CurrentValue:  current,
		UsedValue:     used,
		ReservedValue: reserved,
		LimitValue:    limit,
		Limited:       limited,
		ResetAt:       resetAt,
	}
	if limited {
		out.Ratio = current / limit
	}
	return out
}
// maxFloat returns the largest of the given values, floored at 0: with no
// arguments, or when every value is negative, the result is 0.
func maxFloat(values ...float64) float64 {
	var best float64
	for _, candidate := range values {
		if candidate > best {
			best = candidate
		}
	}
	return best
}
// stringValue coerces an any to a whitespace-trimmed string; non-string
// values (including nil) become "".
func stringValue(value any) string {
	if text, ok := value.(string); ok {
		return strings.TrimSpace(text)
	}
	return ""
}
// floatValue coerces a decoded policy value to float64. Generalized beyond
// the original int/int64/float64 to cover the other common Go numeric types
// that can appear in map[string]any payloads; anything non-numeric (strings,
// bools, nil) yields 0, matching the original fallback.
func floatValue(value any) float64 {
	switch typed := value.(type) {
	case int:
		return float64(typed)
	case int32:
		return float64(typed)
	case int64:
		return float64(typed)
	case uint:
		return float64(typed)
	case uint64:
		return float64(typed)
	case float32:
		return float64(typed)
	case float64:
		return typed
	default:
		return 0
	}
}