diff --git a/apps/api/internal/httpapi/model_catalog.go b/apps/api/internal/httpapi/model_catalog.go
index 23e3568..470b0bb 100644
--- a/apps/api/internal/httpapi/model_catalog.go
+++ b/apps/api/internal/httpapi/model_catalog.go
@@ -476,16 +476,17 @@ func discountTitle(label string, discount float64) string {
}
func effectiveModelRateLimits(model store.PlatformModel, platform store.Platform, runtimePolicyMap map[string]store.RuntimePolicySet) ModelCatalogRateLimits {
- overridePolicy := objectValue(model.RuntimePolicyOverride["rateLimitPolicy"])
+ overridePolicyRaw, hasOverridePolicy := model.RuntimePolicyOverride["rateLimitPolicy"]
+ overridePolicy := objectValue(overridePolicyRaw)
runtimePolicy := map[string]any(nil)
if model.RuntimePolicySetID != "" {
runtimePolicy = runtimePolicyMap[model.RuntimePolicySetID].RateLimitPolicy
}
- policies := []map[string]any{
- overridePolicy,
- model.RateLimitPolicy,
- runtimePolicy,
- platform.RateLimitPolicy,
+ policies := []rateLimitPolicySource{
+ {policy: overridePolicy, authoritative: hasOverridePolicy},
+ {policy: model.RateLimitPolicy, authoritative: len(model.RateLimitPolicy) > 0},
+ {policy: runtimePolicy, authoritative: strings.TrimSpace(model.RuntimePolicySetID) != ""},
+ {policy: platform.RateLimitPolicy},
}
limits := ModelCatalogRateLimits{
RPM: firstRateLimit(policies, "rpm"),
@@ -496,18 +497,26 @@ func effectiveModelRateLimits(model store.PlatformModel, platform store.Platform
return limits
}
-func firstRateLimit(policies []map[string]any, metric string) *float64 {
- for _, policy := range policies {
- if value := readRateLimit(policy, metric); value != nil {
- return value
+type rateLimitPolicySource struct {
+ policy map[string]any
+ authoritative bool
+}
+
+func firstRateLimit(policies []rateLimitPolicySource, metric string) *float64 {
+ for _, source := range policies {
+ if value, ok := readRateLimit(source.policy, metric); ok {
+ return floatPointer(value)
+ }
+ if source.authoritative {
+ return floatPointer(0)
}
}
return nil
}
-func readRateLimit(policy map[string]any, metric string) *float64 {
+func readRateLimit(policy map[string]any, metric string) (float64, bool) {
if len(policy) == 0 {
- return nil
+ return 0, false
}
if rules, ok := policy["rules"].([]any); ok {
for _, item := range rules {
@@ -516,22 +525,22 @@ func readRateLimit(policy map[string]any, metric string) *float64 {
continue
}
if limit, ok := numberValue(rule["limit"]); ok {
- return &limit
+ return limit, true
}
}
}
for _, key := range rateLimitKeys(metric) {
if value, ok := numberValue(policy[key]); ok {
- return &value
+ return value, true
}
}
platformLimits := objectValue(policy["platformLimits"])
for _, key := range rateLimitKeys(metric) {
if value, ok := numberValue(platformLimits[key]); ok {
- return &value
+ return value, true
}
}
- return nil
+ return 0, false
}
func rateLimitKeys(metric string) []string {
@@ -1367,12 +1376,16 @@ func numberValue(value any) (float64, bool) {
}
func formatOptionalNumber(value *float64) string {
- if value == nil {
- return "-"
+ if value == nil || *value <= 0 {
+ return "不限"
}
return formatLimitNumber(*value)
}
+func floatPointer(value float64) *float64 {
+ return &value
+}
+
func formatLimitNumber(value float64) string {
switch {
case absFloat(value) >= 10000:
diff --git a/apps/api/internal/runner/limits.go b/apps/api/internal/runner/limits.go
index 9a47484..84066d8 100644
--- a/apps/api/internal/runner/limits.go
+++ b/apps/api/internal/runner/limits.go
@@ -83,11 +83,18 @@ func (s *Service) rateLimitReservations(ctx context.Context, user *auth.User, ca
func effectiveRateLimitPolicy(candidate store.RuntimeModelCandidate) map[string]any {
policy := candidate.PlatformRateLimitPolicy
- if hasRules(candidate.RuntimeRateLimitPolicy) {
+ if strings.TrimSpace(candidate.RuntimePolicySetID) != "" {
+ policy = candidate.RuntimeRateLimitPolicy
+ } else if hasRules(candidate.RuntimeRateLimitPolicy) {
policy = mergeMap(policy, candidate.RuntimeRateLimitPolicy)
}
- if nested, ok := candidate.RuntimePolicyOverride["rateLimitPolicy"].(map[string]any); ok && len(nested) > 0 {
- policy = mergeMap(policy, nested)
+	// A present "rateLimitPolicy" override is authoritative: an empty or non-map value drops the policy built so far, otherwise it is merged on top.
+	if raw, hasOverride := candidate.RuntimePolicyOverride["rateLimitPolicy"]; hasOverride {
+		if nested, _ := raw.(map[string]any); len(nested) == 0 {
+			policy = nil
+		} else {
+			policy = mergeMap(policy, nested)
+		}
}
if hasRules(candidate.ModelRateLimitPolicy) {
policy = mergeMap(policy, candidate.ModelRateLimitPolicy)
@@ -123,6 +130,9 @@ func reservationsFromPolicy(scopeType string, scopeKey string, scopeName string,
rule, _ := rawRule.(map[string]any)
metric := strings.TrimSpace(stringFromMap(rule, "metric"))
limit := floatFromAny(rule["limit"])
+ if metric == "" || limit <= 0 {
+ continue
+ }
amount := 1.0
if strings.HasPrefix(metric, "tpm") {
amount = float64(estimatedTokens)
diff --git a/apps/api/internal/runner/limits_test.go b/apps/api/internal/runner/limits_test.go
index 3c1cbb1..abe47b3 100644
--- a/apps/api/internal/runner/limits_test.go
+++ b/apps/api/internal/runner/limits_test.go
@@ -4,6 +4,7 @@ import (
"testing"
"github.com/easyai/easyai-ai-gateway/apps/api/internal/clients"
+ "github.com/easyai/easyai-ai-gateway/apps/api/internal/store"
)
func TestTokenUsageAmountsUsesActualUsageForTPM(t *testing.T) {
@@ -27,3 +28,39 @@ func TestTokenUsageAmountsFallsBackToInputOutputTotal(t *testing.T) {
t.Fatalf("expected total token fallback 8, got %v", got["tpm_total"])
}
}
+
+func TestEffectiveRateLimitPolicyTreatsEmptyRuntimePolicyAsUnlimited(t *testing.T) {
+ policy := effectiveRateLimitPolicy(store.RuntimeModelCandidate{
+ PlatformRateLimitPolicy: map[string]any{"rules": []any{
+ map[string]any{"metric": "rpm", "limit": 500},
+ }},
+ RuntimePolicySetID: "runtime-policy-1",
+ RuntimeRateLimitPolicy: map[string]any{"rules": []any{}},
+ })
+
+ if hasRules(policy) {
+ t.Fatalf("expected empty runtime policy to clear inherited limits, got %+v", policy)
+ }
+}
+
+func TestReservationsFromPolicySkipsNonPositiveLimits(t *testing.T) {
+ reservations := reservationsFromPolicy(
+ "platform_model",
+ "model-1",
+ "Model 1",
+ nil,
+ map[string]any{"rules": []any{
+ map[string]any{"metric": "rpm", "limit": -1},
+ map[string]any{"metric": "tpm_total", "limit": 0},
+ map[string]any{"metric": "concurrent", "limit": 2},
+ }},
+ map[string]any{"prompt": "hello"},
+ )
+
+ if len(reservations) != 1 {
+ t.Fatalf("expected only the positive concurrent rule to reserve, got %+v", reservations)
+ }
+ if reservations[0].Metric != "concurrent" || reservations[0].Limit != 2 {
+ t.Fatalf("expected concurrent reservation with limit 2, got %+v", reservations[0])
+ }
+}
diff --git a/apps/api/internal/store/candidates.go b/apps/api/internal/store/candidates.go
index 4e33c9e..1ec4e28 100644
--- a/apps/api/internal/store/candidates.go
+++ b/apps/api/internal/store/candidates.go
@@ -230,7 +230,7 @@ ORDER BY effective_priority ASC,
item.WaitingCount = maxFloat(queuedWaiting, stateWaitingCount)
item.LastAssignedUnix = lastAssignedUnix
applyRuntimeCandidateLoad(&item, runtimeCandidateLoadInput{
- Policy: effectiveModelRateLimitPolicy(item.PlatformRateLimitPolicy, item.RuntimeRateLimitPolicy, item.RuntimePolicyOverride, item.ModelRateLimitPolicy),
+ Policy: effectiveModelRateLimitPolicy(item.PlatformRateLimitPolicy, item.RuntimeRateLimitPolicy, item.RuntimePolicySetID, item.RuntimePolicyOverride, item.ModelRateLimitPolicy),
ConcurrentActive: concurrentActive,
QueuedWaiting: queuedWaiting,
RPMUsed: rpmUsed,
diff --git a/apps/api/internal/store/rate_limit_status.go b/apps/api/internal/store/rate_limit_status.go
index 633a058..cd5fef3 100644
--- a/apps/api/internal/store/rate_limit_status.go
+++ b/apps/api/internal/store/rate_limit_status.go
@@ -88,7 +88,8 @@ func (s *Store) ListModelRateLimitStatuses(ctx context.Context) ([]ModelRateLimi
p.priority, p.dynamic_priority, COALESCE(p.dynamic_priority, p.priority),
m.model_name, COALESCE(NULLIF(m.provider_model_name, ''), m.model_name), COALESCE(m.model_alias, ''),
m.model_type, m.display_name, m.enabled,
- p.rate_limit_policy, COALESCE(rp.rate_limit_policy, '{}'::jsonb), COALESCE(NULLIF(m.runtime_policy_override, '{}'::jsonb), b.runtime_policy_override, '{}'::jsonb), m.rate_limit_policy,
+ p.rate_limit_policy, COALESCE(rp.rate_limit_policy, '{}'::jsonb), COALESCE(m.runtime_policy_set_id::text, b.runtime_policy_set_id::text, ''),
+ COALESCE(NULLIF(m.runtime_policy_override, '{}'::jsonb), b.runtime_policy_override, '{}'::jsonb), m.rate_limit_policy,
COALESCE(to_char(p.cooldown_until AT TIME ZONE 'UTC', 'YYYY-MM-DD"T"HH24:MI:SS.MS"Z"'), ''),
COALESCE(to_char(m.cooldown_until AT TIME ZONE 'UTC', 'YYYY-MM-DD"T"HH24:MI:SS.MS"Z"'), ''),
COALESCE(con.active, 0)::float8,
@@ -163,6 +164,7 @@ ORDER BY p.priority ASC, m.model_name ASC`)
var modelTypeBytes []byte
var platformPolicyBytes []byte
var runtimePolicyBytes []byte
+ var runtimePolicySetID string
var runtimeOverrideBytes []byte
var modelPolicyBytes []byte
var platformDynamicPriority sql.NullInt64
@@ -193,6 +195,7 @@ ORDER BY p.priority ASC, m.model_name ASC`)
&item.Enabled,
&platformPolicyBytes,
&runtimePolicyBytes,
+ &runtimePolicySetID,
&runtimeOverrideBytes,
&modelPolicyBytes,
&platformCooldownUntil,
@@ -213,6 +216,7 @@ ORDER BY p.priority ASC, m.model_name ASC`)
policy := effectiveModelRateLimitPolicy(
decodeObject(platformPolicyBytes),
decodeObject(runtimePolicyBytes),
+ runtimePolicySetID,
decodeObject(runtimeOverrideBytes),
decodeObject(modelPolicyBytes),
)
@@ -432,13 +436,20 @@ func platformPolicyEventFromPayload(id string, taskID string, eventType string,
}
}
-func effectiveModelRateLimitPolicy(platformPolicy map[string]any, runtimePolicy map[string]any, runtimeOverride map[string]any, modelPolicy map[string]any) map[string]any {
+func effectiveModelRateLimitPolicy(platformPolicy map[string]any, runtimePolicy map[string]any, runtimePolicySetID string, runtimeOverride map[string]any, modelPolicy map[string]any) map[string]any {
policy := platformPolicy
- if hasRateLimitRules(runtimePolicy) {
+ if strings.TrimSpace(runtimePolicySetID) != "" {
+ policy = runtimePolicy
+ } else if hasRateLimitRules(runtimePolicy) {
policy = shallowMergeMap(policy, runtimePolicy)
}
- if nested, ok := runtimeOverride["rateLimitPolicy"].(map[string]any); ok && len(nested) > 0 {
- policy = shallowMergeMap(policy, nested)
+	// A present "rateLimitPolicy" override is authoritative: an empty or non-map value drops the policy built so far, otherwise it is merged on top.
+	if raw, hasOverride := runtimeOverride["rateLimitPolicy"]; hasOverride {
+		if nested, _ := raw.(map[string]any); len(nested) == 0 {
+			policy = nil
+		} else {
+			policy = shallowMergeMap(policy, nested)
+		}
}
if hasRateLimitRules(modelPolicy) {
policy = shallowMergeMap(policy, modelPolicy)
diff --git a/apps/api/internal/store/rate_limit_status_test.go b/apps/api/internal/store/rate_limit_status_test.go
index aacda62..84f0ee0 100644
--- a/apps/api/internal/store/rate_limit_status_test.go
+++ b/apps/api/internal/store/rate_limit_status_test.go
@@ -16,6 +16,7 @@ func TestEffectiveModelRateLimitPolicyTreatsModelRulesAsAuthoritative(t *testing
map[string]any{"metric": "tpm_total", "limit": 240000},
map[string]any{"metric": "concurrent", "limit": 6},
}},
+ "runtime-policy-1",
map[string]any{},
map[string]any{"rules": []any{
map[string]any{"metric": "rpm", "limit": 30},
@@ -34,6 +35,48 @@ func TestEffectiveModelRateLimitPolicyTreatsModelRulesAsAuthoritative(t *testing
}
}
+func TestEffectiveModelRateLimitPolicyTreatsEmptyRuntimePolicyAsUnlimited(t *testing.T) {
+ policy := effectiveModelRateLimitPolicy(
+ map[string]any{"rules": []any{
+ map[string]any{"metric": "rpm", "limit": 500},
+ map[string]any{"metric": "tpm_total", "limit": 100000},
+ }},
+ map[string]any{"rules": []any{}},
+ "runtime-policy-1",
+ map[string]any{},
+ map[string]any{},
+ )
+
+ if got := rateLimitForMetric(policy, "rpm"); got != 0 {
+ t.Fatalf("expected empty runtime policy rpm to mean unlimited, got %v", got)
+ }
+ if got := rateLimitForMetric(policy, "tpm_total"); got != 0 {
+ t.Fatalf("expected empty runtime policy tpm to mean unlimited, got %v", got)
+ }
+}
+
+func TestEffectiveModelRateLimitPolicyTreatsNegativeLimitAsUnlimited(t *testing.T) {
+ policy := effectiveModelRateLimitPolicy(
+ map[string]any{"rules": []any{
+ map[string]any{"metric": "rpm", "limit": 500},
+ }},
+ map[string]any{"rules": []any{
+ map[string]any{"metric": "rpm", "limit": -1},
+ }},
+ "runtime-policy-1",
+ map[string]any{},
+ map[string]any{},
+ )
+
+ if got := rateLimitForMetric(policy, "rpm"); got != -1 {
+ t.Fatalf("expected negative runtime rpm marker to be preserved, got %v", got)
+ }
+ status := metricStatus(10, 10, 0, rateLimitForMetric(policy, "rpm"), "")
+ if status.Limited {
+ t.Fatalf("expected negative runtime rpm marker to be reported as unlimited, got %+v", status)
+ }
+}
+
func TestPriorityDemotionRecordFromEventPayloadKeepsReason(t *testing.T) {
createdAt := time.Date(2026, 5, 12, 9, 30, 0, 0, time.UTC)
record := priorityDemotionRecordFromEventPayload("event-1", "task-1", "fallback message", map[string]any{
diff --git a/apps/web/src/pages/admin/PlatformManagementPanel.tsx b/apps/web/src/pages/admin/PlatformManagementPanel.tsx
index 3baec26..3ab11cd 100644
--- a/apps/web/src/pages/admin/PlatformManagementPanel.tsx
+++ b/apps/web/src/pages/admin/PlatformManagementPanel.tsx
@@ -343,10 +343,10 @@ export function PlatformManagementPanel(props: {