package store

import "testing"

// TestRuntimeCandidateLoadUsesMaxLimitedMetric feeds applyRuntimeCandidateLoad
// a policy carrying rpm, tpm_total and concurrent rules and then inspects the
// load fields written onto the candidate: LoadLimited must be set, the three
// per-metric ratios must be 0.5 / 0.9 / 0.5, and LoadRatio must be 0.9, the
// largest of them.
func TestRuntimeCandidateLoadUsesMaxLimitedMetric(t *testing.T) {
	rules := []any{
		map[string]any{"metric": "rpm", "limit": 100},
		map[string]any{"metric": "tpm_total", "limit": 1000},
		map[string]any{"metric": "concurrent", "limit": 10},
	}
	input := runtimeCandidateLoadInput{
		Policy:           map[string]any{"rules": rules},
		RPMUsed:          40,
		RPMReserved:      10,
		TPMUsed:          900,
		ConcurrentActive: 3,
		QueuedWaiting:    2,
	}

	var candidate RuntimeModelCandidate
	applyRuntimeCandidateLoad(&candidate, input)

	if limited := candidate.LoadLimited; !limited {
		t.Fatal("expected load to be limited when rate limit rules exist")
	}

	// NOTE(review): the expected values are consistent with
	// (used + reserved) / limit for rpm and (active + queued) / limit for
	// concurrent; confirm against applyRuntimeCandidateLoad.
	if got := candidate.LoadMetrics.RPMRatio; got != 0.5 {
		t.Fatalf("expected rpm ratio 0.5, got %v", got)
	}
	if got := candidate.LoadMetrics.TPMRatio; got != 0.9 {
		t.Fatalf("expected tpm ratio 0.9, got %v", got)
	}
	if got := candidate.LoadMetrics.ConcurrentRatio; got != 0.5 {
		t.Fatalf("expected concurrent ratio 0.5, got %v", got)
	}

	// The overall ratio is expected to equal the highest per-metric ratio.
	if got := candidate.LoadRatio; got != 0.9 {
		t.Fatalf("expected max load ratio 0.9, got %v", got)
	}
}

// TestRuntimeCandidateSortingAvoidsFullCandidatesButKeepsFallback sorts two
// load-limited candidates: one with PlatformPriority 1 and LoadRatio 1.2, and
// one with PlatformPriority 50 and LoadRatio 0.2. After
// sortRuntimeModelCandidates the 0.2 candidate must come first, while the 1.2
// candidate must still be present in second place with LoadAvoided set. The
// test never sets LoadAvoided itself, so the sort is expected to do so.
func TestRuntimeCandidateSortingAvoidsFullCandidatesButKeepsFallback(t *testing.T) {
	full := RuntimeModelCandidate{
		PlatformID:       "high-priority-full",
		PlatformPriority: 1,
		LoadLimited:      true,
		LoadRatio:        1.2,
	}
	available := RuntimeModelCandidate{
		PlatformID:       "lower-priority-available",
		PlatformPriority: 50,
		LoadLimited:      true,
		LoadRatio:        0.2,
	}
	candidates := []RuntimeModelCandidate{full, available}

	sortRuntimeModelCandidates(candidates)

	if first := candidates[0].PlatformID; first != "lower-priority-available" {
		t.Fatalf("expected non-full candidate to be tried first, got %+v", candidates)
	}

	// The full candidate must not be dropped: it stays as the last resort and
	// is flagged as avoided.
	keptAsFallback := candidates[1].PlatformID == "high-priority-full" && candidates[1].LoadAvoided
	if !keptAsFallback {
		t.Fatalf("expected full high-priority candidate to remain as avoided fallback, got %+v", candidates)
	}
}