// Source: easyai-ai-gateway/apps/api/internal/store/candidates_test.go
// (62 lines, 1.9 KiB, Go)

package store
import "testing"
func TestRuntimeCandidateLoadUsesMaxLimitedMetric(t *testing.T) {
candidate := RuntimeModelCandidate{}
applyRuntimeCandidateLoad(&candidate, runtimeCandidateLoadInput{
Policy: map[string]any{"rules": []any{
map[string]any{"metric": "rpm", "limit": 100},
map[string]any{"metric": "tpm_total", "limit": 1000},
map[string]any{"metric": "concurrent", "limit": 10},
}},
RPMUsed: 40,
RPMReserved: 10,
TPMUsed: 900,
ConcurrentActive: 3,
QueuedWaiting: 2,
})
if !candidate.LoadLimited {
t.Fatal("expected load to be limited when rate limit rules exist")
}
if candidate.LoadMetrics.RPMRatio != 0.5 {
t.Fatalf("expected rpm ratio 0.5, got %v", candidate.LoadMetrics.RPMRatio)
}
if candidate.LoadMetrics.TPMRatio != 0.9 {
t.Fatalf("expected tpm ratio 0.9, got %v", candidate.LoadMetrics.TPMRatio)
}
if candidate.LoadMetrics.ConcurrentRatio != 0.5 {
t.Fatalf("expected concurrent ratio 0.5, got %v", candidate.LoadMetrics.ConcurrentRatio)
}
if candidate.LoadRatio != 0.9 {
t.Fatalf("expected max load ratio 0.9, got %v", candidate.LoadRatio)
}
}
// TestRuntimeCandidateSortingAvoidsFullCandidatesButKeepsFallback sorts two
// load-limited candidates: one with the lower PlatformPriority value but a
// LoadRatio of 1.2, and one with a higher PlatformPriority value and a
// LoadRatio of 0.2. It expects the 0.2 candidate to come first and the 1.2
// candidate to stay in the list, flagged LoadAvoided.
func TestRuntimeCandidateSortingAvoidsFullCandidatesButKeepsFallback(t *testing.T) {
	const (
		fullID      = "high-priority-full"
		availableID = "lower-priority-available"
	)

	full := RuntimeModelCandidate{
		PlatformID:       fullID,
		PlatformPriority: 1,
		LoadLimited:      true,
		LoadRatio:        1.2,
	}
	available := RuntimeModelCandidate{
		PlatformID:       availableID,
		PlatformPriority: 50,
		LoadLimited:      true,
		LoadRatio:        0.2,
	}
	candidates := []RuntimeModelCandidate{full, available}

	sortRuntimeModelCandidates(candidates)

	if first := candidates[0]; first.PlatformID != availableID {
		t.Fatalf("expected non-full candidate to be tried first, got %+v", candidates)
	}

	fallback := candidates[1]
	if keptAsAvoided := fallback.PlatformID == fullID && fallback.LoadAvoided; !keptAsAvoided {
		t.Fatalf("expected full high-priority candidate to remain as avoided fallback, got %+v", candidates)
	}
}