62 lines
1.9 KiB
Go
62 lines
1.9 KiB
Go
package store
|
|
|
|
import "testing"
|
|
|
|
func TestRuntimeCandidateLoadUsesMaxLimitedMetric(t *testing.T) {
|
|
candidate := RuntimeModelCandidate{}
|
|
applyRuntimeCandidateLoad(&candidate, runtimeCandidateLoadInput{
|
|
Policy: map[string]any{"rules": []any{
|
|
map[string]any{"metric": "rpm", "limit": 100},
|
|
map[string]any{"metric": "tpm_total", "limit": 1000},
|
|
map[string]any{"metric": "concurrent", "limit": 10},
|
|
}},
|
|
RPMUsed: 40,
|
|
RPMReserved: 10,
|
|
TPMUsed: 900,
|
|
ConcurrentActive: 3,
|
|
QueuedWaiting: 2,
|
|
})
|
|
|
|
if !candidate.LoadLimited {
|
|
t.Fatal("expected load to be limited when rate limit rules exist")
|
|
}
|
|
if candidate.LoadMetrics.RPMRatio != 0.5 {
|
|
t.Fatalf("expected rpm ratio 0.5, got %v", candidate.LoadMetrics.RPMRatio)
|
|
}
|
|
if candidate.LoadMetrics.TPMRatio != 0.9 {
|
|
t.Fatalf("expected tpm ratio 0.9, got %v", candidate.LoadMetrics.TPMRatio)
|
|
}
|
|
if candidate.LoadMetrics.ConcurrentRatio != 0.5 {
|
|
t.Fatalf("expected concurrent ratio 0.5, got %v", candidate.LoadMetrics.ConcurrentRatio)
|
|
}
|
|
if candidate.LoadRatio != 0.9 {
|
|
t.Fatalf("expected max load ratio 0.9, got %v", candidate.LoadRatio)
|
|
}
|
|
}
|
|
|
|
func TestRuntimeCandidateSortingAvoidsFullCandidatesButKeepsFallback(t *testing.T) {
|
|
candidates := []RuntimeModelCandidate{
|
|
{
|
|
PlatformID: "high-priority-full",
|
|
PlatformPriority: 1,
|
|
LoadLimited: true,
|
|
LoadRatio: 1.2,
|
|
},
|
|
{
|
|
PlatformID: "lower-priority-available",
|
|
PlatformPriority: 50,
|
|
LoadLimited: true,
|
|
LoadRatio: 0.2,
|
|
},
|
|
}
|
|
|
|
sortRuntimeModelCandidates(candidates)
|
|
|
|
if candidates[0].PlatformID != "lower-priority-available" {
|
|
t.Fatalf("expected non-full candidate to be tried first, got %+v", candidates)
|
|
}
|
|
if candidates[1].PlatformID != "high-priority-full" || !candidates[1].LoadAvoided {
|
|
t.Fatalf("expected full high-priority candidate to remain as avoided fallback, got %+v", candidates)
|
|
}
|
|
}
|