easyai-ai-gateway/apps/api/internal/runner/runtime_policy.go

129 lines
5.1 KiB
Go

package runner
import (
"context"
"errors"
"strings"
"github.com/easyai/easyai-ai-gateway/apps/api/internal/clients"
"github.com/easyai/easyai-ai-gateway/apps/api/internal/store"
)
// applyCandidateFailurePolicies checks a candidate's auto-disable and degrade
// policies against a failure and applies each one that matches.
//
// Both policies are resolved with effectiveRuntimePolicy (the candidate's base
// policy combined with the same-named section of RuntimePolicyOverride) and
// matched against the error code and message via failurePolicyMatches. They
// are evaluated independently, so one failure may trigger both actions.
//
// The function is best-effort: a store error suppresses the corresponding
// event, and the result of emitting the event is discarded.
func (s *Service) applyCandidateFailurePolicies(ctx context.Context, taskID string, candidate store.RuntimeModelCandidate, cause error, simulated bool) {
	code := clients.ErrorCode(cause)
	var message string
	if cause != nil {
		message = cause.Error()
	}

	// Auto-disable: acted on here only when the policy matches and its
	// configured threshold is at most 1.
	disablePolicy := effectiveRuntimePolicy(candidate.AutoDisablePolicy, candidate.RuntimePolicyOverride, "autoDisablePolicy")
	shouldDisable := failurePolicyMatches(disablePolicy, code, message) && intFromPolicy(disablePolicy, "threshold") <= 1
	if shouldDisable {
		disableErr := s.store.DisableCandidatePlatform(ctx, candidate.PlatformID)
		if disableErr == nil {
			disabledPayload := map[string]any{
				"platformId":      candidate.PlatformID,
				"platformModelId": candidate.PlatformModelID,
				"code":            code,
			}
			_ = s.emit(ctx, taskID, "task.policy.auto_disabled", "running", "auto_disable", 0.48, "candidate platform disabled by failure policy", disabledPayload, simulated)
		}
	}

	// Degrade: put the platform model into cooldown for the configured number
	// of seconds when the policy matches.
	coolPolicy := effectiveRuntimePolicy(candidate.DegradePolicy, candidate.RuntimePolicyOverride, "degradePolicy")
	if !failurePolicyMatches(coolPolicy, code, message) {
		return
	}
	cooldownSeconds := intFromPolicy(coolPolicy, "cooldownSeconds")
	if cooldownErr := s.store.CooldownCandidatePlatformModel(ctx, candidate.PlatformModelID, cooldownSeconds); cooldownErr != nil {
		return
	}
	degradedPayload := map[string]any{
		"platformId":      candidate.PlatformID,
		"platformModelId": candidate.PlatformModelID,
		"cooldownSeconds": cooldownSeconds,
		"code":            code,
	}
	_ = s.emit(ctx, taskID, "task.policy.degraded", "running", "degrade", 0.5, "candidate model cooled down by failure policy", degradedPayload, simulated)
}
// applyFailoverAction carries out the store-side effect of a failover
// decision and emits the matching policy event.
//
// Two actions are handled:
//   - "disable_and_next": disables the candidate's platform.
//   - "cooldown_and_next": cools down the candidate's platform model for
//     decision.CooldownSeconds.
//
// Any other action is a no-op. The event is emitted only when the store call
// succeeds, and the emit result is discarded.
func (s *Service) applyFailoverAction(ctx context.Context, taskID string, candidate store.RuntimeModelCandidate, decision failoverDecision, simulated bool) {
	if decision.Action == "disable_and_next" {
		if disableErr := s.store.DisableCandidatePlatform(ctx, candidate.PlatformID); disableErr != nil {
			return
		}
		fields := map[string]any{
			"platformId":      candidate.PlatformID,
			"platformModelId": candidate.PlatformModelID,
			"action":          decision.Action,
			"reason":          decision.Reason,
		}
		_ = s.emit(ctx, taskID, "task.policy.failover_disabled", "running", "failover", 0.51, "candidate platform disabled by runner failover policy", addPolicyTracePayload(fields, decision.Match, decision.Info), simulated)
		return
	}

	if decision.Action == "cooldown_and_next" {
		if cooldownErr := s.store.CooldownCandidatePlatformModel(ctx, candidate.PlatformModelID, decision.CooldownSeconds); cooldownErr != nil {
			return
		}
		fields := map[string]any{
			"platformId":      candidate.PlatformID,
			"platformModelId": candidate.PlatformModelID,
			"cooldownSeconds": decision.CooldownSeconds,
			"action":          decision.Action,
			"reason":          decision.Reason,
		}
		_ = s.emit(ctx, taskID, "task.policy.failover_cooled_down", "running", "failover", 0.51, "candidate model cooled down by runner failover policy", addPolicyTracePayload(fields, decision.Match, decision.Info), simulated)
	}
}
// applyPriorityDemotePolicy lowers the candidate platform's priority when the
// runner policy asks for a demotion in response to cause.
//
// Failures that wrap store.ErrRateLimited are skipped entirely. Otherwise the
// decision comes from priorityDemoteDecisionForCandidate; when it requests a
// demotion, the platform priority is demoted by decision.Step. On success the
// demotion is recorded on the attempt trace and a
// "task.policy.priority_demoted" event is emitted. A store error ends the
// function silently, and the emit result is discarded.
func (s *Service) applyPriorityDemotePolicy(ctx context.Context, taskID string, attemptNo int, runnerPolicy store.RunnerPolicy, candidate store.RuntimeModelCandidate, cause error, simulated bool) {
	if errors.Is(cause, store.ErrRateLimited) {
		return
	}

	decision := priorityDemoteDecisionForCandidate(runnerPolicy, cause)
	if !decision.Demote {
		return
	}

	if demoteErr := s.store.DemoteCandidatePlatformPriority(ctx, candidate.PlatformID, decision.Step); demoteErr != nil {
		return
	}

	// Record the trace entry first, then publish the event.
	s.recordAttemptTrace(ctx, taskID, attemptNo, priorityDemoteTraceEntry(decision, candidate.PlatformID, candidate.PlatformModelID))

	fields := map[string]any{
		"platformId":      candidate.PlatformID,
		"platformModelId": candidate.PlatformModelID,
		"demoteStep":      decision.Step,
		"code":            clients.ErrorCode(cause),
	}
	_ = s.emit(ctx, taskID, "task.policy.priority_demoted", "running", "priority_demote", 0.52, "candidate platform priority demoted by runner policy", addPolicyTracePayload(fields, decision.Match, decision.Info), simulated)
}
// effectiveRuntimePolicy returns the policy to apply for key.
//
// When override[key] holds a non-empty map[string]any, the result is
// mergeMap(base, thatMap). In every other case (key absent, value of another
// type, or an empty map) base is returned as-is.
func effectiveRuntimePolicy(base map[string]any, override map[string]any, key string) map[string]any {
	section, isMap := override[key].(map[string]any)
	if !isMap || len(section) == 0 {
		return base
	}
	return mergeMap(base, section)
}
// failurePolicyMatches reports whether a failure, described by its error code
// and message, triggers the given policy.
//
// The policy matches only when it is non-empty, its "enabled" flag is true,
// and at least one entry of its "keywords" list occurs as a substring of
// "<code> <message>". The comparison is case-insensitive and ignores
// surrounding whitespace on both sides; keywords that are blank after
// trimming never match.
func failurePolicyMatches(policy map[string]any, code string, message string) bool {
	if len(policy) == 0 {
		return false
	}
	if !boolFromMap(policy, "enabled") {
		return false
	}

	keywords := stringListFromPolicy(policy, "keywords")
	if len(keywords) == 0 {
		return false
	}

	haystack := strings.ToLower(strings.TrimSpace(code + " " + message))
	for i := range keywords {
		needle := strings.ToLower(strings.TrimSpace(keywords[i]))
		if needle == "" {
			continue
		}
		if strings.Contains(haystack, needle) {
			return true
		}
	}
	return false
}
// stringListFromPolicy reads values[key] as a list of strings.
//
// Both []any and []string values are accepted and treated the same way:
// entries that are empty or whitespace-only are dropped, and the remaining
// entries are returned unmodified (not trimmed). For []any input, elements
// that are not strings are skipped as well.
//
// The result is always a freshly allocated slice, so callers can modify it
// without touching the slice stored in values. It returns nil when the key is
// absent or holds a value of any other type.
func stringListFromPolicy(values map[string]any, key string) []string {
	switch raw := values[key].(type) {
	case []any:
		out := make([]string, 0, len(raw))
		for _, item := range raw {
			text, isString := item.(string)
			if isString && strings.TrimSpace(text) != "" {
				out = append(out, text)
			}
		}
		return out
	case []string:
		// Apply the same blank filtering as the []any form and copy, rather
		// than handing back the caller-owned slice.
		out := make([]string, 0, len(raw))
		for _, text := range raw {
			if strings.TrimSpace(text) != "" {
				out = append(out, text)
			}
		}
		return out
	default:
		return nil
	}
}