125 lines
5.1 KiB
Go
125 lines
5.1 KiB
Go
package runner
|
|
|
|
import (
|
|
"context"
|
|
"strings"
|
|
|
|
"github.com/easyai/easyai-ai-gateway/apps/api/internal/clients"
|
|
"github.com/easyai/easyai-ai-gateway/apps/api/internal/store"
|
|
)
|
|
|
|
func (s *Service) applyCandidateFailurePolicies(ctx context.Context, taskID string, candidate store.RuntimeModelCandidate, cause error, simulated bool) {
|
|
code := clients.ErrorCode(cause)
|
|
message := ""
|
|
if cause != nil {
|
|
message = cause.Error()
|
|
}
|
|
|
|
autoDisablePolicy := effectiveRuntimePolicy(candidate.AutoDisablePolicy, candidate.RuntimePolicyOverride, "autoDisablePolicy")
|
|
if failurePolicyMatches(autoDisablePolicy, code, message) && intFromPolicy(autoDisablePolicy, "threshold") <= 1 {
|
|
if err := s.store.DisableCandidatePlatform(ctx, candidate.PlatformID); err == nil {
|
|
_ = s.emit(ctx, taskID, "task.policy.auto_disabled", "running", "auto_disable", 0.48, "candidate platform disabled by failure policy", map[string]any{
|
|
"platformId": candidate.PlatformID,
|
|
"platformModelId": candidate.PlatformModelID,
|
|
"code": code,
|
|
}, simulated)
|
|
}
|
|
}
|
|
|
|
degradePolicy := effectiveRuntimePolicy(candidate.DegradePolicy, candidate.RuntimePolicyOverride, "degradePolicy")
|
|
if failurePolicyMatches(degradePolicy, code, message) {
|
|
cooldownSeconds := intFromPolicy(degradePolicy, "cooldownSeconds")
|
|
if err := s.store.CooldownCandidatePlatformModel(ctx, candidate.PlatformModelID, cooldownSeconds); err == nil {
|
|
_ = s.emit(ctx, taskID, "task.policy.degraded", "running", "degrade", 0.5, "candidate model cooled down by failure policy", map[string]any{
|
|
"platformId": candidate.PlatformID,
|
|
"platformModelId": candidate.PlatformModelID,
|
|
"cooldownSeconds": cooldownSeconds,
|
|
"code": code,
|
|
}, simulated)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (s *Service) applyFailoverAction(ctx context.Context, taskID string, candidate store.RuntimeModelCandidate, decision failoverDecision, simulated bool) {
|
|
switch decision.Action {
|
|
case "disable_and_next":
|
|
if err := s.store.DisableCandidatePlatform(ctx, candidate.PlatformID); err == nil {
|
|
_ = s.emit(ctx, taskID, "task.policy.failover_disabled", "running", "failover", 0.51, "candidate platform disabled by runner failover policy", addPolicyTracePayload(map[string]any{
|
|
"platformId": candidate.PlatformID,
|
|
"platformModelId": candidate.PlatformModelID,
|
|
"action": decision.Action,
|
|
"reason": decision.Reason,
|
|
}, decision.Match, decision.Info), simulated)
|
|
}
|
|
case "cooldown_and_next":
|
|
if err := s.store.CooldownCandidatePlatformModel(ctx, candidate.PlatformModelID, decision.CooldownSeconds); err == nil {
|
|
_ = s.emit(ctx, taskID, "task.policy.failover_cooled_down", "running", "failover", 0.51, "candidate model cooled down by runner failover policy", addPolicyTracePayload(map[string]any{
|
|
"platformId": candidate.PlatformID,
|
|
"platformModelId": candidate.PlatformModelID,
|
|
"cooldownSeconds": decision.CooldownSeconds,
|
|
"action": decision.Action,
|
|
"reason": decision.Reason,
|
|
}, decision.Match, decision.Info), simulated)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (s *Service) applyPriorityDemotePolicy(ctx context.Context, taskID string, attemptNo int, runnerPolicy store.RunnerPolicy, candidate store.RuntimeModelCandidate, cause error, simulated bool) {
|
|
decision := priorityDemoteDecisionForCandidate(runnerPolicy, cause)
|
|
if !decision.Demote {
|
|
return
|
|
}
|
|
if err := s.store.DemoteCandidatePlatformPriority(ctx, candidate.PlatformID, decision.Step); err == nil {
|
|
s.recordAttemptTrace(ctx, taskID, attemptNo, priorityDemoteTraceEntry(decision, candidate.PlatformID, candidate.PlatformModelID))
|
|
_ = s.emit(ctx, taskID, "task.policy.priority_demoted", "running", "priority_demote", 0.52, "candidate platform priority demoted by runner policy", addPolicyTracePayload(map[string]any{
|
|
"platformId": candidate.PlatformID,
|
|
"platformModelId": candidate.PlatformModelID,
|
|
"demoteStep": decision.Step,
|
|
"code": clients.ErrorCode(cause),
|
|
}, decision.Match, decision.Info), simulated)
|
|
}
|
|
}
|
|
|
|
func effectiveRuntimePolicy(base map[string]any, override map[string]any, key string) map[string]any {
|
|
policy := base
|
|
if nested, ok := override[key].(map[string]any); ok && len(nested) > 0 {
|
|
policy = mergeMap(policy, nested)
|
|
}
|
|
return policy
|
|
}
|
|
|
|
func failurePolicyMatches(policy map[string]any, code string, message string) bool {
|
|
if len(policy) == 0 || !boolFromMap(policy, "enabled") {
|
|
return false
|
|
}
|
|
keywords := stringListFromPolicy(policy, "keywords")
|
|
if len(keywords) == 0 {
|
|
return false
|
|
}
|
|
target := strings.ToLower(strings.TrimSpace(code + " " + message))
|
|
for _, keyword := range keywords {
|
|
keyword = strings.ToLower(strings.TrimSpace(keyword))
|
|
if keyword != "" && strings.Contains(target, keyword) {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// stringListFromPolicy extracts the list of strings stored under key.
//
// Two value shapes are accepted:
//   - []any: elements that are not strings are ignored.
//   - []string: used as is.
//
// In both cases entries that are blank after trimming whitespace are dropped
// (the surviving entries are returned untrimmed), and the result is a freshly
// allocated slice, so callers can never mutate the slice held by the policy
// map. Any other value, or a missing key, yields nil.
func stringListFromPolicy(values map[string]any, key string) []string {
	switch raw := values[key].(type) {
	case []any:
		out := make([]string, 0, len(raw))
		for _, item := range raw {
			if text, ok := item.(string); ok && strings.TrimSpace(text) != "" {
				out = append(out, text)
			}
		}
		return out
	case []string:
		// Apply the same blank filtering as the []any branch and copy rather
		// than hand back the map's own slice.
		out := make([]string, 0, len(raw))
		for _, text := range raw {
			if strings.TrimSpace(text) != "" {
				out = append(out, text)
			}
		}
		return out
	default:
		return nil
	}
}
|