easyai-ai-gateway/apps/api/internal/runner/trace.go

129 lines
3.7 KiB
Go

package runner
import (
"context"
"fmt"
"time"
)
func failureTraceEntry(err error, retryable bool) map[string]any {
info := failureInfoFromError(err)
entry := policyTraceEntry("failure", "client", "failed", "client_call_failed", policyRuleMatch{}, info)
entry["retryable"] = retryable
return entry
}
// retryTraceEntry builds the "same_client_retry" trace entry for a retry
// decision taken against the same client.
//
// The reason, rule match and failure info are taken from decision; action is
// passed through unchanged. On top of the common fields produced by
// policyTraceEntry, the entry records decision.Retry under "retry" and the
// attempt counters under "clientAttempt" and "maxAttempts".
func retryTraceEntry(decision retryDecision, action string, clientAttempt int, maxAttempts int) map[string]any {
	trace := policyTraceEntry(
		"same_client_retry",
		"same_client",
		action,
		decision.Reason,
		decision.Match,
		decision.Info,
	)
	trace["maxAttempts"] = maxAttempts
	trace["clientAttempt"] = clientAttempt
	trace["retry"] = decision.Retry
	return trace
}
// failoverTraceEntry builds the trace entry for a failover decision.
//
// The event is "failover_next" when decision.Retry is true and
// "failover_stop" otherwise; the scope is always "next_platform". The entry
// records decision.Retry under "retry" and, only when it is positive,
// decision.CooldownSeconds under "cooldownSeconds".
func failoverTraceEntry(decision failoverDecision) map[string]any {
	var event string
	if decision.Retry {
		event = "failover_next"
	} else {
		event = "failover_stop"
	}

	trace := policyTraceEntry(
		event,
		"next_platform",
		decision.Action,
		decision.Reason,
		decision.Match,
		decision.Info,
	)
	trace["retry"] = decision.Retry
	if cooldown := decision.CooldownSeconds; cooldown > 0 {
		trace["cooldownSeconds"] = cooldown
	}
	return trace
}
// priorityDemoteTraceEntry builds the "priority_demoted" trace entry for a
// priority demotion decision.
//
// The entry uses scope "priority_demote" and action "demote", and takes its
// reason, rule match and failure info from decision. It additionally records
// decision.Demote ("demote"), decision.Step ("demoteStep") and the supplied
// platform identifiers ("platformId", "platformModelId").
func priorityDemoteTraceEntry(decision priorityDemoteDecision, platformID string, platformModelID string) map[string]any {
	trace := policyTraceEntry(
		"priority_demoted",
		"priority_demote",
		"demote",
		decision.Reason,
		decision.Match,
		decision.Info,
	)
	trace["platformId"] = platformID
	trace["platformModelId"] = platformModelID
	trace["demote"] = decision.Demote
	trace["demoteStep"] = decision.Step
	return trace
}
// failoverTimeBudgetTraceEntry builds the "failover_stop" trace entry emitted
// with reason "failover_time_budget_exceeded".
//
// The rule match is synthesized here rather than supplied by the caller: it
// names the failoverPolicy rule "maxDurationSeconds" and carries the decimal
// rendering of maxDurationSeconds as the matched value. Both elapsedSeconds
// and maxDurationSeconds are also stored on the entry as integers.
func failoverTimeBudgetTraceEntry(elapsedSeconds int, maxDurationSeconds int, info failureInfo) map[string]any {
	budgetRule := policyRuleMatch{
		Source: "gateway_runner_policies.failover_policy",
		Policy: "failoverPolicy",
		Rule:   "maxDurationSeconds",
		Value:  fmt.Sprintf("%d", maxDurationSeconds),
	}

	trace := policyTraceEntry(
		"failover_stop",
		"next_platform",
		"stop",
		"failover_time_budget_exceeded",
		budgetRule,
		info,
	)
	trace["maxDurationSeconds"] = maxDurationSeconds
	trace["elapsedSeconds"] = elapsedSeconds
	return trace
}
// policyTraceEntry builds the common trace entry shared by the policy trace
// helpers in this file.
//
// The entry always contains "event", "scope", "action", "reason" and
// "createdAt" (the current time formatted as time.RFC3339Nano). Failure
// details and rule-match details are optional: each one is added only when
// the corresponding field is non-empty (or, for info.Status, greater than
// zero), so absent information never shows up as an empty value.
func policyTraceEntry(event string, scope string, action string, reason string, match policyRuleMatch, info failureInfo) map[string]any {
	createdAt := time.Now().Format(time.RFC3339Nano)
	trace := map[string]any{
		"event":     event,
		"scope":     scope,
		"action":    action,
		"reason":    reason,
		"createdAt": createdAt,
	}

	// setIf stores value under key only when present is true. The value is
	// passed as any so its dynamic type is stored unchanged.
	setIf := func(present bool, key string, value any) {
		if present {
			trace[key] = value
		}
	}

	// Failure details.
	setIf(info.Code != "", "errorCode", info.Code)
	setIf(info.Message != "", "message", info.Message)
	setIf(info.Status > 0, "statusCode", info.Status)
	setIf(info.Category != "", "category", info.Category)

	// Details of the policy rule that matched.
	setIf(match.Source != "", "policySource", match.Source)
	setIf(match.Policy != "", "policy", match.Policy)
	setIf(match.Rule != "", "policyRule", match.Rule)
	setIf(match.Value != "", "matchedValue", match.Value)

	return trace
}
// addPolicyTracePayload copies the failure and rule-match details onto
// payload and returns it.
//
// A nil payload is replaced by a freshly allocated map; a non-nil payload is
// modified in place, so the caller's map and the returned map are the same.
// Each key is written only when its source field is non-empty (or, for
// info.Status, greater than zero). info.Message is not written by this
// function.
func addPolicyTracePayload(payload map[string]any, match policyRuleMatch, info failureInfo) map[string]any {
	out := payload
	if out == nil {
		out = make(map[string]any)
	}

	// Failure details.
	if code := info.Code; code != "" {
		out["errorCode"] = code
	}
	if status := info.Status; status > 0 {
		out["statusCode"] = status
	}
	if category := info.Category; category != "" {
		out["category"] = category
	}

	// Details of the policy rule that matched.
	if source := match.Source; source != "" {
		out["policySource"] = source
	}
	if policy := match.Policy; policy != "" {
		out["policy"] = policy
	}
	if rule := match.Rule; rule != "" {
		out["policyRule"] = rule
	}
	if value := match.Value; value != "" {
		out["matchedValue"] = value
	}

	return out
}
// recordAttemptTrace appends entry to the trace of the given task attempt
// through s.store.AppendTaskAttemptTrace.
//
// Nothing is written when attemptNo is not positive or entry is empty. A
// failure to append is not returned to the caller: it is only reported as a
// warning on s.logger.
func (s *Service) recordAttemptTrace(ctx context.Context, taskID string, attemptNo int, entry map[string]any) {
	if attemptNo <= 0 {
		return
	}
	if len(entry) == 0 {
		return
	}

	err := s.store.AppendTaskAttemptTrace(ctx, taskID, attemptNo, entry)
	if err == nil {
		return
	}
	s.logger.Warn("append task attempt trace failed", "taskID", taskID, "attempt", attemptNo, "error", err)
}