agent47/examples/budget_aware_escalation.py at main · bmdhodl/agent47 · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
"""Local-only example of advisor-style model escalation with AgentGuard.

Run:

    python examples/budget_aware_escalation.py
"""
from __future__ import annotations

from agentguard import (
    BudgetAwareEscalation,
    EscalationRequired,
    EscalationSignal,
    JsonlFileSink,
    Tracer,
)


def _simulated_model_call(model: str, prompt: str) -> dict:
    if model == "ollama/llama3.1:8b":
        return {
            "model": model,
            "prompt": prompt,
            "answer": "borderline",
            "token_count": 2430,
            "confidence": 0.39,
            "tool_call_depth": 4,
        }
    return {
        "model": model,
        "prompt": prompt,
        "answer": "escalated-final-answer",
        "token_count": 840,
        "confidence": 0.92,
        "tool_call_depth": 1,
    }


def main() -> int:
    """Run a two-turn escalation demo and return the process exit code.

    Turn 1 asks the guard which model to use, runs the simulated call and
    feeds the result to ``guard.auto_check``. Turn 2 calls ``guard.check()``:
    if it raises ``EscalationRequired`` the turn is routed to the exception's
    ``target_model``, otherwise it stays on ``guard.primary_model``. Both turns
    emit events through a ``Tracer`` backed by a ``JsonlFileSink``.

    Returns:
        Always ``0``.
    """
    # Model names are plain strings, so any provider can be plugged in. Here a
    # cheap local model is the primary and a stronger hosted model is the
    # escalation target; an OpenAI or other-provider model would work as well.
    escalation_signals = (
        EscalationSignal.TOKEN_COUNT(threshold=2000),
        EscalationSignal.CONFIDENCE_BELOW(threshold=0.45),
        EscalationSignal.TOOL_CALL_DEPTH(threshold=3),
    )
    guard = BudgetAwareEscalation(
        primary_model="ollama/llama3.1:8b",
        escalate_model="claude-opus-4-6",
        escalate_on=escalation_signals,
    )

    trace_sink = JsonlFileSink("budget_aware_escalation_traces.jsonl")
    tracer = Tracer(sink=trace_sink, service="budget-aware-escalation-example")

    prompt = "Classify this contract clause and explain the escalation path."

    # Turn 1: use whatever model the guard selects.
    with tracer.trace("agent.turn.1") as ctx:
        turn_one_model = guard.select_model()
        print(f"Turn 1 model: {turn_one_model}")
        turn_one_route = {"selected_model": turn_one_model, "route": "primary"}
        ctx.event("model.route", data=turn_one_route)
        turn_one_result = _simulated_model_call(turn_one_model, prompt)
        ctx.event("llm.result", data=turn_one_result)
        # NOTE(review): presumably this is where the guard evaluates the
        # result against the configured signals - confirm against agentguard.
        guard.auto_check("llm.result", turn_one_result)

    # Turn 2: let the guard decide whether to stay on the primary or escalate.
    with tracer.trace("agent.turn.2") as ctx:
        try:
            guard.check()
            turn_two_model = guard.primary_model
            reason = "primary path"
            route = "primary"
        except EscalationRequired as exc:
            turn_two_model, reason, route = exc.target_model, exc.reason, "escalated"
            escalation_details = {
                "signal_name": exc.signal_name,
                "reason": exc.reason,
                "target_model": exc.target_model,
            }
            ctx.event("guard.escalation", data=escalation_details)

        print(f"Turn 2 model: {turn_two_model}")
        print(f"Escalation reason: {reason}")
        turn_two_route = {"selected_model": turn_two_model, "route": route}
        ctx.event("model.route", data=turn_two_route)
        turn_two_result = _simulated_model_call(turn_two_model, prompt)
        ctx.event("llm.result", data=turn_two_result)

    print("Wrote budget_aware_escalation_traces.jsonl")
    return 0


if __name__ == "__main__":
    # Script entry point: use main()'s return value as the process exit status.
    exit_code = main()
    raise SystemExit(exit_code)