Skip to content

Commit ec04889

Browse files
Haruka-Kaya and claude committed
fix(bridge): translate GLOB blocked patterns to RE2 (fail-open) + bundle robustness
Addresses the policies/bridge.py findings from review #3137. - GLOB fail-open: _pattern_to_regex translated GLOB patterns with fnmatch.translate, whose output (e.g. "(?s:.*\.exe)\Z") uses Python-only syntax that Go RE2 (OPA) rejects, leaving regex.match undefined so the GLOB deny silently failed open. Add _glob_to_re2 (translates * / ? / [seq] / [!seq] to RE2-safe syntax, anchored whole-string with (?s)^...$). - Reject non-finite confidence_threshold up front (inf rendered invalid Rego; nan was silently dropped); json.dumps uses allow_nan=False. - Clean up a self-created temp bundle dir if materialization fails. Adds tests/test_bridge_pattern.py (no agent_os dependency) including an OPA-backed regression test that compiles the GLOB regex under real RE2 and a fnmatchcase cross-check across edge cases. Verified with OPA 0.70.0. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> Claude-Session: https://claude.ai/code/session_01VT2CfGX3tmPsQaofuuWwED
1 parent 8519313 commit ec04889

2 files changed

Lines changed: 297 additions & 41 deletions

File tree

agent-governance-python/agt-policies/src/agt/policies/bridge.py

Lines changed: 120 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646

4747
import json
4848
import logging
49+
import math
4950
import shutil
5051
import tempfile
5152
from pathlib import Path
@@ -95,6 +96,55 @@ def _find_stock_rego_root() -> Path:
9596
)
9697

9798

99+
def _glob_class_to_re2(body: str) -> str:
    r"""Render the inside of one glob bracket expression as an RE2 class.

    ``body`` is the text between ``[`` and its closing ``]`` and may start
    with ``!`` for negation. The range handling follows
    :func:`fnmatch.translate`: a ``-`` is a range operator unless it is the
    first or last character, and a reversed range such as ``z-a`` is dropped
    rather than emitted (RE2 rejects it as an invalid range, which would leave
    the deny rule undefined again).

    Returns a complete regex fragment: a bracketed class, ``.`` for a negated
    class that ended up empty, or ``[^\s\S]`` (matches nothing, and uses no
    look-around, which RE2 lacks) for a positive class that ended up empty.
    """
    negated = body.startswith("!")
    if negated:
        body = body[1:]

    def _literal(text: str) -> str:
        # Escape every '[' as well as '\': inside an RE2 class an unescaped
        # '[:' opens a POSIX class such as [:alpha:] and can fail to compile.
        return text.replace("\\", r"\\").replace("[", r"\[")

    if "-" not in body:
        rendered = _literal(body)
    else:
        # Split on the hyphens that act as range operators. The first
        # character can never be one, and after a range ``x-y`` the next
        # operator is at least three positions further on.
        chunks: list[str] = []
        start = 0
        dash = body.find("-", 1)
        while dash >= 0:
            chunks.append(body[start:dash])
            start = dash + 1
            dash = body.find("-", dash + 3)
        tail = body[start:]
        if tail:
            chunks.append(tail)
        else:
            # Trailing '-' is a literal hyphen, not a range operator.
            chunks[-1] += "-"
        # Drop reversed (empty) ranges, scanning right to left so indices
        # stay valid while chunks are merged.
        for k in range(len(chunks) - 1, 0, -1):
            if chunks[k - 1][-1] > chunks[k][0]:
                chunks[k - 1] = chunks[k - 1][:-1] + chunks[k][1:]
                del chunks[k]
        # Hyphens left inside a chunk are literals; the joining hyphens are
        # the surviving range operators.
        rendered = "-".join(_literal(c).replace("-", r"\-") for c in chunks)

    if not rendered:
        return "." if negated else r"[^\s\S]"
    if negated:
        return "[^" + rendered + "]"
    if rendered.startswith("^"):
        # A literal leading '^' must not be read as negation.
        rendered = "\\" + rendered
    return "[" + rendered + "]"


def _glob_to_re2(value: str) -> str:
    r"""Translate a shell glob to a Go RE2 regex anchored to the whole string.

    Mirrors :func:`fnmatch.translate` semantics (``*`` / ``?`` / ``[seq]`` /
    ``[!seq]``) but emits only RE2-compatible syntax. ``fnmatch.translate``
    cannot be used directly: it produces Python-specific constructs — an inline
    ``(?s:...)`` flag group and the ``\Z`` end-of-string anchor — that Go RE2
    (the engine behind OPA's ``agt.patterns``) rejects, leaving the generated
    deny rule undefined so a ``GLOB`` blocked pattern silently fails open.

    Bracket expressions are normalised by :func:`_glob_class_to_re2` so that
    reversed ranges (``[z-a]``) and a ``[`` inside the class (``[[:alpha:]]``)
    still yield a regex that compiles, instead of re-introducing the fail-open.

    The result is a leading ``(?s)`` dot-all flag plus ``^``/``$`` anchors.
    Whole-string anchoring preserves v4 glob semantics (the pattern matches the
    entire policy text) under ``agt.patterns``' unanchored ``regex.match``.

    Args:
        value: The glob pattern text.

    Returns:
        The anchored regex source string.
    """
    import re

    out: list[str] = []
    i, n = 0, len(value)
    while i < n:
        ch = value[i]
        i += 1
        if ch == "*":
            out.append(".*")
        elif ch == "?":
            out.append(".")
        elif ch == "[":
            # Find the closing bracket. A ']' directly after '[' or '[!' is a
            # literal member of the class, not the terminator.
            j = i
            if j < n and value[j] == "!":
                j += 1
            if j < n and value[j] == "]":
                j += 1
            while j < n and value[j] != "]":
                j += 1
            if j >= n:
                # No closing bracket: treat '[' as a literal.
                out.append(r"\[")
            else:
                out.append(_glob_class_to_re2(value[i:j]))
                i = j + 1
        else:
            out.append(re.escape(ch))
    return "(?s)^" + "".join(out) + "$"
146+
147+
98148
def _pattern_to_regex(pattern: Any) -> str:
99149
"""Normalise a v4 ``blocked_patterns`` entry to a Go RE2 regex string.
100150
@@ -119,9 +169,7 @@ def _pattern_to_regex(pattern: Any) -> str:
119169
if kind_name == "REGEX":
120170
return value
121171
if kind_name == "GLOB":
122-
import fnmatch
123-
124-
return fnmatch.translate(value)
172+
return _glob_to_re2(value)
125173
raise ValueError(f"unsupported PatternType: {kind!r}")
126174

127175
raise ValueError(f"unsupported blocked_patterns entry: {pattern!r}")
@@ -180,12 +228,15 @@ def _render_rego(
180228
if budget_thresholds:
181229
branches.append(
182230
"v := budgets.deny_if_budget_exceeded("
183-
f"{json.dumps(budget_thresholds)})"
231+
f"{json.dumps(budget_thresholds, allow_nan=False)})"
184232
)
185233

186234
if confidence_threshold is not None and confidence_threshold > 0.0:
235+
# ``allow_nan=False`` is a defensive backstop; the caller
236+
# (governance_to_acs_manifest) rejects non-finite thresholds up front.
187237
branches.append(
188-
f"v := confidence.deny_if_low_confidence({json.dumps(confidence_threshold)})"
238+
"v := confidence.deny_if_low_confidence("
239+
f"{json.dumps(confidence_threshold, allow_nan=False)})"
189240
)
190241

191242
if require_human_approval:
@@ -301,49 +352,77 @@ def governance_to_acs_manifest(
301352
``policy.allowed_tools``. ``approval`` is set when
302353
``require_human_approval`` is true.
303354
"""
355+
created_bundle_dir = bundle_dir is None
304356
bundle_dir = (
305357
Path(bundle_dir).resolve()
306-
if bundle_dir is not None
358+
if not created_bundle_dir
307359
else Path(tempfile.mkdtemp(prefix="agt_bridge_")).resolve()
308360
)
309361
bundle_dir.mkdir(parents=True, exist_ok=True)
310362

311-
stock_root = stock_rego_root or _find_stock_rego_root()
312-
for rego_file in stock_root.glob("*.rego"):
313-
if rego_file.name.endswith("_test.rego"):
314-
continue
315-
shutil.copy(rego_file, bundle_dir / rego_file.name)
316-
317-
blocked_pattern_regexes = [_pattern_to_regex(p) for p in policy.blocked_patterns]
318-
319-
# AGT-M3 round-2 BLOCK A: ``max_tool_calls=0`` is the v4 sentinel for
320-
# "deny every tool call", not "no constraint". Forward 0 through to the
321-
# ``budgets.deny_if_budget_exceeded`` helper. The helper compares
322-
# ``tool_call_count >= limit`` so with ``limit=0`` and the default
323-
# ``tool_call_count=0`` the first call is denied with
324-
# ``budget_tool_calls_exceeded``, which preserves the v4 contract end-to-end
325-
# for any caller that loads the bridge manifest into AgtRuntime directly
326-
# (not just through the AdapterRuntimeBridge host fallback). Previously a
327-
# ``GovernancePolicy(max_tool_calls=0, confidence_threshold=0.0)`` slipped
328-
# through to the default ``allow`` verdict because the budget rule was
329-
# omitted and the fallback ``pre_tool_call`` binding (no ``tool_name_from``)
330-
# never tripped any deny rule. Keep ``max_tokens`` at ``> 0`` because the v4
331-
# dataclass validation rejects ``max_tokens <= 0`` and there is no v4 wire
332-
# value to preserve there.
333-
rego_source = _render_rego(
334-
package="agt.governance_policy",
335-
max_tokens=policy.max_tokens if policy.max_tokens > 0 else None,
336-
max_tool_calls=policy.max_tool_calls if policy.max_tool_calls >= 0 else None,
337-
confidence_threshold=(
363+
try:
364+
stock_root = stock_rego_root or _find_stock_rego_root()
365+
for rego_file in stock_root.glob("*.rego"):
366+
if rego_file.name.endswith("_test.rego"):
367+
continue
368+
shutil.copy(rego_file, bundle_dir / rego_file.name)
369+
370+
blocked_pattern_regexes = [
371+
_pattern_to_regex(p) for p in policy.blocked_patterns
372+
]
373+
374+
# A non-finite confidence_threshold is invalid input. ``inf`` would
375+
# otherwise slip past the ``> 0`` guard in _render_rego and render
376+
# ``deny_if_low_confidence(Infinity)`` (invalid Rego/JSON that fails to
377+
# compile); ``nan`` would be silently dropped, so a caller who set a
378+
# threshold would get no confidence deny at all. Fail loudly instead.
379+
if policy.confidence_threshold is not None and not math.isfinite(
338380
policy.confidence_threshold
339-
if policy.confidence_threshold and policy.confidence_threshold > 0
340-
else None
341-
),
342-
blocked_patterns=blocked_pattern_regexes,
343-
require_human_approval=policy.require_human_approval,
344-
)
345-
rego_path = bundle_dir / f"{policy_id}.rego"
346-
rego_path.write_text(rego_source, encoding="utf-8")
381+
):
382+
raise ValueError(
383+
"confidence_threshold must be finite, got "
384+
f"{policy.confidence_threshold!r}"
385+
)
386+
387+
# AGT-M3 round-2 BLOCK A: ``max_tool_calls=0`` is the v4 sentinel for
388+
# "deny every tool call", not "no constraint". Forward 0 through to the
389+
# ``budgets.deny_if_budget_exceeded`` helper. The helper compares
390+
# ``tool_call_count >= limit`` so with ``limit=0`` and the default
391+
# ``tool_call_count=0`` the first call is denied with
392+
# ``budget_tool_calls_exceeded``, which preserves the v4 contract
393+
# end-to-end for any caller that loads the bridge manifest into
394+
# AgtRuntime directly (not just through the AdapterRuntimeBridge host
395+
# fallback). Previously a ``GovernancePolicy(max_tool_calls=0,
396+
# confidence_threshold=0.0)`` slipped through to the default ``allow``
397+
# verdict because the budget rule was omitted and the fallback
398+
# ``pre_tool_call`` binding (no ``tool_name_from``) never tripped any
399+
# deny rule. Keep ``max_tokens`` at ``> 0`` because the v4 dataclass
400+
# validation rejects ``max_tokens <= 0`` and there is no v4 wire value
401+
# to preserve there.
402+
rego_source = _render_rego(
403+
package="agt.governance_policy",
404+
max_tokens=policy.max_tokens if policy.max_tokens > 0 else None,
405+
max_tool_calls=(
406+
policy.max_tool_calls if policy.max_tool_calls >= 0 else None
407+
),
408+
confidence_threshold=(
409+
policy.confidence_threshold
410+
if policy.confidence_threshold and policy.confidence_threshold > 0
411+
else None
412+
),
413+
blocked_patterns=blocked_pattern_regexes,
414+
require_human_approval=policy.require_human_approval,
415+
)
416+
rego_path = bundle_dir / f"{policy_id}.rego"
417+
rego_path.write_text(rego_source, encoding="utf-8")
418+
except Exception:
419+
# Do not leave a half-built bundle behind (stock libs copied, generated
420+
# module missing) when pattern translation, rendering, or a write fails.
421+
# Only clean up a directory we created ourselves; a caller-supplied one
422+
# is theirs to manage.
423+
if created_bundle_dir:
424+
shutil.rmtree(bundle_dir, ignore_errors=True)
425+
raise
347426

348427
# bind_tools must also cover the case where the policy has a budget
349428
# or human-approval requirement but no explicit tool allowlist; without
Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
# Copyright (c) Microsoft Corporation.
2+
# Licensed under the MIT License.
3+
"""Unit tests for ``agt.policies.bridge`` pattern translation and bundle safety.
4+
5+
These avoid the v4 ``agent_os`` dependency (unlike ``test_bridge.py``) by
6+
exercising the bridge's pattern translation directly and by driving
7+
:func:`governance_to_acs_manifest` with a duck-typed policy fixture. They pin
8+
the fix for the ``GLOB`` fail-open: ``fnmatch.translate`` emits Python-only
9+
``(?s:...)`` / ``\\Z`` constructs that Go RE2 (OPA) rejects, so a ``GLOB``
10+
blocked pattern silently never matched.
11+
"""
12+
13+
from __future__ import annotations
14+
15+
import json
16+
import re
17+
import shutil
18+
import subprocess
19+
import tempfile
20+
import types
21+
from pathlib import Path
22+
23+
import pytest
24+
25+
from agt.policies.bridge import (
26+
_glob_to_re2,
27+
_pattern_to_regex,
28+
governance_to_acs_manifest,
29+
)
30+
31+
32+
def _opa_regex_match(pattern: str, subject: str, opa: str) -> bool | None:
    """Run ``regex.match(pattern, subject)`` through the ``opa`` binary.

    A one-rule Rego module is written to a temporary directory and evaluated
    with ``opa eval``, passing the pattern and subject as JSON on stdin.

    Returns the value OPA reports for the rule, or ``None`` when the process
    exits non-zero or its output carries no value. The latter is what an
    RE2-rejected pattern looks like: the rule is undefined, which is how the
    old ``fnmatch.translate`` output made a GLOB deny fail open.
    """
    module_text = "package c\nimport rego.v1\nm := regex.match(input.p, input.s)\n"
    payload = json.dumps({"p": pattern, "s": subject})
    with tempfile.TemporaryDirectory() as workdir:
        module_path = Path(workdir) / "c.rego"
        module_path.write_text(module_text, encoding="utf-8")
        command = [
            opa,
            "eval",
            "--stdin-input",
            "--data",
            str(module_path),
            "--format",
            "json",
            "data.c.m",
        ]
        completed = subprocess.run(
            command,
            input=payload,
            capture_output=True,
            text=True,
            timeout=10,
        )
    if completed.returncode != 0:
        return None
    try:
        document = json.loads(completed.stdout)
        return document["result"][0]["expressions"][0]["value"]
    except (KeyError, IndexError, ValueError):
        # Undefined rule (no "result") or unparsable output.
        return None
57+
58+
59+
def _glob(value: str) -> tuple[str, types.SimpleNamespace]:
    """Build a ``(value, kind)`` blocked-pattern entry whose kind is named ``GLOB``."""
    kind = types.SimpleNamespace(name="GLOB")
    return value, kind
61+
62+
63+
@pytest.mark.parametrize(
    "glob, subject, expect",
    [
        ("*.exe", "payload.exe", True),
        ("*.exe", "payload.txt", False),
        ("secret?.log", "secret1.log", True),
        ("secret?.log", "secret12.log", False),
        ("[ab]*.sh", "a_run.sh", True),
        ("[ab]*.sh", "c_run.sh", False),
    ],
)
def test_glob_to_re2_matches_like_fnmatch(glob: str, subject: str, expect: bool) -> None:
    """The translated regex accepts or rejects each subject as the glob would."""
    translated = _glob_to_re2(glob)
    matched = re.match(translated, subject) is not None
    assert matched is expect
76+
77+
78+
@pytest.mark.parametrize(
    "glob, subject",
    [
        ("[!ab]x", "cx"),  # negated class matches
        ("[!ab]x", "ax"),  # negated class excludes
        ("a.c", "a.c"),  # '.' is a literal in a glob (escaped)
        ("a.c", "axc"),  # ...so it must NOT match any char
        ("a+b*", "a+bcd"),  # '+' is a literal, escaped
        ("f[oo", "f[oo"),  # unclosed '[' is a literal
        ("f[oo", "fXoo"),
        ("*.tar.gz", "archive.tar.gz"),
        ("data_?.csv", "data_9.csv"),
        ("data_?.csv", "data_99.csv"),
    ],
)
def test_glob_to_re2_agrees_with_fnmatchcase(glob: str, subject: str) -> None:
    """Cross-check the translator against ``fnmatch.fnmatchcase``.

    The case-sensitive reference matcher is used because the translator
    anchors to the whole string and does no case folding.
    """
    import fnmatch

    ours = re.match(_glob_to_re2(glob), subject) is not None
    reference = fnmatch.fnmatchcase(subject, glob)
    assert ours is reference
102+
103+
104+
def test_glob_output_is_re2_safe() -> None:
    r"""The translator emits neither ``\Z`` nor an inline ``(?s:...)`` group.

    Both are ``fnmatch.translate`` constructs that Go RE2 rejects.
    """
    translated = _glob_to_re2("*.exe")
    for python_only in ("\\Z", "(?s:"):
        assert python_only not in translated
    # Short-circuit like the original single ``and`` assertion.
    assert translated.startswith("(?s)^")
    assert translated.endswith("$")
111+
112+
113+
def test_pattern_to_regex_dispatches_glob_to_re2() -> None:
    """A GLOB-kind entry is routed through ``_glob_to_re2``."""
    dispatched = _pattern_to_regex(_glob("*.exe"))
    direct = _glob_to_re2("*.exe")
    assert dispatched == direct
115+
116+
117+
@pytest.mark.parametrize(
    "subject, expect",
    [("payload.exe", True), ("payload.txt", False), ("dir/app.exe", True)],
)
def test_glob_re2_compiles_and_matches_under_opa(subject: str, expect: bool) -> None:
    r"""Regression guard: the GLOB regex must compile AND match under Go RE2.

    The earlier ``fnmatch.translate`` output (``(?s:.*\.exe)\Z``) is rejected
    by RE2, so ``regex.match`` came back undefined (``None`` from the helper)
    and the deny rule failed open. Skipped when no ``opa`` binary is found on
    ``PATH`` or at ``~/.local/bin/opa``.
    """
    opa_binary = shutil.which("opa")
    if not opa_binary:
        opa_binary = str(Path.home() / ".local" / "bin" / "opa")
    if not Path(opa_binary).exists():
        pytest.skip("opa binary required for RE2 compatibility check")
    pattern = _glob_to_re2("*.exe")
    verdict = _opa_regex_match(pattern, subject, opa_binary)
    assert verdict is expect, f"regex.match({pattern!r}, {subject!r}) -> {verdict!r}"
135+
136+
137+
def test_substring_and_regex_patterns_unchanged() -> None:
    """Plain strings are still escaped and REGEX entries still pass through."""
    substring = "a.b/c"
    assert _pattern_to_regex(substring) == re.escape(substring)

    raw_regex = "rm\\s+-rf"
    regex_kind = types.SimpleNamespace(name="REGEX")
    assert _pattern_to_regex((raw_regex, regex_kind)) == raw_regex
141+
142+
143+
class _Policy:
    """Duck-typed policy fixture for driving ``governance_to_acs_manifest``.

    Scalar settings are class-level defaults that a test may override on an
    instance. The two list attributes are created per instance in
    ``__init__``: as class attributes they would be one shared object, so an
    in-place mutation in one test would leak into every other ``_Policy``.
    """

    name = "p"
    version = "1.0.0"
    max_tokens = 100
    max_tool_calls = 5
    require_human_approval = False
    confidence_threshold = 0.0

    def __init__(self) -> None:
        # Fresh lists for every instance (no shared mutable class state).
        self.allowed_tools: list[str] = []
        self.blocked_patterns: list = []
152+
153+
154+
@pytest.mark.parametrize("bad", [float("inf"), float("nan")])
def test_non_finite_confidence_threshold_rejected(tmp_path: Path, bad: float) -> None:
    """A non-finite ``confidence_threshold`` is rejected with a ValueError.

    ``match`` pins the error to the finite-threshold check; without it the
    test would pass on any unrelated ``ValueError`` raised earlier.
    """
    pol = _Policy()
    pol.confidence_threshold = bad
    with pytest.raises(ValueError, match="confidence_threshold must be finite"):
        governance_to_acs_manifest(
            pol, bundle_dir=tmp_path / "bundle", stock_rego_root=tmp_path / "stock"
        )
162+
163+
164+
def test_created_bundle_dir_cleaned_up_on_failure(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """A bundle directory the bridge created itself is removed on failure.

    ``tempfile.mkdtemp`` is patched to return a known path so its absence can
    be checked after the call raises.
    """
    leak_candidate = tmp_path / "agt_bridge_leak"

    def _fake_mkdtemp(*args, **kwargs):
        return str(leak_candidate)

    monkeypatch.setattr("agt.policies.bridge.tempfile.mkdtemp", _fake_mkdtemp)

    policy = _Policy()
    # A non-string GLOB value is expected to make pattern translation raise
    # after the bundle directory has already been created.
    policy.blocked_patterns = [(123, types.SimpleNamespace(name="GLOB"))]
    with pytest.raises(ValueError):
        governance_to_acs_manifest(policy, stock_rego_root=tmp_path / "stock")

    assert not leak_candidate.exists(), "self-created bundle dir must be removed on failure"

0 commit comments

Comments
 (0)