feat(sdk): Tool.terminal flag + Agent._plan hook (0.13.0)

Shangri-la-0428 · claude · Shangri-la-0428 · commit 39568c2170f1 · 2026-04-12T06:27:16.000+08:00
Two seams for deployments that need to extend the pipeline without
forking the SDK:

- Tool.terminal: marks a tool as turn-completing. The Generator's
  tool loop finishes the current round of tool calls and then ends
  the turn if any of them was terminal, so the LLM cannot re-emit
  the same write (e.g. comment_on_post) in a later round. Fixes the
  duplicate-reply class of bugs where one mention produced 2-3
  copies of the same comment.

- Agent._plan hook: symmetric with _perceive/_enrich. Default
  delegates to planner.plan (the SDK rule engine). Subclasses
  override to layer per-user or per-deployment rules on top while
  keeping Psyche + Thronglets-driven decisions intact.

run_pipeline takes an optional plan_fn callable; base.Agent wires
it to self._plan in process(). Pure additions — existing callers
are unaffected.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,43 @@
 # Changelog
 
+## [0.13.0] - 2026-04-12
+
+### Added
+
+- **`Tool.terminal` flag** — write-side actions (social posts, comment
+  replies, anything that completes a turn) can be marked terminal at
+  registration time:
+
+      registry.register("comment_on_post", schema, handler, terminal=True)
+
+  ``ToolRegistry.is_terminal(name)`` exposes the flag for the tool
+  loop. Default is ``False``; existing tools register unchanged.
+
+- **`Agent._plan` hook** — symmetric with `_perceive`, `_enrich`,
+  `_build_prompt`, etc. Default delegates to ``planner.plan`` so the
+  built-in Psyche/Thronglets-driven Plan stays the baseline. Subclasses
+  override to layer deployment-specific rules on top — for example
+  ``oasyce-samantha 0.2.0`` uses this seam for per-user standing
+  rules without touching the SDK Planner.
+
+- **`run_pipeline(plan_fn=...)` parameter** — accepts a custom
+  Plan-producing callable. ``None`` falls back to the SDK default.
+  This is the pipeline-side counterpart of ``Agent._plan``.
+
+### Fixed
+
+- **Tool loop no longer duplicates terminal calls.** ``Agent._generate``
+  now ends the turn after any round in which a terminal tool was called,
+  instead of giving the LLM another round to re-emit the same write. This
+  was the root cause of Samantha posting 2-3 duplicate comments on a
+  single
+  re-evaluate after a tool call and re-issue the same ``comment_on_post``
+  in round 2/3 of the loop.
+
+  The fix is opt-in via ``Tool.terminal=True`` — non-terminal read
+  tools (memory recall, balance query) still chain freely so the LLM
+  can synthesise a final answer from a tool result.
+
 ## [0.12.0] - 2026-04-11
 
 ### Removed
diff --git a/oasyce_sdk/agent/base.py b/oasyce_sdk/agent/base.py
@@ -24,6 +24,7 @@
 Overridable hooks (all prefixed with ``_``):
 
   ``_perceive``       : stimulus → Perception (default: identity.perceive)
+  ``_plan``           : (stimulus, perception) → Plan (default: planner.plan)
   ``_enrich``         : Plan-driven context gathering (default: images only)
   ``_build_prompt``   : format the stimulus for the LLM (default: raw content)
   ``_get_llm``        : pick an LLM slot for this stimulus (default: registry)
@@ -47,7 +48,7 @@
 
 from .channel import Channel
 from .pipeline import EnrichContext, run_pipeline
-from .planner import Plan
+from .planner import Plan, plan as default_plan
 from .stimulus import Stimulus
 from .tools import ToolContext, ToolRegistry
 
@@ -135,6 +136,7 @@ def process(self, stimulus: Stimulus) -> str | None:
             reflect=self._reflect,
             constitution=self.constitution,
             tool_registry=self._tools,
+            plan_fn=self._plan,
         )
 
     def close(self) -> None:
@@ -164,6 +166,24 @@ def _perceive(self, stimulus: Stimulus) -> "Perception":
         context = f"{stimulus.kind}: {stimulus.content[:200]}"
         return self.identity.perceive(context)
 
+    def _plan(self, stimulus: Stimulus, perception: "Perception") -> Plan:
+        """(Stimulus, Perception) → Plan. Pure, zero cost.
+
+        Default delegates to ``planner.plan`` — the SDK rule engine
+        driven by Psyche ResponseContract + Thronglets ambient priors.
+        Subclasses override to layer their own rules on top, e.g.::
+
+            def _plan(self, stimulus, perception):
+                p = super()._plan(stimulus, perception)
+                if stimulus.kind == "chat" and stimulus.sender_id:
+                    self.session(stimulus.sender_id).rules.apply(stimulus, p)
+                return p
+
+        The hook receives both stimulus and perception so user-rule
+        layers can match on text *and* on Psyche state.
+        """
+        return default_plan(stimulus, perception)
+
     def _enrich(self, stimulus: Stimulus, plan: Plan) -> EnrichContext:
         """Gather per-stimulus context. Default: images only.
 
@@ -222,8 +242,14 @@ def _generate(
           1. Call LLM with ``messages`` + ``tools``
           2. If no tool calls, return the text
           3. Else execute each tool call via ``self._tools.execute``,
-             append results as messages, and loop
-          4. Bail after ``TOOL_LOOP_MAX_ROUNDS`` iterations
+             append results as messages
+          4. If any tool call in this round was *terminal* (a write-side
+             action that completes the turn — social posts, replies),
+             break the loop. The LLM should not get a second chance to
+             re-emit the same write on the same turn — that is exactly
+             how a single ``mention`` ended up producing two or three
+             duplicate comments.
+          5. Otherwise loop, bailing after ``TOOL_LOOP_MAX_ROUNDS``
 
         On LLM exception while images are in the prompt, retry once
         text-only — a common OpenAI-compat failure mode where the
@@ -248,6 +274,7 @@ def _generate(
                     raise
             if not resp.tool_calls:
                 return resp.text
+            terminal_called = False
             for tc in resp.tool_calls:
                 self._inject_tool_defaults(tc, stimulus)
                 result = self._tools.execute(tc.name, tc.arguments, tool_ctx)
@@ -260,6 +287,14 @@ def _generate(
                     "role": "user",
                     "content": f"[Tool result for {tc.name}]: {result}",
                 })
+                if self._tools.is_terminal(tc.name):
+                    terminal_called = True
+            if terminal_called:
+                logger.info(
+                    "%s terminal tool fired, ending turn after this round",
+                    stimulus.kind,
+                )
+                return resp.text
         return resp.text if resp else ""
 
     def _inject_tool_defaults(
diff --git a/oasyce_sdk/agent/pipeline.py b/oasyce_sdk/agent/pipeline.py
@@ -18,7 +18,7 @@
 
 from .context import ConversationMessage, build_messages
 from .evaluator import evaluate as evaluate_response
-from .planner import Plan, plan as make_plan
+from .planner import Plan, plan as default_plan
 
 if TYPE_CHECKING:
     from .llm import LLMProvider
@@ -63,6 +63,7 @@ def run_pipeline(
     reflect: Callable[["Stimulus", str, "Perception"], None],
     constitution: str,
     tool_registry: "ToolRegistry",
+    plan_fn: Callable[["Stimulus", "Perception"], Plan] | None = None,
 ) -> str | None:
     """Run Perceive → Plan → Enrich → Generate → Evaluate → Deliver → Reflect.
 
@@ -71,6 +72,10 @@ def run_pipeline(
 
     The Plan is derived from the Perception — Psyche ResponseContract +
     Thronglets ambient priors — and then refined by GenerationControls.
+    Deployments can pass a custom ``plan_fn`` callable to layer their
+    own rules (e.g. user-defined standing instructions) on top of the
+    built-in rule engine; ``None`` falls back to ``planner.plan`` —
+    the SDK's pure rule engine.
     """
     logger.info(
         "pipeline: %s sender=%s post=%s",
@@ -81,7 +86,7 @@ def run_pipeline(
     perception = perceive(stimulus)
 
     # 2. Plan — zero cost, rule engine driven by Psyche + Thronglets
-    plan = make_plan(stimulus, perception)
+    plan = (plan_fn or default_plan)(stimulus, perception)
 
     # Psyche GenerationControls are hard limits layered over the Plan
     gc = perception.generation_controls
diff --git a/oasyce_sdk/agent/tools.py b/oasyce_sdk/agent/tools.py
@@ -73,11 +73,23 @@ class ToolContext:
 
 @dataclass
 class Tool:
-    """One tool: a schema the LLM sees, and a handler we execute."""
+    """One tool: a schema the LLM sees, and a handler we execute.
+
+    Fields:
+      name, schema, handler — the dispatch triple.
+      terminal — marks a write-side action that completes the turn.
+        The Generator's tool loop finishes the current round of tool
+        calls and then ends the turn if any of them was terminal, so
+        the LLM cannot re-emit the same action in a later round (e.g.
+        posting the same comment 2-3 times on a single mention). Read
+        tools (memory recall, balance query) stay non-terminal so the
+        LLM can synthesise a final answer from the result.
+    """
 
     name: str
     schema: dict[str, Any]
     handler: Callable[[dict[str, Any], ToolContext], str]
+    terminal: bool = False
 
 
 class ToolRegistry:
@@ -91,8 +103,12 @@ def register(
         name: str,
         schema: dict[str, Any],
         handler: Callable[[dict[str, Any], ToolContext], str],
+        *,
+        terminal: bool = False,
     ) -> None:
-        self._tools[name] = Tool(name=name, schema=schema, handler=handler)
+        self._tools[name] = Tool(
+            name=name, schema=schema, handler=handler, terminal=terminal,
+        )
 
     @property
     def definitions(self) -> list[dict[str, Any]]:
@@ -104,6 +120,15 @@ def select(self, names: list[str] | None) -> list[dict[str, Any]]:
             return self.definitions
         return [t.schema for t in self._tools.values() if t.name in names]
 
+    def is_terminal(self, name: str) -> bool:
+        """Whether ``name`` is a terminal (turn-completing) tool.
+
+        Unknown names return False — the loop continues, the unknown-tool
+        error is surfaced to the LLM via ``execute``.
+        """
+        tool = self._tools.get(name)
+        return bool(tool and tool.terminal)
+
     def execute(
         self,
         name: str,
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "oasyce-sdk"
-version = "0.12.0"
+version = "0.13.0"
 description = "Python SDK for Oasyce -- On-chain economic system for AI agents: escrow, service marketplace, data rights, reputation, dispute resolution"
 readme = "README.md"
 requires-python = ">=3.10"
diff --git a/tests/test_agent_modules.py b/tests/test_agent_modules.py