Skip to content

Commit 13df78b

Browse files
authored
feat: implement new agent flow (#43)
1 parent 710b2f7 commit 13df78b

19 files changed

Lines changed: 3823 additions & 985 deletions

docs/superpowers/plans/2026-04-06-three-phase-agent-workflow.md

Lines changed: 1942 additions & 0 deletions
Large diffs are not rendered by default.

docs/superpowers/specs/2026-04-06-three-phase-workflow-design.md

Lines changed: 504 additions & 0 deletions
Large diffs are not rendered by default.

src/lib/dispatch.test.ts

Lines changed: 4 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,8 @@ vi.mock("workflow/api", () => ({
99
getRun: (...args: any[]) => mockGetRun(...args),
1010
}));
1111

12-
vi.mock("../workflows/implementation.js", () => ({
13-
implementationWorkflow: "implementationWorkflow_sentinel",
14-
}));
15-
16-
vi.mock("../workflows/review-fix.js", () => ({
17-
reviewFixWorkflow: "reviewFixWorkflow_sentinel",
12+
vi.mock("../workflows/agent.js", () => ({
13+
agentWorkflow: "agentWorkflow_sentinel",
1814
}));
1915

2016
const mockSandboxList = vi.fn();
@@ -102,7 +98,7 @@ describe("dispatchTicket", () => {
10298
mockStart.mockResolvedValue({ runId: "run_123" });
10399
});
104100

105-
it("dispatches implementation workflow when no PR exists", async () => {
101+
it("dispatches agentWorkflow for any ticket", async () => {
106102
const adapters = makeAdapters();
107103
const { dispatchTicket } = await import("./dispatch.js");
108104

@@ -114,32 +110,8 @@ describe("dispatchTicket", () => {
114110
expect.stringMatching(/^claiming:\d+$/),
115111
);
116112
expect(adapters.issueTracker.fetchTicket).toHaveBeenCalledWith("PROJ-42");
117-
expect(adapters.vcs.findPR).toHaveBeenCalledWith("blazebot/proj-42");
118-
expect(mockStart).toHaveBeenCalledWith("implementationWorkflow_sentinel", [
119-
"ticket-001",
120-
]);
121-
expect(adapters.runRegistry.register).toHaveBeenCalledWith(
122-
"PROJ-42",
123-
"run_123",
124-
);
125-
});
126-
127-
it("dispatches review-fix workflow when PR exists", async () => {
128-
const adapters = makeAdapters({
129-
findPR: vi.fn().mockResolvedValue({
130-
id: 7,
131-
url: "https://github.com/pr/7",
132-
branch: "blazebot/proj-42",
133-
}),
134-
});
135-
const { dispatchTicket } = await import("./dispatch.js");
136-
137-
const result = await dispatchTicket("PROJ-42", adapters, 5);
138-
139-
expect(result).toEqual({ started: true, runId: "run_123" });
140-
expect(mockStart).toHaveBeenCalledWith("reviewFixWorkflow_sentinel", [
113+
expect(mockStart).toHaveBeenCalledWith("agentWorkflow_sentinel", [
141114
"ticket-001",
142-
"blazebot/proj-42",
143115
]);
144116
expect(adapters.runRegistry.register).toHaveBeenCalledWith(
145117
"PROJ-42",

src/lib/dispatch.ts

Lines changed: 7 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import { start, getRun } from "workflow/api";
2-
import { implementationWorkflow } from "../workflows/implementation.js";
3-
import { reviewFixWorkflow } from "../workflows/review-fix.js";
2+
import { agentWorkflow } from "../workflows/agent.js";
43
import { logger } from "./logger.js";
54
import type { Adapters } from "./adapters.js";
65

@@ -25,7 +24,7 @@ export async function dispatchTicket(
2524
adapters: Adapters,
2625
maxConcurrentAgents: number,
2726
): Promise<DispatchResult> {
28-
const { issueTracker, vcs, runRegistry } = adapters;
27+
const { issueTracker, runRegistry } = adapters;
2928

3029
if (await runRegistry.isTicketFailed(ticketKey)) {
3130
logger.info({ ticketKey }, "dispatch_skipped_previously_failed");
@@ -45,9 +44,12 @@ export async function dispatchTicket(
4544

4645
try {
4746
const ticket = await issueTracker.fetchTicket(ticketKey);
48-
const branchName = `blazebot/${ticket.identifier.toLowerCase()}`;
4947

50-
const handle = await startWorkflow(ticket, branchName, vcs);
48+
const handle = await start(agentWorkflow, [ticket.id]);
49+
logger.info(
50+
{ ticketId: ticket.id, identifier: ticket.identifier, runId: handle.runId },
51+
"workflow_started",
52+
);
5153

5254
const claimStillHeld = await verifyClaimNotCancelled(
5355
ticketKey,
@@ -90,26 +92,6 @@ async function getActiveSandboxCount(): Promise<number> {
9092
}
9193
}
9294

93-
async function startWorkflow(
94-
ticket: { id: string; identifier: string },
95-
branchName: string,
96-
vcs: Adapters["vcs"],
97-
) {
98-
const existingPR = await vcs.findPR(branchName);
99-
100-
const handle = existingPR
101-
? await start(reviewFixWorkflow, [ticket.id, branchName])
102-
: await start(implementationWorkflow, [ticket.id]);
103-
104-
const workflowType = existingPR ? "review_fix" : "implementation";
105-
logger.info(
106-
{ ticketId: ticket.id, identifier: ticket.identifier, runId: handle.runId },
107-
`workflow_started_${workflowType}`,
108-
);
109-
110-
return handle;
111-
}
112-
11395
async function verifyClaimNotCancelled(
11496
ticketKey: string,
11597
expectedClaimValue: string,

src/lib/prompts.ts

Lines changed: 120 additions & 143 deletions
Large diffs are not rendered by default.

src/sandbox/agent-runner.test.ts

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@ import {
33
parseAgentOutput,
44
AGENT_SCHEMA,
55
type AgentOutput,
6+
parseResearchStatus,
7+
parseReviewOutput,
8+
REVIEW_SCHEMA,
69
} from "./agent-runner.js";
710

811
describe("parseAgentOutput", () => {
@@ -123,3 +126,97 @@ describe("AGENT_SCHEMA", () => {
123126
expect(() => JSON.parse(AGENT_SCHEMA)).not.toThrow();
124127
});
125128
});
129+
130+
// Unit tests for parseResearchStatus: the status header is read from the
// research agent's text output and the remainder is returned as the body.
describe("parseResearchStatus", () => {
  it("extracts completed status", () => {
    const raw = "STATUS: completed\n\n# Implementation Plan\n1. Create foo.ts";
    const { status, body } = parseResearchStatus(raw);
    expect(status).toBe("completed");
    expect(body).toContain("# Implementation Plan");
  });

  it("extracts clarification_needed status", () => {
    const raw = "STATUS: clarification_needed\n\n1. What database?\n2. Which auth?";
    const { status, body } = parseResearchStatus(raw);
    expect(status).toBe("clarification_needed");
    expect(body).toContain("What database?");
  });

  it("extracts failed status", () => {
    const raw = "STATUS: failed\n\nCould not access repository";
    const { status, body } = parseResearchStatus(raw);
    expect(status).toBe("failed");
    // The explicit-failure path must strip the header just like the other
    // statuses; previously `body` was destructured but never checked.
    expect(body).toBe("Could not access repository");
  });

  it("defaults to failed when no STATUS line", () => {
    const raw = "Here is my analysis of the codebase...";
    const { status, body } = parseResearchStatus(raw);
    expect(status).toBe("failed");
    expect(body).toContain("analysis");
  });

  it("handles STATUS line with extra whitespace", () => {
    const raw = " STATUS: completed \n\nPlan here";
    const { status } = parseResearchStatus(raw);
    expect(status).toBe("completed");
  });
});
164+
165+
// Unit tests for parseReviewOutput: bare review JSON, result envelopes, and
// the "failed" fallback for empty or non-JSON output.
describe("parseReviewOutput", () => {
  it("parses approved result", () => {
    const review = { result: "approved", feedback: "Looks good", issues: [] };

    const { result, feedback } = parseReviewOutput(JSON.stringify(review));

    expect(result).toBe("approved");
    expect(feedback).toBe("Looks good");
  });

  it("parses changes_requested result with issues", () => {
    const nullCheckIssue = {
      file: "src/foo.ts",
      description: "Missing null check",
      severity: "critical",
    };
    const review = {
      result: "changes_requested",
      feedback: "Several issues found",
      issues: [nullCheckIssue],
    };

    const parsed = parseReviewOutput(JSON.stringify(review));

    expect(parsed.result).toBe("changes_requested");
    expect(parsed.issues).toHaveLength(1);
    expect(parsed.issues[0].severity).toBe("critical");
  });

  it("returns failed on unparseable output", () => {
    const parsed = parseReviewOutput("not json");

    expect(parsed.result).toBe("failed");
    expect(parsed.error).toBeDefined();
  });

  it("returns failed on empty output", () => {
    const { result } = parseReviewOutput("");

    expect(result).toBe("failed");
  });

  it("extracts from result envelope", () => {
    // Single-line envelope: the review lives under `structured_output`.
    const structuredOutput = { result: "approved", feedback: "All good", issues: [] };
    const envelope = {
      type: "result",
      subtype: "success",
      is_error: false,
      structured_output: structuredOutput,
    };

    const { result } = parseReviewOutput(JSON.stringify(envelope));

    expect(result).toBe("approved");
  });
});
217+
218+
// REVIEW_SCHEMA is exported as a serialized string; it must round-trip through JSON.parse.
describe("REVIEW_SCHEMA", () => {
  it("is valid JSON", () => {
    const parseSchema = (): unknown => JSON.parse(REVIEW_SCHEMA);

    expect(parseSchema).not.toThrow();
  });
});

src/sandbox/agent-runner.ts

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,3 +103,111 @@ export function parseAgentOutput(raw: string): AgentOutput {
103103
error: `Agent output was not structured JSON. Output starts with: ${raw.slice(0, 500)}`,
104104
};
105105
}
106+
107+
// --- Research Status Parser ---

/** Status values accepted on the `STATUS:` header line of research output. */
export type ResearchStatus = "completed" | "clarification_needed" | "failed";

/** Result of splitting research output into its status header and remaining text. */
export interface ResearchResult {
  status: ResearchStatus;
  body: string;
}

const VALID_RESEARCH_STATUSES: readonly ResearchStatus[] = ["completed", "clarification_needed", "failed"];

/** Narrows an arbitrary string to one of the known research statuses. */
function isResearchStatus(value: string): value is ResearchStatus {
  return (VALID_RESEARCH_STATUSES as readonly string[]).includes(value);
}

/**
 * Reads the `STATUS: <value>` header from research output.
 *
 * The header is the first non-blank line. Both the `STATUS:` keyword and the
 * value are matched case-insensitively, and the returned status is always the
 * canonical lowercase form.
 *
 * @param raw - Full text output to inspect.
 * @returns The recognised status with the trimmed text that follows the header
 *   line. When there is no header, or its value is not a known status, returns
 *   `{ status: "failed", body: raw }` with `raw` untouched.
 */
export function parseResearchStatus(raw: string): ResearchResult {
  const lines = raw.split("\n");

  // Skip leading blank lines so output that starts with a newline is still recognised.
  const headerIndex = lines.findIndex((line) => line.trim() !== "");
  const header = lines[headerIndex]?.trim() ?? "";

  // The regex is case-insensitive, so normalise the captured value before
  // comparing it with the lowercase list of valid statuses.
  const candidate = header.match(/^STATUS:\s*(\S+)/i)?.[1]?.toLowerCase();

  if (candidate !== undefined && isResearchStatus(candidate)) {
    const body = lines.slice(headerIndex + 1).join("\n").trim();
    return { status: candidate, body };
  }

  return { status: "failed", body: raw };
}
130+
131+
// --- Review Output Schema ---

// A single finding: the file it concerns, what is wrong, and how serious it is.
const reviewIssueSchema = z.object({
  file: z.string(),
  description: z.string(),
  severity: z.enum(["critical", "suggestion"]),
});

// Runtime validator for review output; `error` is the only optional field.
const reviewOutputSchema = z.object({
  result: z.enum(["approved", "changes_requested", "failed"]),
  feedback: z.string(),
  issues: z.array(reviewIssueSchema),
  error: z.string().optional(),
});

/** Validated review result, inferred from `reviewOutputSchema`. */
export type ReviewOutput = z.infer<typeof reviewOutputSchema>;
145+
146+
// JSON Schema for one entry of the `issues` array.
const reviewIssueJsonSchema = {
  type: "object",
  properties: {
    file: { type: "string" },
    description: { type: "string" },
    severity: { type: "string", enum: ["critical", "suggestion"] },
  },
  required: ["file", "description", "severity"],
};

// Top-level properties of the review object, in serialization order.
const reviewJsonProperties = {
  result: {
    type: "string",
    enum: ["approved", "changes_requested", "failed"],
  },
  feedback: { type: "string" },
  issues: { type: "array", items: reviewIssueJsonSchema },
  error: { type: "string" },
};

/** JSON Schema describing the review output object, serialized to a string. */
export const REVIEW_SCHEMA = JSON.stringify({
  type: "object",
  properties: reviewJsonProperties,
  required: ["result", "feedback", "issues"],
});
170+
171+
/**
 * Extracts a validated review result from the review agent's raw output.
 *
 * Strategies are tried in order and the first value that validates wins:
 *   1. the whole output as a single JSON document;
 *   2. each non-empty line, last to first (stream-json / result-envelope format);
 *   3. brace-delimited substrings embedded in surrounding text.
 *
 * At every step a decoded value is accepted either as a bare review object or
 * as a `{ type: "result", structured_output: ... }` envelope wrapping one. This
 * also covers pretty-printed (multi-line) envelopes, which the per-line pass
 * cannot decode.
 *
 * @param raw - Raw text produced by the review agent.
 * @returns The validated review. When nothing validates (including empty
 *   output), a `result: "failed"` object whose `error` explains why.
 *   This function does not throw on malformed input.
 */
export function parseReviewOutput(raw: string): ReviewOutput {
  if (!raw.trim()) {
    return { result: "failed", feedback: "", issues: [], error: "Review agent produced no output" };
  }

  // Validates an already-decoded JSON value, unwrapping a result envelope first.
  const fromValue = (value: unknown): ReviewOutput | undefined => {
    if (typeof value === "object" && value !== null) {
      const envelope = value as { type?: unknown; structured_output?: unknown };
      if (envelope.type === "result" && envelope.structured_output != null) {
        const wrapped = reviewOutputSchema.safeParse(envelope.structured_output);
        if (wrapped.success) return wrapped.data;
      }
    }

    const bare = reviewOutputSchema.safeParse(value);
    return bare.success ? bare.data : undefined;
  };

  // Decodes text as JSON and validates it. Malformed JSON is an expected
  // outcome here (best-effort probing), so it maps to `undefined`, not an error.
  const fromText = (text: string): ReviewOutput | undefined => {
    try {
      return fromValue(JSON.parse(text));
    } catch {
      return undefined;
    }
  };

  // Whole-document parse
  const whole = fromText(raw);
  if (whole) return whole;

  // Stream-json / result-envelope format: scan from the last line backwards.
  // A single non-empty line decodes to the same value as the whole document,
  // so it is only worth scanning when there are several lines.
  const lines = raw.split("\n").filter(Boolean);
  if (lines.length > 1) {
    for (const line of [...lines].reverse()) {
      const fromLine = fromText(line);
      if (fromLine) return fromLine;
    }
  }

  // Fallback: extract JSON objects (at most one level of nested braces) from
  // surrounding non-JSON text.
  for (const [candidate] of raw.matchAll(/\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}/g)) {
    const embedded = fromText(candidate);
    if (embedded) return embedded;
  }

  return {
    result: "failed",
    feedback: "",
    issues: [],
    error: `Review output was not structured JSON. Output starts with: ${raw.slice(0, 500)}`,
  };
}

0 commit comments

Comments
 (0)