Skip to content

Commit 064dab8

Browse files
justrach and claude authored
Bump v0.0.27: SIGINT partial telemetry on early exit (#385)
* Bump v0.0.27: SIGINT handler sends partial telemetry on early exit

  Catches Ctrl+C during swarm runs and sends partial telemetry if at least one worker completed; skips telemetry entirely if zero agents finished (bad data). Adds an `"interrupted":true` field to the telemetry JSON for interrupted runs.

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* Bump npm/package.json to 0.0.27 for version consistency

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent c5b8b7f commit 064dab8

5 files changed

Lines changed: 226 additions & 7 deletions

File tree

build.zig

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ pub fn build(b: *std.Build) void {
55
const optimize = b.standardOptimizeOption(.{});
66

77
// Version: defaults to build.zig.zon value; override with -Dversion=X.Y.Z at release
8-
const version = b.option([]const u8, "version", "Version string") orelse "0.0.26";
8+
const version = b.option([]const u8, "version", "Version string") orelse "0.0.27";
99
const build_options = b.addOptions();
1010
build_options.addOption([]const u8, "version", version);
1111

build.zig.zon

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
.name = .codedb,
1010
// This is a [Semantic Version](https://semver.org/).
1111
// In a future version of Zig it will be used for package deduplication.
12-
.version = "0.0.26",
12+
.version = "0.0.27",
1313
// Together with name, this represents a globally unique package
1414
// identifier. This field is generated by the Zig toolchain when the
1515
// package is first created, and then *never changes*. This allows

npm/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "devswarm",
3-
"version": "0.0.26",
3+
"version": "0.0.27",
44
"description": "Orchestrate AI coding agents (Claude Code, Codex, Gemini CLI) with GitHub ops, swarm spawning, and code graph intelligence — over MCP",
55
"keywords": ["mcp", "github", "ai", "claude", "codex", "swarm", "agents", "orchestration"],
66
"homepage": "https://github.com/justrach/codedb",

src/swarm.zig

Lines changed: 119 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,30 @@ const notify = @import("notify.zig");
2020
/// Hard ceiling on parallel agents regardless of what the caller requests.
2121
pub const HARD_MAX: u32 = 100;
2222

23+
// ── SIGINT handling for partial telemetry ──────────────────────────────────────
24+
/// Set to `true` by `sigintHandler` when SIGINT is delivered. `runSwarm`
/// clears it before installing the handler and reads it afterwards to decide
/// whether to take the partial-telemetry path.
var g_interrupted: std.atomic.Value(bool) = std.atomic.Value(bool).init(false);

/// SIGINT disposition that was active before `installSigintHandler` replaced
/// it. `null` means nothing is currently saved (never installed, or already
/// restored).
var g_prev_sigint: ?std.posix.Sigaction = null;

/// Signal handler for SIGINT. It only performs an atomic store; the signal
/// number argument is ignored.
fn sigintHandler(_: c_int) callconv(.c) void {
    g_interrupted.store(true, .release);
}

/// Installs `sigintHandler` for SIGINT and remembers the disposition it
/// replaced so `restoreDefaultSigint` can put it back.
fn installSigintHandler() void {
    const act = std.posix.Sigaction{
        .handler = .{ .handler = sigintHandler },
        .mask = std.posix.sigemptyset(),
        .flags = 0,
    };
    var prev: std.posix.Sigaction = undefined;
    std.posix.sigaction(std.posix.SIG.INT, &act, &prev);

    // If install is called twice without a restore in between, `prev` is our
    // own handler; keep the first (outermost) saved disposition instead.
    if (g_prev_sigint == null) g_prev_sigint = prev;
}

/// Undoes `installSigintHandler`: reinstates the SIGINT disposition that was
/// in effect before the install. When nothing was saved, falls back to the
/// previous behaviour of this function (a null handler with an empty mask).
fn restoreDefaultSigint() void {
    const fallback = std.posix.Sigaction{
        // NOTE(review): a null handler is what the original code installed to
        // get the default disposition — presumably equivalent to SIG_DFL.
        .handler = .{ .handler = null },
        .mask = std.posix.sigemptyset(),
        .flags = 0,
    };
    const act = g_prev_sigint orelse fallback;
    g_prev_sigint = null;
    std.posix.sigaction(std.posix.SIG.INT, &act, null);
}
2347
// ── Worker ────────────────────────────────────────────────────────────────────
2448

2549
const Worker = struct {
@@ -134,6 +158,10 @@ pub fn runSwarm(
134158
) void {
135159
const cap: usize = @min(max_agents, HARD_MAX);
136160

161+
// ── Install SIGINT handler for partial telemetry ──────────────────────────
162+
g_interrupted.store(false, .release);
163+
installSigintHandler();
164+
defer restoreDefaultSigint();
137165
// ── Phase 0: Announce swarm start ────────────────────────────────────────
138166
{
139167
var msg_buf: [256]u8 = undefined;
@@ -323,7 +351,58 @@ pub fn runSwarm(
323351
if (w.allocated_prompt) |p| alloc.free(p);
324352
}
325353

326-
// ── Phase 3b: Capture file manifest for writable swarms ──────────────
354+
// ── Check for early interruption ─────────────────────────────────────────
355+
if (g_interrupted.load(.acquire)) {
356+
// Count how many workers actually completed successfully
357+
var completed: usize = 0;
358+
for (worker_metrics[0..count]) |m| {
359+
if (m.success) completed += 1;
360+
}
361+
362+
if (completed > 0) {
363+
// Send partial telemetry — at least one agent finished
364+
var grid = telemetry.GridMetrics.init(alloc, "worker");
365+
for (worker_metrics[0..count]) |*m| {
366+
m.*.role = workers[m.worker_id].role;
367+
m.*.model = workers[m.worker_id].model;
368+
grid.addWorker(alloc, m.*) catch {};
369+
}
370+
swarm_telemetry.addGrid(grid) catch {};
371+
372+
const telemetry_json = swarm_telemetry.toJson(alloc, true);
373+
if (telemetry_json.len > 0) {
374+
std.debug.print("\n[telemetry:interrupted] {d}/{d} agents completed\n", .{ completed, count });
375+
std.debug.print("[telemetry] {s}\n", .{telemetry_json});
376+
telemetry.upload(alloc, telemetry_json);
377+
378+
if (telemetry_out) |path| {
379+
const file = if (std.fs.path.isAbsolute(path))
380+
std.fs.createFileAbsolute(path, .{}) catch null
381+
else
382+
std.fs.cwd().createFile(path, .{}) catch null;
383+
if (file) |f| {
384+
defer f.close();
385+
f.writeAll(telemetry_json) catch {};
386+
f.writeAll("\n") catch {};
387+
}
388+
}
389+
alloc.free(telemetry_json);
390+
}
391+
} else {
392+
std.debug.print("\n[telemetry:interrupted] no agents completed — skipping telemetry\n", .{});
393+
}
394+
395+
// Write partial results to output
396+
for (workers[0..count]) |*w| {
397+
if (w.out.items.len > 0) {
398+
out.appendSlice(alloc, w.out.items) catch {};
399+
out.appendSlice(alloc, "\n") catch {};
400+
}
401+
w.out.deinit(std.heap.page_allocator);
402+
}
403+
appendErr(alloc, out, "swarm interrupted by user (Ctrl+C)");
404+
return;
405+
}
327406
var manifest: []const u8 = "";
328407
var manifest_alloc: ?[]u8 = null;
329408
defer if (manifest_alloc) |m| alloc.free(m);
@@ -397,7 +476,7 @@ pub fn runSwarm(
397476
swarm_telemetry.addGrid(grid) catch {};
398477

399478

400-
const telemetry_json = swarm_telemetry.toJson(alloc);
479+
const telemetry_json = swarm_telemetry.toJson(alloc, false);
401480
if (telemetry_json.len > 0) {
402481
std.debug.print("\n[telemetry] {s}\n", .{telemetry_json});
403482

@@ -461,4 +540,42 @@ test "swarm: appendErr writes JSON error object" {
461540
try std.testing.expect(parsed.value == .object);
462541
const msg = parsed.value.object.get("error") orelse return error.MissingError;
463542
try std.testing.expectEqualStrings("something went wrong", msg.string);
543+
try std.testing.expectEqualStrings("something went wrong", msg.string);
544+
}
545+
546+
test "swarm: g_interrupted starts false" {
    // The flag must read false when no SIGINT has been recorded.
    const interrupted = g_interrupted.load(.acquire);
    try std.testing.expect(!interrupted);
}
549+
550+
test "swarm: sigint handler sets interrupted flag" {
    // Start from a known state, and guarantee the flag is cleared again even
    // when an expectation below fails: `try` returns early, which would skip
    // a trailing reset and leave `true` behind for tests that run afterwards.
    g_interrupted.store(false, .release);
    defer g_interrupted.store(false, .release);
    try std.testing.expect(!g_interrupted.load(.acquire));

    // Call the handler directly — this exercises what it does on delivery
    // without actually raising SIGINT in the test process.
    sigintHandler(0);

    try std.testing.expect(g_interrupted.load(.acquire));
}
563+
564+
test "swarm: install and restore sigint handler" {
    // Smoke test: an install followed by a restore must complete without
    // crashing. Nothing is asserted beyond that.
    installSigintHandler();
    defer restoreDefaultSigint();
}
569+
570+
test "swarm: sigint handler is idempotent" {
    // Clear the flag up front and again on every exit path; a trailing reset
    // would be skipped if the expectation below failed.
    g_interrupted.store(false, .release);
    defer g_interrupted.store(false, .release);

    // Repeated deliveries must leave the flag set, not toggle it.
    for (0..3) |_| sigintHandler(0);

    try std.testing.expect(g_interrupted.load(.acquire));
}

src/telemetry.zig

Lines changed: 104 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ pub const SwarmTelemetry = struct {
178178
}
179179
}
180180

181-
pub fn toJson(self: *Self, alloc: std.mem.Allocator) []const u8 {
181+
pub fn toJson(self: *Self, alloc: std.mem.Allocator, interrupted: bool) []const u8 {
182182
self.finalize();
183183

184184
var buf: std.ArrayList(u8) = .empty;
@@ -223,6 +223,10 @@ pub const SwarmTelemetry = struct {
223223
const pt_str = std.fmt.bufPrint(&ms_buf, "{d}", .{self.parallelism_theoretical}) catch "";
224224
buf.appendSlice(alloc, pt_str) catch return "";
225225

226+
if (interrupted) {
227+
buf.appendSlice(alloc, ",\"interrupted\":true") catch return "";
228+
}
229+
226230
buf.appendSlice(alloc, "}") catch return "";
227231

228232
return alloc.dupe(u8, buf.items) catch "";
@@ -443,7 +447,7 @@ test "telemetry: SwarmTelemetry toJson produces valid JSON" {
443447
t.parallelism_theoretical = 4;
444448

445449

446-
const json = t.toJson(alloc);
450+
const json = t.toJson(alloc, false);
447451
defer alloc.free(json);
448452

449453
const parsed = try std.json.parseFromSlice(std.json.Value, alloc, json, .{});
@@ -456,6 +460,104 @@ test "telemetry: SwarmTelemetry toJson produces valid JSON" {
456460
try std.testing.expect(obj.get("grids") != null);
457461
try std.testing.expect(obj.get("total_cost_usd") != null);
458462
try std.testing.expect(obj.get("total_wall_ms") != null);
463+
try std.testing.expect(obj.get("total_wall_ms") != null);
464+
// Non-interrupted should NOT have "interrupted" field
465+
try std.testing.expect(obj.get("interrupted") == null);
466+
}
467+
468+
test "telemetry: toJson with interrupted=true includes interrupted field" {
    const gpa = std.testing.allocator;

    var swarm = SwarmTelemetry.init(gpa, "interrupted task");
    defer swarm.deinit();

    // A single successful worker in one grid.
    var worker_grid = GridMetrics.init(gpa, "worker");
    var finder = WorkerMetrics.init(0, "finder", "claude-sonnet-4-6");
    finder.tokens_in = 1000;
    finder.tokens_out = 500;
    finder.wall_ms = 2000;
    finder.success = true;
    try worker_grid.addWorker(gpa, finder);
    try swarm.addGrid(worker_grid);

    // Serialize as an interrupted run and parse the result back.
    const encoded = swarm.toJson(gpa, true);
    defer gpa.free(encoded);

    const doc = try std.json.parseFromSlice(std.json.Value, gpa, encoded, .{});
    defer doc.deinit();

    const root = parsed_root: {
        break :parsed_root doc.value.object;
    };

    // The flag must be present and true; the task name must round-trip.
    const flag = root.get("interrupted");
    try std.testing.expect(flag != null);
    try std.testing.expect(flag.?.bool);
    try std.testing.expectEqualStrings("interrupted task", root.get("task").?.string);
}
493+
494+
test "telemetry: toJson with interrupted=false omits interrupted field" {
    const gpa = std.testing.allocator;

    var swarm = SwarmTelemetry.init(gpa, "normal task");
    defer swarm.deinit();

    // Serialize a run that was not interrupted.
    const encoded = swarm.toJson(gpa, false);
    defer gpa.free(encoded);

    const doc = try std.json.parseFromSlice(std.json.Value, gpa, encoded, .{});
    defer doc.deinit();

    // The key must be absent altogether rather than present-but-false.
    const flag = doc.value.object.get("interrupted");
    try std.testing.expect(flag == null);
}
508+
509+
test "telemetry: partial telemetry with mixed worker success" {
    const gpa = std.testing.allocator;

    var swarm = SwarmTelemetry.init(gpa, "partial swarm");
    defer swarm.deinit();

    var worker_grid = GridMetrics.init(gpa, "worker");

    // Worker 0 finished successfully.
    var finder = WorkerMetrics.init(0, "finder", "claude-sonnet-4-6");
    finder.tokens_in = 3000;
    finder.tokens_out = 1200;
    finder.wall_ms = 5000;
    finder.tool_calls = 8;
    finder.success = true;
    try worker_grid.addWorker(gpa, finder);

    // Worker 1 was cut off mid-run and produced no output tokens.
    var reviewer = WorkerMetrics.init(1, "reviewer", "claude-sonnet-4-6");
    reviewer.tokens_in = 500;
    reviewer.tokens_out = 0;
    reviewer.wall_ms = 1200;
    reviewer.success = false;
    try worker_grid.addWorker(gpa, reviewer);

    // Worker 2 finished successfully.
    var fixer = WorkerMetrics.init(2, "fixer", "claude-sonnet-4-6");
    fixer.tokens_in = 2000;
    fixer.tokens_out = 800;
    fixer.wall_ms = 4000;
    fixer.tool_calls = 5;
    fixer.success = true;
    try worker_grid.addWorker(gpa, fixer);

    try swarm.addGrid(worker_grid);

    // Serialize as an interrupted run and parse the result back.
    const encoded = swarm.toJson(gpa, true);
    defer gpa.free(encoded);

    const doc = try std.json.parseFromSlice(std.json.Value, gpa, encoded, .{});
    defer doc.deinit();

    const root = doc.value.object;
    try std.testing.expect(root.get("interrupted").?.bool);

    // One grid holding all three workers — the unsuccessful one is reported too.
    const grid_list = root.get("grids").?.array;
    try std.testing.expectEqual(@as(usize, 1), grid_list.items.len);
    const worker_list = grid_list.items[0].object.get("workers").?.array;
    try std.testing.expectEqual(@as(usize, 3), worker_list.items.len);

    // A positive total cost is still expected for the workers that ran.
    const total_cost = root.get("total_cost_usd").?.float;
    try std.testing.expect(total_cost > 0);
}
460562

461563
test "telemetry: estimateCost returns expected values" {

0 commit comments

Comments
 (0)