diff --git a/.changeset/swarm-agent-orchestration.md b/.changeset/swarm-agent-orchestration.md new file mode 100644 index 00000000..8ad07664 --- /dev/null +++ b/.changeset/swarm-agent-orchestration.md @@ -0,0 +1,6 @@ +--- +"@moonshot-ai/agent-core": minor +"@moonshot-ai/kimi-code": minor +--- + +Add `/swarm` command and Swarm tool: decompose a task into parallel role-specialized subagents and synthesize their results. diff --git a/apps/kimi-code/src/tui/commands/dispatch.ts b/apps/kimi-code/src/tui/commands/dispatch.ts index 3bd878b0..e47fe0d7 100644 --- a/apps/kimi-code/src/tui/commands/dispatch.ts +++ b/apps/kimi-code/src/tui/commands/dispatch.ts @@ -42,6 +42,7 @@ import { handleInitCommand, handleTitleCommand, } from './session'; +import { handleSwarmCommand } from './swarm'; // --------------------------------------------------------------------------- // Re-exports — keep existing consumers working @@ -255,6 +256,9 @@ async function handleBuiltInSlashCommand( case 'plan': await handlePlanCommand(host, args); return; + case 'swarm': + await handleSwarmCommand(host, args); + return; case 'compact': await handleCompactCommand(host, args); return; diff --git a/apps/kimi-code/src/tui/commands/registry.ts b/apps/kimi-code/src/tui/commands/registry.ts index faf76b57..ab771269 100644 --- a/apps/kimi-code/src/tui/commands/registry.ts +++ b/apps/kimi-code/src/tui/commands/registry.ts @@ -36,6 +36,13 @@ export const BUILTIN_SLASH_COMMANDS = [ priority: 100, availability: (args) => (args.trim().toLowerCase() === 'clear' ? 
/**
 * Builds the prompt that instructs the model to delegate `task` to the Swarm
 * tool. The task text is appended verbatim after a fixed four-line preamble,
 * with lines joined by `\n`.
 *
 * @param task - The user's task description (already trimmed by the caller).
 * @returns The full prompt text to send to the session.
 */
export function buildSwarmPrompt(task: string): string {
  return [
    'Use the Swarm tool to accomplish the following task.',
    'Call the Swarm tool exactly once with this task as its `task` argument; do not do the work yourself.',
    '',
    'Task:',
    task,
  ].join('\n');
}

/**
 * Handles the `/swarm <task>` slash command.
 *
 * Reports an error through `host.showError` when there is no active session
 * or when `args` is blank. Otherwise opens a session request and sends the
 * swarm prompt; a rejected `session.prompt` is reported through
 * `host.failSessionRequest` rather than rethrown.
 *
 * @param host - Slash-command host providing the session and UI callbacks.
 * @param args - Raw text following `/swarm`; surrounding whitespace is ignored.
 */
export async function handleSwarmCommand(host: SlashCommandHost, args: string): Promise<void> {
  const session = host.session;
  if (session === undefined) {
    host.showError(NO_ACTIVE_SESSION_MESSAGE);
    return;
  }
  const task = args.trim();
  if (task.length === 0) {
    // Name the expected argument so the message is actionable.
    host.showError('Usage: /swarm <task>');
    return;
  }
  // Route through the same session-request lifecycle as a normal send /
  // skill activation rather than calling session.prompt raw. beginSessionRequest
  // flips streamingPhase out of 'idle' synchronously, so the input gate closes
  // immediately and shows the waiting pane; otherwise, during the window before
  // turn.started arrives the UI still thinks it is idle and a fast follow-up
  // message could be dispatched as a second concurrent prompt and be silently
  // dropped as agent_busy.
  host.beginSessionRequest();
  try {
    await session.prompt(buildSwarmPrompt(task));
  } catch (error) {
    host.failSessionRequest(`Failed to start swarm: ${formatErrorMessage(error)}`);
  }
}
/** Lifecycle phase of the whole swarm run. */
export type SwarmPhase = 'planning' | 'working' | 'synthesizing' | 'done' | 'cancelled' | 'failed';
/** State of one worker row; `running` and `retrying` are in-flight, the rest are terminal. */
export type WorkerStatus = 'running' | 'done' | 'failed' | 'retrying' | 'dropped';

/** One dashboard row, keyed in `SwarmModel.workers` by its `id`. */
export interface WorkerRow {
  id: string;
  role: string;
  status: WorkerStatus;
  toolCount: number;
  latestActivity?: string;
  tokens?: number;
  error?: string;
}

export interface SwarmModel {
  task: string;
  phase: SwarmPhase;
  total: number;
  doneCount: number;
  failedCount: number;
  droppedCount: number;
  /** Rows in display order (Map insertion order). */
  workers: Map<string, WorkerRow>;
  /** Set when phase is 'failed': the reason the whole swarm errored out. */
  failureMessage?: string;
}

export type SwarmEvent =
  | { t: 'planned'; total: number }
  | { t: 'synthesizing' }
  | { t: 'done'; succeeded: number; failed: number }
  | { t: 'cancelled' }
  | { t: 'failed'; message: string }
  | { t: 'worker.spawned'; id: string; role: string }
  | { t: 'worker.toolcall'; id: string; activity: string }
  | { t: 'worker.tokens'; id: string; tokens: number }
  | { t: 'worker.done'; id: string; tokens?: number }
  | { t: 'worker.failed'; id: string; error: string }
  | { t: 'worker.retrying'; role: string }
  | { t: 'worker.reassigned'; fromRole: string; toRole: string }
  | { t: 'worker.dropped'; role: string; reason: string };

/** Names of the three summary counters on `SwarmModel`. */
type CountKey = 'doneCount' | 'failedCount' | 'droppedCount';
/** Signed per-counter deltas; absent keys mean "no change". */
type CountDeltas = Partial<Record<CountKey, number>>;

/** Creates the empty model for a swarm that has not been planned yet. */
export function initialSwarmModel(task: string): SwarmModel {
  return {
    task,
    phase: 'planning',
    total: 0,
    doneCount: 0,
    failedCount: 0,
    droppedCount: 0,
    workers: new Map<string, WorkerRow>(),
  };
}

/**
 * Which summary counter (if any) a worker status contributes to. `running` and
 * `retrying` are in-flight states that count toward nothing; the three terminal
 * states each map to exactly one counter. Used to keep `doneCount`/
 * `failedCount`/`droppedCount` consistent as a row transitions across attempts
 * (e.g. failed → retrying → running → done) without ever double-counting.
 */
function countKeyFor(status: WorkerStatus): CountKey | null {
  if (status === 'done') return 'doneCount';
  if (status === 'failed') return 'failedCount';
  if (status === 'dropped') return 'droppedCount';
  // 'running' and 'retrying' are in-flight states — they count toward nothing.
  return null;
}

/** Counter adjustments to move a row from `prev` to `next` status. */
function countAdjustments(prev: WorkerStatus, next: WorkerStatus): CountDeltas {
  const from = countKeyFor(prev);
  const to = countKeyFor(next);
  if (from === to) return {};
  const adj: CountDeltas = {};
  if (from !== null) adj[from] = -1;
  // `from !== to` here, so the two writes can never land on the same key.
  if (to !== null) adj[to] = 1;
  return adj;
}

/** Apply count deltas onto a model, clamping at zero. */
function withCounts(model: SwarmModel, adj: CountDeltas): Pick<SwarmModel, CountKey> {
  return {
    doneCount: Math.max(0, model.doneCount + (adj.doneCount ?? 0)),
    failedCount: Math.max(0, model.failedCount + (adj.failedCount ?? 0)),
    droppedCount: Math.max(0, model.droppedCount + (adj.droppedCount ?? 0)),
  };
}

/** A status the recovery loop can collapse a re-spawn onto (one row per role). */
function isReusableForRespawn(status: WorkerStatus): boolean {
  return status === 'failed' || status === 'dropped' || status === 'retrying';
}

/** Last row in map order for a role (any status), or undefined. */
function findRoleRow(workers: Map<string, WorkerRow>, role: string): WorkerRow | undefined {
  let match: WorkerRow | undefined;
  for (const w of workers.values()) {
    if (w.role === role) match = w;
  }
  return match;
}

/**
 * Last row in map order for a role that a re-spawn or revise can collapse onto
 * (terminal-but-not-done, or retrying). Running rows are skipped so concurrent
 * same-role workers in a single run keep distinct rows.
 */
function findReusableRoleRow(
  workers: Map<string, WorkerRow>,
  role: string,
): WorkerRow | undefined {
  let match: WorkerRow | undefined;
  for (const w of workers.values()) {
    if (w.role === role && isReusableForRespawn(w.status)) match = w;
  }
  return match;
}

/**
 * Pure reducer for the swarm dashboard: folds one event into the model and
 * returns the next model. The input model and its `workers` map are never
 * mutated; when an event changes nothing the SAME model reference is returned.
 *
 * Note that the `done` event only sets the phase — its `succeeded`/`failed`
 * fields are not read here; the counters are maintained from worker events.
 */
export function applySwarmEvent(model: SwarmModel, event: SwarmEvent): SwarmModel {
  switch (event.t) {
    case 'planned':
      return { ...model, phase: 'working', total: event.total };
    case 'synthesizing':
      return { ...model, phase: 'synthesizing' };
    case 'done':
      return { ...model, phase: 'done' };
    case 'cancelled':
      return { ...model, phase: 'cancelled' };
    case 'failed':
      return { ...model, phase: 'failed', failureMessage: event.message };
    case 'worker.spawned': {
      if (model.workers.has(event.id)) return model;
      const fresh: WorkerRow = {
        id: event.id,
        role: event.role,
        status: 'running',
        toolCount: 0,
      };
      // Recovery: if a row for this role exists in a failed/dropped/retrying
      // state, a re-spawn is the SAME subtask running again. Reuse that row
      // (re-key it to the new subagent id, reset to running, clear the error)
      // so the role keeps a single dashboard row across attempts instead of
      // accumulating duplicates. Running rows are never reused, so single-run
      // same-role fan-out is intact.
      const prior = findReusableRoleRow(model.workers, event.role);
      if (prior === undefined) {
        const workers = new Map(model.workers);
        workers.set(fresh.id, fresh);
        return { ...model, workers };
      }
      // Rebuild the map so the re-keyed row keeps its original position; a
      // delete + set would move it to the end and make the rows jump.
      const workers = new Map<string, WorkerRow>();
      for (const [id, row] of model.workers) {
        if (id === prior.id) workers.set(fresh.id, fresh);
        else workers.set(id, row);
      }
      return {
        ...model,
        workers,
        ...withCounts(model, countAdjustments(prior.status, 'running')),
      };
    }
    case 'worker.toolcall': {
      const w = model.workers.get(event.id);
      // Unknown worker: nothing to update, keep the model identity stable.
      if (w === undefined) return model;
      const workers = new Map(model.workers);
      workers.set(event.id, { ...w, toolCount: w.toolCount + 1, latestActivity: event.activity });
      return { ...model, workers };
    }
    case 'worker.tokens': {
      const w = model.workers.get(event.id);
      if (w === undefined) return model;
      const workers = new Map(model.workers);
      workers.set(event.id, { ...w, tokens: event.tokens });
      return { ...model, workers };
    }
    case 'worker.done': {
      const w = model.workers.get(event.id);
      if (w === undefined) return model;
      const workers = new Map(model.workers);
      workers.set(event.id, {
        ...w,
        status: 'done',
        latestActivity: undefined,
        // Keep the last live token figure unless the event carries a final one.
        ...(event.tokens !== undefined ? { tokens: event.tokens } : {}),
      });
      return { ...model, workers, ...withCounts(model, countAdjustments(w.status, 'done')) };
    }
    case 'worker.failed': {
      const w = model.workers.get(event.id);
      if (w === undefined) return model;
      const workers = new Map(model.workers);
      workers.set(event.id, {
        ...w,
        status: 'failed',
        latestActivity: undefined,
        error: event.error,
      });
      return { ...model, workers, ...withCounts(model, countAdjustments(w.status, 'failed')) };
    }
    case 'worker.retrying': {
      // The coordinator decided to re-run this role's subtask. Keep its row
      // visible but mark it retrying (an in-flight, uncounted state) so the
      // re-spawn can collapse onto it. Carries no subagent id, so we match by
      // role against the last failed/dropped/retrying row.
      const prior = findReusableRoleRow(model.workers, event.role);
      if (prior === undefined || prior.status === 'retrying') return model;
      const workers = new Map(model.workers);
      workers.set(prior.id, { ...prior, status: 'retrying', latestActivity: undefined });
      return {
        ...model,
        workers,
        ...withCounts(model, countAdjustments(prior.status, 'retrying')),
      };
    }
    case 'worker.reassigned': {
      // The reviser moved this subtask to a new role. Relabel the SAME row from
      // the old role to the new one and mark it retrying so the subsequent
      // worker.spawned for the new role reuses THIS row (one row per subtask)
      // instead of stranding the old-role row in 'retrying' forever. If no
      // old-role row exists, no-op — there is nothing to correlate.
      const prior = findReusableRoleRow(model.workers, event.fromRole);
      if (prior === undefined) return model;
      const workers = new Map(model.workers);
      workers.set(prior.id, {
        ...prior,
        role: event.toRole,
        status: 'retrying',
        latestActivity: undefined,
        error: undefined,
      });
      return {
        ...model,
        workers,
        ...withCounts(model, countAdjustments(prior.status, 'retrying')),
      };
    }
    case 'worker.dropped': {
      // The coordinator gave up on this role's subtask. Mark its row dropped
      // (or create a dropped row if the subtask never spawned a worker) and
      // record the reason.
      const prior =
        findReusableRoleRow(model.workers, event.role) ?? findRoleRow(model.workers, event.role);
      const workers = new Map(model.workers);
      if (prior === undefined) {
        // No row yet (dropped before ever spawning): synthesize one keyed by
        // the role label so the gap is visible.
        workers.set(event.role, {
          id: event.role,
          role: event.role,
          status: 'dropped',
          toolCount: 0,
          error: event.reason,
        });
        return { ...model, workers, ...withCounts(model, countAdjustments('running', 'dropped')) };
      }
      workers.set(prior.id, {
        ...prior,
        status: 'dropped',
        latestActivity: undefined,
        error: event.reason,
      });
      return {
        ...model,
        workers,
        ...withCounts(model, countAdjustments(prior.status, 'dropped')),
      };
    }
    default: {
      // Compile-time exhaustiveness check; at runtime an unrecognised event is
      // ignored, exactly as before.
      const unhandled: never = event;
      void unhandled;
      return model;
    }
  }
}

/**
 * Short human-readable label for a worker's tool call, e.g. `read <path>` or
 * `grep "<pattern>"`. A recognised tool whose expected argument is missing or
 * not a string yields just the verb; an unrecognised tool yields its name.
 *
 * @param name - Tool name as reported by the tool-call event.
 * @param args - Tool arguments; only string-valued fields are used.
 */
export function workerActivityFromTool(name: string, args: Record<string, unknown>): string {
  const s = (v: unknown): string | undefined => (typeof v === 'string' ? v : undefined);
  switch (name) {
    case 'Read': {
      const p = s(args['path']);
      return p !== undefined ? `read ${p}` : 'read';
    }
    case 'Grep': {
      const p = s(args['pattern']);
      return p !== undefined ? `grep "${p}"` : 'grep';
    }
    case 'Glob': {
      const p = s(args['pattern']);
      return p !== undefined ? `glob ${p}` : 'glob';
    }
    case 'WebSearch': {
      const q = s(args['query']);
      return q !== undefined ? `search "${q}"` : 'search';
    }
    case 'FetchURL': {
      const u = s(args['url']);
      return u !== undefined ? `fetch ${u}` : 'fetch';
    }
    default:
      return name;
  }
}
+ private swarmModel: SwarmModel | undefined; + /** * Registered by a group container (`AgentGroupComponent` or * `ReadGroupComponent`) when this component is borrowed as a hidden state @@ -545,6 +565,9 @@ export class ToolCallComponent extends Container { this.colors = colors; this.ui = ui; this.markdownTheme = markdownTheme; + if (toolCall.name === 'Swarm') { + this.swarmModel = initialSwarmModel(str(toolCall.args['task'])); + } this.applySubagentReplay(toolCall.subagent); this.addChild(new Spacer(1)); @@ -587,6 +610,7 @@ export class ToolCallComponent extends Container { // authoritative final state. Without this clear, a finished tool would // show both the streamed status lines and the final output stacked. this.progressLines = []; + this.finalizeSwarmModelIfNeeded(result); this.finalizeSubagentElapsedIfNeeded(); this.syncStreamingProgressTimer(); this.syncSubagentElapsedTimer(); @@ -629,6 +653,51 @@ export class ToolCallComponent extends Container { this.ui?.requestRender(); } + /** True when this tool call drives the `Swarm` coordinator dashboard. */ + isSwarm(): boolean { + return this.toolCall.name === 'Swarm'; + } + + /** + * Fold a swarm dashboard event into the model and re-render in place. + * No-ops for non-swarm tool calls so callers can route blindly. Mirrors + * {@link ToolCallComponent.appendProgress} so the swarm card stays a single, + * stable component managed by the normal tool-call lifecycle. + */ + applySwarm(event: SwarmEvent): void { + if (this.swarmModel === undefined) return; + this.swarmModel = applySwarmEvent(this.swarmModel, event); + this.headerText.setText(this.buildHeader()); + this.rebuildBody(); + this.notifySnapshotChange(); + this.ui?.requestRender(); + } + + /** + * Drives the swarm dashboard to its terminal state when the tool result + * lands. 
An ordinary failure (planner/synthesizer error) has already driven + * the model to 'failed' via a progress event carrying the reason, so leave it + * be; only a genuine abort/cancel reaches an error result still non-terminal, + * so finalize that as cancelled. A success result ensures the header shows + * the summary even if the `done` progress event was missed. + */ + private finalizeSwarmModelIfNeeded(result: ToolResultBlockData): void { + if (this.swarmModel === undefined) return; + if (result.is_error === true) { + if (this.swarmModel.phase !== 'failed') { + this.swarmModel = applySwarmEvent(this.swarmModel, { t: 'cancelled' }); + } + return; + } + if (this.swarmModel.phase !== 'done' && this.swarmModel.phase !== 'cancelled') { + this.swarmModel = applySwarmEvent(this.swarmModel, { + t: 'done', + succeeded: this.swarmModel.doneCount, + failed: this.swarmModel.failedCount, + }); + } + } + dispose(): void { this.stopStreamingProgressTimer(); this.stopSubagentElapsedTimer(); @@ -1156,6 +1225,10 @@ export class ToolCallComponent extends Container { bullet = chalk.hex(colors.roleAssistant)(STATUS_BULLET); } + if (this.swarmModel !== undefined) { + return this.buildSwarmHeader(); + } + if (toolCall.name === 'ExitPlanMode') { const label = chalk.hex(colors.primary).bold('Current plan'); if (!isFinished || result === undefined || result.is_error === true) { @@ -1211,6 +1284,139 @@ export class ToolCallComponent extends Container { return tone(` · ${text}`); } + // ── Swarm dashboard rendering ──────────────────────────────────── + // + // The swarm card mirrors `AgentGroupComponent`'s gutter/indent/color + // vocabulary. No animated, per-render content is used so the rendered lines + // stay identical across consecutive renders — the property that lets + // pi-tui's differential renderer keep one stable card. + + /** + * Single-line header for the Swarm card (carried by `headerText`). 
Mirrors + * `AgentGroupComponent.buildHeader`: a status bullet (roleAssistant while + * active, success when terminal), the bold `Swarm` label, a dim `· title` + * segment (omitted when empty so no dangling `·`), and a dim phase/summary + * tail. The displayed task is sourced live from the tool-call args rather + * than the stale model so it reflects the fully-streamed task string. + */ + private buildSwarmHeader(): string { + const c = this.colors; + const m = this.swarmModel; + if (m === undefined) return ''; + const rawTask = str(this.toolCall.args['task']).replace(/\s+/g, ' ').trim(); + const title = rawTask.length > 56 ? `${rawTask.slice(0, 56)}…` : rawTask; + const label = chalk.hex(c.primary).bold('Swarm'); + const titlePart = title.length > 0 ? chalk.dim(` · ${title}`) : ''; + const terminal = m.phase === 'done' || m.phase === 'cancelled' || m.phase === 'failed'; + const bullet = + m.phase === 'failed' + ? chalk.hex(c.error)(STATUS_BULLET) + : terminal + ? chalk.hex(c.success)(STATUS_BULLET) + : chalk.hex(c.roleAssistant)(STATUS_BULLET); + let tail: string; + if (terminal) { + const tag = + m.phase === 'cancelled' ? ' · cancelled' : m.phase === 'failed' ? ' · failed' : ''; + // Surface drops alongside ✓/✗ so a recovered-with-gaps run is honest about + // the missing subtasks; omitted when zero to keep the common run compact. + const droppedPart = m.droppedCount > 0 ? 
` ${String(m.droppedCount)}⊘` : ''; + tail = chalk.dim( + ` · ${String(m.workers.size)} workers · ${String(m.doneCount)}✓ ${String(m.failedCount)}✗${droppedPart}${tag}`, + ); + } else if (m.phase === 'planning') { + tail = chalk.dim(' · planning…'); + } else if (m.phase === 'synthesizing') { + tail = chalk.dim(' · synthesizing…'); + } else { + tail = chalk.dim(` · ${String(m.doneCount + m.failedCount)}/${String(m.total)} workers`); + } + return `${bullet}${label}${titlePart}${tail}`; + } + + /** + * Renders one or two gutter lines per worker into the body, mirroring + * `AgentGroupComponent.appendLines` (the `├─`/`└─`/`│` vocabulary, the + * 2-space lead, and the dim/primary/error coloring). While still planning + * with no workers yet, a single dim placeholder line keeps the card from + * rendering blank. + */ + private buildSwarmBody(): void { + const m = this.swarmModel; + if (m === undefined) return; + const workers = [...m.workers.values()]; + if (m.phase === 'planning' && workers.length === 0) { + this.addChild(new Text(` ${chalk.dim('└─ planning subtasks…')}`, 0, 0)); + return; + } + workers.forEach((w, idx) => { + const isLast = idx === workers.length - 1; + for (const line of this.buildSwarmWorkerLine(w, isLast)) { + this.addChild(new Text(line, 0, 0)); + } + }); + // A whole-swarm failure (planner/synthesizer error) surfaces its reason as + // an error line so the card is honest about what went wrong instead of + // hiding the message behind a 'cancelled'-looking header. + if (m.phase === 'failed') { + const reason = m.failureMessage ?? 'swarm failed'; + this.addChild(new Text(` ${chalk.hex(this.colors.error)(`✗ ${reason}`)}`, 0, 0)); + } + } + + /** + * Builds the gutter lines for one worker. Line 1 carries the branch, the + * role, and a dim stats tail; line 2 (omitted once the worker is done) + * carries the latest activity or the failure reason. Matches + * `AgentGroupComponent`'s two-line gutter format. 
+ */ + private buildSwarmWorkerLine(w: WorkerRow, isLast: boolean): string[] { + const c = this.colors; + const branch1 = isLast ? '└─' : '├─'; + const branch2 = isLast ? ' ' : '│ '; + const role = chalk.hex(c.primary)(w.role); + + // Live token counts are shown for every worker (running, retrying, done) so + // the dashboard stays consistent with `AgentGroupComponent`, which renders + // live tokens for all subagents from `agent.status.updated`. Running workers + // get their figure from `worker.tokens`; done workers from `worker.done`. + const tok = w.tokens !== undefined && w.tokens > 0 ? ` · ${formatTokens(w.tokens)}` : ''; + let statsPart = ''; + if (w.status === 'done') { + statsPart = chalk.dim(` · ${String(w.toolCount)} call${w.toolCount === 1 ? '' : 's'}${tok}`); + } else if (w.status === 'retrying') { + statsPart = chalk.dim(` · retrying…${tok}`); + } else if (w.status === 'running' && w.toolCount > 0) { + statsPart = chalk.dim(` · ${String(w.toolCount)} call${w.toolCount === 1 ? '' : 's'}${tok}`); + } + const line1 = ` ${branch1} ${role}${statsPart}`; + + if (w.status === 'done') { + return [line1]; + } + // Retrying is a transient in-flight state shown as a single line so the + // role's row stays visible (and stable) while the coordinator re-runs it. + // Dim the role label to match the 'dropped' convention: non-running rows + // (retrying, dropped) use a dimmed label, running/done/failed keep primary. + if (w.status === 'retrying') { + return [` ${branch1} ${chalk.dim(w.role)}${statsPart}`]; + } + if (w.status === 'failed') { + const errLine = chalk.hex(c.error)(`failed: ${w.error ?? 'error'}`); + return [line1, ` ${branch2} ${errLine}`]; + } + // Dropped: the coordinator gave up on this subtask. Dim the row and show the + // reason on the second gutter line so the gap is explicit, not silent. + if (w.status === 'dropped') { + const dropLine = chalk.dim(`dropped: ${w.error ?? 
'no reason'}`); + return [` ${branch1} ${chalk.dim(w.role)}`, ` ${branch2} ${dropLine}`]; + } + const raw = w.latestActivity ?? 'starting…'; + const activity = + raw.length > SWARM_ACTIVITY_MAX_LENGTH ? `${raw.slice(0, SWARM_ACTIVITY_MAX_LENGTH)}…` : raw; + return [line1, ` ${branch2} ${chalk.dim(`now: ${activity}`)}`]; + } + private rebuildContent(): void { while (this.children.length > this.callPreviewEndIndex) { this.children.pop(); @@ -1242,6 +1448,7 @@ export class ToolCallComponent extends Container { * styled individually so surrounding prose keeps its default dim tone. */ private buildProgressBlock(): void { + if (this.swarmModel !== undefined) return; if (this.progressLines.length === 0) return; if (this.result !== undefined) return; for (const raw of this.progressLines) { @@ -1262,6 +1469,7 @@ export class ToolCallComponent extends Container { } private buildSubagentBlock(): void { + if (this.swarmModel !== undefined) return; if ( this.subagentAgentId === undefined && this.ongoingSubCalls.size === 0 && @@ -1537,6 +1745,10 @@ export class ToolCallComponent extends Container { private buildCallPreview(): void { const name = this.toolCall.name; + if (this.swarmModel !== undefined) { + this.buildSwarmBody(); + return; + } if (name === 'ExitPlanMode') { this.buildPlanPreview(); return; @@ -1726,6 +1938,9 @@ export class ToolCallComponent extends Container { private buildContent(): void { const { result } = this; + // Swarm renders its dashboard via buildSwarmBody; the result output is the + // synthesized report which is surfaced elsewhere, not in this card. 
/**
 * Live token figure for a swarm worker from its `agent.status.updated` event.
 * Prefers the per-agent `contextTokens` when positive; otherwise falls back to
 * the usage record (`total ?? currentTurn`), summing the three input counters
 * and the output counter. Returns `undefined` when no positive figure is
 * available so the dashboard line stays unchanged.
 *
 * Every usage counter is defaulted to 0 when absent, so a payload that omits
 * `output` still reports its input total instead of collapsing to NaN.
 *
 * @param event - The worker's status update.
 * @returns A positive token count, or `undefined`.
 */
function liveSwarmWorkerTokens(event: AgentStatusUpdatedEvent): number | undefined {
  const { contextTokens } = event;
  if (contextTokens !== undefined && contextTokens > 0) {
    return contextTokens;
  }
  const usage = event.usage?.total ?? event.usage?.currentTurn;
  if (usage === undefined) return undefined;
  const total =
    (usage.inputOther ?? 0) +
    (usage.inputCacheRead ?? 0) +
    (usage.inputCacheCreation ?? 0) +
    (usage.output ?? 0);
  return total > 0 ? total : undefined;
}
+ const tokens = liveSwarmWorkerTokens(event); + if (tokens !== undefined) { + toolCall.applySwarm({ t: 'worker.tokens', id: subagentId, tokens }); + } + } + return true; + } + toolCall.setSubagentMeta(subagentId, sourceName); switch (event.type) { @@ -483,6 +531,47 @@ export class SessionEventHandler { } private handleToolProgress(event: ToolProgressEvent): void { + if (event.update.kind === 'custom' && event.update.customKind === 'swarm') { + const tc = this.host.streamingUI.getToolComponent(event.toolCallId); + if (tc === undefined || !tc.isSwarm()) return; + const p = event.update.customData as { + phase?: string; + total?: number; + role?: string; + newRole?: string; + decision?: string; + reason?: string; + message?: string; + }; + if (p.phase === 'planned' && typeof p.total === 'number') { + tc.applySwarm({ t: 'planned', total: p.total }); + } else if (p.phase === 'synthesizing') { + tc.applySwarm({ t: 'synthesizing' }); + } else if (p.phase === 'done') { + tc.applySwarm({ t: 'done', succeeded: 0, failed: 0 }); + } else if (p.phase === 'failed') { + // An ordinary swarm failure (planner/synthesizer error) — show it as a + // failed dashboard with the reason, not a success-toned 'cancelled'. + tc.applySwarm({ t: 'failed', message: typeof p.message === 'string' ? p.message : '' }); + } else if (p.phase === 'revising' && typeof p.role === 'string') { + // Route by the reviser's decision so each recovery path shows the right + // transient state: + // - retry/regenerate re-run the same role → mark it retrying. + // - reassign moves the subtask to a new role → re-key the existing + // row so the subtask keeps ONE row (no orphan left in retrying). + // - drop emits nothing here; the subsequent 'dropped' event fully + // describes it (and skipping this avoids a drop→retrying flash). 
+ if (p.decision === 'reassign' && typeof p.newRole === 'string') { + tc.applySwarm({ t: 'worker.reassigned', fromRole: p.role, toRole: p.newRole }); + } else if (p.decision === 'retry' || p.decision === 'regenerate') { + tc.applySwarm({ t: 'worker.retrying', role: p.role }); + } + } else if (p.phase === 'dropped' && typeof p.role === 'string') { + // The subtask was given up on — show it as a dropped gap with the reason. + tc.applySwarm({ t: 'worker.dropped', role: p.role, reason: p.reason ?? '' }); + } + return; + } if (event.update.kind !== 'status') return; const text = event.update.text; if (text === undefined || text.length === 0) return; @@ -695,6 +784,25 @@ export class SessionEventHandler { name: event.subagentName, }); + const swarmTc = streamingUI.getToolComponent(event.parentToolCallId); + if (swarmTc?.isSwarm() === true) { + // Only real workers (profile `swarm:`) become dashboard rows. The + // planner (`swarm-planner`, plus a possible retry) and synthesizer + // (`swarm-synthesizer`) share the same parent tool-call id but must not + // appear as workers or inflate the worker counts. Any non-worker subagent + // under a swarm coordinator returns without falling through to the + // foreground path. + const workerPrefix = 'swarm:'; + if (event.subagentName.startsWith(workerPrefix)) { + swarmTc.applySwarm({ + t: 'worker.spawned', + id: event.subagentId, + role: event.description ?? event.subagentName.slice(workerPrefix.length), + }); + } + return; + } + if (event.runInBackground) { const meta = this.buildBackgroundAgentMetadata(event); this.backgroundAgentMetadata.set(event.subagentId, meta); @@ -740,6 +848,14 @@ export class SessionEventHandler { } const tc = streamingUI.getToolComponent(event.parentToolCallId); if (tc === undefined) return; + if (tc.isSwarm()) { + tc.applySwarm({ + t: 'worker.done', + id: event.subagentId, + ...(event.contextTokens !== undefined ? 
{ tokens: event.contextTokens } : {}), + }); + return; + } tc.onSubagentCompleted({ contextTokens: event.contextTokens, usage: event.usage, @@ -777,6 +893,10 @@ export class SessionEventHandler { } const tc = streamingUI.getToolComponent(event.parentToolCallId); if (tc === undefined) return; + if (tc.isSwarm()) { + tc.applySwarm({ t: 'worker.failed', id: event.subagentId, error: event.error }); + return; + } tc.onSubagentFailed({ error: event.error }); streamingUI.removeToolComponentIfInactive(event.parentToolCallId); } diff --git a/apps/kimi-code/src/tui/controllers/streaming-ui.ts b/apps/kimi-code/src/tui/controllers/streaming-ui.ts index cb0a3321..c8a2d1b6 100644 --- a/apps/kimi-code/src/tui/controllers/streaming-ui.ts +++ b/apps/kimi-code/src/tui/controllers/streaming-ui.ts @@ -594,6 +594,14 @@ export class StreamingUIController { onToolCallStart(toolCall: ToolCallBlockData): void { if (toolCall.name === 'AskUserQuestion') return; + // A tool call of any other kind breaks an in-flight Agent/Read run, so the + // pending groups are reset here to avoid a non-Agent/Read call (e.g. Swarm) + // between Agent/Read calls leaving a stale pending group. Swarm itself flows + // through the normal ToolCallComponent path below — it renders its dashboard + // via the managed tool-call lifecycle (one stable component per tool id). 
+ if (toolCall.name !== 'Agent') this._pendingAgentGroup = null; + if (toolCall.name !== 'Read') this._pendingReadGroup = null; + const { state } = this.host; const tc = new ToolCallComponent( toolCall, @@ -607,9 +615,6 @@ export class StreamingUIController { if (state.planExpanded) tc.setPlanExpanded(true); this._pendingToolComponents.set(toolCall.id, tc); - if (toolCall.name !== 'Agent') this._pendingAgentGroup = null; - if (toolCall.name !== 'Read') this._pendingReadGroup = null; - let handled = this.tryAttachAgentToolCall(toolCall, tc); if (!handled) handled = this.tryAttachReadToolCall(toolCall, tc); if (!handled) { @@ -633,6 +638,7 @@ export class StreamingUIController { onToolCallEnd(toolCallId: string, result: ToolResultBlockData): void { const { state } = this.host; const matchedCall = this._activeToolCalls.get(toolCallId); + const tc = this._pendingToolComponents.get(toolCallId); if (tc) { tc.setResult(result); diff --git a/apps/kimi-code/test/tui/commands/swarm.test.ts b/apps/kimi-code/test/tui/commands/swarm.test.ts new file mode 100644 index 00000000..52f8a914 --- /dev/null +++ b/apps/kimi-code/test/tui/commands/swarm.test.ts @@ -0,0 +1,73 @@ +import { buildSwarmPrompt, handleSwarmCommand } from '#/tui/commands/swarm'; +import { describe, expect, it, vi } from 'vitest'; + +describe('buildSwarmPrompt', () => { + it('frames the task to force the Swarm tool', () => { + const p = buildSwarmPrompt('compare three libraries'); + expect(p).toContain('Swarm'); + expect(p).toContain('compare three libraries'); + }); +}); + +describe('handleSwarmCommand', () => { + it('errors when there is no active session', async () => { + const showError = vi.fn(); + await handleSwarmCommand({ session: undefined, showError } as never, 'do it'); + expect(showError).toHaveBeenCalled(); + }); + + it('errors when args are empty', async () => { + const showError = vi.fn(); + const prompt = vi.fn(); + await handleSwarmCommand({ session: { prompt }, showError } as never, ' '); + 
expect(showError).toHaveBeenCalled(); + expect(prompt).not.toHaveBeenCalled(); + }); + + it('sends a framed prompt to the session', async () => { + const prompt = vi.fn<(text: string) => Promise>(async () => undefined); + const showError = vi.fn(); + const beginSessionRequest = vi.fn(); + const failSessionRequest = vi.fn(); + await handleSwarmCommand( + { session: { prompt }, showError, beginSessionRequest, failSessionRequest } as never, + 'compare libs', + ); + expect(prompt).toHaveBeenCalledTimes(1); + expect(String(prompt.mock.calls[0]?.[0])).toContain('compare libs'); + }); + + it('begins the session request before prompting so a follow-up cannot race the swarm turn', async () => { + const prompt = vi.fn<(text: string) => Promise>(async () => undefined); + const showError = vi.fn(); + const beginSessionRequest = vi.fn(); + const failSessionRequest = vi.fn(); + await handleSwarmCommand( + { session: { prompt }, showError, beginSessionRequest, failSessionRequest } as never, + 'compare libs', + ); + expect(beginSessionRequest).toHaveBeenCalledTimes(1); + // The streamingPhase must flip out of 'idle' BEFORE the prompt is dispatched, + // otherwise the input gate stays open during turn startup. + expect(beginSessionRequest.mock.invocationCallOrder[0]).toBeLessThan( + prompt.mock.invocationCallOrder[0] ?? 
Infinity, + ); + expect(failSessionRequest).not.toHaveBeenCalled(); + }); + + it('fails the session request when the prompt rejects', async () => { + const prompt = vi.fn<(text: string) => Promise>(async () => { + throw new Error('boom'); + }); + const showError = vi.fn(); + const beginSessionRequest = vi.fn(); + const failSessionRequest = vi.fn(); + await handleSwarmCommand( + { session: { prompt }, showError, beginSessionRequest, failSessionRequest } as never, + 'compare libs', + ); + expect(beginSessionRequest).toHaveBeenCalledTimes(1); + expect(failSessionRequest).toHaveBeenCalledTimes(1); + expect(String(failSessionRequest.mock.calls[0]?.[0])).toContain('boom'); + }); +}); diff --git a/apps/kimi-code/test/tui/components/messages/swarm-dashboard-model.test.ts b/apps/kimi-code/test/tui/components/messages/swarm-dashboard-model.test.ts new file mode 100644 index 00000000..5a4e993b --- /dev/null +++ b/apps/kimi-code/test/tui/components/messages/swarm-dashboard-model.test.ts @@ -0,0 +1,296 @@ +import { describe, expect, it } from 'vitest'; + +import { + applySwarmEvent, + initialSwarmModel, + workerActivityFromTool, + type SwarmModel, +} from '#/tui/components/messages/swarm-dashboard-model'; + +function reduce(events: Parameters[1][]): SwarmModel { + return events.reduce((m, e) => applySwarmEvent(m, e), initialSwarmModel('do a task')); +} + +describe('swarm failed phase', () => { + it('sets the failed phase and stores the failure message', () => { + const m = reduce([ + { t: 'planned', total: 1 }, + { t: 'failed', message: 'planner exploded' }, + ]); + expect(m.phase).toBe('failed'); + expect(m.failureMessage).toBe('planner exploded'); + }); +}); + +describe('applySwarmEvent', () => { + it('starts in planning phase with the task', () => { + const m = initialSwarmModel('my task'); + expect(m.task).toBe('my task'); + expect(m.phase).toBe('planning'); + expect(m.workers.size).toBe(0); + }); + + it('planned sets total and moves to working', () => { + const m = 
reduce([{ t: 'planned', total: 5 }]); + expect(m.phase).toBe('working'); + expect(m.total).toBe(5); + }); + + it('builds a worker row on spawn and tracks activity + count', () => { + const m = reduce([ + { t: 'planned', total: 2 }, + { t: 'worker.spawned', id: 'a1', role: 'Researcher' }, + { t: 'worker.toolcall', id: 'a1', activity: 'read foo.ts' }, + { t: 'worker.toolcall', id: 'a1', activity: 'grep "x"' }, + ]); + const w = m.workers.get('a1'); + expect(w?.role).toBe('Researcher'); + expect(w?.status).toBe('running'); + expect(w?.toolCount).toBe(2); + expect(w?.latestActivity).toBe('grep "x"'); + }); + + it('marks workers done/failed and counts them', () => { + const m = reduce([ + { t: 'planned', total: 2 }, + { t: 'worker.spawned', id: 'a1', role: 'R' }, + { t: 'worker.spawned', id: 'a2', role: 'A' }, + { t: 'worker.done', id: 'a1', tokens: 2100 }, + { t: 'worker.failed', id: 'a2', error: 'timeout' }, + ]); + expect(m.workers.get('a1')?.status).toBe('done'); + expect(m.workers.get('a1')?.tokens).toBe(2100); + expect(m.workers.get('a2')?.status).toBe('failed'); + expect(m.workers.get('a2')?.error).toBe('timeout'); + expect(m.doneCount).toBe(1); + expect(m.failedCount).toBe(1); + }); + + it('synthesizing then done set the phase', () => { + const m = reduce([{ t: 'planned', total: 1 }, { t: 'synthesizing' }, { t: 'done', succeeded: 1, failed: 0 }]); + expect(m.phase).toBe('done'); + }); + + it('cancelled sets the phase', () => { + const m = reduce([{ t: 'planned', total: 1 }, { t: 'cancelled' }]); + expect(m.phase).toBe('cancelled'); + }); + + it('clamps a worker that finishes without an explicit running transition', () => { + const m = reduce([{ t: 'worker.spawned', id: 'a1', role: 'R' }, { t: 'worker.done', id: 'a1' }]); + expect(m.workers.get('a1')?.status).toBe('done'); + }); + + it('worker.tokens updates a running worker tokens without touching count/status/activity', () => { + const m = reduce([ + { t: 'planned', total: 1 }, + { t: 'worker.spawned', id: 
'a1', role: 'Researcher' }, + { t: 'worker.toolcall', id: 'a1', activity: 'read foo.ts' }, + { t: 'worker.tokens', id: 'a1', tokens: 3200 }, + ]); + const w = m.workers.get('a1'); + expect(w?.tokens).toBe(3200); + expect(w?.status).toBe('running'); + expect(w?.toolCount).toBe(1); + expect(w?.latestActivity).toBe('read foo.ts'); + }); + + it('worker.tokens is a no-op for an unknown worker id', () => { + const before = reduce([ + { t: 'planned', total: 1 }, + { t: 'worker.spawned', id: 'a1', role: 'R' }, + ]); + const after = applySwarmEvent(before, { t: 'worker.tokens', id: 'ghost', tokens: 999 }); + expect(after).toBe(before); + expect(after.workers.get('ghost')).toBeUndefined(); + }); + + it('worker.retrying sets the matching role row to retrying and keeps it visible', () => { + const m = reduce([ + { t: 'planned', total: 1 }, + { t: 'worker.spawned', id: 'a1', role: 'Worker' }, + { t: 'worker.failed', id: 'a1', error: 'boom' }, + { t: 'worker.retrying', role: 'Worker' }, + ]); + expect(m.workers.size).toBe(1); + expect(m.workers.get('a1')?.status).toBe('retrying'); + }); + + it('a worker.spawned for a role already retrying REUSES the row (no duplicate, id updated, running)', () => { + const m = reduce([ + { t: 'planned', total: 1 }, + { t: 'worker.spawned', id: 'a1', role: 'Worker' }, + { t: 'worker.failed', id: 'a1', error: 'boom' }, + { t: 'worker.retrying', role: 'Worker' }, + { t: 'worker.spawned', id: 'a2', role: 'Worker' }, + ]); + // Exactly one row for the role, now keyed by the NEW subagent id, reset to running. 
+ expect(m.workers.size).toBe(1); + expect(m.workers.get('a1')).toBeUndefined(); + const w = m.workers.get('a2'); + expect(w?.role).toBe('Worker'); + expect(w?.status).toBe('running'); + expect(w?.error).toBeUndefined(); + }); + + it('a worker.spawned for a role in a terminal failed state REUSES the row on retry', () => { + // Even without an explicit worker.retrying, a re-spawn of the same role + // collapses onto the existing terminal row (one row per role across attempts). + const m = reduce([ + { t: 'worker.spawned', id: 'a1', role: 'Worker' }, + { t: 'worker.failed', id: 'a1', error: 'boom' }, + { t: 'worker.spawned', id: 'a2', role: 'Worker' }, + ]); + expect(m.workers.size).toBe(1); + expect(m.workers.get('a2')?.status).toBe('running'); + }); + + it('worker.dropped sets an existing role row to dropped with the reason', () => { + const m = reduce([ + { t: 'planned', total: 1 }, + { t: 'worker.spawned', id: 'a1', role: 'Worker' }, + { t: 'worker.failed', id: 'a1', error: 'boom' }, + { t: 'worker.dropped', role: 'Worker', reason: 'impossible' }, + ]); + expect(m.workers.size).toBe(1); + const w = m.workers.get('a1'); + expect(w?.status).toBe('dropped'); + expect(w?.error).toBe('impossible'); + }); + + it('worker.dropped creates a dropped row when the subtask never spawned a worker', () => { + const m = reduce([ + { t: 'planned', total: 1 }, + { t: 'worker.dropped', role: 'Planner', reason: 'no decision' }, + ]); + expect(m.workers.size).toBe(1); + const w = [...m.workers.values()][0]; + expect(w?.role).toBe('Planner'); + expect(w?.status).toBe('dropped'); + expect(w?.error).toBe('no decision'); + }); + + it('distinct roles still get distinct rows; same-role reuse does not collapse them', () => { + const m = reduce([ + { t: 'planned', total: 2 }, + { t: 'worker.spawned', id: 'a1', role: 'Researcher' }, + { t: 'worker.spawned', id: 'a2', role: 'Analyst' }, + { t: 'worker.failed', id: 'a1', error: 'boom' }, + { t: 'worker.retrying', role: 'Researcher' }, + { t: 
'worker.spawned', id: 'a3', role: 'Researcher' }, + ]); + expect(m.workers.size).toBe(2); + expect(m.workers.get('a3')?.role).toBe('Researcher'); + expect(m.workers.get('a2')?.role).toBe('Analyst'); + }); + + it('a reassign (new role) re-spawn adds a new row (does not reuse a different role)', () => { + const m = reduce([ + { t: 'planned', total: 1 }, + { t: 'worker.spawned', id: 'a1', role: 'Worker' }, + { t: 'worker.failed', id: 'a1', error: 'boom' }, + { t: 'worker.retrying', role: 'Worker' }, + { t: 'worker.spawned', id: 'a2', role: 'R2' }, + ]); + expect(m.workers.size).toBe(2); + expect(m.workers.get('a1')?.role).toBe('Worker'); + expect(m.workers.get('a2')?.role).toBe('R2'); + }); + + it('reassign collapses to ONE row: failed(OLD) -> reassigned(OLD->NEW) -> spawned(NEW) -> done', () => { + // The reassign-orphan regression: before the fix, a reassign marked the OLD + // role row retrying then the re-spawn created a NEW role row, stranding the + // old one in 'retrying' forever. The reassigned event re-keys the SAME row. + const m = reduce([ + { t: 'planned', total: 1 }, + { t: 'worker.spawned', id: 'a1', role: 'OldRole' }, + { t: 'worker.failed', id: 'a1', error: 'boom' }, + { t: 'worker.reassigned', fromRole: 'OldRole', toRole: 'NewRole' }, + { t: 'worker.spawned', id: 'a2', role: 'NewRole' }, + { t: 'worker.done', id: 'a2', tokens: 1500 }, + ]); + // Exactly one row, final role NewRole, status done. + expect(m.workers.size).toBe(1); + const w = [...m.workers.values()][0]; + expect(w?.role).toBe('NewRole'); + expect(w?.status).toBe('done'); + expect(w?.tokens).toBe(1500); + // No row left dangling in 'retrying', and no stray OldRole row. 
+ expect([...m.workers.values()].some((r) => r.status === 'retrying')).toBe(false); + expect([...m.workers.values()].some((r) => r.role === 'OldRole')).toBe(false); + expect(m.doneCount).toBe(1); + expect(m.failedCount).toBe(0); + }); + + it('worker.reassigned re-keys the failed row to the new role and marks it retrying', () => { + const m = reduce([ + { t: 'planned', total: 1 }, + { t: 'worker.spawned', id: 'a1', role: 'OldRole' }, + { t: 'worker.failed', id: 'a1', error: 'boom' }, + { t: 'worker.reassigned', fromRole: 'OldRole', toRole: 'NewRole' }, + ]); + expect(m.workers.size).toBe(1); + const w = m.workers.get('a1'); + expect(w?.role).toBe('NewRole'); + expect(w?.status).toBe('retrying'); + expect(w?.error).toBeUndefined(); + // The transient failed count is reversed when the row leaves the failed state. + expect(m.failedCount).toBe(0); + }); + + it('worker.reassigned is a no-op when no fromRole row exists', () => { + const before = reduce([ + { t: 'planned', total: 1 }, + { t: 'worker.spawned', id: 'a1', role: 'Other' }, + ]); + const after = applySwarmEvent(before, { + t: 'worker.reassigned', + fromRole: 'Missing', + toRole: 'NewRole', + }); + expect(after).toBe(before); + }); + + it('full failed->retrying->respawn(running)->done on ONE role keeps counts consistent', () => { + // Locks count bookkeeping: the transient failed must be reversed, so the + // surviving row is done and the failed/dropped counts return to zero. 
+ const m = reduce([ + { t: 'planned', total: 1 }, + { t: 'worker.spawned', id: 'a1', role: 'Worker' }, + { t: 'worker.failed', id: 'a1', error: 'boom' }, + { t: 'worker.retrying', role: 'Worker' }, + { t: 'worker.spawned', id: 'a2', role: 'Worker' }, + { t: 'worker.done', id: 'a2', tokens: 900 }, + ]); + expect(m.workers.size).toBe(1); + const w = [...m.workers.values()][0]; + expect(w?.status).toBe('done'); + expect(m.doneCount).toBe(1); + expect(m.failedCount).toBe(0); + expect(m.droppedCount).toBe(0); + }); + + it('single-run (no retry) leaves running rows untouched by reuse logic', () => { + const m = reduce([ + { t: 'planned', total: 2 }, + { t: 'worker.spawned', id: 'a1', role: 'Researcher' }, + { t: 'worker.spawned', id: 'a2', role: 'Researcher' }, + ]); + // Two concurrent running workers of the same role keep distinct rows; reuse + // only applies to terminal/retrying rows, so single-run fan-out is unchanged. + expect(m.workers.size).toBe(2); + }); +}); + +describe('workerActivityFromTool', () => { + it('formats common tools compactly', () => { + expect(workerActivityFromTool('Read', { path: 'a/b.ts' })).toBe('read a/b.ts'); + expect(workerActivityFromTool('Grep', { pattern: 'foo' })).toBe('grep "foo"'); + expect(workerActivityFromTool('Glob', { pattern: '*.ts' })).toBe('glob *.ts'); + expect(workerActivityFromTool('WebSearch', { query: 'kimi' })).toBe('search "kimi"'); + expect(workerActivityFromTool('FetchURL', { url: 'http://x' })).toBe('fetch http://x'); + }); + it('falls back to the tool name', () => { + expect(workerActivityFromTool('Mystery', {})).toBe('Mystery'); + }); +}); diff --git a/apps/kimi-code/test/tui/components/messages/swarm-routing.test.ts b/apps/kimi-code/test/tui/components/messages/swarm-routing.test.ts new file mode 100644 index 00000000..10d6fb43 --- /dev/null +++ b/apps/kimi-code/test/tui/components/messages/swarm-routing.test.ts @@ -0,0 +1,447 @@ +import { describe, expect, it } from 'vitest'; + +import type { Event } from 
'@moonshot-ai/kimi-code-sdk'; + +import { SessionEventHandler, type SessionEventHost } from '#/tui/controllers/session-event-handler'; +import { ToolCallComponent } from '#/tui/components/messages/tool-call'; +import { workerActivityFromTool } from '#/tui/components/messages/swarm-dashboard-model'; +import { darkColors } from '#/tui/theme/colors'; + +const strip = (t: string): string => t.replaceAll(/\[[0-9;]*m/g, ''); + +function makeSwarm(): ToolCallComponent { + return new ToolCallComponent( + { id: 'tc-swarm', name: 'Swarm', args: { task: 'task' } }, + undefined, + darkColors, + ); +} + +describe('swarm dashboard wiring (translation)', () => { + it('produces the expected dashboard from a worker lifecycle sequence', () => { + const dash = makeSwarm(); + dash.applySwarm({ t: 'planned', total: 2 }); + dash.applySwarm({ t: 'worker.spawned', id: 's1', role: 'Researcher' }); + dash.applySwarm({ t: 'worker.toolcall', id: 's1', activity: workerActivityFromTool('Read', { path: 'a.ts' }) }); + dash.applySwarm({ t: 'worker.done', id: 's1', tokens: 2100 }); + dash.applySwarm({ t: 'worker.spawned', id: 's2', role: 'Analyst' }); + dash.applySwarm({ t: 'worker.failed', id: 's2', error: 'timeout' }); + dash.applySwarm({ t: 'done', succeeded: 1, failed: 1 }); + const out = strip(dash.render(80).join('\n')); + expect(out).toContain('Researcher'); + expect(out).toContain('Analyst'); + expect(out).toContain('timeout'); + expect(out).toMatch(/2 workers/); + }); + + it('routes live swarm events through SessionEventHandler into the dashboard', () => { + const parentToolCallId = 'tc-swarm'; + const dash = makeSwarm(); + const mockHost = { + streamingUI: { + setTurnId: (): void => {}, + getToolComponent: (id: string): ToolCallComponent | undefined => + id === parentToolCallId ? 
dash : undefined, + }, + } as unknown as SessionEventHost; + const handler = new SessionEventHandler(mockHost); + const noop = (): void => {}; + + handler.handleEvent( + { + type: 'tool.progress', + agentId: 'main', + sessionId: 's', + turnId: 1, + toolCallId: parentToolCallId, + update: { kind: 'custom', customKind: 'swarm', customData: { phase: 'planned', total: 1 } }, + } as unknown as Event, + noop, + ); + handler.handleEvent( + { + type: 'subagent.spawned', + agentId: 'main', + sessionId: 's', + subagentId: 'w1', + subagentName: 'swarm:Researcher', + parentToolCallId, + description: 'Researcher', + runInBackground: false, + } as unknown as Event, + noop, + ); + handler.handleEvent( + { + type: 'tool.call.started', + agentId: 'w1', + sessionId: 's', + turnId: 1, + toolCallId: 'inner-1', + name: 'Read', + args: { path: 'x.ts' }, + } as unknown as Event, + noop, + ); + handler.handleEvent( + { + type: 'subagent.failed', + agentId: 'main', + sessionId: 's', + subagentId: 'w1', + parentToolCallId, + error: 'boom', + } as unknown as Event, + noop, + ); + + const out = strip(dash.render(80).join('\n')); + expect(out).toContain('Researcher'); + expect(out).toContain('boom'); + // Active header tail reports worker progress (1 of 1 terminal). + expect(out).toMatch(/1\/1 workers/); + }); + + it('routes custom revising/dropped progress into retrying/dropped dashboard states', () => { + const parentToolCallId = 'tc-swarm'; + const dash = makeSwarm(); + const mockHost = { + streamingUI: { + setTurnId: (): void => {}, + getToolComponent: (id: string): ToolCallComponent | undefined => + id === parentToolCallId ? 
dash : undefined, + }, + } as unknown as SessionEventHost; + const handler = new SessionEventHandler(mockHost); + const noop = (): void => {}; + + const progress = (customData: Record): void => { + handler.handleEvent( + { + type: 'tool.progress', + agentId: 'main', + sessionId: 's', + turnId: 1, + toolCallId: parentToolCallId, + update: { kind: 'custom', customKind: 'swarm', customData }, + } as unknown as Event, + noop, + ); + }; + const spawn = (subagentId: string): void => { + handler.handleEvent( + { + type: 'subagent.spawned', + agentId: 'main', + sessionId: 's', + subagentId, + subagentName: 'swarm:Worker', + parentToolCallId, + description: 'Worker', + runInBackground: false, + } as unknown as Event, + noop, + ); + }; + + progress({ phase: 'planned', total: 1 }); + spawn('w1'); + handler.handleEvent( + { + type: 'subagent.failed', + agentId: 'main', + sessionId: 's', + subagentId: 'w1', + parentToolCallId, + error: 'boom', + } as unknown as Event, + noop, + ); + // Coordinator decides to retry the Worker subtask. + progress({ phase: 'revising', subtaskId: 'task-1', role: 'Worker', decision: 'retry', attempt: 1 }); + const retrying = strip(dash.render(80).join('\n')); + expect(retrying).toContain('Worker'); + expect(retrying).toContain('retrying'); + + // Re-spawn collapses onto the same row, then the subtask is ultimately dropped. + spawn('w2'); + progress({ phase: 'dropped', subtaskId: 'task-1', role: 'Worker', reason: 'impossible' }); + const out = strip(dash.render(80).join('\n')); + expect(out.match(/Worker/g)?.length).toBe(1); + expect(out).toContain('dropped: impossible'); + }); + + it('routes a reassign decision so the subtask keeps ONE row (no orphan)', () => { + const parentToolCallId = 'tc-swarm'; + const dash = makeSwarm(); + const mockHost = { + streamingUI: { + setTurnId: (): void => {}, + getToolComponent: (id: string): ToolCallComponent | undefined => + id === parentToolCallId ? 
dash : undefined, + }, + } as unknown as SessionEventHost; + const handler = new SessionEventHandler(mockHost); + const noop = (): void => {}; + + const progress = (customData: Record): void => { + handler.handleEvent( + { + type: 'tool.progress', + agentId: 'main', + sessionId: 's', + turnId: 1, + toolCallId: parentToolCallId, + update: { kind: 'custom', customKind: 'swarm', customData }, + } as unknown as Event, + noop, + ); + }; + const spawn = (subagentId: string, role: string): void => { + handler.handleEvent( + { + type: 'subagent.spawned', + agentId: 'main', + sessionId: 's', + subagentId, + subagentName: `swarm:${role}`, + parentToolCallId, + description: role, + runInBackground: false, + } as unknown as Event, + noop, + ); + }; + const fail = (subagentId: string): void => { + handler.handleEvent( + { + type: 'subagent.failed', + agentId: 'main', + sessionId: 's', + subagentId, + parentToolCallId, + error: 'boom', + } as unknown as Event, + noop, + ); + }; + const complete = (subagentId: string): void => { + handler.handleEvent( + { + type: 'subagent.completed', + agentId: 'main', + sessionId: 's', + subagentId, + parentToolCallId, + resultSummary: 'ok', + } as unknown as Event, + noop, + ); + }; + + progress({ phase: 'planned', total: 1 }); + spawn('w1', 'OldRole'); + fail('w1'); + // Reviser reassigns OldRole -> NewRole; the re-spawn uses the NEW role. + progress({ + phase: 'revising', + subtaskId: 'task-1', + role: 'OldRole', + newRole: 'NewRole', + decision: 'reassign', + attempt: 1, + }); + spawn('w2', 'NewRole'); + complete('w2'); + + const out = strip(dash.render(80).join('\n')); + // Exactly one row, now labeled with the new role; the old role is gone. + expect(out).toContain('NewRole'); + expect(out).not.toContain('OldRole'); + // No stray retrying row left behind. 
+ expect(out).not.toContain('retrying'); + }); + + it('a drop decision then dropped produces a single dropped row with no transient retrying', () => { + const parentToolCallId = 'tc-swarm'; + const dash = makeSwarm(); + const mockHost = { + streamingUI: { + setTurnId: (): void => {}, + getToolComponent: (id: string): ToolCallComponent | undefined => + id === parentToolCallId ? dash : undefined, + }, + } as unknown as SessionEventHost; + const handler = new SessionEventHandler(mockHost); + const noop = (): void => {}; + + const progress = (customData: Record): void => { + handler.handleEvent( + { + type: 'tool.progress', + agentId: 'main', + sessionId: 's', + turnId: 1, + toolCallId: parentToolCallId, + update: { kind: 'custom', customKind: 'swarm', customData }, + } as unknown as Event, + noop, + ); + }; + + progress({ phase: 'planned', total: 1 }); + handler.handleEvent( + { + type: 'subagent.spawned', + agentId: 'main', + sessionId: 's', + subagentId: 'w1', + subagentName: 'swarm:Worker', + parentToolCallId, + description: 'Worker', + runInBackground: false, + } as unknown as Event, + noop, + ); + handler.handleEvent( + { + type: 'subagent.failed', + agentId: 'main', + sessionId: 's', + subagentId: 'w1', + parentToolCallId, + error: 'boom', + } as unknown as Event, + noop, + ); + // The reviser decides to DROP. The 'revising' event with decision 'drop' + // must emit NOTHING (no transient retrying flash); the subsequent 'dropped' + // event fully describes the gap. 
+ progress({ phase: 'revising', subtaskId: 'task-1', role: 'Worker', decision: 'drop', attempt: 1 }); + const afterRevise = strip(dash.render(80).join('\n')); + expect(afterRevise).not.toContain('retrying'); + + progress({ phase: 'dropped', subtaskId: 'task-1', role: 'Worker', reason: 'impossible' }); + const out = strip(dash.render(80).join('\n')); + expect(out.match(/Worker/g)?.length).toBe(1); + expect(out).toContain('dropped: impossible'); + expect(out).not.toContain('retrying'); + }); + + it('counts only real workers — planner/synthesizer/retry never become rows', () => { + const parentToolCallId = 'tc-swarm'; + const dash = makeSwarm(); + const mockHost = { + streamingUI: { + setTurnId: (): void => {}, + getToolComponent: (id: string): ToolCallComponent | undefined => + id === parentToolCallId ? dash : undefined, + }, + } as unknown as SessionEventHost; + const handler = new SessionEventHandler(mockHost); + const noop = (): void => {}; + + const spawn = (subagentId: string, subagentName: string, description: string): void => { + handler.handleEvent( + { + type: 'subagent.spawned', + agentId: 'main', + sessionId: 's', + subagentId, + subagentName, + parentToolCallId, + description, + runInBackground: false, + } as unknown as Event, + noop, + ); + }; + const complete = (subagentId: string): void => { + handler.handleEvent( + { + type: 'subagent.completed', + agentId: 'main', + sessionId: 's', + subagentId, + parentToolCallId, + resultSummary: 'ok', + } as unknown as Event, + noop, + ); + }; + + // Coordinator order: planner, two workers, synthesizer — all under the + // same parent tool-call id. Only the two `swarm:` workers are rows. 
+ spawn('p1', 'swarm-planner', 'Swarm planner'); + spawn('w1', 'swarm:Researcher', 'Researcher'); + spawn('w2', 'swarm:Analyst', 'Analyst'); + spawn('synth', 'swarm-synthesizer', 'Swarm synthesizer'); + + complete('p1'); + complete('w1'); + complete('w2'); + complete('synth'); + + // The Swarm tool's custom `done` progress finalizes the dashboard. + handler.handleEvent( + { + type: 'tool.progress', + agentId: 'main', + sessionId: 's', + turnId: 1, + toolCallId: parentToolCallId, + update: { kind: 'custom', customKind: 'swarm', customData: { phase: 'done', succeeded: 2, failed: 0 } }, + } as unknown as Event, + noop, + ); + + const out = strip(dash.render(80).join('\n')); + expect(out).toContain('Researcher'); + expect(out).toContain('Analyst'); + expect(out).not.toContain('planner'); + expect(out).not.toContain('synthesizer'); + expect(out).toContain('2 workers · 2✓ 0✗'); + }); + + it('routes a failed progress event into a failed dashboard state and keeps it over the error result', () => { + const parentToolCallId = 'tc-swarm'; + const dash = makeSwarm(); + const mockHost = { + streamingUI: { + setTurnId: (): void => {}, + getToolComponent: (id: string): ToolCallComponent | undefined => + id === parentToolCallId ? dash : undefined, + }, + } as unknown as SessionEventHost; + const handler = new SessionEventHandler(mockHost); + const noop = (): void => {}; + const progress = (customData: Record): void => { + handler.handleEvent( + { + type: 'tool.progress', + agentId: 'main', + sessionId: 's', + turnId: 1, + toolCallId: parentToolCallId, + update: { kind: 'custom', customKind: 'swarm', customData }, + } as unknown as Event, + noop, + ); + }; + + progress({ phase: 'planned', total: 1 }); + progress({ phase: 'failed', message: 'planner failed to produce a valid plan' }); + // The error tool result then lands; it must NOT override 'failed' with + // a success-toned 'cancelled'. 
+ dash.setResult({ + tool_call_id: parentToolCallId, + output: 'Swarm failed: planner failed to produce a valid plan', + is_error: true, + }); + + const out = strip(dash.render(80).join('\n')); + expect(out).toContain('· failed'); + expect(out).not.toContain('cancelled'); + expect(out).toContain('planner failed to produce a valid plan'); + }); +}); diff --git a/apps/kimi-code/test/tui/components/messages/tool-call-swarm.test.ts b/apps/kimi-code/test/tui/components/messages/tool-call-swarm.test.ts new file mode 100644 index 00000000..feee992f --- /dev/null +++ b/apps/kimi-code/test/tui/components/messages/tool-call-swarm.test.ts @@ -0,0 +1,231 @@ +import { describe, expect, it } from 'vitest'; + +import { ToolCallComponent } from '#/tui/components/messages/tool-call'; +import { darkColors } from '#/tui/theme/colors'; + +const ESC = String.fromCodePoint(0x1b); +function strip(text: string): string { + return text + .replaceAll(/\[[0-9;]*m/g, '') + .replaceAll(new RegExp(`${ESC}\\][0-9];;[^\\u0007]*\\u0007`, 'g'), ''); +} + +function makeSwarm(task: string): ToolCallComponent { + return new ToolCallComponent( + { id: 'tc-swarm', name: 'Swarm', args: { task } }, + undefined, + darkColors, + ); +} + +describe('ToolCallComponent swarm mode', () => { + it('identifies swarm tool calls and no-ops applySwarm on non-swarm tools', () => { + const swarm = makeSwarm('t'); + expect(swarm.isSwarm()).toBe(true); + + const read = new ToolCallComponent( + { id: 'tc-read', name: 'Read', args: { path: 'foo.ts' } }, + undefined, + darkColors, + ); + expect(read.isSwarm()).toBe(false); + const before = read.render(80).join('\n'); + // applySwarm must be a safe no-op on non-swarm tools. 
+ read.applySwarm({ t: 'planned', total: 2 }); + expect(read.render(80).join('\n')).toBe(before); + }); + + it('renders the header and worker rows with the AgentGroup gutter style', () => { + const c = makeSwarm('compare error handling'); + c.applySwarm({ t: 'planned', total: 2 }); + c.applySwarm({ t: 'worker.spawned', id: 'a1', role: 'Researcher' }); + c.applySwarm({ t: 'worker.toolcall', id: 'a1', activity: 'read foo.ts' }); + c.applySwarm({ t: 'worker.spawned', id: 'a2', role: 'Analyst' }); + c.applySwarm({ t: 'worker.done', id: 'a2', tokens: 1800 }); + + const out = strip(c.render(80).join('\n')); + expect(out).toContain('Swarm'); + expect(out).toContain('compare error handling'); + expect(out).toContain('Researcher'); + expect(out).toContain('read foo.ts'); + expect(out).toContain('Analyst'); + // Active header tail reports worker progress (1 of 2 terminal). + expect(out).toContain('1/2 workers'); + // Mirrors AgentGroup's gutter vocabulary: branch glyphs + "now:" activity. + expect(out).toContain('├─ Researcher'); + expect(out).toContain('now: read foo.ts'); + // The last worker (done) uses the closing branch and shows its call stats. + expect(out).toContain('└─ Analyst'); + expect(out).toContain('1.8k tok'); + }); + + it('shows live token counts on a running worker that received worker.tokens', () => { + const c = makeSwarm('compare error handling'); + c.applySwarm({ t: 'planned', total: 1 }); + c.applySwarm({ t: 'worker.spawned', id: 'a1', role: 'Researcher' }); + c.applySwarm({ t: 'worker.toolcall', id: 'a1', activity: 'read foo.ts' }); + c.applySwarm({ t: 'worker.tokens', id: 'a1', tokens: 4200 }); + + const out = strip(c.render(80).join('\n')); + // The running worker still shows its live activity line ... + expect(out).toContain('now: read foo.ts'); + // ... alongside the live token count on its stats line. 
+ expect(out).toContain('1 call'); + expect(out).toContain('4.2k tok'); + }); + + it('shows a dim planning placeholder before any workers spawn', () => { + const c = makeSwarm('explore the repo'); + const out = strip(c.render(80).join('\n')); + expect(out).toContain('planning…'); + expect(out).toContain('└─ planning subtasks…'); + }); + + it('produces byte-identical output across consecutive renders (stability)', () => { + const c = makeSwarm('stable task'); + c.applySwarm({ t: 'planned', total: 2 }); + c.applySwarm({ t: 'worker.spawned', id: 'a1', role: 'Researcher' }); + c.applySwarm({ t: 'worker.toolcall', id: 'a1', activity: 'read foo.ts' }); + c.applySwarm({ t: 'worker.spawned', id: 'a2', role: 'Analyst' }); + + // The root-cause property: a stable component renders the same lines each + // time, so pi-tui's differential renderer never re-emits it to scrollback. + expect(c.render(80).join('\n')).toBe(c.render(80).join('\n')); + }); + + it('shows a failed worker with its error on the second gutter line', () => { + const c = makeSwarm('t'); + c.applySwarm({ t: 'planned', total: 1 }); + c.applySwarm({ t: 'worker.spawned', id: 'a1', role: 'Scan' }); + c.applySwarm({ t: 'worker.failed', id: 'a1', error: 'timeout' }); + const out = strip(c.render(80).join('\n')); + expect(out).toContain('└─ Scan'); + expect(out).toContain('failed: timeout'); + }); + + it('renders a retrying worker with a dim retrying indicator', () => { + const c = makeSwarm('t'); + c.applySwarm({ t: 'planned', total: 1 }); + c.applySwarm({ t: 'worker.spawned', id: 'a1', role: 'Worker' }); + c.applySwarm({ t: 'worker.failed', id: 'a1', error: 'boom' }); + c.applySwarm({ t: 'worker.retrying', role: 'Worker' }); + const out = strip(c.render(80).join('\n')); + expect(out).toContain('└─ Worker'); + expect(out).toContain('retrying'); + }); + + it('reuses the same row when a retried worker re-spawns (one row per role)', () => { + const c = makeSwarm('t'); + c.applySwarm({ t: 'planned', total: 1 }); + 
c.applySwarm({ t: 'worker.spawned', id: 'a1', role: 'Worker' }); + c.applySwarm({ t: 'worker.failed', id: 'a1', error: 'boom' }); + c.applySwarm({ t: 'worker.retrying', role: 'Worker' }); + c.applySwarm({ t: 'worker.spawned', id: 'a2', role: 'Worker' }); + c.applySwarm({ t: 'worker.done', id: 'a2', tokens: 1500 }); + const out = strip(c.render(80).join('\n')); + // Only one Worker row across both attempts. + expect(out.match(/Worker/g)?.length).toBe(1); + expect(out).toContain('1.5k tok'); + expect(out).not.toContain('retrying'); + }); + + it('renders a dropped worker with its drop reason', () => { + const c = makeSwarm('t'); + c.applySwarm({ t: 'planned', total: 1 }); + c.applySwarm({ t: 'worker.spawned', id: 'a1', role: 'Worker' }); + c.applySwarm({ t: 'worker.failed', id: 'a1', error: 'boom' }); + c.applySwarm({ t: 'worker.dropped', role: 'Worker', reason: 'impossible' }); + const out = strip(c.render(80).join('\n')); + expect(out).toContain('└─ Worker'); + expect(out).toContain('dropped: impossible'); + }); + + it('done-phase header shows the dropped count when there are drops', () => { + const c = makeSwarm('t'); + c.applySwarm({ t: 'planned', total: 2 }); + c.applySwarm({ t: 'worker.spawned', id: 'a1', role: 'R' }); + c.applySwarm({ t: 'worker.done', id: 'a1' }); + c.applySwarm({ t: 'worker.spawned', id: 'a2', role: 'A' }); + c.applySwarm({ t: 'worker.failed', id: 'a2', error: 'x' }); + c.applySwarm({ t: 'worker.dropped', role: 'A', reason: 'gave up' }); + c.applySwarm({ t: 'done', succeeded: 1, failed: 0 }); + c.setResult({ tool_call_id: 'tc-swarm', output: 'final report', is_error: false }); + const out = strip(c.render(80).join('\n')); + expect(out).toContain('1✓'); + expect(out).toContain('1⊘'); + }); + + it('finalizes to a cancelled header on an error result (genuine abort, no failure event)', () => { + const c = makeSwarm('t'); + c.applySwarm({ t: 'planned', total: 2 }); + c.applySwarm({ t: 'worker.spawned', id: 'a1', role: 'R' }); + c.applySwarm({ t: 
'worker.done', id: 'a1' }); + c.applySwarm({ t: 'worker.spawned', id: 'a2', role: 'A' }); + c.setResult({ tool_call_id: 'tc-swarm', output: 'aborted', is_error: true }); + const out = strip(c.render(80).join('\n')); + expect(out).toContain('cancelled'); + }); + + it('finalizes to a failed header showing the reason when a failure preceded the error result', () => { + const c = makeSwarm('do research'); + c.applySwarm({ t: 'planned', total: 1 }); + // An ordinary swarm failure (planner/synthesizer error) emits a failure + // event before the error result. The card must show the reason, not + // masquerade as a success-toned 'cancelled' with the message hidden. + c.applySwarm({ + t: 'failed', + message: 'Swarm planner failed to produce a valid plan after one retry', + }); + c.setResult({ + tool_call_id: 'tc-swarm', + output: 'Swarm failed: Swarm planner failed to produce a valid plan after one retry', + is_error: true, + }); + const out = strip(c.render(80).join('\n')); + expect(out).toContain('· failed'); + expect(out).not.toContain('cancelled'); + expect(out).toContain('planner failed to produce a valid plan'); + }); + + it('finalizes to a summary header after done + success result', () => { + const c = makeSwarm('t'); + c.applySwarm({ t: 'planned', total: 2 }); + c.applySwarm({ t: 'worker.spawned', id: 'a1', role: 'R' }); + c.applySwarm({ t: 'worker.done', id: 'a1' }); + c.applySwarm({ t: 'worker.spawned', id: 'a2', role: 'A' }); + c.applySwarm({ t: 'worker.failed', id: 'a2', error: 'x' }); + c.applySwarm({ t: 'done', succeeded: 1, failed: 1 }); + c.setResult({ tool_call_id: 'tc-swarm', output: 'final report', is_error: false }); + const out = strip(c.render(80).join('\n')); + expect(out).toMatch(/2 workers/); + expect(out).toContain('1✓'); + expect(out).toContain('1✗'); + }); + + it('synthesizes a done header when a success result arrives before the done event', () => { + const c = makeSwarm('t'); + c.applySwarm({ t: 'planned', total: 1 }); + c.applySwarm({ t: 
'worker.spawned', id: 'a1', role: 'R' }); + c.applySwarm({ t: 'worker.done', id: 'a1' }); + // No explicit {t:'done'} — setResult must finalize the header to a summary. + c.setResult({ tool_call_id: 'tc-swarm', output: 'final report', is_error: false }); + const out = strip(c.render(80).join('\n')); + expect(out).toMatch(/1 workers/); + expect(out).toContain('1✓'); + }); + + it('reflects a task supplied via tool-call args after empty-args construction', () => { + // The coordinator's `tool.call.started` fires before the streamed args + // finish, so the task is empty at construction time. The header must read + // the live task from the tool call once `updateToolCall` syncs it. + const c = new ToolCallComponent( + { id: 'tc-swarm', name: 'Swarm', args: {} }, + undefined, + darkColors, + ); + c.updateToolCall({ id: 'tc-swarm', name: 'Swarm', args: { task: 'explore the repo' } }); + c.applySwarm({ t: 'planned', total: 2 }); + const out = strip(c.render(80).join('\n')); + expect(out).toContain('explore the repo'); + }); +}); diff --git a/packages/agent-core/src/agent/index.ts b/packages/agent-core/src/agent/index.ts index 5473f65a..281f458c 100644 --- a/packages/agent-core/src/agent/index.ts +++ b/packages/agent-core/src/agent/index.ts @@ -14,6 +14,7 @@ import { import type { EnabledPluginSessionStart } from '#/plugin'; +import type { SubagentLoopHooks } from './swarm/stall-hook'; import type { McpConnectionManager } from '../mcp'; import type { PreparedSystemPromptContext, ResolvedAgentProfile } from '../profile'; import type { ModelProvider } from '../session/provider-manager'; @@ -114,6 +115,16 @@ export class Agent { readonly cron: CronManager | null; readonly replayBuilder: ReplayBuilder; + /** + * Loop hooks scoped to this agent when it runs as a subagent (e.g. swarm + * worker stall detection). Set by {@link SessionSubagentHost} when spawning; + * `undefined` for the main agent and regular subagents, so they run with + * identical (default) turn hooks. 
/**
 * Run `fn` once for every item, keeping at most `limit` invocations in flight.
 *
 * Items are claimed in index order by a pool of workers. Entries that are
 * `undefined` are skipped. The returned promise settles once every worker has
 * drained the queue, or rejects with the first error thrown by `fn`; workers
 * already started keep draining their current item.
 *
 * @param items - Items to process; never mutated.
 * @param limit - Requested parallelism. Fractions are floored; values below 1
 *   and `NaN` fall back to 1 so the work is never silently skipped.
 * @param fn - Async callback invoked with each item and its index.
 */
export async function mapWithConcurrency<T>(
  items: readonly T[],
  limit: number,
  fn: (item: T, index: number) => Promise<unknown>,
): Promise<void> {
  // Math.max(1, NaN) is NaN, and Array.from({ length: NaN }) is an empty
  // array, so an unguarded NaN limit would start zero workers.
  const requested = Math.floor(limit);
  const max = Number.isNaN(requested) ? 1 : Math.max(1, requested);
  let cursor = 0;

  async function worker(): Promise<void> {
    while (cursor < items.length) {
      // Claim the index synchronously, before the first await, so two workers
      // can never pick up the same item.
      const index = cursor;
      cursor += 1;
      const item = items[index];
      if (item === undefined) continue;
      await fn(item, index);
    }
  }

  const count = Math.min(max, items.length);
  await Promise.all(Array.from({ length: count }, () => worker()));
}
string): void { + this.deps.onProgress?.(text); + } + + private emit(progress: SwarmProgress): void { + this.deps.onProgressCustom?.(progress); + } + + async run(rootTask: string): Promise { + this.deps.signal.throwIfAborted(); + this.progress('Planning subtasks…'); + const plan = await this.decompose(rootTask); + this.progress(`Planned ${String(plan.subtasks.length)} subtasks`); + this.emit({ phase: 'planned', total: plan.subtasks.length }); + + await this.runWithRetries(plan); + + this.emit({ phase: 'synthesizing' }); + this.progress('Synthesizing results…'); + const result = await this.deps.spawnSubagent({ + profileName: 'swarm-synthesizer', + systemPrompt: SYNTHESIZER_SYSTEM_PROMPT, + tools: [], + prompt: renderSynthesizerPrompt(plan), + description: 'Swarm synthesizer', + signal: this.deps.signal, + }); + const succeeded = plan.subtasks.filter((s) => s.status === 'done').length; + const failed = plan.subtasks.filter((s) => s.status === 'failed').length; + const dropped = plan.subtasks.filter((s) => s.status === 'dropped').length; + this.emit({ phase: 'done', succeeded, failed, dropped }); + return result.result; + } + + private async decompose(rootTask: string): Promise { + const first = await this.deps.spawnSubagent({ + profileName: 'swarm-planner', + systemPrompt: PLANNER_SYSTEM_PROMPT, + tools: [], + prompt: renderPlannerPrompt(rootTask), + description: 'Swarm planner', + signal: this.deps.signal, + }); + const plan = parsePlan(rootTask, first.result); + if (plan !== null) return plan; + + const retry = await this.deps.spawnSubagent({ + profileName: 'swarm-planner', + systemPrompt: PLANNER_SYSTEM_PROMPT, + tools: [], + prompt: renderPlannerRetryPrompt(rootTask, first.result), + description: 'Swarm planner (retry)', + signal: this.deps.signal, + }); + const retried = parsePlan(rootTask, retry.result); + if (retried !== null) return retried; + + throw new Error('Swarm planner failed to produce a valid plan after one retry'); + } + + /** + * Wave loop with 
bounded failure recovery. Each iteration runs the pending + * subtasks; then, for every subtask still 'failed', either force-drops it + * (attempts exhausted) or asks the reviser how to recover it and re-queues it + * for the next wave. Terminates when no subtasks remain pending, or when the + * {@link DEFAULT_MAX_WAVES} safety bound is hit. + */ + private async runWithRetries(plan: SwarmPlan): Promise { + const maxAttempts = this.deps.maxAttempts ?? DEFAULT_MAX_ATTEMPTS; + const maxWaves = this.deps.maxWaves ?? DEFAULT_MAX_WAVES; + + for (let wave = 0; wave < maxWaves; wave += 1) { + const pending = plan.subtasks.filter((s) => s.status === 'pending'); + if (pending.length === 0) break; + + await this.runWave(pending); + + for (const st of plan.subtasks) { + if (st.status !== 'failed') continue; + if (st.attempts >= maxAttempts) { + this.forceDrop(st, `attempts exhausted (${String(st.attempts)})`); + continue; + } + const decision = await this.reviseSubtask(st); + this.emit({ + phase: 'revising', + subtaskId: st.id, + // Capture the role BEFORE applyDecision so a `reassign` still + // correlates to the existing dashboard row keyed by the old role. + role: st.role, + decision: decision.kind, + // For a reassign, carry the NEW role too so the dashboard can re-key + // the existing old-role row instead of stranding it in `retrying`. + ...(decision.kind === 'reassign' ? { newRole: decision.role } : {}), + attempt: st.attempts, + }); + this.applyDecision(st, decision); + } + } + + // Safety net: anything still pending after the wave bound is dropped so the + // loop is guaranteed to terminate and the subtask surfaces as a gap. + for (const st of plan.subtasks) { + if (st.status === 'pending' || st.status === 'failed') { + this.forceDrop(st, 'recovery wave limit reached'); + } + } + } + + /** Run a SUBSET of subtasks (the pending ones passed in) concurrently. */ + private async runWave(subtasks: Subtask[]): Promise { + const limit = this.deps.maxConcurrency ?? 
4; + await mapWithConcurrency(subtasks, limit, async (st) => { + this.deps.signal.throwIfAborted(); + st.status = 'running'; + st.attempts += 1; + this.progress(`▸ ${st.role}: started`); + try { + const out = await this.deps.spawnSubagent({ + profileName: `swarm:${st.role}`, + systemPrompt: st.systemPrompt, + tools: (st.toolAllowlist ?? DEFAULT_WORKER_TOOLS).filter((t) => + ALLOWED_WORKER_TOOLS.includes(t), + ), + prompt: st.prompt, + description: st.role, + signal: this.deps.signal, + }); + st.result = out.result; + st.status = 'done'; + this.progress(`✓ ${st.role}: done`); + } catch (err) { + // A genuine swarm-wide cancel must propagate (and must NOT be revised). + if (this.deps.signal.aborted) throw err; + st.status = 'failed'; + st.error = err instanceof Error ? err.message : String(err); + this.progress(`✗ ${st.role}: failed (${st.error})`); + } + }); + } + + /** + * Ask a reviser subagent how to recover one failed subtask. On a malformed + * response we conservatively drop (rather than burn an attempt on a confused + * reviser). + */ + private async reviseSubtask(st: Subtask): Promise { + const out = await this.deps.spawnSubagent({ + profileName: 'swarm-reviser', + systemPrompt: REVISER_SYSTEM_PROMPT, + tools: [], + prompt: renderReviseSubtaskPrompt(st, st.error), + description: `Swarm reviser (${st.role})`, + signal: this.deps.signal, + }); + return ( + parseReviseDecision(out.result) ?? { + kind: 'drop', + reason: 'reviser produced no valid decision', + } + ); + } + + /** Apply a reviser decision in place, re-queueing the subtask unless dropped. 
/** Return the text from the first `{` to the last `}` of `text`, or `null` if there is no such span. */
function outermostBraceSpan(text: string): string | null {
  const start = text.indexOf('{');
  const end = text.lastIndexOf('}');
  if (start === -1 || end === -1 || end < start) return null;
  return text.slice(start, end + 1);
}

/** Report whether `text` is syntactically valid JSON. */
function parsesAsJson(text: string): boolean {
  try {
    JSON.parse(text);
    return true;
  } catch {
    return false;
  }
}

/**
 * Pull the JSON object out of a model reply.
 *
 * Two candidate regions are considered, in order: the body of the first
 * fenced code block (if any), then the whole reply. Within each region the
 * span from the first `{` to the last `}` is taken. The first span that
 * parses as JSON wins. If none parses, the first span found is returned
 * unchanged so the caller's own `JSON.parse` reports the failure.
 *
 * Considering the whole reply as a second candidate covers two cases the
 * fence alone gets wrong: a fenced block that holds no JSON (the object sits
 * outside it), and a JSON string value that itself contains a code fence,
 * which cuts the lazy fence match short.
 *
 * @param text - Raw reply text.
 * @returns The extracted JSON text, or `null` when the reply has no
 *   `{ ... }` span at all.
 */
export function extractJsonObject(text: string): string | null {
  const fenced = /```(?:json)?\s*([\s\S]*?)```/.exec(text)?.[1];

  const spans: string[] = [];
  for (const region of [fenced, text]) {
    if (region === undefined) continue;
    const span = outermostBraceSpan(region);
    if (span !== null && !spans.includes(span)) spans.push(span);
  }

  return spans.find(parsesAsJson) ?? spans[0] ?? null;
}
/**
 * Decode the reviser subagent's verdict on one failed subtask.
 *
 * The reply must contain a JSON object whose `kind` is one of `retry`,
 * `regenerate`, `reassign` or `drop`, together with the non-empty string
 * fields that variant requires. For `reassign`, an optional `toolAllowlist`
 * array is kept with any non-string entries removed.
 *
 * @param text - Raw reviser reply.
 * @returns The decoded decision, or `null` for anything malformed so the
 *   caller can apply its own fallback.
 */
export function parseReviseDecision(text: string): ReviseDecision | null {
  const json = extractJsonObject(text);
  if (json === null) return null;

  let decoded: unknown;
  try {
    decoded = JSON.parse(json);
  } catch {
    return null;
  }
  if (decoded === null || typeof decoded !== 'object') return null;

  const fields = decoded as Record<string, unknown>;
  // Required fields must be strings with at least one character.
  const requiredText = (key: string): string | null => {
    const value = fields[key];
    return typeof value === 'string' && value.length > 0 ? value : null;
  };

  const kind = fields['kind'];

  if (kind === 'retry') {
    return { kind: 'retry' };
  }

  if (kind === 'regenerate') {
    const prompt = requiredText('prompt');
    return prompt === null ? null : { kind: 'regenerate', prompt };
  }

  if (kind === 'reassign') {
    const role = requiredText('role');
    if (role === null) return null;
    const systemPrompt = requiredText('systemPrompt');
    if (systemPrompt === null) return null;

    const rawTools = fields['toolAllowlist'];
    if (!Array.isArray(rawTools)) {
      // Leave the property out entirely when no allowlist was supplied.
      return { kind: 'reassign', role, systemPrompt };
    }
    const toolAllowlist = rawTools.filter((t): t is string => typeof t === 'string');
    return { kind: 'reassign', role, systemPrompt, toolAllowlist };
  }

  if (kind === 'drop') {
    const reason = requiredText('reason');
    return reason === null ? null : { kind: 'drop', reason };
  }

  return null;
}
/**
 * Build the follow-up prompt sent to the planner after its first reply could
 * not be parsed into a plan.
 *
 * @param rootTask - The task being decomposed.
 * @param previous - The rejected reply; only its first 1000 characters are
 *   quoted back.
 * @returns The prompt text, sections separated by newlines.
 */
export function renderPlannerRetryPrompt(rootTask: string, previous: string): string {
  const rejectedExcerpt = previous.slice(0, 1000);

  const sections: string[] = [];
  sections.push(`Task to decompose:\n${rootTask}`);
  sections.push('');
  sections.push('Your previous response was not valid JSON in the required shape:');
  sections.push(rejectedExcerpt);
  sections.push('');
  sections.push(
    'Return ONLY the JSON object, with a non-empty "subtasks" array. No prose, no code fences.',
  );
  return sections.join('\n');
}
/**
 * Build the synthesizer prompt from the original task and one section per
 * subtask.
 *
 * A 'done' subtask contributes its result (empty when none was recorded). A
 * 'dropped' subtask contributes a `[DROPPED: …]` marker, and every other
 * status a `[FAILED: …]` marker, each carrying the recorded error or a stock
 * placeholder.
 *
 * @param plan - The executed plan.
 * @returns The prompt text.
 */
export function renderSynthesizerPrompt(plan: SwarmPlan): string {
  const sections: string[] = [];

  for (const subtask of plan.subtasks) {
    let content: string;
    switch (subtask.status) {
      case 'done':
        content = subtask.result ?? '';
        break;
      case 'dropped':
        content = `[DROPPED: ${subtask.error ?? 'no reason given'}]`;
        break;
      default:
        content = `[FAILED: ${subtask.error ?? 'unknown error'}]`;
        break;
    }
    sections.push(`### ${subtask.role} (${subtask.status})\n${content}`);
  }

  const header = `Original task:\n${plan.rootTask}`;
  return [header, '', 'Worker outputs:', '', ...sections].join('\n');
}
Decide how to recover it.', + '', + `Role: ${subtask.role}`, + `System prompt: ${subtask.systemPrompt}`, + `Prompt: ${subtask.prompt}`, + `Attempts so far: ${String(subtask.attempts)}`, + `Error: ${error ?? 'unknown error'}`, + '', + 'Return ONLY the JSON decision object.', + ].join('\n'); +} diff --git a/packages/agent-core/src/agent/swarm/stall-hook.ts b/packages/agent-core/src/agent/swarm/stall-hook.ts new file mode 100644 index 00000000..046f90f7 --- /dev/null +++ b/packages/agent-core/src/agent/swarm/stall-hook.ts @@ -0,0 +1,81 @@ +/** + * Stall detection for swarm worker subagents. + * + * A worker that repeats the SAME tool call (same name + canonical args) at or + * beyond a threshold is making no progress. This hook detects that repetition + * and stops the offending call so a coordinator recovery loop can revise the + * worker. It is injected ONLY for swarm workers; the main agent and regular + * subagents never receive it, so their behavior is unchanged. + * + * The repeat key reuses {@link canonicalTelemetryArgs} — the same canonical + * definition the shared tool-call deduplicator (PR #15) uses — so semantically + * equal arguments collapse to one key regardless of property order. + */ + +import { canonicalTelemetryArgs } from '../turn/canonical-args'; +import type { LoopHooks, PrepareToolExecutionResult } from '../../loop/types'; + +/** + * The only loop-hook phase a subagent (swarm worker) overrides. `TurnFlow` + * composes just `prepareToolExecution` ahead of its built-in dedup, so a + * purpose-named subset keeps the surface honest and the main agent / regular + * subagent paths provably unaffected. + */ +export type SubagentLoopHooks = Pick; + +/** Max length of the repeated-call args snippet embedded in a stall reason. */ +const STALL_ARGS_PREVIEW_MAX_CHARS = 120; + +export interface StallDetectionHookOptions { + /** Repeat count (inclusive) at which a call is treated as a stall. 
/**
 * Create the loop-hook fragment that detects a worker repeating itself.
 *
 * Every prepared tool call is tallied under a key made of the tool name and
 * its canonicalised arguments. While a key's tally is below
 * `repeatThreshold` the call passes through (`undefined`). From the
 * threshold onward the call is blocked with a `stalled: …` reason, and
 * `onStall` is invoked with that reason the first time any key gets there.
 * Tallies are kept per key, so different calls never add up towards the
 * threshold.
 *
 * @param options - Threshold and stall callback.
 * @returns An object carrying only `prepareToolExecution`.
 */
export function createStallDetectionHook(
  options: StallDetectionHookOptions,
): SubagentLoopHooks {
  const { repeatThreshold, onStall } = options;
  const repeatsByKey = new Map<string, number>();
  let stallReported = false;

  // Keep the reason short: long argument blobs are cut and marked with '…'.
  const previewArgs = (canonicalArgs: string): string => {
    if (canonicalArgs.length > STALL_ARGS_PREVIEW_MAX_CHARS) {
      return `${canonicalArgs.slice(0, STALL_ARGS_PREVIEW_MAX_CHARS)}…`;
    }
    return canonicalArgs;
  };

  return {
    prepareToolExecution: async (ctx): Promise<PrepareToolExecutionResult | undefined> => {
      const toolName = ctx.toolCall.name;
      const canonicalArgs = canonicalTelemetryArgs(ctx.args);
      const key = `${toolName} ${canonicalArgs}`;

      const repeats = (repeatsByKey.get(key) ?? 0) + 1;
      repeatsByKey.set(key, repeats);

      if (repeats < repeatThreshold) return undefined;

      // Name the tool AND show (a preview of) the repeated arguments, e.g.
      // `stalled: repeated Read({"path":"/a"}) x10`, so the reviser can see
      // what was repeated.
      const reason = `stalled: repeated ${toolName}(${previewArgs(canonicalArgs)}) x${String(repeats)}`;
      if (!stallReported) {
        stallReported = true;
        onStall(reason);
      }
      return { block: true, reason };
    },
  };
}
+ */ + role: string; + decision: 'retry' | 'regenerate' | 'reassign' | 'drop'; + /** + * For a `reassign`, the NEW role the subtask is being moved to (the + * decision's role). Lets the dashboard re-key the existing OLD-role row to + * the new role so the subtask keeps a single row across the reassign, + * rather than stranding the old row in `retrying`. Absent for other + * decisions. + */ + newRole?: string; + attempt: number; + } + | { phase: 'dropped'; subtaskId: string; role: string; reason: string } + | { phase: 'synthesizing' } + | { phase: 'done'; succeeded: number; failed: number; dropped: number }; + +export interface SwarmCoordinatorDeps { + spawnSubagent: SpawnSubagentFn; + signal: AbortSignal; + onProgress?: ((text: string) => void) | undefined; + onProgressCustom?: ((progress: SwarmProgress) => void) | undefined; + maxConcurrency?: number | undefined; + /** + * Maximum number of times a single subtask is executed before it is + * force-dropped (counting the original run). Defaults to + * {@link DEFAULT_MAX_ATTEMPTS}. + */ + maxAttempts?: number | undefined; + /** + * Safety bound on the number of wave iterations the recovery loop performs + * before giving up, to guarantee termination. Defaults to + * {@link DEFAULT_MAX_WAVES}. + */ + maxWaves?: number | undefined; +} + +/** Default repeat threshold for swarm worker stall detection. */ +export const DEFAULT_STALL_REPEAT_THRESHOLD = 10; + +/** Default cap on per-subtask execution attempts before a force-drop. */ +export const DEFAULT_MAX_ATTEMPTS = 2; + +/** Default safety cap on recovery-loop wave iterations. 
*/ +export const DEFAULT_MAX_WAVES = 6; diff --git a/packages/agent-core/src/agent/tool/index.ts b/packages/agent-core/src/agent/tool/index.ts index 550cfeba..5038b2a5 100644 --- a/packages/agent-core/src/agent/tool/index.ts +++ b/packages/agent-core/src/agent/tool/index.ts @@ -393,6 +393,9 @@ export class ToolManager { log: this.agent.log, }, ), + this.agent.subagentHost && + this.agent.type !== 'sub' && + new b.SwarmTool(this.agent.subagentHost, { log: this.agent.log }), toolServices?.webSearcher && new b.WebSearchTool(toolServices.webSearcher), toolServices?.urlFetcher && new b.FetchURLTool(toolServices.urlFetcher), ] diff --git a/packages/agent-core/src/agent/turn/index.ts b/packages/agent-core/src/agent/turn/index.ts index 068d5626..794fadca 100644 --- a/packages/agent-core/src/agent/turn/index.ts +++ b/packages/agent-core/src/agent/turn/index.ts @@ -358,6 +358,10 @@ export class TurnFlow { private async runTurn(turnId: number, signal: AbortSignal): Promise { let stopHookContinuationUsed = false; const deduper = new ToolCallDeduplicator(); + // Per-subagent loop hooks (e.g. swarm worker stall detection). Composed + // ahead of the built-in prepareToolExecution; undefined for the main agent + // and regular subagents, leaving their hook behavior unchanged. + const subagentPrepareToolExecution = this.agent.subagentLoopHooks?.prepareToolExecution; await this.agent.mcp?.waitForInitialLoad(signal); while (true) { signal.throwIfAborted(); @@ -413,6 +417,13 @@ export class TurnFlow { return { continue: false }; }, prepareToolExecution: async (ctx) => { + // Run the subagent-scoped hook first; honor its decision (a block + // ends the call) before the built-in dedup logic. Falls through + // when it returns undefined, preserving the dedup behavior. 
+ if (subagentPrepareToolExecution !== undefined) { + const subagentResult = await subagentPrepareToolExecution(ctx); + if (subagentResult !== undefined) return subagentResult; + } const cached = deduper.checkSameStep( ctx.toolCall.id, ctx.toolCall.name, diff --git a/packages/agent-core/src/profile/default/agent.yaml b/packages/agent-core/src/profile/default/agent.yaml index 82b81bd3..072904eb 100644 --- a/packages/agent-core/src/profile/default/agent.yaml +++ b/packages/agent-core/src/profile/default/agent.yaml @@ -23,6 +23,7 @@ tools: - Skill - WebSearch - Agent + - Swarm - FetchURL - AskUserQuestion - EnterPlanMode diff --git a/packages/agent-core/src/session/subagent-host.ts b/packages/agent-core/src/session/subagent-host.ts index 49861e9d..b797ad18 100644 --- a/packages/agent-core/src/session/subagent-host.ts +++ b/packages/agent-core/src/session/subagent-host.ts @@ -3,6 +3,7 @@ import type { TokenUsage } from '@moonshot-ai/kosong'; import type { Agent } from '../agent'; import type { PromptOrigin } from '../agent/context'; import type { LoopTurnStopReason } from '../loop'; +import type { SubagentLoopHooks } from '../agent/swarm/stall-hook'; import { DEFAULT_AGENT_PROFILES, prepareSystemPromptContext, @@ -11,19 +12,22 @@ import { import { linkAbortSignal } from '../utils/abort'; import { collectGitContext } from './git-context'; import type { Session } from './index'; -import SUMMARY_CONTINUATION_PROMPT from './summary-continuation.md'; - -/** - * A subagent summary shorter than this many characters triggers one - * follow-up turn that asks the subagent to expand it, so the parent - * agent receives a technically complete handoff. 
- */ -const SUMMARY_MIN_LENGTH = 200; -const SUMMARY_CONTINUATION_ATTEMPTS = 1; + const HOOK_TEXT_PREVIEW_LENGTH = 500; const SUBAGENT_MAX_TOKENS_ERROR = 'Subagent turn failed before completing its final summary: reason=max_tokens'; +export function buildOverrideProfile( + name: string, + override: { systemPrompt: string; tools: string[] }, +): ResolvedAgentProfile { + return { + name, + systemPrompt: () => override.systemPrompt, + tools: override.tools, + }; +} + type RunSubagentOptions = { readonly parentToolCallId: string; readonly parentToolCallUuid?: string | undefined; @@ -32,6 +36,14 @@ type RunSubagentOptions = { readonly runInBackground: boolean; readonly origin?: PromptOrigin | undefined; readonly signal: AbortSignal; + readonly profileOverride?: { readonly systemPrompt: string; readonly tools: string[] } | undefined; + /** + * Loop hooks scoped to this subagent only (e.g. swarm worker stall + * detection). Composed into the subagent's turn hooks alongside the + * built-in ones. Absent for the main agent and regular subagents, so their + * behavior is unchanged. + */ + readonly loopHooks?: SubagentLoopHooks | undefined; }; type SubagentCompletion = { @@ -68,7 +80,9 @@ export class SessionSubagentHost { throw new Error(`Parent agent "${this.ownerAgentId}" was not found`); } - const profile = this.resolveProfile(parent, profileName); + const profile = options.profileOverride + ? 
buildOverrideProfile(profileName, options.profileOverride) + : this.resolveProfile(parent, profileName); const { id, agent } = await this.session.createAgent( { type: 'sub', generate: parent.rawGenerate }, undefined, @@ -90,7 +104,7 @@ export class SessionSubagentHost { ...options, signal: controller.signal, }, - () => this.configureChild(parent, agent, profile), + () => this.configureChild(parent, agent, profile, options), ).finally(() => { unlinkAbortSignal(); this.activeChildren.delete(id); @@ -152,6 +166,7 @@ export class SessionSubagentHost { // reflected — a subagent always uses the parent agent's model. () => { child.config.update({ modelAlias: parent.config.modelAlias }); + child.subagentLoopHooks = options.loopHooks; return Promise.resolve(); }, ).finally(() => { @@ -235,19 +250,7 @@ export class SessionSubagentHost { child.turn.prompt([{ type: 'text', text: childPrompt }], origin); await runChildTurnToCompletion(child, options.signal); - // A subagent that returns an overly terse summary leaves the parent - // agent under-informed. Give it a bounded number of chances to expand - // the handoff; if it is still short after that, accept it as-is rather - // than retrying indefinitely. - let result = lastAssistantText(child); - let remainingContinuations = SUMMARY_CONTINUATION_ATTEMPTS; - while (remainingContinuations > 0 && result.length < SUMMARY_MIN_LENGTH) { - remainingContinuations -= 1; - options.signal.throwIfAborted(); - child.turn.prompt([{ type: 'text', text: SUMMARY_CONTINUATION_PROMPT }], origin); - await runChildTurnToCompletion(child, options.signal); - result = lastAssistantText(child); - } + const result = lastAssistantText(child); const usage = child.usage.data().total; parent.emitEvent({ type: 'subagent.completed', @@ -275,6 +278,7 @@ export class SessionSubagentHost { parent: Agent, child: Agent, profile: ResolvedAgentProfile, + options: RunSubagentOptions, ): Promise { // A subagent always inherits the parent agent's model. 
child.config.update({ @@ -283,6 +287,10 @@ export class SessionSubagentHost { thinkingLevel: parent.config.thinkingLevel, }); + // Per-worker loop hooks (e.g. swarm stall detection) are scoped to this + // child only; absent for regular subagents, leaving them unaffected. + child.subagentLoopHooks = options.loopHooks; + const context = await prepareSystemPromptContext(child.kaos); child.useProfile(profile, context); } diff --git a/packages/agent-core/src/session/summary-continuation.md b/packages/agent-core/src/session/summary-continuation.md deleted file mode 100644 index 8efb589a..00000000 --- a/packages/agent-core/src/session/summary-continuation.md +++ /dev/null @@ -1,5 +0,0 @@ -Your previous response was too brief. Please provide a more comprehensive summary that includes: - -1. Specific technical details and implementations -2. Detailed findings and analysis -3. All important information that the parent agent should know \ No newline at end of file diff --git a/packages/agent-core/src/tools/builtin/collaboration/swarm.ts b/packages/agent-core/src/tools/builtin/collaboration/swarm.ts new file mode 100644 index 00000000..e4b3951b --- /dev/null +++ b/packages/agent-core/src/tools/builtin/collaboration/swarm.ts @@ -0,0 +1,155 @@ +/** + * SwarmTool — collaboration tool that runs a task as a self-directed agent + * swarm. + * + * Like {@link AgentTool}, this is a "collaboration tool": it uses + * `SessionSubagentHost` (injected via the constructor) to create in-process + * subagents. The {@link SwarmCoordinator} dynamically decomposes the task into + * parallel role-specialized workers, then synthesizes their outputs into one + * answer. + * + * Workers are spawned with an ad-hoc `profileOverride`, and the tool is + * registered only on non-sub agents so a swarm worker can never launch another + * swarm (recursion guard). 
+ */ + +import { z } from 'zod'; + +import type { BuiltinTool } from '../../../agent/tool'; +import type { Logger } from '../../../logging'; +import type { ExecutableToolContext, ExecutableToolResult, ToolExecution } from '../../../loop/types'; +import type { SessionSubagentHost } from '../../../session/subagent-host'; +import { toInputJsonSchema } from '../../support/input-schema'; +import { SwarmCoordinator } from '../../../agent/swarm/coordinator'; +import { DEFAULT_STALL_REPEAT_THRESHOLD } from '../../../agent/swarm/types'; +import { createStallDetectionHook } from '../../../agent/swarm/stall-hook'; +import { linkAbortSignal } from '../../../utils/abort'; + +export const SwarmToolInputSchema = z.object({ + task: z.string().describe('The high-level task to decompose and run as a parallel agent swarm.'), +}); + +export type SwarmToolInput = z.infer; + +const SWARM_DESCRIPTION = + 'Run a task as a self-directed agent swarm: dynamically decompose it into parallel ' + + 'role-specialized subagents, then synthesize their outputs into one answer. ' + + 'Use for broad, parallelizable tasks (research, multi-file analysis). 
' + + 'Subagents run in isolated contexts and cannot themselves launch swarms.'; + +const DEFAULT_MAX_CONCURRENCY = 4; + +export class SwarmTool implements BuiltinTool { + readonly name: string = 'Swarm'; + readonly description: string = SWARM_DESCRIPTION; + readonly parameters: Record = toInputJsonSchema(SwarmToolInputSchema); + private readonly log: Logger | undefined; + + constructor( + private readonly subagentHost: SessionSubagentHost, + options?: { log?: Logger }, + ) { + this.log = options?.log; + } + + resolveExecution(args: SwarmToolInput): ToolExecution { + return { + description: `Running swarm: ${args.task.replace(/\s+/g, ' ').trim().slice(0, 60)}`, + approvalRule: 'Swarm', + execute: (ctx) => this.execution(args, ctx), + }; + } + + private async execution( + args: SwarmToolInput, + ctx: ExecutableToolContext, + ): Promise { + const stallRepeatThreshold = DEFAULT_STALL_REPEAT_THRESHOLD; + const coordinator = new SwarmCoordinator({ + signal: ctx.signal, + maxConcurrency: DEFAULT_MAX_CONCURRENCY, + onProgress: (text) => ctx.onUpdate?.({ kind: 'status', text }), + onProgressCustom: (progress) => + ctx.onUpdate?.({ kind: 'custom', customKind: 'swarm', customData: progress }), + spawnSubagent: async ({ profileName, systemPrompt, tools, prompt, description, signal }) => { + // Workers (the swarm: spawns) get stall detection. Planner and + // synthesizer make no tool calls, so the hook is harmless there but we + // scope it to workers to keep their behavior identical. 
+ const isWorker = profileName.startsWith('swarm:'); + if (!isWorker) { + const handle = await this.subagentHost.spawn(profileName, { + parentToolCallId: ctx.toolCallId, + prompt, + description, + runInBackground: false, + signal, + profileOverride: { systemPrompt, tools }, + }); + return handle.completion; + } + + // Per-worker AbortController linked to the incoming signal: a + // coordinator cancel still propagates DOWN, but a stall aborts ONLY + // this worker — the coordinator's signal stays unaborted, so the wave + // records a single failed subtask instead of cancelling the swarm. + const workerController = new AbortController(); + const unlink = linkAbortSignal(signal, workerController); + let stallReason: string | undefined; + const loopHooks = createStallDetectionHook({ + repeatThreshold: stallRepeatThreshold, + onStall: (reason) => { + stallReason = reason; + this.log?.warn(`swarm worker stalled (${description}): ${reason}`); + workerController.abort(new Error(reason)); + }, + }); + try { + const handle = await this.subagentHost.spawn(profileName, { + parentToolCallId: ctx.toolCallId, + prompt, + description, + runInBackground: false, + signal: workerController.signal, + profileOverride: { systemPrompt, tools }, + loopHooks, + }); + return await handle.completion; + } catch (error) { + // A stall aborts the worker, which surfaces as a generic cancellation + // ("Subagent turn cancelled"). Re-throw the distinguishable stalled + // reason instead so the coordinator records it on the subtask — but + // only when the incoming (coordinator) signal is NOT itself aborted, + // so a genuine swarm-wide cancel still propagates as a cancel. + if (stallReason !== undefined && !signal.aborted) { + throw new Error(stallReason, { cause: error }); + } + throw error; + } finally { + unlink(); + } + }, + }); + + try { + const output = await coordinator.run(args.task); + return { output }; + } catch (err) { + const message = err instanceof Error ? 
err.message : String(err); + this.log?.error(`swarm failed: ${message}`); + // Distinguish an ordinary failure (planner/synthesizer error) from a + // genuine cancel (the turn was aborted). For a real failure, drive the + // dashboard to a 'failed' state that surfaces the reason; an abort emits + // nothing here so the card still finalizes as 'cancelled' on the error + // result. Without this, ordinary failures masquerade as a success-toned + // 'cancelled' card with the reason hidden. + if (!ctx.signal.aborted) { + ctx.onUpdate?.({ + kind: 'custom', + customKind: 'swarm', + customData: { phase: 'failed', message }, + }); + } + return { output: `Swarm failed: ${message}`, isError: true }; + } + } +} diff --git a/packages/agent-core/src/tools/builtin/index.ts b/packages/agent-core/src/tools/builtin/index.ts index ebbe0dc7..70c21875 100644 --- a/packages/agent-core/src/tools/builtin/index.ts +++ b/packages/agent-core/src/tools/builtin/index.ts @@ -8,6 +8,7 @@ export * from '../cron/cron-list'; export * from './collaboration/agent'; export * from './collaboration/ask-user'; export * from './collaboration/skill-tool'; +export * from './collaboration/swarm'; export * from './file/edit'; export * from './file/glob'; export * from './file/grep'; diff --git a/packages/agent-core/test/agent/swarm/stall-hook-turn.e2e.test.ts b/packages/agent-core/test/agent/swarm/stall-hook-turn.e2e.test.ts new file mode 100644 index 00000000..c59c4d76 --- /dev/null +++ b/packages/agent-core/test/agent/swarm/stall-hook-turn.e2e.test.ts @@ -0,0 +1,97 @@ +/** + * Turn-level proof that the swarm stall hook + per-worker abort wiring stops a + * worker that repeats the same tool call, while distinct calls run to + * completion. This mirrors how the swarm spawnSubagent adapter wires the hook: + * a per-worker AbortController linked to the parent signal, and an onStall + * callback that aborts that controller with a distinguishable reason. 
+ */ + +import { describe, expect, it } from 'vitest'; + +import { createStallDetectionHook } from '../../../src/agent/swarm/stall-hook'; +import type { LoopHooks } from '../../../src/loop/index'; +import { linkAbortSignal } from '../../../src/utils/abort'; +import { makeToolCall, makeToolUseResponse, makeEndTurnResponse } from '../../loop/fixtures/fake-llm'; +import { runTurn } from '../../loop/fixtures/helpers'; +import { EchoTool } from '../../loop/fixtures/tools'; + +describe('swarm stall hook — turn level', () => { + it('stops a worker that repeats the same tool call and aborts its per-worker controller', async () => { + // Parent (coordinator) signal stays unaborted; per-worker controller is + // linked to it so coordinator cancel still propagates down. + const parent = new AbortController(); + const worker = new AbortController(); + const unlink = linkAbortSignal(parent.signal, worker); + + let stallReason: string | undefined; + const hook: Partial = createStallDetectionHook({ + repeatThreshold: 3, + onStall: (reason) => { + stallReason = reason; + worker.abort(new Error(reason)); + }, + }); + + const echo = new EchoTool(); + // LLM keeps emitting the identical tool call; without a stop the loop would + // run forever. The hook must block on the 3rd repeat and the abort must end + // the turn. + const sameCall = () => makeToolUseResponse([makeToolCall('echo', { text: 'spin' })]); + const responses = [sameCall(), sameCall(), sameCall(), sameCall(), sameCall()]; + + const { result } = await runTurn({ + hooks: hook as LoopHooks, + tools: [echo], + responses, + signal: worker.signal, + }); + + unlink(); + + // Worker turn ended as aborted; the per-worker controller fired. + expect(result.stopReason).toBe('aborted'); + expect(worker.signal.aborted).toBe(true); + expect(stallReason).toMatch(/stalled/i); + expect(stallReason).toContain('echo'); + // The reason carries the repeated call's args so a reviser sees WHAT spun. 
+ expect(stallReason).toContain('spin'); + // Crucially the coordinator's signal is NOT aborted — a single worker + // failure, not a whole-swarm cancel. + expect(parent.signal.aborted).toBe(false); + // The blocked call never executed the tool. + expect(echo.calls.length).toBeLessThan(3); + }); + + it('lets distinct progressing tool calls run to completion without stalling', async () => { + const worker = new AbortController(); + let stalled = false; + const hook: Partial = createStallDetectionHook({ + repeatThreshold: 3, + onStall: () => { + stalled = true; + worker.abort(); + }, + }); + + const echo = new EchoTool(); + const responses = [ + makeToolUseResponse([makeToolCall('echo', { text: 'a' })]), + makeToolUseResponse([makeToolCall('echo', { text: 'b' })]), + makeToolUseResponse([makeToolCall('echo', { text: 'c' })]), + makeToolUseResponse([makeToolCall('echo', { text: 'd' })]), + makeEndTurnResponse('done'), + ]; + + const { result } = await runTurn({ + hooks: hook as LoopHooks, + tools: [echo], + responses, + signal: worker.signal, + }); + + expect(stalled).toBe(false); + expect(worker.signal.aborted).toBe(false); + expect(result.stopReason).toBe('end_turn'); + expect(echo.calls.length).toBe(4); + }); +}); diff --git a/packages/agent-core/test/agent/swarm/stall-hook.test.ts b/packages/agent-core/test/agent/swarm/stall-hook.test.ts new file mode 100644 index 00000000..6f4ee643 --- /dev/null +++ b/packages/agent-core/test/agent/swarm/stall-hook.test.ts @@ -0,0 +1,99 @@ +import { describe, expect, it, vi } from 'vitest'; + +import { createStallDetectionHook } from '../../../src/agent/swarm/stall-hook'; +import type { ToolExecutionHookContext } from '../../../src/loop/index'; + +function makeCtx(name: string, args: unknown, id = 'call'): ToolExecutionHookContext { + return { + toolCall: { type: 'function', id, name, arguments: JSON.stringify(args) }, + args, + turnId: 'turn-1', + stepNumber: 1, + signal: new AbortController().signal, + // `llm` is unused by the 
stall hook; the cast keeps the fixture small. + } as unknown as ToolExecutionHookContext; +} + +describe('createStallDetectionHook', () => { + it('blocks and fires onStall exactly once when the same call repeats >= threshold', async () => { + const onStall = vi.fn(); + const hook = createStallDetectionHook({ repeatThreshold: 3, onStall }); + const prepare = hook.prepareToolExecution; + expect(prepare).toBeDefined(); + + const ctx = makeCtx('Read', { path: '/a' }); + + const r1 = await prepare!(ctx); + const r2 = await prepare!(ctx); + expect(r1).toBeUndefined(); + expect(r2).toBeUndefined(); + expect(onStall).not.toHaveBeenCalled(); + + const r3 = await prepare!(ctx); + expect(r3?.block).toBe(true); + expect(r3?.reason).toMatch(/stalled/i); + expect(r3?.reason).toContain('Read'); + // The reason includes the repeated call's canonical args so the reviser + // can tell WHAT was repeated, not just which tool. + expect(r3?.reason).toContain('/a'); + expect(r3?.reason).toContain('"path"'); + expect(onStall).toHaveBeenCalledTimes(1); + expect(onStall).toHaveBeenLastCalledWith(r3?.reason); + + // Further repeats keep blocking but never re-fire onStall. + const r4 = await prepare!(ctx); + expect(r4?.block).toBe(true); + expect(onStall).toHaveBeenCalledTimes(1); + }); + + it('truncates very long repeated args in the stall reason', async () => { + const onStall = vi.fn(); + const hook = createStallDetectionHook({ repeatThreshold: 2, onStall }); + const prepare = hook.prepareToolExecution!; + + const longPattern = 'x'.repeat(500); + const ctx = makeCtx('Grep', { pattern: longPattern }); + await prepare(ctx); + const r = await prepare(ctx); + expect(r?.block).toBe(true); + // Truncated with an ellipsis — the full 500-char pattern is not embedded. 
+ expect(r?.reason).toContain('…'); + expect(r?.reason?.length).toBeLessThan(longPattern.length); + expect(r?.reason).toContain('Grep'); + }); + + it('never triggers on distinct progressing calls', async () => { + const onStall = vi.fn(); + const hook = createStallDetectionHook({ repeatThreshold: 3, onStall }); + const prepare = hook.prepareToolExecution!; + + for (let i = 0; i < 10; i += 1) { + const r = await prepare(makeCtx('Read', { path: `/file-${String(i)}` })); + expect(r).toBeUndefined(); + } + expect(onStall).not.toHaveBeenCalled(); + }); + + it('treats canonically-equal args as the same key (key order independent)', async () => { + const onStall = vi.fn(); + const hook = createStallDetectionHook({ repeatThreshold: 2, onStall }); + const prepare = hook.prepareToolExecution!; + + const r1 = await prepare(makeCtx('Edit', { a: 1, b: 2 })); + const r2 = await prepare(makeCtx('Edit', { b: 2, a: 1 })); + expect(r1).toBeUndefined(); + expect(r2?.block).toBe(true); + expect(onStall).toHaveBeenCalledTimes(1); + }); + + it('keys on tool name too: same args under different names do not collide', async () => { + const onStall = vi.fn(); + const hook = createStallDetectionHook({ repeatThreshold: 2, onStall }); + const prepare = hook.prepareToolExecution!; + + await prepare(makeCtx('Read', { path: '/a' })); + const r = await prepare(makeCtx('Grep', { path: '/a' })); + expect(r).toBeUndefined(); + expect(onStall).not.toHaveBeenCalled(); + }); +}); diff --git a/packages/agent-core/test/agent/swarm/subagent-hooks-unaffected.test.ts b/packages/agent-core/test/agent/swarm/subagent-hooks-unaffected.test.ts new file mode 100644 index 00000000..8af36edf --- /dev/null +++ b/packages/agent-core/test/agent/swarm/subagent-hooks-unaffected.test.ts @@ -0,0 +1,96 @@ +/** + * Locks the "main agent / regular subagent unaffected" invariant: when + * `agent.subagentLoopHooks` is UNDEFINED (every path except a swarm worker), + * `TurnFlow.runTurn` composes ONLY the built-in 
`prepareToolExecution` (the + * tool-call deduplicator). This test replicates that composition exactly and + * proves the built-in same-step dedup still short-circuits identical calls, + * so non-swarm turns run with unchanged behavior. + */ + +import { describe, expect, it } from 'vitest'; + +import { ToolCallDeduplicator } from '../../../src/agent/turn/tool-dedup'; +import type { + LoopHooks, + PrepareToolExecutionHook, +} from '../../../src/loop/index'; +import { makeToolCall, makeToolUseResponse, makeEndTurnResponse } from '../../loop/fixtures/fake-llm'; +import { runTurn } from '../../loop/fixtures/helpers'; +import { EchoTool } from '../../loop/fixtures/tools'; + +/** + * Build the same `prepareToolExecution` hook `TurnFlow.runTurn` builds: run the + * subagent-scoped hook first (when present), then fall through to the built-in + * dedup. With `subagentPrepareToolExecution` undefined this is exactly the + * non-swarm composition. + */ +function buildHooks( + deduper: ToolCallDeduplicator, + subagentPrepareToolExecution: PrepareToolExecutionHook | undefined, +): LoopHooks { + return { + beforeStep: async () => { + deduper.beginStep(); + return; + }, + afterStep: async () => { + deduper.endStep(); + }, + prepareToolExecution: async (ctx) => { + if (subagentPrepareToolExecution !== undefined) { + const subagentResult = await subagentPrepareToolExecution(ctx); + if (subagentResult !== undefined) return subagentResult; + } + const cached = deduper.checkSameStep(ctx.toolCall.id, ctx.toolCall.name, ctx.args); + if (cached !== null) return { syntheticResult: cached }; + return undefined; + }, + finalizeToolResult: async (ctx) => { + return deduper.finalizeResult(ctx.toolCall.id, ctx.toolCall.name, ctx.args, ctx.result); + }, + }; +} + +describe('subagentLoopHooks undefined — non-swarm turn unaffected', () => { + it('built-in same-step dedup still short-circuits identical calls (no subagent hook)', async () => { + const deduper = new ToolCallDeduplicator(); + // 
subagentLoopHooks UNDEFINED — the non-swarm / regular-subagent case. + const hooks = buildHooks(deduper, undefined); + + const echo = new EchoTool(); + // One step emits the identical tool call twice; the built-in dedup must + // execute the tool only once and serve the second from the placeholder. + const responses = [ + makeToolUseResponse([ + makeToolCall('echo', { text: 'same' }, 'c1'), + makeToolCall('echo', { text: 'same' }, 'c2'), + ]), + makeEndTurnResponse('done'), + ]; + + const { result } = await runTurn({ hooks, tools: [echo], responses }); + + expect(result.stopReason).toBe('end_turn'); + // Dedup short-circuits the duplicate: the tool ran exactly once. + expect(echo.calls.length).toBe(1); + }); + + it('distinct same-step calls all execute (dedup does not over-collapse)', async () => { + const deduper = new ToolCallDeduplicator(); + const hooks = buildHooks(deduper, undefined); + + const echo = new EchoTool(); + const responses = [ + makeToolUseResponse([ + makeToolCall('echo', { text: 'a' }, 'c1'), + makeToolCall('echo', { text: 'b' }, 'c2'), + ]), + makeEndTurnResponse('done'), + ]; + + const { result } = await runTurn({ hooks, tools: [echo], responses }); + + expect(result.stopReason).toBe('end_turn'); + expect(echo.calls.length).toBe(2); + }); +}); diff --git a/packages/agent-core/test/session/subagent-host.test.ts b/packages/agent-core/test/session/subagent-host.test.ts index faf62052..fde45abc 100644 --- a/packages/agent-core/test/session/subagent-host.test.ts +++ b/packages/agent-core/test/session/subagent-host.test.ts @@ -262,6 +262,37 @@ describe('SessionSubagentHost', () => { ]); }); + it('uses the profileOverride system prompt and tools instead of a registry profile', async () => { + const parent = testAgent(); + parent.configure(); + parent.newEvents(); + + const summary = + 'Researched the requested topic thoroughly and returned a complete, detailed summary that gives the parent agent everything it needs to continue without repeating the 
investigation work that was already finished here.'; + const child = testAgent(); + child.mockNextResponse({ type: 'text', text: summary }); + const session = fakeSession(parent.agent, child.agent); + const host = new SessionSubagentHost(session, 'main'); + + const handle = await host.spawn('swarm:Researcher', { + parentToolCallId: 'call_agent', + prompt: 'Research the topic', + description: 'Research', + runInBackground: false, + signal, + profileOverride: { + systemPrompt: 'You are a researcher.', + tools: ['Read', 'Grep'], + }, + }); + + await expect(handle.completion).resolves.toMatchObject({ result: summary }); + expect(handle.profileName).toBe('swarm:Researcher'); + expect(child.agent.config.systemPrompt).toBe('You are a researcher.'); + expect(child.llmCalls[0]?.systemPrompt).toBe('You are a researcher.'); + expect(child.llmCalls[0]?.tools.map((tool) => tool.name).toSorted()).toEqual(['Grep', 'Read']); + }); + it('rejects unknown subagent types before creating a child agent', async () => { const parent = testAgent(); parent.configure(); @@ -404,15 +435,17 @@ describe('SessionSubagentHost', () => { ); }); - it('re-prompts the child when the first summary is too short', async () => { + it('returns a short summary as-is without re-prompting the child', async () => { const parent = testAgent(); parent.configure(); parent.newEvents(); - const longSummary = 'Detailed findings: '.repeat(20); + const shortSummary = 'done'; const child = testAgent(); - child.mockNextResponse({ type: 'text', text: 'done' }); - child.mockNextResponse({ type: 'text', text: longSummary }); + child.mockNextResponse({ type: 'text', text: shortSummary }); + // A second response is queued to prove it is never consumed: a short + // summary must NOT trigger a follow-up "expand" turn. 
+ child.mockNextResponse({ type: 'text', text: 'Detailed findings: '.repeat(20) }); const session = fakeSession(parent.agent, child.agent); const host = new SessionSubagentHost(session, 'main'); @@ -424,12 +457,8 @@ describe('SessionSubagentHost', () => { signal, }); - await expect(handle.completion).resolves.toMatchObject({ result: longSummary.trim() }); - expect(child.llmCalls).toHaveLength(2); - expect(child.llmCalls[1]?.history.at(-1)).toMatchObject({ - role: 'user', - content: [{ type: 'text', text: expect.stringContaining('too brief') }], - }); + await expect(handle.completion).resolves.toMatchObject({ result: shortSummary }); + expect(child.llmCalls).toHaveLength(1); }); it('fails the child instead of re-prompting when the response is truncated', async () => { diff --git a/packages/agent-core/test/session/subagent-override.test.ts b/packages/agent-core/test/session/subagent-override.test.ts new file mode 100644 index 00000000..656156ab --- /dev/null +++ b/packages/agent-core/test/session/subagent-override.test.ts @@ -0,0 +1,26 @@ +import { describe, expect, it } from 'vitest'; + +import { buildOverrideProfile } from '../../src/session/subagent-host'; +import { TEST_OS_ENV } from '../fixtures/test-kaos'; + +describe('buildOverrideProfile', () => { + it('builds a profile whose renderer returns the literal system prompt', () => { + const profile = buildOverrideProfile('swarm:Auditor', { + systemPrompt: 'You are a dependency auditor.', + tools: ['Read', 'Grep'], + }); + + expect(profile.name).toBe('swarm:Auditor'); + expect(profile.tools).toEqual(['Read', 'Grep']); + expect(profile.systemPrompt({ osEnv: TEST_OS_ENV, cwd: '/tmp' })).toBe( + 'You are a dependency auditor.', + ); + }); + + it('ignores render context and always returns the override text', () => { + const profile = buildOverrideProfile('swarm:X', { systemPrompt: 'fixed', tools: [] }); + expect(profile.systemPrompt({ osEnv: TEST_OS_ENV, cwd: '/a', cwdListing: 'noise' })).toBe( + 'fixed', + ); + }); 
+});
diff --git a/packages/agent-core/test/swarm/concurrency.test.ts b/packages/agent-core/test/swarm/concurrency.test.ts
new file mode 100644
index 00000000..bd786610
--- /dev/null
+++ b/packages/agent-core/test/swarm/concurrency.test.ts
@@ -0,0 +1,33 @@
+import { describe, expect, it } from 'vitest';
+
+import { mapWithConcurrency } from '../../src/agent/swarm/concurrency';
+
+describe('mapWithConcurrency', () => {
+  it('processes every item', async () => {
+    const seen: number[] = [];
+    await mapWithConcurrency([1, 2, 3, 4], 2, async (n) => {
+      seen.push(n);
+    });
+    expect(seen.sort((a, b) => a - b)).toEqual([1, 2, 3, 4]);
+  });
+
+  it('never exceeds the concurrency limit', async () => {
+    let active = 0;
+    let peak = 0;
+    await mapWithConcurrency([1, 2, 3, 4, 5, 6], 2, async () => {
+      active += 1;
+      peak = Math.max(peak, active);
+      await new Promise((r) => setTimeout(r, 5));
+      active -= 1;
+    });
+    expect(peak).toBeLessThanOrEqual(2);
+  });
+
+  it('treats a limit below 1 as 1', async () => {
+    const seen: number[] = [];
+    await mapWithConcurrency([1, 2], 0, async (n) => {
+      seen.push(n);
+    });
+    expect(seen.sort((a, b) => a - b)).toEqual([1, 2]);
+  });
+});
diff --git a/packages/agent-core/test/swarm/coordinator.test.ts b/packages/agent-core/test/swarm/coordinator.test.ts
new file mode 100644
index 00000000..b9a4fbd6
--- /dev/null
+++ b/packages/agent-core/test/swarm/coordinator.test.ts
@@ -0,0 +1,428 @@
+import { describe, expect, it, vi } from 'vitest';
+
+import { SwarmCoordinator } from '../../src/agent/swarm/coordinator';
+import type { SpawnSubagentFn } from '../../src/agent/swarm/types';
+
+const PLAN_JSON = JSON.stringify({
+  subtasks: [
+    { role: 'Researcher', systemPrompt: 'sp-research', prompt: 'p-research' },
+    { role: 'Analyst', systemPrompt: 'sp-analyst', prompt: 'p-analyst', toolAllowlist: ['Read'] },
+  ],
+});
+
+function makeSpawner(byProfile: Record<string, string>): SpawnSubagentFn {
+  return vi.fn(async (args) => {
+    if (args.profileName === 'swarm-planner') return { result: '```json\n' + PLAN_JSON + '\n```' };
+    if (args.profileName === 'swarm-synthesizer') return { result: 'FINAL ANSWER' };
+    const key = args.profileName;
+    return { result: byProfile[key] ?? `done:${args.description}` };
+  });
+}
+
+describe('SwarmCoordinator.run', () => {
+  it('plans, runs workers concurrently, and synthesizes', async () => {
+    const spawn = makeSpawner({});
+    const coordinator = new SwarmCoordinator({
+      spawnSubagent: spawn,
+      signal: new AbortController().signal,
+      maxConcurrency: 4,
+    });
+
+    const result = await coordinator.run('do a thing');
+
+    expect(result).toBe('FINAL ANSWER');
+    const calls = (spawn as ReturnType<typeof vi.fn>).mock.calls.map((c) => c[0]);
+    expect(calls).toHaveLength(4);
+    expect(calls[0].profileName).toBe('swarm-planner');
+    expect(calls.some((c) => c.profileName === 'swarm:Researcher' && c.systemPrompt === 'sp-research')).toBe(true);
+    expect(calls.some((c) => c.profileName === 'swarm:Analyst' && c.tools.includes('Read'))).toBe(true);
+    expect(calls[calls.length - 1].profileName).toBe('swarm-synthesizer');
+  });
+
+  it('retries planning once on invalid JSON, then succeeds', async () => {
+    let first = true;
+    const spawn: SpawnSubagentFn = vi.fn(async (args) => {
+      if (args.profileName === 'swarm-planner') {
+        if (first) {
+          first = false;
+          return { result: 'not json at all' };
+        }
+        return { result: PLAN_JSON };
+      }
+      if (args.profileName === 'swarm-synthesizer') return { result: 'OK' };
+      return { result: 'worker-done' };
+    });
+    const coordinator = new SwarmCoordinator({ spawnSubagent: spawn, signal: new AbortController().signal });
+    const result = await coordinator.run('x');
+    expect(result).toBe('OK');
+  });
+
+  it('throws when planning fails twice', async () => {
+    const spawn: SpawnSubagentFn = vi.fn(async () => ({ result: 'never json' }));
+    const coordinator = new SwarmCoordinator({ spawnSubagent: spawn, signal: new AbortController().signal });
+    await expect(coordinator.run('x')).rejects.toThrow(/valid plan/i);
+  });
+
+  it('records a failed worker and still synthesizes', async () => {
+    const spawn: SpawnSubagentFn = vi.fn(async (args) => {
+      if (args.profileName === 'swarm-planner') return { result: PLAN_JSON };
+      if (args.profileName === 'swarm-synthesizer') return { result: 'SYNTH' };
+      if (args.profileName === 'swarm:Researcher') throw new Error('boom');
+      return { result: 'analyst-done' };
+    });
+    const onProgress = vi.fn();
+    const coordinator = new SwarmCoordinator({
+      spawnSubagent: spawn,
+      signal: new AbortController().signal,
+      onProgress,
+    });
+    const result = await coordinator.run('x');
+    expect(result).toBe('SYNTH');
+    expect(onProgress.mock.calls.some((c) => /failed/i.test(String(c[0])))).toBe(true);
+  });
+
+  it('strips disallowed tools (Agent/Bash) from a planner-supplied allowlist', async () => {
+    const planWithBadTools = JSON.stringify({
+      subtasks: [{ role: 'X', systemPrompt: 's', prompt: 'p', toolAllowlist: ['Agent', 'Read', 'Bash'] }],
+    });
+    const spawn = vi.fn(async (args) => {
+      if (args.profileName === 'swarm-planner') return { result: planWithBadTools };
+      if (args.profileName === 'swarm-synthesizer') return { result: 'S' };
+      return { result: 'w' };
+    });
+    const coordinator = new SwarmCoordinator({ spawnSubagent: spawn, signal: new AbortController().signal });
+    await coordinator.run('x');
+    const worker = (spawn as ReturnType<typeof vi.fn>).mock.calls
+      .map((c) => c[0])
+      .find((c) => c.profileName === 'swarm:X');
+    expect(worker?.tools).toEqual(['Read']);
+  });
+
+  it('emits structured progress: planned(total) → synthesizing → done', async () => {
+    const spawn = makeSpawner({});
+    const onProgressCustom = vi.fn();
+    const coordinator = new SwarmCoordinator({
+      spawnSubagent: spawn,
+      signal: new AbortController().signal,
+      onProgressCustom,
+    });
+    await coordinator.run('do a thing');
+    const payloads = (onProgressCustom as ReturnType<typeof vi.fn>).mock.calls.map((c) => c[0]);
+    expect(payloads).toContainEqual({ phase: 'planned', total: 2 });
+    expect(payloads).toContainEqual({ phase: 'synthesizing' });
+    expect(
+      payloads.some(
+        (p) => p.phase === 'done' && p.succeeded === 2 && p.failed === 0 && p.dropped === 0,
+      ),
+    ).toBe(true);
+  });
+
+  it('propagates abort instead of swallowing it (no synthesis after cancel)', async () => {
+    const controller = new AbortController();
+    const PLAN = JSON.stringify({ subtasks: [{ role: 'A', systemPrompt: 's', prompt: 'p' }] });
+    const spawn = vi.fn(async (args) => {
+      if (args.profileName === 'swarm-planner') return { result: PLAN };
+      controller.abort();
+      const e = new Error('aborted');
+      e.name = 'AbortError';
+      throw e;
+    });
+    const coordinator = new SwarmCoordinator({ spawnSubagent: spawn, signal: controller.signal });
+    await expect(coordinator.run('x')).rejects.toThrow();
+    const profiles = (spawn as ReturnType<typeof vi.fn>).mock.calls.map((c) => c[0].profileName);
+    expect(profiles).not.toContain('swarm-synthesizer');
+  });
+});
+
+// One-subtask plan keeps wave behavior deterministic for recovery tests.
+const ONE_PLAN = JSON.stringify({
+  subtasks: [{ id: 'task-1', role: 'Worker', systemPrompt: 'sp', prompt: 'p-original' }],
+});
+
+describe('SwarmCoordinator failure recovery', () => {
+  it('retry: a worker fails once, reviser says retry, re-run succeeds', async () => {
+    let workerCalls = 0;
+    const spawn = vi.fn(async (args) => {
+      if (args.profileName === 'swarm-planner') return { result: ONE_PLAN };
+      if (args.profileName === 'swarm-synthesizer') return { result: 'SYNTH' };
+      if (args.profileName === 'swarm-reviser') return { result: '{"kind":"retry"}' };
+      // swarm:Worker
+      workerCalls += 1;
+      if (workerCalls === 1) throw new Error('boom');
+      return { result: 'worker-ok' };
+    });
+    const onProgressCustom = vi.fn();
+    const coordinator = new SwarmCoordinator({
+      spawnSubagent: spawn,
+      signal: new AbortController().signal,
+      onProgressCustom,
+    });
+    const result = await coordinator.run('x');
+    expect(result).toBe('SYNTH');
+    expect(workerCalls).toBe(2);
+    const payloads = (onProgressCustom as ReturnType<typeof vi.fn>).mock.calls.map((c) => c[0]);
+    expect(payloads).toContainEqual({
+      phase: 'revising',
+      subtaskId: 'task-1',
+      role: 'Worker',
+      decision: 'retry',
+      attempt: 1,
+    });
+    expect(
+      payloads.some(
+        (p) => p.phase === 'done' && p.succeeded === 1 && p.failed === 0 && p.dropped === 0,
+      ),
+    ).toBe(true);
+  });
+
+  it('regenerate: re-run uses the new prompt from the reviser', async () => {
+    const workerPrompts: string[] = [];
+    let workerCalls = 0;
+    const spawn = vi.fn(async (args) => {
+      if (args.profileName === 'swarm-planner') return { result: ONE_PLAN };
+      if (args.profileName === 'swarm-synthesizer') return { result: 'SYNTH' };
+      if (args.profileName === 'swarm-reviser')
+        return { result: '{"kind":"regenerate","prompt":"NEW PROMPT"}' };
+      workerCalls += 1;
+      workerPrompts.push(args.prompt);
+      if (workerCalls === 1) throw new Error('boom');
+      return { result: 'worker-ok' };
+    });
+    const coordinator = new SwarmCoordinator({
+      spawnSubagent: spawn,
+      signal: new AbortController().signal,
+    });
+    const result = await coordinator.run('x');
+    expect(result).toBe('SYNTH');
+    expect(workerPrompts[0]).toBe('p-original');
+    expect(workerPrompts[1]).toBe('NEW PROMPT');
+  });
+
+  it('reassign: re-run uses the new role, systemPrompt, and tools', async () => {
+    const seen: Array<{ profileName: string; systemPrompt: string; tools: string[] }> = [];
+    let workerCalls = 0;
+    const spawn = vi.fn(async (args) => {
+      if (args.profileName === 'swarm-planner') return { result: ONE_PLAN };
+      if (args.profileName === 'swarm-synthesizer') return { result: 'SYNTH' };
+      if (args.profileName === 'swarm-reviser')
+        return {
+          result: '{"kind":"reassign","role":"R2","systemPrompt":"SP2","toolAllowlist":["Read"]}',
+        };
+      seen.push({
+        profileName: args.profileName,
+        systemPrompt: args.systemPrompt,
+        tools: args.tools,
+      });
+      workerCalls += 1;
+      if (workerCalls === 1) throw new Error('boom');
+      return { result: 'worker-ok' };
+    });
+    const onProgressCustom = vi.fn();
+    const coordinator = new SwarmCoordinator({
+      spawnSubagent: spawn,
+      signal: new AbortController().signal,
+      onProgressCustom,
+    });
+    const result = await coordinator.run('x');
+    expect(result).toBe('SYNTH');
+    expect(seen[0]?.profileName).toBe('swarm:Worker');
+    expect(seen[1]?.profileName).toBe('swarm:R2');
+    expect(seen[1]?.systemPrompt).toBe('SP2');
+    expect(seen[1]?.tools).toEqual(['Read']);
+    // The 'revising' event carries the role as it was BEFORE the reassign so
+    // the dashboard can correlate it to the existing worker row, plus the NEW
+    // role so the dashboard can re-key that row instead of stranding it.
+    const payloads = (onProgressCustom as ReturnType<typeof vi.fn>).mock.calls.map((c) => c[0]);
+    expect(payloads).toContainEqual({
+      phase: 'revising',
+      subtaskId: 'task-1',
+      role: 'Worker',
+      decision: 'reassign',
+      newRole: 'R2',
+      attempt: 1,
+    });
+  });
+
+  it('drop (LLM-chosen): a dropped subtask is not re-run and is surfaced as a gap', async () => {
+    let workerCalls = 0;
+    let synthesizerPrompt: string | undefined;
+    const spawn = vi.fn(async (args) => {
+      if (args.profileName === 'swarm-planner') return { result: ONE_PLAN };
+      if (args.profileName === 'swarm-synthesizer') {
+        synthesizerPrompt = args.prompt;
+        return { result: 'SYNTH' };
+      }
+      if (args.profileName === 'swarm-reviser')
+        return { result: '{"kind":"drop","reason":"impossible"}' };
+      workerCalls += 1;
+      throw new Error('boom');
+    });
+    const onProgressCustom = vi.fn();
+    const coordinator = new SwarmCoordinator({
+      spawnSubagent: spawn,
+      signal: new AbortController().signal,
+      onProgressCustom,
+    });
+    const result = await coordinator.run('x');
+    expect(result).toBe('SYNTH');
+    expect(workerCalls).toBe(1); // ran once, then dropped — never re-run
+    const payloads = (onProgressCustom as ReturnType<typeof vi.fn>).mock.calls.map((c) => c[0]);
+    expect(payloads).toContainEqual({
+      phase: 'dropped',
+      subtaskId: 'task-1',
+      role: 'Worker',
+      reason: 'impossible',
+    });
+    expect(
+      payloads.some(
+        (p) => p.phase === 'done' && p.succeeded === 0 && p.failed === 0 && p.dropped === 1,
+      ),
+    ).toBe(true);
+    expect(synthesizerPrompt).toMatch(/DROPPED/);
+    expect(synthesizerPrompt).toContain('impossible');
+  });
+
+  it('maxAttempts: a perpetually failing subtask runs exactly maxAttempts times then force-drops', async () => {
+    let workerCalls = 0;
+    const spawn = vi.fn(async (args) => {
+      if (args.profileName === 'swarm-planner') return { result: ONE_PLAN };
+      if (args.profileName === 'swarm-synthesizer') return { result: 'SYNTH' };
+      if (args.profileName === 'swarm-reviser') return { result: '{"kind":"retry"}' };
+      workerCalls += 1;
+      throw new Error('always-boom');
+    });
+    const onProgressCustom = vi.fn();
+    const coordinator = new SwarmCoordinator({
+      spawnSubagent: spawn,
+      signal: new AbortController().signal,
+      maxAttempts: 2,
+      onProgressCustom,
+    });
+    const result = await coordinator.run('x');
+    expect(result).toBe('SYNTH');
+    expect(workerCalls).toBe(2); // exactly maxAttempts runs
+    const payloads = (onProgressCustom as ReturnType<typeof vi.fn>).mock.calls.map((c) => c[0]);
+    expect(payloads.some((p) => p.phase === 'dropped' && p.subtaskId === 'task-1')).toBe(true);
+    expect(
+      payloads.some((p) => p.phase === 'done' && p.succeeded === 0 && p.dropped === 1),
+    ).toBe(true);
+    // Reviser is consulted only after attempt 1 (attempt 2 hits the cap and force-drops).
+    const reviserCalls = (spawn as ReturnType<typeof vi.fn>).mock.calls
+      .map((c) => c[0])
+      .filter((c) => c.profileName === 'swarm-reviser');
+    expect(reviserCalls).toHaveLength(1);
+  });
+
+  it('reviser parse failure falls back to a conservative drop (does not burn attempts)', async () => {
+    let workerCalls = 0;
+    const spawn = vi.fn(async (args) => {
+      if (args.profileName === 'swarm-planner') return { result: ONE_PLAN };
+      if (args.profileName === 'swarm-synthesizer') return { result: 'SYNTH' };
+      if (args.profileName === 'swarm-reviser') return { result: 'I am confused, no json here' };
+      workerCalls += 1;
+      throw new Error('boom');
+    });
+    const onProgressCustom = vi.fn();
+    const coordinator = new SwarmCoordinator({
+      spawnSubagent: spawn,
+      signal: new AbortController().signal,
+      onProgressCustom,
+    });
+    const result = await coordinator.run('x');
+    expect(result).toBe('SYNTH');
+    expect(workerCalls).toBe(1);
+    const payloads = (onProgressCustom as ReturnType<typeof vi.fn>).mock.calls.map((c) => c[0]);
+    expect(payloads).toContainEqual({
+      phase: 'revising',
+      subtaskId: 'task-1',
+      role: 'Worker',
+      decision: 'drop',
+      attempt: 1,
+    });
+    expect(payloads.some((p) => p.phase === 'dropped' && p.subtaskId === 'task-1')).toBe(true);
+  });
+
+  it('multi-wave: a revised subtask re-runs in a later wave and the loop terminates', async () => {
+    // Two subtasks; both fail on wave 1, retry, both succeed on wave 2.
+    const TWO_PLAN = JSON.stringify({
+      subtasks: [
+        { id: 'task-1', role: 'A', systemPrompt: 'spa', prompt: 'pa' },
+        { id: 'task-2', role: 'B', systemPrompt: 'spb', prompt: 'pb' },
+      ],
+    });
+    const calls: Record<string, number> = {};
+    const spawn = vi.fn(async (args) => {
+      if (args.profileName === 'swarm-planner') return { result: TWO_PLAN };
+      if (args.profileName === 'swarm-synthesizer') return { result: 'SYNTH' };
+      if (args.profileName === 'swarm-reviser') return { result: '{"kind":"retry"}' };
+      calls[args.profileName] = (calls[args.profileName] ?? 0) + 1;
+      if (calls[args.profileName] === 1) throw new Error('boom');
+      return { result: 'ok' };
+    });
+    const coordinator = new SwarmCoordinator({
+      spawnSubagent: spawn,
+      signal: new AbortController().signal,
+    });
+    const result = await coordinator.run('x');
+    expect(result).toBe('SYNTH');
+    expect(calls['swarm:A']).toBe(2);
+    expect(calls['swarm:B']).toBe(2);
+  });
+
+  it('all subtasks dropped: still synthesizes with a gap-only prompt (no crash)', async () => {
+    const TWO_PLAN = JSON.stringify({
+      subtasks: [
+        { id: 'task-1', role: 'A', systemPrompt: 'spa', prompt: 'pa' },
+        { id: 'task-2', role: 'B', systemPrompt: 'spb', prompt: 'pb' },
+      ],
+    });
+    let synthesizerPrompt: string | undefined;
+    const spawn = vi.fn(async (args) => {
+      if (args.profileName === 'swarm-planner') return { result: TWO_PLAN };
+      if (args.profileName === 'swarm-synthesizer') {
+        synthesizerPrompt = args.prompt;
+        return { result: 'SYNTH' };
+      }
+      if (args.profileName === 'swarm-reviser')
+        return { result: '{"kind":"drop","reason":"impossible"}' };
+      // Every worker fails on its first (only) run, then is dropped.
+      throw new Error('boom');
+    });
+    const onProgressCustom = vi.fn();
+    const coordinator = new SwarmCoordinator({
+      spawnSubagent: spawn,
+      signal: new AbortController().signal,
+      onProgressCustom,
+    });
+    const result = await coordinator.run('x');
+    expect(result).toBe('SYNTH');
+    // Synthesizer was consulted and its prompt surfaces both subtasks as gaps,
+    // never inventing a success.
+    expect(synthesizerPrompt).toBeDefined();
+    expect(synthesizerPrompt).toMatch(/DROPPED/);
+    expect(synthesizerPrompt).not.toMatch(/done\)/);
+    const payloads = (onProgressCustom as ReturnType<typeof vi.fn>).mock.calls.map((c) => c[0]);
+    expect(
+      payloads.some(
+        (p) => p.phase === 'done' && p.succeeded === 0 && p.dropped === 2 && p.failed === 0,
+      ),
+    ).toBe(true);
+  });
+
+  it('does not revise on a genuine swarm-wide cancel (re-throws the abort)', async () => {
+    const controller = new AbortController();
+    const spawn = vi.fn(async (args) => {
+      if (args.profileName === 'swarm-planner') return { result: ONE_PLAN };
+      // Worker: a real swarm-wide cancel — abort the coordinator signal and throw.
+      controller.abort();
+      const e = new Error('aborted');
+      e.name = 'AbortError';
+      throw e;
+    });
+    const coordinator = new SwarmCoordinator({ spawnSubagent: spawn, signal: controller.signal });
+    await expect(coordinator.run('x')).rejects.toThrow();
+    const profiles = (spawn as ReturnType<typeof vi.fn>).mock.calls.map((c) => c[0].profileName);
+    expect(profiles).not.toContain('swarm-reviser');
+    expect(profiles).not.toContain('swarm-synthesizer');
+  });
+});
diff --git a/packages/agent-core/test/swarm/parse.test.ts b/packages/agent-core/test/swarm/parse.test.ts
new file mode 100644
index 00000000..62186bba
--- /dev/null
+++ b/packages/agent-core/test/swarm/parse.test.ts
@@ -0,0 +1,107 @@
+import { describe, expect, it } from 'vitest';
+
+import { extractJsonObject, parsePlan, parseReviseDecision } from '../../src/agent/swarm/parse';
+
+describe('extractJsonObject', () => {
+  it('extracts a fenced json block', () => {
+    expect(extractJsonObject('blah\n```json\n{"a":1}\n```\ntail')).toBe('{"a":1}');
+  });
+  it('extracts a bare object from surrounding prose', () => {
+    expect(extractJsonObject('here you go: {"a":1} done')).toBe('{"a":1}');
+  });
+  it('returns null when no object is present', () => {
+    expect(extractJsonObject('no json here')).toBeNull();
+  });
+});
+
+describe('parsePlan', () => {
+  const good = JSON.stringify({
+    subtasks: [
+      { role: 'Researcher', systemPrompt: 'be a researcher', prompt: 'research X' },
+      { id: 'b', role: 'Writer', systemPrompt: 'be a writer', prompt: 'write Y', toolAllowlist: ['Read'] },
+    ],
+  });
+
+  it('parses a valid plan and fills default ids', () => {
+    const plan = parsePlan('root', '```json\n' + good + '\n```');
+    expect(plan).not.toBeNull();
+    expect(plan?.rootTask).toBe('root');
+    expect(plan?.subtasks).toHaveLength(2);
+    expect(plan?.subtasks[0]?.id).toBe('task-1');
+    expect(plan?.subtasks[0]?.status).toBe('pending');
+    expect(plan?.subtasks[1]?.id).toBe('b');
+    expect(plan?.subtasks[1]?.toolAllowlist).toEqual(['Read']);
+  });
+
+  it('returns null for empty subtasks', () => {
+    expect(parsePlan('root', '{"subtasks":[]}')).toBeNull();
+  });
+
+  it('returns null when a subtask misses required fields', () => {
+    expect(parsePlan('root', '{"subtasks":[{"role":"R"}]}')).toBeNull();
+  });
+
+  it('returns null for non-json garbage', () => {
+    expect(parsePlan('root', 'totally not json')).toBeNull();
+  });
+});
+
+describe('parseReviseDecision', () => {
+  it('parses a retry decision', () => {
+    expect(parseReviseDecision('{"kind":"retry"}')).toEqual({ kind: 'retry' });
+  });
+
+  it('parses a retry decision from a fenced block', () => {
+    expect(parseReviseDecision('```json\n{"kind":"retry"}\n```')).toEqual({ kind: 'retry' });
+  });
+
+  it('parses a regenerate decision with a new prompt', () => {
+    expect(parseReviseDecision('{"kind":"regenerate","prompt":"NEW"}')).toEqual({
+      kind: 'regenerate',
+      prompt: 'NEW',
+    });
+  });
+
+  it('parses a reassign decision with role, systemPrompt, and toolAllowlist', () => {
+    expect(
+      parseReviseDecision(
+        '{"kind":"reassign","role":"R2","systemPrompt":"SP2","toolAllowlist":["Read"]}',
+      ),
+    ).toEqual({ kind: 'reassign', role: 'R2', systemPrompt: 'SP2', toolAllowlist: ['Read'] });
+  });
+
+  it('parses a reassign decision without a toolAllowlist', () => {
+    expect(parseReviseDecision('{"kind":"reassign","role":"R2","systemPrompt":"SP2"}')).toEqual({
+      kind: 'reassign',
+      role: 'R2',
+      systemPrompt: 'SP2',
+    });
+  });
+
+  it('parses a drop decision with a reason', () => {
+    expect(parseReviseDecision('{"kind":"drop","reason":"impossible"}')).toEqual({
+      kind: 'drop',
+      reason: 'impossible',
+    });
+  });
+
+  it('returns null for an unknown kind', () => {
+    expect(parseReviseDecision('{"kind":"explode"}')).toBeNull();
+  });
+
+  it('returns null when a regenerate decision misses its prompt', () => {
+    expect(parseReviseDecision('{"kind":"regenerate"}')).toBeNull();
+  });
+
+  it('returns null when a reassign decision misses required fields', () => {
+    expect(parseReviseDecision('{"kind":"reassign","role":"R2"}')).toBeNull();
+  });
+
+  it('returns null when a drop decision misses its reason', () => {
+    expect(parseReviseDecision('{"kind":"drop"}')).toBeNull();
+  });
+
+  it('returns null for non-json garbage', () => {
+    expect(parseReviseDecision('totally not json')).toBeNull();
+  });
+});
diff --git a/packages/agent-core/test/swarm/swarm-tool.test.ts b/packages/agent-core/test/swarm/swarm-tool.test.ts
new file mode 100644
index 00000000..bc0e644f
--- /dev/null
+++ b/packages/agent-core/test/swarm/swarm-tool.test.ts
@@ -0,0 +1,223 @@
+import { describe, expect, it, vi } from 'vitest';
+
+import { SwarmTool } from '../../src/tools/builtin/collaboration/swarm';
+import type { SessionSubagentHost } from '../../src/session/subagent-host';
+import type { ToolExecutionHookContext } from '../../src/loop/index';
+
+const PLAN_JSON = JSON.stringify({
+  subtasks: [{ role: 'R', systemPrompt: 'sp', prompt: 'p' }],
+});
+
+function makeHookCtx(name: string, args: unknown): ToolExecutionHookContext {
+  return {
+    toolCall: { type: 'function', id: 'c', name, arguments: JSON.stringify(args) },
+    args,
+    turnId: 't',
+    stepNumber: 1,
+    signal: new AbortController().signal,
+  } as unknown as ToolExecutionHookContext;
+}
+
+function fakeHost(): SessionSubagentHost {
+  const spawn = vi.fn(async (profileName: string) => {
+    const result =
+      profileName === 'swarm-planner'
+        ? PLAN_JSON
+        : profileName === 'swarm-synthesizer'
+          ? 'FINAL'
+          : 'worker-out';
+    return { agentId: 'a', profileName, resumed: false, completion: Promise.resolve({ result }) };
+  });
+  return { spawn } as unknown as SessionSubagentHost;
+}
+
+describe('SwarmTool', () => {
+  it('exposes a task parameter and an approval rule', () => {
+    const tool = new SwarmTool(fakeHost());
+    expect(tool.name).toBe('Swarm');
+    const exec = tool.resolveExecution({ task: 'hello' });
+    expect('approvalRule' in exec && exec.approvalRule).toBe('Swarm');
+  });
+
+  it('runs the coordinator and returns the synthesized output', async () => {
+    const tool = new SwarmTool(fakeHost());
+    const exec = tool.resolveExecution({ task: 'do it' });
+    if (!('execute' in exec)) throw new Error('expected runnable execution');
+    const updates: string[] = [];
+    const result = await exec.execute({
+      turnId: 't1',
+      toolCallId: 'tc1',
+      signal: new AbortController().signal,
+      onUpdate: (u) => {
+        if (u.text !== undefined) updates.push(u.text);
+      },
+    });
+    expect('output' in result && result.output).toBe('FINAL');
+    expect(updates.length).toBeGreaterThan(0);
+  });
+
+  it('emits a failed dashboard event with the reason on an ordinary failure', async () => {
+    // Planner always returns garbage -> decompose fails after its one retry,
+    // so coordinator.run rejects with an ordinary (non-abort) error.
+    const host = {
+      spawn: vi.fn(async (profileName: string) => ({
+        agentId: 'a',
+        profileName,
+        resumed: false,
+        completion: Promise.resolve({ result: 'not json at all' }),
+      })),
+    } as unknown as SessionSubagentHost;
+    const tool = new SwarmTool(host);
+    const exec = tool.resolveExecution({ task: 'do it' });
+    if (!('execute' in exec)) throw new Error('expected runnable execution');
+    const customData: Array<{ phase?: string; message?: string }> = [];
+    const result = await exec.execute({
+      turnId: 't1',
+      toolCallId: 'tc1',
+      signal: new AbortController().signal,
+      onUpdate: (u) => {
+        if (u.kind === 'custom') customData.push(u.customData as { phase?: string; message?: string });
+      },
+    });
+    expect('isError' in result && result.isError).toBe(true);
+    const failed = customData.find((d) => d.phase === 'failed');
+    expect(failed).toBeDefined();
+    expect(failed?.message).toContain('planner failed to produce a valid plan');
+  });
+
+  it('does not emit a failed event when the swarm is aborted (genuine cancel)', async () => {
+    const controller = new AbortController();
+    const host = {
+      spawn: vi.fn(async (profileName: string) => ({
+        agentId: 'a',
+        profileName,
+        resumed: false,
+        completion: Promise.resolve({ result: 'not json at all' }),
+      })),
+    } as unknown as SessionSubagentHost;
+    const tool = new SwarmTool(host);
+    const exec = tool.resolveExecution({ task: 'do it' });
+    if (!('execute' in exec)) throw new Error('expected runnable execution');
+    controller.abort();
+    const customData: Array<{ phase?: string }> = [];
+    const result = await exec.execute({
+      turnId: 't1',
+      toolCallId: 'tc1',
+      signal: controller.signal,
+      onUpdate: (u) => {
+        if (u.kind === 'custom') customData.push(u.customData as { phase?: string });
+      },
+    });
+    expect('isError' in result && result.isError).toBe(true);
+    expect(customData.find((d) => d.phase === 'failed')).toBeUndefined();
+  });
+
+  it('injects a stall hook + per-worker signal for workers but not planner/synthesizer', async () => {
+    const seen: Array<{ profileName: string; hasHooks: boolean; sameAsCoordinator: boolean }> = [];
+    const coordinatorSignal = new AbortController().signal;
+    const spawn = vi.fn(async (profileName: string, options: any) => {
+      seen.push({
+        profileName,
+        hasHooks: options.loopHooks !== undefined,
+        sameAsCoordinator: options.signal === coordinatorSignal,
+      });
+      const result =
+        profileName === 'swarm-planner'
+          ? PLAN_JSON
+          : profileName === 'swarm-synthesizer'
+            ? 'FINAL'
+            : 'worker-out';
+      return { agentId: 'a', profileName, resumed: false, completion: Promise.resolve({ result }) };
+    });
+    const host = { spawn } as unknown as SessionSubagentHost;
+
+    const tool = new SwarmTool(host);
+    const exec = tool.resolveExecution({ task: 'do it' });
+    if (!('execute' in exec)) throw new Error('expected runnable execution');
+    await exec.execute({ turnId: 't1', toolCallId: 'tc1', signal: coordinatorSignal });
+
+    const planner = seen.find((s) => s.profileName === 'swarm-planner');
+    const synth = seen.find((s) => s.profileName === 'swarm-synthesizer');
+    const worker = seen.find((s) => s.profileName === 'swarm:R');
+    expect(planner?.hasHooks).toBe(false);
+    expect(synth?.hasHooks).toBe(false);
+    // Planner/synthesizer use the coordinator signal directly.
+    expect(planner?.sameAsCoordinator).toBe(true);
+    // Worker gets the stall hook and a distinct (linked) per-worker signal.
+    expect(worker?.hasHooks).toBe(true);
+    expect(worker?.sameAsCoordinator).toBe(false);
+  });
+
+  it('translates a worker stall into a distinguishable error recorded by the coordinator, leaving the coordinator signal unaborted, and still synthesizes', async () => {
+    const coordinator = new AbortController();
+    let synthesizerPrompt: string | undefined;
+
+    const spawn = vi.fn(async (profileName: string, options: any) => {
+      if (profileName === 'swarm-planner') {
+        return {
+          agentId: 'p',
+          profileName,
+          resumed: false,
+          completion: Promise.resolve({ result: PLAN_JSON }),
+        };
+      }
+      if (profileName === 'swarm-synthesizer') {
+        synthesizerPrompt = options.prompt;
+        return {
+          agentId: 's',
+          profileName,
+          resumed: false,
+          completion: Promise.resolve({ result: 'SYNTH' }),
+        };
+      }
+      if (profileName === 'swarm-reviser') {
+        // The coordinator now consults a reviser for the stalled subtask; drop
+        // it so the worker is not re-run and the stall surfaces as a gap.
+        return {
+          agentId: 'r',
+          profileName,
+          resumed: false,
+          completion: Promise.resolve({ result: '{"kind":"drop","reason":"stall is unrecoverable"}' }),
+        };
+      }
+      // Worker: drive the injected stall hook with a repeated tool call. The
+      // hook's onStall aborts the per-worker signal; we mirror the real
+      // subagent-host path by rejecting with the generic cancel message once
+      // the per-worker signal is aborted.
+      const hook = options.loopHooks?.prepareToolExecution;
+      expect(hook).toBeDefined();
+      const ctx = makeHookCtx('Read', { path: '/loop' });
+      const completion = (async () => {
+        for (let i = 0; i < 100; i += 1) {
+          const decision = await hook(ctx);
+          if (decision?.block === true) break;
+        }
+        // Per-worker signal was aborted by the stall hook.
+        expect(options.signal.aborted).toBe(true);
+        const err = new Error('Subagent turn cancelled');
+        err.name = 'AbortError';
+        throw err;
+      })();
+      return { agentId: 'w', profileName, resumed: false, completion };
+    });
+    const host = { spawn } as unknown as SessionSubagentHost;
+
+    const tool = new SwarmTool(host);
+    const exec = tool.resolveExecution({ task: 'do it' });
+    if (!('execute' in exec)) throw new Error('expected runnable execution');
+    const result = await exec.execute({
+      turnId: 't1',
+      toolCallId: 'tc1',
+      signal: coordinator.signal,
+    });
+
+    // Swarm still completes (synthesis ran) despite the stalled worker.
+    expect('output' in result && result.output).toBe('SYNTH');
+    // The coordinator signal was never aborted by the per-worker stall.
+    expect(coordinator.signal.aborted).toBe(false);
+    // The synthesizer prompt records the worker as failed with the
+    // distinguishable stalled reason.
+    expect(synthesizerPrompt).toMatch(/stalled/i);
+    expect(synthesizerPrompt).toContain('Read');
+  });
+});