diff --git a/CONTEXT.md b/CONTEXT.md deleted file mode 100644 index 70df1e0673..0000000000 --- a/CONTEXT.md +++ /dev/null @@ -1,110 +0,0 @@ -# Mux Conversation Context - -Mux preserves workspace transcripts while controlling which messages are active conversation context for the agent. - -## Language - -**Context Reset**: -Starts a new active conversation context while preserving earlier transcript history. -_Avoid_: soft clear, compaction, truncate, clear history - -**Transcript History**: -The persisted record of messages in a workspace, including messages that are no longer active context. -_Avoid_: context, prompt history - -**Active Conversation Context**: -The subset of transcript history eligible to be sent to the agent for the next response. -_Avoid_: chat history, transcript - -**Compaction Boundary**: -A context boundary that carries a provider-visible summary of earlier transcript history. -_Avoid_: context reset - -**Context Reset Boundary**: -A visible separator in transcript history where the active conversation context starts over; older history may be hidden behind a load-older affordance. -_Avoid_: compaction boundary, summary message - -**Agent Carryover State**: -Workspace state outside transcript history that can influence future agent turns. -_Avoid_: hidden context, leftovers - -**Context Boundary**: -A transcript marker that divides provider-eligible context windows without deleting transcript history. -_Avoid_: synthetic assistant message, compacted message - -**Hard Clear**: -A destructive operation that deletes transcript history for the active workspace. -_Avoid_: context reset, soft clear - -**Provider-Eligible Message**: -A transcript message that can contribute content to a future agent request. -_Avoid_: persisted row, visible message - -**Transcript Export**: -A shared or copied representation of transcript history, including visible context boundaries. 
-_Avoid_: active context export - -## Relationships - -- A **Hard Clear** deletes **Transcript History**. -- A **Context Reset** preserves **Transcript History**. -- A **Context Reset** creates a **Context Reset Boundary**. -- A **Compaction Boundary** is a kind of **Context Boundary**. -- A **Context Reset Boundary** is a kind of **Context Boundary**. -- A **Context Reset Boundary** separates older **Transcript History** from the new **Active Conversation Context**. -- Older **Transcript History** above a **Context Reset Boundary** can be hidden behind load-older history. -- **Active Conversation Context** may be smaller than **Transcript History**. - -- A **Context Reset** clears **Agent Carryover State** so previous work does not influence future agent turns. -- A **Context Reset Boundary** is created only when the current context window contains at least one **Provider-Eligible Message**. -- A **Context Reset Boundary** is visible transcript structure, not conversation content for the agent. - -- Context usage reflects **Active Conversation Context**, not all loaded **Transcript History**. -- Messages above the latest **Context Boundary** are viewable and exportable but cannot directly mutate the current **Active Conversation Context**. -- A **Transcript Export** can include **Transcript History** from above a **Context Reset Boundary**. - -## Example dialogue - -> **Dev:** "After a **Context Reset**, can the agent answer from messages above the **Context Reset Boundary**, or see a hidden note that the reset happened?" -> **Domain expert:** "No — those messages remain in **Transcript History**, and the boundary is visible transcript structure, but neither is part of the agent's **Active Conversation Context**." - -> **Dev:** "Are **Compaction Boundaries** and **Context Reset Boundaries** separate mechanisms?" -> **Domain expert:** "No — both are **Context Boundaries**. 
A **Compaction Boundary** summarizes earlier history for the agent; a **Context Reset Boundary** does not." - -> **Dev:** "Should `/clear` preserve **Transcript History** now that **Context Reset** exists?" -> **Domain expert:** "No — `/clear` remains a **Hard Clear**. `/clear --soft` performs a **Context Reset**." - -> **Dev:** "If there is no **Transcript History**, should a **Context Reset** create a boundary anyway?" -> **Domain expert:** "No — without earlier history, there is nothing for a **Context Reset Boundary** to separate." - -> **Dev:** "If the user repeats `/clear --soft` before sending another message, should we append another **Context Reset Boundary**?" -> **Domain expert:** "No — repeated resets with no active-context messages are no-op successes." - -> **Dev:** "Should the `/clear --soft` command itself appear as a user message?" -> **Domain expert:** "No — a **Context Reset** is represented by a **Context Reset Boundary**, not by a user prompt." - -> **Dev:** "Can a **Context Reset** happen while the agent is still responding?" -> **Domain expert:** "No — context can only be reset once the active turn has stopped and transcript ordering is stable." - -> **Dev:** "How should users find **Context Reset** outside slash commands?" -> **Domain expert:** "Expose it as a separate command from **Hard Clear**, named around resetting context while preserving history." - -> **Dev:** "What should the visible separator say?" -> **Domain expert:** "Use `Context reset`; avoid labels that imply transcript history was deleted." - -> **Dev:** "Should a **Context Reset Boundary** show when it happened?" -> **Domain expert:** "Persist the timestamp for ordering and audit, but keep the visible separator label simple." - -> **Dev:** "Can a **Context Reset** happen while user input is queued?" -> **Domain expert:** "No — queued input belongs to the old context and must be sent or cleared before resetting." 
- -> **Dev:** "What happens to pending composer content when a user performs a **Context Reset**?" -> **Domain expert:** "Resetting context starts fresh, so pending composer state should not carry over." - -> **Dev:** "Should partial or aborted messages before a **Context Reset Boundary** be cleaned up?" -> **Domain expert:** "No — they remain **Transcript History** above the boundary, but are outside the new **Active Conversation Context**." - -## Flagged ambiguities - -- "soft clear" is a user-facing command style, not the domain concept; resolved: use **Context Reset** for the behavior. -- "compaction boundary" implies summarization; resolved: use **Context Reset Boundary** for a reset without summarization. diff --git a/bun.lock b/bun.lock index 514abc7ca3..97eca1ace3 100644 --- a/bun.lock +++ b/bun.lock @@ -79,8 +79,10 @@ "posthog-node": "^5.17.0", "quickjs-emscripten": "^0.31.0", "quickjs-emscripten-core": "^0.31.0", + "react": "18.3.1", "react-colorful": "^5.6.1", "react-resizable-panels": "^3.0.6", + "react-dom": "18.3.1", "react-router-dom": "^7.11.0", "recharts": "^2.15.3", "rehype-harden": "^1.1.5", @@ -169,13 +171,11 @@ "postcss": "^8.5.6", "posthog-js": "^1.276.0", "prettier": "^3.6.2", - "react": "^18.2.0", "react-compiler-runtime": "^1.0.0", "react-dnd": "^16.0.1", "react-dnd-html5-backend": "^16.0.1", "react-dnd-test-backend": "^16.0.1", "react-dnd-test-utils": "^16.0.1", - "react-dom": "^18.2.0", "rehype-katex": "^7.0.1", "rehype-raw": "^7.0.0", "remark-gfm": "^4.0.1", diff --git a/docs/hooks/tools.mdx b/docs/hooks/tools.mdx index bde56b6c89..ba6b836185 100644 --- a/docs/hooks/tools.mdx +++ b/docs/hooks/tools.mdx @@ -227,12 +227,15 @@ If a value is too large for the environment, it may be omitted (not set). Mux al
-agent_report (2) +agent_report (5) -| Env var | JSON path | Type | Description | -| -------------------------------- | ---------------- | ------ | ----------- | -| `MUX_TOOL_INPUT_REPORT_MARKDOWN` | `reportMarkdown` | string | — | -| `MUX_TOOL_INPUT_TITLE` | `title` | string | — | +| Env var | JSON path | Type | Description | +| --------------------------------------- | ---------------------- | ------- | ------------------------------------------------------------------------------------ | +| `MUX_TOOL_INPUT_REPORT_MARKDOWN` | `reportMarkdown` | string | — | +| `MUX_TOOL_INPUT_REPORT_MARKDOWN_PATH` | `reportMarkdownPath` | string | Path to the markdown report file, usually report.md in the workspace root | +| `MUX_TOOL_INPUT_STRUCTURED_OUTPUT` | `structuredOutput` | unknown | — | +| `MUX_TOOL_INPUT_STRUCTURED_OUTPUT_PATH` | `structuredOutputPath` | string | Path to a JSON file containing the structured output, usually structured-output.json | +| `MUX_TOOL_INPUT_TITLE` | `title` | string | — |
@@ -679,6 +682,37 @@ If a value is too large for the environment, it may be omitted (not set). Mux al +
+workflow_read (1) + +| Env var | JSON path | Type | Description | +| --------------------- | --------- | ------ | ----------- | +| `MUX_TOOL_INPUT_NAME` | `name` | string | — | + +
+ +
+workflow_run (3) + +| Env var | JSON path | Type | Description | +| ---------------------------------- | ------------------- | ------- | ----------- | +| `MUX_TOOL_INPUT_ARGS` | `args` | unknown | — | +| `MUX_TOOL_INPUT_NAME` | `name` | string | — | +| `MUX_TOOL_INPUT_RUN_IN_BACKGROUND` | `run_in_background` | boolean | — | + +
+ +
+workflow_write (3) + +| Env var | JSON path | Type | Description | +| ---------------------------- | ------------- | ------ | ----------- | +| `MUX_TOOL_INPUT_DESCRIPTION` | `description` | string | — | +| `MUX_TOOL_INPUT_NAME` | `name` | string | — | +| `MUX_TOOL_INPUT_SOURCE` | `source` | string | — | + +
+ {/* END TOOL_HOOK_ENV_VARS */} diff --git a/package.json b/package.json index e01f9e0bf2..73bf77efb4 100644 --- a/package.json +++ b/package.json @@ -121,8 +121,10 @@ "posthog-node": "^5.17.0", "quickjs-emscripten": "^0.31.0", "quickjs-emscripten-core": "^0.31.0", + "react": "18.3.1", "react-colorful": "^5.6.1", "react-resizable-panels": "^3.0.6", + "react-dom": "18.3.1", "react-router-dom": "^7.11.0", "recharts": "^2.15.3", "rehype-harden": "^1.1.5", @@ -211,13 +213,11 @@ "postcss": "^8.5.6", "posthog-js": "^1.276.0", "prettier": "^3.6.2", - "react": "^18.2.0", "react-compiler-runtime": "^1.0.0", "react-dnd": "^16.0.1", "react-dnd-html5-backend": "^16.0.1", "react-dnd-test-backend": "^16.0.1", "react-dnd-test-utils": "^16.0.1", - "react-dom": "^18.2.0", "rehype-katex": "^7.0.1", "rehype-raw": "^7.0.0", "remark-gfm": "^4.0.1", diff --git a/rfc/20260529_dynamic-workflows.md b/rfc/20260529_dynamic-workflows.md new file mode 100644 index 0000000000..17d8dc56d5 --- /dev/null +++ b/rfc/20260529_dynamic-workflows.md @@ -0,0 +1,699 @@ +--- +author: "@mux" +date: 2026-05-29 +--- + +# Dynamic Workflows for Mux + +Status: Draft + +## Stakeholders + +- [ ] Product Lead: +- [ ] Engineering DRI: +- [ ] CTO: +- [ ] Frontend/UI reviewer: +- [ ] Runtime/task orchestration reviewer: + +## Problem Statement + +Mux already supports parallel agent workflows through tasks, sub-agents, agent skills, programmatic tool calling, goals, and durable chat/task state. However, repeatable multi-agent orchestration still mostly lives in prose instructions, ad-hoc parent-agent reasoning, or one-off tool calls. That makes sophisticated patterns such as deep research, adversarial verification, multi-lane review, and goal-internal control flow harder to reuse, inspect, resume, or explain visually. + +We want Mux to support executable workflow orchestration: plain JavaScript scripts that coordinate sub-agent tasks through a small conductor-only API. 
A workflow should make the orchestration structure visible to users, preserve durable progress across restarts, support structured sub-agent outputs, and be discoverable like skills without turning skills into executable code. + +The initial product should be a developer-facing experiment with enough durability and observability to dogfood real workflows. It should not start as a polished `/workflows` dashboard product. + +## Glossary + +This RFC relies on these terms: + +- **Workflow Definition**: reusable executable orchestration that coordinates agent work. +- **Workflow Run**: one execution of a workflow definition for a specific request/input. +- **Scratch Workflow Definition**: a generated workflow definition stored for one workflow run without becoming reusable/discoverable. +- **Workflow Promotion**: explicitly saving a scratch workflow definition as a reusable workflow definition. +- **Workflow Primitive**: a conductor operation available inside workflow JavaScript. +- **Workflow Step**: a replayable unit of orchestration progress within a durable workflow run. +- **Durable Workflow Run**: a workflow run that can continue after Mux restarts without losing completed orchestration progress. +- **Workflow Resume**: continuing an interrupted workflow run from durable state. +- **Partial Workflow Recovery**: reusing recoverable workflow steps and rerunning missing/unrecoverable steps. +- **Workflow Result**: final workflow output, including a human-readable report and optional machine-readable data. +- **Structured Task Output**: machine-readable task result requested by a workflow run. +- **Report-Time Validation**: validation when a task submits structured task output as part of its final report. +- **Goal Step**: a unit of progress inside a goal; a workflow run can be a goal step. + +## Goals + +1. Let agents and users run conductor-only JavaScript workflow definitions that coordinate sub-agent tasks. +2. 
Make workflow runs observable in the main chat with first-class phases, logs, child tasks, validation events, status, and result. +3. Make workflow runs durable and resumable from the first real workflow implementation. +4. Support report-time JSON Schema validation for structured sub-agent outputs. +5. Preserve the existing skill mental model for discovery and precedence while keeping workflow storage and trust boundaries separate. +6. Support dynamic one-off workflow generation and explicit promotion to reusable workflow definitions. +7. Ship built-in deep research as the first showcase workflow. + +## Non-goals + +1. Do not make agent skills executable. +2. Do not expose arbitrary tools such as bash, file editing, web fetch, browser automation, or `mux.*` inside workflow definitions in v1. +3. Do not add TypeScript workflow authoring in v1. +4. Do not build a full workflow dashboard in v1. +5. Do not add workflow-specific concurrency or total-agent caps in v1. +6. Do not implement generic `parallel(fn[])`, nested `workflow(...)`, or workflow-side memory/file-write primitives in v1. +7. Do not make a workflow run automatically create, replace, or complete workspace goals. + +## Proposal Overview + +Add a first-class workflow product layer on top of Mux's task/sub-agent system. + +A workflow definition is plain JavaScript. It runs in a sandboxed coordinator runtime that exposes only conductor primitives such as `agent`, `parallelAgents`, `phase`, and `log`. Hands-on work happens inside spawned tasks, whose transcripts and tools remain visible through existing task infrastructure. + +A workflow run persists a durable journal of steps, emitted events, spawned task IDs, structured outputs, and final result. On resume after interruption or restart, Mux reruns the workflow definition against the journal. Completed steps short-circuit from recorded results. Missing or unrecoverable steps rerun when safe. 
+ +Workflow definitions are discovered from project-local, global, and built-in roots. Project-local definitions are governed by existing project trust. Dynamically generated scratch definitions are saved under Mux-controlled scratch/run storage and can be promoted explicitly to reusable definitions. + +The first built-in workflow is deep research. It should demonstrate scoping, source gathering, cross-checking, adversarial refutation, structured output validation, and final synthesis. + +## UX & Design + +### Invocation + +Workflow runs should be launched primarily from chat/tool-call interactions. + +Once workflow definitions exist, they should be discoverable like skills: + +- Slash invocation explicitly starts a workflow run: `/deep-research browser automation`. +- Inline `$name` references include/identify a workflow definition in chat context. They start a run only when the surrounding user request clearly asks to run that workflow. + +Examples: + +- `/deep-research browser automation` starts a workflow run. +- `Run $deep-research on browser automation` starts a workflow run. +- `Can you improve $deep-research?` references the workflow definition for inspection/editing; it does not run it. +- `Compare $deep-research and $bug-hunt` references definitions; it does not run them. + +Slash invocation should stay simple in v1. Do not add a `--background` slash flag initially. Tool-based launch should expose `run_in_background` for the agent. + +### Workflow run card + +The first version should include a lightweight first-class workflow run card in the launching chat. The card should show: + +- Workflow name and source. +- Run status: running, completed, failed, interrupted, waiting, or backgrounded. +- Current and completed phases. +- Child tasks and their statuses. +- Workflow logs emitted by the coordinator. +- Structured output validation successes and failures. +- Final workflow result or error. +- A promotion affordance for scratch workflows. 
+- Resume/interrupt actions for durable runs. + +A full workflow dashboard is an eventual requirement, not a v1 requirement. + +### Foreground and background behavior + +Workflow runs support foreground and background execution. + +- Tool launch includes `run_in_background`, defaulting to `false`, matching bash and task behavior. +- Slash-launched workflows start in the foreground. +- If the user sends a follow-up message or manually starts another workflow while a foreground workflow is active, Mux should move the first run to the background rather than blocking the conversation. +- Background workflow runs remain visible through their run cards and can later be queried, awaited, resumed, or integrated by the parent agent. + +### Dynamic generation and promotion + +Agents can generate one-off scratch workflow definitions for a specific request. Those generated scripts should be written to a Mux-controlled scratch/run location under Mux home, not automatically saved into project or global workflow roots. + +Scratch workflow definitions are durable for their run. The run record must retain the script content or a stable reference to the scratch script so recovery can replay it. + +Scratch definitions are not discoverable as reusable slash/inline workflows until the user promotes them. Promotion should open a naming/location flow where the user explicitly chooses project-local or global storage. Do not default the location. + +### Future drilldown + +Eventually, users should be able to drill into running child tasks from a workflow run card, inspect stuck agents, resume child tasks where possible, or manually prompt/intervene. The run card should not block those future interactions, but detailed task-intervention UI can come after the first workflow release. + +## Operational Scenarios + +### Deep research + +A user asks Mux to research an unfamiliar technical topic. The parent agent dynamically creates or selects a deep research workflow. The workflow: + +1. 
Scopes the topic. +2. Fans out source discovery to multiple sub-agents. +3. Asks source-reading agents to return structured source summaries. +4. Runs adversarial verification agents that refute or qualify claims. +5. Synthesizes a final report with structured claims, sources, and confidence. + +The run card shows each phase, spawned tasks, validation events, logs, and final synthesis. + +### Crash and resume + +A workflow has completed source discovery and spawned verification tasks. Mux restarts. On startup or explicit resume, the same workflow run continues from its durable journal. Completed steps short-circuit. Running/interrupted child tasks resume in place where possible. Missing or corrupt step records rerun through partial workflow recovery. + +The workflow does not spawn duplicate agents for completed steps. + +### Goal-internal workflow + +A user sets a goal to complete a long-running project objective. The agent uses a workflow as control flow inside the goal loop. The workflow delegates work, verification, correctness review, and progress assessment, then returns a workflow result to the agent. The goal remains the ongoing loop; the workflow is a goal step, not the goal itself. + +### Scratch promotion + +An agent generates a one-off research workflow for a narrow question. After the run succeeds, the user clicks “Save workflow,” enters a name/description, chooses project-local or global storage, and promotes the scratch workflow definition. The promoted definition appears in slash and `$` suggestions according to normal discovery/precedence rules. + +## Requirements + +### Initial Functional Requirements + +#### Workflow definition authoring + +- Workflow definitions are plain `.js` files in v1. +- TypeScript authoring is deferred; future TypeScript/Zod layers can compile down to JavaScript and JSON Schema. +- Workflow scripts run in a sandboxed coordinator runtime. 
+- Workflow scripts must not have direct filesystem, shell, network, or `mux.*` tool access in v1. + +#### Workflow primitives + +Expose these conductor-only primitives in v1: + +- `agent(spec)` — spawn one task and, by default, wait for its report. +- `backgroundAgent(spec)` — spawn one task and return a handle without waiting. +- `awaitAgents(handles, opts?)` — await one or more task handles. +- `parallelAgents(specs, opts?)` — spawn a group of tasks and wait for reports. +- `phase(name, details?)` — emit workflow progress. +- `log(message, data?)` — emit lightweight workflow diagnostics. +- `args` — read-only workflow input. +- `limits` or `budget` — read-only run caps. + +Represent variants and best-of-style fan-out with `parallelAgents(...)` patterns in v1. Keep existing task-tool `n` and `variants` available to ordinary agents, but do not add dedicated workflow primitives yet. + +#### Durable step identity + +- Replay-boundary primitives require stable IDs. +- `agent(...)`, `backgroundAgent(...)`, and `parallelAgents(...)` create durable replay boundaries and need stable author-provided identities. +- `phase(...)` and `log(...)` do not require author-provided IDs. +- Missing IDs during normal execution should fail fast as workflow authoring errors. +- Missing or corrupted persisted results during recovery should trigger partial workflow recovery when safe. + +Example: + +```js +const scope = agent({ + id: "scope-topic", + title: "Scope topic", + agent: "explore", + prompt: `Scope this research topic: ${args.topic}`, +}); + +const reports = parallelAgents({ + id: "verify-claims", + items: claims, + key: (claim) => claim.id, + task: (claim) => ({ + title: `Verify ${claim.id}`, + agent: "explore", + prompt: `Verify or refute this claim: ${claim.text}`, + }), +}); +``` + +#### Structured task output + +Workflow task primitives may include `outputSchema`. + +- `outputSchema` is a JSON Schema object literal. 
+- The initial schema subset should include `type`, `properties`, `required`, `items`, `enum`, `minItems`, `maxItems`, `minLength`, `maxLength`, and `additionalProperties`. +- When a workflow task has an output schema, the child task must submit both `reportMarkdown` and `structuredOutput` through its final report tool. +- Mux validates `structuredOutput` at report time before accepting the report. +- If validation fails, the final report tool call returns a validation error inside the child task. The child remains active and can call the report tool again. +- The workflow receives only validated structured output. + +Example schema: + +```js +const result = agent({ + id: "find-claims", + title: "Find key claims", + agent: "explore", + prompt: "Find five claims that need verification...", + outputSchema: { + type: "object", + required: ["findings"], + additionalProperties: false, + properties: { + findings: { + type: "array", + minItems: 5, + items: { + type: "object", + required: ["claim", "evidence", "confidence"], + additionalProperties: false, + properties: { + claim: { type: "string", minLength: 1 }, + evidence: { type: "array", items: { type: "string" } }, + confidence: { enum: ["low", "medium", "high"] }, + }, + }, + }, + }, + }, +}); +``` + +#### Workflow result + +The minimum v1 result contract is: + +```ts +{ + reportMarkdown: string, + structuredOutput?: unknown, +} +``` + +If a workflow returns a string, Mux may treat it as `reportMarkdown`. The run card displays `reportMarkdown`; parent agents and goal loops can consume `structuredOutput`. + +#### Storage and discovery + +Workflow definitions are stored separately from agent skills. + +Initial roots: + +1. Project-local: `.mux/workflows/<name>.js` +2. Global user-private: `~/.mux/workflows/<name>.js` +3. Built-in: workflow definitions shipped with Mux + +Discovery and precedence should mirror skills where possible: + +1. Project-local wins. +2. Global wins over built-in. +3. Built-in is fallback. 
+ +Project-local definitions can override built-ins when the project is trusted. + +Scratch workflow definitions should be stored under Mux-controlled scratch/run storage and are not included in reusable discovery until promoted. + +#### Trust + +Workflow trust piggybacks on existing project trust in v1. + +- Built-in workflow definitions are trusted by default. +- Global user-private definitions are treated like user-controlled Mux configuration. +- Project-local definitions are repo-controlled executable orchestration and are governed by existing project trust. +- Untrusted projects must not execute `.mux/workflows/*`. +- Discovery can omit untrusted project-local workflows or show them disabled with a trust-project affordance. + +Do not add per-workflow or per-content-hash approval in v1. + +#### Interrupt and resume + +- Use **resume**, not restart, as the primary continuation concept. +- Interrupting a workflow run stops the coordinator and cascade-interrupts active child tasks while preserving durable workflow state, completed step results, and interrupted task workspaces where possible. +- Resume continues the same workflow run ID from its durable journal. +- Completed steps short-circuit from recorded results. +- Interrupted child task workspaces resume in place when possible. +- Missing or unrecoverable steps rerun through partial workflow recovery. +- Reserve restart for a future explicit “start over as a new run” action. + +#### Goals + +- Workflows and goals are complementary. +- A goal is the ongoing objective loop. +- A workflow run is control flow inside that loop and can be a goal step. +- Workflow work, child tasks, costs, and results should be attributable to the active goal when launched inside one. +- A workflow run should not automatically create, replace, or complete a goal in v1. + +#### Built-in workflow focus + +- Ship deep research as the first built-in workflow. 
+- Defer deep review until deep research proves the runtime, durable replay, structured outputs, and run-card model. + +### Initial Non-functional Requirements + +#### Reliability + +- Workflow runs must be durable across Mux restarts/crashes. +- Recovery must not duplicate completed agent tasks. +- Recovery should be partial rather than all-or-nothing. +- Corrupt/missing step records should be isolated where possible; intact steps should still be reused. + +#### Security + +- The workflow coordinator is conductor-only in v1. +- Project-local workflows are gated by project trust. +- Workflow definitions should be treated as executable code, not documentation. +- Do not silently execute skills. + +#### Observability + +- Workflow state, phases, logs, child tasks, structured output validation, errors, and results must be visible through the run card. +- Background workflow runs must remain discoverable from the launching chat. +- The run store should support future dashboard/list views. + +#### Usability + +- Discovery/precedence should match skill intuition as much as possible. +- Slash invocation should be simple. +- `$name` references must not implicitly execute code unless the user clearly asks to run the workflow. +- Promotion from scratch to reusable workflow must require explicit user action and explicit location choice. + +#### Performance and cost + +- Do not add workflow-specific concurrency caps in v1. +- Use the existing global task queue/settings. +- Raise the default global `maxParallelAgentTasks` from 3 to 16 so workflow fan-out feels meaningfully parallel by default. +- Keep architecture open for future workflow-level budgets/caps if customers need them. + +### Eventual Requirements + +Future versions should be able to add: + +- Full workflow dashboard/run list. +- Command-palette workflow discovery and run management. +- TypeScript authoring or a richer schema DSL that compiles to v1 primitives. 
+- Generic `parallel(fn[])` if the runtime can safely support it. +- Nested workflow calls. +- Carefully scoped workflow-only memory/write primitives. +- Workflow-level concurrency/cost budgets. +- Per-task drilldown, intervention, and resume controls from the workflow card. +- Deep review as a built-in or refactored workflow. + +## Scope + +### In scope for the initial RFC direction + +- Workflow domain model and storage boundaries. +- Workflow runtime permission model. +- Initial conductor primitives. +- Structured task output validation. +- Durable run and partial recovery semantics. +- Lightweight run card behavior. +- Invocation/discovery/promotion/trust semantics. +- Built-in deep research focus. + +### Out of scope until the implementation plan + +- Exact class/file names. +- Exact database/file format schema. +- Exact IPC/oRPC surface. +- Exact React component hierarchy. +- Migration details for existing task/session artifacts. +- Final deep research prompt text. +- Exhaustive tests and issue breakdown. + +## Architecture + +### Proposed services + +Introduce a first-class workflow layer rather than stretching `code_execution` into a product feature. + +Recommended service boundaries: + +- `WorkflowDefinitionStore` + - Discovers project-local, global, built-in, and scratch definitions. + - Applies precedence and trust gates. + - Separates reusable definitions from scratch definitions. + +- `WorkflowRunStore` + - Persists workflow run metadata, status, events, final result, errors, and child task links. + - Owns the durable step/result journal. + - Stores enough definition identity/content to resume scratch runs. + +- `WorkflowRunner` + - Executes plain JavaScript workflow definitions in a sandboxed coordinator runtime. + - Exposes conductor primitives. + - Replays against the run journal on resume. + - Fails fast on authoring errors such as missing durable step IDs. 
+ +- `WorkflowEventBus` + - Emits run status, phase, log, child-task, validation, result, and error events to the UI. + +- `TaskService` adapter + - Spawns child tasks. + - Awaits reports. + - Validates structured outputs through the final report path. + - Interrupts/resumes child task workspaces where possible. + +### Runtime model + +The workflow runtime can reuse the sandbox substrate used by programmatic tool calling, but it should expose a different API. PTC exposes model tools under `mux.*` for batching tool calls; workflows expose a conductor API for durable orchestration. The two features should not be conflated. + +In v1, workflow scripts have no direct access to Node, shell, filesystem, network, or arbitrary Mux tools. All side-effectful work happens inside tasks. + +### Durable run storage sketch + +The exact paths should be finalized during implementation planning, but the storage model should support: + +- Workspace/session-scoped run records under Mux home. +- A run metadata file. +- An append-only or recoverable event log. +- A step/result journal keyed by stable step IDs and input hashes. +- A stored copy or stable reference for scratch workflow definitions. +- Child task ID links and accepted report artifacts. + +A plausible shape: + +```text +~/.mux/sessions/<workspaceId>/workflows/<runId>/ + run.json + definition.js # for scratch or captured executable content + events.jsonl + steps.jsonl +``` + +Reusable definitions remain outside run storage: + +```text +<project>/.mux/workflows/<name>.js +~/.mux/workflows/<name>.js +src/node/builtinWorkflows/<name>.js # source-of-truth location TBD +``` + +### Replay algorithm sketch + +For each replay-boundary primitive call: + +1. Validate that the call has a stable ID. +2. Normalize durable input data for the call. +3. Look up a completed journal entry by step ID and input identity. +4. If a valid result exists, return it without spawning work. +5. If no valid result exists, execute the primitive. +6. Persist the started/completed state and result. +7. 
On validation or persistence failure, surface an error or use partial recovery where safe. + +For recovery with partial corruption: + +- Reuse intact completed steps. +- Rerun missing steps. +- Rerun corrupt/unrecoverable steps when safe. +- Preserve completed downstream steps when their identities and inputs remain valid. +- Do not fail the whole workflow solely because one step record is missing. + +### Structured task report path + +The existing sub-agent report contract will need to grow from Markdown-only reporting to optional structured reporting for workflow-spawned tasks. + +Conceptual report input for schema-constrained tasks: + +```ts +{ + reportMarkdown: string; + title?: string | null; + structuredOutput?: unknown; +} +``` + +The child task should receive a final report tool schema that reflects the requested output schema. If its submitted structured output fails validation, the tool returns a validation error and the task remains active. + +This is intentionally stronger than extracting JSON from Markdown after the fact. + +### Invocation and discovery flow + +- Workflow discovery reuses skill-like ordering and UI intuition. +- Workflow descriptors must be distinguishable from skill descriptors in UI. +- Slash invocation starts a workflow run. +- `$name` creates a workflow reference; execution depends on user intent. +- Scratch workflows are not discoverable until promoted. + +### Trust flow + +Project-local workflow discovery/execution must consult project trust, matching existing repo-controlled Mux script/config behavior. + +If a project is untrusted: + +- Do not execute project-local workflow definitions. +- Prefer omitting them from suggestions; if shown, render disabled with a trust-project action. + +### Goal attribution flow + +When a workflow run starts in a workspace with an active goal, the run and its child tasks should be attributable to that goal. The workflow result should feed back into the active goal loop. 
Goal completion remains governed by existing goal mechanisms. + +## Phases + +This RFC intentionally stops short of a detailed implementation plan. A later plan should break these down into small, testable slices. + +1. **Workflow domain and run store** + - Define workflow run metadata, event log, step journal, and scratch definition persistence. + +2. **Conductor runtime and primitives** + - Execute plain JavaScript workflow definitions with conductor-only APIs. + - Implement stable step ID enforcement and replay lookup. + +3. **Task integration and structured output** + - Extend workflow-spawned task reporting to support report-time structured output validation. + - Return validated task outputs to workflow primitives. + +4. **Run card and events** + - Render lightweight workflow run cards in chat. + - Stream phases, logs, task links, validation events, result, and error state. + +5. **Discovery, invocation, trust, and promotion** + - Add workflow definition discovery roots, skill-like precedence, slash/inline references, project trust gating, scratch definitions, and promotion UI. + +6. **Durable resume and partial recovery** + - Resume interrupted/restarted runs from the durable journal. + - Reuse completed steps and rerun missing/unrecoverable steps. + - Cascade interrupt/resume child task workspaces where possible. + +7. **Built-in deep research** + - Ship a built-in deep research workflow and dogfood it heavily before adding more built-ins. + +## Dogfooding and Validation + +Before treating workflows as a productized feature, dogfood deep research end to end. + +### Required dogfood scenarios + +1. **Novel topic research** + - Start from a broad topic. + - Scope, fan out, verify, refute, and synthesize. + - Confirm final report includes claims, sources, confidence, and caveats. + +2. **Adversarial validation** + - Have one lane produce claims and another lane refute/qualify them. + - Verify the run card makes disagreement/refutation understandable. + +3. 
**Crash/resume** + - Interrupt or restart Mux mid-run. + - Resume the same workflow run. + - Confirm completed steps are not duplicated and missing steps recover. + +4. **Structured output validation failure** + - Force a child task to submit invalid structured output. + - Confirm the report tool returns a validation error and the child can resubmit. + +5. **Foreground to background** + - Start a slash-invoked foreground workflow. + - Send another user message or start another workflow. + - Confirm the first workflow moves to background and remains observable. + +6. **Scratch promotion** + - Generate a dynamic scratch workflow. + - Promote it with explicit name/location choice. + - Confirm discovery/precedence/trust behavior after promotion. + +### Evidence to capture + +Each dogfood pass should produce reviewer-visible evidence: + +- Prompt used. +- Workflow script or definition source. +- Run transcript and run-card screenshots. +- Spawned task list and task transcripts. +- Structured validation events. +- Final workflow result. +- Screenshots of promotion/resume/interrupt UI where applicable. +- Short screen recording for visual workflow behavior and recovery paths. + +### Automated validation targets + +The implementation plan should include targeted tests for: + +- Definition discovery and precedence. +- Project trust gating. +- Slash vs `$name` semantics. +- Stable step ID enforcement. +- Replay short-circuiting completed steps. +- Partial recovery for missing/corrupt step records. +- Report-time structured output validation and retry. +- Foreground/background transitions. +- Workflow run card projection from events. +- Goal attribution. + +## Alternatives Considered + +### Reuse skills as executable workflows + +Rejected. Skills are reusable instruction/reference playbooks. Making them executable would blur a key trust boundary and undermine progressive disclosure semantics. + +### Use PTC `code_execution` directly as workflows + +Rejected for product shape. 
PTC is useful substrate, but workflows need durable run identity, step journaling, resume/recovery, phase/log events, trust, discovery, promotion, and a first-class run card. + +### TypeScript workflow definitions in v1 + +Deferred. Plain JavaScript avoids an authoring compile pipeline, source maps, dependency resolution, and a larger trust surface. TypeScript can be layered on later. + +### Direct tools inside workflow scripts + +Rejected for v1. Workflows should coordinate; tasks should execute. This keeps side effects visible in task transcripts and makes durable replay safer. + +### Per-workflow content-hash approvals + +Rejected for v1. Existing project trust already gates repo-controlled Mux scripts and configuration. Project-local workflow definitions should use the same project trust model initially. + +### Workflow-specific concurrency caps + +Rejected for v1. Existing task settings and queueing should govern concurrency. Raise the global default parallel task limit to 16 instead of adding arbitrary workflow-only caps. + +### Dedicated best-of/variant primitives + +Deferred. `parallelAgents(...)` can express v1 variants and best-of-style fan-out. Dedicated primitives should wait until built-in workflows reveal stable semantics. + +### Deep review as the first built-in + +Deferred. Deep research is more novel, cleaner for conductor-only orchestration, and better for visually proving phases, cross-checking, structured outputs, and adversarial verification. + +## Open Questions + +These should be resolved during implementation planning, not before this RFC can guide the plan: + +1. Exact on-disk run-store paths and file formats. +2. Exact oRPC/IPC API surface for workflow discovery, launch, events, interrupt, resume, and promotion. +3. Exact UI treatment for disabled untrusted project-local workflows in suggestions. +4. Exact child-task prompt/report-tool injection for output schemas. +5. 
Exact lifecycle for cleaning old scratch workflow definitions and old run journals. +6. Exact default built-in deep research script and structured output schema. +7. Exact migration behavior if a workflow definition changes while an old run is resumable. + +## Evidence Map + +Repo facts verified during the grilling session: + +- Agent skills are file-based playbooks with skill-like discovery roots and precedence: `docs/agents/agent-skills.mdx`, `src/node/services/agentSkills/agentSkillsService.ts`. +- Current PTC uses sandboxed JavaScript and exposes synchronous-looking `mux.*` tools: `src/node/services/tools/code_execution.ts`. +- Task tooling supports foreground/background runs and grouped task spawning: `src/node/services/tools/task.ts`, `src/common/utils/tools/toolDefinitions.ts`. +- Task settings currently default `maxParallelAgentTasks` to 3 and allow up to 256: `src/common/config/schemas/taskSettings.ts`, `src/common/types/tasks.ts`. +- Project trust is a per-project config bit used to gate repo-controlled scripts/config: `src/node/utils/projectTrust.ts`, `src/common/schemas/project.ts`, `src/browser/features/Settings/Sections/SecuritySection.tsx`. +- Project trust gates `.mux/init`, `.mux/tool_env`, tool hooks, git hooks, task/workspace creation, and project-local MCP config: `src/node/runtime/initHook.ts`, `src/node/services/hooks.ts`, `src/node/services/tools/bash.ts`, `src/node/services/mcpConfigService.ts`, `src/node/services/workspaceService.ts`, `src/node/services/taskService.ts`. +- Sub-agent final reports currently accept Markdown and optional title only: `src/common/utils/tools/toolDefinitions.ts`, `src/node/services/tools/agent_report.ts`. +- Task interruption currently has a preserved-interruption path distinct from destructive task termination: `src/node/services/workspaceService.ts`, `src/node/services/taskService.ts`, `src/node/services/tools/task_terminate.ts`. 
+- Chat/tool-call crash resilience uses `partial.json` and `chat.jsonl`: `src/node/services/historyService.ts`, `src/node/services/streamManager.ts`. + +## Decision Ledger + +| Decision | Status | Rationale | +| ---------------------------------------------------------------- | -------: | ---------------------------------------------------------------- | +| Use separate workflow definitions, not executable skills | Accepted | Preserves skill trust/progressive-disclosure semantics. | +| Start as developer-facing experiment, not full dashboard | Accepted | Learn orchestration value before polishing product shell. | +| Plain JavaScript authoring in v1 | Accepted | Matches sandbox substrate and avoids compile pipeline. | +| Conductor-only workflow runtime | Accepted | Keeps side effects in task transcripts and makes replay safer. | +| Report-time structured task output validation | Accepted | Gives workflows a real programmatic contract. | +| JSON Schema object literals for task output schemas | Accepted | Dependency-light, serializable, future TS/Zod can compile to it. | +| Lightweight workflow run card in chat | Accepted | Needed for trust/observability without full dashboard. | +| Foreground/background behavior mirrors bash/task | Accepted | Matches existing Mux UX and agent control. | +| Durable workflow runs from first real implementation | Accepted | A crash/restart should not lose orchestration progress. | +| Explicit stable IDs for replay-boundary primitives | Accepted | Durable replay cannot rely on fragile call order. | +| Partial workflow recovery | Accepted | Recovery should reuse intact steps and rerun missing work. | +| Project trust governs project-local workflows | Accepted | Reuses existing repo-controlled script trust model. | +| Scratch workflows are one-off by default and promotable | Accepted | Enables dynamic generation without polluting reusable roots. 
| +| No workflow-specific caps in v1; raise global task default to 16 | Accepted | Keeps limits simple while enabling fan-out. | +| Workflows can be goal steps | Accepted | Goals are loops; workflows are control flow inside loops. | +| Deep research first built-in | Accepted | Best showcase for novel value and conductor-only orchestration. | diff --git a/src/browser/contexts/CommandRegistryContext.tsx b/src/browser/contexts/CommandRegistryContext.tsx index ad5138072b..92ae18ffa0 100644 --- a/src/browser/contexts/CommandRegistryContext.tsx +++ b/src/browser/contexts/CommandRegistryContext.tsx @@ -62,6 +62,10 @@ interface CommandRegistryContextValue { const CommandRegistryContext = createContext(null); +export function useOptionalCommandRegistry(): CommandRegistryContextValue | null { + return useContext(CommandRegistryContext); +} + export function useCommandRegistry(): CommandRegistryContextValue { const ctx = useContext(CommandRegistryContext); if (!ctx) throw new Error("useCommandRegistry must be used within CommandRegistryProvider"); diff --git a/src/browser/features/ChatInput/CommandSuggestions.test.tsx b/src/browser/features/ChatInput/CommandSuggestions.test.tsx index c0b5b43220..7eb7d998f5 100644 --- a/src/browser/features/ChatInput/CommandSuggestions.test.tsx +++ b/src/browser/features/ChatInput/CommandSuggestions.test.tsx @@ -43,6 +43,19 @@ describe("CommandSuggestions", () => { globalThis.document = undefined as unknown as Document; }); + it("renders workflow suggestions with a distinct badge", () => { + const { getByText } = render( + undefined} + onDismiss={() => undefined} + isVisible + /> + ); + + expect(getByText("Workflow")).toBeTruthy(); + }); + it("preserves the selected suggestion by id when suggestions reorder", () => { const initialSuggestions = [makeSuggestion("a"), makeSuggestion("b"), makeSuggestion("c")]; const nextSuggestions = [makeSuggestion("c"), makeSuggestion("a"), makeSuggestion("b")]; diff --git 
a/src/browser/features/ChatInput/CommandSuggestions.tsx b/src/browser/features/ChatInput/CommandSuggestions.tsx index 41e99bb549..818582ddbe 100644 --- a/src/browser/features/ChatInput/CommandSuggestions.tsx +++ b/src/browser/features/ChatInput/CommandSuggestions.tsx @@ -67,6 +67,30 @@ function HighlightedText({ return {parts}; } +const SUGGESTION_KIND_BADGES = { + workflow: { label: "Workflow", className: "text-plan-mode" }, + skill: { label: "Skill", className: "text-medium" }, +} satisfies Partial< + Record, { label: string; className: string }> +>; + +function SuggestionKindBadge(props: { kind: SlashSuggestion["kind"] }) { + if (props.kind !== "workflow" && props.kind !== "skill") { + return null; + } + const badge = SUGGESTION_KIND_BADGES[props.kind]; + return ( + + {badge.label} + + ); +} + // Props interface interface CommandSuggestionsProps { suggestions: SlashSuggestion[]; @@ -285,6 +309,7 @@ export const CommandSuggestions: React.FC = ({ > +
= (props) => { const creationProject = variant === "creation" ? userProjects.get(creationParentProjectPath) : undefined; const [thinkingLevel] = useThinkingLevel(); + const dynamicWorkflowsExperimentEnabled = useExperimentValue(EXPERIMENT_IDS.DYNAMIC_WORKFLOWS); const workspaceHeartbeatsExperimentEnabled = useExperimentValue( EXPERIMENT_IDS.WORKSPACE_HEARTBEATS ); @@ -266,6 +273,7 @@ const ChatInputInner: React.FC = (props) => { asyncCommandScopeRef.current = { variant, workspaceId }; }, [variant, workspaceId]); + const store = useWorkspaceStoreRaw(); const workspaceSidebarState = useOptionalWorkspaceSidebarState(workspaceId); const workspaceGoal = workspaceSidebarState?.goal ?? null; @@ -361,6 +369,8 @@ const ChatInputInner: React.FC = (props) => { const [atMentionSuggestions, setAtMentionSuggestions] = useState([]); const [showSkillSuggestions, setShowSkillSuggestions] = useState(false); const [skillSuggestions, setSkillSuggestions] = useState([]); + const projectedWorkflowRunCardKeysRef = useRef(new Set()); + const workflowsRequestIdRef = useRef(0); const agentSkillsRequestIdRef = useRef(0); const atMentionDebounceRef = useRef | null>(null); const atMentionRequestIdRef = useRef(0); @@ -373,6 +383,9 @@ const ChatInputInner: React.FC = (props) => { const [showCommandSuggestions, setShowCommandSuggestions] = useState(false); const [commandSuggestions, setCommandSuggestions] = useState([]); + const [workflowDefinitionDescriptors, setWorkflowDefinitionDescriptors] = useState< + WorkflowDefinitionDescriptor[] + >([]); const [agentSkillDescriptors, setAgentSkillDescriptors] = useState([]); const [toast, setToast] = useState(null); // State for destructive command confirmation modal (currently only /clear). @@ -1436,15 +1449,24 @@ const ChatInputInner: React.FC = (props) => { useLayoutEffect(() => { const suggestions = getSlashCommandSuggestions(input, { agentSkills: agentSkillDescriptors, + workflows: dynamicWorkflowsExperimentEnabled ? 
workflowDefinitionDescriptors : [], variant, isExperimentEnabled: (experimentId) => resolveSlashCommandExperimentValue(experimentId, { workspaceHeartbeats: workspaceHeartbeatsExperimentEnabled, + dynamicWorkflows: dynamicWorkflowsExperimentEnabled, }), }); setCommandSuggestions((prev) => replaceSuggestions(prev, suggestions)); setShowCommandSuggestions(suggestions.length > 0); - }, [input, agentSkillDescriptors, variant, workspaceHeartbeatsExperimentEnabled]); + }, [ + input, + agentSkillDescriptors, + workflowDefinitionDescriptors, + variant, + workspaceHeartbeatsExperimentEnabled, + dynamicWorkflowsExperimentEnabled, + ]); // Derive ghost hint for slash-command argument syntax. // Show only when suggestions are hidden and the input is exactly "/command " with no args yet. @@ -1453,9 +1475,61 @@ const ChatInputInner: React.FC = (props) => { isExperimentEnabled: (experimentId) => resolveSlashCommandExperimentValue(experimentId, { workspaceHeartbeats: workspaceHeartbeatsExperimentEnabled, + dynamicWorkflows: dynamicWorkflowsExperimentEnabled, }), }); + // Load workflow definitions for slash suggestions and slash invocation. + useEffect(() => { + let isMounted = true; + const requestId = ++workflowsRequestIdRef.current; + + const loadWorkflows = async () => { + if (!api || variant !== "workspace" || !workspaceId || !dynamicWorkflowsExperimentEnabled) { + if (isMounted && workflowsRequestIdRef.current === requestId) { + setWorkflowDefinitionDescriptors([]); + } + return; + } + + try { + const [workflows, runs] = await Promise.all([ + api.workflows.listDefinitions({ workspaceId }), + api.workflows.listRuns({ workspaceId }), + ]); + if (!isMounted || workflowsRequestIdRef.current !== requestId) { + return; + } + setWorkflowDefinitionDescriptors(Array.isArray(workflows) ? 
workflows : []); + for (const run of runs) { + if ( + hasWorkflowRunToolCallMessage(store.getWorkspaceState(workspaceId).muxMessages, run) + ) { + continue; + } + const cardKey = `${workspaceId}:${run.id}:${run.updatedAt}:${run.status}`; + if (projectedWorkflowRunCardKeysRef.current.has(cardKey)) { + continue; + } + projectedWorkflowRunCardKeysRef.current.add(cardKey); + addWorkflowRunCardMessageForRun(workspaceId, run); + } + } catch (error) { + console.error("Failed to load workflow definitions:", error); + if (!isMounted || workflowsRequestIdRef.current !== requestId) { + return; + } + setWorkflowDefinitionDescriptors([]); + } + }; + + void loadWorkflows(); + + return () => { + isMounted = false; + }; + }, [api, variant, workspaceId, dynamicWorkflowsExperimentEnabled, store]); + // Load agent skills for suggestions useEffect(() => { let isMounted = true; @@ -1956,6 +2030,7 @@ const ChatInputInner: React.FC = (props) => { variant, workspaceId: commandWorkspaceId, projectPath: commandProjectPath, + dynamicWorkflowsEnabled: dynamicWorkflowsExperimentEnabled, openSettings: open, currentModel: workspaceSidebarState?.currentModel ?? null, sendMessageOptions: commandSendMessageOptions, @@ -2168,8 +2243,10 @@ const ChatInputInner: React.FC = (props) => { const { parsed, skillInvocation } = await parseCommandWithSkillInvocation({ messageText, agentSkillDescriptors, + workflowDefinitions: dynamicWorkflowsExperimentEnabled ? workflowDefinitionDescriptors : [], api, discovery: skillDiscovery, + workflowWorkspaceId: variant === "workspace" ? 
workspaceId : null, }); const combinedSkillRefs = await resolveInlineSkillRefsForSend({ messageText, diff --git a/src/browser/features/ChatInput/utils.inlineSkillRefs.test.ts b/src/browser/features/ChatInput/utils.inlineSkillRefs.test.ts index 5474ce89e5..53f868fd87 100644 --- a/src/browser/features/ChatInput/utils.inlineSkillRefs.test.ts +++ b/src/browser/features/ChatInput/utils.inlineSkillRefs.test.ts @@ -1,3 +1,4 @@ +/* eslint-disable @typescript-eslint/await-thenable */ import { describe, expect, test } from "bun:test"; import type { AgentSkillDescriptor } from "@/common/types/agentSkill"; import { @@ -38,6 +39,65 @@ describe("parseCommandWithSkillInvocation", () => { }); }); +describe("parseCommandWithSkillInvocation workflows", () => { + test("resolves an unambiguous unknown slash command as a workflow run", async () => { + await expect( + parseCommandWithSkillInvocation({ + messageText: "/deep-research topic: mux", + agentSkillDescriptors: [], + workflowDefinitions: [ + { + name: "deep-research", + description: "Research deeply", + scope: "built-in", + executable: true, + }, + ], + api: null, + discovery: null, + workflowWorkspaceId: "workspace-1", + }) + ).resolves.toEqual({ + parsed: { type: "workflow-run", name: "deep-research", argsText: "topic: mux" }, + skillInvocation: null, + }); + }); + + test("requires explicit workflow invocation when a skill and workflow share a name", async () => { + await expect( + parseCommandWithSkillInvocation({ + messageText: "/deep-review topic: mux", + agentSkillDescriptors: [ + { + name: "deep-review", + description: "Review deeply", + scope: "project", + }, + ], + workflowDefinitions: [ + { + name: "deep-review", + description: "Ambiguous workflow", + scope: "project", + executable: true, + }, + ], + api: null, + discovery: null, + workflowWorkspaceId: "workspace-1", + }) + ).resolves.toEqual({ + parsed: { + type: "command-invalid-args", + command: "deep-review", + input: "deep-review", + usage: "Skill and workflow 
names collide. Use /workflow deep-review to run the workflow.", + }, + skillInvocation: null, + }); + }); +}); + describe("resolveInlineSkillRefsForSend", () => { test("returns an empty array for no slash and no inline refs", async () => { expect( diff --git a/src/browser/features/ChatInput/utils.ts b/src/browser/features/ChatInput/utils.ts index 7aacbcce35..02ec9fbe25 100644 --- a/src/browser/features/ChatInput/utils.ts +++ b/src/browser/features/ChatInput/utils.ts @@ -6,6 +6,7 @@ import { resolveInlineSkillReferences, } from "@/browser/utils/agentSkills/inlineSkillReferences"; import type { AgentSkillDescriptor } from "@/common/types/agentSkill"; +import type { WorkflowDefinitionDescriptor } from "@/common/types/workflow"; import type { ParsedRuntime } from "@/common/types/runtime"; import { buildAgentSkillMetadata, @@ -23,6 +24,9 @@ export type CreationRuntimeValidationError = | { mode: "ssh"; kind: "missingCoderTemplate" } | { mode: "ssh"; kind: "missingCoderPreset" }; +const WORKFLOW_SKILL_COLLISION_USAGE = (name: string) => + `Skill and workflow names collide. 
Use /workflow ${name} to run the workflow.`; + export interface SkillInvocation { descriptor: AgentSkillDescriptor; userText: string; @@ -116,11 +120,52 @@ async function resolveSkillInvocation(options: { }; } +function resolveLocalWorkflowInvocation(options: { + messageText: string; + parsed: ParsedCommand; + workflowDefinitions: WorkflowDefinitionDescriptor[]; + hasSkill: boolean; + workflowWorkspaceId: string | null; +}): ParsedCommand | null { + if (!isUnknownSlashCommand(options.parsed) || options.workflowWorkspaceId == null) { + return null; + } + + const command = options.parsed.command; + const prefix = `/${command}`; + const afterPrefix = options.messageText.slice(prefix.length); + const hasSeparator = afterPrefix.length === 0 || /^\s/u.test(afterPrefix); + if (!hasSeparator) { + return null; + } + + const workflow = options.workflowDefinitions.find( + (definition) => definition.name === command && definition.executable + ); + if (!workflow) { + return null; + } + + if (options.hasSkill) { + return { + type: "command-invalid-args", + command, + input: command, + usage: WORKFLOW_SKILL_COLLISION_USAGE(command), + }; + } + + const argsText = afterPrefix.trimStart(); + return { type: "workflow-run", name: workflow.name, ...(argsText ? { argsText } : {}) }; +} + export async function parseCommandWithSkillInvocation(options: { messageText: string; agentSkillDescriptors: AgentSkillDescriptor[]; + workflowDefinitions?: WorkflowDefinitionDescriptor[]; api: APIClient | null; discovery: SkillResolutionTarget | null; + workflowWorkspaceId?: string | null; }): Promise<{ parsed: ParsedCommand; skillInvocation: SkillInvocation | null }> { const parsed = parseCommand(options.messageText); const skillInvocation = await resolveSkillInvocation({ @@ -131,7 +176,22 @@ export async function parseCommandWithSkillInvocation(options: { discovery: options.discovery, }); - return { parsed: skillInvocation ? 
null : parsed, skillInvocation }; + const workflowInvocation = resolveLocalWorkflowInvocation({ + messageText: options.messageText, + parsed, + workflowDefinitions: options.workflowDefinitions ?? [], + hasSkill: skillInvocation != null, + workflowWorkspaceId: options.workflowWorkspaceId ?? null, + }); + + if (workflowInvocation?.type === "command-invalid-args") { + return { parsed: workflowInvocation, skillInvocation: null }; + } + + return { + parsed: skillInvocation || workflowInvocation ? workflowInvocation : parsed, + skillInvocation, + }; } /** diff --git a/src/browser/features/Tools/AgentReportToolCall.test.tsx b/src/browser/features/Tools/AgentReportToolCall.test.tsx index da55f053d6..1a3185c4c8 100644 --- a/src/browser/features/Tools/AgentReportToolCall.test.tsx +++ b/src/browser/features/Tools/AgentReportToolCall.test.tsx @@ -44,4 +44,36 @@ describe("AgentReportToolCall", () => { expect(view.getByText(/Hello/)).toBeTruthy(); expect(view.getByText(/World/)).toBeTruthy(); }); + + test("renders file-backed report payload from tool output", () => { + const view = render( + + + + ); + + expect(view.getByText(/File Report/)).toBeTruthy(); + expect(view.getByText(/From disk/)).toBeTruthy(); + }); + + test("renders default file-backed report placeholder before tool output", () => { + const view = render( + + + + ); + + expect(view.getByText(/Report file: report\.md/)).toBeTruthy(); + }); }); diff --git a/src/browser/features/Tools/AgentReportToolCall.tsx b/src/browser/features/Tools/AgentReportToolCall.tsx index 2bf50dedd2..f40ae64e0d 100644 --- a/src/browser/features/Tools/AgentReportToolCall.tsx +++ b/src/browser/features/Tools/AgentReportToolCall.tsx @@ -26,6 +26,19 @@ interface AgentReportToolCallProps { status?: ToolStatus; } +function getSubmittedReportMarkdown( + args: AgentReportToolArgs, + result: AgentReportToolResult | undefined +): string { + if (result && "success" in result && result.success === true && result.report?.reportMarkdown) { + return 
result.report.reportMarkdown; + } + if ("reportMarkdown" in args) { + return args.reportMarkdown; + } + return `Report file: ${args.reportMarkdownPath ?? "report.md"}`; +} + export const AgentReportToolCall: React.FC = ({ args, result, @@ -37,9 +50,10 @@ export const AgentReportToolCall: React.FC = ({ const errorResult = isToolErrorResult(result) ? result : null; const title = args.title ?? "Agent report"; + const reportMarkdown = getSubmittedReportMarkdown(args, result); // Show a small preview when collapsed so the card still has some useful context. - const firstLine = args.reportMarkdown.trim().split("\n")[0] ?? ""; + const firstLine = reportMarkdown.trim().split("\n")[0] ?? ""; const preview = firstLine.length > 80 ? firstLine.slice(0, 80).trim() + "…" : firstLine; return ( @@ -54,7 +68,7 @@ export const AgentReportToolCall: React.FC = ({ {expanded && (
- +
{errorResult && {errorResult.error}}
diff --git a/src/browser/features/Tools/Shared/getToolComponent.test.ts b/src/browser/features/Tools/Shared/getToolComponent.test.ts index 66a69bf971..f733af9c42 100644 --- a/src/browser/features/Tools/Shared/getToolComponent.test.ts +++ b/src/browser/features/Tools/Shared/getToolComponent.test.ts @@ -7,10 +7,16 @@ import { CompleteGoalToolCall } from "../CompleteGoalToolCall"; import { DesktopActionToolCall } from "../DesktopActionToolCall"; import { DesktopScreenshotToolCall } from "../DesktopScreenshotToolCall"; import { GenericToolCall } from "../GenericToolCall"; +import { WorkflowRunToolCall } from "../WorkflowRunToolCall"; import { GetGoalToolCall } from "../GetGoalToolCall"; import { getToolComponent } from "./getToolComponent"; describe("getToolComponent", () => { + test("returns WorkflowRunToolCall for workflow_run", () => { + const component = getToolComponent("workflow_run", { name: "deep-research" }); + expect(component).toBe(WorkflowRunToolCall); + }); + test("returns AgentReportToolCall for agent_report", () => { const component = getToolComponent("agent_report", { reportMarkdown: "# Hello" }); expect(component).toBe(AgentReportToolCall); diff --git a/src/browser/features/Tools/Shared/getToolComponent.ts b/src/browser/features/Tools/Shared/getToolComponent.ts index 178c231a78..3946af6233 100644 --- a/src/browser/features/Tools/Shared/getToolComponent.ts +++ b/src/browser/features/Tools/Shared/getToolComponent.ts @@ -41,6 +41,7 @@ import { } from "../TaskToolCall"; import { TaskApplyGitPatchToolCall } from "../TaskApplyGitPatchToolCall"; import { GetGoalToolCall } from "../GetGoalToolCall"; +import { WorkflowRunToolCall } from "../WorkflowRunToolCall"; import { CompleteGoalToolCall } from "../CompleteGoalToolCall"; /** @@ -164,6 +165,10 @@ const TOOL_REGISTRY: Record = { component: TaskApplyGitPatchToolCall, schema: TOOL_DEFINITIONS.task_apply_git_patch.schema, }, + workflow_run: { + component: WorkflowRunToolCall, + schema: 
TOOL_DEFINITIONS.workflow_run.schema, + }, agent_report: { component: AgentReportToolCall, schema: TOOL_DEFINITIONS.agent_report.schema, diff --git a/src/browser/features/Tools/WorkflowRunToolCall.stories.tsx b/src/browser/features/Tools/WorkflowRunToolCall.stories.tsx new file mode 100644 index 0000000000..7a7cd5fd70 --- /dev/null +++ b/src/browser/features/Tools/WorkflowRunToolCall.stories.tsx @@ -0,0 +1,106 @@ +import type { Meta, StoryObj } from "@storybook/react-vite"; +import { WorkflowRunToolCall } from "@/browser/features/Tools/WorkflowRunToolCall"; +import { lightweightMeta } from "@/browser/stories/meta.js"; + +const meta = { + ...lightweightMeta, + title: "App/Chat/Tools/WorkflowRun", + component: WorkflowRunToolCall, +} satisfies Meta; + +export default meta; + +type Story = StoryObj; + +export const CompletedDeepResearch: Story = { + args: { + args: { + name: "deep-research", + args: { topic: "workflow run cards" }, + run_in_background: false, + }, + status: "completed", + result: { + status: "completed", + runId: "wfr_story", + result: { + reportMarkdown: + "# Deep Research\n\nWorkflow run cards should show phases, tasks, and final synthesis.", + structuredOutput: { confidence: "medium", gaps: ["Dogfood in full app"] }, + }, + run: { + id: "wfr_story", + workspaceId: "workspace-1", + definition: { + name: "deep-research", + description: "Deep research", + scope: "built-in", + executable: true, + }, + definitionSource: "export default function workflow() { return null; }", + definitionHash: "sha256:story", + args: { topic: "workflow run cards" }, + status: "completed", + createdAt: "2026-05-29T00:00:00.000Z", + updatedAt: "2026-05-29T00:00:02.000Z", + events: [ + { sequence: 1, type: "status", at: "2026-05-29T00:00:00.000Z", status: "running" }, + { sequence: 2, type: "phase", at: "2026-05-29T00:00:00.000Z", name: "scope" }, + { + sequence: 3, + type: "task", + at: "2026-05-29T00:00:01.000Z", + stepId: "scope-topic", + taskId: "task_scope", + status: 
"completed", + }, + { + sequence: 4, + type: "phase", + at: "2026-05-29T00:00:01.000Z", + name: "adversarial-verification", + }, + { sequence: 5, type: "status", at: "2026-05-29T00:00:02.000Z", status: "completed" }, + ], + steps: [], + }, + }, + }, +}; + +export const ScratchPromotable: Story = { + args: { + args: { + name: "scratch", + args: { topic: "promote this workflow" }, + run_in_background: true, + }, + status: "completed", + result: { + status: "completed", + runId: "wfr_scratch_story", + result: { reportMarkdown: "# Scratch workflow\n\nThis one-off workflow can be promoted." }, + run: { + id: "wfr_scratch_story", + workspaceId: "workspace-1", + definition: { + name: "scratch", + description: "Scratch workflow", + scope: "scratch", + executable: true, + }, + definitionSource: "export default function workflow() { return null; }", + definitionHash: "sha256:scratch-story", + args: { topic: "promote this workflow" }, + status: "completed", + createdAt: "2026-05-29T00:00:00.000Z", + updatedAt: "2026-05-29T00:00:02.000Z", + events: [ + { sequence: 1, type: "phase", at: "2026-05-29T00:00:00.000Z", name: "draft" }, + { sequence: 2, type: "status", at: "2026-05-29T00:00:02.000Z", status: "completed" }, + ], + steps: [], + }, + }, + }, +}; diff --git a/src/browser/features/Tools/WorkflowRunToolCall.test.tsx b/src/browser/features/Tools/WorkflowRunToolCall.test.tsx new file mode 100644 index 0000000000..598e9f0159 --- /dev/null +++ b/src/browser/features/Tools/WorkflowRunToolCall.test.tsx @@ -0,0 +1,728 @@ +/* eslint-disable @typescript-eslint/require-await */ +import { afterEach, beforeEach, describe, expect, test } from "bun:test"; +import { GlobalWindow } from "happy-dom"; +import { cleanup, fireEvent, render, waitFor } from "@testing-library/react"; +import userEvent from "@testing-library/user-event"; +import { useEffect, type ReactNode } from "react"; + +import { APIContext } from "@/browser/contexts/API"; +import { + CommandRegistryProvider, + 
useCommandRegistry, + type CommandAction, +} from "@/browser/contexts/CommandRegistryContext"; +import { TooltipProvider } from "@/browser/components/Tooltip/Tooltip"; +import { WorkflowRunToolCall } from "./WorkflowRunToolCall"; + +function APIHarness(props: { client: unknown; children: ReactNode }) { + return ( + undefined, + retry: () => undefined, + }} + > + {props.children} + + ); +} + +function CommandActionCapture(props: { onActions: (actions: CommandAction[]) => void }) { + const registry = useCommandRegistry(); + useEffect(() => { + props.onActions(registry.getActions()); + }); + return null; +} + +describe("WorkflowRunToolCall", () => { + let originalWindow: typeof globalThis.window; + let originalDocument: typeof globalThis.document; + let originalLocalStorage: typeof globalThis.localStorage; + + beforeEach(() => { + originalWindow = globalThis.window; + originalDocument = globalThis.document; + originalLocalStorage = globalThis.localStorage; + globalThis.window = new GlobalWindow() as unknown as Window & typeof globalThis; + globalThis.document = globalThis.window.document; + globalThis.localStorage = globalThis.window.localStorage; + }); + + afterEach(() => { + cleanup(); + globalThis.window = originalWindow; + globalThis.document = originalDocument; + globalThis.localStorage = originalLocalStorage; + }); + + test("renders workflow run phases, linked task ids, and final report", () => { + const view = render( + + + + ); + + expect(view.getByText("deep-research")).toBeTruthy(); + expect(view.getByText("wfr_123")).toBeTruthy(); + expect(view.getByText("Arguments")).toBeTruthy(); + expect(view.getByText(/workflow cards/)).toBeTruthy(); + expect(view.getByText("scope")).toBeTruthy(); + expect(view.getByText("adversarial-verification")).toBeTruthy(); + expect(view.getByText("task_scope")).toBeTruthy(); + expect(view.getByText(/Workflow result body/)).toBeTruthy(); + expect(view.getByText(/confidence/)).toBeTruthy(); + }); + + test("refreshes a running 
workflow from the API and shows the completed result", async () => { + const api = { + workflows: { + getRun: async () => ({ + id: "wfr_live", + workspaceId: "workspace-1", + definition: { + name: "deep-research", + description: "Deep research", + scope: "built-in", + executable: true, + }, + definitionSource: "export default function workflow() { return null; }", + definitionHash: "sha256:test", + args: { topic: "workflow cards" }, + status: "completed", + createdAt: "2026-05-29T00:00:00.000Z", + updatedAt: "2026-05-29T00:00:02.000Z", + events: [ + { sequence: 1, type: "status", at: "2026-05-29T00:00:00.000Z", status: "running" }, + { sequence: 2, type: "phase", at: "2026-05-29T00:00:00.000Z", name: "scope" }, + { + sequence: 3, + type: "result", + at: "2026-05-29T00:00:02.000Z", + result: { reportMarkdown: "done live" }, + }, + { sequence: 4, type: "status", at: "2026-05-29T00:00:02.000Z", status: "completed" }, + ], + steps: [], + }), + }, + }; + + const view = render( + + + + + + ); + + await waitFor(() => expect(view.getByText("done live")).toBeTruthy()); + expect(view.getAllByText("completed").length).toBeGreaterThan(0); + }); + + test("shows interrupt action for running workflows and updates with the returned run", async () => { + let interrupted = false; + const api = { + workflows: { + getRun: async () => null, + interrupt: async () => { + interrupted = true; + return { + id: "wfr_interrupt", + workspaceId: "workspace-1", + definition: { + name: "deep-research", + description: "Deep research", + scope: "built-in", + executable: true, + }, + definitionSource: "export default function workflow() { return null; }", + definitionHash: "sha256:test", + args: { topic: "workflow cards" }, + status: "interrupted", + createdAt: "2026-05-29T00:00:00.000Z", + updatedAt: "2026-05-29T00:00:02.000Z", + events: [ + { sequence: 1, type: "status", at: "2026-05-29T00:00:00.000Z", status: "running" }, + { + sequence: 2, + type: "status", + at: "2026-05-29T00:00:02.000Z", + 
status: "interrupted", + }, + ], + steps: [], + }; + }, + }, + }; + + const view = render( + + + + + + ); + + fireEvent.click(view.getByRole("button", { name: "Interrupt workflow" })); + + await waitFor(() => expect(interrupted).toBe(true)); + await waitFor(() => expect(view.getAllByText("interrupted").length).toBeGreaterThan(0)); + }); + + test("registers workflow run actions with the command palette", async () => { + let interrupted = false; + let actions: CommandAction[] = []; + const api = { + workflows: { + getRun: async () => null, + interrupt: async () => { + interrupted = true; + return { + id: "wfr_palette", + workspaceId: "workspace-1", + definition: { + name: "deep-research", + description: "Deep research", + scope: "built-in", + executable: true, + }, + definitionSource: "export default function workflow() { return null; }", + definitionHash: "sha256:test", + args: { topic: "workflow cards" }, + status: "interrupted", + createdAt: "2026-05-29T00:00:00.000Z", + updatedAt: "2026-05-29T00:00:02.000Z", + events: [ + { sequence: 1, type: "status", at: "2026-05-29T00:00:00.000Z", status: "running" }, + { + sequence: 2, + type: "status", + at: "2026-05-29T00:00:02.000Z", + status: "interrupted", + }, + ], + steps: [], + }; + }, + }, + }; + + render( + + + + + (actions = nextActions)} /> + + + + ); + + await waitFor(() => + expect(actions.some((action) => action.id === "workflow:wfr_palette:interrupt")).toBe(true) + ); + const interruptAction = actions.find( + (action) => action.id === "workflow:wfr_palette:interrupt" + ); + expect(interruptAction).toBeDefined(); + await interruptAction?.run(); + + await waitFor(() => expect(interrupted).toBe(true)); + }); + + test("shows resume action for interrupted workflows and refreshes after resume", async () => { + let resumed = false; + let getRunCalls = 0; + const interruptedRun = { + id: "wfr_resume", + workspaceId: "workspace-1", + definition: { + name: "deep-research", + description: "Deep research", + scope: 
"built-in" as const, + executable: true, + }, + definitionSource: "export default function workflow() { return null; }", + definitionHash: "sha256:test", + args: { topic: "workflow cards" }, + status: "interrupted" as const, + createdAt: "2026-05-29T00:00:00.000Z", + updatedAt: "2026-05-29T00:00:01.000Z", + events: [ + { + sequence: 1, + type: "status" as const, + at: "2026-05-29T00:00:00.000Z", + status: "interrupted" as const, + }, + ], + steps: [], + }; + const completedRun = { + ...interruptedRun, + status: "completed" as const, + updatedAt: "2026-05-29T00:00:02.000Z", + events: [ + ...interruptedRun.events, + { + sequence: 2, + type: "result" as const, + at: "2026-05-29T00:00:02.000Z", + result: { reportMarkdown: "resumed" }, + }, + { + sequence: 3, + type: "status" as const, + at: "2026-05-29T00:00:02.000Z", + status: "completed" as const, + }, + ], + }; + const api = { + workflows: { + resume: async () => { + resumed = true; + return { + runId: "wfr_resume", + status: "running" as const, + result: null, + }; + }, + getRun: async () => { + getRunCalls += 1; + return getRunCalls === 1 ? 
interruptedRun : completedRun; + }, + }, + }; + + const view = render( + + + + + + ); + + fireEvent.click(view.getByRole("button", { name: "Resume workflow" })); + + await waitFor(() => expect(resumed).toBe(true)); + await waitFor(() => expect(view.getByText("resumed")).toBeTruthy()); + }); + + test("clears resume polling when resume fails", async () => { + let getRunCalls = 0; + const api = { + workflows: { + resume: async () => { + throw new Error("Project trust is required"); + }, + getRun: async () => { + getRunCalls += 1; + return null; + }, + }, + }; + + const view = render( + + + + + + ); + + fireEvent.click(view.getByRole("button", { name: "Resume workflow" })); + + await waitFor(() => expect(view.getByText("Project trust is required")).toBeTruthy()); + await new Promise((resolve) => setTimeout(resolve, 0)); + expect(getRunCalls).toBe(0); + }); + + test("promotes scratch workflow runs only after the user provides name description and location", async () => { + const promotions: unknown[] = []; + const api = { + workflows: { + promoteScratch: async (input: unknown) => { + promotions.push(input); + return { + name: "promoted-research", + description: "Promoted", + scope: "global", + executable: true, + }; + }, + }, + }; + + const user = userEvent.setup({ document: globalThis.document }); + const view = render( + + + + + + ); + + fireEvent.click(view.getByRole("button", { name: "Promote workflow" })); + expect( + view.getByRole("button", { name: "Save reusable workflow" }).hasAttribute("disabled") + ).toBe(true); + + await user.type(view.getByPlaceholderText("my-workflow"), "promoted-research"); + await user.type( + view.getByPlaceholderText("What this workflow does"), + "Promoted scratch workflow" + ); + await user.click(view.getByRole("button", { name: "Global workflows" })); + await user.click(view.getByRole("button", { name: "Save reusable workflow" })); + + await waitFor(() => expect(promotions).toHaveLength(1)); + expect(promotions[0]).toEqual({ + 
workspaceId: "workspace-1", + runId: "wfr_scratch", + name: "promoted-research", + description: "Promoted scratch workflow", + location: "global", + overwrite: false, + }); + await waitFor(() => + expect(view.getByText("Promoted as promoted-research (global)")).toBeTruthy() + ); + }); + + test("uses live workflow run status for the header instead of stale tool completion state", () => { + const view = render( + + + + ); + + expect(view.getByText("executing")).toBeTruthy(); + expect(view.queryByText("completed")).toBeNull(); + }); +}); diff --git a/src/browser/features/Tools/WorkflowRunToolCall.tsx b/src/browser/features/Tools/WorkflowRunToolCall.tsx new file mode 100644 index 0000000000..552d70f65b --- /dev/null +++ b/src/browser/features/Tools/WorkflowRunToolCall.tsx @@ -0,0 +1,549 @@ +import React, { useCallback, useContext, useEffect, useState } from "react"; + +import { APIContext } from "@/browser/contexts/API"; +import { + useOptionalCommandRegistry, + type CommandAction, +} from "@/browser/contexts/CommandRegistryContext"; +import type { WorkflowRunEvent, WorkflowRunRecord } from "@/common/types/workflow"; +import type { + WorkflowRunToolArgs, + WorkflowRunToolResult, + WorkflowRunToolSuccessResult, +} from "@/common/types/tools"; + +import { + ToolContainer, + ToolHeader, + ExpandIcon, + ToolName, + StatusIndicator, + ToolDetails, + ToolIcon, + ErrorBox, +} from "./Shared/ToolPrimitives"; +import { + getStatusDisplay, + isToolErrorResult, + type ToolStatus, + useToolExpansion, +} from "./Shared/toolUtils"; +import { MarkdownRenderer } from "../Messages/MarkdownRenderer"; + +interface WorkflowRunToolCallProps { + args: WorkflowRunToolArgs; + result?: WorkflowRunToolResult; + status?: ToolStatus; +} + +function isWorkflowRunSuccessResult( + value: WorkflowRunToolResult | undefined +): value is WorkflowRunToolSuccessResult { + return value != null && !isToolErrorResult(value); +} + +function getReportMarkdown(value: unknown): string | null { + if (value != 
null && typeof value === "object") { + const reportMarkdown = (value as Record).reportMarkdown; + if (typeof reportMarkdown === "string" && reportMarkdown.trim().length > 0) { + return reportMarkdown; + } + } + return null; +} + +function getStructuredOutput(value: unknown): unknown { + if (value != null && typeof value === "object") { + return (value as Record).structuredOutput; + } + return undefined; +} + +function formatJson(value: unknown): string { + const formatted = JSON.stringify(value, null, 2); + return typeof formatted === "string" ? formatted : String(value); +} + +function getEventKey(event: WorkflowRunEvent): string { + return `${event.sequence}:${event.type}`; +} + +function WorkflowEventRow(props: { event: WorkflowRunEvent }) { + const event = props.event; + switch (event.type) { + case "phase": + return
  • {event.name}
  • ; + case "log": + return
  • {event.message}
  • ; + case "task": + return ( +
  • + {event.stepId}{" "} + {event.taskId}{" "} + {event.status} +
  • + ); + case "validation": + return ( +
  • + {event.stepId} validation {event.success ? "passed" : "failed"} + {event.message ? `: ${event.message}` : ""} +
  • + ); + case "error": + return
  • {event.message}
  • ; + case "status": + return
  • Status: {event.status}
  • ; + case "result": + return
  • Result recorded
  • ; + } +} + +type PromotionLocation = "" | "global" | "project"; + +interface PromotionDraft { + name: string; + description: string; + location: PromotionLocation; + overwrite: boolean; +} + +const EMPTY_PROMOTION_DRAFT: PromotionDraft = { + name: "", + description: "", + location: "", + overwrite: false, +}; + +const WORKFLOW_ACTION_BUTTON_CLASS = + "text-muted hover:text-foreground border-border rounded border px-2 py-1"; +const WORKFLOW_LOCATION_BUTTON_CLASS = `${WORKFLOW_ACTION_BUTTON_CLASS} data-[selected=true]:text-foreground`; + +const REFRESHING_WORKFLOW_STATUSES = new Set(["pending", "running", "backgrounded"]); + +function getLatestResultEvent(run: WorkflowRunRecord | null | undefined): unknown { + return run?.events.findLast((event) => event.type === "result")?.result; +} + +function shouldRefreshWorkflow(status: string): boolean { + return REFRESHING_WORKFLOW_STATUSES.has(status); +} + +function toToolStatus(status: string): ToolStatus { + if (status === "running") { + return "executing"; + } + if ( + status === "pending" || + status === "completed" || + status === "failed" || + status === "interrupted" || + status === "backgrounded" + ) { + return status; + } + return "pending"; +} + +export const WorkflowRunToolCall: React.FC = ({ + args, + result, + status = "pending", +}) => { + const apiState = useContext(APIContext); + const commandRegistry = useOptionalCommandRegistry(); + const { expanded, setExpanded, toggleExpanded } = useToolExpansion(true); + const registerCommandSource = commandRegistry?.registerSource; + const errorResult = isToolErrorResult(result) ? result : null; + const successResult = isWorkflowRunSuccessResult(result) ? result : null; + const [refreshedRun, setRefreshedRun] = useState(null); + const [resumingRunId, setResumingRunId] = useState(null); + const baseRun = successResult?.run; + const runId = successResult?.runId ?? baseRun?.id; + const run = refreshedRun?.id === runId ? 
refreshedRun : baseRun; + const displayStatus = run?.status ?? successResult?.status ?? status; + const resultValue = successResult?.result ?? getLatestResultEvent(run); + const reportMarkdown = getReportMarkdown(resultValue); + const structuredOutput = getStructuredOutput(resultValue); + const invocationArgs = run?.args ?? args.args ?? {}; + const events = run?.events ?? []; + const interestingEvents = events.filter( + (event) => event.type !== "status" && event.type !== "result" + ); + const headerStatus = toToolStatus(displayStatus); + + const [actionError, setActionError] = useState(null); + const [showPromotionForm, setShowPromotionForm] = useState(false); + const [promotionDraft, setPromotionDraft] = useState(EMPTY_PROMOTION_DRAFT); + const [promotedWorkflow, setPromotedWorkflow] = useState(null); + const canInterrupt = + apiState?.api != null && + run?.workspaceId != null && + (displayStatus === "running" || displayStatus === "backgrounded"); + const canResume = + apiState?.api != null && run?.workspaceId != null && displayStatus === "interrupted"; + const canPromote = run?.workspaceId != null && run?.definition.scope === "scratch"; + const canSubmitPromotion = + promotionDraft.name.trim().length > 0 && + promotionDraft.description.trim().length > 0 && + promotionDraft.location.length > 0; + + // Stable identity keeps the command-palette registration from churning while still using + // the latest run/api identifiers that determine the operation target. + const updateRunFromAction = useCallback( + async (action: "interrupt" | "resume") => { + if (apiState?.api == null || run?.workspaceId == null || runId == null) { + return; + } + setActionError(null); + let resumeRequestAccepted = false; + try { + const nextRun = + action === "interrupt" + ? 
await apiState.api.workflows.interrupt({ workspaceId: run.workspaceId, runId }) + : await apiState.api.workflows.resume({ workspaceId: run.workspaceId, runId }); + if (action === "resume") { + resumeRequestAccepted = true; + setResumingRunId(runId); + } + if ("id" in nextRun) { + setRefreshedRun(nextRun); + if (nextRun.status !== "interrupted") { + setResumingRunId(null); + } + return; + } + const refreshed = await apiState.api.workflows.getRun({ + workspaceId: run.workspaceId, + runId, + }); + if (refreshed != null) { + setRefreshedRun(refreshed); + if (refreshed.status !== "interrupted") { + setResumingRunId(null); + } + } + } catch (error) { + if (action === "resume" && !resumeRequestAccepted) { + setResumingRunId(null); + } + setActionError(error instanceof Error ? error.message : `Failed to ${action} workflow`); + } + }, + [apiState?.api, run?.workspaceId, runId] + ); + + const promoteScratchWorkflow = async () => { + if ( + apiState?.api == null || + run?.workspaceId == null || + runId == null || + !canSubmitPromotion || + promotionDraft.location === "" + ) { + return; + } + setActionError(null); + try { + const descriptor = await apiState.api.workflows.promoteScratch({ + workspaceId: run.workspaceId, + runId, + name: promotionDraft.name.trim(), + description: promotionDraft.description.trim(), + location: promotionDraft.location, + overwrite: promotionDraft.overwrite, + }); + setPromotionDraft(EMPTY_PROMOTION_DRAFT); + setPromotedWorkflow(`${descriptor.name} (${descriptor.scope})`); + setShowPromotionForm(false); + } catch (error) { + setActionError(error instanceof Error ? 
error.message : "Failed to promote workflow"); + } + }; + + useEffect(() => { + if (registerCommandSource == null || runId == null || run?.workspaceId == null) { + return; + } + + const unregister = registerCommandSource(() => { + const subtitle = `${args.name} • ${runId}`; + const actions: CommandAction[] = []; + if (canInterrupt) { + actions.push({ + id: `workflow:${runId}:interrupt`, + title: `Interrupt workflow: ${args.name}`, + subtitle, + section: "Workflows", + keywords: ["workflow", "interrupt", "stop", args.name, runId], + run: () => updateRunFromAction("interrupt"), + }); + } + if (canResume) { + actions.push({ + id: `workflow:${runId}:resume`, + title: `Resume workflow: ${args.name}`, + subtitle, + section: "Workflows", + keywords: ["workflow", "resume", "continue", args.name, runId], + run: () => updateRunFromAction("resume"), + }); + } + if (canPromote) { + actions.push({ + id: `workflow:${runId}:promote`, + title: `Promote workflow: ${args.name}`, + subtitle, + section: "Workflows", + keywords: ["workflow", "promote", "scratch", args.name, runId], + run: () => { + setExpanded(true); + setShowPromotionForm(true); + }, + }); + } + return actions; + }); + + return unregister; + }, [ + args.name, + canInterrupt, + canPromote, + canResume, + registerCommandSource, + run?.workspaceId, + runId, + setExpanded, + updateRunFromAction, + ]); + + useEffect(() => { + if ( + apiState?.api == null || + runId == null || + run?.workspaceId == null || + (!shouldRefreshWorkflow(displayStatus) && resumingRunId !== runId) + ) { + return; + } + + let ignore = false; + const refresh = async () => { + try { + const nextRun = await apiState.api.workflows.getRun({ + workspaceId: run.workspaceId, + runId, + }); + if (!ignore && nextRun != null) { + setRefreshedRun(nextRun); + if (nextRun.status !== "interrupted") { + setResumingRunId(null); + } + } + } catch (error) { + console.error("Failed to refresh workflow run:", error); + } + }; + + void refresh(); + const interval = 
window.setInterval(() => { + void refresh(); + }, 2_000); + return () => { + ignore = true; + window.clearInterval(interval); + }; + }, [apiState?.api, displayStatus, resumingRunId, run?.workspaceId, runId]); + + return ( + + + + + {args.name} + {getStatusDisplay(headerStatus)} + + + {expanded && ( + +
    + {runId && {runId}} + {displayStatus} + {run?.definition.scope && {run.definition.scope}} +
    + +
    +
    Arguments
    +
    +              {formatJson(invocationArgs)}
    +            
    +
    + + {(canInterrupt || canResume || canPromote) && ( +
    + {canInterrupt && ( + + )} + {canResume && ( + + )} + {canPromote && ( + + )} +
    + )} + + {promotedWorkflow && ( +
    Promoted as {promotedWorkflow}
    + )} + + {showPromotionForm && canPromote && ( +
    { + event.preventDefault(); + void promoteScratchWorkflow(); + }} + > + + +
    +
    Location
    +
    + + +
    +
    + + +
    + )} + + {actionError && {actionError}} + + {interestingEvents.length > 0 && ( +
    +
    + Workflow events +
    +
      + {interestingEvents.map((event) => ( + + ))} +
    +
    + )} + + {reportMarkdown && ( +
    + +
    + )} + + {structuredOutput !== undefined && ( +
    +
    + Structured output +
    +
    +                {formatJson(structuredOutput)}
    +              
    +
    + )} + + {errorResult && {errorResult.error}} +
    + )} +
    + ); +}; diff --git a/src/browser/hooks/useSendMessageOptions.ts b/src/browser/hooks/useSendMessageOptions.ts index 3426e7042d..53664d1061 100644 --- a/src/browser/hooks/useSendMessageOptions.ts +++ b/src/browser/hooks/useSendMessageOptions.ts @@ -61,6 +61,8 @@ export function useSendMessageOptions(workspaceId: string): SendMessageOptionsWi const execSubagentHardRestart = useExperimentOverrideValue( EXPERIMENT_IDS.EXEC_SUBAGENT_HARD_RESTART ); + const dynamicWorkflows = useExperimentOverrideValue(EXPERIMENT_IDS.DYNAMIC_WORKFLOWS); + const subagentFileReports = useExperimentOverrideValue(EXPERIMENT_IDS.SUBAGENT_FILE_REPORTS); // Prefer metadata over the global default until workspace localStorage seeding catches up. const metadataSettings = getWorkspaceAiSettingsFromMetadata( @@ -82,6 +84,8 @@ export function useSendMessageOptions(workspaceId: string): SendMessageOptionsWi programmaticToolCallingExclusive, advisorTool, execSubagentHardRestart, + dynamicWorkflows, + subagentFileReports, }, disableWorkspaceAgents, }); diff --git a/src/browser/utils/chatCommands.test.ts b/src/browser/utils/chatCommands.test.ts index de8a11775d..67f4af6cfa 100644 --- a/src/browser/utils/chatCommands.test.ts +++ b/src/browser/utils/chatCommands.test.ts @@ -145,6 +145,60 @@ function createGoalCommandContext(api: SlashCommandContext["api"]): SlashCommand }); } +describe("processSlashCommand - workflow", () => { + test("rejects workflow execution when dynamic workflows are disabled", async () => { + const start = mock(() => + Promise.resolve({ runId: "wfr_123", status: "running", result: null }) + ); + const context = createSlashCommandContext({ + api: { + workflows: { start }, + } as unknown as SlashCommandContext["api"], + dynamicWorkflowsEnabled: false, + }); + + const result = await processSlashCommand( + { type: "workflow-run", name: "deep-research", argsText: "mux" }, + context + ); + + expect(result).toEqual({ clearInput: false, toastShown: true }); + 
expect(start).not.toHaveBeenCalled(); + expect(context.setToast).toHaveBeenCalledWith( + expect.objectContaining({ type: "error", message: "Dynamic workflows are disabled" }) + ); + }); + + test("starts workflow slash commands in the background", async () => { + ensureWindowDispatchEvent(); + const start = mock(() => + Promise.resolve({ runId: "wfr_123", status: "running", result: null }) + ); + const getRun = mock(() => Promise.resolve(null)); + const context = createSlashCommandContext({ + api: { + workflows: { start, getRun }, + } as unknown as SlashCommandContext["api"], + dynamicWorkflowsEnabled: true, + }); + + const result = await processSlashCommand( + { type: "workflow-run", name: "deep-research", argsText: "mux" }, + context + ); + + expect(result).toEqual({ clearInput: true, toastShown: true }); + expect(start).toHaveBeenCalledWith( + expect.objectContaining({ + workspaceId: "test-ws", + name: "deep-research", + runInBackground: true, + }) + ); + expect(getRun).toHaveBeenCalledWith({ workspaceId: "test-ws", runId: "wfr_123" }); + }); +}); + describe("processSlashCommand - side-question", () => { function createSideQuestionContext( sideQuestion: (input: { diff --git a/src/browser/utils/chatCommands.ts b/src/browser/utils/chatCommands.ts index 4f9297cc8a..30dbf81930 100644 --- a/src/browser/utils/chatCommands.ts +++ b/src/browser/utils/chatCommands.ts @@ -75,6 +75,7 @@ import { } from "@/browser/features/ChatInput/ChatInputToasts"; import { trackCommandUsed } from "@/common/telemetry"; import { addEphemeralMessage } from "@/browser/stores/WorkspaceStore"; +import { addWorkflowRunCardMessage } from "@/browser/utils/workflowRunMessages"; import { setGoalWithConflictRetry } from "@/browser/utils/goals/setGoalWithConflictRetry"; import { loadGoalDefaults, resolveGoalSetIntent } from "@/browser/utils/goals/resolveGoalSetIntent"; import { SIDE_QUESTION_COMMAND } from "@/common/utils/messages/sideQuestion"; @@ -158,6 +159,9 @@ export interface SlashCommandContext 
extends Omit void; + /** Current dynamic-workflows experiment assignment for executable workflow commands. */ + dynamicWorkflowsEnabled?: boolean; + // Global Actions setPreferredModel: (model: string) => void; setVimEnabled: (cb: (prev: boolean) => boolean) => void; @@ -182,6 +186,19 @@ export interface SlashCommandContext extends Omit { }); describe("init command", () => { + it("parses explicit workflow invocation", () => { + expect(parseCommand('/workflow deep-research {"topic":"mux"}')).toEqual({ + type: "workflow-run", + name: "deep-research", + argsText: '{"topic":"mux"}', + }); + }); + it("should parse /init as unknown-command (handled as a skill invocation)", () => { expectParse("/init", { type: "unknown-command", diff --git a/src/browser/utils/slashCommands/registry.ts b/src/browser/utils/slashCommands/registry.ts index f8b9b15d97..03c95adcbc 100644 --- a/src/browser/utils/slashCommands/registry.ts +++ b/src/browser/utils/slashCommands/registry.ts @@ -657,6 +657,44 @@ const btwCommandDefinition: SlashCommandDefinition = { }, }; +const WORKFLOW_COMMAND_USAGE = "/workflow [args]"; + +const workflowCommandDefinition: SlashCommandDefinition = { + key: "workflow", + description: "Run an explicit workflow by name", + experimentGate: EXPERIMENT_IDS.DYNAMIC_WORKFLOWS, + inputHint: WORKFLOW_COMMAND_USAGE, + suggestions: ({ partialToken, context }) => { + const workflows: SuggestionDefinition[] = (context.workflows ?? 
[]) + .filter((workflow) => workflow.executable) + .map((workflow) => ({ + key: workflow.name, + description: `${workflow.description} (${workflow.scope} workflow)`, + })); + return filterAndMapSuggestions(workflows, partialToken, (workflow) => ({ + id: `workflow-explicit:${workflow.key}`, + display: workflow.key, + description: workflow.description, + replacement: `/workflow ${workflow.key} `, + kind: "workflow", + })); + }, + handler: ({ rawInput }): ParsedCommand => { + const trimmed = rawInput.trim(); + if (!trimmed) { + return { type: "command-missing-args", command: "workflow", usage: WORKFLOW_COMMAND_USAGE }; + } + const firstWhitespace = trimmed.search(/\s/u); + const name = firstWhitespace === -1 ? trimmed : trimmed.slice(0, firstWhitespace); + const argsText = firstWhitespace === -1 ? undefined : trimmed.slice(firstWhitespace).trim(); + return { + type: "workflow-run", + name, + ...(argsText ? { argsText } : {}), + }; + }, +}; + const debugLlmRequestCommandDefinition: SlashCommandDefinition = { key: "debug-llm-request", description: "Show the last LLM request sent (debug)", @@ -677,6 +715,7 @@ export const SLASH_COMMAND_DEFINITIONS: readonly SlashCommandDefinition[] = [ heartbeatCommandDefinition, goalCommandDefinition, btwCommandDefinition, + workflowCommandDefinition, debugLlmRequestCommandDefinition, ]; diff --git a/src/browser/utils/slashCommands/suggestions.test.ts b/src/browser/utils/slashCommands/suggestions.test.ts index 5d7b2c3426..771bbe8681 100644 --- a/src/browser/utils/slashCommands/suggestions.test.ts +++ b/src/browser/utils/slashCommands/suggestions.test.ts @@ -1,3 +1,4 @@ +/* eslint-disable @typescript-eslint/no-unsafe-argument */ import { describe, it, expect } from "bun:test"; import { EXPERIMENT_IDS, type ExperimentId } from "@/common/constants/experiments"; import { getSlashCommandSuggestions } from "./suggestions"; @@ -97,6 +98,101 @@ describe("getSlashCommandSuggestions", () => { expect(suggestions.map((s) => 
s.display)).toContain("/deep-review"); }); + it("includes workflows distinctly and hides ambiguous top-level workflow shortcuts", () => { + const suggestions = getSlashCommandSuggestions("/deep", { + agentSkills: [ + { + name: "deep-review", + description: "Review deeply", + scope: "project", + }, + ], + workflows: [ + { + name: "deep-research", + description: "Research deeply", + scope: "built-in", + executable: true, + }, + { + name: "deep-review", + description: "Ambiguous workflow", + scope: "project", + executable: true, + }, + ], + }); + + expect(suggestions).toContainEqual( + expect.objectContaining({ + id: "workflow:deep-research", + display: "/deep-research", + kind: "workflow", + replacement: "/deep-research ", + }) + ); + expect(suggestions).not.toContainEqual(expect.objectContaining({ id: "workflow:deep-review" })); + expect(suggestions).toContainEqual(expect.objectContaining({ id: "skill:deep-review" })); + }); + + it("hides top-level workflow shortcuts that collide with model aliases", () => { + const workflow = { + name: "haiku", + description: "Alias collision workflow", + scope: "global" as const, + executable: true, + }; + + const suggestions = getSlashCommandSuggestions("/ha", { workflows: [workflow] }); + + expect(suggestions).toContainEqual(expect.objectContaining({ id: "model-oneshot:haiku" })); + expect(suggestions).not.toContainEqual(expect.objectContaining({ id: "workflow:haiku" })); + + const explicitSuggestions = getSlashCommandSuggestions("/workflow ha", { + workflows: [workflow], + isExperimentEnabled: (experimentId) => experimentId === EXPERIMENT_IDS.DYNAMIC_WORKFLOWS, + }); + + expect(explicitSuggestions).toContainEqual( + expect.objectContaining({ + id: "workflow-explicit:haiku", + display: "haiku", + kind: "workflow", + replacement: "/workflow haiku ", + }) + ); + }); + + it("suggests explicit workflow invocations for ambiguous workflow names", () => { + const suggestions = getSlashCommandSuggestions("/workflow deep", { + 
agentSkills: [ + { + name: "deep-review", + description: "Review deeply", + scope: "project", + }, + ], + workflows: [ + { + name: "deep-review", + description: "Ambiguous workflow", + scope: "project", + executable: true, + }, + ], + isExperimentEnabled: (experimentId) => experimentId === EXPERIMENT_IDS.DYNAMIC_WORKFLOWS, + }); + + expect(suggestions).toEqual([ + expect.objectContaining({ + id: "workflow-explicit:deep-review", + display: "deep-review", + kind: "workflow", + replacement: "/workflow deep-review ", + }), + ]); + }); + it("filters top level commands by partial input", () => { const suggestions = getSlashCommandSuggestions("/cl"); expect(suggestions).toHaveLength(1); diff --git a/src/browser/utils/slashCommands/suggestions.ts b/src/browser/utils/slashCommands/suggestions.ts index d2e43901c3..a195563571 100644 --- a/src/browser/utils/slashCommands/suggestions.ts +++ b/src/browser/utils/slashCommands/suggestions.ts @@ -75,10 +75,36 @@ function buildTopLevelSuggestions( id: `skill:${definition.key}`, display: `/${definition.key}`, description: definition.description, + kind: "skill", replacement, }; }); + const skillNames = new Set((context.agentSkills ?? []).map((skill) => skill.name)); + const workflowDefinitions: SuggestionDefinition[] = (context.workflows ?? []) + .filter((workflow) => workflow.executable) + // Known commands, skills, and model one-shot aliases must not execute workflow code through + // ambiguous top-level slash shortcuts. The explicit /workflow command remains available. 
+ .filter((workflow) => !SLASH_COMMAND_DEFINITION_MAP.has(workflow.name)) + .filter((workflow) => !skillNames.has(workflow.name)) + .filter((workflow) => !Object.hasOwn(MODEL_ABBREVIATIONS, workflow.name)) + .map((workflow) => ({ + key: workflow.name, + description: `${workflow.description} (${workflow.scope} workflow)`, + })); + + const workflowSuggestions = filterAndMapSuggestions( + workflowDefinitions, + partial, + (definition) => ({ + id: `workflow:${definition.key}`, + display: `/${definition.key}`, + description: definition.description, + kind: "workflow", + replacement: `/${definition.key} `, + }) + ); + // Model alias one-shot suggestions (e.g., /haiku, /sonnet, /opus+high). // The build callback below hardcodes the trailing space, so `appendSpace` // is intentionally omitted here. @@ -100,7 +126,12 @@ function buildTopLevelSuggestions( }) ); - return [...commandSuggestions, ...skillSuggestions, ...modelAliasSuggestions]; + return [ + ...commandSuggestions, + ...skillSuggestions, + ...workflowSuggestions, + ...modelAliasSuggestions, + ]; } function buildSubcommandSuggestions( diff --git a/src/browser/utils/slashCommands/types.ts b/src/browser/utils/slashCommands/types.ts index 0b38a4e0c0..6fc84f8476 100644 --- a/src/browser/utils/slashCommands/types.ts +++ b/src/browser/utils/slashCommands/types.ts @@ -11,6 +11,7 @@ import type { ExperimentId } from "@/common/constants/experiments"; import type { AgentSkillDescriptor } from "@/common/types/agentSkill"; +import type { WorkflowDefinitionDescriptor } from "@/common/types/workflow"; import type { ParsedThinkingInput } from "@/common/types/thinking"; export type ParsedCommand = @@ -31,6 +32,7 @@ export type ParsedCommand = | { type: "vim-toggle" } | { type: "plan-show" } | { type: "plan-open" } + | { type: "workflow-run"; name: string; argsText?: string } | { type: "debug-llm-request" } | { type: "unknown-command"; command: string; subcommand?: string } | { type: "command-unknown-flag"; command: string; flag: 
string; usage?: string } @@ -94,10 +96,12 @@ export interface SlashSuggestion { id: string; display: string; description: string; + kind?: "command" | "skill" | "workflow" | "model"; replacement: string; } export interface SlashSuggestionContext extends SlashCommandVisibilityContext { + workflows?: WorkflowDefinitionDescriptor[]; agentSkills?: AgentSkillDescriptor[]; } diff --git a/src/browser/utils/workflowRunMessages.test.ts b/src/browser/utils/workflowRunMessages.test.ts new file mode 100644 index 0000000000..60ab192cf8 --- /dev/null +++ b/src/browser/utils/workflowRunMessages.test.ts @@ -0,0 +1,79 @@ +import { describe, expect, test } from "bun:test"; + +import { buildWorkflowRunCardMessage, hasWorkflowRunToolCallMessage } from "./workflowRunMessages"; +import type { MuxMessage } from "@/common/types/message"; +import type { WorkflowRunRecord } from "@/common/types/workflow"; + +describe("buildWorkflowRunCardMessage", () => { + test("builds a stable workflow_run card message with the current durable run", () => { + const run: WorkflowRunRecord = { + id: "wfr_reload", + workspaceId: "workspace-1", + definition: { + name: "deep-research", + description: "Deep research", + scope: "built-in", + executable: true, + }, + definitionSource: "export default function workflow() { return null; }", + definitionHash: "sha256:test", + args: { topic: "reload" }, + status: "completed", + createdAt: "2026-05-29T00:00:00.000Z", + updatedAt: "2026-05-29T00:00:01.000Z", + events: [], + steps: [], + }; + + const message = buildWorkflowRunCardMessage( + { name: run.definition.name, args: run.args }, + { runId: run.id, status: run.status, result: { reportMarkdown: "done" }, run }, + 123 + ); + + expect(message.id).toBe("workflow-run-wfr_reload"); + expect(message.parts[0]).toMatchObject({ + type: "dynamic-tool", + toolName: "workflow_run", + input: { name: "deep-research", args: { topic: "reload" }, run_in_background: true }, + output: { status: "completed", runId: "wfr_reload", 
result: { reportMarkdown: "done" }, run }, + }); + }); + + test("detects existing persisted workflow_run tool calls by run id or in-flight input", () => { + const run = { + id: "wfr_existing", + definition: { + name: "deep-research", + description: "Deep research", + scope: "built-in" as const, + executable: true, + }, + args: { topic: "reload" }, + }; + const completedMessage = buildWorkflowRunCardMessage( + { name: run.definition.name, args: run.args }, + { runId: run.id, status: "completed", result: { reportMarkdown: "done" } }, + 123 + ); + const inFlightMessage: MuxMessage = { + id: "assistant_1", + role: "assistant", + parts: [ + { + type: "dynamic-tool", + toolCallId: "call_1", + toolName: "workflow_run", + state: "input-available", + input: { name: "deep-research", args: { topic: "reload" } }, + }, + ], + }; + + expect(hasWorkflowRunToolCallMessage([completedMessage], run)).toBe(true); + expect(hasWorkflowRunToolCallMessage([inFlightMessage], run)).toBe(true); + expect(hasWorkflowRunToolCallMessage([completedMessage], { ...run, id: "wfr_missing" })).toBe( + false + ); + }); +}); diff --git a/src/browser/utils/workflowRunMessages.ts b/src/browser/utils/workflowRunMessages.ts new file mode 100644 index 0000000000..168e4ea11e --- /dev/null +++ b/src/browser/utils/workflowRunMessages.ts @@ -0,0 +1,121 @@ +import { addEphemeralMessage } from "@/browser/stores/WorkspaceStore"; +import type { MuxMessage } from "@/common/types/message"; +import type { WorkflowRunRecord } from "@/common/types/workflow"; +import assert from "@/common/utils/assert"; + +export interface WorkflowRunCardInput { + name: string; + args: unknown; +} + +export interface WorkflowRunCardResult { + runId: string; + status: string; + result: unknown; + run?: WorkflowRunRecord; +} + +function getLatestWorkflowResult(run: WorkflowRunRecord): unknown { + return run.events.findLast((event) => event.type === "result")?.result ?? 
/**
 * Reads the run id out of a `workflow_run` tool output.
 *
 * @param output - Raw tool output of unknown shape.
 * @returns The `runId` property when it is a non-empty string, otherwise `null`.
 */
function getOutputRunId(output: unknown): string | null {
  if (output == null || typeof output !== "object") {
    return null;
  }
  const runId = (output as Record<string, unknown>).runId;
  return typeof runId === "string" && runId.length > 0 ? runId : null;
}

/**
 * Narrows a raw tool-call input to a workflow name plus arguments.
 *
 * @param input - Raw tool input of unknown shape.
 * @returns `{ name, args }` when `name` is a non-empty string (`args` falls back to `{}`
 *   when it is null or undefined), otherwise `null`.
 */
function getWorkflowInput(input: unknown): WorkflowRunCardInput | null {
  if (input == null || typeof input !== "object") {
    return null;
  }
  const record = input as Record<string, unknown>;
  if (typeof record.name !== "string" || record.name.length === 0) {
    return null;
  }
  return { name: record.name, args: record.args ?? {} };
}

/** Rebuilds an already-parsed JSON value with every object's keys in sorted order. */
function sortJsonKeys(value: unknown): unknown {
  if (Array.isArray(value)) {
    return value.map(sortJsonKeys);
  }
  if (value != null && typeof value === "object") {
    const source = value as Record<string, unknown>;
    // Null prototype so a literal "__proto__" key stays an ordinary own property.
    const sorted = Object.create(null) as Record<string, unknown>;
    for (const key of Object.keys(source).sort()) {
      sorted[key] = sortJsonKeys(source[key]);
    }
    return sorted;
  }
  return value;
}

/**
 * Compares two values by their JSON representation, ignoring object key order.
 *
 * Everything that serialized identically before still compares equal; in addition,
 * objects whose keys merely appear in a different order are now treated as equal.
 *
 * @returns `false` when either value cannot be serialized (for example a cyclic structure).
 */
function jsonEqual(left: unknown, right: unknown): boolean {
  try {
    const leftJson = JSON.stringify(left);
    const rightJson = JSON.stringify(right);
    if (leftJson === rightJson) {
      return true;
    }
    // JSON.stringify yields undefined for values with no JSON form (undefined, functions).
    if (leftJson === undefined || rightJson === undefined) {
      return false;
    }
    const leftCanonical: unknown = sortJsonKeys(JSON.parse(leftJson));
    const rightCanonical: unknown = sortJsonKeys(JSON.parse(rightJson));
    return JSON.stringify(leftCanonical) === JSON.stringify(rightCanonical);
  } catch {
    return false;
  }
}

/**
 * Reports whether `messages` already contain a `workflow_run` tool call for `run`.
 *
 * A `dynamic-tool` part named `workflow_run` matches when either:
 * - its state is `output-available` and its output carries the same run id, or
 * - it is in any other state and its input names the same workflow with JSON-equal args.
 *
 * @param messages - Messages to scan.
 * @param run - Run identity: id, definition (only `name` is read) and args.
 * @throws When `run.id` is empty (assertion failure).
 */
export function hasWorkflowRunToolCallMessage(
  messages: readonly MuxMessage[],
  run: Pick<WorkflowRunRecord, "id" | "definition" | "args">
): boolean {
  assert(run.id.length > 0, "hasWorkflowRunToolCallMessage: run id is required");
  return messages.some((message) =>
    message.parts.some((part) => {
      if (part.type !== "dynamic-tool" || part.toolName !== "workflow_run") {
        return false;
      }
      if (part.state === "output-available") {
        // A finished call is identified by run id only; its input is not consulted.
        return getOutputRunId(part.output) === run.id;
      }
      const input = getWorkflowInput(part.input);
      return input?.name === run.definition.name && jsonEqual(input.args, run.args);
    })
  );
}
result.runId, + result: result.result, + ...(result.run != null ? { run: result.run } : {}), + }, + timestamp: now, + }, + ], + metadata: { + historySequence: Number.MAX_SAFE_INTEGER, + timestamp: now, + }, + }; +} + +export function addWorkflowRunCardMessage( + workspaceId: string, + input: WorkflowRunCardInput, + result: WorkflowRunCardResult +): void { + assert(workspaceId.length > 0, "addWorkflowRunCardMessage: workspaceId is required"); + addEphemeralMessage(workspaceId, buildWorkflowRunCardMessage(input, result)); +} + +export function addWorkflowRunCardMessageForRun(workspaceId: string, run: WorkflowRunRecord): void { + addWorkflowRunCardMessage( + workspaceId, + { name: run.definition.name, args: run.args }, + { runId: run.id, status: run.status, result: getLatestWorkflowResult(run), run } + ); +} diff --git a/src/cli/run.ts b/src/cli/run.ts index 438343312f..b20ff12fa8 100644 --- a/src/cli/run.ts +++ b/src/cli/run.ts @@ -284,6 +284,8 @@ function buildExperimentsObject(experimentIds: string[]): SendMessageOptions["ex programmaticToolCalling: experimentIds.includes("programmatic-tool-calling"), programmaticToolCallingExclusive: experimentIds.includes("programmatic-tool-calling-exclusive"), execSubagentHardRestart: experimentIds.includes("exec-subagent-hard-restart"), + dynamicWorkflows: experimentIds.includes("dynamic-workflows"), + subagentFileReports: experimentIds.includes("subagent-file-reports"), }; } diff --git a/src/common/config/schemas/taskSettings.ts b/src/common/config/schemas/taskSettings.ts index 2ead3a550c..5d6542ccb7 100644 --- a/src/common/config/schemas/taskSettings.ts +++ b/src/common/config/schemas/taskSettings.ts @@ -1,7 +1,7 @@ import { z } from "zod"; export const TASK_SETTINGS_LIMITS = { - maxParallelAgentTasks: { min: 1, max: 256, default: 3 }, + maxParallelAgentTasks: { min: 1, max: 256, default: 16 }, maxTaskNestingDepth: { min: 1, max: 5, default: 3 }, } as const; diff --git a/src/common/constants/experiments.ts 
b/src/common/constants/experiments.ts index eebb22e742..793e7fb189 100644 --- a/src/common/constants/experiments.ts +++ b/src/common/constants/experiments.ts @@ -16,6 +16,8 @@ export const EXPERIMENT_IDS = { ADVISOR_TOOL: "advisor-tool", WORKSPACE_HEARTBEATS: "workspace-heartbeats", PORTABLE_DESKTOP: "portable-desktop", + DYNAMIC_WORKFLOWS: "dynamic-workflows", + SUBAGENT_FILE_REPORTS: "subagent-file-reports", } as const; export type ExperimentId = (typeof EXPERIMENT_IDS)[keyof typeof EXPERIMENT_IDS]; @@ -131,6 +133,23 @@ export const EXPERIMENTS: Record = { platformRestriction: ["linux"], showInSettings: true, }, + [EXPERIMENT_IDS.DYNAMIC_WORKFLOWS]: { + id: EXPERIMENT_IDS.DYNAMIC_WORKFLOWS, + name: "Dynamic Workflows", + description: "Enable durable JavaScript workflow orchestration for delegated agent tasks", + enabledByDefault: false, + userOverridable: true, + showInSettings: true, + }, + [EXPERIMENT_IDS.SUBAGENT_FILE_REPORTS]: { + id: EXPERIMENT_IDS.SUBAGENT_FILE_REPORTS, + name: "Subagent File Reports", + description: + "Submit subagent task reports through workspace files (`report.md` and `structured-output.json`)", + enabledByDefault: false, + userOverridable: true, + showInSettings: true, + }, }; function getPlatformDisplayName(platform: NodeJS.Platform): string { diff --git a/src/common/orpc/schemas.ts b/src/common/orpc/schemas.ts index 651822c8dc..510e752f55 100644 --- a/src/common/orpc/schemas.ts +++ b/src/common/orpc/schemas.ts @@ -45,6 +45,7 @@ export { FrontendWorkspaceMetadataSchema, GitStatusSchema, ProjectRefSchema, + WorkflowTaskMetadataSchema, WorkspaceActivitySnapshotSchema, WorkspaceGoalDefaultsOverrideSchema, WorkspaceHeartbeatSettingsSchema, @@ -103,6 +104,22 @@ export { SkillNameSchema, } from "./schemas/agentSkill"; +// Workflow schemas +export { + StructuredTaskOutputSchema, + WorkflowDefinitionDescriptorSchema, + WorkflowDefinitionScopeSchema, + WorkflowEventSequenceSchema, + WorkflowNameSchema, + WorkflowResultSchema, + 
WorkflowRunEventSchema, + WorkflowRunRecordSchema, + WorkflowRunStatusSchema, + WorkflowRunStatusTransitionSchema, + WorkflowStepRecordSchema, + WorkflowStepStatusSchema, +} from "./schemas/workflow"; + // Instruction context schemas (AGENTS.md, CLAUDE.md, …) export { AdditionalSystemContextSchema, @@ -259,6 +276,7 @@ export { menu, agentSkills, agents, + workflows, nameGeneration, projects, mcpOauth, diff --git a/src/common/orpc/schemas/api.ts b/src/common/orpc/schemas/api.ts index e1120c0700..688f720d1d 100644 --- a/src/common/orpc/schemas/api.ts +++ b/src/common/orpc/schemas/api.ts @@ -71,6 +71,12 @@ import { AgentSkillPackageSchema, SkillNameSchema, } from "./agentSkill"; +import { + WorkflowDefinitionDescriptorSchema, + WorkflowNameSchema, + WorkflowRunRecordSchema, + WorkflowRunStatusSchema, +} from "./workflow"; import { AgentDefinitionDescriptorSchema, AgentDefinitionPackageSchema, @@ -1713,6 +1719,66 @@ export const agentSkills = { }, }; +// Workflows +export const workflows = { + listDefinitions: { + input: z.object({ workspaceId: z.string().min(1) }).strict(), + output: z.array(WorkflowDefinitionDescriptorSchema), + }, + readDefinition: { + input: z.object({ workspaceId: z.string().min(1), name: WorkflowNameSchema }).strict(), + output: z.object({ descriptor: WorkflowDefinitionDescriptorSchema, source: z.string().min(1) }), + }, + listRuns: { + input: z.object({ workspaceId: z.string().min(1) }).strict(), + output: z.array(WorkflowRunRecordSchema), + }, + getRun: { + input: z.object({ workspaceId: z.string().min(1), runId: z.string().min(1) }).strict(), + output: WorkflowRunRecordSchema.nullable(), + }, + interrupt: { + input: z.object({ workspaceId: z.string().min(1), runId: z.string().min(1) }).strict(), + output: WorkflowRunRecordSchema, + }, + resume: { + input: z.object({ workspaceId: z.string().min(1), runId: z.string().min(1) }).strict(), + output: z.object({ + runId: z.string().min(1), + status: WorkflowRunStatusSchema, + result: z.unknown(), + 
}), + }, + promoteScratch: { + input: z + .object({ + workspaceId: z.string().min(1), + runId: z.string().min(1), + name: WorkflowNameSchema, + description: z.string().min(1).max(1024), + location: z.enum(["project", "global"]), + overwrite: z.boolean().optional(), + }) + .strict(), + output: WorkflowDefinitionDescriptorSchema, + }, + start: { + input: z + .object({ + workspaceId: z.string().min(1), + name: WorkflowNameSchema, + runInBackground: z.boolean().optional(), + args: z.unknown().optional(), + }) + .strict(), + output: z.object({ + runId: z.string().min(1), + status: WorkflowRunStatusSchema, + result: z.unknown(), + }), + }, +}; + // Name generation for new workspaces (decoupled from workspace creation) export const nameGeneration = { generate: { diff --git a/src/common/orpc/schemas/stream.ts b/src/common/orpc/schemas/stream.ts index 7eb0c9d4c4..5aac24def0 100644 --- a/src/common/orpc/schemas/stream.ts +++ b/src/common/orpc/schemas/stream.ts @@ -670,6 +670,8 @@ export const ExperimentsSchema = z.object({ programmaticToolCalling: z.boolean().optional(), programmaticToolCallingExclusive: z.boolean().optional(), advisorTool: z.boolean().optional(), + dynamicWorkflows: z.boolean().optional(), + subagentFileReports: z.boolean().optional(), execSubagentHardRestart: z.boolean().optional(), }); diff --git a/src/common/orpc/schemas/telemetry.ts b/src/common/orpc/schemas/telemetry.ts index aeb0163a58..0de5c1c660 100644 --- a/src/common/orpc/schemas/telemetry.ts +++ b/src/common/orpc/schemas/telemetry.ts @@ -45,6 +45,7 @@ const TelemetryCommandTypeSchema = z.enum([ "plan", "providers", "goal", + "workflow", "btw", ]); diff --git a/src/common/orpc/schemas/workflow.test.ts b/src/common/orpc/schemas/workflow.test.ts new file mode 100644 index 0000000000..107508ca78 --- /dev/null +++ b/src/common/orpc/schemas/workflow.test.ts @@ -0,0 +1,108 @@ +import { describe, expect, test } from "bun:test"; +import { EXPERIMENTS, EXPERIMENT_IDS } from 
// Workflow names are lowercase alphanumeric segments joined by single hyphens. A descriptor
// may be non-executable only when it explains why through `blockedReason`; the previous
// version of this test asserted the accepting case only, so the refine rule was never hit.
test("rejects invalid workflow names and non-executable descriptors without a blocked reason", () => {
  expect(WorkflowNameSchema.safeParse("bad--name").success).toBe(false);
  expect(WorkflowNameSchema.safeParse("DeepResearch").success).toBe(false);

  const nonExecutableDescriptor = {
    name: "local-workflow",
    description: "Project local workflow",
    scope: "project",
    executable: false,
  };

  // Blocked with an explanation: a valid descriptor.
  const blockedWithReason = WorkflowDefinitionDescriptorSchema.safeParse({
    ...nonExecutableDescriptor,
    blockedReason: "Project is not trusted",
  });
  expect(blockedWithReason.success).toBe(true);

  // Blocked without an explanation: must be rejected by the schema's refine rule.
  const blockedWithoutReason = WorkflowDefinitionDescriptorSchema.safeParse(nonExecutableDescriptor);
  expect(blockedWithoutReason.success).toBe(false);
});
"early" }, + ]); + + expect(result.success).toBe(false); + }); + + test("rejects impossible status transitions", () => { + expect( + WorkflowRunStatusTransitionSchema.safeParse({ from: "completed", to: "running" }).success + ).toBe(false); + expect( + WorkflowRunStatusTransitionSchema.safeParse({ from: "running", to: "interrupted" }).success + ).toBe(true); + }); +}); + +describe("workflow task metadata schema", () => { + test("accepts workflow task metadata with an output schema", () => { + const parsed = WorkflowTaskMetadataSchema.parse({ + runId: "wfr_123", + stepId: "claims", + outputSchema: { type: "object" }, + }); + + expect(parsed).toEqual({ + runId: "wfr_123", + stepId: "claims", + outputSchema: { type: "object" }, + }); + }); +}); + +describe("workflow experiment gate", () => { + test("keeps dynamic workflows opt-in during rollout", () => { + const experiment = EXPERIMENTS[EXPERIMENT_IDS.DYNAMIC_WORKFLOWS]; + + expect(experiment.enabledByDefault).toBe(false); + expect(experiment.userOverridable).toBe(true); + expect(experiment.showInSettings).toBe(true); + }); +}); diff --git a/src/common/orpc/schemas/workflow.ts b/src/common/orpc/schemas/workflow.ts new file mode 100644 index 0000000000..00772c106d --- /dev/null +++ b/src/common/orpc/schemas/workflow.ts @@ -0,0 +1,171 @@ +import { z } from "zod"; + +export const WorkflowNameSchema = z + .string() + .min(1) + .max(64) + .regex(/^[a-z0-9]+(?:-[a-z0-9]+)*$/); + +export const WorkflowDefinitionScopeSchema = z.enum(["project", "global", "built-in", "scratch"]); + +export const WorkflowRunStatusSchema = z.enum([ + "pending", + "running", + "backgrounded", + "interrupted", + "completed", + "failed", +]); + +const IsoDateTimeSchema = z.string().datetime({ offset: true }); +const JsonValueSchema: z.ZodType = z.lazy(() => + z.union([ + z.string(), + z.number(), + z.boolean(), + z.null(), + z.array(JsonValueSchema), + z.record(z.string(), JsonValueSchema), + ]) +); + +export const WorkflowDefinitionDescriptorSchema 
= z + .object({ + name: WorkflowNameSchema, + description: z.string().min(1).max(1024), + scope: WorkflowDefinitionScopeSchema, + sourcePath: z.string().min(1).optional(), + executable: z.boolean(), + blockedReason: z.string().min(1).optional(), + }) + .refine((value) => value.executable || value.blockedReason != null, { + message: "Non-executable workflow definitions must include a blocked reason", + path: ["blockedReason"], + }); + +export const WorkflowResultSchema = z.object({ + reportMarkdown: z.string(), + structuredOutput: JsonValueSchema.optional(), +}); + +export const StructuredTaskOutputSchema = z.object({ + reportMarkdown: z.string(), + title: z.string().min(1).nullable().optional(), + structuredOutput: JsonValueSchema.optional(), +}); + +export const WorkflowRunEventSchema = z.discriminatedUnion("type", [ + z.object({ + sequence: z.number().int().positive(), + type: z.literal("status"), + at: IsoDateTimeSchema, + status: WorkflowRunStatusSchema, + }), + z.object({ + sequence: z.number().int().positive(), + type: z.literal("phase"), + at: IsoDateTimeSchema, + name: z.string().min(1), + details: JsonValueSchema.optional(), + }), + z.object({ + sequence: z.number().int().positive(), + type: z.literal("log"), + at: IsoDateTimeSchema, + message: z.string().min(1), + data: JsonValueSchema.optional(), + }), + z.object({ + sequence: z.number().int().positive(), + type: z.literal("task"), + at: IsoDateTimeSchema, + stepId: z.string().min(1), + taskId: z.string().min(1), + status: z.string().min(1), + }), + z.object({ + sequence: z.number().int().positive(), + type: z.literal("validation"), + at: IsoDateTimeSchema, + stepId: z.string().min(1), + success: z.boolean(), + message: z.string().min(1).optional(), + }), + z.object({ + sequence: z.number().int().positive(), + type: z.literal("result"), + at: IsoDateTimeSchema, + result: WorkflowResultSchema, + }), + z.object({ + sequence: z.number().int().positive(), + type: z.literal("error"), + at: IsoDateTimeSchema, 
/**
 * Allowed workflow run status transitions, keyed by the current status.
 *
 * `completed` and `failed` list no successors, so no transition out of them validates.
 */
const WorkflowRunStatusTransitions: Record<
  z.infer<typeof WorkflowRunStatusSchema>,
  ReadonlyArray<z.infer<typeof WorkflowRunStatusSchema>>
> = {
  pending: ["running", "backgrounded", "interrupted", "failed"],
  running: ["backgrounded", "interrupted", "completed", "failed"],
  backgrounded: ["running", "interrupted", "completed", "failed"],
  interrupted: ["running", "failed"],
  completed: [],
  failed: [],
};

/**
 * Validates a `{ from, to }` pair of run statuses.
 *
 * Both fields must be valid run statuses, and `to` must be listed as a successor of `from`
 * in `WorkflowRunStatusTransitions`; otherwise an issue is reported at path `["to"]`.
 */
export const WorkflowRunStatusTransitionSchema = z
  .object({
    from: WorkflowRunStatusSchema,
    to: WorkflowRunStatusSchema,
  })
  .refine((transition) => WorkflowRunStatusTransitions[transition.from].includes(transition.to), {
    message: "Invalid workflow run status transition",
    path: ["to"],
  });
WorkflowEventSequenceSchema, + steps: z.array(WorkflowStepRecordSchema), +}); diff --git a/src/common/orpc/schemas/workspace.ts b/src/common/orpc/schemas/workspace.ts index 88ba304ca2..e357804b96 100644 --- a/src/common/orpc/schemas/workspace.ts +++ b/src/common/orpc/schemas/workspace.ts @@ -86,6 +86,14 @@ export const WorkspaceHeartbeatSettingsSchema = z.object({ }), }); +export const WorkflowTaskMetadataSchema = z.object({ + runId: z.string().min(1).meta({ description: "Workflow run that spawned this task." }), + stepId: z.string().min(1).meta({ description: "Workflow step that spawned this task." }), + outputSchema: z.unknown().optional().meta({ + description: "Optional JSON Schema subset required for this task's structured output.", + }), +}); + export const WorkspaceMetadataSchema = z.object({ id: z.string().meta({ description: @@ -143,6 +151,9 @@ export const WorkspaceMetadataSchema = z.object({ description: 'If set, selects an agent definition for this workspace (e.g., "explore" or "exec").', }), + workflowTask: WorkflowTaskMetadataSchema.optional().meta({ + description: "Workflow run/step metadata for workflow-spawned child tasks.", + }), bestOf: BestOfGroupSchema.optional().meta({ description: "Grouping metadata for child tasks spawned from the same parent tool call.", }), diff --git a/src/common/schemas/project.ts b/src/common/schemas/project.ts index 0de08229a5..a9437bd92e 100644 --- a/src/common/schemas/project.ts +++ b/src/common/schemas/project.ts @@ -54,6 +54,14 @@ export const WorktreeArchiveSnapshotSchema = z.object({ }), }); +export const WorkflowTaskMetadataSchema = z.object({ + runId: z.string().min(1).meta({ description: "Workflow run that spawned this task." }), + stepId: z.string().min(1).meta({ description: "Workflow step that spawned this task." 
}), + outputSchema: z.unknown().optional().meta({ + description: "Optional JSON Schema subset required for this task's structured output.", + }), +}); + export const WorkspaceConfigSchema = z.object({ path: z.string().meta({ description: "Absolute path to workspace directory - REQUIRED for backward compatibility", @@ -107,6 +115,9 @@ export const WorkspaceConfigSchema = z.object({ description: 'If set, selects an agent definition for this workspace (e.g., "explore" or "exec").', }), + workflowTask: WorkflowTaskMetadataSchema.optional().meta({ + description: "Workflow run/step metadata for workflow-spawned child tasks.", + }), bestOf: BestOfGroupSchema.optional().meta({ description: "Grouping metadata for child tasks spawned from the same parent tool call.", }), @@ -135,6 +146,8 @@ export const WorkspaceConfigSchema = z.object({ programmaticToolCalling: z.boolean().optional(), programmaticToolCallingExclusive: z.boolean().optional(), advisorTool: z.boolean().optional(), + dynamicWorkflows: z.boolean().optional(), + subagentFileReports: z.boolean().optional(), execSubagentHardRestart: z.boolean().optional(), }) .optional() diff --git a/src/common/telemetry/payload.ts b/src/common/telemetry/payload.ts index 7868afcb87..1d8dbe1371 100644 --- a/src/common/telemetry/payload.ts +++ b/src/common/telemetry/payload.ts @@ -300,7 +300,8 @@ export type TelemetryCommandType = | "plan" | "providers" | "goal" - | "btw"; + | "btw" + | "workflow"; /** * Command usage event - tracks slash command usage patterns diff --git a/src/common/types/tasks.test.ts b/src/common/types/tasks.test.ts index 1fefdee3e8..8a7c31cf48 100644 --- a/src/common/types/tasks.test.ts +++ b/src/common/types/tasks.test.ts @@ -44,6 +44,11 @@ describe("normalizeTaskSettings", () => { expect(normalizeTaskSettings({})).toEqual(DEFAULT_TASK_SETTINGS); }); + test("uses sixteen parallel agent tasks by default while preserving explicit values", () => { + 
// Static types inferred from the workflow zod schemas, so the runtime validators and the
// compile-time types cannot drift apart.

/** Validated workflow definition name. */
export type WorkflowName = z.infer<typeof WorkflowNameSchema>;
/** Scope a workflow definition belongs to. */
export type WorkflowDefinitionScope = z.infer<typeof WorkflowDefinitionScopeSchema>;
/** Lifecycle status of a workflow run. */
export type WorkflowRunStatus = z.infer<typeof WorkflowRunStatusSchema>;
/** Lifecycle status of a single workflow step. */
export type WorkflowStepStatus = z.infer<typeof WorkflowStepStatusSchema>;
/** Descriptor of a workflow definition. */
export type WorkflowDefinitionDescriptor = z.infer<typeof WorkflowDefinitionDescriptorSchema>;
/** Result payload of a workflow run. */
export type WorkflowResult = z.infer<typeof WorkflowResultSchema>;
/** Structured output of a task. */
export type StructuredTaskOutput = z.infer<typeof StructuredTaskOutputSchema>;
/** One entry of a workflow run's event log. */
export type WorkflowRunEvent = z.infer<typeof WorkflowRunEventSchema>;
/** Record of one workflow step. */
export type WorkflowStepRecord = z.infer<typeof WorkflowStepRecordSchema>;
/** Record of a workflow run. */
export type WorkflowRunRecord = z.infer<typeof WorkflowRunRecordSchema>;

/**
 * Asserts that a workflow run may move from one status to another.
 *
 * @param from - Current run status.
 * @param to - Requested next status.
 * @throws Assertion failure when `WorkflowRunStatusTransitionSchema` rejects the pair.
 */
export function assertWorkflowRunStatusTransition(
  from: WorkflowRunStatus,
  to: WorkflowRunStatus
): void {
  assert(
    WorkflowRunStatusTransitionSchema.safeParse({ from, to }).success,
    `Invalid workflow run status transition: ${from} -> ${to}`
  );
}
test("returns actionable paths for missing required properties and type errors", () => { + const result = validateJsonSchemaSubset( + { + type: "object", + required: ["summary", "sources"], + properties: { + summary: { type: "string" }, + sources: { type: "array", items: { type: "string" } }, + }, + }, + { sources: ["one", 2] } + ); + + expect(result).toEqual({ + success: false, + errors: [ + { path: "$.summary", message: "Required property is missing" }, + { path: "$.sources[1]", message: "Expected string, got number" }, + ], + }); + }); + + test("rejects unsupported schema keywords instead of ignoring them", () => { + const result = validateJsonSchemaSubset({ type: "string", pattern: "^ok$" }, "ok"); + + expect(result).toEqual({ + success: false, + errors: [{ path: "$", message: "Unsupported JSON Schema keyword: pattern" }], + }); + }); + + test("rejects JSON Schema type unions instead of skipping type validation", () => { + const result = validateJsonSchemaSubset({ type: ["string", "null"] }, 42); + + expect(result).toEqual({ + success: false, + errors: [{ path: "$", message: "Unsupported JSON Schema type union" }], + }); + }); + + test("rejects schema-valued additionalProperties instead of ignoring extra values", () => { + const result = validateJsonSchemaSubset( + { type: "object", additionalProperties: { type: "string" } }, + { extra: 42 } + ); + + expect(result).toEqual({ + success: false, + errors: [ + { + path: "$.additionalProperties", + message: "Unsupported JSON Schema additionalProperties schema", + }, + ], + }); + }); + + test("supports enum, integer, and additionalProperties false", () => { + const result = validateJsonSchemaSubset( + { + type: "object", + properties: { + status: { enum: ["pass", "fail"] }, + count: { type: "integer" }, + }, + additionalProperties: false, + }, + { status: "maybe", count: 1.5, extra: true } + ); + + expect(result).toEqual({ + success: false, + errors: [ + { path: "$.status", message: "Expected one of: pass, fail" }, + 
/** A single validation failure, located by a JSONPath-like `path` rooted at `$`. */
export interface JsonSchemaValidationError {
  path: string;
  message: string;
}

/** Outcome of a schema or value check: success, or every error that was collected. */
export type JsonSchemaSubsetValidationResult =
  | { success: true }
  | { success: false; errors: JsonSchemaValidationError[] };

/** Keywords this validator understands; any other key in a schema object is reported. */
const SUPPORTED_SCHEMA_KEYWORDS = new Set([
  "type",
  "properties",
  "required",
  "items",
  "additionalProperties",
  "enum",
]);

/**
 * Checks that `schema` only uses the supported JSON Schema subset.
 *
 * Walks the schema through `properties` and `items` and reports unsupported
 * keywords, array-valued `type` (type unions), and `additionalProperties`
 * values other than `true`/`false`.
 *
 * @param schema - Candidate schema; must be a non-null, non-array object.
 * @returns `{ success: true }` or the full list of schema errors.
 */
export function validateJsonSchemaSubsetSchema(schema: unknown): JsonSchemaSubsetValidationResult {
  if (!isPlainRecord(schema)) {
    return { success: false, errors: [{ path: "$", message: "Schema must be an object" }] };
  }

  const errors: JsonSchemaValidationError[] = [];
  collectUnsupportedKeywordErrors(schema, "$", errors);
  return errors.length === 0 ? { success: true } : { success: false, errors };
}

/**
 * Validates `value` against `schema`.
 *
 * The schema itself is checked first; if it uses anything outside the supported
 * subset, those schema errors are returned and the value is not inspected.
 *
 * @param schema - Schema restricted to the supported keyword subset.
 * @param value - Value to validate.
 * @returns `{ success: true }` or every error found, each with the path of the offending value.
 */
export function validateJsonSchemaSubset(
  schema: unknown,
  value: unknown
): JsonSchemaSubsetValidationResult {
  const schemaValidation = validateJsonSchemaSubsetSchema(schema);
  if (!schemaValidation.success) {
    return schemaValidation;
  }

  const errors: JsonSchemaValidationError[] = [];
  validateValue(schema, value, "$", errors);
  return errors.length === 0 ? { success: true } : { success: false, errors };
}

/**
 * Validates one value against one (sub-)schema, appending failures to `errors`.
 * `enum` and `type` are checked independently, so a single value can yield both errors.
 */
function validateValue(
  schema: unknown,
  value: unknown,
  path: string,
  errors: JsonSchemaValidationError[]
): void {
  if (!isPlainRecord(schema)) {
    errors.push({ path, message: "Schema must be an object" });
    return;
  }

  // Enum membership uses Object.is, i.e. identity for objects/arrays rather than deep equality.
  if (Array.isArray(schema.enum) && !schema.enum.some((candidate) => Object.is(candidate, value))) {
    errors.push({ path, message: `Expected one of: ${schema.enum.map(String).join(", ")}` });
  }

  if (typeof schema.type === "string") {
    validateType(schema.type, value, path, errors);
  }

  // Only descend when the value has the declared shape; the mismatch itself
  // was already reported by validateType above.
  if (schema.type === "object" && isPlainRecord(value)) {
    validateObject(schema, value, path, errors);
  }

  if (schema.type === "array" && Array.isArray(value)) {
    validateArray(schema, value, path, errors);
  }
}

/** Own-property test; unlike `in`, it ignores members inherited from the prototype chain. */
function hasOwnProperty(record: Record<string, unknown>, key: string): boolean {
  return Object.prototype.hasOwnProperty.call(record, key);
}

/**
 * Applies `required`, `properties`, and `additionalProperties: false` to an object value.
 */
function validateObject(
  schema: Record<string, unknown>,
  value: Record<string, unknown>,
  path: string,
  errors: JsonSchemaValidationError[]
): void {
  const properties: Record<string, unknown> = isPlainRecord(schema.properties)
    ? schema.properties
    : {};
  const required: unknown[] = Array.isArray(schema.required) ? schema.required : [];

  for (const property of required) {
    if (typeof property !== "string") {
      errors.push({ path, message: "Required property names must be strings" });
      continue;
    }
    // Own-property check: `"toString" in {}` is true, which would let inherited
    // members satisfy a `required` entry.
    if (!hasOwnProperty(value, property)) {
      errors.push({ path: `${path}.${property}`, message: "Required property is missing" });
    }
  }

  for (const [property, propertySchema] of Object.entries(properties)) {
    // Same reasoning: never validate an inherited member (e.g. `constructor`)
    // as if the caller had supplied it.
    if (hasOwnProperty(value, property)) {
      validateValue(propertySchema, value[property], `${path}.${property}`, errors);
    }
  }

  if (schema.additionalProperties === false) {
    const allowedProperties = new Set(Object.keys(properties));
    for (const property of Object.keys(value)) {
      if (!allowedProperties.has(property)) {
        errors.push({ path: `${path}.${property}`, message: "Additional property is not allowed" });
      }
    }
  }
}

/** Validates every array element against `schema.items`; without `items` any element passes. */
function validateArray(
  schema: Record<string, unknown>,
  value: unknown[],
  path: string,
  errors: JsonSchemaValidationError[]
): void {
  if (schema.items == null) {
    return;
  }

  for (const [index, item] of value.entries()) {
    validateValue(schema.items, item, `${path}[${index}]`, errors);
  }
}

/**
 * Checks `value` against a single JSON Schema type name and records a mismatch.
 * `number` rejects NaN and infinities; `integer` additionally requires a whole number.
 * An unrecognised type name is itself reported as an error.
 */
function validateType(
  type: string,
  value: unknown,
  path: string,
  errors: JsonSchemaValidationError[]
): void {
  switch (type) {
    case "object":
      if (!isPlainRecord(value)) {
        errors.push({ path, message: `Expected object, got ${getJsonType(value)}` });
      }
      return;
    case "array":
      if (!Array.isArray(value)) {
        errors.push({ path, message: `Expected array, got ${getJsonType(value)}` });
      }
      return;
    case "string":
      if (typeof value !== "string") {
        errors.push({ path, message: `Expected string, got ${getJsonType(value)}` });
      }
      return;
    case "number":
      if (typeof value !== "number" || !Number.isFinite(value)) {
        errors.push({ path, message: `Expected number, got ${getJsonType(value)}` });
      }
      return;
    case "integer":
      if (typeof value !== "number" || !Number.isInteger(value)) {
        errors.push({ path, message: `Expected integer, got ${getJsonType(value)}` });
      }
      return;
    case "boolean":
      if (typeof value !== "boolean") {
        errors.push({ path, message: `Expected boolean, got ${getJsonType(value)}` });
      }
      return;
    case "null":
      if (value !== null) {
        errors.push({ path, message: `Expected null, got ${getJsonType(value)}` });
      }
      return;
    default:
      errors.push({ path, message: `Unsupported JSON Schema type: ${type}` });
  }
}

/**
 * Recursively records every construct in `schema` that falls outside the supported subset.
 * Sub-schemas that are not objects are skipped here; validateValue reports them
 * ("Schema must be an object") if validation ever reaches them.
 */
function collectUnsupportedKeywordErrors(
  schema: unknown,
  path: string,
  errors: JsonSchemaValidationError[]
): void {
  if (!isPlainRecord(schema)) {
    return;
  }

  for (const key of Object.keys(schema)) {
    if (!SUPPORTED_SCHEMA_KEYWORDS.has(key)) {
      errors.push({ path, message: `Unsupported JSON Schema keyword: ${key}` });
    }
  }

  if (Array.isArray(schema.type)) {
    errors.push({ path, message: "Unsupported JSON Schema type union" });
  }

  // Only boolean (or absent) additionalProperties is supported; a schema value
  // would otherwise be silently ignored by validateObject.
  if (
    schema.additionalProperties != null &&
    schema.additionalProperties !== true &&
    schema.additionalProperties !== false
  ) {
    errors.push({
      path: `${path}.additionalProperties`,
      message: "Unsupported JSON Schema additionalProperties schema",
    });
  }

  if (isPlainRecord(schema.properties)) {
    for (const [property, propertySchema] of Object.entries(schema.properties)) {
      collectUnsupportedKeywordErrors(propertySchema, `${path}.${property}`, errors);
    }
  }

  if (schema.items != null) {
    collectUnsupportedKeywordErrors(schema.items, `${path}[]`, errors);
  }
}

/** True for any non-null, non-array value whose `typeof` is "object". */
function isPlainRecord(value: unknown): value is Record<string, unknown> {
  return value != null && typeof value === "object" && !Array.isArray(value);
}

/** Type name used in error messages: "null", "array", or the `typeof` result otherwise. */
function getJsonType(value: unknown): string {
  if (value === null) return "null";
  if (Array.isArray(value)) return "array";
  return typeof value;
}
b/src/common/utils/tools/toolDefinitions.test.ts @@ -410,6 +410,15 @@ describe("TOOL_DEFINITIONS", () => { expect(tools).toContain("skills_catalog_read"); }); + it("only includes workflow_run when dynamic workflows are enabled", () => { + expect(getAvailableTools("openai:gpt-4o", { enableDynamicWorkflows: false })).not.toContain( + "workflow_run" + ); + expect(getAvailableTools("openai:gpt-4o", { enableDynamicWorkflows: true })).toContain( + "workflow_run" + ); + }); + it("agent_skill_write schema rejects an advertise tool argument (advertise is authored in content)", () => { const parsed = TOOL_DEFINITIONS.agent_skill_write.schema.safeParse({ name: "demo-skill", diff --git a/src/common/utils/tools/toolDefinitions.ts b/src/common/utils/tools/toolDefinitions.ts index c96292cb33..9fcb66a05d 100644 --- a/src/common/utils/tools/toolDefinitions.ts +++ b/src/common/utils/tools/toolDefinitions.ts @@ -27,7 +27,15 @@ */ import { z } from "zod"; -import { AgentIdSchema, AgentSkillPackageSchema, SkillNameSchema } from "@/common/orpc/schemas"; +import { + AgentIdSchema, + AgentSkillPackageSchema, + SkillNameSchema, + WorkflowDefinitionDescriptorSchema, + WorkflowNameSchema, + WorkflowRunRecordSchema, + WorkflowRunStatusSchema, +} from "@/common/orpc/schemas"; import { RUNTIME_MODE, type RuntimeMode } from "@/common/types/runtime"; import { BASH_HARD_MAX_LINES, @@ -323,6 +331,7 @@ const TaskToolCompletedReportSchema = z taskId: z.string(), reportMarkdown: z.string(), title: z.string().optional(), + structuredOutput: z.unknown().optional(), agentId: z.string().optional(), agentType: z.string().optional(), groupKind: z.enum(TASK_GROUP_KIND_VALUES).optional(), @@ -364,6 +373,7 @@ export const TaskToolCompletedResultSchema = z taskIds: z.array(z.string()).min(1).optional(), reportMarkdown: z.string().optional(), title: z.string().optional(), + structuredOutput: z.unknown().optional(), agentId: z.string().optional(), agentType: z.string().optional(), reports: 
z.array(TaskToolCompletedReportSchema).min(1).optional(), @@ -527,6 +537,7 @@ export const TaskAwaitToolCompletedResultSchema = z status: z.literal("completed"), taskId: z.string(), reportMarkdown: z.string(), + structuredOutput: z.unknown().optional(), title: z.string().optional(), output: z.string().optional(), elapsed_ms: z.number().optional(), @@ -770,18 +781,121 @@ export const TaskListToolResultSchema = z }) .strict(); +// ----------------------------------------------------------------------------- +// workflow_run (durable workflow orchestration) +// ----------------------------------------------------------------------------- + +export const WorkflowListToolArgsSchema = z.object({}).strict(); + +export const WorkflowListToolResultSchema = z + .object({ + workflows: z.array(WorkflowDefinitionDescriptorSchema), + }) + .strict(); + +export const WorkflowReadToolArgsSchema = z + .object({ + name: WorkflowNameSchema, + }) + .strict(); + +export const WorkflowReadToolResultSchema = z + .object({ + descriptor: WorkflowDefinitionDescriptorSchema, + source: z.string().min(1), + }) + .strict(); + +export const WorkflowWriteToolArgsSchema = z + .object({ + name: WorkflowNameSchema, + description: z.string().min(1).max(1024), + source: z.string().min(1), + }) + .strict(); + +export const WorkflowWriteToolResultSchema = z + .object({ + descriptor: WorkflowDefinitionDescriptorSchema, + }) + .strict(); + +export const WorkflowRunToolArgsSchema = z + .object({ + name: WorkflowNameSchema, + args: z.unknown().nullish(), + run_in_background: z.boolean().nullish().default(false), + }) + .strict(); + +export const WorkflowRunToolResultSchema = z + .object({ + status: WorkflowRunStatusSchema, + runId: z.string().min(1), + result: z.unknown(), + run: WorkflowRunRecordSchema.optional(), + }) + .strict(); + // ----------------------------------------------------------------------------- // agent_report (explicit subagent -> parent report) // 
----------------------------------------------------------------------------- -export const AgentReportToolArgsSchema = z +export const AgentReportInlineToolArgsSchema = z .object({ reportMarkdown: z.string().min(1), + structuredOutput: z.unknown().nullish(), + title: z.string().nullish(), + }) + .strict(); + +export const AgentReportFileToolArgsSchema = z + .object({ + reportMarkdownPath: z + .string() + .min(1) + .nullish() + .describe("Path to the markdown report file, usually report.md in the workspace root"), + structuredOutputPath: z + .string() + .min(1) + .nullish() + .describe( + "Path to a JSON file containing the structured output, usually structured-output.json" + ), title: z.string().nullish(), }) .strict(); -export const AgentReportToolResultSchema = z.object({ success: z.literal(true) }).strict(); +export const AgentReportToolArgsSchema = z.union([ + AgentReportInlineToolArgsSchema, + AgentReportFileToolArgsSchema, +]); + +export const AgentReportSubmittedReportSchema = z + .object({ + reportMarkdown: z.string().min(1), + structuredOutput: z.unknown().optional(), + title: z.string().min(1).optional(), + }) + .strict(); + +export const AgentReportToolResultSchema = z.discriminatedUnion("success", [ + z + .object({ + success: z.literal(true), + message: z.string().min(1).optional(), + report: AgentReportSubmittedReportSchema.optional(), + }) + .strict(), + z + .object({ + success: z.literal(false), + message: z.string().min(1), + errors: z.array(z.object({ path: z.string().min(1), message: z.string().min(1) })).min(1), + }) + .strict(), +]); const FILE_TOOL_PATH = z .string() .describe("Path to the file to edit (absolute or relative to the current workspace)"); @@ -1444,6 +1558,26 @@ export const TOOL_DEFINITIONS = { "This is a discovery tool, NOT a waiting mechanism. 
If the current request actually depends on a task's output, call task_await with the specific task IDs you need; do not await all active tasks just because they appear here.", schema: TaskListToolArgsSchema, }, + workflow_list: { + description: + "List durable workflow definitions available in this workspace. Use this before workflow_run when you do not already know the workflow name.", + schema: WorkflowListToolArgsSchema, + }, + workflow_read: { + description: + "Read a durable workflow definition's descriptor and source by name. Use this to inspect expected args or understand a workflow before running it.", + schema: WorkflowReadToolArgsSchema, + }, + workflow_write: { + description: + "Write or replace a session-scoped scratch workflow definition. The workflow becomes runnable by name in this workspace, but is not promoted to project or global workflows; only the user can promote it from the UI.", + schema: WorkflowWriteToolArgsSchema, + }, + workflow_run: { + description: + "Start a durable workflow run by workflow name. Workflows coordinate delegated agent tasks and preserve run state for replay/resume.", + schema: WorkflowRunToolArgsSchema, + }, agent_report: { description: "Report the final result of a sub-agent task back to the parent workspace. " + @@ -2174,6 +2308,7 @@ export function getAvailableTools( enableAgentReport?: boolean; enableAnalyticsQuery?: boolean; enableAdvisor?: boolean; + enableDynamicWorkflows?: boolean; /** @deprecated Mux global tools are always included. */ enableMuxGlobalAgentsTools?: boolean; } @@ -2182,6 +2317,7 @@ export function getAvailableTools( const enableAgentReport = options?.enableAgentReport ?? true; const enableAnalyticsQuery = options?.enableAnalyticsQuery ?? true; const enableAdvisor = options?.enableAdvisor ?? false; + const enableDynamicWorkflows = options?.enableDynamicWorkflows ?? 
false; // Base tools available for all models // Note: Tool availability is controlled by agent tool policy (allowlist), not mode checks here. @@ -2219,6 +2355,9 @@ export function getAvailableTools( "task_apply_git_patch", "task_terminate", "task_list", + ...(enableDynamicWorkflows + ? ["workflow_list", "workflow_read", "workflow_write", "workflow_run"] + : []), ...(enableAgentReport ? ["agent_report"] : []), "get_goal", "complete_goal", diff --git a/src/common/utils/tools/tools.test.ts b/src/common/utils/tools/tools.test.ts index 1a630f4b37..b0a8419f19 100644 --- a/src/common/utils/tools/tools.test.ts +++ b/src/common/utils/tools/tools.test.ts @@ -1,3 +1,4 @@ +/* eslint-disable @typescript-eslint/require-await */ import { describe, expect, mock, test } from "bun:test"; import { z } from "zod"; @@ -89,6 +90,68 @@ describe("getToolsForModel", () => { expect(toolsWithReport.agent_report).toBeDefined(); }); + test("only includes workflow tools when dynamic workflows service and experiment are enabled", async () => { + const runtime = new LocalRuntime(process.cwd()); + const initStateManager = createInitStateManager(); + + const withoutExperiment = await getToolsForModel( + "noop:model", + { + cwd: process.cwd(), + runtime, + runtimeTempDir: "/tmp", + workspaceId: "ws-1", + workflowService: { + listDefinitions: mock(async () => []), + readDefinition: mock(async () => ({ + descriptor: { name: "demo", description: "Demo", scope: "built-in", executable: true }, + source: "export default function workflow() { return null; }", + })), + startNamedWorkflow: mock(async () => ({ + runId: "wfr_1", + status: "completed" as const, + result: null, + })), + }, + }, + "ws-1", + initStateManager + ); + expect(withoutExperiment.workflow_list).toBeUndefined(); + expect(withoutExperiment.workflow_read).toBeUndefined(); + expect(withoutExperiment.workflow_write).toBeUndefined(); + expect(withoutExperiment.workflow_run).toBeUndefined(); + + const withExperiment = await getToolsForModel( + 
"noop:model", + { + cwd: process.cwd(), + runtime, + runtimeTempDir: "/tmp", + workspaceId: "ws-1", + experiments: { dynamicWorkflows: true }, + workflowService: { + listDefinitions: mock(async () => []), + readDefinition: mock(async () => ({ + descriptor: { name: "demo", description: "Demo", scope: "built-in", executable: true }, + source: "export default function workflow() { return null; }", + })), + startNamedWorkflow: mock(async () => ({ + runId: "wfr_1", + status: "completed" as const, + result: null, + })), + }, + }, + "ws-1", + initStateManager + ); + expect(withExperiment.workflow_list).toBeDefined(); + expect(withExperiment.workflow_read).toBeDefined(); + expect(withExperiment.workflow_write).toBeDefined(); + expect(withExperiment.workflow_run).toBeDefined(); + }); + test("includes desktop tools when workspace capability is available", async () => { const runtime = new LocalRuntime(process.cwd()); const initStateManager = createInitStateManager(); diff --git a/src/common/utils/tools/tools.ts b/src/common/utils/tools/tools.ts index a506b59bf0..cc37d66d98 100644 --- a/src/common/utils/tools/tools.ts +++ b/src/common/utils/tools/tools.ts @@ -40,6 +40,12 @@ import { createMuxAgentsReadTool } from "@/node/services/tools/mux_agents_read"; import { createMuxAgentsWriteTool } from "@/node/services/tools/mux_agents_write"; import { createMuxConfigReadTool } from "@/node/services/tools/mux_config_read"; import { createMuxConfigWriteTool } from "@/node/services/tools/mux_config_write"; +import { + createWorkflowListTool, + createWorkflowReadTool, + createWorkflowWriteTool, +} from "@/node/services/tools/workflow_definitions"; +import { createWorkflowRunTool } from "@/node/services/tools/workflow_run"; import { createAgentReportTool } from "@/node/services/tools/agent_report"; import { wrapWithInitWait } from "@/node/services/tools/wrapWithInitWait"; import { withHooks, type HookConfig } from "@/node/services/tools/withHooks"; @@ -138,6 +144,34 @@ export interface 
ToolConfiguration { reportModelUsage?: (event: ToolModelUsageEvent) => void; /** Task orchestration for sub-agent tasks */ taskService?: TaskService; + /** Durable workflow lifecycle service for dynamic workflow tools. */ + workflowService?: { + listDefinitions(options: { projectTrusted: boolean }): Promise; + readDefinition(input: { + name: string; + projectTrusted: boolean; + }): Promise<{ descriptor: unknown; source: string }>; + writeScratchWorkflow?(input: { + workspaceId: string; + name: string; + description: string; + source: string; + }): Promise; + getRun?(input: { workspaceId: string; runId: string }): Promise; + startNamedWorkflowInBackground?(input: { + name: string; + workspaceId: string; + projectTrusted: boolean; + args: unknown; + }): Promise<{ runId: string; status: string; result: unknown }>; + startNamedWorkflow(input: { + name: string; + workspaceId: string; + projectTrusted: boolean; + args: unknown; + abortSignal?: AbortSignal; + }): Promise<{ runId: string; status: string; result: unknown }>; + }; /** Workspace goal lifecycle service for model-facing goal tools. */ goalService?: WorkspaceGoalService; /** Per-request goal tool gates derived from goal status and agent capabilities. */ @@ -145,6 +179,10 @@ export interface ToolConfiguration { getGoal: boolean; completeGoal: boolean; }; + /** Optional JSON Schema subset required by a workflow-spawned task report. */ + workflowAgentOutputSchema?: unknown; + /** When true, subagent reports are submitted by paths to report.md/structured-output.json. 
*/ + subagentReportFiles?: boolean; /** Enable agent_report tool (only valid for child task workspaces) */ enableAgentReport?: boolean; /** Experiments inherited from parent (for subagent spawning) */ @@ -153,6 +191,8 @@ export interface ToolConfiguration { programmaticToolCallingExclusive?: boolean; advisorTool?: boolean; execSubagentHardRestart?: boolean; + dynamicWorkflows?: boolean; + subagentFileReports?: boolean; }; /** Available sub-agents for the task tool description (dynamic context) */ availableSubagents?: AgentDefinitionDescriptor[]; @@ -455,6 +495,14 @@ export async function getToolsForModel( // (workspaceStatusGenerator.ts), which create the tool inline. Exposing // them in the default toolset would let exec-derived agents see their // "call me immediately" descriptions. + ...(config.workflowService && config.experiments?.dynamicWorkflows + ? { + workflow_list: createWorkflowListTool(config), + workflow_read: createWorkflowReadTool(config), + workflow_write: createWorkflowWriteTool(config), + workflow_run: createWorkflowRunTool(config), + } + : {}), ...(config.enableAgentReport ? { agent_report: createAgentReportTool(config) } : {}), ...(config.goalService && config.enableGoalTools?.getGoal ? { get_goal: createGetGoalTool(config) } @@ -566,6 +614,9 @@ export async function getToolsForModel( getAvailableTools(modelString, { enableAgentReport: config.enableAgentReport, enableAnalyticsQuery: Boolean(config.analyticsService), + enableDynamicWorkflows: Boolean( + config.workflowService && config.experiments?.dynamicWorkflows + ), enableAdvisor: Boolean(config.advisorRuntime), // Mux global tools are always created; tool policy (agent frontmatter) // controls which agents can actually use them. 
diff --git a/src/constants/slashCommands.ts b/src/constants/slashCommands.ts index c21b8363e4..d612372a72 100644 --- a/src/constants/slashCommands.ts +++ b/src/constants/slashCommands.ts @@ -14,6 +14,7 @@ export const WORKSPACE_ONLY_COMMAND_KEYS: ReadonlySet = new Set([ "plan", "heartbeat", "btw", + "workflow", ]); /** @@ -35,4 +36,5 @@ export const WORKSPACE_ONLY_COMMAND_TYPES: ReadonlySet = new Set([ "goal-complete", "goal-clear", "side-question", + "workflow-run", ]); diff --git a/src/node/config.ts b/src/node/config.ts index c964edf5ba..36aa2de4f3 100644 --- a/src/node/config.ts +++ b/src/node/config.ts @@ -1552,6 +1552,7 @@ export class Config { parentWorkspaceId: workspace.parentWorkspaceId, agentType: workspace.agentType, agentId: workspace.agentId, + workflowTask: workspace.workflowTask, bestOf: workspace.bestOf, taskStatus: workspace.taskStatus, reportedAt: workspace.reportedAt, @@ -1650,6 +1651,7 @@ export class Config { metadata.parentWorkspaceId ??= workspace.parentWorkspaceId; metadata.agentType ??= workspace.agentType; metadata.agentId ??= workspace.agentId; + metadata.workflowTask ??= workspace.workflowTask; metadata.bestOf ??= workspace.bestOf; metadata.taskStatus ??= workspace.taskStatus; metadata.reportedAt ??= workspace.reportedAt; @@ -1719,6 +1721,7 @@ export class Config { parentWorkspaceId: workspace.parentWorkspaceId, agentType: workspace.agentType, agentId: workspace.agentId, + workflowTask: workspace.workflowTask, bestOf: workspace.bestOf, taskStatus: workspace.taskStatus, reportedAt: workspace.reportedAt, @@ -1770,6 +1773,7 @@ export class Config { parentWorkspaceId: workspace.parentWorkspaceId, agentType: workspace.agentType, agentId: workspace.agentId, + workflowTask: workspace.workflowTask, bestOf: workspace.bestOf, taskStatus: workspace.taskStatus, reportedAt: workspace.reportedAt, @@ -1839,6 +1843,7 @@ export class Config { parentWorkspaceId: metadata.parentWorkspaceId, agentType: metadata.agentType, agentId: metadata.agentId, + 
workflowTask: metadata.workflowTask, bestOf: metadata.bestOf, taskStatus: metadata.taskStatus, reportedAt: metadata.reportedAt, diff --git a/src/node/orpc/context.ts b/src/node/orpc/context.ts index c6d725147b..9c978471f8 100644 --- a/src/node/orpc/context.ts +++ b/src/node/orpc/context.ts @@ -1,3 +1,4 @@ +import type { IJSRuntimeFactory } from "@/node/services/ptc/runtime"; import type { IncomingHttpHeaders } from "http"; import type { Config } from "@/node/config"; import type { AIService } from "@/node/services/aiService"; @@ -89,5 +90,6 @@ export interface ORPCContext { desktopSessionManager: DesktopSessionManager; desktopTokenManager: DesktopTokenManager; desktopBridgeServer: DesktopBridgeServer; + workflowRuntimeFactory: IJSRuntimeFactory; headers?: IncomingHttpHeaders; } diff --git a/src/node/orpc/router.test.ts b/src/node/orpc/router.test.ts index ced3ecaf38..5021b0e264 100644 --- a/src/node/orpc/router.test.ts +++ b/src/node/orpc/router.test.ts @@ -1,3 +1,4 @@ +/* eslint-disable @typescript-eslint/await-thenable, @typescript-eslint/no-unsafe-argument, @typescript-eslint/no-unsafe-assignment, @typescript-eslint/require-await, @typescript-eslint/restrict-template-expressions, local/no-sync-fs-methods */ import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test"; import { createRouterClient } from "@orpc/server"; import * as fs from "fs"; @@ -5,6 +6,8 @@ import * as os from "os"; import * as path from "path"; import { DEFAULT_TASK_SETTINGS } from "@/common/types/tasks"; import { Config } from "@/node/config"; +import { QuickJSRuntimeFactory } from "@/node/services/ptc/quickjsRuntime"; +import { WorkflowRunStore } from "@/node/services/workflows/WorkflowRunStore"; import type { ORPCContext } from "./context"; import { router } from "./router"; @@ -52,6 +55,200 @@ describe("router workspace goal validation", () => { }); }); +async function waitForRouterWorkflowStatus( + client: { + workflows: { + getRun(input: { workspaceId: string; runId: 
string }): Promise<{ status: string } | null>; + }; + }, + workspaceId: string, + runId: string, + status: string +): Promise { + const deadline = Date.now() + 1_000; + while (Date.now() < deadline) { + const run = await client.workflows.getRun({ workspaceId, runId }); + if (run?.status === status) { + return; + } + await new Promise((resolve) => setTimeout(resolve, 10)); + } + const run = await client.workflows.getRun({ workspaceId, runId }); + throw new Error(`Timed out waiting for ${runId} to become ${status}; got ${run?.status}`); +} + +describe("router workflow routes", () => { + let tempDir: string; + let config: Config; + let projectPath: string; + + beforeEach(async () => { + tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "mux-router-workflows-test-")); + config = new Config(tempDir); + projectPath = path.join(tempDir, "project"); + fs.mkdirSync(path.join(projectPath, ".mux", "workflows"), { recursive: true }); + fs.writeFileSync( + path.join(projectPath, ".mux", "workflows", "demo.js"), + `// description: Demo workflow\nexport default function workflow({ args }) { return { reportMarkdown: args.topic }; }\n` + ); + await config.editConfig((current) => { + current.projects.set(projectPath, { workspaces: [], trusted: true }); + return current; + }); + }); + + afterEach(() => { + fs.rmSync(tempDir, { recursive: true, force: true }); + }); + + function createContext(options: { enabled: boolean }): ORPCContext { + return { + workflowRuntimeFactory: new QuickJSRuntimeFactory(), + config, + aiService: { + waitForInit: mock(async () => undefined), + getWorkspaceMetadata: mock(async () => ({ + success: true, + data: { + id: "workspace-1", + name: "workspace-1", + projectPath, + namedWorkspacePath: projectPath, + runtimeConfig: { type: "local", srcBaseDir: tempDir }, + }, + })), + }, + taskService: {}, + experimentsService: { + isExperimentEnabled: mock(() => options.enabled), + }, + } as unknown as ORPCContext; + } + + test("lists workflow definitions only when 
dynamic workflows are enabled", async () => { + const disabledClient = createRouterClient(router(), { + context: createContext({ enabled: false }), + }); + await expect( + disabledClient.workflows.listDefinitions({ workspaceId: "workspace-1" }) + ).rejects.toThrow(/Dynamic workflows are disabled/); + + const enabledClient = createRouterClient(router(), { + context: createContext({ enabled: true }), + }); + await expect( + enabledClient.workflows.readDefinition({ workspaceId: "workspace-1", name: "demo" }) + ).resolves.toMatchObject({ + descriptor: expect.objectContaining({ name: "demo", scope: "project" }), + source: expect.stringContaining("reportMarkdown: args.topic"), + }); + await expect( + enabledClient.workflows.listDefinitions({ workspaceId: "workspace-1" }) + ).resolves.toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: "demo", scope: "project", executable: true }), + ]) + ); + }); + + test("promotes a scratch workflow run through the API", async () => { + const runStore = new WorkflowRunStore({ sessionDir: config.getSessionDir("workspace-1") }); + await runStore.createRun({ + id: "wfr_scratch_api", + workspaceId: "workspace-1", + definition: { name: "scratch", description: "Scratch", scope: "scratch", executable: true }, + definitionSource: + "export default function workflow() { return { reportMarkdown: 'scratch api' }; }\n", + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + const client = createRouterClient(router(), { context: createContext({ enabled: true }) }); + + await expect( + client.workflows.promoteScratch({ + workspaceId: "workspace-1", + runId: "wfr_scratch_api", + name: "scratch-api", + description: "Scratch API workflow", + location: "project", + overwrite: false, + }) + ).resolves.toMatchObject({ name: "scratch-api", scope: "project", executable: true }); + expect( + fs.readFileSync(path.join(projectPath, ".mux", "workflows", "scratch-api.js"), "utf-8") + ).toContain("Scratch API workflow"); + }); + + 
test("interrupts and resumes workflow runs through the API", async () => { + const runStore = new WorkflowRunStore({ sessionDir: config.getSessionDir("workspace-1") }); + await runStore.createRun({ + id: "wfr_api_resume", + workspaceId: "workspace-1", + definition: { name: "demo", description: "Demo", scope: "built-in", executable: true }, + definitionSource: + "export default function workflow() { return { reportMarkdown: 'resumed via api' }; }\n", + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + + const client = createRouterClient(router(), { context: createContext({ enabled: true }) }); + + await expect( + client.workflows.interrupt({ workspaceId: "workspace-1", runId: "wfr_api_resume" }) + ).resolves.toMatchObject({ id: "wfr_api_resume", status: "interrupted" }); + await expect( + client.workflows.resume({ workspaceId: "workspace-1", runId: "wfr_api_resume" }) + ).resolves.toEqual({ + runId: "wfr_api_resume", + status: "running", + result: null, + }); + await waitForRouterWorkflowStatus(client, "workspace-1", "wfr_api_resume", "completed"); + }); + + test("starts a trusted project-local workflow through the API", async () => { + const client = createRouterClient(router(), { context: createContext({ enabled: true }) }); + + const result = await client.workflows.start({ + workspaceId: "workspace-1", + name: "demo", + args: { topic: "workflow routes" }, + }); + + expect(result.status).toBe("completed"); + expect(result.runId).toMatch(/^wfr_/); + expect(result.result).toEqual({ reportMarkdown: "workflow routes" }); + + await expect( + client.workflows.getRun({ workspaceId: "workspace-1", runId: result.runId }) + ).resolves.toMatchObject({ + id: result.runId, + workspaceId: "workspace-1", + definition: expect.objectContaining({ name: "demo" }), + status: "completed", + }); + await expect(client.workflows.listRuns({ workspaceId: "workspace-1" })).resolves.toEqual([ + expect.objectContaining({ id: result.runId, status: "completed" }), + ]); + }); + + 
test("starts a workflow in the background when requested through the API", async () => { + const client = createRouterClient(router(), { context: createContext({ enabled: true }) }); + + const result = await client.workflows.start({ + workspaceId: "workspace-1", + name: "demo", + runInBackground: true, + args: { topic: "background workflow routes" }, + }); + + expect(result.status).toBe("running"); + expect(result.runId).toMatch(/^wfr_/); + expect(result.result).toBeNull(); + await waitForRouterWorkflowStatus(client, "workspace-1", result.runId, "completed"); + }); +}); + describe("router config.saveConfig", () => { let tempDir: string; let config: Config; diff --git a/src/node/orpc/router.ts b/src/node/orpc/router.ts index 55ab8a8ab7..0bd0bcad3c 100644 --- a/src/node/orpc/router.ts +++ b/src/node/orpc/router.ts @@ -39,8 +39,9 @@ import type { LogEntry } from "@/node/services/logBuffer"; import { clearLogEntries, subscribeLogFeed } from "@/node/services/logBuffer"; import { createReplayBufferedStreamMessageRelay } from "./replayBufferedStreamMessageRelay"; +import { getRuntimeType } from "@/node/runtime/initHook"; import { createRuntime, checkRuntimeAvailability } from "@/node/runtime/runtimeFactory"; -import { createRuntimeForWorkspace } from "@/node/runtime/runtimeHelpers"; +import { createRuntimeForWorkspace, resolveWorkspaceRootPath } from "@/node/runtime/runtimeHelpers"; import { readPlanFile } from "@/node/utils/runtime/helpers"; import { secretsToRecord } from "@/common/types/secrets"; import { roundToBase2 } from "@/common/telemetry/utils"; @@ -94,6 +95,13 @@ import { type SubagentTranscriptArtifactIndexEntry, } from "@/node/services/subagentTranscriptArtifacts"; import { getErrorMessage } from "@/common/utils/errors"; +import { + shouldUseRuntimeWorkflowProjectIO, + WorkflowDefinitionStore, +} from "@/node/services/workflows/WorkflowDefinitionStore"; +import { WorkflowRunStore } from "@/node/services/workflows/WorkflowRunStore"; +import { WorkflowService } 
from "@/node/services/workflows/WorkflowService"; +import { WorkflowTaskServiceAdapter } from "@/node/services/workflows/WorkflowTaskServiceAdapter"; import { isProjectTrusted } from "@/node/utils/projectTrust"; const RAW_QUERY_USER_ERROR_PATTERNS = [ @@ -158,6 +166,64 @@ function isTrustedProjectPath(context: ORPCContext, projectPath?: string | null) return isProjectTrusted(context.config, projectPath); } +function assertDynamicWorkflowsEnabled(context: ORPCContext): void { + if (!context.experimentsService.isExperimentEnabled(EXPERIMENT_IDS.DYNAMIC_WORKFLOWS)) { + throw new ORPCError("BAD_REQUEST", { + message: "Dynamic workflows are disabled", + }); + } +} + +async function resolveWorkflowContext( + context: ORPCContext, + workspaceId: string +): Promise<{ service: WorkflowService; projectTrusted: boolean }> { + assert(workspaceId.length > 0, "resolveWorkflowContext: workspaceId is required"); + assertDynamicWorkflowsEnabled(context); + await context.aiService.waitForInit(workspaceId); + const metadataResult = await context.aiService.getWorkspaceMetadata(workspaceId); + if (!metadataResult.success) { + throw new Error(metadataResult.error); + } + const metadata = metadataResult.data; + const projectTrusted = isTrustedProjectPath(context, metadata.projectPath); + const runtime = createRuntimeForWorkspace(metadata); + const workspacePath = resolveWorkspaceRootPath(metadata, runtime); + const runtimeType = getRuntimeType(metadata.runtimeConfig); + const useRuntimeProjectIO = shouldUseRuntimeWorkflowProjectIO(runtimeType); + + const subagentFileReportsExperimentEnabled = context.experimentsService.isExperimentEnabled( + EXPERIMENT_IDS.SUBAGENT_FILE_REPORTS + ); + + return { + projectTrusted, + service: new WorkflowService({ + definitionStore: new WorkflowDefinitionStore({ + projectRoot: runtime.normalizePath(".mux/workflows", workspacePath), + globalRoot: path.join(context.config.rootDir, "workflows"), + scratchRoot: 
path.join(context.config.getSessionDir(workspaceId), "workflows"), + projectRuntime: useRuntimeProjectIO ? runtime : undefined, + projectCwd: useRuntimeProjectIO ? workspacePath : undefined, + }), + runStore: new WorkflowRunStore({ sessionDir: context.config.getSessionDir(workspaceId) }), + runtimeFactory: context.workflowRuntimeFactory, + taskAdapterFactory: (runId) => + new WorkflowTaskServiceAdapter({ + taskService: context.taskService, + parentWorkspaceId: workspaceId, + workflowRunId: runId, + defaultAgentId: "explore", + experiments: { + dynamicWorkflows: true, + subagentFileReports: subagentFileReportsExperimentEnabled, + }, + }), + runnerId: `workflow-runner:${workspaceId}`, + }), + }; +} + function normalizeOptionalConfigString(value: string | null | undefined): string | undefined { const trimmedValue = value?.trim(); if (!trimmedValue) { @@ -1564,6 +1630,105 @@ export const router = (authToken?: string) => { return result.package; }), }, + workflows: { + listDefinitions: t + .input(schemas.workflows.listDefinitions.input) + .output(schemas.workflows.listDefinitions.output) + .handler(async ({ context, input }) => { + const { service, projectTrusted } = await resolveWorkflowContext( + context, + input.workspaceId + ); + return service.listDefinitions({ projectTrusted }); + }), + readDefinition: t + .input(schemas.workflows.readDefinition.input) + .output(schemas.workflows.readDefinition.output) + .handler(async ({ context, input }) => { + const { service, projectTrusted } = await resolveWorkflowContext( + context, + input.workspaceId + ); + return service.readDefinition({ name: input.name, projectTrusted }); + }), + listRuns: t + .input(schemas.workflows.listRuns.input) + .output(schemas.workflows.listRuns.output) + .handler(async ({ context, input }) => { + const { service, projectTrusted } = await resolveWorkflowContext( + context, + input.workspaceId + ); + await service.resumeCrashedRuns({ workspaceId: input.workspaceId, projectTrusted }); + return 
service.listRuns({ workspaceId: input.workspaceId }); + }), + getRun: t + .input(schemas.workflows.getRun.input) + .output(schemas.workflows.getRun.output) + .handler(async ({ context, input }) => { + const { service } = await resolveWorkflowContext(context, input.workspaceId); + return service.getRun({ workspaceId: input.workspaceId, runId: input.runId }); + }), + interrupt: t + .input(schemas.workflows.interrupt.input) + .output(schemas.workflows.interrupt.output) + .handler(async ({ context, input }) => { + const { service } = await resolveWorkflowContext(context, input.workspaceId); + return service.interruptRun({ workspaceId: input.workspaceId, runId: input.runId }); + }), + resume: t + .input(schemas.workflows.resume.input) + .output(schemas.workflows.resume.output) + .handler(async ({ context, input }) => { + const { service, projectTrusted } = await resolveWorkflowContext( + context, + input.workspaceId + ); + return service.resumeRunInBackground({ + workspaceId: input.workspaceId, + runId: input.runId, + projectTrusted, + }); + }), + promoteScratch: t + .input(schemas.workflows.promoteScratch.input) + .output(schemas.workflows.promoteScratch.output) + .handler(async ({ context, input }) => { + const { service, projectTrusted } = await resolveWorkflowContext( + context, + input.workspaceId + ); + return service.promoteScratchWorkflow({ + workspaceId: input.workspaceId, + runId: input.runId, + name: input.name, + description: input.description, + location: input.location, + overwrite: input.overwrite ?? false, + projectTrusted, + }); + }), + start: t + .input(schemas.workflows.start.input) + .output(schemas.workflows.start.output) + .handler(async ({ context, input, signal }) => { + assertDynamicWorkflowsEnabled(context); + const { service, projectTrusted } = await resolveWorkflowContext( + context, + input.workspaceId + ); + const workflowStartArgs = { + name: input.name, + workspaceId: input.workspaceId, + projectTrusted, + args: input.args ?? 
{}, + }; + if (input.runInBackground === true) { + return service.startNamedWorkflowInBackground(workflowStartArgs); + } + return service.startNamedWorkflow({ ...workflowStartArgs, abortSignal: signal }); + }), + }, providers: { list: t .input(schemas.providers.list.input) diff --git a/src/node/services/agentSkills/builtInSkillContent.generated.ts b/src/node/services/agentSkills/builtInSkillContent.generated.ts index 81760fc7ef..26b13883f2 100644 --- a/src/node/services/agentSkills/builtInSkillContent.generated.ts +++ b/src/node/services/agentSkills/builtInSkillContent.generated.ts @@ -3979,12 +3979,15 @@ export const BUILTIN_SKILL_FILES: Record> = { "", "", "
    ", - "agent_report (2)", + "agent_report (5)", "", - "| Env var | JSON path | Type | Description |", - "| -------------------------------- | ---------------- | ------ | ----------- |", - "| `MUX_TOOL_INPUT_REPORT_MARKDOWN` | `reportMarkdown` | string | — |", - "| `MUX_TOOL_INPUT_TITLE` | `title` | string | — |", + "| Env var | JSON path | Type | Description |", + "| --------------------------------------- | ---------------------- | ------- | ------------------------------------------------------------------------------------ |", + "| `MUX_TOOL_INPUT_REPORT_MARKDOWN` | `reportMarkdown` | string | — |", + "| `MUX_TOOL_INPUT_REPORT_MARKDOWN_PATH` | `reportMarkdownPath` | string | Path to the markdown report file, usually report.md in the workspace root |", + "| `MUX_TOOL_INPUT_STRUCTURED_OUTPUT` | `structuredOutput` | unknown | — |", + "| `MUX_TOOL_INPUT_STRUCTURED_OUTPUT_PATH` | `structuredOutputPath` | string | Path to a JSON file containing the structured output, usually structured-output.json |", + "| `MUX_TOOL_INPUT_TITLE` | `title` | string | — |", "", "
    ", "", @@ -4431,6 +4434,37 @@ export const BUILTIN_SKILL_FILES: Record> = { "", "", "", + "
    ", + "workflow_read (1)", + "", + "| Env var | JSON path | Type | Description |", + "| --------------------- | --------- | ------ | ----------- |", + "| `MUX_TOOL_INPUT_NAME` | `name` | string | — |", + "", + "
    ", + "", + "
    ", + "workflow_run (3)", + "", + "| Env var | JSON path | Type | Description |", + "| ---------------------------------- | ------------------- | ------- | ----------- |", + "| `MUX_TOOL_INPUT_ARGS` | `args` | unknown | — |", + "| `MUX_TOOL_INPUT_NAME` | `name` | string | — |", + "| `MUX_TOOL_INPUT_RUN_IN_BACKGROUND` | `run_in_background` | boolean | — |", + "", + "
    ", + "", + "
    ", + "workflow_write (3)", + "", + "| Env var | JSON path | Type | Description |", + "| ---------------------------- | ------------- | ------ | ----------- |", + "| `MUX_TOOL_INPUT_DESCRIPTION` | `description` | string | — |", + "| `MUX_TOOL_INPUT_NAME` | `name` | string | — |", + "| `MUX_TOOL_INPUT_SOURCE` | `source` | string | — |", + "", + "
    ", + "", "{/* END TOOL_HOOK_ENV_VARS */}", "", "", diff --git a/src/node/services/aiService.ts b/src/node/services/aiService.ts index a3d168ddbf..ef6d239fdf 100644 --- a/src/node/services/aiService.ts +++ b/src/node/services/aiService.ts @@ -1,3 +1,4 @@ +import * as path from "node:path"; import * as fs from "fs/promises"; import { EventEmitter } from "events"; @@ -119,6 +120,14 @@ import { import { applyToolPolicyAndExperiments, captureMcpToolTelemetry } from "./toolAssembly"; import { getErrorMessage } from "@/common/utils/errors"; import { filterSideQuestionMessages } from "@/common/utils/messages/sideQuestion"; +import { QuickJSRuntimeFactory } from "@/node/services/ptc/quickjsRuntime"; +import { + shouldUseRuntimeWorkflowProjectIO, + WorkflowDefinitionStore, +} from "@/node/services/workflows/WorkflowDefinitionStore"; +import { WorkflowRunStore } from "@/node/services/workflows/WorkflowRunStore"; +import { WorkflowService } from "@/node/services/workflows/WorkflowService"; +import { WorkflowTaskServiceAdapter } from "@/node/services/workflows/WorkflowTaskServiceAdapter"; import { isProjectTrusted } from "@/node/utils/projectTrust"; const STREAM_STARTUP_DIAGNOSTIC_THRESHOLD_MS = 1_000; @@ -1129,6 +1138,12 @@ export class AIService extends EventEmitter { const advisorExperimentEnabled = experiments?.advisorTool ?? this.experimentsService?.isExperimentEnabled(EXPERIMENT_IDS.ADVISOR_TOOL) === true; + const dynamicWorkflowsExperimentEnabled = + experiments?.dynamicWorkflows ?? + this.experimentsService?.isExperimentEnabled(EXPERIMENT_IDS.DYNAMIC_WORKFLOWS) === true; + const subagentFileReportsExperimentEnabled = + experiments?.subagentFileReports ?? 
+ this.experimentsService?.isExperimentEnabled(EXPERIMENT_IDS.SUBAGENT_FILE_REPORTS) === true; emitStartupBreadcrumb("loading_workspace_context"); const resolveAgentForStreamStartedAt = Date.now(); const agentResult = await resolveAgentForStream({ @@ -1473,17 +1488,44 @@ export class AIService extends EventEmitter { advisorModelString, cfg.advisorThinkingLevel ?? THINKING_LEVEL_OFF ); - const muxEnv = getMuxEnv( - metadata.projectPath, - getRuntimeType(metadata.runtimeConfig), - metadata.name, - { - workspaceId, - modelString, - thinkingLevel: thinkingLevel ?? "off", - costsUsd: sessionCostsUsd, - } - ); + const runtimeType = getRuntimeType(metadata.runtimeConfig); + const useRuntimeProjectWorkflowIO = shouldUseRuntimeWorkflowProjectIO(runtimeType); + const muxEnv = getMuxEnv(metadata.projectPath, runtimeType, metadata.name, { + workspaceId, + modelString, + thinkingLevel: thinkingLevel ?? "off", + costsUsd: sessionCostsUsd, + }); + + const workflowService = + dynamicWorkflowsExperimentEnabled && this.taskService != null + ? new WorkflowService({ + definitionStore: new WorkflowDefinitionStore({ + projectRoot: runtime.normalizePath(".mux/workflows", workspacePath), + globalRoot: path.join(this.config.rootDir, "workflows"), + scratchRoot: path.join(this.config.getSessionDir(workspaceId), "workflows"), + projectRuntime: useRuntimeProjectWorkflowIO ? runtime : undefined, + projectCwd: useRuntimeProjectWorkflowIO ? 
workspacePath : undefined, + }), + runStore: new WorkflowRunStore({ + sessionDir: this.config.getSessionDir(workspaceId), + }), + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapterFactory: (runId) => + new WorkflowTaskServiceAdapter({ + taskService: this.taskService!, + parentWorkspaceId: workspaceId, + workflowRunId: runId, + defaultAgentId: "explore", + experiments: { + ...experiments, + dynamicWorkflows: dynamicWorkflowsExperimentEnabled, + subagentFileReports: subagentFileReportsExperimentEnabled, + }, + }), + runnerId: `workflow-runner:${workspaceId}`, + }) + : undefined; // Create assistant message ID early so tool-side usage reporting and nested tool events // stay scoped to this specific assistant turn. The placeholder is appended to history below @@ -1576,10 +1618,14 @@ export class AIService extends EventEmitter { ancestorPlanFilePaths, workspaceId, muxScope, + workflowService, goalService: workspaceGoalService, enableGoalTools: goalToolAvailability, // Only child workspaces (tasks) can report to a parent. enableAgentReport: Boolean(metadata.parentWorkspaceId), + workflowAgentOutputSchema: metadata.workflowTask?.outputSchema, + subagentReportFiles: + subagentFileReportsExperimentEnabled && metadata.parentWorkspaceId != null, // External edit detection callback recordFileState, reportModelUsage: (event) => { @@ -1654,8 +1700,12 @@ export class AIService extends EventEmitter { taskService: this.taskService, analyticsService: this.analyticsService, desktopSessionManager: this.desktopSessionManager, - // Experiments for inheritance to subagents. - experiments, + // Experiments for inheritance to subagents and workflow tool gating. 
+ experiments: { + ...experiments, + dynamicWorkflows: dynamicWorkflowsExperimentEnabled, + subagentFileReports: subagentFileReportsExperimentEnabled, + }, // Dynamic context for tool descriptions (moved from system prompt for better model attention) availableSubagents: agentDefinitions, availableSkills, diff --git a/src/node/services/ptc/quickjsRuntime.test.ts b/src/node/services/ptc/quickjsRuntime.test.ts index 26ad335230..2b65bca485 100644 --- a/src/node/services/ptc/quickjsRuntime.test.ts +++ b/src/node/services/ptc/quickjsRuntime.test.ts @@ -51,6 +51,12 @@ describe("QuickJSRuntime", () => { expect(result.result).toBeNull(); }); + it("resolves returned promises", async () => { + const result = await runtime.eval("return (async () => ({ ok: true }))();"); + expect(result.success).toBe(true); + expect(result.result).toEqual({ ok: true }); + }); + it("handles syntax errors", async () => { const result = await runtime.eval("return {{{;"); expect(result.success).toBe(false); @@ -64,7 +70,7 @@ describe("QuickJSRuntime", () => { }); // Note: With asyncify, async host functions appear SYNC to QuickJS. - // Native JS await/Promise is not supported - use sync calls to host functions. + // Call host functions directly unless the evaluated code intentionally returns a Promise. it("handles multiple statements", async () => { const result = await runtime.eval(` const x = 10; diff --git a/src/node/services/ptc/quickjsRuntime.ts b/src/node/services/ptc/quickjsRuntime.ts index 0e5a981fa2..422ebb4bfb 100644 --- a/src/node/services/ptc/quickjsRuntime.ts +++ b/src/node/services/ptc/quickjsRuntime.ts @@ -305,14 +305,22 @@ export class QuickJSRuntime implements IJSRuntime { }; } - // With asyncify, evalCodeAsync suspends until async host functions complete. - // The result is already resolved - no need to resolve the promise. 
- const value: unknown = this.ctx.dump(evalResult.value) as unknown; + const resolvedValue = this.resolveReturnedValue(evalResult.value, deadline, timeoutMs); evalResult.value.dispose(); + if (!resolvedValue.success) { + return { + success: false, + error: resolvedValue.error, + toolCalls: this.toolCalls, + consoleOutput: this.consoleOutput, + duration_ms: Date.now() - execStartTime, + }; + } + return { success: true, - result: value, + result: resolvedValue.value, toolCalls: this.toolCalls, consoleOutput: this.consoleOutput, duration_ms: Date.now() - execStartTime, @@ -361,6 +369,44 @@ export class QuickJSRuntime implements IJSRuntime { } } + private resolveReturnedValue( + handle: QuickJSHandle, + deadline: number, + timeoutMs: number + ): { success: true; value: unknown } | { success: false; error: string } { + let promiseState = this.ctx.getPromiseState(handle); + while (promiseState.type === "pending" && this.ctx.runtime.hasPendingJob()) { + const pendingJobs = this.ctx.runtime.executePendingJobs(); + if (pendingJobs.error) { + const errorObj: unknown = pendingJobs.error.context.dump(pendingJobs.error) as unknown; + const error = this.getErrorMessage(errorObj, deadline, timeoutMs); + pendingJobs.dispose(); + return { success: false, error }; + } + pendingJobs.dispose(); + promiseState = this.ctx.getPromiseState(handle); + } + + if (promiseState.type === "pending") { + return { success: false, error: "Execution returned a pending Promise" }; + } + if (promiseState.type === "rejected") { + const errorObj: unknown = this.ctx.dump(promiseState.error) as unknown; + promiseState.error.dispose(); + return { success: false, error: this.getErrorMessage(errorObj, deadline, timeoutMs) }; + } + + try { + const valueHandle = promiseState.notAPromise ? 
handle : promiseState.value; + const value: unknown = this.ctx.dump(valueHandle) as unknown; + return { success: true, value }; + } finally { + if (!promiseState.notAPromise) { + promiseState.value.dispose(); + } + } + } + /** * Format a QuickJS error object into a readable error message. */ diff --git a/src/node/services/serviceContainer.ts b/src/node/services/serviceContainer.ts index 9ae48bdec0..1b19818376 100644 --- a/src/node/services/serviceContainer.ts +++ b/src/node/services/serviceContainer.ts @@ -57,6 +57,7 @@ import { createCoderUnarchiveHook, } from "@/node/runtime/coderLifecycleHooks"; import { createWorktreeArchiveHook } from "@/node/runtime/worktreeLifecycleHooks"; +import { QuickJSRuntimeFactory } from "@/node/services/ptc/quickjsRuntime"; import { setGlobalCoderService } from "@/node/runtime/runtimeFactory"; import { setSshPromptService } from "@/node/runtime/sshConnectionPool"; import { setSshPromptService as setSSH2SshPromptService } from "@/node/runtime/SSH2ConnectionPool"; @@ -79,6 +80,7 @@ import type { ExternalSecretResolver } from "@/common/types/secrets"; * Services are accessed via the ORPC context object. 
*/ export class ServiceContainer { + public readonly workflowRuntimeFactory = new QuickJSRuntimeFactory(); public readonly config: Config; // Core services — instantiated by createCoreServices (shared with `mux run` CLI) private readonly historyService: CoreServices["historyService"]; @@ -484,6 +486,7 @@ export class ServiceContainer { const resolveOnePasswordService = () => this.onePasswordService; return { + workflowRuntimeFactory: this.workflowRuntimeFactory, config: this.config, aiService: this.aiService, projectService: this.projectService, diff --git a/src/node/services/subagentReportArtifacts.test.ts b/src/node/services/subagentReportArtifacts.test.ts index 8c43642a0b..b7c07debc1 100644 --- a/src/node/services/subagentReportArtifacts.test.ts +++ b/src/node/services/subagentReportArtifacts.test.ts @@ -4,6 +4,7 @@ import * as os from "os"; import * as path from "path"; import { + readSubagentReportArtifact, readSubagentReportArtifactsFile, upsertSubagentReportArtifact, } from "@/node/services/subagentReportArtifacts"; @@ -41,4 +42,25 @@ describe("subagentReportArtifacts", () => { expect(entry).toBeDefined(); expect(entry?.reportTokenEstimate).toBe(100); }); + + test("upsertSubagentReportArtifact preserves structured output", async () => { + const workspaceId = "parent-1"; + const childTaskId = "child-structured"; + const structuredOutput = { claims: ["durable"], confidence: 0.8 }; + + await upsertSubagentReportArtifact({ + workspaceId, + workspaceSessionDir: testDir, + childTaskId, + parentWorkspaceId: workspaceId, + ancestorWorkspaceIds: [workspaceId], + reportMarkdown: "structured report", + structuredOutput, + nowMs: Date.now(), + }); + + const artifact = await readSubagentReportArtifact(testDir, childTaskId); + + expect(artifact?.structuredOutput).toEqual(structuredOutput); + }); }); diff --git a/src/node/services/subagentReportArtifacts.ts b/src/node/services/subagentReportArtifacts.ts index 2bd20a9810..54da7b326e 100644 --- 
a/src/node/services/subagentReportArtifacts.ts +++ b/src/node/services/subagentReportArtifacts.ts @@ -26,6 +26,7 @@ export interface SubagentReportArtifactIndexEntry { title?: string; /** Full ancestor chain (parent first). Used for descendant scope checks after cleanup. */ ancestorWorkspaceIds: string[]; + structuredOutput?: unknown; /** Estimated token count of delivered report markdown (~4 chars/token). */ reportTokenEstimate?: number; } @@ -135,6 +136,7 @@ export async function readSubagentReportArtifact( thinkingLevel?: unknown; title?: unknown; ancestorWorkspaceIds?: unknown; + structuredOutput?: unknown; reportMarkdown?: unknown; }; @@ -159,6 +161,7 @@ export async function readSubagentReportArtifact( : undefined, thinkingLevel: coerceThinkingLevel(meta.thinkingLevel), title: title ?? meta.title, + structuredOutput: obj.structuredOutput, reportMarkdown, }; } @@ -185,6 +188,7 @@ export async function readSubagentReportArtifact( thinkingLevel, title, ancestorWorkspaceIds, + structuredOutput: obj.structuredOutput, reportMarkdown, }; } catch (error) { @@ -230,6 +234,7 @@ export async function upsertSubagentReportArtifact(params: { model?: string; /** Task-level thinking/reasoning level used when running the sub-agent (optional for legacy entries). 
*/ thinkingLevel?: ThinkingLevel; + structuredOutput?: unknown; title?: string; nowMs?: number; }): Promise { @@ -267,6 +272,7 @@ export async function upsertSubagentReportArtifact(params: { thinkingLevel, title: params.title, ancestorWorkspaceIds: params.ancestorWorkspaceIds, + structuredOutput: params.structuredOutput, reportMarkdown: params.reportMarkdown, }, null, @@ -290,6 +296,7 @@ export async function upsertSubagentReportArtifact(params: { model, thinkingLevel, title: params.title, + structuredOutput: params.structuredOutput, ancestorWorkspaceIds: params.ancestorWorkspaceIds, }; updated.reportTokenEstimate = Math.ceil( diff --git a/src/node/services/taskService.test.ts b/src/node/services/taskService.test.ts index 13294c4a8f..937f0d1064 100644 --- a/src/node/services/taskService.test.ts +++ b/src/node/services/taskService.test.ts @@ -1340,6 +1340,107 @@ describe("TaskService", () => { expect(childEntry?.taskThinkingLevel).toBe("medium"); }, 20_000); + test("appends file-backed report instructions to ordinary subagent prompts", async () => { + const config = await createTestConfig(rootDir); + stubStableIds(config, ["aaaaaaaaaa"], "bbbbbbbbbb"); + + const projectPath = await createTestProject(rootDir, "repo", { initGit: false }); + const parentId = "1111111111"; + await saveWorkspaces( + config, + projectPath, + [ + { + path: projectPath, + id: parentId, + name: "parent", + createdAt: new Date().toISOString(), + runtimeConfig: { type: "local" }, + aiSettings: { model: "openai:gpt-5.2", thinkingLevel: "medium" }, + }, + ], + testTaskSettings() + ); + const { workspaceService, sendMessage } = createWorkspaceServiceMocks(); + const { taskService } = createTaskServiceHarness(config, { workspaceService }); + + const created = await createAgentTask(taskService, parentId, "do the thing", { + experiments: { subagentFileReports: true }, + }); + + expect(created.success).toBe(true); + expect(sendMessage).toHaveBeenCalledWith( + "aaaaaaaaaa", + expect.any(String), + 
expect.objectContaining({ experiments: { subagentFileReports: true } }), + expect.anything() + ); + const sentPrompt = (sendMessage as unknown as { mock: { calls: Array<[string, string]> } }).mock + .calls[0]?.[1]; + assert(typeof sentPrompt === "string", "sendMessage prompt is required"); + expect(sentPrompt.startsWith("do the thing")).toBe(true); + expect(sentPrompt).toContain("report.md"); + expect(sentPrompt).toContain("agent_report"); + expect(sentPrompt).toContain("reportMarkdownPath"); + expect(sentPrompt).toContain("structuredOutputPath"); + expect(sentPrompt).toContain("title"); + expect(sentPrompt).not.toContain("structured-output.json"); + }, 20_000); + + test("passes workflow output schema through file-backed report instructions", async () => { + const config = await createTestConfig(rootDir); + stubStableIds(config, ["aaaaaaaaaa"], "bbbbbbbbbb"); + + const projectPath = await createTestProject(rootDir, "repo", { initGit: false }); + const parentId = "1111111111"; + await saveWorkspaces( + config, + projectPath, + [ + { + path: projectPath, + id: parentId, + name: "parent", + createdAt: new Date().toISOString(), + runtimeConfig: { type: "local" }, + aiSettings: { model: "openai:gpt-5.2", thinkingLevel: "medium" }, + }, + ], + testTaskSettings() + ); + const { workspaceService, sendMessage } = createWorkspaceServiceMocks(); + const { taskService } = createTaskServiceHarness(config, { workspaceService }); + const outputSchema = { + type: "object", + required: ["claims"], + properties: { + claims: { type: "array", items: { type: "string" } }, + }, + }; + + const created = await createAgentTask(taskService, parentId, "collect claims", { + experiments: { subagentFileReports: true }, + workflowTask: { + runId: "wfr_123", + stepId: "collect-claims", + outputSchema, + }, + }); + + expect(created.success).toBe(true); + const sentPrompt = (sendMessage as unknown as { mock: { calls: Array<[string, string]> } }).mock + .calls[0]?.[1]; + assert(typeof sentPrompt === 
"string", "sendMessage prompt is required"); + const schemaStart = sentPrompt.indexOf("{"); + const schemaEnd = sentPrompt.lastIndexOf("}"); + assert( + schemaStart >= 0 && schemaEnd > schemaStart, + "file-report prompt must include a JSON schema" + ); + expect(JSON.parse(sentPrompt.slice(schemaStart, schemaEnd + 1))).toEqual(outputSchema); + expect(sentPrompt).toContain("structured-output.json"); + }, 20_000); + test("inherits parent model + thinking when target agent has no global defaults", async () => { const config = await createTestConfig(rootDir); stubStableIds(config, ["aaaaaaaaaa"], "bbbbbbbbbb"); @@ -2052,6 +2153,98 @@ describe("TaskService", () => { ); }, 20_000); + test("Task.create persists workflow task metadata for report validation", async () => { + const config = await createTestConfig(rootDir); + stubStableIds(config, ["taskflow01"]); + const { parentId } = await saveLocalParentWorkspace(config, rootDir); + const { taskService } = createTaskServiceHarness(config); + + const outputSchema = { + type: "object", + required: ["claims"], + properties: { claims: { type: "array", items: { type: "string" } } }, + additionalProperties: false, + }; + + const result = await createAgentTask(taskService, parentId, "extract claims", { + workflowTask: { + runId: "wfr_123", + stepId: "claims", + outputSchema, + }, + }); + + expect(result.success).toBe(true); + const task = findWorkspaceInConfig(config, "taskflow01"); + expect(task?.workflowTask).toEqual({ + runId: "wfr_123", + stepId: "claims", + outputSchema, + }); + }); + + test("TaskService extracts file-backed agent_report payloads from tool output", async () => { + const config = await createTestConfig(rootDir); + const { taskService } = createTaskServiceHarness(config); + const reportReader = taskService as unknown as { + findAgentReportArgsInParts(parts: readonly unknown[]): { + reportMarkdown: string; + title?: string; + structuredOutput?: unknown; + } | null; + }; + + const report = 
reportReader.findAgentReportArgsInParts([ + { + type: "dynamic-tool", + toolName: "agent_report", + state: "output-available", + input: { reportMarkdownPath: "report.md", structuredOutputPath: "structured-output.json" }, + output: { + success: true, + report: { + reportMarkdown: "# Done", + title: "Done", + structuredOutput: { claims: ["durable"] }, + }, + }, + }, + ]); + + expect(report).toEqual({ + reportMarkdown: "# Done", + title: "Done", + structuredOutput: { claims: ["durable"] }, + }); + }); + + test("TaskService preserves null structuredOutput from inline agent_report args", async () => { + const config = await createTestConfig(rootDir); + const { taskService } = createTaskServiceHarness(config); + const reportReader = taskService as unknown as { + findAgentReportArgsInParts(parts: readonly unknown[]): { + reportMarkdown: string; + title?: string; + structuredOutput?: unknown; + } | null; + }; + + const report = reportReader.findAgentReportArgsInParts([ + { + type: "dynamic-tool", + toolName: "agent_report", + state: "output-available", + input: { reportMarkdown: "# Done", structuredOutput: null, title: null }, + output: { success: true }, + }, + ]); + + expect(report).toEqual({ + reportMarkdown: "# Done", + structuredOutput: null, + }); + }); + test("created task metadata is not recomputed after defaults change", async () => { const config = await createTestConfig(rootDir); stubStableIds(config, ["aaaaaaaaaa"], "bbbbbbbbbb"); @@ -2806,7 +2999,11 @@ describe("TaskService", () => { type: "dynamic-tool", toolCallId: "agent-report-call-1", toolName: "agent_report", - input: { reportMarkdown: "Hello from child", title: "Result" }, + input: { + reportMarkdown: "Hello from child", + title: "Result", + structuredOutput: { claims: ["fast handoff"] }, + }, state: "output-available", output: { success: true }, }, @@ -2814,14 +3011,25 @@ describe("TaskService", () => { }); expect(sendMessage).toHaveBeenCalledTimes(1); + const handoffPrompt = (sendMessage as unknown as 
{ mock: { calls: Array<[string, string]> } }) + .mock.calls[0]?.[1]; + assert(typeof handoffPrompt === "string", "tasks-completed handoff prompt is required"); + expect(handoffPrompt).toContain("structured outputs"); + expect(handoffPrompt).not.toContain("task_await"); expect(sendMessage).toHaveBeenCalledWith( parentWorkspaceId, - expect.stringContaining("sub-agent task(s) have completed"), + expect.stringContaining("Background sub-agent task(s) have completed"), expect.objectContaining({ agentId: "plan", }), expect.objectContaining({ skipAutoResumeReset: true, synthetic: true }) ); + + const parentHistory = await collectFullHistory(historyService, parentWorkspaceId); + const serializedParentHistory = JSON.stringify(parentHistory); + expect(serializedParentHistory).toContain(""); + expect(serializedParentHistory).toContain(""); + expect(serializedParentHistory).toContain("claims"); }); test("foreground waiter suppresses tasks-completed auto-resume notification", async () => { @@ -2887,7 +3095,7 @@ describe("TaskService", () => { expect(sendMessage).not.toHaveBeenCalledWith( parentWorkspaceId, - expect.stringContaining("background sub-agent task(s) have completed"), + expect.stringContaining("task(s) have completed"), expect.anything(), expect.anything() ); @@ -3117,6 +3325,60 @@ describe("TaskService", () => { expect(childTask?.taskStatus).toBe("interrupted"); }); + test("terminateAllDescendantAgentTasks can scope interrupts to one workflow run", async () => { + const config = await createTestConfig(rootDir); + const projectPath = path.join(rootDir, "repo"); + const rootWorkspaceId = "root-111"; + const workflowTaskId = "task-workflow"; + const workflowChildTaskId = "task-workflow-child"; + const otherTaskId = "task-other"; + + await saveWorkspaces( + config, + projectPath, + [ + projectWorkspace(projectPath, "root", rootWorkspaceId), + projectWorkspace(projectPath, "workflow-task", workflowTaskId, { + parentWorkspaceId: rootWorkspaceId, + agentType: "exec", + 
taskStatus: "running", + workflowTask: { runId: "wfr_target", stepId: "scope" }, + }), + projectWorkspace(projectPath, "workflow-child", workflowChildTaskId, { + parentWorkspaceId: workflowTaskId, + agentType: "explore", + taskStatus: "running", + }), + projectWorkspace(projectPath, "other-task", otherTaskId, { + parentWorkspaceId: rootWorkspaceId, + agentType: "exec", + taskStatus: "running", + workflowTask: { runId: "wfr_other", stepId: "scope" }, + }), + ], + testTaskSettings() + ); + + const { aiService } = createAIServiceMocks(config); + const { workspaceService } = createWorkspaceServiceMocks(); + const { taskService } = createTaskServiceHarness(config, { aiService, workspaceService }); + + const interruptedTaskIds = await taskService.terminateAllDescendantAgentTasks(rootWorkspaceId, { + workflowRunId: "wfr_target", + }); + + expect(interruptedTaskIds).toEqual([workflowChildTaskId, workflowTaskId]); + const saved = config.loadConfigOrDefault(); + const tasks = saved.projects.get(projectPath)?.workspaces ?? 
[]; + expect(tasks.find((workspace) => workspace.id === workflowTaskId)?.taskStatus).toBe( + "interrupted" + ); + expect(tasks.find((workspace) => workspace.id === workflowChildTaskId)?.taskStatus).toBe( + "interrupted" + ); + expect(tasks.find((workspace) => workspace.id === otherTaskId)?.taskStatus).toBe("running"); + }); + test("terminateAllDescendantAgentTasks preserves already-completed descendants", async () => { const config = await createTestConfig(rootDir); @@ -4401,7 +4663,11 @@ describe("TaskService", () => { type: "dynamic-tool", toolCallId: "agent-report-call-1", toolName: "agent_report", - input: { reportMarkdown: "Hello from child", title: "Result" }, + input: { + reportMarkdown: "Hello from child", + title: "Result", + structuredOutput: { claims: ["durable"] }, + }, state: "output-available", output: { success: true }, }, @@ -4460,8 +4726,19 @@ describe("TaskService", () => { expect.objectContaining({ workspaceId: childId }) ); + const reportArtifact = await readSubagentReportArtifact( + config.getSessionDir(parentId), + childId + ); + expect(reportArtifact?.structuredOutput).toEqual({ claims: ["durable"] }); + expect(remove).toHaveBeenCalledTimes(1); expect(remove).toHaveBeenCalledWith(childId, true); + const childReportHandoffPrompt = ( + sendMessage as unknown as { mock: { calls: Array<[string, string]> } } + ).mock.calls[0]?.[1]; + assert(typeof childReportHandoffPrompt === "string", "child report handoff prompt is required"); + expect(childReportHandoffPrompt).not.toContain("task_await"); expect(sendMessage).toHaveBeenCalledWith( parentId, expect.stringContaining("sub-agent task(s) have completed"), @@ -5905,6 +6182,11 @@ describe("TaskService", () => { expect(remove).toHaveBeenCalledTimes(1); expect(remove).toHaveBeenCalledWith(childId, true); + const fallbackHandoffPrompt = ( + sendMessageMock as unknown as { mock: { calls: Array<[string, string]> } } + ).mock.calls[0]?.[1]; + assert(typeof fallbackHandoffPrompt === "string", "fallback 
handoff prompt is required"); + expect(fallbackHandoffPrompt).not.toContain("task_await"); expect(sendMessageMock).toHaveBeenCalledWith( parentId, expect.stringContaining("sub-agent task(s) have completed"), @@ -7130,6 +7412,7 @@ describe("TaskService", () => { ancestorWorkspaceIds: [parentId], reportMarkdown: "Report from child one", title: "Option one", + structuredOutput: { score: 1 }, nowMs: Date.now(), }); @@ -7158,6 +7441,8 @@ describe("TaskService", () => { const serializedParentHistory = JSON.stringify(parentHistory); expect(serializedParentHistory).toContain(""); expect(serializedParentHistory).toContain("Report from child one"); + expect(serializedParentHistory).toContain(""); + expect(serializedParentHistory).toContain("score"); expect( serializedParentHistory.match( /child-best-of-concurrent-deferred-fallback-1<\/task_id>/g diff --git a/src/node/services/taskService.ts b/src/node/services/taskService.ts index e240da89c0..3204265149 100644 --- a/src/node/services/taskService.ts +++ b/src/node/services/taskService.ts @@ -67,7 +67,8 @@ import type { ThinkingLevel } from "@/common/types/thinking"; import type { ErrorEvent, StreamEndEvent } from "@/common/types/stream"; import { isDynamicToolPart, type DynamicToolPart } from "@/common/types/toolParts"; import { - AgentReportToolArgsSchema, + AgentReportInlineToolArgsSchema, + AgentReportSubmittedReportSchema, TaskToolResultSchema, TaskToolArgsSchema, } from "@/common/utils/tools/toolDefinitions"; @@ -133,15 +134,127 @@ export interface TaskCreateArgs { kind?: TaskGroupKind; label?: string; }; + workflowTask?: { + runId: string; + stepId: string; + outputSchema?: unknown; + }; /** Experiments to inherit to subagent */ experiments?: { programmaticToolCalling?: boolean; programmaticToolCallingExclusive?: boolean; advisorTool?: boolean; execSubagentHardRestart?: boolean; + dynamicWorkflows?: boolean; + subagentFileReports?: boolean; }; } +function appendSubagentFileReportInstructions( + prompt: string, + 
workflowTask: TaskCreateArgs["workflowTask"] +): string { + assert(prompt.trim().length > 0, "appendSubagentFileReportInstructions requires prompt"); + const outputSchema = workflowTask?.outputSchema; + let schemaInstruction = ""; + if (outputSchema !== undefined) { + const schemaJson = JSON.stringify(outputSchema, null, 2); + assert( + schemaJson !== undefined, + "appendSubagentFileReportInstructions requires JSON output schema" + ); + schemaInstruction = [ + "Write the required structured output as valid JSON to `structured-output.json`.", + // File-backed report mode only exposes file paths in the tool schema, so the prompt must carry + // the workflow output contract that inline `agent_report` arguments would otherwise describe. + "The structured output must match this JSON Schema:", + "```json", + schemaJson, + "```", + ].join("\n"); + } + + return [ + prompt, + "Subagent file-backed report mode is enabled for this task. Before reporting, create or update `report.md` in the workspace root with your final markdown report.", + schemaInstruction, + "When complete, call agent_report with `reportMarkdownPath: null`, `structuredOutputPath: null`, and `title: null` so Mux uses the default report files.", + ] + .filter((instruction) => instruction.length > 0) + .join("\n\n"); +} + +function stringifyStructuredOutputForSubagentReport(structuredOutput: unknown): string { + const json = JSON.stringify(structuredOutput, null, 2); + assert( + json !== undefined, + "stringifyStructuredOutputForSubagentReport requires JSON-serializable structured output" + ); + return json; +} + +function formatSubagentReportUserMessage(params: { + childWorkspaceId: string; + agentType: string; + title: string; + reportMarkdown: string; + structuredOutput?: unknown; +}): string { + assert(params.childWorkspaceId.length > 0, "subagent report message requires child id"); + assert(params.agentType.length > 0, "subagent report message requires agent type"); + assert(params.title.length > 0, 
"subagent report message requires title"); + assert(params.reportMarkdown.length > 0, "subagent report message requires markdown"); + + const lines = [ + "", + `${params.childWorkspaceId}`, + `${params.agentType}`, + `${params.title}`, + "", + params.reportMarkdown, + "", + ]; + + if (params.structuredOutput !== undefined) { + lines.push( + "", + "```json", + stringifyStructuredOutputForSubagentReport(params.structuredOutput), + "```", + "" + ); + } + + lines.push(""); + return lines.join("\n"); +} + +// Completed background reports are already persisted into the parent context; asking the parent +// to call task_await burns an extra model/tool turn before it can synthesize the final answer. +const COMPLETED_BACKGROUND_SUBAGENT_HANDOFF_PROMPT = + "Background sub-agent task(s) have completed. Their accepted reports and any structured outputs " + + "are already injected into this workspace context as task tool results or synthetic user report " + + "messages. Write the final response now, integrating those results. If a required report appears " + + "missing, explain the missing context instead of waiting for another handoff."; + +function getTaskCompletionInstruction(params: { + completionToolName: "agent_report" | "propose_plan"; + subagentFileReports: boolean; +}): string { + if (params.completionToolName === "propose_plan") { + return "Call propose_plan exactly once now. Base it only on the planning work already completed in this workspace."; + } + + if (params.subagentFileReports) { + return ( + "Create or update report.md with your final report, then call agent_report exactly once now with reportMarkdownPath, structuredOutputPath, and title all set to null. " + + "Base it only on the work already completed in this workspace." + ); + } + + return "Call agent_report exactly once now with your final report. 
Base it only on the work already completed in this workspace."; +} + export interface TaskCreateResult { taskId: string; kind: TaskKind; @@ -185,7 +298,7 @@ interface AgentTaskIndex { interface PendingTaskWaiter { taskId: string; - resolve: (report: { reportMarkdown: string; title?: string }) => void; + resolve: (report: { reportMarkdown: string; title?: string; structuredOutput?: unknown }) => void; reject: (error: Error) => void; cleanup: () => void; requestingWorkspaceId?: string; @@ -199,6 +312,7 @@ interface PendingTaskStartWaiter { interface CompletedAgentReportCacheEntry { reportMarkdown: string; + structuredOutput?: unknown; title?: string; // Ancestor workspace IDs captured when the report was cached. // Used to keep descendant-scope checks working even if the task workspace is cleaned up. @@ -700,13 +814,15 @@ export class TaskService { isPlanLike, }); const resumeStartedAt = Date.now(); + const restartCompletionInstruction = isPlanLike + ? "When you have a final plan, call propose_plan exactly once." + : task.taskExperiments?.subagentFileReports === true + ? "When you have a final answer, create or update report.md, then call agent_report with reportMarkdownPath, structuredOutputPath, and title all set to null." + : "When you have a final answer, call agent_report exactly once."; const sendResult = await this.workspaceService.sendMessage( task.id, - isPlanLike - ? "Mux restarted while this task was running. Continue where you left off. " + - "When you have a final plan, call propose_plan exactly once." - : "Mux restarted while this task was running. Continue where you left off. " + - "When you have a final answer, call agent_report exactly once.", + "Mux restarted while this task was running. Continue where you left off. 
" + + restartCompletionInstruction, { model, agentId, @@ -841,10 +957,14 @@ export class TaskService { return Err("Task.create: unsupported kind"); } - const prompt = coerceNonEmptyString(args.prompt); - if (!prompt) { + const basePrompt = coerceNonEmptyString(args.prompt); + if (!basePrompt) { return Err("Task.create: prompt is required"); } + const prompt = + args.experiments?.subagentFileReports === true + ? appendSubagentFileReportInstructions(basePrompt, args.workflowTask) + : basePrompt; const agentIdRaw = coerceNonEmptyString(args.agentId ?? args.agentType); if (!agentIdRaw) { @@ -1048,6 +1168,8 @@ export class TaskService { maxParallelAgentTasks: taskSettings.maxParallelAgentTasks, shouldQueue, runtimeType: taskRuntimeConfig.type, + workflowRunId: args.workflowTask?.runId, + workflowStepId: args.workflowTask?.stepId, promptLength: prompt.length, model: taskModelString, thinkingLevel: effectiveThinkingLevel, @@ -1090,6 +1212,7 @@ export class TaskService { parentWorkspaceId, agentId, agentType, + workflowTask: args.workflowTask, bestOf: normalizedBestOf, taskStatus: "queued", taskPrompt: prompt, @@ -1207,6 +1330,7 @@ export class TaskService { agentId, parentWorkspaceId, agentType, + workflowTask: args.workflowTask, bestOf: normalizedBestOf, taskStatus: "running", taskTrunkBranch: trunkBranch, @@ -1358,7 +1482,10 @@ export class TaskService { * Legacy naming note: this method retains the original "terminate" name for * compatibility with existing call sites. 
*/ - async terminateAllDescendantAgentTasks(workspaceId: string): Promise { + async terminateAllDescendantAgentTasks( + workspaceId: string, + options?: { workflowRunId?: string } + ): Promise { assert( workspaceId.length > 0, "terminateAllDescendantAgentTasks: workspaceId must be non-empty" @@ -1371,7 +1498,11 @@ export class TaskService { const cfg = this.config.loadConfigOrDefault(); const index = this.buildAgentTaskIndex(cfg); - const descendants = this.listDescendantAgentTaskIdsFromIndex(index, workspaceId); + const descendants = this.listDescendantAgentTaskIdsFromIndex(index, workspaceId).filter( + (taskId) => + options?.workflowRunId == null || + this.isWorkflowRunDescendant(index, taskId, options.workflowRunId) + ); if (descendants.length === 0) { return interruptedTaskIds; } @@ -1647,14 +1778,18 @@ export class TaskService { requestingWorkspaceId?: string; backgroundOnMessageQueued?: boolean; } - ): Promise<{ reportMarkdown: string; title?: string }> { + ): Promise<{ reportMarkdown: string; title?: string; structuredOutput?: unknown }> { assert(taskId.length > 0, "waitForAgentReport: taskId must be non-empty"); // Report monotonicity invariant: check the in-memory cache before any status-based // interruption handling so a finalized report stays awaitable once observed. const cached = this.completedReportsByTaskId.get(taskId); if (cached) { - return { reportMarkdown: cached.reportMarkdown, title: cached.title }; + return { + reportMarkdown: cached.reportMarkdown, + title: cached.title, + structuredOutput: cached.structuredOutput, + }; } const timeoutMs = options?.timeoutMs ?? 
10 * 60 * 1000; // 10 minutes @@ -1668,6 +1803,7 @@ export class TaskService { const tryReadPersistedReport = async (): Promise<{ reportMarkdown: string; + structuredOutput?: unknown; title?: string; } | null> => { if (!requestingWorkspaceId) { @@ -1684,11 +1820,16 @@ export class TaskService { this.completedReportsByTaskId.set(taskId, { reportMarkdown: artifact.reportMarkdown, title: artifact.title, + structuredOutput: artifact.structuredOutput, ancestorWorkspaceIds: artifact.ancestorWorkspaceIds, }); this.enforceCompletedReportCacheLimit(); - return { reportMarkdown: artifact.reportMarkdown, title: artifact.title }; + return { + reportMarkdown: artifact.reportMarkdown, + title: artifact.title, + structuredOutput: artifact.structuredOutput, + }; }; // Fast-path: if the task is already gone (cleanup) or already reported (restart), return the @@ -1719,7 +1860,11 @@ export class TaskService { } } - return await new Promise<{ reportMarkdown: string; title?: string }>((resolve, reject) => { + return await new Promise<{ + reportMarkdown: string; + title?: string; + structuredOutput?: unknown; + }>((resolve, reject) => { void (async () => { // Validate existence early to avoid waiting on never-resolving task IDs. 
const cfg = this.config.loadConfigOrDefault(); @@ -2170,6 +2315,22 @@ export class TaskService { return result; } + private isWorkflowRunDescendant( + index: AgentTaskIndex, + taskId: string, + workflowRunId: string + ): boolean { + let current: string | undefined = taskId; + for (let i = 0; current != null && i < 32; i++) { + const entry = index.byId.get(current); + if (entry?.workflowTask?.runId === workflowRunId) { + return true; + } + current = index.parentById.get(current); + } + return false; + } + private listCompletedDescendantAgentTaskIds( index: AgentTaskIndex, workspaceId: string @@ -2992,14 +3153,15 @@ export class TaskService { options?: { reason?: "startup" | "stream_end" | "error"; error?: Pick; + subagentFileReports?: boolean; } ): string { const completionToolLabel = completionToolName === "propose_plan" ? "propose_plan" : "agent_report"; - const completionInstruction = - completionToolName === "propose_plan" - ? "Call propose_plan exactly once now. Base it only on the planning work already completed in this workspace." - : "Call agent_report exactly once now with your final report. Base it only on the work already completed in this workspace."; + const completionInstruction = getTaskCompletionInstruction({ + completionToolName, + subagentFileReports: options?.subagentFileReports === true, + }); const noExtraWorkInstruction = completionToolName === "propose_plan" ? "Do not continue planning or call other tools." 
@@ -3054,11 +3216,15 @@ export class TaskService { const startedAt = Date.now(); const sendResult = await this.workspaceService.sendMessage( workspaceId, - this.buildCompletionToolRecoveryMessage(completionToolName, options), + this.buildCompletionToolRecoveryMessage(completionToolName, { + ...options, + subagentFileReports: entry.workspace.taskExperiments?.subagentFileReports === true, + }), { model, agentId, thinkingLevel: entry.workspace.taskThinkingLevel, + experiments: entry.workspace.taskExperiments, toolPolicy: [{ regex_match: `^${completionToolName}$`, action: "require" }], }, { synthetic: true, agentInitiated: true } @@ -3309,7 +3475,7 @@ export class TaskService { private async settleInterruptedTaskAtStreamEnd( workspaceId: string, entry: { projectPath: string; workspace: WorkspaceConfigEntry }, - reportArgs: { reportMarkdown: string; title?: string } | null + reportArgs: { reportMarkdown: string; title?: string; structuredOutput?: unknown } | null ): Promise { if (reportArgs) { await this.finalizeAgentTaskReport(workspaceId, entry, reportArgs); @@ -3773,7 +3939,13 @@ export class TaskService { params.parentWorkspaceId, sibling.taskId, findWorkspaceEntry(cfg, sibling.taskId), - { reportMarkdown: artifact.reportMarkdown, title: artifact.title } + { + reportMarkdown: artifact.reportMarkdown, + ...(artifact.title !== undefined ? { title: artifact.title } : {}), + ...(artifact.structuredOutput !== undefined + ? 
{ structuredOutput: artifact.structuredOutput } + : {}), + } ); for (const taskId of siblingCleanupTaskIds) { cleanupTaskIds.add(taskId); @@ -3846,7 +4018,7 @@ export class TaskService { private async finalizeAgentTaskReport( childWorkspaceId: string, childEntry: { projectPath: string; workspace: WorkspaceConfigEntry } | null | undefined, - reportArgs: { reportMarkdown: string; title?: string } + reportArgs: { reportMarkdown: string; title?: string; structuredOutput?: unknown } ): Promise { this.markTaskForegroundRelevant(childWorkspaceId); @@ -3923,6 +4095,7 @@ export class TaskService { model: latestChildEntry?.workspace.taskModelString, thinkingLevel: latestChildEntry?.workspace.taskThinkingLevel, title: reportArgs.title, + structuredOutput: reportArgs.structuredOutput, nowMs: persistedAtMs, }); } catch (error: unknown) { @@ -4004,7 +4177,7 @@ export class TaskService { ); const sendResult = await this.workspaceService.sendMessage( parentWorkspaceId, - "Your background sub-agent task(s) have completed. 
Use task_await to retrieve their reports and integrate the results.", + COMPLETED_BACKGROUND_SUBAGENT_HANDOFF_PROMPT, { model: resumeOptions.model, agentId: resumeOptions.agentId, @@ -4032,7 +4205,7 @@ export class TaskService { private resolveWaiters( taskId: string, - report: { reportMarkdown: string; title?: string } + report: { reportMarkdown: string; title?: string; structuredOutput?: unknown } ): boolean { this.markTaskForegroundRelevant(taskId); @@ -4043,6 +4216,7 @@ export class TaskService { this.completedReportsByTaskId.set(taskId, { reportMarkdown: report.reportMarkdown, title: report.title, + structuredOutput: report.structuredOutput, ancestorWorkspaceIds, }); this.enforceCompletedReportCacheLimit(); @@ -4125,18 +4299,34 @@ export class TaskService { private findAgentReportArgsInParts( parts: readonly unknown[] - ): { reportMarkdown: string; title?: string } | null { + ): { reportMarkdown: string; title?: string; structuredOutput?: unknown } | null { for (let i = parts.length - 1; i >= 0; i--) { const part = parts[i]; if (!isDynamicToolPart(part)) continue; if (part.toolName !== "agent_report") continue; if (part.state !== "output-available") continue; if (!isSuccessfulToolResult(part.output)) continue; - const parsed = AgentReportToolArgsSchema.safeParse(part.input); + const outputReport = AgentReportSubmittedReportSchema.safeParse( + typeof part.output === "object" && part.output !== null && "report" in part.output + ? (part.output as { report?: unknown }).report + : undefined + ); + if (outputReport.success) { + return outputReport.data; + } + + const parsed = AgentReportInlineToolArgsSchema.safeParse(part.input); if (!parsed.success) continue; // Normalize null → undefined at the schema boundary so downstream // code that expects `title?: string` doesn't need to handle null. - return { reportMarkdown: parsed.data.reportMarkdown, title: parsed.data.title ?? 
undefined }; + const report: { reportMarkdown: string; title?: string; structuredOutput?: unknown } = { + reportMarkdown: parsed.data.reportMarkdown, + title: parsed.data.title ?? undefined, + }; + if (Object.prototype.hasOwnProperty.call(parsed.data, "structuredOutput")) { + report.structuredOutput = parsed.data.structuredOutput; + } + return report; } return null; } @@ -4226,6 +4416,7 @@ export class TaskService { const reports: Array<{ taskId: string; reportMarkdown: string; + structuredOutput?: unknown; title?: string; agentId?: string; agentType?: string; @@ -4243,6 +4434,7 @@ export class TaskService { taskId: sibling.taskId, reportMarkdown: artifact.reportMarkdown, title: artifact.title, + structuredOutput: artifact.structuredOutput, agentId: sibling.agentId, agentType: sibling.agentType, groupKind: sibling.kind, @@ -4376,7 +4568,7 @@ export class TaskService { parentWorkspaceId: string, childWorkspaceId: string, childEntry: { projectPath: string; workspace: WorkspaceConfigEntry } | null | undefined, - report: { reportMarkdown: string; title?: string } + report: { reportMarkdown: string; title?: string; structuredOutput?: unknown } ): Promise { assert( childWorkspaceId.length > 0, @@ -4412,15 +4604,16 @@ export class TaskService { parentWorkspaceId: string, childWorkspaceId: string, childEntry: { projectPath: string; workspace: WorkspaceConfigEntry } | null | undefined, - report: { reportMarkdown: string; title?: string } + report: { reportMarkdown: string; title?: string; structuredOutput?: unknown } ): Promise { - const agentType = childEntry?.workspace.agentType ?? "agent"; + const agentType = coerceNonEmptyString(childEntry?.workspace.agentType) ?? 
"agent"; const output = { status: "completed" as const, taskId: childWorkspaceId, reportMarkdown: report.reportMarkdown, title: report.title, + structuredOutput: report.structuredOutput, agentType, }; const parsedOutput = TaskToolResultSchema.safeParse(output); @@ -4476,20 +4669,22 @@ export class TaskService { // Background tasks: append a synthetic user message containing the report so earlier history // remains immutable (append-only) and prompt caches can still reuse the prefix. - const titlePrefix = report.title ?? `Subagent (${agentType}) report`; - const xml = [ - "", - `${childWorkspaceId}`, - `${agentType}`, - `${titlePrefix}`, - "", - report.reportMarkdown, - "", - "", - ].join("\n"); + const titlePrefix = + typeof report.title === "string" && report.title.trim().length > 0 + ? report.title + : `Subagent (${agentType}) report`; + const reportContent = formatSubagentReportUserMessage({ + childWorkspaceId, + agentType, + title: titlePrefix, + reportMarkdown: report.reportMarkdown, + ...(report.structuredOutput !== undefined + ? 
{ structuredOutput: report.structuredOutput } + : {}), + }); const messageId = createTaskReportMessageId(); - const reportMessage = createMuxMessage(messageId, "user", xml, { + const reportMessage = createMuxMessage(messageId, "user", reportContent, { timestamp: Date.now(), synthetic: true, }); diff --git a/src/node/services/tools/agent_report.test.ts b/src/node/services/tools/agent_report.test.ts index 7b4d98388b..bdaf005f25 100644 --- a/src/node/services/tools/agent_report.test.ts +++ b/src/node/services/tools/agent_report.test.ts @@ -1,4 +1,6 @@ import { describe, it, expect, mock } from "bun:test"; +import * as fs from "node:fs/promises"; +import * as path from "node:path"; import type { ToolExecutionOptions } from "ai"; import { createAgentReportTool } from "./agent_report"; @@ -36,6 +38,196 @@ describe("agent_report tool", () => { } }); + it("exposes workflow output schema directly in inline agent_report input", () => { + using tempDir = new TestTempDir("test-agent-report-tool-schema"); + const outputSchema = { + type: "object", + required: ["claims"], + properties: { claims: { type: "array", items: { type: "string" } } }, + additionalProperties: false, + }; + const tool = createAgentReportTool({ + ...createTestToolConfig(tempDir.path, { workspaceId: "task-workspace" }), + taskService: { + hasActiveDescendantAgentTasksForWorkspace: mock(() => false), + } as unknown as TaskService, + workflowAgentOutputSchema: outputSchema, + }); + + const inputSchema = tool.inputSchema as { jsonSchema?: unknown }; + expect(inputSchema.jsonSchema).toEqual({ + type: "object", + properties: { + reportMarkdown: { type: "string", minLength: 1 }, + structuredOutput: outputSchema, + title: { anyOf: [{ type: "string" }, { type: "null" }] }, + }, + required: ["reportMarkdown", "structuredOutput", "title"], + additionalProperties: false, + }); + }); + + it("returns validation failure without finalizing when structured output does not match workflow schema", async () => { + using 
tempDir = new TestTempDir("test-agent-report-tool-structured-invalid"); + const baseConfig = createTestToolConfig(tempDir.path, { + workspaceId: "task-workspace", + }); + + const taskService = { + hasActiveDescendantAgentTasksForWorkspace: mock(() => false), + } as unknown as TaskService; + + const tool = createAgentReportTool({ + ...baseConfig, + taskService, + workflowAgentOutputSchema: { + type: "object", + required: ["claims"], + properties: { claims: { type: "array", items: { type: "string" } } }, + additionalProperties: false, + }, + }); + + const result: unknown = await Promise.resolve( + tool.execute!( + { reportMarkdown: "done", structuredOutput: { claims: [1] } }, + mockToolCallOptions + ) + ); + + expect(result).toEqual({ + success: false, + message: "Structured output failed schema validation.", + errors: [{ path: "$.claims[0]", message: "Expected string, got number" }], + }); + }); + + it("returns success when structured output satisfies workflow schema", async () => { + using tempDir = new TestTempDir("test-agent-report-tool-structured-ok"); + const baseConfig = createTestToolConfig(tempDir.path, { + workspaceId: "task-workspace", + }); + + const taskService = { + hasActiveDescendantAgentTasksForWorkspace: mock(() => false), + } as unknown as TaskService; + + const tool = createAgentReportTool({ + ...baseConfig, + taskService, + workflowAgentOutputSchema: { + type: "object", + required: ["claims"], + properties: { claims: { type: "array", items: { type: "string" } } }, + additionalProperties: false, + }, + }); + + const result: unknown = await Promise.resolve( + tool.execute!( + { reportMarkdown: "done", structuredOutput: { claims: ["a"] } }, + mockToolCallOptions + ) + ); + + expect(result).toEqual({ + success: true, + message: "Report submitted successfully.", + }); + }); + + it("submits a subagent file-backed report from report.md and structured-output.json", async () => { + using tempDir = new TestTempDir("test-agent-report-tool-file-backed"); + 
await fs.writeFile(path.join(tempDir.path, "report.md"), "# Done\n\nFindings.", "utf-8"); + await fs.writeFile( + path.join(tempDir.path, "structured-output.json"), + JSON.stringify({ claims: ["durable"] }), + "utf-8" + ); + const taskService = { + hasActiveDescendantAgentTasksForWorkspace: mock(() => false), + } as unknown as TaskService; + const tool = createAgentReportTool({ + ...createTestToolConfig(tempDir.path, { workspaceId: "task-workspace" }), + taskService, + subagentReportFiles: true, + workflowAgentOutputSchema: { + type: "object", + required: ["claims"], + properties: { claims: { type: "array", items: { type: "string" } } }, + additionalProperties: false, + }, + }); + + const inputSchema = tool.inputSchema as { jsonSchema?: unknown }; + expect(inputSchema.jsonSchema).toEqual( + expect.objectContaining({ + required: ["reportMarkdownPath", "structuredOutputPath", "title"], + }) + ); + + const result: unknown = await Promise.resolve(tool.execute!(undefined, mockToolCallOptions)); + + expect(result).toEqual({ + success: true, + message: "Report submitted successfully.", + report: { + reportMarkdown: "# Done\n\nFindings.", + structuredOutput: { claims: ["durable"] }, + }, + }); + }); + + it("submits a subagent file-backed markdown report with empty arguments", async () => { + using tempDir = new TestTempDir("test-agent-report-tool-file-backed-empty-args"); + await fs.writeFile(path.join(tempDir.path, "report.md"), "# Done", "utf-8"); + const tool = createAgentReportTool({ + ...createTestToolConfig(tempDir.path, { workspaceId: "task-workspace" }), + taskService: { + hasActiveDescendantAgentTasksForWorkspace: mock(() => false), + } as unknown as TaskService, + subagentReportFiles: true, + }); + + const result: unknown = await Promise.resolve(tool.execute!({}, mockToolCallOptions)); + + expect(result).toEqual({ + success: true, + message: "Report submitted successfully.", + report: { reportMarkdown: "# Done" }, + }); + }); + + it("rejects file-backed 
structured output that fails workflow schema validation", async () => { + using tempDir = new TestTempDir("test-agent-report-tool-file-backed-invalid"); + await fs.writeFile(path.join(tempDir.path, "report.md"), "done", "utf-8"); + await fs.writeFile( + path.join(tempDir.path, "structured-output.json"), + '{"claims":[1]}', + "utf-8" + ); + const tool = createAgentReportTool({ + ...createTestToolConfig(tempDir.path, { workspaceId: "task-workspace" }), + taskService: { + hasActiveDescendantAgentTasksForWorkspace: mock(() => false), + } as unknown as TaskService, + subagentReportFiles: true, + workflowAgentOutputSchema: { + type: "object", + required: ["claims"], + properties: { claims: { type: "array", items: { type: "string" } } }, + }, + }); + + const result: unknown = await Promise.resolve(tool.execute!({}, mockToolCallOptions)); + + expect(result).toEqual({ + success: false, + message: "Structured output failed schema validation.", + errors: [{ path: "$.claims[0]", message: "Expected string, got number" }], + }); + }); + it("returns success when the task has no active descendants", async () => { using tempDir = new TestTempDir("test-agent-report-tool-ok"); const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "task-workspace" }); diff --git a/src/node/services/tools/agent_report.ts b/src/node/services/tools/agent_report.ts index 99f949a1cb..0b8e309343 100644 --- a/src/node/services/tools/agent_report.ts +++ b/src/node/services/tools/agent_report.ts @@ -1,15 +1,321 @@ -import { tool } from "ai"; +import { jsonSchema, tool } from "ai"; +import type { JSONSchema7 } from "@ai-sdk/provider"; +import { getErrorMessage } from "@/common/utils/errors"; +import { + validateJsonSchemaSubset, + validateJsonSchemaSubsetSchema, + type JsonSchemaValidationError, +} from "@/common/utils/jsonSchemaSubset"; import type { ToolConfiguration, ToolFactory } from "@/common/utils/tools/tools"; -import { TOOL_DEFINITIONS } from "@/common/utils/tools/toolDefinitions"; 
+import { + AgentReportFileToolArgsSchema, + AgentReportInlineToolArgsSchema, + TOOL_DEFINITIONS, +} from "@/common/utils/tools/toolDefinitions"; +import { RuntimeError } from "@/node/runtime/Runtime"; +import { readFileString } from "@/node/utils/runtime/helpers"; +import { validateFileSize, validatePathInCwd } from "./fileCommon"; import { requireTaskService, requireWorkspaceId } from "./toolUtils"; +const DEFAULT_REPORT_MARKDOWN_PATH = "report.md"; +const DEFAULT_STRUCTURED_OUTPUT_PATH = "structured-output.json"; + +const REPORT_MARKDOWN_MAX_BYTES = 256 * 1024; +const STRUCTURED_OUTPUT_MAX_BYTES = 64 * 1024; + +interface AgentReportSuccessResult { + success: true; + message: string; + report?: { + reportMarkdown: string; + title?: string; + structuredOutput?: unknown; + }; +} + +interface AgentReportFailureResult { + success: false; + message: string; + errors: JsonSchemaValidationError[]; +} + +type AgentReportResult = AgentReportSuccessResult | AgentReportFailureResult; + +function validationFailure( + message: string, + errors: JsonSchemaValidationError[] +): AgentReportFailureResult { + return { success: false, message, errors }; +} + +function zodValidationFailure( + message: string, + error: { issues: Array<{ path: unknown[]; message: string }> } +) { + return validationFailure( + message, + error.issues.map((issue) => ({ + path: issue.path.length > 0 ? `$.${issue.path.join(".")}` : "$", + message: issue.message, + })) + ); +} + +function validateStructuredOutput(config: ToolConfiguration, structuredOutput: unknown) { + if (config.workflowAgentOutputSchema == null) { + return null; + } + + const validation = validateJsonSchemaSubset(config.workflowAgentOutputSchema, structuredOutput); + return validation.success + ? 
null + : validationFailure("Structured output failed schema validation.", validation.errors); +} + +function buildInlineInputSchema(config: ToolConfiguration) { + const outputSchema = config.workflowAgentOutputSchema; + if (outputSchema == null || !validateJsonSchemaSubsetSchema(outputSchema).success) { + return AgentReportInlineToolArgsSchema; + } + + return jsonSchema( + { + type: "object", + properties: { + reportMarkdown: { type: "string", minLength: 1 }, + structuredOutput: outputSchema as JSONSchema7, + title: { anyOf: [{ type: "string" }, { type: "null" }] }, + }, + required: ["reportMarkdown", "structuredOutput", "title"], + additionalProperties: false, + } satisfies JSONSchema7, + { + validate: (value) => { + const parsed = AgentReportInlineToolArgsSchema.safeParse(value); + if (!parsed.success) { + return { success: false, error: parsed.error }; + } + const validation = validateStructuredOutput(config, parsed.data.structuredOutput); + if (validation) { + return { success: false, error: new Error(validation.message) }; + } + return { success: true, value: parsed.data }; + }, + } + ); +} + +function buildFileInputSchema() { + return jsonSchema( + { + type: "object", + properties: { + reportMarkdownPath: { + anyOf: [{ type: "string", minLength: 1 }, { type: "null" }], + description: + "Optional path to the markdown report file. Pass null or omit to submit report.md from the workspace root.", + }, + structuredOutputPath: { + anyOf: [{ type: "string", minLength: 1 }, { type: "null" }], + description: + "Optional path to structured output JSON. Pass null or omit to submit structured-output.json when this task requires structured output.", + }, + title: { anyOf: [{ type: "string" }, { type: "null" }] }, + }, + required: ["reportMarkdownPath", "structuredOutputPath", "title"], + additionalProperties: false, + } satisfies JSONSchema7, + { + validate: (value) => { + const parsed = AgentReportFileToolArgsSchema.safeParse(value ?? {}); + return parsed.success + ? 
{ success: true, value: parsed.data } + : { success: false, error: parsed.error }; + }, + } + ); +} + +function getAgentReportInputSchema(config: ToolConfiguration) { + return config.subagentReportFiles ? buildFileInputSchema() : buildInlineInputSchema(config); +} + +function getAgentReportDescription(config: ToolConfiguration): string { + if (!config.subagentReportFiles) { + return TOOL_DEFINITIONS.agent_report.description; + } + + return ( + TOOL_DEFINITIONS.agent_report.description + + "\n\nSubagent file-backed report mode is enabled for this task. " + + "Write the final human-readable report to `report.md` in the workspace root. " + + (config.workflowAgentOutputSchema != null + ? "Write the required structured output as valid JSON to `structured-output.json`. " + : "") + + "Then call agent_report with reportMarkdownPath, structuredOutputPath, and title all set to null so Mux uses the default files. " + + "Only pass non-null file path arguments if you intentionally used non-default filenames." + ); +} + +async function readReportFile(params: { + config: ToolConfiguration; + filePath: string; + fieldPath: string; + maxBytes: number; +}): Promise<{ success: true; content: string } | AgentReportFailureResult> { + const { config, filePath, fieldPath, maxBytes } = params; + const pathValidation = validatePathInCwd(filePath, config.cwd, config.runtime, [ + config.runtimeTempDir, + ]); + if (pathValidation) { + return validationFailure("Report file submission failed.", [ + { path: fieldPath, message: pathValidation.error }, + ]); + } + + const resolvedPath = config.runtime.normalizePath(filePath, config.cwd); + let fileStat; + try { + fileStat = await config.runtime.stat(resolvedPath); + } catch (error) { + const message = error instanceof RuntimeError ? 
error.message : getErrorMessage(error); + return validationFailure("Report file submission failed.", [{ path: fieldPath, message }]); + } + + if (fileStat.isDirectory) { + return validationFailure("Report file submission failed.", [ + { path: fieldPath, message: `Path is a directory, not a file: ${resolvedPath}` }, + ]); + } + + const sizeValidation = validateFileSize(fileStat); + if (sizeValidation) { + return validationFailure("Report file submission failed.", [ + { path: fieldPath, message: sizeValidation.error }, + ]); + } + if (fileStat.size > maxBytes) { + return validationFailure("Report file submission failed.", [ + { + path: fieldPath, + message: `File is too large (${fileStat.size} bytes). Maximum allowed is ${maxBytes} bytes.`, + }, + ]); + } + + try { + const content = await readFileString(config.runtime, resolvedPath); + if (Buffer.byteLength(content, "utf-8") > maxBytes) { + return validationFailure("Report file submission failed.", [ + { + path: fieldPath, + message: `File is too large after decoding. Maximum allowed is ${maxBytes} bytes.`, + }, + ]); + } + return { success: true, content }; + } catch (error) { + const message = error instanceof RuntimeError ? error.message : getErrorMessage(error); + return validationFailure("Report file submission failed.", [{ path: fieldPath, message }]); + } +} + +async function executeFileBackedReport( + config: ToolConfiguration, + rawArgs: unknown +): Promise { + const parsed = AgentReportFileToolArgsSchema.safeParse(rawArgs ?? {}); + if (!parsed.success) { + return zodValidationFailure("Report file arguments failed validation.", parsed.error); + } + + const reportMarkdownPath = parsed.data.reportMarkdownPath ?? DEFAULT_REPORT_MARKDOWN_PATH; + const structuredOutputPath = + parsed.data.structuredOutputPath ?? + (config.workflowAgentOutputSchema != null ? 
DEFAULT_STRUCTURED_OUTPUT_PATH : undefined); + + const markdown = await readReportFile({ + config, + filePath: reportMarkdownPath, + fieldPath: "$.reportMarkdownPath", + maxBytes: REPORT_MARKDOWN_MAX_BYTES, + }); + if (!markdown.success) { + return markdown; + } + if (markdown.content.trim().length === 0) { + return validationFailure("Report file submission failed.", [ + { path: "$.reportMarkdownPath", message: "Report markdown must not be empty" }, + ]); + } + + let structuredOutput: unknown; + if (structuredOutputPath != null) { + const structuredOutputFile = await readReportFile({ + config, + filePath: structuredOutputPath, + fieldPath: "$.structuredOutputPath", + maxBytes: STRUCTURED_OUTPUT_MAX_BYTES, + }); + if (!structuredOutputFile.success) { + return structuredOutputFile; + } + try { + structuredOutput = JSON.parse(structuredOutputFile.content) as unknown; + } catch (error) { + return validationFailure("Structured output JSON failed parsing.", [ + { path: "$.structuredOutputPath", message: getErrorMessage(error) }, + ]); + } + } else if (config.workflowAgentOutputSchema != null) { + return validationFailure("Structured output file is required.", [ + { path: "$.structuredOutputPath", message: "Required property is missing" }, + ]); + } + + const structuredValidation = validateStructuredOutput(config, structuredOutput); + if (structuredValidation) { + return structuredValidation; + } + + const title = parsed.data.title?.trim(); + return { + success: true, + message: "Report submitted successfully.", + report: { + reportMarkdown: markdown.content, + ...(title ? { title } : {}), + ...(structuredOutput !== undefined ? 
{ structuredOutput } : {}), + }, + }; +} + +function executeInlineReport(config: ToolConfiguration, rawArgs: unknown): AgentReportResult { + const parsed = AgentReportInlineToolArgsSchema.safeParse(rawArgs); + if (!parsed.success) { + return zodValidationFailure("Report arguments failed validation.", parsed.error); + } + + const structuredValidation = validateStructuredOutput(config, parsed.data.structuredOutput); + if (structuredValidation) { + return structuredValidation; + } + + // Intentionally no report payload on success. The backend orchestrator consumes inline + // tool-call args from persisted history once the tool call completes successfully. + return { + success: true, + message: "Report submitted successfully.", + }; +} + export const createAgentReportTool: ToolFactory = (config: ToolConfiguration) => { return tool({ - description: TOOL_DEFINITIONS.agent_report.description, - inputSchema: TOOL_DEFINITIONS.agent_report.schema, - execute: (): { success: true; message: string } => { + description: getAgentReportDescription(config), + inputSchema: getAgentReportInputSchema(config), + execute: async (args: unknown): Promise => { const workspaceId = requireWorkspaceId(config, "agent_report"); const taskService = requireTaskService(config, "agent_report"); @@ -20,14 +326,11 @@ export const createAgentReportTool: ToolFactory = (config: ToolConfiguration) => ); } - // Intentionally no side-effects. The backend orchestrator consumes the tool-call args - // via persisted history/partial state once the tool call completes successfully. - // The stream continues after this so the SDK can record usage, while StreamManager - // stops autonomous loops once it observes agent_report with output.success === true. 
- return { - success: true, - message: "Report submitted successfully.", - }; + if (config.subagentReportFiles) { + return await executeFileBackedReport(config, args); + } + + return executeInlineReport(config, args); }, }); }; diff --git a/src/node/services/tools/task.ts b/src/node/services/tools/task.ts index 18feff807b..7da730fa63 100644 --- a/src/node/services/tools/task.ts +++ b/src/node/services/tools/task.ts @@ -78,6 +78,7 @@ interface PendingTaskInfo { interface CompletedTaskInfo { taskId: string; reportMarkdown: string; + structuredOutput?: unknown; title?: string; agentId: string; agentType: string; @@ -126,6 +127,7 @@ function serializeCompletedReport(report: CompletedTaskInfo) { return { taskId: report.taskId, reportMarkdown: report.reportMarkdown, + structuredOutput: report.structuredOutput, title: report.title, agentId: report.agentId, agentType: report.agentType, @@ -210,6 +212,7 @@ function buildCompletedTaskResult(params: { status: "completed", taskId: report.taskId, reportMarkdown: report.reportMarkdown, + structuredOutput: report.structuredOutput, title: report.title, agentId: report.agentId, agentType: report.agentType, @@ -397,6 +400,7 @@ export const createTaskTool: ToolFactory = (config: ToolConfiguration) => { report: { taskId: createdTask.taskId, reportMarkdown: report.reportMarkdown, + structuredOutput: report.structuredOutput, title: report.title, agentId: requestedAgentId, agentType: requestedAgentId, diff --git a/src/node/services/tools/task_await.ts b/src/node/services/tools/task_await.ts index 5715f24c43..0b4bf67e8d 100644 --- a/src/node/services/tools/task_await.ts +++ b/src/node/services/tools/task_await.ts @@ -283,6 +283,7 @@ export const createTaskAwaitTool: ToolFactory = (config: ToolConfiguration) => { status: "completed" as const, taskId, reportMarkdown: report.reportMarkdown, + structuredOutput: report.structuredOutput, title: report.title, ...getAgentTaskElapsedField(taskId), ...(gitFormatPatch ? 
{ artifacts: { gitFormatPatch } } : {}), @@ -309,6 +310,7 @@ export const createTaskAwaitTool: ToolFactory = (config: ToolConfiguration) => { status: "completed" as const, taskId, reportMarkdown: report.reportMarkdown, + structuredOutput: report.structuredOutput, title: report.title, ...getAgentTaskElapsedField(taskId), ...(gitFormatPatch ? { artifacts: { gitFormatPatch } } : {}), diff --git a/src/node/services/tools/workflow_definitions.test.ts b/src/node/services/tools/workflow_definitions.test.ts new file mode 100644 index 0000000000..55a0852cd9 --- /dev/null +++ b/src/node/services/tools/workflow_definitions.test.ts @@ -0,0 +1,120 @@ +/* eslint-disable @typescript-eslint/no-unsafe-assignment, @typescript-eslint/require-await */ +import { describe, expect, mock, test } from "bun:test"; +import type { ToolExecutionOptions } from "ai"; +import { + createWorkflowListTool, + createWorkflowReadTool, + createWorkflowWriteTool, +} from "./workflow_definitions"; +import { TestTempDir, createTestToolConfig } from "./testHelpers"; + +const mockToolCallOptions: ToolExecutionOptions = { + toolCallId: "test-call-id", + messages: [], +}; + +const descriptor = { + name: "deep-research", + description: "Deep research", + scope: "built-in" as const, + executable: true, +}; + +describe("workflow definition tools", () => { + test("lists available workflows through WorkflowService", async () => { + using tempDir = new TestTempDir("test-workflow-list-tool"); + const listDefinitions = mock(async () => [descriptor]); + const tool = createWorkflowListTool({ + ...createTestToolConfig(tempDir.path, { workspaceId: "workspace-1" }), + trusted: true, + workflowService: { + listDefinitions, + readDefinition: mock(async () => ({ + descriptor, + source: "export default function workflow() { return null; }", + })), + startNamedWorkflow: mock(async () => ({ + runId: "wfr_1", + status: "completed" as const, + result: null, + })), + }, + }); + + const result = await tool.execute!({}, 
mockToolCallOptions); + + expect(listDefinitions).toHaveBeenCalledWith({ projectTrusted: true }); + expect(result).toEqual({ workflows: [descriptor] }); + }); + + test("reads a workflow source through WorkflowService", async () => { + using tempDir = new TestTempDir("test-workflow-read-tool"); + const readDefinition = mock(async () => ({ + descriptor, + source: "export default function workflow() { return null; }", + })); + const tool = createWorkflowReadTool({ + ...createTestToolConfig(tempDir.path, { workspaceId: "workspace-1" }), + trusted: false, + workflowService: { + listDefinitions: mock(async () => []), + readDefinition, + startNamedWorkflow: mock(async () => ({ + runId: "wfr_1", + status: "completed" as const, + result: null, + })), + }, + }); + + const result = await tool.execute!({ name: "deep-research" }, mockToolCallOptions); + + expect(readDefinition).toHaveBeenCalledWith({ name: "deep-research", projectTrusted: false }); + expect(result).toEqual({ + descriptor, + source: "export default function workflow() { return null; }", + }); + }); + + test("writes session-scoped scratch workflows without promoting them", async () => { + using tempDir = new TestTempDir("test-workflow-write-tool"); + const scratchDescriptor = { + name: "scratch-flow", + description: "Scratch flow", + scope: "scratch" as const, + sourcePath: `${tempDir.path}/workflows/scratch-flow.js`, + executable: true, + }; + const writeScratchWorkflow = mock(async () => scratchDescriptor); + const tool = createWorkflowWriteTool({ + ...createTestToolConfig(tempDir.path, { workspaceId: "workspace-1" }), + workflowService: { + listDefinitions: mock(async () => []), + readDefinition: mock(async () => ({ + descriptor: scratchDescriptor, + source: "export default function workflow() { return null; }", + })), + writeScratchWorkflow, + startNamedWorkflow: mock(async () => ({ + runId: "wfr_1", + status: "completed" as const, + result: null, + })), + }, + }); + + const source = "export default function 
workflow() { return { reportMarkdown: 'ok' }; }"; + const result = await tool.execute!( + { name: "scratch-flow", description: "Scratch flow", source }, + mockToolCallOptions + ); + + expect(writeScratchWorkflow).toHaveBeenCalledWith({ + workspaceId: "workspace-1", + name: "scratch-flow", + description: "Scratch flow", + source, + }); + expect(result).toEqual({ descriptor: scratchDescriptor }); + }); +}); diff --git a/src/node/services/tools/workflow_definitions.ts b/src/node/services/tools/workflow_definitions.ts new file mode 100644 index 0000000000..bb2404966c --- /dev/null +++ b/src/node/services/tools/workflow_definitions.ts @@ -0,0 +1,71 @@ +import { tool } from "ai"; + +import type { ToolConfiguration, ToolFactory } from "@/common/utils/tools/tools"; +import { + TOOL_DEFINITIONS, + WorkflowListToolResultSchema, + WorkflowReadToolResultSchema, + WorkflowWriteToolResultSchema, +} from "@/common/utils/tools/toolDefinitions"; +import { parseToolResult, requireWorkspaceId } from "./toolUtils"; + +function requireWorkflowService(config: ToolConfiguration, toolName: string) { + if (!config.workflowService) { + throw new Error(`${toolName} requires workflowService`); + } + return config.workflowService; +} + +export const createWorkflowListTool: ToolFactory = (config: ToolConfiguration) => { + return tool({ + description: TOOL_DEFINITIONS.workflow_list.description, + inputSchema: TOOL_DEFINITIONS.workflow_list.schema, + execute: async (): Promise => { + const workflowService = requireWorkflowService(config, "workflow_list"); + const workflows = await workflowService.listDefinitions({ + projectTrusted: config.trusted === true, + }); + + return parseToolResult(WorkflowListToolResultSchema, { workflows }, "workflow_list"); + }, + }); +}; + +export const createWorkflowReadTool: ToolFactory = (config: ToolConfiguration) => { + return tool({ + description: TOOL_DEFINITIONS.workflow_read.description, + inputSchema: TOOL_DEFINITIONS.workflow_read.schema, + execute: async 
(args): Promise => { + const workflowService = requireWorkflowService(config, "workflow_read"); + const result = await workflowService.readDefinition({ + name: args.name, + projectTrusted: config.trusted === true, + }); + + return parseToolResult(WorkflowReadToolResultSchema, result, "workflow_read"); + }, + }); +}; + +export const createWorkflowWriteTool: ToolFactory = (config: ToolConfiguration) => { + return tool({ + description: TOOL_DEFINITIONS.workflow_write.description, + inputSchema: TOOL_DEFINITIONS.workflow_write.schema, + execute: async (args): Promise => { + const workspaceId = requireWorkspaceId(config, "workflow_write"); + const workflowService = requireWorkflowService(config, "workflow_write"); + if (workflowService.writeScratchWorkflow == null) { + throw new Error("workflow_write requires writeScratchWorkflow"); + } + + const descriptor = await workflowService.writeScratchWorkflow({ + workspaceId, + name: args.name, + description: args.description, + source: args.source, + }); + + return parseToolResult(WorkflowWriteToolResultSchema, { descriptor }, "workflow_write"); + }, + }); +}; diff --git a/src/node/services/tools/workflow_run.test.ts b/src/node/services/tools/workflow_run.test.ts new file mode 100644 index 0000000000..95b50933c0 --- /dev/null +++ b/src/node/services/tools/workflow_run.test.ts @@ -0,0 +1,161 @@ +/* eslint-disable @typescript-eslint/await-thenable, @typescript-eslint/no-unsafe-assignment, @typescript-eslint/require-await */ +import { describe, expect, mock, test } from "bun:test"; +import type { ToolExecutionOptions } from "ai"; +import { createWorkflowRunTool } from "./workflow_run"; +import { TestTempDir, createTestToolConfig } from "./testHelpers"; + +const mockToolCallOptions: ToolExecutionOptions = { + toolCallId: "test-call-id", + messages: [], +}; + +describe("workflow_run tool", () => { + test("starts a named workflow through WorkflowService", async () => { + using tempDir = new TestTempDir("test-workflow-run-tool"); + 
const startNamedWorkflow = mock(async () => ({ + runId: "wfr_123", + status: "completed" as const, + result: { reportMarkdown: "done" }, + })); + const getRun = mock(async () => ({ + id: "wfr_123", + workspaceId: "workspace-1", + definition: { + name: "deep-research", + description: "Deep research", + scope: "built-in" as const, + executable: true, + }, + definitionSource: "export default function workflow() { return null; }", + definitionHash: "sha256:test", + args: { topic: "workflow tools" }, + status: "completed" as const, + createdAt: "2026-05-29T00:00:00.000Z", + updatedAt: "2026-05-29T00:00:01.000Z", + events: [ + { + sequence: 1, + type: "status" as const, + at: "2026-05-29T00:00:00.000Z", + status: "running" as const, + }, + { sequence: 2, type: "phase" as const, at: "2026-05-29T00:00:00.000Z", name: "scope" }, + { + sequence: 3, + type: "result" as const, + at: "2026-05-29T00:00:01.000Z", + result: { reportMarkdown: "done" }, + }, + { + sequence: 4, + type: "status" as const, + at: "2026-05-29T00:00:01.000Z", + status: "completed" as const, + }, + ], + steps: [], + })); + const abortController = new AbortController(); + const tool = createWorkflowRunTool({ + ...createTestToolConfig(tempDir.path, { workspaceId: "workspace-1" }), + trusted: true, + workflowService: { + listDefinitions: mock(async () => []), + readDefinition: mock(async () => ({ + descriptor: { + name: "deep-research", + description: "Deep research", + scope: "built-in", + executable: true, + }, + source: "export default function workflow() { return null; }", + })), + startNamedWorkflow, + getRun, + }, + }); + + const result = await tool.execute!( + { name: "deep-research", args: { topic: "workflow tools" }, run_in_background: false }, + { ...mockToolCallOptions, abortSignal: abortController.signal } + ); + + expect(startNamedWorkflow).toHaveBeenCalledWith({ + name: "deep-research", + workspaceId: "workspace-1", + projectTrusted: true, + args: { topic: "workflow tools" }, + abortSignal: 
abortController.signal, + }); + expect(getRun).toHaveBeenCalledWith({ workspaceId: "workspace-1", runId: "wfr_123" }); + expect(result).toEqual({ + status: "completed", + runId: "wfr_123", + result: { reportMarkdown: "done" }, + run: expect.objectContaining({ + id: "wfr_123", + status: "completed", + events: expect.arrayContaining([expect.objectContaining({ type: "phase", name: "scope" })]), + }), + }); + }); + + test("starts a workflow in background mode", async () => { + using tempDir = new TestTempDir("test-workflow-run-tool-background"); + const startNamedWorkflow = mock(async () => { + throw new Error("foreground start should not be used"); + }); + const startNamedWorkflowInBackground = mock(async () => ({ + runId: "wfr_background", + status: "running" as const, + result: null, + })); + const getRun = mock(async () => null); + const tool = createWorkflowRunTool({ + ...createTestToolConfig(tempDir.path, { workspaceId: "workspace-1" }), + trusted: false, + workflowService: { + listDefinitions: mock(async () => []), + readDefinition: mock(async () => ({ + descriptor: { + name: "deep-research", + description: "Deep research", + scope: "built-in", + executable: true, + }, + source: "export default function workflow() { return null; }", + })), + startNamedWorkflow, + startNamedWorkflowInBackground, + getRun, + }, + }); + + const result = await tool.execute!( + { name: "deep-research", args: { topic: "workflow tools" }, run_in_background: true }, + mockToolCallOptions + ); + + expect(startNamedWorkflowInBackground).toHaveBeenCalledWith({ + name: "deep-research", + workspaceId: "workspace-1", + projectTrusted: false, + args: { topic: "workflow tools" }, + }); + expect(startNamedWorkflow).not.toHaveBeenCalled(); + expect(result).toEqual({ status: "running", runId: "wfr_background", result: null }); + }); + + test("requires the workflow service", async () => { + using tempDir = new TestTempDir("test-workflow-run-tool-missing"); + const tool = createWorkflowRunTool({ + 
...createTestToolConfig(tempDir.path, { workspaceId: "workspace-1" }), + }); + + await expect( + Promise.resolve( + tool.execute!({ name: "demo", args: {}, run_in_background: false }, mockToolCallOptions) + ) + ).rejects.toThrow(/workflowService/); + }); +}); diff --git a/src/node/services/tools/workflow_run.ts b/src/node/services/tools/workflow_run.ts new file mode 100644 index 0000000000..c96d4793d2 --- /dev/null +++ b/src/node/services/tools/workflow_run.ts @@ -0,0 +1,62 @@ +import { tool } from "ai"; + +import type { ToolConfiguration, ToolFactory } from "@/common/utils/tools/tools"; +import { + WorkflowRunToolResultSchema, + TOOL_DEFINITIONS, +} from "@/common/utils/tools/toolDefinitions"; +import { parseToolResult, requireWorkspaceId } from "./toolUtils"; + +function requireWorkflowService(config: ToolConfiguration) { + if (!config.workflowService) { + throw new Error("workflow_run requires workflowService"); + } + return config.workflowService; +} + +function requireBackgroundWorkflowStart( + workflowService: NonNullable +) { + if (workflowService.startNamedWorkflowInBackground == null) { + throw new Error("workflow_run background mode requires startNamedWorkflowInBackground"); + } + return workflowService.startNamedWorkflowInBackground.bind(workflowService); +} + +export const createWorkflowRunTool: ToolFactory = (config: ToolConfiguration) => { + return tool({ + description: TOOL_DEFINITIONS.workflow_run.description, + inputSchema: TOOL_DEFINITIONS.workflow_run.schema, + execute: async (args, options): Promise => { + const workspaceId = requireWorkspaceId(config, "workflow_run"); + const workflowService = requireWorkflowService(config); + + const startInput = { + name: args.name, + workspaceId, + projectTrusted: config.trusted === true, + args: args.args ?? {}, + }; + const result = + args.run_in_background === true + ? 
await requireBackgroundWorkflowStart(workflowService)(startInput) + : await workflowService.startNamedWorkflow({ + ...startInput, + ...(options.abortSignal != null ? { abortSignal: options.abortSignal } : {}), + }); + + const run = await workflowService.getRun?.({ workspaceId, runId: result.runId }); + + return parseToolResult( + WorkflowRunToolResultSchema, + { + status: result.status, + runId: result.runId, + result: result.result, + ...(run != null ? { run } : {}), + }, + "workflow_run" + ); + }, + }); +}; diff --git a/src/node/services/workflows/WorkflowDefinitionStore.test.ts b/src/node/services/workflows/WorkflowDefinitionStore.test.ts new file mode 100644 index 0000000000..49373e8084 --- /dev/null +++ b/src/node/services/workflows/WorkflowDefinitionStore.test.ts @@ -0,0 +1,237 @@ +import * as fs from "node:fs/promises"; +import * as path from "node:path"; + +import { describe, expect, test } from "bun:test"; +import { RUNTIME_MODE } from "@/common/types/runtime"; +import { DisposableTempDir } from "@/node/services/tempDir"; +import { TrueRemotePathMappedRuntime } from "@/node/services/tools/testHelpers"; +import { + shouldUseRuntimeWorkflowProjectIO, + WorkflowDefinitionStore, +} from "./WorkflowDefinitionStore"; + +async function writeWorkflow( + root: string, + name: string, + description: string, + body = "return args;" +) { + await fs.mkdir(root, { recursive: true }); + await fs.writeFile( + path.join(root, `${name}.js`), + `// description: ${description}\nexport default async function workflow({ args }) { ${body} }\n`, + "utf-8" + ); +} + +describe("WorkflowDefinitionStore", () => { + test("uses runtime project I/O only when workspace paths are runtime-owned", () => { + expect(shouldUseRuntimeWorkflowProjectIO(RUNTIME_MODE.LOCAL)).toBe(false); + expect(shouldUseRuntimeWorkflowProjectIO(RUNTIME_MODE.WORKTREE)).toBe(false); + expect(shouldUseRuntimeWorkflowProjectIO(RUNTIME_MODE.DEVCONTAINER)).toBe(false); + 
expect(shouldUseRuntimeWorkflowProjectIO(RUNTIME_MODE.SSH)).toBe(true); + expect(shouldUseRuntimeWorkflowProjectIO(RUNTIME_MODE.DOCKER)).toBe(true); + }); + + test("discovers workflows by project, global, then built-in precedence when trusted", async () => { + using tmp = new DisposableTempDir("workflow-definitions"); + const projectRoot = path.join(tmp.path, "project", ".mux", "workflows"); + const globalRoot = path.join(tmp.path, "mux-home", "workflows"); + await writeWorkflow(projectRoot, "demo", "Project demo"); + await writeWorkflow(globalRoot, "demo", "Global demo"); + await writeWorkflow(globalRoot, "global-only", "Global only"); + + const store = new WorkflowDefinitionStore({ + projectRoot, + globalRoot, + builtIns: [ + { name: "demo", description: "Built-in demo", source: "export default () => null;" }, + { + name: "deep-research", + description: "Built-in research", + source: "export default () => null;", + }, + ], + }); + + const definitions = await store.listDefinitions({ projectTrusted: true }); + + expect(definitions.map((definition) => [definition.name, definition.scope])).toEqual([ + ["deep-research", "built-in"], + ["demo", "project"], + ["global-only", "global"], + ]); + expect(definitions.find((definition) => definition.name === "demo")?.description).toBe( + "Project demo" + ); + }); + + test("omits project-local workflows when the project is not trusted", async () => { + using tmp = new DisposableTempDir("workflow-definitions"); + const projectRoot = path.join(tmp.path, "project", ".mux", "workflows"); + const globalRoot = path.join(tmp.path, "mux-home", "workflows"); + await writeWorkflow(projectRoot, "demo", "Project demo"); + await writeWorkflow(globalRoot, "demo", "Global demo"); + + const store = new WorkflowDefinitionStore({ projectRoot, globalRoot, builtIns: [] }); + + const definitions = await store.listDefinitions({ projectTrusted: false }); + + expect(definitions).toEqual([ + { + name: "demo", + description: "Global demo", + scope: 
"global", + sourcePath: path.join(globalRoot, "demo.js"), + executable: true, + }, + ]); + }); + + test("reads the selected reusable definition source and excludes scratch from discovery", async () => { + using tmp = new DisposableTempDir("workflow-definitions"); + const projectRoot = path.join(tmp.path, "project", ".mux", "workflows"); + const globalRoot = path.join(tmp.path, "mux-home", "workflows"); + await writeWorkflow(projectRoot, "demo", "Project demo", "return { project: true };"); + + const store = new WorkflowDefinitionStore({ + projectRoot, + globalRoot, + builtIns: [ + { + name: "scratch-example", + description: "Built-in fallback", + source: "export default () => null;", + }, + ], + }); + + const definition = await store.readDefinition("demo", { projectTrusted: true }); + const discovered = await store.listDefinitions({ projectTrusted: true }); + + expect(definition.source).toContain("project: true"); + expect(definition.descriptor.scope).toBe("project"); + expect(discovered.every((candidate) => candidate.scope !== "scratch")).toBe(true); + }); + + test("writes session-scoped scratch workflows and discovers them before reusable definitions", async () => { + using tmp = new DisposableTempDir("workflow-definitions"); + const scratchRoot = path.join(tmp.path, "session", "workflows"); + const projectRoot = path.join(tmp.path, "project", ".mux", "workflows"); + const globalRoot = path.join(tmp.path, "mux-home", "workflows"); + await writeWorkflow(globalRoot, "scratch-demo", "Global fallback"); + const store = new WorkflowDefinitionStore({ + scratchRoot, + projectRoot, + globalRoot, + builtIns: [ + { + name: "scratch-demo", + description: "Built-in fallback", + source: "export default () => null;", + }, + ], + }); + + const descriptor = await store.writeScratchDefinition({ + name: "scratch-demo", + description: "Session scratch demo", + source: "export default function workflow() { return { reportMarkdown: 'scratch' }; }", + }); + const definitions = await 
store.listDefinitions({ projectTrusted: false }); + const definition = await store.readDefinition("scratch-demo", { projectTrusted: false }); + + expect(descriptor).toMatchObject({ + name: "scratch-demo", + description: "Session scratch demo", + scope: "scratch", + sourcePath: path.join(scratchRoot, "scratch-demo.js"), + executable: true, + }); + expect(definitions).toEqual([descriptor]); + expect(definition.descriptor.scope).toBe("scratch"); + expect(definition.source).toContain("// description: Session scratch demo"); + const scratchSource = await fs.readFile(path.join(scratchRoot, "scratch-demo.js"), "utf-8"); + expect(scratchSource).toContain("reportMarkdown: 'scratch'"); + }); + + test("uses runtime I/O for project workflow discovery and promotion", async () => { + using tmp = new DisposableTempDir("workflow-definitions"); + const remoteBase = "/remote-workspaces"; + const workspacePath = path.posix.join(remoteBase, "project", "feature"); + const runtime = new TrueRemotePathMappedRuntime(tmp.path, remoteBase); + const projectRoot = runtime.normalizePath(".mux/workflows", workspacePath); + const localWorkflowRoot = path.join(tmp.path, "project", "feature", ".mux", "workflows"); + const globalRoot = path.join(tmp.path, "mux-home", "workflows"); + await writeWorkflow( + localWorkflowRoot, + "remote-demo", + "Remote project demo", + "return { remote: true };" + ); + + const store = new WorkflowDefinitionStore({ + projectRoot, + projectRuntime: runtime, + projectCwd: workspacePath, + globalRoot, + builtIns: [], + }); + + const definition = await store.readDefinition("remote-demo", { projectTrusted: true }); + const promoted = await store.promoteDefinition({ + name: "promoted-demo", + description: "Promoted over runtime", + source: "export default function workflow() { return { reportMarkdown: 'ok' }; }", + location: "project", + overwrite: false, + projectTrusted: true, + }); + + expect(definition.descriptor.sourcePath).toBe(`${projectRoot}/remote-demo.js`); + 
expect(definition.source).toContain("remote: true"); + expect(promoted.sourcePath).toBe(`${projectRoot}/promoted-demo.js`); + const promotedSource = await fs.readFile( + path.join(localWorkflowRoot, "promoted-demo.js"), + "utf-8" + ); + expect(promotedSource).toContain("// description: Promoted over runtime"); + + let duplicateError: unknown; + try { + await store.promoteDefinition({ + name: "promoted-demo", + description: "Duplicate", + source: "export default function workflow() { return null; }", + location: "project", + overwrite: false, + projectTrusted: true, + }); + } catch (error) { + duplicateError = error; + } + if (!(duplicateError instanceof Error)) { + throw new Error("Expected duplicate promotion to fail"); + } + expect(duplicateError.message).toMatch(/already exists/); + }); + + test("skips invalid filenames and unreadable descriptors", async () => { + using tmp = new DisposableTempDir("workflow-definitions"); + const projectRoot = path.join(tmp.path, "project", ".mux", "workflows"); + const globalRoot = path.join(tmp.path, "mux-home", "workflows"); + await writeWorkflow(projectRoot, "valid-name", "Valid workflow"); + await fs.writeFile(path.join(projectRoot, "BadName.js"), "// description: bad\n", "utf-8"); + await fs.writeFile( + path.join(projectRoot, "missing-description.js"), + "export default () => null;", + "utf-8" + ); + + const store = new WorkflowDefinitionStore({ projectRoot, globalRoot, builtIns: [] }); + + const definitions = await store.listDefinitions({ projectTrusted: true }); + + expect(definitions.map((definition) => definition.name)).toEqual(["valid-name"]); + }); +}); diff --git a/src/node/services/workflows/WorkflowDefinitionStore.ts b/src/node/services/workflows/WorkflowDefinitionStore.ts new file mode 100644 index 0000000000..3fffa4380d --- /dev/null +++ b/src/node/services/workflows/WorkflowDefinitionStore.ts @@ -0,0 +1,466 @@ +import * as fs from "node:fs/promises"; +import * as path from "node:path"; + +import { 
WorkflowDefinitionDescriptorSchema, WorkflowNameSchema } from "@/common/orpc/schemas"; +import { RUNTIME_MODE, type RuntimeMode } from "@/common/types/runtime"; +import type { WorkflowDefinitionDescriptor, WorkflowName } from "@/common/types/workflow"; +import assert from "@/common/utils/assert"; +import { getErrorMessage } from "@/common/utils/errors"; +import type { Runtime } from "@/node/runtime/Runtime"; +import { log } from "@/node/services/log"; +import { quoteRuntimeProbePath } from "@/node/services/tools/runtimePathShellQuote"; +import { execBuffered, readFileString, writeFileString } from "@/node/utils/runtime/helpers"; +import { + BUILT_IN_WORKFLOW_DEFINITIONS, + type BuiltInWorkflowDefinition, +} from "./builtInWorkflowDefinitions"; + +export interface WorkflowDefinitionStoreOptions { + projectRoot: string; + globalRoot: string; + scratchRoot?: string; + projectRuntime?: Runtime; + projectCwd?: string; + builtIns?: readonly BuiltInWorkflowDefinition[]; +} + +export function shouldUseRuntimeWorkflowProjectIO(runtimeType: RuntimeMode): boolean { + return runtimeType === RUNTIME_MODE.SSH || runtimeType === RUNTIME_MODE.DOCKER; +} + +export type WorkflowPromotionLocation = "project" | "global"; + +export interface PromoteWorkflowDefinitionInput { + name: string; + description: string; + source: string; + location: WorkflowPromotionLocation; + overwrite: boolean; + projectTrusted: boolean; +} + +export interface WorkflowDefinitionReadResult { + descriptor: WorkflowDefinitionDescriptor; + source: string; +} + +interface ScannedWorkflowDefinition { + descriptor: WorkflowDefinitionDescriptor; + source: string; +} + +const DESCRIPTION_PREFIX = "// description:"; + +function parseWorkflowDescription(source: string): string | null { + const firstMeaningfulLine = source + .split("\n") + .map((line) => line.trim()) + .find((line) => line.length > 0); + + if (!firstMeaningfulLine?.startsWith(DESCRIPTION_PREFIX)) { + return null; + } + + const description = 
firstMeaningfulLine.slice(DESCRIPTION_PREFIX.length).trim(); + return description.length > 0 ? description : null; +} + +function descriptorForFile(args: { + name: WorkflowName; + description: string; + scope: "project" | "global" | "scratch"; + sourcePath: string; +}): WorkflowDefinitionDescriptor | null { + const descriptor = { + name: args.name, + description: args.description, + scope: args.scope, + sourcePath: args.sourcePath, + executable: true, + } satisfies WorkflowDefinitionDescriptor; + + const parsed = WorkflowDefinitionDescriptorSchema.safeParse(descriptor); + if (!parsed.success) { + log.warn(`Invalid workflow definition descriptor '${args.name}': ${parsed.error.message}`); + return null; + } + + return parsed.data; +} + +async function scanDirectory( + root: string, + scope: "project" | "global" | "scratch" +): Promise { + let entries: string[]; + try { + entries = await fs.readdir(root); + } catch { + return []; + } + + const definitions: ScannedWorkflowDefinition[] = []; + for (const entry of entries) { + if (!entry.endsWith(".js")) { + continue; + } + + const rawName = entry.slice(0, -".js".length); + const nameResult = WorkflowNameSchema.safeParse(rawName); + if (!nameResult.success) { + log.warn(`Skipping invalid workflow filename '${entry}' in ${root}`); + continue; + } + + const sourcePath = path.join(root, entry); + let source: string; + try { + const stat = await fs.stat(sourcePath); + if (!stat.isFile()) { + continue; + } + source = await fs.readFile(sourcePath, "utf-8"); + } catch (error) { + log.warn(`Skipping unreadable workflow '${sourcePath}': ${getErrorMessage(error)}`); + continue; + } + + const description = parseWorkflowDescription(source); + if (description == null) { + log.warn(`Skipping workflow '${sourcePath}' because it is missing a description header`); + continue; + } + + const descriptor = descriptorForFile({ + name: nameResult.data, + description, + scope, + sourcePath, + }); + if (descriptor == null) { + continue; + } + + 
definitions.push({ descriptor, source }); + } + + return definitions; +} + +async function listRuntimeWorkflowFilenames( + runtime: Runtime, + root: string, + cwd: string +): Promise { + const quotedRoot = quoteRuntimeProbePath(root); + const result = await execBuffered( + runtime, + `if [ ! -d ${quotedRoot} ]; then exit 0; fi +for file in ${quotedRoot}/*.js; do + [ -f "$file" ] || continue + basename "$file" +done`, + { cwd, timeout: 10 } + ); + if (result.exitCode !== 0) { + const details = result.stderr.trim() || result.stdout.trim() || `exit code ${result.exitCode}`; + throw new Error(`Runtime workflow discovery failed: ${details}`); + } + return result.stdout + .split("\n") + .map((entry) => entry.trim()) + .filter((entry) => entry.length > 0); +} + +async function scanRuntimeDirectory( + runtime: Runtime, + root: string, + cwd: string, + scope: "project" +): Promise { + let entries: string[]; + try { + entries = await listRuntimeWorkflowFilenames(runtime, root, cwd); + } catch (error) { + log.warn(`Skipping runtime workflow root '${root}': ${getErrorMessage(error)}`); + return []; + } + + const definitions: ScannedWorkflowDefinition[] = []; + for (const entry of entries) { + if (!entry.endsWith(".js")) { + continue; + } + + const rawName = entry.slice(0, -".js".length); + const nameResult = WorkflowNameSchema.safeParse(rawName); + if (!nameResult.success) { + log.warn(`Skipping invalid workflow filename '${entry}' in ${root}`); + continue; + } + + const sourcePath = runtime.normalizePath(entry, root); + let source: string; + try { + source = await readFileString(runtime, sourcePath); + } catch (error) { + log.warn(`Skipping unreadable runtime workflow '${sourcePath}': ${getErrorMessage(error)}`); + continue; + } + + const description = parseWorkflowDescription(source); + if (description == null) { + log.warn(`Skipping workflow '${sourcePath}' because it is missing a description header`); + continue; + } + + const descriptor = descriptorForFile({ + name: 
nameResult.data, + description, + scope, + sourcePath, + }); + if (descriptor == null) { + continue; + } + + definitions.push({ descriptor, source }); + } + + return definitions; +} + +async function runtimePathExists( + runtime: Runtime, + targetPath: string, + cwd: string +): Promise { + const result = await execBuffered(runtime, `[ -e ${quoteRuntimeProbePath(targetPath)} ]`, { + cwd, + timeout: 5, + }); + if (result.exitCode === 0) { + return true; + } + if (result.exitCode === 1) { + return false; + } + const details = result.stderr.trim() || result.stdout.trim() || `exit code ${result.exitCode}`; + throw new Error(`Runtime workflow path probe failed: ${details}`); +} + +function readBuiltInDefinitions( + builtIns: readonly BuiltInWorkflowDefinition[] +): ScannedWorkflowDefinition[] { + const definitions: ScannedWorkflowDefinition[] = []; + for (const builtIn of builtIns) { + const descriptor = WorkflowDefinitionDescriptorSchema.parse({ + name: builtIn.name, + description: builtIn.description, + scope: "built-in", + executable: true, + }); + definitions.push({ descriptor, source: builtIn.source }); + } + return definitions; +} + +function normalizePromotionDescription(description: string): string { + const normalized = description.replace(/\s+/gu, " ").trim(); + assert(normalized.length > 0, "Workflow promotion description is required"); + return normalized; +} + +function withDescriptionHeader(source: string, description: string): string { + const lines = source.replace(/^\uFEFF/u, "").split("\n"); + const firstMeaningfulIndex = lines.findIndex((line) => line.trim().length > 0); + if ( + firstMeaningfulIndex >= 0 && + lines[firstMeaningfulIndex]?.trim().startsWith(DESCRIPTION_PREFIX) + ) { + lines.splice(firstMeaningfulIndex, 1, `${DESCRIPTION_PREFIX} ${description}`); + return lines.join("\n"); + } + return `${DESCRIPTION_PREFIX} ${description}\n${source}`; +} + +export class WorkflowDefinitionStore { + private readonly projectRoot: string; + private readonly 
globalRoot: string; + private readonly scratchRoot?: string; + private readonly projectRuntime?: Runtime; + private readonly projectCwd?: string; + private readonly builtIns: readonly BuiltInWorkflowDefinition[]; + + constructor(options: WorkflowDefinitionStoreOptions) { + assert(options.projectRoot.length > 0, "WorkflowDefinitionStore: projectRoot is required"); + assert(options.globalRoot.length > 0, "WorkflowDefinitionStore: globalRoot is required"); + assert( + options.projectRuntime == null || + (options.projectCwd != null && options.projectCwd.length > 0), + "WorkflowDefinitionStore: projectCwd is required with projectRuntime" + ); + + this.projectRoot = options.projectRoot; + this.globalRoot = options.globalRoot; + this.scratchRoot = options.scratchRoot; + this.projectRuntime = options.projectRuntime; + this.projectCwd = options.projectCwd; + this.builtIns = options.builtIns ?? BUILT_IN_WORKFLOW_DEFINITIONS; + } + + async listDefinitions(options: { + projectTrusted: boolean; + }): Promise { + const byName = await this.collectDefinitions(options); + return Array.from(byName.values()) + .map((definition) => definition.descriptor) + .sort((a, b) => a.name.localeCompare(b.name)); + } + + async readDefinition( + name: string, + options: { projectTrusted: boolean } + ): Promise { + const parsedName = WorkflowNameSchema.parse(name); + const byName = await this.collectDefinitions(options); + const definition = byName.get(parsedName); + if (definition == null) { + throw new Error(`Workflow definition not found: ${parsedName}`); + } + return { + descriptor: definition.descriptor, + source: definition.source, + }; + } + + async writeScratchDefinition(input: { + name: string; + description: string; + source: string; + }): Promise { + assert( + this.scratchRoot != null && this.scratchRoot.length > 0, + "WorkflowDefinitionStore.writeScratchDefinition: scratchRoot is required" + ); + const name = WorkflowNameSchema.parse(input.name); + const description = 
normalizePromotionDescription(input.description); + assert( + input.source.trim().length > 0, + "WorkflowDefinitionStore.writeScratchDefinition: source is required" + ); + assert( + /export\s+default\s+(async\s+)?function/u.test(input.source), + "Workflow scratch source must export a default function" + ); + + const sourcePath = path.join(this.scratchRoot, `${name}.js`); + const source = withDescriptionHeader(input.source, description); + await fs.mkdir(this.scratchRoot, { recursive: true }); + await fs.writeFile(sourcePath, source, "utf-8"); + + const descriptor = descriptorForFile({ + name, + description, + scope: "scratch", + sourcePath, + }); + assert( + descriptor != null, + "WorkflowDefinitionStore.writeScratchDefinition: descriptor must be valid" + ); + return descriptor; + } + + async promoteDefinition( + input: PromoteWorkflowDefinitionInput + ): Promise { + const name = WorkflowNameSchema.parse(input.name); + const description = normalizePromotionDescription(input.description); + assert( + input.source.trim().length > 0, + "WorkflowDefinitionStore.promoteDefinition: source is required" + ); + if (input.location === "project" && !input.projectTrusted) { + throw new Error("Project trust is required to promote project-local workflows"); + } + + const root = input.location === "project" ? this.projectRoot : this.globalRoot; + const sourcePath = + this.projectRuntime?.normalizePath(`${name}.js`, root) ?? 
path.join(root, `${name}.js`); + const promotedSource = withDescriptionHeader(input.source, description); + if (input.location === "project" && this.projectRuntime != null) { + assert( + this.projectCwd != null, + "WorkflowDefinitionStore.promoteDefinition: projectCwd missing" + ); + await this.projectRuntime.ensureDir(root); + if ( + !input.overwrite && + (await runtimePathExists(this.projectRuntime, sourcePath, this.projectCwd)) + ) { + throw new Error(`Workflow definition already exists: ${sourcePath}`); + } + await writeFileString(this.projectRuntime, sourcePath, promotedSource); + } else { + await fs.mkdir(root, { recursive: true }); + await fs.writeFile(sourcePath, promotedSource, { + encoding: "utf-8", + flag: input.overwrite ? "w" : "wx", + }); + } + + const descriptor = descriptorForFile({ + name, + description, + scope: input.location, + sourcePath, + }); + assert( + descriptor != null, + "WorkflowDefinitionStore.promoteDefinition: descriptor must be valid" + ); + return descriptor; + } + + private async collectDefinitions(options: { + projectTrusted: boolean; + }): Promise> { + const byName = new Map(); + const sources: ScannedWorkflowDefinition[][] = []; + + if (this.scratchRoot != null) { + sources.push(await scanDirectory(this.scratchRoot, "scratch")); + } + if (options.projectTrusted) { + if (this.projectRuntime != null) { + assert( + this.projectCwd != null, + "WorkflowDefinitionStore.collectDefinitions: projectCwd missing" + ); + sources.push( + await scanRuntimeDirectory( + this.projectRuntime, + this.projectRoot, + this.projectCwd, + "project" + ) + ); + } else { + sources.push(await scanDirectory(this.projectRoot, "project")); + } + } + sources.push(await scanDirectory(this.globalRoot, "global")); + sources.push(readBuiltInDefinitions(this.builtIns)); + + for (const source of sources) { + for (const definition of source) { + if (!byName.has(definition.descriptor.name)) { + byName.set(definition.descriptor.name, definition); + } + } + } + + 
return byName; + } +} diff --git a/src/node/services/workflows/WorkflowRunStore.test.ts b/src/node/services/workflows/WorkflowRunStore.test.ts new file mode 100644 index 0000000000..3ea3262d17 --- /dev/null +++ b/src/node/services/workflows/WorkflowRunStore.test.ts @@ -0,0 +1,307 @@ +/* eslint-disable @typescript-eslint/await-thenable */ +import * as fs from "node:fs/promises"; +import * as path from "node:path"; + +import { describe, expect, test } from "bun:test"; +import { DisposableTempDir } from "@/node/services/tempDir"; +import { WorkflowRunStore } from "./WorkflowRunStore"; + +const definition = { + name: "deep-research", + description: "Research a topic", + scope: "built-in" as const, + executable: true, +}; + +const source = "export default async function workflow() { return 'ok'; }\n"; + +async function createStore(sessionDir: string, staleLeaseMs = 10) { + const store = new WorkflowRunStore({ sessionDir, staleLeaseMs }); + await store.createRun({ + id: "wfr_123", + workspaceId: "workspace-1", + definition, + definitionSource: source, + args: { topic: "durable runs" }, + now: "2026-05-29T00:00:00.000Z", + }); + return store; +} + +describe("WorkflowRunStore", () => { + test("persists captured definition source and reloads run state", async () => { + using tmp = new DisposableTempDir("workflow-runs"); + const store = await createStore(tmp.path); + + await store.appendEvent("wfr_123", { + sequence: 1, + type: "status", + at: "2026-05-29T00:00:01.000Z", + status: "running", + }); + + const reloadedStore = new WorkflowRunStore({ sessionDir: tmp.path, staleLeaseMs: 10 }); + const run = await reloadedStore.getRun("wfr_123"); + + expect(run.definitionSource).toBe(source); + expect(run.definitionHash).toMatch(/^sha256:/); + expect(run.events.map((event) => event.sequence)).toEqual([1]); + }); + + test("ignores malformed journal lines while preserving valid events and steps", async () => { + using tmp = new DisposableTempDir("workflow-runs"); + const store = 
await createStore(tmp.path); + + await store.appendEvent("wfr_123", { + sequence: 1, + type: "phase", + at: "2026-05-29T00:00:01.000Z", + name: "scope", + }); + await store.recordStepCompleted("wfr_123", { + stepId: "scope-task", + inputHash: "input:1", + taskId: "task_1", + result: { reportMarkdown: "done", structuredOutput: { ok: true } }, + startedAt: "2026-05-29T00:00:01.000Z", + completedAt: "2026-05-29T00:00:02.000Z", + }); + + await fs.appendFile(path.join(tmp.path, "workflows", "wfr_123", "events.jsonl"), "not json\n"); + await fs.appendFile( + path.join(tmp.path, "workflows", "wfr_123", "steps.jsonl"), + '{"bad":true}\n' + ); + + const run = await store.getRun("wfr_123"); + const completed = await store.getCompletedStep("wfr_123", "scope-task", "input:1"); + + expect(run.events).toHaveLength(1); + expect(run.steps).toHaveLength(1); + expect(completed?.result?.structuredOutput).toEqual({ ok: true }); + }); + + test("rejects duplicate or out-of-order event sequence numbers", async () => { + using tmp = new DisposableTempDir("workflow-runs"); + const store = await createStore(tmp.path); + + await store.appendEvent("wfr_123", { + sequence: 1, + type: "log", + at: "2026-05-29T00:00:01.000Z", + message: "first", + }); + + await expect( + store.appendEvent("wfr_123", { + sequence: 1, + type: "log", + at: "2026-05-29T00:00:02.000Z", + message: "duplicate", + }) + ).rejects.toThrow(/strictly ordered/); + }); + + test("preserves interrupted runs unless explicit resume is allowed", async () => { + using tmp = new DisposableTempDir("workflow-runs"); + const store = await createStore(tmp.path); + + await store.appendStatus("wfr_123", "interrupted", "2026-05-29T00:00:01.000Z"); + + await expect( + store.appendStatus("wfr_123", "running", "2026-05-29T00:00:02.000Z") + ).rejects.toThrow(/interrupted/); + await expect( + store.appendStatus("wfr_123", "completed", "2026-05-29T00:00:02.000Z") + ).rejects.toThrow(/interrupted/); + await expect( + store.appendEvent("wfr_123", 
{ + sequence: 2, + type: "log", + at: "2026-05-29T00:00:02.000Z", + message: "too late", + }) + ).rejects.toThrow(/interrupted/); + await expect( + store.recordStepCompleted("wfr_123", { + stepId: "late-step", + inputHash: "hash:late-step", + taskId: "task_late", + result: { reportMarkdown: "late" }, + startedAt: "2026-05-29T00:00:01.000Z", + completedAt: "2026-05-29T00:00:02.000Z", + }) + ).rejects.toThrow(/interrupted/); + await expect(store.getRun("wfr_123")).resolves.toMatchObject({ status: "interrupted" }); + + await expect( + store.appendStatus("wfr_123", "running", "2026-05-29T00:00:03.000Z", { + allowInterruptedResume: true, + }) + ).resolves.toMatchObject({ status: "running" }); + }); + + test("fences journal and step writes by current lease owner", async () => { + using tmp = new DisposableTempDir("workflow-runs"); + const store = await createStore(tmp.path); + + await expect(store.acquireLease("wfr_123", "runner-a", 1000)).resolves.toBe(true); + await store.appendStatus("wfr_123", "running", "2026-05-29T00:00:01.000Z", { + expectedLeaseOwnerId: "runner-a", + }); + await expect(store.acquireLease("wfr_123", "runner-b", 1012)).resolves.toBe(true); + + await expect( + store.appendStatus("wfr_123", "completed", "2026-05-29T00:00:02.000Z", { + expectedLeaseOwnerId: "runner-a", + }) + ).rejects.toThrow(/lease lost/); + await expect( + store.recordStepCompleted( + "wfr_123", + { + stepId: "read-source", + inputHash: "source:a", + taskId: "task_1", + result: { reportMarkdown: "source summary" }, + startedAt: "2026-05-29T00:00:01.000Z", + completedAt: "2026-05-29T00:00:02.000Z", + }, + { expectedLeaseOwnerId: "runner-a" } + ) + ).rejects.toThrow(/lease lost/); + await expect(store.getRun("wfr_123")).resolves.toMatchObject({ status: "running" }); + }); + + test("replays terminal status from journal when run file is stale", async () => { + using tmp = new DisposableTempDir("workflow-runs"); + const store = await createStore(tmp.path); + await fs.appendFile( + 
path.join(tmp.path, "workflows", "wfr_123", "events.jsonl"), + `${JSON.stringify({ + sequence: 1, + type: "status", + at: "2026-05-29T00:00:01.000Z", + status: "completed", + })}\n`, + "utf-8" + ); + + await expect(store.getRun("wfr_123")).resolves.toMatchObject({ status: "completed" }); + await expect( + store.appendStatus("wfr_123", "interrupted", "2026-05-29T00:00:02.000Z") + ).rejects.toThrow(/Cannot transition/); + }); + + test("does not overwrite terminal runs with later interrupt status", async () => { + using tmp = new DisposableTempDir("workflow-runs"); + const store = await createStore(tmp.path); + + await store.appendStatus("wfr_123", "running", "2026-05-29T00:00:01.000Z"); + await store.appendStatus("wfr_123", "completed", "2026-05-29T00:00:02.000Z"); + + await expect( + store.appendStatus("wfr_123", "interrupted", "2026-05-29T00:00:03.000Z") + ).rejects.toThrow(/Cannot transition/); + await expect(store.getRun("wfr_123")).resolves.toMatchObject({ status: "completed" }); + }); + + test("reuses completed steps by stable step id and input hash", async () => { + using tmp = new DisposableTempDir("workflow-runs"); + const store = await createStore(tmp.path); + + await store.recordStepStarted("wfr_123", { + stepId: "read-source", + inputHash: "source:a", + taskId: "task_1", + startedAt: "2026-05-29T00:00:01.000Z", + }); + await store.recordStepCompleted("wfr_123", { + stepId: "read-source", + inputHash: "source:a", + taskId: "task_1", + result: { reportMarkdown: "source summary" }, + startedAt: "2026-05-29T00:00:01.000Z", + completedAt: "2026-05-29T00:00:02.000Z", + }); + + await expect(store.getCompletedStep("wfr_123", "read-source", "source:b")).resolves.toBeNull(); + await expect( + store.getCompletedStep("wfr_123", "read-source", "source:a") + ).resolves.toMatchObject({ + status: "completed", + result: { reportMarkdown: "source summary" }, + }); + }); + + test("renews active leases so they are not reclaimed as stale", async () => { + using tmp = new 
DisposableTempDir("workflow-runs"); + const store = await createStore(tmp.path); + + await expect(store.acquireLease("wfr_123", "runner-a", 1000)).resolves.toBe(true); + await expect(store.renewLease("wfr_123", "runner-a", 1008)).resolves.toBe(true); + await expect(store.acquireLease("wfr_123", "runner-b", 1012)).resolves.toBe(false); + await expect(store.acquireLease("wfr_123", "runner-b", 1019)).resolves.toBe(true); + }); + + test("does not acquire through an active lease mutation lock", async () => { + using tmp = new DisposableTempDir("workflow-runs"); + const store = await createStore(tmp.path); + + await expect(store.acquireLease("wfr_123", "runner-a", 1000)).resolves.toBe(true); + const lockDir = path.join(tmp.path, "workflows", "wfr_123", "lease.json.lock"); + await fs.mkdir(lockDir); + + await expect(store.acquireLease("wfr_123", "runner-b", 1012)).resolves.toBe(false); + + await fs.rm(lockDir, { recursive: true, force: true }); + await expect(store.acquireLease("wfr_123", "runner-b", 1012)).resolves.toBe(true); + }); + + test("serializes renewal with lease ownership changes", async () => { + using tmp = new DisposableTempDir("workflow-runs"); + const store = await createStore(tmp.path); + + await expect(store.acquireLease("wfr_123", "runner-a", 1000)).resolves.toBe(true); + const runDir = path.join(tmp.path, "workflows", "wfr_123"); + const leaseFile = path.join(runDir, "lease.json"); + const lockDir = `${leaseFile}.lock`; + await fs.mkdir(lockDir); + + const renewal = store.renewLease("wfr_123", "runner-a", 1005); + await fs.writeFile(leaseFile, JSON.stringify({ ownerId: "runner-b", acquiredAtMs: 1004 })); + await fs.rm(lockDir, { recursive: true, force: true }); + + await expect(renewal).resolves.toBe(false); + await expect(fs.readFile(leaseFile, "utf-8")).resolves.toContain("runner-b"); + }); + + test("release waits for in-flight lease mutations", async () => { + using tmp = new DisposableTempDir("workflow-runs"); + const store = await 
createStore(tmp.path, 100); + + await expect(store.acquireLease("wfr_123", "runner-a", 1000)).resolves.toBe(true); + const leaseFile = path.join(tmp.path, "workflows", "wfr_123", "lease.json"); + const lockDir = `${leaseFile}.lock`; + await fs.mkdir(lockDir); + + const release = store.releaseLease("wfr_123", "runner-a"); + await new Promise((resolve) => setTimeout(resolve, 5)); + await expect(fs.readFile(leaseFile, "utf-8")).resolves.toContain("runner-a"); + + await fs.rm(lockDir, { recursive: true, force: true }); + await release; + + await expect(store.acquireLease("wfr_123", "runner-b", 1001)).resolves.toBe(true); + }); + + test("prevents concurrent runners while allowing stale lease recovery", async () => { + using tmp = new DisposableTempDir("workflow-runs"); + const store = await createStore(tmp.path); + + await expect(store.acquireLease("wfr_123", "runner-a", 1000)).resolves.toBe(true); + await expect(store.acquireLease("wfr_123", "runner-a", 1001)).resolves.toBe(false); + await expect(store.acquireLease("wfr_123", "runner-b", 1001)).resolves.toBe(false); + await expect(store.acquireLease("wfr_123", "runner-b", 1012)).resolves.toBe(true); + }); +}); diff --git a/src/node/services/workflows/WorkflowRunStore.ts b/src/node/services/workflows/WorkflowRunStore.ts new file mode 100644 index 0000000000..a5934b73b3 --- /dev/null +++ b/src/node/services/workflows/WorkflowRunStore.ts @@ -0,0 +1,659 @@ +import * as crypto from "node:crypto"; +import type { Dirent } from "node:fs"; +import * as fs from "node:fs/promises"; +import * as path from "node:path"; + +import writeFileAtomic from "write-file-atomic"; + +import { + WorkflowEventSequenceSchema, + WorkflowRunEventSchema, + WorkflowRunRecordSchema, + WorkflowStepRecordSchema, +} from "@/common/orpc/schemas"; +import type { + StructuredTaskOutput, + WorkflowDefinitionDescriptor, + WorkflowRunEvent, + WorkflowRunRecord, + WorkflowRunStatus, + WorkflowStepRecord, +} from "@/common/types/workflow"; +import assert from 
"@/common/utils/assert"; +import { getErrorMessage } from "@/common/utils/errors"; +import { log } from "@/node/services/log"; + +export interface WorkflowRunStoreOptions { + sessionDir: string; + staleLeaseMs?: number; +} + +export interface CreateWorkflowRunInput { + id: string; + workspaceId: string; + definition: WorkflowDefinitionDescriptor; + definitionSource: string; + args: unknown; + now: string; +} + +export interface AppendWorkflowRunEventOptions { + /** + * Only explicit Resume may reopen an interrupted run; stale active runners must preserve the + * interrupt. + */ + allowInterruptedResume?: boolean; + /** Fence a journal/step mutation so only the current lease owner can write it. */ + expectedLeaseOwnerId?: string; +} + +interface LeaseRecord { + ownerId: string; + acquiredAtMs: number; +} + +interface WorkflowStepLookup { + stepId: string; + inputHash: string; +} + +export class WorkflowRunStore { + private readonly sessionDir: string; + private readonly staleLeaseMs: number; + + constructor(options: WorkflowRunStoreOptions) { + assert(options.sessionDir.length > 0, "WorkflowRunStore: sessionDir is required"); + this.sessionDir = options.sessionDir; + this.staleLeaseMs = options.staleLeaseMs ?? 
30_000; + } + + async createRun(input: CreateWorkflowRunInput): Promise { + assert(input.id.length > 0, "WorkflowRunStore.createRun: id is required"); + assert(input.workspaceId.length > 0, "WorkflowRunStore.createRun: workspaceId is required"); + assert( + input.definitionSource.length > 0, + "WorkflowRunStore.createRun: definitionSource is required" + ); + + const runDir = this.runDir(input.id); + await fs.mkdir(runDir, { recursive: true }); + await fs.writeFile(path.join(runDir, "definition.js"), input.definitionSource, "utf-8"); + await fs.writeFile(path.join(runDir, "events.jsonl"), "", { flag: "a" }); + await fs.writeFile(path.join(runDir, "steps.jsonl"), "", { flag: "a" }); + + const run = WorkflowRunRecordSchema.parse({ + id: input.id, + workspaceId: input.workspaceId, + definition: input.definition, + definitionSource: input.definitionSource, + definitionHash: hashSource(input.definitionSource), + args: input.args, + status: "pending", + createdAt: input.now, + updatedAt: input.now, + events: [], + steps: [], + }); + + await this.writeRunFile(input.id, run); + return run; + } + + async getRun(runId: string): Promise { + const rawRun = JSON.parse(await fs.readFile(this.runFile(runId), "utf-8")) as unknown; + const partial = WorkflowRunRecordSchema.omit({ events: true, steps: true }).parse(rawRun); + const definitionSource = await fs.readFile( + path.join(this.runDir(runId), "definition.js"), + "utf-8" + ); + const events = await this.readEvents(runId); + const steps = await this.readSteps(runId); + + const latestEvent = events.at(-1); + const status = getRunStatusFromEvents(events) ?? partial.status; + return WorkflowRunRecordSchema.parse({ + ...partial, + definitionSource, + definitionHash: hashSource(definitionSource), + status, + updatedAt: latestEvent?.at ?? 
partial.updatedAt, + events, + steps, + }); + } + + async listRuns(): Promise { + let entries: Dirent[]; + try { + entries = await fs.readdir(this.workflowsDir(), { withFileTypes: true }); + } catch { + return []; + } + + const runs = await Promise.all( + entries + .filter((entry) => entry.isDirectory()) + .map(async (entry): Promise => { + try { + return await this.getRun(entry.name); + } catch (error) { + log.warn(`Skipping unreadable workflow run '${entry.name}': ${getErrorMessage(error)}`); + return null; + } + }) + ); + + return runs + .filter((run): run is WorkflowRunRecord => run != null) + .sort((a, b) => a.createdAt.localeCompare(b.createdAt)); + } + + async appendEvent( + runId: string, + event: WorkflowRunEvent, + options: AppendWorkflowRunEventOptions = {} + ): Promise { + const lockDir = `${this.eventsFile(runId)}.lock`; + await acquireWorkflowMutationLock( + lockDir, + this.leaseMutationLockStaleMs(), + this.leaseMutationWaitTimeoutMs() + ); + try { + return await this.withExpectedLeaseOwner( + runId, + options.expectedLeaseOwnerId, + async () => await this.appendEventUnlocked(runId, event, options) + ); + } finally { + await fs.rm(lockDir, { recursive: true, force: true }); + } + } + + async appendStatus( + runId: string, + status: WorkflowRunStatus, + at: string, + options: AppendWorkflowRunEventOptions = {} + ): Promise { + const lockDir = `${this.eventsFile(runId)}.lock`; + await acquireWorkflowMutationLock( + lockDir, + this.leaseMutationLockStaleMs(), + this.leaseMutationWaitTimeoutMs() + ); + try { + return await this.withExpectedLeaseOwner(runId, options.expectedLeaseOwnerId, async () => { + const events = await this.readEvents(runId); + return await this.appendEventUnlocked( + runId, + { + sequence: (events.at(-1)?.sequence ?? 
/**
 * Journals that a workflow step has started.
 *
 * Builds a step record with status `"started"` from `input` and hands it to
 * `appendStepRecord`, which validates and appends it.
 *
 * @param runId - Run whose step journal receives the record.
 * @param input - Step identity (`stepId` + `inputHash`), optional child `taskId`, and start time.
 * @param options - Forwarded unchanged to `appendStepRecord`.
 */
async recordStepStarted(
  runId: string,
  input: {
    stepId: string;
    inputHash: string;
    taskId?: string;
    startedAt: string;
  },
  options: AppendWorkflowRunEventOptions = {}
): Promise<void> {
  // Pick the known fields explicitly so any extra runtime properties on `input`
  // never reach the journal.
  const { stepId, inputHash, taskId, startedAt } = input;
  await this.appendStepRecord(
    runId,
    { stepId, inputHash, taskId, startedAt, status: "started" },
    options
  );
}
/**
 * Tries to take the run lease for `ownerId` without waiting.
 *
 * @param runId - Run whose lease file is contended.
 * @param ownerId - Non-empty id recorded as the new lease owner.
 * @param nowMs - Timestamp used both to judge lease staleness and as the new `acquiredAtMs`.
 * @returns `true` when the lease was written for `ownerId`; `false` when the lease mutation
 *   lock is currently held, or when an existing lease (of any owner) is not yet stale.
 */
async acquireLease(runId: string, ownerId: string, nowMs = Date.now()): Promise<boolean> {
  assert(ownerId.length > 0, "WorkflowRunStore.acquireLease: ownerId is required");
  const leaseFile = this.leaseFile(runId);
  const lockDir = `${leaseFile}.lock`;

  // Single non-blocking attempt. Lock staleness is judged against the wall clock
  // (Date.now()), independently of the caller-supplied `nowMs`.
  const locked = await acquireLeaseMutationLock(
    lockDir,
    Date.now(),
    this.leaseMutationLockStaleMs()
  );
  if (!locked) {
    return false;
  }

  try {
    const existing = await readLease(leaseFile);
    // The owner is deliberately not compared here: a live lease blocks every caller.
    const leaseIsLive = existing != null && nowMs - existing.acquiredAtMs <= this.staleLeaseMs;
    if (leaseIsLive) {
      return false;
    }

    await fs.mkdir(this.runDir(runId), { recursive: true });
    await writeJsonAtomic(leaseFile, { ownerId, acquiredAtMs: nowMs } satisfies LeaseRecord);
    return true;
  } finally {
    // Always drop the mutation lock, whichever branch returned.
    await fs.rm(lockDir, { recursive: true, force: true });
  }
}
/**
 * Releases the run lease if, and only if, it is currently held by `ownerId`.
 *
 * Waits for the lease mutation lock (the wait rejects on timeout), then removes the lease
 * file when its recorded owner matches. A lease owned by someone else, or a missing or
 * unreadable lease, is left untouched.
 *
 * @param runId - Run whose lease should be released.
 * @param ownerId - Owner that believes it holds the lease.
 */
async releaseLease(runId: string, ownerId: string): Promise<void> {
  const leaseFile = this.leaseFile(runId);
  const lockDir = `${leaseFile}.lock`;
  await acquireWorkflowMutationLock(
    lockDir,
    this.leaseMutationLockStaleMs(),
    this.leaseMutationWaitTimeoutMs()
  );

  try {
    const existing = await readLease(leaseFile);
    const ownsLease = existing?.ownerId === ownerId;
    if (ownsLease) {
      await fs.rm(leaseFile, { force: true });
    }
  } finally {
    // The mutation lock is released even when reading or removing the lease throws.
    await fs.rm(lockDir, { recursive: true, force: true });
  }
}
/**
 * Loads the run's event journal and validates it as one ordered sequence.
 *
 * Individual lines are read through `readJsonLines` with the per-event schema; the resulting
 * list is then checked as a whole by `WorkflowEventSequenceSchema`.
 *
 * @param runId - Run whose `events.jsonl` is read.
 * @returns The validated events in journal order.
 */
private async readEvents(runId: string): Promise<WorkflowRunEvent[]> {
  const journalEntries = await readJsonLines(this.eventsFile(runId), WorkflowRunEventSchema);
  // NOTE(review): `parse` (not `safeParse`) is used, so a journal that fails the sequence
  // schema presumably throws here rather than being silently repaired — confirm.
  return WorkflowEventSequenceSchema.parse(journalEntries);
}
/** Returns the SHA-256 digest of `source` in hex, prefixed with `sha256:`. */
function hashSource(source: string): string {
  const hasher = crypto.createHash("sha256");
  hasher.update(source);
  const hexDigest = hasher.digest("hex");
  return "sha256:" + hexDigest;
}
/**
 * Finds the status carried by the most recent `status` event.
 *
 * @param events - Run events in journal order.
 * @returns The `status` of the last event whose `type` is `"status"`, or `undefined` when
 *   there is no such event.
 */
function getRunStatusFromEvents(
  events: readonly WorkflowRunEvent[]
): WorkflowRunStatus | undefined {
  // Walk backwards so the first match is the latest status event.
  for (let index = events.length - 1; index >= 0; index -= 1) {
    const candidate = events[index];
    if (candidate.type === "status") {
      return candidate.status;
    }
  }
  return undefined;
}
/**
 * Reads and validates the lease record stored at `leaseFile`.
 *
 * @param leaseFile - Path of the JSON lease file.
 * @returns The lease's `ownerId` and `acquiredAtMs` when the file holds a JSON object with a
 *   string `ownerId` and a numeric `acquiredAtMs`; `null` when the file cannot be read, is
 *   not valid JSON, or does not have that shape. Never rejects.
 */
async function readLease(leaseFile: string): Promise<LeaseRecord | null> {
  let parsed: unknown;
  try {
    parsed = JSON.parse(await fs.readFile(leaseFile, "utf-8"));
  } catch {
    // A missing/unreadable file and malformed JSON both read as "no lease".
    return null;
  }

  // Narrow explicitly instead of relying on a thrown TypeError for payloads like `null`.
  if (typeof parsed !== "object" || parsed === null) {
    return null;
  }

  const { ownerId, acquiredAtMs } = parsed as Partial<LeaseRecord>;
  if (typeof ownerId !== "string" || typeof acquiredAtMs !== "number") {
    return null;
  }
  // Return only the two known fields so extra keys in the file are not propagated.
  return { ownerId, acquiredAtMs };
}
/**
 * Test helper: builds a `WorkflowRunner` over `store` that uses a QuickJS runtime factory,
 * the supplied task adapter, the runner id "runner-a", and a clock frozen at a fixed instant.
 */
function createRunner(store: WorkflowRunStore, taskAdapter: WorkflowTaskAdapter) {
  const runtimeFactory = new QuickJSRuntimeFactory();
  // Fixed clock keeps timestamps written by the runner deterministic across tests.
  const frozenClock = {
    nowIso: () => "2026-05-29T00:00:01.000Z",
    nowMs: () => 1_000,
  };
  return new WorkflowRunner({
    runStore: store,
    runtimeFactory,
    taskAdapter,
    runnerId: "runner-a",
    clock: frozenClock,
  });
}
}, + }); + }); + + test("requires explicit resume permission to restart interrupted runs", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = await createRunStore(tmp.path); + await store.appendStatus("wfr_123", "interrupted", "2026-05-29T00:00:00.500Z"); + let taskCalls = 0; + const runner = createRunner(store, { + async runAgent() { + taskCalls += 1; + return { taskId: "task_1", reportMarkdown: "summary" }; + }, + }); + + await expect(runner.run("wfr_123")).rejects.toThrow(/interrupted/); + await expect(store.getRun("wfr_123")).resolves.toMatchObject({ status: "interrupted" }); + expect(taskCalls).toBe(0); + + await expect( + runner.run("wfr_123", { allowResumeFromInterrupted: true }) + ).resolves.toMatchObject({ reportMarkdown: "Final: summary" }); + await expect(store.getRun("wfr_123")).resolves.toMatchObject({ status: "completed" }); + expect(taskCalls).toBe(1); + }); + + test("aborts without terminal writes after losing its lease", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = await createRunStore(tmp.path); + let renewCalls = 0; + let resolveTaskStarted!: () => void; + const taskStarted = new Promise((resolve) => { + resolveTaskStarted = resolve; + }); + store.renewLease = async () => { + renewCalls += 1; + if (renewCalls === 1) { + return true; + } + await taskStarted; + return false; + }; + let sawAbort = false; + const runner = createRunner(store, { + async runAgent(_spec, _lifecycle, waitOptions) { + resolveTaskStarted(); + return await new Promise((_resolve, reject) => { + const signal = waitOptions?.abortSignal; + if (signal == null) { + reject(new Error("missing abort signal")); + return; + } + if (signal.aborted) { + sawAbort = true; + reject(new Error("task aborted")); + return; + } + signal.addEventListener( + "abort", + () => { + sawAbort = true; + reject(new Error("task aborted")); + }, + { once: true } + ); + }); + }, + }); + + await 
expect(runner.run("wfr_123")).rejects.toThrow(/lease lost/); + const run = await store.getRun("wfr_123"); + + expect(renewCalls).toBeGreaterThan(0); + expect(sawAbort).toBe(true); + expect(run.status).toBe("running"); + expect( + run.events.some((event) => event.type === "status" && event.status === "completed") + ).toBe(false); + expect(run.events.some((event) => event.type === "status" && event.status === "failed")).toBe( + false + ); + }); + + test("replays completed agent steps without respawning child tasks", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = await createRunStore(tmp.path); + let taskCalls = 0; + const runner = createRunner(store, { + async runAgent() { + taskCalls += 1; + return { taskId: "task_1", reportMarkdown: "summary" }; + }, + }); + + await runner.run("wfr_123"); + await runner.run("wfr_123"); + + expect(taskCalls).toBe(1); + }); + + test("reuses a recorded started task id instead of respawning on resume", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = await createRunStore(tmp.path); + const spec = { id: "summarize-topic", prompt: "Summarize durable workflows" }; + await store.recordStepStarted("wfr_123", { + stepId: spec.id, + inputHash: hashWorkflowStepInput(spec.id, spec), + taskId: "task_existing", + startedAt: "2026-05-29T00:00:00.500Z", + }); + let runAgentCalls = 0; + const waitedFor: string[] = []; + let waitTimeoutMs: number | undefined; + let waitAbortSignal: AbortSignal | undefined; + const runner = createRunner(store, { + async runAgent() { + runAgentCalls += 1; + return { taskId: "task_duplicate", reportMarkdown: "duplicate" }; + }, + async waitForAgentTask(taskId, _spec, waitOptions) { + waitedFor.push(taskId); + waitTimeoutMs = waitOptions?.timeoutMs; + waitAbortSignal = waitOptions?.abortSignal; + return { taskId, reportMarkdown: "summary" }; + }, + }); + + await expect(runner.run("wfr_123")).resolves.toEqual({ reportMarkdown: "Final: summary" }); 
+ + expect(runAgentCalls).toBe(0); + expect(waitedFor).toEqual(["task_existing"]); + expect(waitTimeoutMs).toBeGreaterThan(5 * 60 * 1000); + expect(waitAbortSignal?.aborted).toBe(false); + }); + + test("reruns stale started task ids that no longer have recoverable reports", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = await createRunStore(tmp.path); + const spec = { id: "summarize-topic", prompt: "Summarize durable workflows" }; + await store.recordStepStarted("wfr_123", { + stepId: spec.id, + inputHash: hashWorkflowStepInput(spec.id, spec), + taskId: "task_missing", + startedAt: "2026-05-29T00:00:00.500Z", + }); + let runAgentCalls = 0; + const waitedFor: string[] = []; + const runner = createRunner(store, { + async runAgent() { + runAgentCalls += 1; + return { taskId: "task_recovered", reportMarkdown: "summary" }; + }, + async waitForAgentTask(taskId) { + waitedFor.push(taskId); + throw new Error("Task not found"); + }, + }); + + await expect(runner.run("wfr_123")).resolves.toEqual({ reportMarkdown: "Final: summary" }); + const run = await store.getRun("wfr_123"); + + expect(waitedFor).toEqual(["task_missing"]); + expect(runAgentCalls).toBe(1); + expect(run.steps.at(-1)).toMatchObject({ + stepId: "summarize-topic", + status: "completed", + taskId: "task_recovered", + }); + }); + + test("restarts started task records when resuming a user-interrupted run", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = await createRunStore(tmp.path); + const spec = { id: "summarize-topic", prompt: "Summarize durable workflows" }; + await store.recordStepStarted("wfr_123", { + stepId: spec.id, + inputHash: hashWorkflowStepInput(spec.id, spec), + taskId: "task_interrupted", + startedAt: "2026-05-29T00:00:00.500Z", + }); + await store.appendStatus("wfr_123", "interrupted", "2026-05-29T00:00:00.750Z"); + let runAgentCalls = 0; + const waitedFor: string[] = []; + const runner = createRunner(store, { + async 
runAgent() { + runAgentCalls += 1; + return { taskId: "task_restarted", reportMarkdown: "summary" }; + }, + async waitForAgentTask(taskId) { + waitedFor.push(taskId); + throw new Error("interrupted task should not be awaited"); + }, + }); + + await expect(runner.run("wfr_123", { allowResumeFromInterrupted: true })).resolves.toEqual({ + reportMarkdown: "Final: summary", + }); + + expect(runAgentCalls).toBe(1); + expect(waitedFor).toEqual([]); + }); + + test("runs parallelAgents specs concurrently and returns ordered results", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = new WorkflowRunStore({ sessionDir: tmp.path, staleLeaseMs: 10 }); + await store.createRun({ + id: "wfr_parallel", + workspaceId: "workspace-1", + definition, + definitionSource: `export default function workflow({ parallelAgents }) { + const results = parallelAgents([ + { id: "source-a", prompt: "Read source A" }, + { id: "source-b", prompt: "Read source B" }, + ]); + return { reportMarkdown: results.map((result) => result.reportMarkdown).join(" + ") }; + }`, + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + const calls: string[] = []; + let active = 0; + let maxActive = 0; + const runner = createRunner(store, { + async runAgent(spec) { + calls.push(spec.id); + active += 1; + maxActive = Math.max(maxActive, active); + await new Promise((resolve) => setTimeout(resolve, 10)); + active -= 1; + return { taskId: `task_${spec.id}`, reportMarkdown: spec.id }; + }, + }); + + await expect(runner.run("wfr_parallel")).resolves.toEqual({ + reportMarkdown: "source-a + source-b", + }); + + expect(calls).toEqual(["source-a", "source-b"]); + expect(maxActive).toBe(2); + const run = await store.getRun("wfr_parallel"); + expect(run.steps.map((step) => step.stepId).sort()).toEqual(["source-a", "source-b"]); + }); + + test("interrupts sibling parallelAgents when one child task fails", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = new 
WorkflowRunStore({ sessionDir: tmp.path, staleLeaseMs: 10 }); + await store.createRun({ + id: "wfr_parallel_failure", + workspaceId: "workspace-1", + definition, + definitionSource: `export default function workflow({ parallelAgents }) { + parallelAgents([ + { id: "source-a", prompt: "Read source A" }, + { id: "source-b", prompt: "Read source B" }, + ]); + return { reportMarkdown: "unreachable" }; + }`, + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + let interruptRunCalls = 0; + let releaseSourceB!: () => void; + const sourceBInterrupted = new Promise((resolve) => { + releaseSourceB = resolve; + }); + const calls: string[] = []; + const runner = createRunner(store, { + async runAgent(spec) { + calls.push(spec.id); + if (spec.id === "source-a") { + throw new Error("source-a failed"); + } + await sourceBInterrupted; + throw new Error("source-b interrupted"); + }, + async interruptRun() { + interruptRunCalls += 1; + releaseSourceB(); + }, + }); + + await expect(runner.run("wfr_parallel_failure")).rejects.toThrow("source-a failed"); + + expect(calls).toEqual(["source-a", "source-b"]); + expect(interruptRunCalls).toBe(1); + }); + + test("does not interrupt sibling parallelAgents when foreground wait backgrounds", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = new WorkflowRunStore({ sessionDir: tmp.path, staleLeaseMs: 10 }); + await store.createRun({ + id: "wfr_parallel_backgrounded", + workspaceId: "workspace-1", + definition, + definitionSource: `export default function workflow({ parallelAgents }) { + parallelAgents([ + { id: "source-a", prompt: "Read source A" }, + { id: "source-b", prompt: "Read source B" }, + ]); + return { reportMarkdown: "unreachable" }; + }`, + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + let interruptRunCalls = 0; + let sourceBStarted = false; + const runner = createRunner(store, { + async runAgent(spec, _lifecycle, waitOptions) { + if (spec.id === "source-a") { + throw new 
ForegroundWaitBackgroundedError(); + } + sourceBStarted = true; + await new Promise((_resolve, reject) => { + waitOptions?.abortSignal?.addEventListener( + "abort", + () => reject(new ForegroundWaitBackgroundedError()), + { once: true } + ); + }); + throw new Error("unreachable"); + }, + async interruptRun() { + interruptRunCalls += 1; + }, + }); + + await expect(runner.run("wfr_parallel_backgrounded")).rejects.toBeInstanceOf( + WorkflowRunBackgroundedError + ); + + expect(sourceBStarted).toBe(true); + expect(interruptRunCalls).toBe(0); + }); + + test("retries only failed parallelAgents steps after structured output validation errors", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = new WorkflowRunStore({ sessionDir: tmp.path, staleLeaseMs: 10 }); + await store.createRun({ + id: "wfr_parallel_retry_validation", + workspaceId: "workspace-1", + definition, + definitionSource: `export default function workflow({ parallelAgents }) { + const results = parallelAgents([ + { + id: "source-a", + prompt: "Summarize A", + outputSchema: { type: "object", required: ["summary"], properties: { summary: { type: "string" } } }, + }, + { + id: "source-b", + prompt: "Summarize B", + outputSchema: { type: "object", required: ["summary"], properties: { summary: { type: "string" } } }, + }, + ]); + return { reportMarkdown: results.map((result) => result.structuredOutput.summary).join(" + ") }; + }`, + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + const calls: string[] = []; + const runner = createRunner(store, { + async runAgent(spec) { + calls.push(spec.id); + if (spec.id === "source-b" && calls.filter((id) => id === "source-b").length === 1) { + return { taskId: "task_source_b_bad", reportMarkdown: "bad" }; + } + return { + taskId: `task_${spec.id}_${calls.length}`, + reportMarkdown: spec.id, + structuredOutput: { summary: spec.id }, + }; + }, + }); + + await expect(runner.run("wfr_parallel_retry_validation")).resolves.toEqual({ + 
reportMarkdown: "source-a + source-b", + }); + const run = await store.getRun("wfr_parallel_retry_validation"); + + expect(calls).toEqual(["source-a", "source-b", "source-b"]); + expect(run.events).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + type: "task", + stepId: "source-b", + taskId: "task_source_b_bad", + status: "failed", + }), + expect.objectContaining({ + type: "log", + message: "Retrying source-b after validation failure", + }), + ]) + ); + }); + + test("retries workflow agent steps that fail structured output validation", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = new WorkflowRunStore({ sessionDir: tmp.path, staleLeaseMs: 10 }); + await store.createRun({ + id: "wfr_retry_validation", + workspaceId: "workspace-1", + definition, + definitionSource: `export default function workflow({ agent }) { + const result = agent({ + id: "claims", + prompt: "Extract claims", + outputSchema: { + type: "object", + required: ["claims"], + properties: { claims: { type: "array", items: { type: "string" } } }, + additionalProperties: false, + }, + }); + return { reportMarkdown: result.structuredOutput.claims.join(", ") }; + }`, + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + const prompts: string[] = []; + const runner = createRunner(store, { + async runAgent(spec) { + prompts.push(spec.prompt); + if (prompts.length === 1) { + return { taskId: "task_bad", reportMarkdown: "bad" }; + } + return { + taskId: "task_good", + reportMarkdown: "good", + structuredOutput: { claims: ["durable"] }, + }; + }, + }); + + await expect(runner.run("wfr_retry_validation")).resolves.toEqual({ + reportMarkdown: "durable", + }); + const run = await store.getRun("wfr_retry_validation"); + + expect(prompts).toHaveLength(2); + expect(prompts[1]).toContain("Previous workflow attempt 1 failed output validation"); + expect(run.status).toBe("completed"); + expect(run.steps).toEqual([ + expect.objectContaining({ stepId: "claims", 
status: "completed", taskId: "task_good" }), + ]); + expect(run.events).toEqual( + expect.arrayContaining([ + expect.objectContaining({ type: "validation", stepId: "claims", success: false }), + expect.objectContaining({ + type: "task", + stepId: "claims", + taskId: "task_bad", + status: "failed", + }), + expect.objectContaining({ + type: "task", + stepId: "claims", + taskId: "task_good", + status: "completed", + }), + expect.objectContaining({ + type: "log", + message: "Retrying claims after validation failure", + }), + ]) + ); + }); + + test("stops retrying workflow agent validation after the maximum attempts", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = new WorkflowRunStore({ sessionDir: tmp.path, staleLeaseMs: 10 }); + await store.createRun({ + id: "wfr_retry_exhausted", + workspaceId: "workspace-1", + definition, + definitionSource: `export default function workflow({ agent }) { + return agent({ + id: "claims", + prompt: "Extract claims", + outputSchema: { type: "object", required: ["claims"], properties: { claims: { type: "array" } } }, + }); + }`, + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + let calls = 0; + const runner = createRunner(store, { + async runAgent() { + calls += 1; + return { taskId: `task_bad_${calls}`, reportMarkdown: "bad" }; + }, + }); + + await expect(runner.run("wfr_retry_exhausted")).rejects.toThrow(/structured output/); + const run = await store.getRun("wfr_retry_exhausted"); + + expect(calls).toBe(3); + expect(run.status).toBe("failed"); + expect(run.steps).toEqual([ + expect.objectContaining({ stepId: "claims", status: "failed", taskId: "task_bad_3" }), + ]); + expect( + run.events.filter( + (event) => event.type === "task" && event.stepId === "claims" && event.status === "failed" + ) + ).toHaveLength(3); + }); + + test("validates workflow agent structured output against requested schema", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = new 
WorkflowRunStore({ sessionDir: tmp.path, staleLeaseMs: 10 }); + await store.createRun({ + id: "wfr_schema", + workspaceId: "workspace-1", + definition, + definitionSource: `export default function workflow({ agent }) { + return agent({ + id: "claims", + prompt: "Extract claims", + outputSchema: { + type: "object", + required: ["claims"], + properties: { claims: { type: "array", items: { type: "string" } } }, + additionalProperties: false, + }, + }); + }`, + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + const runner = createRunner(store, { + async runAgent() { + return { taskId: "task_1", reportMarkdown: "bad", structuredOutput: { claims: [1] } }; + }, + }); + + await expect(runner.run("wfr_schema")).rejects.toThrow( + /structured output failed schema validation.*claims\[0\]/ + ); + const run = await store.getRun("wfr_schema"); + expect(run.steps).toEqual([ + expect.objectContaining({ stepId: "claims", status: "failed", taskId: "task_1" }), + ]); + }); + + test("marks foreground-backgrounded agent waits as backgrounded runs", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = await createRunStore(tmp.path); + const runner = createRunner(store, { + async runAgent() { + throw new ForegroundWaitBackgroundedError(); + }, + }); + + await expect(runner.run("wfr_123")).rejects.toBeInstanceOf(WorkflowRunBackgroundedError); + await expect(store.getRun("wfr_123")).resolves.toMatchObject({ status: "backgrounded" }); + }); + + test("applies sandbox limits before evaluating workflow source", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = new WorkflowRunStore({ sessionDir: tmp.path, staleLeaseMs: 10 }); + await store.createRun({ + id: "wfr_limits", + workspaceId: "workspace-1", + definition, + definitionSource: `export default function workflow() { return { reportMarkdown: "limited" }; }`, + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + let limitsApplied = false; + let evalSawLimits = false; + 
let timeoutMs: number | undefined; + const noop = () => undefined; + const runner = new WorkflowRunner({ + runStore: store, + runtimeFactory: { + async create() { + return { + setLimits(limits) { + limitsApplied = true; + timeoutMs = limits.timeoutMs; + }, + registerFunction: noop, + registerObject: noop, + onEvent: noop, + abort: noop, + getAbortSignal() { + return undefined; + }, + async eval() { + evalSawLimits = limitsApplied; + return { + success: true, + result: { reportMarkdown: "limited" }, + toolCalls: [], + consoleOutput: [], + duration_ms: 0, + }; + }, + dispose: noop, + [Symbol.dispose]: noop, + }; + }, + }, + taskAdapter: { + async runAgent() { + throw new Error("agent should not run"); + }, + }, + runnerId: "runner-a", + clock: { + nowIso: () => "2026-05-29T00:00:01.000Z", + nowMs: () => 1_000, + }, + }); + + await expect(runner.run("wfr_limits")).resolves.toEqual({ reportMarkdown: "limited" }); + expect(timeoutMs).toBeGreaterThan(5 * 60 * 1000); + expect(evalSawLimits).toBe(true); + }); + + test("supports async workflow function exports", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = new WorkflowRunStore({ sessionDir: tmp.path, staleLeaseMs: 10 }); + await store.createRun({ + id: "wfr_async", + workspaceId: "workspace-1", + definition, + definitionSource: `export default async function workflow() { return { reportMarkdown: "async ok" }; }`, + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + const runner = createRunner(store, { + async runAgent() { + throw new Error("agent should not run"); + }, + }); + + await expect(runner.run("wfr_async")).resolves.toEqual({ reportMarkdown: "async ok" }); + }); + + test("returns the normalized workflow result for JSON-serializable values", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = new WorkflowRunStore({ sessionDir: tmp.path, staleLeaseMs: 10 }); + await store.createRun({ + id: "wfr_normalized_return", + workspaceId: 
"workspace-1", + definition, + definitionSource: `export default function workflow() { return { summary: "done" }; }`, + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + const runner = createRunner(store, { + async runAgent() { + throw new Error("agent should not run"); + }, + }); + + await expect(runner.run("wfr_normalized_return")).resolves.toEqual({ + reportMarkdown: JSON.stringify({ summary: "done" }), + }); + }); + + test("marks empty workflow returns as failed runs", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = new WorkflowRunStore({ sessionDir: tmp.path, staleLeaseMs: 10 }); + await store.createRun({ + id: "wfr_empty_return", + workspaceId: "workspace-1", + definition, + definitionSource: `export default function workflow() {}`, + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + const runner = createRunner(store, { + async runAgent() { + throw new Error("agent should not run"); + }, + }); + + await expect(runner.run("wfr_empty_return")).rejects.toThrow(/must return/); + const run = await store.getRun("wfr_empty_return"); + expect(run.status).toBe("failed"); + expect(run.events.some((event) => event.type === "result")).toBe(false); + }); + + test("does not overwrite an interrupted run with completed status", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = await createRunStore(tmp.path); + let releaseAgent!: () => void; + let runPromise!: Promise; + const agentStarted = new Promise((resolve) => { + const runner = createRunner(store, { + async runAgent() { + resolve(); + await new Promise((release) => { + releaseAgent = release; + }); + return { taskId: "task_1", reportMarkdown: "late summary" }; + }, + }); + runPromise = runner.run("wfr_123"); + }); + + await agentStarted; + await store.appendStatus("wfr_123", "interrupted", "2026-05-29T00:00:02.000Z"); + releaseAgent(); + await expect(runPromise).rejects.toThrow(/interrupted/); + + const run = await store.getRun("wfr_123"); 
+ expect(run.status).toBe("interrupted"); + expect(run.events.some((event) => event.type === "result")).toBe(false); + }); + + test("marks compile failures as failed runs", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = new WorkflowRunStore({ sessionDir: tmp.path, staleLeaseMs: 10 }); + await store.createRun({ + id: "wfr_compile_error", + workspaceId: "workspace-1", + definition, + definitionSource: `export default () => ({ reportMarkdown: "bad shape" });`, + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + const runner = createRunner(store, { + async runAgent() { + throw new Error("agent should not run"); + }, + }); + + await expect(runner.run("wfr_compile_error")).rejects.toThrow(/export a default function/); + const run = await store.getRun("wfr_compile_error"); + expect(run.status).toBe("failed"); + expect(run.events.map((event) => event.type)).toContain("error"); + }); + + test("fails fast when a replay-boundary primitive omits a stable id", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = new WorkflowRunStore({ sessionDir: tmp.path, staleLeaseMs: 10 }); + await store.createRun({ + id: "wfr_missing_id", + workspaceId: "workspace-1", + definition, + definitionSource: `export default function workflow({ agent }) { return agent({ prompt: "no id" }); }`, + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + const runner = createRunner(store, { + async runAgent() { + throw new Error("agent should not run without a stable id"); + }, + }); + + await expect(runner.run("wfr_missing_id")).rejects.toThrow(/stable id/); + }); + + test("does not expose mux tools, filesystem imports, or timers to workflow code", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = new WorkflowRunStore({ sessionDir: tmp.path, staleLeaseMs: 10 }); + await store.createRun({ + id: "wfr_forbidden", + workspaceId: "workspace-1", + definition, + definitionSource: `export default 
/**
 * Error raised by `WorkflowRunner.run` when the run stopped executing in the
 * foreground because an agent wait was backgrounded.
 *
 * The message always has the form `Workflow run backgrounded: <runId>` and the
 * `name` is fixed so callers can also match on it without `instanceof`.
 */
export class WorkflowRunBackgroundedError extends Error {
  override name = "WorkflowRunBackgroundedError";

  /** @param runId Identifier of the run that was backgrounded. */
  constructor(runId: string) {
    super("Workflow run backgrounded: " + runId);
  }
}
/**
 * Best-effort extraction of a task id from an agent result of unknown shape.
 *
 * Used on failure paths where the result has not (or could not) be validated,
 * so nothing about `result` is assumed.
 *
 * @param result Any value returned by the task adapter.
 * @returns `result.taskId` when `result` is a non-null object whose `taskId`
 *   is a non-empty string; otherwise `undefined`.
 */
function getTaskIdFromUnknownAgentResult(result: unknown): string | undefined {
  // `in` narrowing gives typed access to `taskId` without a type assertion
  // (the previous `as Record` cast had lost its type arguments).
  if (typeof result !== "object" || result === null || !("taskId" in result)) {
    return undefined;
  }
  const { taskId } = result;
  return typeof taskId === "string" && taskId.length > 0 ? taskId : undefined;
}
/**
 * Forwards an optional caller-supplied abort signal to `runtime.abort()`.
 *
 * - No signal: nothing is wired up.
 * - Signal already aborted: the runtime is aborted immediately.
 * - Otherwise: a one-shot "abort" listener is attached.
 *
 * @param runtime Runtime whose `abort()` is invoked when the signal fires.
 * @param abortSignal Optional signal to observe.
 * @returns A function that detaches the listener; a no-op when none was attached.
 */
function abortRuntimeOnSignal(runtime: IJSRuntime, abortSignal?: AbortSignal): () => void {
  const detachNothing = (): void => undefined;
  if (abortSignal == null) {
    return detachNothing;
  }
  if (!abortSignal.aborted) {
    const onAbort = (): void => {
      runtime.abort();
    };
    abortSignal.addEventListener("abort", onAbort, { once: true });
    return () => {
      abortSignal.removeEventListener("abort", onAbort);
    };
  }
  // The signal fired before we were called: abort right away, nothing to detach.
  runtime.abort();
  return detachNothing;
}
/**
 * Executes a stored workflow run inside a sandboxed JS runtime.
 *
 * The runner holds a lease on the run for the whole execution, records progress
 * as events/steps in the run store, and delegates agent work to the task adapter.
 * Completed steps found in the store are returned without re-running the agent.
 */
export class WorkflowRunner {
  private readonly runStore: WorkflowRunStore;
  private readonly runtimeFactory: IJSRuntimeFactory;
  private readonly taskAdapter: WorkflowTaskAdapter;
  private readonly runnerId: string;
  private readonly clock: WorkflowRunnerClock;

  /**
   * @param options Collaborators for the runner. `runnerId` must be non-empty
   *   (it identifies the lease owner); `clock` defaults to the system clock.
   */
  constructor(options: WorkflowRunnerOptions) {
    assert(options.runnerId.length > 0, "WorkflowRunner: runnerId is required");
    this.runStore = options.runStore;
    this.runtimeFactory = options.runtimeFactory;
    this.taskAdapter = options.taskAdapter;
    this.runnerId = options.runnerId;
    this.clock = options.clock ?? DEFAULT_CLOCK;
  }

  /**
   * Runs (or resumes) the workflow identified by `runId` and returns its result.
   *
   * Behaviour visible in this method:
   * - Acquires the run lease first and throws if another runner holds it.
   * - Renews the lease on a timer; a failed renewal marks the lease lost and
   *   aborts the active runtime.
   * - Returns the stored result immediately when the run is already completed
   *   and has a `result` event.
   * - Refuses interrupted runs unless `options.allowResumeFromInterrupted` is true.
   * - Records `failed` status for compile errors, execution errors and results
   *   that cannot be normalized.
   * - Always removes the abort listener, stops renewal and releases the lease.
   *
   * @throws WorkflowRunBackgroundedError when execution failed after an agent
   *   wait was backgrounded.
   * @throws Error for lease conflicts, lost leases, interruption, aborts and
   *   workflow failures.
   */
  async run(runId: string, options?: WorkflowRunnerRunOptions): Promise<WorkflowResult> {
    assert(runId.length > 0, "WorkflowRunner.run: runId is required");
    const leaseAcquired = await this.runStore.acquireLease(
      runId,
      this.runnerId,
      this.clock.nowMs()
    );
    if (!leaseAcquired) {
      throw new Error(`Workflow run is already active: ${runId}`);
    }

    let activeRuntime: IJSRuntime | null = null;
    let leaseLostError: Error | null = null;
    const markLeaseLost = (cause?: unknown) => {
      // Keep the first failure; later renewals must not overwrite the cause.
      leaseLostError ??= new Error(
        cause instanceof Error
          ? `Workflow run lease lost: ${runId}: ${cause.message}`
          : `Workflow run lease lost: ${runId}`
      );
      activeRuntime?.abort();
    };
    const leaseGuard: WorkflowRunnerLeaseGuard = {
      throwIfLost() {
        if (leaseLostError != null) {
          throw leaseLostError;
        }
      },
    };
    let leaseRenewalInFlight = false;
    let leaseRenewal: ReturnType<typeof setInterval> | undefined;
    let removeAbortListener: () => void = () => undefined;
    try {
      // Everything after lease acquisition runs inside try/finally so that a
      // throwing `onLeaseAcquired` callback (or interval lookup) cannot leave the
      // lease held until it goes stale.
      options?.onLeaseAcquired?.();
      leaseRenewal = setInterval(() => {
        // Skip this tick if the previous renewal has not settled yet.
        if (leaseRenewalInFlight) {
          return;
        }
        leaseRenewalInFlight = true;
        void this.runStore
          .renewLease(runId, this.runnerId, this.clock.nowMs())
          .then((renewed) => {
            if (!renewed) {
              markLeaseLost();
            }
          })
          .catch(markLeaseLost)
          .finally(() => {
            leaseRenewalInFlight = false;
          });
      }, this.runStore.getLeaseRenewalIntervalMs());

      const run = await this.runStore.getRun(runId);
      // Continue numbering after the last persisted event.
      const sequence = new WorkflowEventSequence(run.events.at(-1)?.sequence ?? 0);
      if (run.status === "completed") {
        const completedResult = run.events.findLast((event) => event.type === "result")?.result;
        if (completedResult != null) {
          return completedResult;
        }
      }
      const resumingInterruptedRun = run.status === "interrupted";
      if (resumingInterruptedRun && options?.allowResumeFromInterrupted !== true) {
        throw new Error(`Workflow run interrupted: ${runId}`);
      }
      // When resuming after an interruption, previously started task ids are not reused.
      const ignoreStartedTaskIds = resumingInterruptedRun;
      let backgrounded: Promise<void> | null = null;
      const markBackgrounded = async () => {
        leaseGuard.throwIfLost();
        // Memoized so parallel callers append the "backgrounded" status only once.
        backgrounded ??= this.appendEvent(runId, {
          sequence: sequence.next(),
          type: "status",
          at: this.clock.nowIso(),
          status: "backgrounded",
        }).then(() => undefined);
        await backgrounded;
      };

      leaseGuard.throwIfLost();
      await this.appendEvent(
        runId,
        {
          sequence: sequence.next(),
          type: "status",
          at: this.clock.nowIso(),
          status: "running",
        },
        { allowInterruptedResume: resumingInterruptedRun }
      );

      using runtime = await this.runtimeFactory.create();
      activeRuntime = runtime;
      // The lease may have been lost while the runtime was being created.
      if (leaseLostError != null) {
        runtime.abort();
      }
      removeAbortListener = abortRuntimeOnSignal(runtime, options?.abortSignal);
      // Limits are applied before any workflow source is evaluated.
      runtime.setLimits({ timeoutMs: WORKFLOW_RUNTIME_TIMEOUT_MS });
      runtime.registerFunction("__workflowArgs", () => Promise.resolve(run.args));
      runtime.registerFunction("__workflowPhase", async (name, details) => {
        assert(typeof name === "string" && name.length > 0, "phase requires a non-empty name");
        leaseGuard.throwIfLost();
        await this.appendEvent(runId, {
          sequence: sequence.next(),
          type: "phase",
          at: this.clock.nowIso(),
          name,
          details,
        });
        return null;
      });
      runtime.registerFunction("__workflowLog", async (message, data) => {
        assert(
          typeof message === "string" && message.length > 0,
          "log requires a non-empty message"
        );
        leaseGuard.throwIfLost();
        await this.appendEvent(runId, {
          sequence: sequence.next(),
          type: "log",
          at: this.clock.nowIso(),
          message,
          data,
        });
        return null;
      });
      runtime.registerFunction("__workflowAgent", async (rawSpec) => {
        try {
          return await this.runAgentStep(runId, sequence, rawSpec, {
            ignoreStartedTaskIds,
            waitOptions: getWorkflowAgentWaitOptions(runtime, options),
            leaseGuard,
          });
        } catch (error) {
          if (isForegroundWaitBackgroundedError(error)) {
            await markBackgrounded();
          }
          throw error;
        }
      });
      runtime.registerFunction("__workflowParallelAgents", async (rawSpecs) => {
        try {
          return await this.runAgentStepsInParallel(runId, sequence, rawSpecs, {
            ignoreStartedTaskIds,
            waitOptions: getWorkflowAgentWaitOptions(runtime, options),
            leaseGuard,
          });
        } catch (error) {
          if (isForegroundWaitBackgroundedError(error)) {
            await markBackgrounded();
          }
          throw error;
        }
      });

      let compiledSource: string;
      try {
        compiledSource = compileWorkflowSource(run.definitionSource);
      } catch (error) {
        await this.recordRunFailure(
          runId,
          sequence,
          leaseGuard,
          error instanceof Error ? error.message : "Workflow compilation failed"
        );
        throw error;
      }

      const execution = await runtime.eval(compiledSource);
      if (!execution.success) {
        // Order matters: backgrounding, caller abort, interruption and lease loss
        // each take precedence over recording a plain execution failure.
        if (backgrounded != null) {
          throw new WorkflowRunBackgroundedError(runId);
        }
        if (options?.abortSignal?.aborted === true) {
          throw new Error(execution.error ?? "Workflow run aborted");
        }
        await this.throwIfInterrupted(runId);
        await this.recordRunFailure(
          runId,
          sequence,
          leaseGuard,
          execution.error ?? "Workflow execution failed"
        );
        throw new Error(execution.error ?? "Workflow execution failed");
      }

      await this.throwIfInterrupted(runId);
      let result: WorkflowResult;
      try {
        result = normalizeWorkflowResultForEvent(execution.result);
      } catch (error) {
        await this.recordRunFailure(runId, sequence, leaseGuard, getErrorMessage(error));
        throw error;
      }
      leaseGuard.throwIfLost();
      await this.appendEvent(runId, {
        sequence: sequence.next(),
        type: "result",
        at: this.clock.nowIso(),
        result,
      });
      // Re-check so an interruption recorded meanwhile is not overwritten by "completed".
      await this.throwIfInterrupted(runId);
      leaseGuard.throwIfLost();
      await this.appendEvent(runId, {
        sequence: sequence.next(),
        type: "status",
        at: this.clock.nowIso(),
        status: "completed",
      });
      return result;
    } finally {
      removeAbortListener();
      clearInterval(leaseRenewal);
      await this.runStore.releaseLease(runId, this.runnerId);
    }
  }

  /**
   * Appends an `error` event carrying `message`, then a `failed` status event.
   * Throws the lease-lost error instead of writing when the lease guard has tripped.
   */
  private async recordRunFailure(
    runId: string,
    sequence: WorkflowEventSequence,
    leaseGuard: WorkflowRunnerLeaseGuard,
    message: string
  ): Promise<void> {
    leaseGuard.throwIfLost();
    await this.appendEvent(runId, {
      sequence: sequence.next(),
      type: "error",
      at: this.clock.nowIso(),
      message,
    });
    await this.appendEvent(runId, {
      sequence: sequence.next(),
      type: "status",
      at: this.clock.nowIso(),
      status: "failed",
    });
  }

  /** Appends an event on behalf of this runner, asserting it still owns the lease. */
  private async appendEvent(
    runId: string,
    event: WorkflowRunEvent,
    options: AppendWorkflowRunEventOptions = {}
  ) {
    return await this.runStore.appendEvent(runId, event, {
      ...options,
      expectedLeaseOwnerId: this.runnerId,
    });
  }

  /** Records a step as started, scoped to this runner's lease. */
  private async recordStepStarted(
    runId: string,
    input: Parameters<WorkflowRunStore["recordStepStarted"]>[1]
  ): Promise<void> {
    await this.runStore.recordStepStarted(runId, input, { expectedLeaseOwnerId: this.runnerId });
  }

  /** Records a step as completed, scoped to this runner's lease. */
  private async recordStepCompleted(
    runId: string,
    input: Parameters<WorkflowRunStore["recordStepCompleted"]>[1]
  ): Promise<void> {
    await this.runStore.recordStepCompleted(runId, input, { expectedLeaseOwnerId: this.runnerId });
  }

  /** Records a step as failed, scoped to this runner's lease. */
  private async recordStepFailed(
    runId: string,
    input: Parameters<WorkflowRunStore["recordStepFailed"]>[1]
  ): Promise<void> {
    await this.runStore.recordStepFailed(runId, input, { expectedLeaseOwnerId: this.runnerId });
  }

  /** Re-reads the run and throws when its stored status is `interrupted`. */
  private async throwIfInterrupted(runId: string): Promise<void> {
    const run = await this.runStore.getRun(runId);
    if (run.status === "interrupted") {
      throw new Error(`Workflow run interrupted: ${runId}`);
    }
  }

  /**
   * Runs a single `agent(...)` call from workflow code.
   *
   * Returns the stored result when a completed step with the same id and input
   * hash exists; otherwise runs (or resumes) the agent with validation retries.
   */
  private async runAgentStep(
    runId: string,
    sequence: WorkflowEventSequence,
    rawSpec: unknown,
    options: {
      ignoreStartedTaskIds: boolean;
      waitOptions?: WorkflowAgentWaitOptions;
      leaseGuard: WorkflowRunnerLeaseGuard;
    }
  ): Promise<StructuredTaskOutput> {
    const spec = parseWorkflowAgentSpec(rawSpec);
    assertWorkflowStepId(spec.id, "agent");
    const inputHash = hashWorkflowStepInput(spec.id, spec);
    options.leaseGuard.throwIfLost();
    const existingStep = await this.runStore.getStep(runId, spec.id, inputHash);
    if (existingStep?.status === "completed" && existingStep.result != null) {
      return existingStep.result;
    }

    options.leaseGuard.throwIfLost();
    return await this.runAndRecordAgentStepWithRetries(runId, sequence, {
      spec,
      inputHash,
      startedAt: existingStep?.startedAt ?? this.clock.nowIso(),
      // Only re-attach to a started task when resuming is allowed to reuse it.
      taskId:
        !options.ignoreStartedTaskIds && existingStep?.status === "started"
          ? existingStep.taskId
          : undefined,
      leaseGuard: options.leaseGuard,
      waitOptions: options.waitOptions,
    });
  }

  /**
   * Runs a `parallelAgents([...])` call from workflow code.
   *
   * Completed steps are taken from the store; the rest run concurrently in
   * batches. Steps whose output fails validation are re-queued (up to
   * WORKFLOW_AGENT_MAX_ATTEMPTS) and run in the next batch. Results are returned
   * in the same order as `rawSpecs`.
   */
  private async runAgentStepsInParallel(
    runId: string,
    sequence: WorkflowEventSequence,
    rawSpecs: unknown,
    options: {
      ignoreStartedTaskIds: boolean;
      waitOptions?: WorkflowAgentWaitOptions;
      leaseGuard: WorkflowRunnerLeaseGuard;
    }
  ): Promise<StructuredTaskOutput[]> {
    assert(Array.isArray(rawSpecs), "parallelAgents requires an array of agent specs");
    assert(rawSpecs.length > 0, "parallelAgents requires at least one agent spec");

    const results = new Array<StructuredTaskOutput>(rawSpecs.length);
    const parsedSteps = rawSpecs.map((rawSpec) => {
      const spec = parseWorkflowAgentSpec(rawSpec);
      assertWorkflowStepId(spec.id, "parallelAgents");
      return { spec, inputHash: hashWorkflowStepInput(spec.id, spec) };
    });
    options.leaseGuard.throwIfLost();
    const existingSteps = await this.runStore.getSteps(
      runId,
      parsedSteps.map((step) => ({ stepId: step.spec.id, inputHash: step.inputHash }))
    );
    let pending: Array<{
      index: number;
      spec: WorkflowAgentSpec;
      inputHash: string;
      startedAt: string;
      taskId?: string;
      attempt: number;
      retryMessage?: string;
    }> = [];
    for (const [index, step] of parsedSteps.entries()) {
      const existingStep = existingSteps[index];
      if (existingStep?.status === "completed" && existingStep.result != null) {
        results[index] = existingStep.result;
        continue;
      }
      pending.push({
        index,
        spec: step.spec,
        inputHash: step.inputHash,
        startedAt: existingStep?.startedAt ?? this.clock.nowIso(),
        taskId:
          !options.ignoreStartedTaskIds && existingStep?.status === "started"
            ? existingStep.taskId
            : undefined,
        attempt: 1,
      });
    }

    while (pending.length > 0) {
      const currentPending = pending;
      pending = [];
      // Each batch gets its own abort controller, chained to the upstream signal,
      // so one backgrounded wait can cancel its siblings.
      const batchAbortController = new AbortController();
      const upstreamAbortSignal = options.waitOptions?.abortSignal;
      const abortBatch = () => batchAbortController.abort();
      if (upstreamAbortSignal?.aborted) {
        abortBatch();
      } else {
        upstreamAbortSignal?.addEventListener("abort", abortBatch, { once: true });
      }
      let foregroundBackgrounded = false;
      let interruptPromise: Promise<void> | undefined;
      const interruptRemainingTasks = async (): Promise<void> => {
        // Memoized: several failing children trigger a single interrupt request.
        interruptPromise ??= this.taskAdapter.interruptRun?.() ?? Promise.resolve();
        try {
          await interruptPromise;
        } catch {
          // Preserve the original child failure; workflow failure handling will surface that cause.
        }
      };
      const batchWaitOptions: WorkflowAgentWaitOptions = {
        ...options.waitOptions,
        abortSignal: batchAbortController.signal,
      };
      const pendingRuns = currentPending.map(async (step) => {
        return await this.runOrResumeAgentStep(runId, {
          spec:
            step.attempt === 1
              ? step.spec
              : buildRetryAgentSpec(
                  step.spec,
                  step.attempt - 1,
                  step.retryMessage ?? "previous attempt failed"
                ),
          inputHash: step.inputHash,
          startedAt: step.startedAt,
          taskId: step.taskId,
          leaseGuard: options.leaseGuard,
          waitOptions: batchWaitOptions,
        });
      });
      const guardedRuns = pendingRuns.map(async (pendingRun) => {
        try {
          return await pendingRun;
        } catch (error) {
          if (isForegroundWaitBackgroundedError(error)) {
            foregroundBackgrounded = true;
            abortBatch();
          } else if (!foregroundBackgrounded) {
            await interruptRemainingTasks();
          }
          throw error;
        }
      });
      let rawResults: WorkflowAgentResult[];
      try {
        rawResults = await Promise.all(guardedRuns);
      } catch (error) {
        // Let every sibling settle before surfacing the first failure.
        await Promise.allSettled(guardedRuns);
        throw error;
      } finally {
        upstreamAbortSignal?.removeEventListener("abort", abortBatch);
      }
      for (const [pendingIndex, rawResult] of rawResults.entries()) {
        const step = currentPending[pendingIndex];
        assert(step != null, "WorkflowRunner.runAgentStepsInParallel: missing pending step");
        try {
          results[step.index] = await this.recordAgentResult(runId, sequence, {
            ...step,
            leaseGuard: options.leaseGuard,
            rawResult,
          });
        } catch (error) {
          if (!isRetryableAgentOutputError(error) || step.attempt >= WORKFLOW_AGENT_MAX_ATTEMPTS) {
            throw error;
          }
          options.leaseGuard.throwIfLost();
          await this.recordAgentRetry(runId, sequence, step.spec.id, step.attempt, error);
          // Re-queue with a fresh start time and no task id so a new task is created.
          pending.push({
            ...step,
            startedAt: this.clock.nowIso(),
            taskId: undefined,
            attempt: step.attempt + 1,
            retryMessage: getErrorMessage(error),
          });
        }
      }
    }
    return results;
  }

  /**
   * Runs one agent step and records its result, retrying when the output fails
   * validation. Retries use a prompt extended with the validation message, a new
   * start time and no task id. Non-validation errors are rethrown immediately.
   */
  private async runAndRecordAgentStepWithRetries(
    runId: string,
    sequence: WorkflowEventSequence,
    step: {
      spec: WorkflowAgentSpec;
      inputHash: string;
      startedAt: string;
      taskId?: string;
      waitOptions?: WorkflowAgentWaitOptions;
      leaseGuard: WorkflowRunnerLeaseGuard;
    }
  ): Promise<StructuredTaskOutput> {
    let attempt = 1;
    let startedAt = step.startedAt;
    let taskId = step.taskId;
    let spec = step.spec;
    while (attempt <= WORKFLOW_AGENT_MAX_ATTEMPTS) {
      const rawResult = await this.runOrResumeAgentStep(runId, {
        spec,
        inputHash: step.inputHash,
        startedAt,
        taskId,
        leaseGuard: step.leaseGuard,
        waitOptions: step.waitOptions,
      });
      try {
        // Always validate/record against the original spec, not the retry prompt.
        return await this.recordAgentResult(runId, sequence, {
          spec: step.spec,
          inputHash: step.inputHash,
          startedAt,
          leaseGuard: step.leaseGuard,
          rawResult,
        });
      } catch (error) {
        if (!isRetryableAgentOutputError(error) || attempt >= WORKFLOW_AGENT_MAX_ATTEMPTS) {
          throw error;
        }
        step.leaseGuard.throwIfLost();
        await this.recordAgentRetry(runId, sequence, step.spec.id, attempt, error);
        spec = buildRetryAgentSpec(step.spec, attempt, getErrorMessage(error));
        startedAt = this.clock.nowIso();
        taskId = undefined;
        attempt += 1;
      }
    }
    throw new Error(`agent ${step.spec.id} exhausted validation retries`);
  }

  /** Appends a `log` event describing a validation-triggered retry of `stepId`. */
  private async recordAgentRetry(
    runId: string,
    sequence: WorkflowEventSequence,
    stepId: string,
    attempt: number,
    error: unknown
  ): Promise<void> {
    await this.appendEvent(runId, {
      sequence: sequence.next(),
      type: "log",
      at: this.clock.nowIso(),
      message: `Retrying ${stepId} after validation failure`,
      data: {
        attempt,
        maxAttempts: WORKFLOW_AGENT_MAX_ATTEMPTS,
        error: getErrorMessage(error),
      },
    });
  }

  /**
   * Waits on an already-started task when `step.taskId` is known and the adapter
   * supports it; otherwise starts a new agent task.
   *
   * A wait that fails with "Task not found" or "Task interrupted" falls through
   * to starting a fresh task; any other wait error is rethrown. The step is
   * recorded as started as soon as the adapter reports the task id, or after the
   * run returns if the adapter never invoked `onTaskCreated`.
   */
  private async runOrResumeAgentStep(
    runId: string,
    step: {
      spec: WorkflowAgentSpec;
      inputHash: string;
      startedAt: string;
      taskId?: string;
      waitOptions?: WorkflowAgentWaitOptions;
      leaseGuard: WorkflowRunnerLeaseGuard;
    }
  ): Promise<WorkflowAgentResult> {
    step.leaseGuard.throwIfLost();
    if (step.taskId != null && this.taskAdapter.waitForAgentTask != null) {
      try {
        return await this.taskAdapter.waitForAgentTask(step.taskId, step.spec, step.waitOptions);
      } catch (error) {
        if (!shouldRestartUnrecoverableStartedTask(error)) {
          throw error;
        }
      }
    }

    step.leaseGuard.throwIfLost();
    let recordedTaskId: string | undefined;
    const rawResult = await this.taskAdapter.runAgent(
      step.spec,
      {
        onTaskCreated: async (taskId) => {
          step.leaseGuard.throwIfLost();
          recordedTaskId = taskId;
          await this.recordStepStarted(runId, {
            stepId: step.spec.id,
            inputHash: step.inputHash,
            taskId,
            startedAt: step.startedAt,
          });
        },
      },
      step.waitOptions
    );
    step.leaseGuard.throwIfLost();
    if (recordedTaskId == null) {
      await this.recordStepStarted(runId, {
        stepId: step.spec.id,
        inputHash: step.inputHash,
        taskId: rawResult.taskId,
        startedAt: step.startedAt,
      });
    }
    return rawResult;
  }

  /**
   * Validates an agent's raw result and records the step as completed.
   *
   * The result must parse as StructuredTaskOutput and, when the spec declares an
   * `outputSchema`, its structured output must satisfy that schema. On either
   * failure the attempt is recorded as failed and a
   * WorkflowAgentOutputValidationError (retryable) is thrown.
   */
  private async recordAgentResult(
    runId: string,
    sequence: WorkflowEventSequence,
    step: {
      spec: WorkflowAgentSpec;
      inputHash: string;
      startedAt: string;
      leaseGuard: WorkflowRunnerLeaseGuard;
      rawResult: WorkflowAgentResult;
    }
  ): Promise<StructuredTaskOutput> {
    let result: StructuredTaskOutput;
    try {
      result = StructuredTaskOutputSchema.parse(step.rawResult);
    } catch (error) {
      const message = `agent ${step.spec.id} returned invalid task output: ${getErrorMessage(error)}`;
      await this.recordFailedAgentAttempt(runId, sequence, step, message);
      throw new WorkflowAgentOutputValidationError(message);
    }

    if (step.spec.outputSchema !== undefined) {
      const validation = validateJsonSchemaSubset(step.spec.outputSchema, result.structuredOutput);
      if (!validation.success) {
        const message = `agent ${step.spec.id} structured output failed schema validation: ${validation.errors
          .map((error) => `${error.path}: ${error.message}`)
          .join("; ")}`;
        await this.recordFailedAgentAttempt(runId, sequence, step, message);
        throw new WorkflowAgentOutputValidationError(message);
      }
    }
    step.leaseGuard.throwIfLost();
    const taskId = this.getTaskIdFromAgentResult(step.rawResult, step.spec.id);
    await this.recordStepCompleted(runId, {
      stepId: step.spec.id,
      inputHash: step.inputHash,
      taskId,
      result,
      startedAt: step.startedAt,
      completedAt: this.clock.nowIso(),
    });
    step.leaseGuard.throwIfLost();
    await this.appendEvent(runId, {
      sequence: sequence.next(),
      type: "task",
      at: this.clock.nowIso(),
      stepId: step.spec.id,
      taskId,
      status: "completed",
    });
    return result;
  }

  /**
   * Records a failed validation attempt: a `validation` event, a failed step
   * record and, when the raw result carries a task id, a failed `task` event.
   */
  private async recordFailedAgentAttempt(
    runId: string,
    sequence: WorkflowEventSequence,
    step: {
      spec: WorkflowAgentSpec;
      inputHash: string;
      startedAt: string;
      leaseGuard: WorkflowRunnerLeaseGuard;
      rawResult: WorkflowAgentResult;
    },
    message: string
  ): Promise<void> {
    step.leaseGuard.throwIfLost();
    // The raw result failed validation, so read the task id defensively.
    const taskId = getTaskIdFromUnknownAgentResult(step.rawResult);
    await this.appendEvent(runId, {
      sequence: sequence.next(),
      type: "validation",
      at: this.clock.nowIso(),
      stepId: step.spec.id,
      success: false,
      message,
    });
    step.leaseGuard.throwIfLost();
    await this.recordStepFailed(runId, {
      stepId: step.spec.id,
      inputHash: step.inputHash,
      taskId,
      error: message,
      startedAt: step.startedAt,
      completedAt: this.clock.nowIso(),
    });
    if (taskId != null) {
      step.leaseGuard.throwIfLost();
      await this.appendEvent(runId, {
        sequence: sequence.next(),
        type: "task",
        at: this.clock.nowIso(),
        stepId: step.spec.id,
        taskId,
        status: "failed",
      });
    }
  }

  /** Returns the result's task id, asserting that it is a non-empty string. */
  private getTaskIdFromAgentResult(result: WorkflowAgentResult, stepId: string): string {
    const maybeTaskId = result.taskId;
    assert(
      typeof maybeTaskId === "string" && maybeTaskId.length > 0,
      `agent ${stepId} returned no taskId`
    );
    return maybeTaskId;
  }
}
/**
 * Converts whatever the workflow function returned into a `WorkflowResult`.
 *
 * - An object with a string `reportMarkdown` is passed through (together with
 *   its `structuredOutput`, if any) and validated by `WorkflowResultSchema`.
 * - Any other value is JSON-serialized and the JSON text becomes `reportMarkdown`.
 *
 * @throws Error when the value cannot be JSON-serialized, or (via `assert`) when
 *   serialization yields no string, e.g. for `undefined`.
 */
function normalizeWorkflowResultForEvent(result: unknown): WorkflowResult {
  const looksLikeReport =
    typeof result === "object" &&
    result !== null &&
    "reportMarkdown" in result &&
    typeof result.reportMarkdown === "string";
  if (looksLikeReport) {
    const structuredOutput = "structuredOutput" in result ? result.structuredOutput : undefined;
    return WorkflowResultSchema.parse({
      reportMarkdown: result.reportMarkdown,
      structuredOutput,
    });
  }

  // JSON.stringify returns undefined at runtime for values such as `undefined`
  // or functions, hence the explicit string check below.
  let serialized: string | undefined;
  try {
    serialized = JSON.stringify(result);
  } catch (cause) {
    throw new Error(`Workflow result must be JSON-serializable: ${getErrorMessage(cause)}`);
  }
  assert(
    typeof serialized === "string",
    "Workflow must return a reportMarkdown result or another JSON-serializable value"
  );
  return WorkflowResultSchema.parse({ reportMarkdown: serialized });
}
""}function __muxWorkflow(` + ); + assert(compiled !== source, "Workflow definition must export a default function"); + + return ` +Date = undefined; +Math.random = undefined; +${compiled} +return (async () => await __muxWorkflow({ + args: __workflowArgs(), + phase: __workflowPhase, + log: __workflowLog, + agent: __workflowAgent, + parallelAgents: __workflowParallelAgents, +}))(); +`; +} diff --git a/src/node/services/workflows/WorkflowService.test.ts b/src/node/services/workflows/WorkflowService.test.ts new file mode 100644 index 0000000000..ce73622fbe --- /dev/null +++ b/src/node/services/workflows/WorkflowService.test.ts @@ -0,0 +1,858 @@ +/* eslint-disable @typescript-eslint/await-thenable, @typescript-eslint/no-unsafe-argument, @typescript-eslint/require-await */ +import * as fs from "node:fs/promises"; +import * as path from "node:path"; + +import { describe, expect, test } from "bun:test"; +import { ForegroundWaitBackgroundedError } from "@/node/services/taskService"; +import { DisposableTempDir } from "@/node/services/tempDir"; +import { QuickJSRuntimeFactory } from "@/node/services/ptc/quickjsRuntime"; +import { WorkflowDefinitionStore } from "./WorkflowDefinitionStore"; +import { WorkflowRunStore } from "./WorkflowRunStore"; +import { WorkflowService } from "./WorkflowService"; +import type { WorkflowTaskAdapter } from "./WorkflowRunner"; +import { hashWorkflowStepInput } from "./workflowReplayKey"; + +async function writeWorkflow(root: string, name: string, source: string) { + await fs.mkdir(root, { recursive: true }); + await fs.writeFile(path.join(root, `${name}.js`), source, "utf-8"); +} + +async function waitForCondition(description: string, predicate: () => boolean): Promise { + const deadline = Date.now() + 1_000; + while (Date.now() < deadline) { + if (predicate()) { + return; + } + await new Promise((resolve) => setTimeout(resolve, 10)); + } + throw new Error(`Timed out waiting for ${description}`); +} + +async function waitForWorkflowStatus( + 
/**
 * Polls `<sessionDir>/workflows/<runId>/run.json` until its `status` field equals `status`.
 *
 * Read and parse failures are treated as "not written yet" and retried. The file is read
 * at least once, and once more after the final sleep, so a status flushed during the last
 * polling interval is not misreported as a timeout.
 *
 * @param sessionDir Directory that contains the `workflows` folder.
 * @param runId Run whose `run.json` is inspected.
 * @param status Status string to wait for.
 * @param options.timeoutMs Total time budget in milliseconds (default 1000).
 * @param options.intervalMs Delay between polls in milliseconds (default 10).
 * @throws Error when the budget elapses; the message includes the last observation to
 *   make flaky-test triage easier.
 */
async function waitForWorkflowRunFileStatus(
  sessionDir: string,
  runId: string,
  status: string,
  options: { timeoutMs?: number; intervalMs?: number } = {}
): Promise<void> {
  const { timeoutMs = 1_000, intervalMs = 10 } = options;
  const runFile = path.join(sessionDir, "workflows", runId, "run.json");
  const deadline = Date.now() + timeoutMs;
  let lastObserved = "no readable run.json";
  for (;;) {
    try {
      // `| null` because a file containing the JSON literal `null` parses successfully.
      const run = JSON.parse(await fs.readFile(runFile, "utf-8")) as {
        status?: unknown;
      } | null;
      if (run?.status === status) {
        return;
      }
      lastObserved = `status ${JSON.stringify(run?.status)}`;
    } catch (error) {
      // Keep polling until the background writer flushes run.json.
      lastObserved = error instanceof Error ? error.message : String(error);
    }
    // Check the deadline only after reading so the last poll always counts.
    if (Date.now() >= deadline) {
      break;
    }
    await new Promise<void>((resolve) => setTimeout(resolve, intervalMs));
  }
  throw new Error(
    `Timed out waiting for ${runId} run file to become ${status}; last observed: ${lastObserved}`
  );
}
projectRoot, globalRoot, builtIns: [] }), + runStore, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter, + generateRunId: () => "wfr_demo", + runnerId: "runner-a", + clock: { + nowIso: () => "2026-05-29T00:00:00.000Z", + nowMs: () => 1_000, + }, + }); + + const result = await service.startNamedWorkflow({ + name: "demo", + workspaceId: "workspace-1", + projectTrusted: true, + args: { topic: "workflow services" }, + }); + const run = await runStore.getRun("wfr_demo"); + + expect(result).toEqual({ + runId: "wfr_demo", + status: "completed", + result: { reportMarkdown: "Final child summary" }, + }); + expect(run.definitionSource).toBe(source); + expect(run.definition.scope).toBe("global"); + }); + + test("writes and runs session-scoped scratch workflow definitions", async () => { + using tmp = new DisposableTempDir("workflow-service"); + const scratchRoot = path.join(tmp.path, "session", "workflows"); + const projectRoot = path.join(tmp.path, "project", ".mux", "workflows"); + const globalRoot = path.join(tmp.path, "mux-home", "workflows"); + const runStore = new WorkflowRunStore({ sessionDir: tmp.path }); + const service = new WorkflowService({ + definitionStore: new WorkflowDefinitionStore({ + projectRoot, + globalRoot, + scratchRoot, + builtIns: [], + }), + runStore, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent() { + throw new Error("agent should not run"); + }, + }, + generateRunId: () => "wfr_scratch_run", + runnerId: "runner-a", + }); + + const descriptor = await service.writeScratchWorkflow({ + workspaceId: "workspace-1", + name: "scratch-research", + description: "Scratch research", + source: + "export default function workflow({ args }) { return { reportMarkdown: 'Topic: ' + args.topic }; }", + }); + const result = await service.startNamedWorkflow({ + name: "scratch-research", + workspaceId: "workspace-1", + projectTrusted: false, + args: { topic: "drafts" }, + }); + const run = await 
runStore.getRun("wfr_scratch_run"); + + expect(descriptor).toMatchObject({ name: "scratch-research", scope: "scratch" }); + expect(result).toEqual({ + runId: "wfr_scratch_run", + status: "completed", + result: { reportMarkdown: "Topic: drafts" }, + }); + expect(run.definition.scope).toBe("scratch"); + await expect( + fs.readFile(path.join(scratchRoot, "scratch-research.js"), "utf-8") + ).resolves.toContain("// description: Scratch research"); + }); + + test("lists definitions through the definition store trust gate", async () => { + using tmp = new DisposableTempDir("workflow-service"); + const projectRoot = path.join(tmp.path, "project", ".mux", "workflows"); + const globalRoot = path.join(tmp.path, "mux-home", "workflows"); + await writeWorkflow( + projectRoot, + "demo", + "// description: Project workflow\nexport default function workflow() { return null; }\n" + ); + await writeWorkflow( + globalRoot, + "demo", + "// description: Global workflow\nexport default function workflow() { return null; }\n" + ); + + const service = new WorkflowService({ + definitionStore: new WorkflowDefinitionStore({ projectRoot, globalRoot, builtIns: [] }), + runStore: new WorkflowRunStore({ sessionDir: tmp.path }), + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent() { + return { taskId: "task_1", reportMarkdown: "unused" }; + }, + }, + generateRunId: () => "wfr_demo", + runnerId: "runner-a", + }); + + await expect(service.listDefinitions({ projectTrusted: false })).resolves.toEqual([ + expect.objectContaining({ name: "demo", scope: "global" }), + ]); + await expect(service.listDefinitions({ projectTrusted: true })).resolves.toEqual([ + expect.objectContaining({ name: "demo", scope: "project" }), + ]); + }); + + test("interrupts a run without deleting completed step state", async () => { + using tmp = new DisposableTempDir("workflow-service"); + const runStore = new WorkflowRunStore({ sessionDir: tmp.path }); + await runStore.createRun({ + id: 
"wfr_interrupt", + workspaceId: "workspace-1", + definition: { name: "demo", description: "Demo", scope: "built-in", executable: true }, + definitionSource: + "export default function workflow() { return { reportMarkdown: 'unused' }; }\n", + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + await runStore.recordStepCompleted("wfr_interrupt", { + stepId: "done", + inputHash: "hash:done", + taskId: "task_done", + result: { reportMarkdown: "done" }, + startedAt: "2026-05-29T00:00:01.000Z", + completedAt: "2026-05-29T00:00:02.000Z", + }); + + let interruptCalls = 0; + let statusDuringInterrupt: string | undefined; + const service = new WorkflowService({ + definitionStore: new WorkflowDefinitionStore({ + projectRoot: path.join(tmp.path, "project"), + globalRoot: path.join(tmp.path, "global"), + builtIns: [], + }), + runStore, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent() { + throw new Error("unused"); + }, + async interruptRun() { + statusDuringInterrupt = (await runStore.getRun("wfr_interrupt")).status; + interruptCalls += 1; + }, + }, + runnerId: "runner-a", + clock: { + nowIso: () => "2026-05-29T00:00:03.000Z", + nowMs: () => 1_000, + }, + }); + + const interrupted = await service.interruptRun({ + workspaceId: "workspace-1", + runId: "wfr_interrupt", + }); + const completedStep = await runStore.getCompletedStep("wfr_interrupt", "done", "hash:done"); + + expect(interrupted.status).toBe("interrupted"); + expect(interruptCalls).toBe(1); + expect(statusDuringInterrupt).toBe("interrupted"); + expect(completedStep?.result).toEqual({ reportMarkdown: "done" }); + }); + + test("interrupts foreground workflow runs when the caller aborts", async () => { + using tmp = new DisposableTempDir("workflow-service"); + const projectRoot = path.join(tmp.path, "project", ".mux", "workflows"); + const globalRoot = path.join(tmp.path, "mux-home", "workflows"); + await writeWorkflow( + globalRoot, + "abortable", + "// description: Abortable 
workflow\nexport default function workflow({ agent }) { return agent({ id: 'slow-step', prompt: 'slow' }); }\n" + ); + const runStore = new WorkflowRunStore({ sessionDir: tmp.path }); + let agentWaitStarted = false; + let interruptCalls = 0; + let agentAbortObserved = false; + let abortObservedDuringInterrupt: boolean | undefined; + let statusDuringAbortInterrupt: string | undefined; + const service = new WorkflowService({ + definitionStore: new WorkflowDefinitionStore({ projectRoot, globalRoot, builtIns: [] }), + runStore, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent(_spec, _lifecycle, waitOptions) { + agentWaitStarted = true; + return await new Promise((_, reject) => { + waitOptions?.abortSignal?.addEventListener( + "abort", + () => { + agentAbortObserved = true; + reject(new Error("Task interrupted")); + }, + { once: true } + ); + }); + }, + async interruptRun() { + abortObservedDuringInterrupt = agentAbortObserved; + statusDuringAbortInterrupt = (await runStore.getRun("wfr_abort")).status; + interruptCalls += 1; + }, + }, + generateRunId: () => "wfr_abort", + runnerId: "runner-a", + }); + const abortController = new AbortController(); + + const runPromise = service.startNamedWorkflow({ + name: "abortable", + workspaceId: "workspace-1", + projectTrusted: false, + args: {}, + abortSignal: abortController.signal, + }); + await waitForCondition("foreground agent to start", () => agentWaitStarted); + abortController.abort(); + + await expect(runPromise).rejects.toThrow(/interrupted|aborted/i); + await expect(runStore.getRun("wfr_abort")).resolves.toMatchObject({ status: "interrupted" }); + expect(interruptCalls).toBe(1); + expect(abortObservedDuringInterrupt).toBe(true); + expect(statusDuringAbortInterrupt).toBe("interrupted"); + }); + + test("interruptRun aborts an active foreground runner from another service instance", async () => { + using tmp = new DisposableTempDir("workflow-service"); + const projectRoot = 
path.join(tmp.path, "project", ".mux", "workflows"); + const globalRoot = path.join(tmp.path, "mux-home", "workflows"); + await writeWorkflow( + globalRoot, + "interrupt-active", + "// description: Interrupt active\nexport default function workflow({ agent }) { return agent({ id: 'slow-step', prompt: 'slow' }); }\n" + ); + const runStore = new WorkflowRunStore({ sessionDir: tmp.path }); + let agentWaitStarted = false; + let agentAbortObserved = false; + let interruptCalls = 0; + const service = new WorkflowService({ + definitionStore: new WorkflowDefinitionStore({ projectRoot, globalRoot, builtIns: [] }), + runStore, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent(_spec, _lifecycle, waitOptions) { + agentWaitStarted = true; + return await new Promise((_, reject) => { + waitOptions?.abortSignal?.addEventListener( + "abort", + () => { + agentAbortObserved = true; + reject(new Error("Task interrupted")); + }, + { once: true } + ); + }); + }, + async interruptRun() { + throw new Error("starter service interruptRun should not be called"); + }, + }, + generateRunId: () => "wfr_interrupt_active", + runnerId: "runner-a", + }); + const interruptService = new WorkflowService({ + definitionStore: new WorkflowDefinitionStore({ projectRoot, globalRoot, builtIns: [] }), + runStore, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent() { + throw new Error("interrupt service runAgent should not be called"); + }, + async interruptRun() { + interruptCalls += 1; + }, + }, + runnerId: "runner-b", + }); + + const runPromise = service.startNamedWorkflow({ + name: "interrupt-active", + workspaceId: "workspace-1", + projectTrusted: false, + args: {}, + }); + const runErrorPromise = runPromise.then( + () => null, + (error: unknown) => error + ); + await waitForCondition("foreground agent to start", () => agentWaitStarted); + + const interrupted = await interruptService.interruptRun({ + workspaceId: "workspace-1", + runId: 
"wfr_interrupt_active", + }); + + expect(interrupted.status).toBe("interrupted"); + expect(agentAbortObserved).toBe(true); + expect(interruptCalls).toBe(1); + const runError = await runErrorPromise; + expect(runError).toBeInstanceOf(Error); + expect(runError instanceof Error ? runError.message : "").toMatch(/interrupted|aborted/i); + }); + + test("moves foreground workflow runs to background when child waits are backgrounded", async () => { + using tmp = new DisposableTempDir("workflow-service"); + const projectRoot = path.join(tmp.path, "project", ".mux", "workflows"); + const globalRoot = path.join(tmp.path, "mux-home", "workflows"); + await writeWorkflow( + globalRoot, + "backgroundable", + "// description: Backgroundable workflow\nexport default function workflow({ agent }) { return agent({ id: 'slow-step', prompt: 'slow' }); }\n" + ); + const runStore = new WorkflowRunStore({ sessionDir: tmp.path }); + let calls = 0; + const backgroundFlags: Array = []; + const service = new WorkflowService({ + definitionStore: new WorkflowDefinitionStore({ projectRoot, globalRoot, builtIns: [] }), + runStore, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent(_spec, _lifecycle, waitOptions) { + calls += 1; + backgroundFlags.push(waitOptions?.backgroundOnMessageQueued); + if (calls === 1) { + throw new ForegroundWaitBackgroundedError(); + } + return { taskId: "task_slow", reportMarkdown: "done" }; + }, + }, + generateRunId: () => "wfr_backgrounded", + runnerId: "runner-a", + }); + + const result = await service.startNamedWorkflow({ + name: "backgroundable", + workspaceId: "workspace-1", + projectTrusted: false, + args: {}, + }); + + expect(result).toEqual({ runId: "wfr_backgrounded", status: "backgrounded", result: null }); + await waitForWorkflowStatus(runStore, "wfr_backgrounded", "completed"); + await waitForWorkflowRunFileStatus(tmp.path, "wfr_backgrounded", "completed"); + expect(calls).toBe(2); + expect(backgroundFlags).toEqual([true, 
false]); + }); + + test("resumes the same run id and reuses completed steps", async () => { + using tmp = new DisposableTempDir("workflow-service"); + const runStore = new WorkflowRunStore({ sessionDir: tmp.path }); + const source = `export default function workflow({ agent }) { + const first = agent({ id: "first", prompt: "first" }); + const second = agent({ id: "second", prompt: "second" }); + return { reportMarkdown: first.reportMarkdown + " + " + second.reportMarkdown }; +} +`; + await runStore.createRun({ + id: "wfr_resume", + workspaceId: "workspace-1", + definition: { name: "demo", description: "Demo", scope: "built-in", executable: true }, + definitionSource: source, + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + await runStore.recordStepCompleted("wfr_resume", { + stepId: "first", + inputHash: hashWorkflowStepInput("first", { id: "first", prompt: "first" }), + taskId: "task_first", + result: { reportMarkdown: "first done" }, + startedAt: "2026-05-29T00:00:01.000Z", + completedAt: "2026-05-29T00:00:02.000Z", + }); + await runStore.appendEvent("wfr_resume", { + sequence: 1, + type: "status", + at: "2026-05-29T00:00:03.000Z", + status: "interrupted", + }); + + const taskCalls: string[] = []; + const service = new WorkflowService({ + definitionStore: new WorkflowDefinitionStore({ + projectRoot: path.join(tmp.path, "project"), + globalRoot: path.join(tmp.path, "global"), + builtIns: [], + }), + runStore, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent(spec) { + taskCalls.push(spec.id); + return { taskId: `task_${spec.id}`, reportMarkdown: `${spec.id} done` }; + }, + }, + runnerId: "runner-a", + clock: { + nowIso: () => "2026-05-29T00:00:04.000Z", + nowMs: () => 1_000, + }, + }); + + const result = await service.resumeRun({ + workspaceId: "workspace-1", + runId: "wfr_resume", + projectTrusted: true, + }); + + expect(result).toEqual({ + runId: "wfr_resume", + status: "completed", + result: { reportMarkdown: "first done + 
second done" }, + }); + expect(taskCalls).toEqual(["second"]); + }); + + test("does not mark resume running before the runner acquires the lease", async () => { + using tmp = new DisposableTempDir("workflow-service"); + const runStore = new WorkflowRunStore({ sessionDir: tmp.path }); + await runStore.createRun({ + id: "wfr_busy_resume", + workspaceId: "workspace-1", + definition: { name: "demo", description: "Demo", scope: "built-in", executable: true }, + definitionSource: + "export default function workflow() { return { reportMarkdown: 'done' }; }\n", + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + await runStore.appendStatus("wfr_busy_resume", "interrupted", "2026-05-29T00:00:01.000Z"); + await runStore.acquireLease("wfr_busy_resume", "old-runner", Date.now()); + const originalConsoleError = console.error; + console.error = () => undefined; + try { + const service = new WorkflowService({ + definitionStore: new WorkflowDefinitionStore({ + projectRoot: path.join(tmp.path, "project"), + globalRoot: path.join(tmp.path, "global"), + builtIns: [], + }), + runStore, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent() { + return { taskId: "task_1", reportMarkdown: "unused" }; + }, + }, + runnerId: "runner-a", + }); + + await expect( + service.resumeRunInBackground({ + workspaceId: "workspace-1", + runId: "wfr_busy_resume", + projectTrusted: true, + }) + ).rejects.toThrow(/already active/); + + await expect(runStore.getRun("wfr_busy_resume")).resolves.toMatchObject({ + status: "interrupted", + }); + } finally { + console.error = originalConsoleError; + await runStore.releaseLease("wfr_busy_resume", "old-runner"); + } + }); + + test("promotes a scratch workflow run to a reusable global definition", async () => { + using tmp = new DisposableTempDir("workflow-service"); + const projectRoot = path.join(tmp.path, "project", ".mux", "workflows"); + const globalRoot = path.join(tmp.path, "mux-home", "workflows"); + const runStore = new 
WorkflowRunStore({ sessionDir: tmp.path }); + await runStore.createRun({ + id: "wfr_scratch", + workspaceId: "workspace-1", + definition: { name: "scratch", description: "Scratch", scope: "scratch", executable: true }, + definitionSource: + "export default function workflow() { return { reportMarkdown: 'scratch' }; }\n", + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + const service = new WorkflowService({ + definitionStore: new WorkflowDefinitionStore({ projectRoot, globalRoot, builtIns: [] }), + runStore, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent() { + return { taskId: "task_1", reportMarkdown: "unused" }; + }, + }, + runnerId: "runner-a", + }); + + const descriptor = await service.promoteScratchWorkflow({ + workspaceId: "workspace-1", + runId: "wfr_scratch", + name: "promoted-research", + description: "Promoted research workflow", + location: "global", + overwrite: false, + projectTrusted: false, + }); + const promotedSource = await fs.readFile( + path.join(globalRoot, "promoted-research.js"), + "utf-8" + ); + + expect(descriptor).toMatchObject({ + name: "promoted-research", + description: "Promoted research workflow", + scope: "global", + executable: true, + }); + expect(promotedSource).toContain("// description: Promoted research workflow"); + expect(promotedSource).toContain("reportMarkdown: 'scratch'"); + await expect(service.listDefinitions({ projectTrusted: false })).resolves.toEqual([ + expect.objectContaining({ name: "promoted-research", scope: "global" }), + ]); + }); + + test("can start a workflow in the background and persist a running run immediately", async () => { + using tmp = new DisposableTempDir("workflow-service"); + const projectRoot = path.join(tmp.path, "project", ".mux", "workflows"); + const globalRoot = path.join(tmp.path, "mux-home", "workflows"); + await writeWorkflow( + globalRoot, + "background-research", + "// description: Background workflow\nexport default function workflow({ agent }) { 
return agent({ id: 'slow-step', prompt: 'slow' }); }\n" + ); + const runStore = new WorkflowRunStore({ sessionDir: tmp.path }); + let releaseAgent: ((value: { taskId: string; reportMarkdown: string }) => void) | undefined; + const service = new WorkflowService({ + definitionStore: new WorkflowDefinitionStore({ projectRoot, globalRoot, builtIns: [] }), + runStore, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent() { + return await new Promise<{ taskId: string; reportMarkdown: string }>((resolve) => { + releaseAgent = resolve; + }); + }, + }, + generateRunId: () => "wfr_background", + runnerId: "runner-a", + }); + + const started = await service.startNamedWorkflowInBackground({ + name: "background-research", + workspaceId: "workspace-1", + projectTrusted: false, + args: {}, + }); + + expect(started).toMatchObject({ runId: "wfr_background", status: "running", result: null }); + await expect(runStore.getRun("wfr_background")).resolves.toMatchObject({ + id: "wfr_background", + status: "running", + }); + + await waitForCondition("background agent to start", () => releaseAgent != null); + releaseAgent?.({ taskId: "task_slow", reportMarkdown: "done" }); + await waitForWorkflowStatus(runStore, "wfr_background", "completed"); + await expect(runStore.getRun("wfr_background")).resolves.toMatchObject({ status: "completed" }); + }); + + test("auto-resumes crash-recovered running runs without resuming user-interrupted runs", async () => { + using tmp = new DisposableTempDir("workflow-service"); + const runStore = new WorkflowRunStore({ sessionDir: tmp.path }); + await runStore.createRun({ + id: "wfr_crash_running", + workspaceId: "workspace-1", + definition: { name: "demo", description: "Demo", scope: "built-in", executable: true }, + definitionSource: + "export default function workflow({ agent }) { return agent({ id: 'after-crash', prompt: 'resume' }); }\n", + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + await 
runStore.appendStatus("wfr_crash_running", "running", "2026-05-29T00:00:01.000Z"); + await runStore.createRun({ + id: "wfr_user_interrupted", + workspaceId: "workspace-1", + definition: { name: "demo", description: "Demo", scope: "built-in", executable: true }, + definitionSource: + "export default function workflow({ agent }) { return agent({ id: 'should-not-run', prompt: 'blocked' }); }\n", + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + await runStore.appendStatus("wfr_user_interrupted", "interrupted", "2026-05-29T00:00:01.000Z"); + const taskCalls: string[] = []; + const service = new WorkflowService({ + definitionStore: new WorkflowDefinitionStore({ + projectRoot: path.join(tmp.path, "project"), + globalRoot: path.join(tmp.path, "global"), + builtIns: [], + }), + runStore, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent(spec) { + taskCalls.push(spec.id); + return { taskId: `task_${spec.id}`, reportMarkdown: "resumed" }; + }, + }, + runnerId: "runner-a", + }); + + await expect( + service.resumeCrashedRuns({ workspaceId: "workspace-1", projectTrusted: true }) + ).resolves.toEqual(["wfr_crash_running"]); + await waitForWorkflowStatus(runStore, "wfr_crash_running", "completed"); + await waitForWorkflowRunFileStatus(tmp.path, "wfr_crash_running", "completed"); + + expect(taskCalls).toEqual(["after-crash"]); + await expect(runStore.getRun("wfr_user_interrupted")).resolves.toMatchObject({ + status: "interrupted", + }); + }); + + test("uses a fresh lease owner for each runner", async () => { + using tmp = new DisposableTempDir("workflow-service"); + const projectRoot = path.join(tmp.path, "project", ".mux", "workflows"); + const globalRoot = path.join(tmp.path, "mux-home", "workflows"); + const source = `// description: Demo workflow +export default function workflow() { + return { reportMarkdown: "ok" }; +} +`; + await writeWorkflow(globalRoot, "demo", source); + const runStore = new WorkflowRunStore({ sessionDir: tmp.path }); 
+ const ownerIds: string[] = []; + const acquireLease = runStore.acquireLease.bind(runStore); + runStore.acquireLease = async (runId, ownerId, nowMs) => { + ownerIds.push(ownerId); + return await acquireLease(runId, ownerId, nowMs); + }; + let nextRunId = 0; + const service = new WorkflowService({ + definitionStore: new WorkflowDefinitionStore({ projectRoot, globalRoot, builtIns: [] }), + runStore, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent() { + throw new Error("workflow should not spawn tasks"); + }, + }, + generateRunId: () => `wfr_owner_${++nextRunId}`, + runnerId: "runner-a", + clock: { + nowIso: () => "2026-05-29T00:00:00.000Z", + nowMs: () => 1_000, + }, + }); + + await service.startNamedWorkflow({ + name: "demo", + workspaceId: "workspace-1", + projectTrusted: true, + args: {}, + }); + await service.startNamedWorkflow({ + name: "demo", + workspaceId: "workspace-1", + projectTrusted: true, + args: {}, + }); + + expect(ownerIds).toHaveLength(2); + expect(new Set(ownerIds).size).toBe(2); + expect(ownerIds.every((ownerId) => ownerId.startsWith("runner-a:"))).toBe(true); + }); + + test("requires current project trust before resuming project-local workflow runs", async () => { + using tmp = new DisposableTempDir("workflow-service"); + const runStore = new WorkflowRunStore({ sessionDir: tmp.path }); + await runStore.createRun({ + id: "wfr_project_resume", + workspaceId: "workspace-1", + definition: { + name: "project-flow", + description: "Project", + scope: "project", + executable: true, + }, + definitionSource: + "export default function workflow({ agent }) { return agent({ id: 'trusted-step', prompt: 'run' }); }\n", + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + await runStore.appendStatus("wfr_project_resume", "interrupted", "2026-05-29T00:00:01.000Z"); + let taskCalls = 0; + const service = new WorkflowService({ + definitionStore: new WorkflowDefinitionStore({ + projectRoot: path.join(tmp.path, "project"), + 
globalRoot: path.join(tmp.path, "global"), + builtIns: [], + }), + runStore, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent() { + taskCalls += 1; + return { taskId: "task_trusted", reportMarkdown: "should not run" }; + }, + }, + runnerId: "runner-a", + }); + + await expect( + service.resumeRunInBackground({ + workspaceId: "workspace-1", + runId: "wfr_project_resume", + projectTrusted: false, + }) + ).rejects.toThrow(/Project trust/); + await runStore.appendStatus("wfr_project_resume", "running", "2026-05-29T00:00:02.000Z", { + allowInterruptedResume: true, + }); + + await expect( + service.resumeCrashedRuns({ workspaceId: "workspace-1", projectTrusted: false }) + ).resolves.toEqual([]); + expect(taskCalls).toBe(0); + }); + + test("requires project trust before promoting to project-local workflows", async () => { + using tmp = new DisposableTempDir("workflow-service"); + const projectRoot = path.join(tmp.path, "project", ".mux", "workflows"); + const globalRoot = path.join(tmp.path, "mux-home", "workflows"); + const runStore = new WorkflowRunStore({ sessionDir: tmp.path }); + await runStore.createRun({ + id: "wfr_scratch", + workspaceId: "workspace-1", + definition: { name: "scratch", description: "Scratch", scope: "scratch", executable: true }, + definitionSource: "export default function workflow() { return null; }\n", + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + const service = new WorkflowService({ + definitionStore: new WorkflowDefinitionStore({ projectRoot, globalRoot, builtIns: [] }), + runStore, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent() { + return { taskId: "task_1", reportMarkdown: "unused" }; + }, + }, + runnerId: "runner-a", + }); + + await expect( + service.promoteScratchWorkflow({ + workspaceId: "workspace-1", + runId: "wfr_scratch", + name: "project-research", + description: "Project research workflow", + location: "project", + overwrite: false, + projectTrusted: 
/**
 * Constructor options for `WorkflowService`.
 *
 * The constructor asserts that `runnerId` is non-empty and that at least one of
 * `taskAdapter` / `taskAdapterFactory` is provided.
 */
export interface WorkflowServiceOptions {
  /** Backing store used to list, read and write workflow definitions. */
  definitionStore: WorkflowDefinitionStore;
  /** Backing store used to list and read runs and to append run status changes. */
  runStore: WorkflowRunStore;
  /** JS runtime factory kept on the service. NOTE(review): presumably handed to each WorkflowRunner — confirm. */
  runtimeFactory: IJSRuntimeFactory;
  /** Shared task adapter; used when no `taskAdapterFactory` is supplied. */
  taskAdapter?: WorkflowTaskAdapter;
  /** Per-run task adapter factory; `interruptRun` prefers it over `taskAdapter` when present. */
  taskAdapterFactory?: (runId: string) => WorkflowTaskAdapter;
  /** Run id generator; defaults to `generateWorkflowRunId` when omitted. */
  generateRunId?: () => string;
  /** Stable prefix; WorkflowService appends run identity and a nonce for each lease owner. */
  runnerId: string;
  /** Optional clock; when omitted, `interruptRun` timestamps with `new Date().toISOString()`. */
  clock?: WorkflowRunnerClock;
}
generateWorkflowRunId; + this.runnerId = options.runnerId; + this.clock = options.clock; + } + + async listDefinitions(options: { + projectTrusted: boolean; + }): Promise { + return await this.definitionStore.listDefinitions(options); + } + + async readDefinition(input: { + name: string; + projectTrusted: boolean; + }): Promise { + return await this.definitionStore.readDefinition(input.name, { + projectTrusted: input.projectTrusted, + }); + } + + async writeScratchWorkflow( + input: WriteScratchWorkflowInput + ): Promise { + assert( + input.workspaceId.length > 0, + "WorkflowService.writeScratchWorkflow: workspaceId is required" + ); + return await this.definitionStore.writeScratchDefinition({ + name: input.name, + description: input.description, + source: input.source, + }); + } + + async listRuns(input: { workspaceId: string }): Promise { + assert(input.workspaceId.length > 0, "WorkflowService.listRuns: workspaceId is required"); + const runs = await this.runStore.listRuns(); + return runs.filter((run) => run.workspaceId === input.workspaceId); + } + + async resumeCrashedRuns(input: { + workspaceId: string; + projectTrusted: boolean; + }): Promise { + assert( + input.workspaceId.length > 0, + "WorkflowService.resumeCrashedRuns: workspaceId is required" + ); + const runs = await this.listRuns({ workspaceId: input.workspaceId }); + const resumable = runs.filter( + (run) => run.status === "running" || run.status === "backgrounded" + ); + const resumedRunIds: string[] = []; + for (const run of resumable) { + if (!canResumeRunWithCurrentTrust(run, input.projectTrusted)) { + continue; + } + void this.runInBackground(run.id, "Auto-resumed workflow run failed:").catch(() => undefined); + resumedRunIds.push(run.id); + } + return resumedRunIds; + } + + async getRun(input: { workspaceId: string; runId: string }): Promise { + assert(input.workspaceId.length > 0, "WorkflowService.getRun: workspaceId is required"); + assert(input.runId.length > 0, "WorkflowService.getRun: 
runId is required"); + try { + const run = await this.runStore.getRun(input.runId); + return run.workspaceId === input.workspaceId ? run : null; + } catch { + return null; + } + } + + /** + * Interrupts a workflow run owned by `input.workspaceId`: aborts the registered runner for the + * run, appends the "interrupted" status, then asks the task adapter to interrupt its work. + * Throws when the run is not found for this workspace or is already completed/failed. + */ + async interruptRun(input: { workspaceId: string; runId: string }): Promise { + // Confirm the run belongs to this workspace before touching the process-wide runner registry, + // so a mismatched workspaceId cannot stop another workspace's runner. + const run = await this.requireRunForWorkspace(input); + // Stop the active coordinator before child cleanup and status writes, which can block on I/O. + this.abortActiveRunner(input.runId); + assertWorkflowRunCanTransition(run.status, "interrupted"); + const interrupted = await this.runStore.appendStatus( + input.runId, + "interrupted", + this.clock?.nowIso() ?? new Date().toISOString() + ); + await (this.taskAdapterFactory?.(input.runId) ?? this.requireTaskAdapter()).interruptRun?.(); + return interrupted; + } + + async resumeRunInBackground(input: { + workspaceId: string; + runId: string; + projectTrusted: boolean; + }): Promise { + const run = await this.requireRunForWorkspace(input); + assertRunCanResumeWithCurrentTrust(run, input.projectTrusted); + assertWorkflowRunCanTransition(run.status, "running"); + await this.runInBackground(input.runId, "Background workflow resume failed:", { + allowResumeFromInterrupted: run.status === "interrupted", + }); + return { runId: input.runId, status: "running", result: null }; + } + + async resumeRun(input: { + workspaceId: string; + runId: string; + projectTrusted: boolean; + }): Promise { + const run = await this.requireRunForWorkspace(input); + assertRunCanResumeWithCurrentTrust(run, input.projectTrusted); + assertWorkflowRunCanTransition(run.status, "running"); + const runnerAbortController = new AbortController(); + let unregisterRunnerAbort: () => void = () => undefined; + try { + const runner = this.createRunner(input.runId); + const result = await runner.run(input.runId, { + abortSignal: runnerAbortController.signal, + onLeaseAcquired: () => { + unregisterRunnerAbort = this.registerActiveRunnerAbortController( + input.runId, + runnerAbortController + ); + }, + 
allowResumeFromInterrupted: run.status === "interrupted", + }); + return { runId: input.runId, status: "completed", result }; + } finally { + unregisterRunnerAbort(); + } + } + + async promoteScratchWorkflow( + input: PromoteScratchWorkflowInput + ): Promise { + const run = await this.requireRunForWorkspace(input); + if (run.definition.scope !== "scratch") { + throw new Error("Only scratch workflow runs can be promoted"); + } + return await this.definitionStore.promoteDefinition({ + name: input.name, + description: input.description, + source: run.definitionSource, + location: input.location, + overwrite: input.overwrite, + projectTrusted: input.projectTrusted, + }); + } + + async startNamedWorkflowInBackground( + input: StartNamedWorkflowInput + ): Promise { + const runId = await this.createNamedWorkflowRun(input); + await this.runStore.appendStatus( + runId, + "running", + this.clock?.nowIso() ?? new Date().toISOString() + ); + void this.runInBackground(runId, "Background workflow run failed:").catch(() => undefined); + return { runId, status: "running", result: null }; + } + + async startNamedWorkflow(input: StartNamedWorkflowInput): Promise { + const runId = await this.createNamedWorkflowRun(input); + if (input.abortSignal?.aborted === true) { + await this.interruptRun({ workspaceId: input.workspaceId, runId }); + throw new Error(`Workflow run interrupted: ${runId}`); + } + + const runnerAbortController = new AbortController(); + let unregisterRunnerAbort: () => void = () => undefined; + const abortInterrupt = this.interruptRunOnAbort( + input.workspaceId, + runId, + input.abortSignal, + runnerAbortController + ); + try { + const runner = this.createRunner(runId); + const result = await runner.run(runId, { + abortSignal: runnerAbortController.signal, + onLeaseAcquired: () => { + unregisterRunnerAbort = this.registerActiveRunnerAbortController( + runId, + runnerAbortController + ); + }, + }); + return { runId, status: "completed", result }; + } catch (error) { + 
if (error instanceof WorkflowRunBackgroundedError) { + void this.runInBackground(runId, "Backgrounded workflow run failed:").catch( + () => undefined + ); + return { runId, status: "backgrounded", result: null }; + } + throw error; + } finally { + abortInterrupt.remove(); + await abortInterrupt.wait(); + unregisterRunnerAbort(); + } + } + + private registerActiveRunnerAbortController( + runId: string, + controller: AbortController + ): () => void { + assert(runId.length > 0, "WorkflowService.registerActiveRunnerAbortController: runId required"); + const existing = activeWorkflowRunnerAbortControllers.get(runId); + if (existing != null && existing !== controller) { + existing.abort(); + } + activeWorkflowRunnerAbortControllers.set(runId, controller); + return () => { + if (activeWorkflowRunnerAbortControllers.get(runId) === controller) { + activeWorkflowRunnerAbortControllers.delete(runId); + } + }; + } + + private abortActiveRunner(runId: string): void { + activeWorkflowRunnerAbortControllers.get(runId)?.abort(); + } + + private interruptRunOnAbort( + workspaceId: string, + runId: string, + abortSignal: AbortSignal | undefined, + runnerAbortController: AbortController | undefined + ): { remove: () => void; wait: () => Promise } { + if (abortSignal == null) { + return { remove: () => undefined, wait: () => Promise.resolve() }; + } + let interruptPromise: Promise | null = null; + const interrupt = () => { + // Cancel the coordinator before interrupt side effects can block on task cleanup or disk I/O. + runnerAbortController?.abort(); + interruptPromise = (async () => { + try { + await this.interruptRun({ workspaceId, runId }); + } catch { + // The run may have completed or failed before the abort event was delivered. 
+ } + })(); + }; + abortSignal.addEventListener("abort", interrupt, { once: true }); + return { + remove: () => abortSignal.removeEventListener("abort", interrupt), + wait: async () => { + await interruptPromise; + }, + }; + } + + private async createNamedWorkflowRun(input: StartNamedWorkflowInput): Promise { + assert( + input.workspaceId.length > 0, + "WorkflowService.createNamedWorkflowRun: workspaceId is required" + ); + const definition = await this.definitionStore.readDefinition(input.name, { + projectTrusted: input.projectTrusted, + }); + const runId = this.generateRunId(); + assert( + runId.length > 0, + "WorkflowService.createNamedWorkflowRun: generated run id is required" + ); + + await this.runStore.createRun({ + id: runId, + workspaceId: input.workspaceId, + definition: definition.descriptor, + definitionSource: definition.source, + args: input.args, + now: this.clock?.nowIso() ?? new Date().toISOString(), + }); + return runId; + } + + private runInBackground( + runId: string, + failureMessage: string, + runnerOptions: Pick = {} + ): Promise { + const runner = this.createRunner(runId); + const runnerAbortController = new AbortController(); + let unregisterRunnerAbort: () => void = () => undefined; + let startedSettled = false; + let resolveStarted: (() => void) | null = null; + let rejectStarted: ((error: unknown) => void) | null = null; + const started = new Promise((resolve, reject) => { + resolveStarted = resolve; + rejectStarted = reject; + }); + const markStarted = () => { + if (startedSettled) { + return; + } + startedSettled = true; + assert(resolveStarted != null, "WorkflowService.runInBackground: resolveStarted missing"); + resolveStarted(); + }; + const markStartFailed = (error: unknown) => { + if (startedSettled) { + return; + } + startedSettled = true; + assert(rejectStarted != null, "WorkflowService.runInBackground: rejectStarted missing"); + rejectStarted(error); + }; + const markLeaseAcquired = () => { + unregisterRunnerAbort = 
this.registerActiveRunnerAbortController( + runId, + runnerAbortController + ); + markStarted(); + }; + const runPromise = runner + .run(runId, { + abortSignal: runnerAbortController.signal, + onLeaseAcquired: markLeaseAcquired, + backgroundOnMessageQueued: false, + ...runnerOptions, + }) + // If run() resolves without having invoked onLeaseAcquired, `started` would otherwise never + // settle and callers awaiting it would hang. markStarted is a no-op once already settled. + .then(() => markStarted()) + .catch((error: unknown) => { + markStartFailed(error); + console.error(failureMessage, error); + }); + this.backgroundRuns.add(runPromise); + void runPromise.finally(() => { + unregisterRunnerAbort(); + this.backgroundRuns.delete(runPromise); + }); + return started; + } + + private createRunner(runId: string): WorkflowRunner { + return new WorkflowRunner({ + runStore: this.runStore, + runtimeFactory: this.runtimeFactory, + taskAdapter: this.taskAdapterFactory?.(runId) ?? this.requireTaskAdapter(), + runnerId: generateWorkflowRunnerOwnerId(this.runnerId, runId), + ...(this.clock != null ? { clock: this.clock } : {}), + }); + } + + private async requireRunForWorkspace(input: { + workspaceId: string; + runId: string; + }): Promise { + assert(input.workspaceId.length > 0, "WorkflowService: workspaceId is required"); + assert(input.runId.length > 0, "WorkflowService: runId is required"); + const run = await this.runStore.getRun(input.runId); + if (run.workspaceId !== input.workspaceId) { + throw new Error(`Workflow run not found: ${input.runId}`); + } + return run; + } + + private requireTaskAdapter(): WorkflowTaskAdapter { + assert(this.taskAdapter != null, "WorkflowService: taskAdapter is required"); + return this.taskAdapter; + } +} + +function canResumeRunWithCurrentTrust(run: WorkflowRunRecord, projectTrusted: boolean): boolean { + return run.definition.scope !== "project" || projectTrusted; +} + +function assertRunCanResumeWithCurrentTrust(run: WorkflowRunRecord, projectTrusted: boolean): void { + if (!canResumeRunWithCurrentTrust(run, projectTrusted)) { + throw new Error("Project trust is required to resume project-local workflow runs"); + } +} + 
+function assertWorkflowRunCanTransition(from: WorkflowRunStatus, to: WorkflowRunStatus): void { + if (from === "completed" || from === "failed") { + throw new Error(`Cannot transition workflow run from ${from} to ${to}`); + } +} + +function generateWorkflowRunnerOwnerId(baseRunnerId: string, runId: string): string { + assert(baseRunnerId.length > 0, "WorkflowService: base runner id is required"); + assert(runId.length > 0, "WorkflowService: run id is required for runner owner id"); + // Lease ownership must fence individual runner processes, not just the workspace/request that + // created them, so stale runners cannot renew or release a replacement runner's lease. + return `${baseRunnerId}:${runId}:${crypto.randomBytes(8).toString("hex")}`; +} + +function generateWorkflowRunId(): string { + return `wfr_${crypto.randomBytes(8).toString("hex")}`; +} diff --git a/src/node/services/workflows/WorkflowTaskServiceAdapter.test.ts b/src/node/services/workflows/WorkflowTaskServiceAdapter.test.ts new file mode 100644 index 0000000000..c3dfe87d0e --- /dev/null +++ b/src/node/services/workflows/WorkflowTaskServiceAdapter.test.ts @@ -0,0 +1,142 @@ +/* eslint-disable @typescript-eslint/await-thenable, @typescript-eslint/require-await */ +import { describe, expect, mock, test } from "bun:test"; +import { Ok } from "@/common/types/result"; +import { WorkflowTaskServiceAdapter } from "./WorkflowTaskServiceAdapter"; + +describe("WorkflowTaskServiceAdapter", () => { + test("spawns a workflow child task with workflow metadata and returns its report", async () => { + const outputSchema = { type: "object", properties: { claims: { type: "array" } } }; + const create = mock(async (_args: unknown) => + Ok({ taskId: "task_1", kind: "agent" as const, status: "running" as const }) + ); + const waitForAgentReport = mock(async () => ({ + reportMarkdown: "child report", + structuredOutput: { claims: ["durable"] }, + })); + const adapter = new WorkflowTaskServiceAdapter({ + taskService: { 
create, waitForAgentReport }, + parentWorkspaceId: "parent_1", + workflowRunId: "wfr_123", + defaultAgentId: "explore", + }); + + const result = await adapter.runAgent({ + id: "claims", + prompt: "Extract claims", + title: "Claim extractor", + outputSchema, + }); + + expect(create).toHaveBeenCalledWith({ + parentWorkspaceId: "parent_1", + kind: "agent", + agentId: "explore", + prompt: "Extract claims", + title: "Claim extractor", + workflowTask: { + runId: "wfr_123", + stepId: "claims", + outputSchema, + }, + }); + expect(waitForAgentReport).toHaveBeenCalledWith("task_1", { + requestingWorkspaceId: "parent_1", + backgroundOnMessageQueued: true, + }); + expect(result).toEqual({ + taskId: "task_1", + reportMarkdown: "child report", + structuredOutput: { claims: ["durable"] }, + }); + }); + + test("inherits experiments for task creation", async () => { + let createArgs: unknown; + const create = mock(async (args: unknown) => { + createArgs = args; + return Ok({ taskId: "task_1", kind: "agent" as const, status: "running" as const }); + }); + const waitForAgentReport = mock(async () => ({ reportMarkdown: "child report" })); + const adapter = new WorkflowTaskServiceAdapter({ + taskService: { create, waitForAgentReport }, + parentWorkspaceId: "parent_1", + workflowRunId: "wfr_123", + defaultAgentId: "explore", + experiments: { dynamicWorkflows: true, subagentFileReports: true }, + }); + + await adapter.runAgent({ + id: "claims", + prompt: "Extract claims", + outputSchema: { type: "object" }, + }); + + expect(createArgs).toMatchObject({ + prompt: "Extract claims", + experiments: { dynamicWorkflows: true, subagentFileReports: true }, + }); + }); + + test("passes workflow wait options into report waits", async () => { + const abortController = new AbortController(); + const create = mock(async () => + Ok({ taskId: "task_1", kind: "agent" as const, status: "running" as const }) + ); + const waitForAgentReport = mock(async () => ({ reportMarkdown: "child report" })); + const 
adapter = new WorkflowTaskServiceAdapter({ + taskService: { create, waitForAgentReport }, + parentWorkspaceId: "parent_1", + workflowRunId: "wfr_123", + defaultAgentId: "explore", + }); + + await adapter.runAgent({ id: "claims", prompt: "Extract claims" }, undefined, { + abortSignal: abortController.signal, + timeoutMs: 1_234, + backgroundOnMessageQueued: false, + }); + + expect(waitForAgentReport).toHaveBeenCalledWith("task_1", { + abortSignal: abortController.signal, + timeoutMs: 1_234, + requestingWorkspaceId: "parent_1", + backgroundOnMessageQueued: false, + }); + }); + + test("interrupts preserved descendant task workspaces for the parent workspace", async () => { + const create = mock(async () => + Ok({ taskId: "task_1", kind: "agent" as const, status: "running" as const }) + ); + const waitForAgentReport = mock(async () => ({ reportMarkdown: "unused" })); + const terminateAllDescendantAgentTasks = mock(async () => ["task_1"]); + const adapter = new WorkflowTaskServiceAdapter({ + taskService: { create, waitForAgentReport, terminateAllDescendantAgentTasks }, + parentWorkspaceId: "parent_1", + workflowRunId: "wfr_123", + defaultAgentId: "explore", + }); + + await adapter.interruptRun(); + + expect(terminateAllDescendantAgentTasks).toHaveBeenCalledWith("parent_1", { + workflowRunId: "wfr_123", + }); + }); + + test("fails fast when task creation fails", async () => { + const create = mock(async () => ({ success: false as const, error: "no runnable agent" })); + const waitForAgentReport = mock(async () => ({ reportMarkdown: "should not wait" })); + const adapter = new WorkflowTaskServiceAdapter({ + taskService: { create, waitForAgentReport }, + parentWorkspaceId: "parent_1", + workflowRunId: "wfr_123", + defaultAgentId: "explore", + }); + + await expect(adapter.runAgent({ id: "claims", prompt: "Extract claims" })).rejects.toThrow( + /no runnable agent/ + ); + expect(waitForAgentReport).not.toHaveBeenCalled(); + }); +}); diff --git 
a/src/node/services/workflows/WorkflowTaskServiceAdapter.ts b/src/node/services/workflows/WorkflowTaskServiceAdapter.ts new file mode 100644 index 0000000000..1892eb93c3 --- /dev/null +++ b/src/node/services/workflows/WorkflowTaskServiceAdapter.ts @@ -0,0 +1,142 @@ +import assert from "@/common/utils/assert"; +import type { TaskCreateResult } from "@/node/services/taskService"; +import type { + WorkflowAgentResult, + WorkflowAgentSpec, + WorkflowAgentWaitOptions, + WorkflowTaskAdapter, +} from "./WorkflowRunner"; + +interface WorkflowTaskExperiments { + programmaticToolCalling?: boolean; + programmaticToolCallingExclusive?: boolean; + advisorTool?: boolean; + execSubagentHardRestart?: boolean; + dynamicWorkflows?: boolean; + subagentFileReports?: boolean; +} + +interface WorkflowTaskServiceLike { + create(args: { + parentWorkspaceId: string; + kind: "agent"; + agentId: string; + prompt: string; + title: string; + workflowTask: { + runId: string; + stepId: string; + outputSchema?: unknown; + }; + experiments?: WorkflowTaskExperiments; + }): Promise<{ success: true; data: TaskCreateResult } | { success: false; error: string }>; + waitForAgentReport( + taskId: string, + options: WorkflowAgentWaitOptions & { + requestingWorkspaceId: string; + backgroundOnMessageQueued: boolean; + } + ): Promise<{ reportMarkdown: string; title?: string; structuredOutput?: unknown }>; + terminateAllDescendantAgentTasks?( + workspaceId: string, + options?: { workflowRunId?: string } + ): Promise; +} + +export interface WorkflowTaskServiceAdapterOptions { + taskService: WorkflowTaskServiceLike; + parentWorkspaceId: string; + workflowRunId: string; + defaultAgentId: string; + experiments?: WorkflowTaskExperiments; +} + +export class WorkflowTaskServiceAdapter implements WorkflowTaskAdapter { + private readonly taskService: WorkflowTaskServiceLike; + private readonly parentWorkspaceId: string; + private readonly workflowRunId: string; + private readonly defaultAgentId: string; + private 
readonly experiments?: WorkflowTaskExperiments; + + constructor(options: WorkflowTaskServiceAdapterOptions) { + assert( + options.parentWorkspaceId.length > 0, + "WorkflowTaskServiceAdapter: parentWorkspaceId is required" + ); + assert( + options.workflowRunId.length > 0, + "WorkflowTaskServiceAdapter: workflowRunId is required" + ); + assert( + options.defaultAgentId.length > 0, + "WorkflowTaskServiceAdapter: defaultAgentId is required" + ); + this.taskService = options.taskService; + this.parentWorkspaceId = options.parentWorkspaceId; + this.workflowRunId = options.workflowRunId; + this.defaultAgentId = options.defaultAgentId; + this.experiments = options.experiments; + } + + async interruptRun(): Promise { + await this.taskService.terminateAllDescendantAgentTasks?.(this.parentWorkspaceId, { + workflowRunId: this.workflowRunId, + }); + } + + async runAgent( + spec: WorkflowAgentSpec, + lifecycle?: { onTaskCreated?: (taskId: string) => Promise | void }, + waitOptions?: WorkflowAgentWaitOptions + ): Promise { + assert(spec.id.length > 0, "WorkflowTaskServiceAdapter.runAgent: spec.id is required"); + assert(spec.prompt.length > 0, "WorkflowTaskServiceAdapter.runAgent: spec.prompt is required"); + + const workflowTask: { runId: string; stepId: string; outputSchema?: unknown } = { + runId: this.workflowRunId, + stepId: spec.id, + }; + if (spec.outputSchema !== undefined) { + workflowTask.outputSchema = spec.outputSchema; + } + + const createResult = await this.taskService.create({ + parentWorkspaceId: this.parentWorkspaceId, + kind: "agent", + agentId: spec.agentId ?? this.defaultAgentId, + prompt: spec.prompt, + title: spec.title ?? spec.id, + workflowTask, + ...(this.experiments !== undefined ? 
{ experiments: this.experiments } : {}), + }); + if (!createResult.success) { + throw new Error(createResult.error); + } + + await lifecycle?.onTaskCreated?.(createResult.data.taskId); + + return await this.waitForAgentTask(createResult.data.taskId, spec, waitOptions); + } + + async waitForAgentTask( + taskId: string, + _spec: WorkflowAgentSpec, + waitOptions?: WorkflowAgentWaitOptions + ): Promise { + const report = await this.taskService.waitForAgentReport(taskId, { + ...(waitOptions?.abortSignal != null ? { abortSignal: waitOptions.abortSignal } : {}), + ...(waitOptions?.timeoutMs != null ? { timeoutMs: waitOptions.timeoutMs } : {}), + requestingWorkspaceId: this.parentWorkspaceId, + backgroundOnMessageQueued: waitOptions?.backgroundOnMessageQueued ?? true, + }); + + return { + taskId, + reportMarkdown: report.reportMarkdown, + ...(report.title != null ? { title: report.title } : {}), + ...(report.structuredOutput !== undefined + ? { structuredOutput: report.structuredOutput } + : {}), + }; + } +} diff --git a/src/node/services/workflows/builtInWorkflowDefinitions.test.ts b/src/node/services/workflows/builtInWorkflowDefinitions.test.ts new file mode 100644 index 0000000000..997cbf784c --- /dev/null +++ b/src/node/services/workflows/builtInWorkflowDefinitions.test.ts @@ -0,0 +1,383 @@ +/* eslint-disable @typescript-eslint/require-await */ +import { describe, expect, test } from "bun:test"; +import { QuickJSRuntimeFactory } from "@/node/services/ptc/quickjsRuntime"; +import { DisposableTempDir } from "@/node/services/tempDir"; +import { BUILT_IN_WORKFLOW_DEFINITIONS } from "./builtInWorkflowDefinitions"; +import { WorkflowRunStore } from "./WorkflowRunStore"; +import { WorkflowRunner, type WorkflowAgentSpec } from "./WorkflowRunner"; + +const deepResearch = BUILT_IN_WORKFLOW_DEFINITIONS.find( + (definition) => definition.name === "deep-research" +); + +describe("built-in deep-research workflow", () => { + test("coordinates staged research, verification, and final 
structured synthesis", async () => { + if (!deepResearch) { + throw new Error("Expected built-in deep-research workflow"); + } + using tmp = new DisposableTempDir("deep-research-workflow"); + const runStore = new WorkflowRunStore({ sessionDir: tmp.path, staleLeaseMs: 10 }); + await runStore.createRun({ + id: "wfr_deep_research", + workspaceId: "workspace-1", + definition: { + name: deepResearch.name, + description: deepResearch.description, + scope: "built-in", + executable: true, + }, + definitionSource: deepResearch.source, + args: { topic: "durable workflow orchestration" }, + now: "2026-05-29T00:00:00.000Z", + }); + + const taskCalls: WorkflowAgentSpec[] = []; + const runner = new WorkflowRunner({ + runStore, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent(spec) { + taskCalls.push(spec); + switch (spec.id) { + case "scope-topic": + return { + taskId: "task_scope", + reportMarkdown: "Research durable orchestration semantics.", + structuredOutput: { + refinedTopic: "durable workflow orchestration", + questions: ["How are runs resumed?", "How are tasks verified?"], + }, + }; + case "discover-sources": + return { + taskId: "task_sources", + reportMarkdown: "Found implementation, RFC, and tests.", + structuredOutput: { + sources: [ + { title: "RFC", url: "rfc/20260529_dynamic-workflows.md", relevance: "design" }, + { + title: "Runner", + url: "src/node/services/workflows/WorkflowRunner.ts", + relevance: "implementation", + }, + ], + }, + }; + case "summarize-source-0": + return { + taskId: "task_summary_0", + reportMarkdown: "RFC describes journal replay and validation.", + structuredOutput: { + source: "RFC", + summary: "Defines durable runs and replay.", + }, + }; + case "summarize-source-1": + return { + taskId: "task_summary_1", + reportMarkdown: "Runner describes replay lookup.", + structuredOutput: { + source: "Runner", + summary: "Replays completed steps by hash.", + }, + }; + case "extract-claims": + return { + taskId: 
"task_claims", + reportMarkdown: "Extracted two claims.", + structuredOutput: { + claims: [ + { + claim: "Completed steps are reused on resume.", + support: "Runner step lookup", + }, + { + claim: "Structured outputs are validated at report time.", + support: "outputSchema", + }, + ], + }, + }; + case "verify-claim-0": + return { + taskId: "task_verify_0", + reportMarkdown: "Completed-step replay is supported.", + structuredOutput: { + claim: "Completed steps are reused on resume.", + verdict: "supported", + risk: "low", + }, + }; + case "verify-claim-1": + return { + taskId: "task_verify_1", + reportMarkdown: "Structured output validation is supported.", + structuredOutput: { + claim: "Structured outputs are validated at report time.", + verdict: "supported", + risk: "low", + }, + }; + case "synthesize-report": + return { + taskId: "task_final", + reportMarkdown: "# Deep Research\nDurable workflows replay completed steps.", + structuredOutput: { confidence: "medium", gaps: ["Needs UI dogfood"] }, + }; + default: + throw new Error(`Unexpected deep-research step: ${spec.id}`); + } + }, + }, + runnerId: "runner-a", + clock: { + nowIso: () => "2026-05-29T00:00:01.000Z", + nowMs: () => 1_000, + }, + }); + + const result = await runner.run("wfr_deep_research"); + const run = await runStore.getRun("wfr_deep_research"); + + expect(taskCalls.map((call) => call.id)).toEqual([ + "scope-topic", + "discover-sources", + "summarize-source-0", + "summarize-source-1", + "extract-claims", + "verify-claim-0", + "verify-claim-1", + "synthesize-report", + ]); + expect(taskCalls.every((call) => call.outputSchema != null)).toBe(true); + expect(run.events.filter((event) => event.type === "phase").map((event) => event.name)).toEqual( + [ + "scope", + "source-discovery", + "source-synthesis", + "claim-extraction", + "adversarial-verification", + "final-synthesis", + ] + ); + expect(result).toEqual({ + reportMarkdown: "# Deep Research\nDurable workflows replay completed steps.", + 
structuredOutput: { + topic: "durable workflow orchestration", + refinedTopic: "durable workflow orchestration", + sources: [ + { title: "RFC", url: "rfc/20260529_dynamic-workflows.md", relevance: "design" }, + { + title: "Runner", + url: "src/node/services/workflows/WorkflowRunner.ts", + relevance: "implementation", + }, + ], + claims: [ + { claim: "Completed steps are reused on resume.", support: "Runner step lookup" }, + { claim: "Structured outputs are validated at report time.", support: "outputSchema" }, + ], + verification: [ + { claim: "Completed steps are reused on resume.", verdict: "supported", risk: "low" }, + { + claim: "Structured outputs are validated at report time.", + verdict: "supported", + risk: "low", + }, + ], + confidence: "medium", + gaps: ["Needs UI dogfood"], + }, + }); + }); + + test("skips empty source and claim fan-out stages", async () => { + if (!deepResearch) { + throw new Error("Expected built-in deep-research workflow"); + } + using tmp = new DisposableTempDir("deep-research-empty-workflow"); + const runStore = new WorkflowRunStore({ sessionDir: tmp.path, staleLeaseMs: 10 }); + await runStore.createRun({ + id: "wfr_deep_research_empty", + workspaceId: "workspace-1", + definition: { + name: deepResearch.name, + description: deepResearch.description, + scope: "built-in", + executable: true, + }, + definitionSource: deepResearch.source, + args: { topic: "obscure empty topic" }, + now: "2026-05-29T00:00:00.000Z", + }); + + const taskCalls: WorkflowAgentSpec[] = []; + const runner = new WorkflowRunner({ + runStore, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent(spec) { + taskCalls.push(spec); + switch (spec.id) { + case "scope-topic": + return { + taskId: "task_scope", + reportMarkdown: "Scoped obscure topic.", + structuredOutput: { refinedTopic: "obscure empty topic", questions: [] }, + }; + case "discover-sources": + return { + taskId: "task_sources", + reportMarkdown: "No high-signal sources found.", 
+ structuredOutput: { sources: [] }, + }; + case "extract-claims": + return { + taskId: "task_claims", + reportMarkdown: "No claims extracted.", + structuredOutput: { claims: [] }, + }; + case "synthesize-report": + return { + taskId: "task_final", + reportMarkdown: "# Deep Research\nNo sources were found.", + structuredOutput: { confidence: "low", gaps: ["No sources found"] }, + }; + default: + throw new Error(`Unexpected deep-research step: ${spec.id}`); + } + }, + }, + runnerId: "runner-a", + clock: { + nowIso: () => "2026-05-29T00:00:01.000Z", + nowMs: () => 1_000, + }, + }); + + const result = await runner.run("wfr_deep_research_empty"); + const run = await runStore.getRun("wfr_deep_research_empty"); + + expect(run.status).toBe("completed"); + expect(taskCalls.map((call) => call.id)).toEqual([ + "scope-topic", + "discover-sources", + "extract-claims", + "synthesize-report", + ]); + expect(result).toMatchObject({ + structuredOutput: { + sources: [], + claims: [], + verification: [], + }, + }); + }); + + test("caps model-produced deep-research fan-out", async () => { + if (!deepResearch) { + throw new Error("Expected built-in deep-research workflow"); + } + using tmp = new DisposableTempDir("deep-research-capped-workflow"); + const runStore = new WorkflowRunStore({ sessionDir: tmp.path, staleLeaseMs: 10 }); + await runStore.createRun({ + id: "wfr_deep_research_capped", + workspaceId: "workspace-1", + definition: { + name: deepResearch.name, + description: deepResearch.description, + scope: "built-in", + executable: true, + }, + definitionSource: deepResearch.source, + args: { topic: "fanout cap" }, + now: "2026-05-29T00:00:00.000Z", + }); + + const taskCalls: WorkflowAgentSpec[] = []; + const runner = new WorkflowRunner({ + runStore, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent(spec) { + taskCalls.push(spec); + if (spec.id === "scope-topic") { + return { + taskId: "task_scope", + reportMarkdown: "Scoped.", + structuredOutput: 
{ refinedTopic: "fanout cap", questions: ["How much fanout?"] }, + }; + } + if (spec.id === "discover-sources") { + return { + taskId: "task_sources", + reportMarkdown: "Many sources.", + structuredOutput: { + sources: Array.from({ length: 20 }, (_value, index) => ({ + title: `Source ${index}`, + url: `source-${index}.md`, + relevance: "fixture", + })), + }, + }; + } + if (spec.id.startsWith("summarize-source-")) { + return { + taskId: `task_${spec.id}`, + reportMarkdown: spec.id, + structuredOutput: { source: spec.id, summary: "summary" }, + }; + } + if (spec.id === "extract-claims") { + return { + taskId: "task_claims", + reportMarkdown: "Many claims.", + structuredOutput: { + claims: Array.from({ length: 20 }, (_value, index) => ({ + claim: `Claim ${index}`, + support: "fixture", + })), + }, + }; + } + if (spec.id.startsWith("verify-claim-")) { + return { + taskId: `task_${spec.id}`, + reportMarkdown: spec.id, + structuredOutput: { claim: spec.id, verdict: "supported", risk: "low" }, + }; + } + if (spec.id === "synthesize-report") { + return { + taskId: "task_final", + reportMarkdown: "# Capped", + structuredOutput: { confidence: "medium", gaps: [] }, + }; + } + throw new Error(`Unexpected deep-research step: ${spec.id}`); + }, + }, + runnerId: "runner-a", + clock: { + nowIso: () => "2026-05-29T00:00:01.000Z", + nowMs: () => 1_000, + }, + }); + + const result = await runner.run("wfr_deep_research_capped"); + const callIds = taskCalls.map((call) => call.id); + + expect(callIds.filter((id) => id.startsWith("summarize-source-")).length).toBe(16); + expect(callIds.filter((id) => id.startsWith("verify-claim-")).length).toBe(16); + expect(callIds).not.toContain("summarize-source-16"); + expect(callIds).not.toContain("verify-claim-16"); + const structuredOutput = ( + result as { + structuredOutput: { sources: unknown[]; claims: unknown[]; verification: unknown[] }; + } + ).structuredOutput; + expect(structuredOutput.sources).toHaveLength(16); + 
/**
 * Shape of one entry in {@link BUILT_IN_WORKFLOW_DEFINITIONS}.
 */
export interface BuiltInWorkflowDefinition {
  /** Workflow identifier, e.g. `"deep-research"`. */
  name: WorkflowName;
  /** One-line human-readable summary of what the workflow does. */
  description: string;
  /**
   * The workflow program as text. For the entry defined in this file it is
   * JavaScript whose default export is the workflow function.
   */
  source: string;
}

/**
 * Workflow definitions bundled in this module.
 *
 * `deep-research` runs these stages in order, using the `agent`,
 * `parallelAgents`, `phase` and `log` primitives it receives as arguments:
 *
 * 1. `scope-topic` refines the topic and produces research questions.
 * 2. `discover-sources` returns a list of `{ title, url, relevance }` sources.
 * 3. `summarize-source-<index>` runs once per selected source via `parallelAgents`.
 * 4. `extract-claims` returns `{ claim, support }` pairs from the summaries.
 * 5. `verify-claim-<index>` runs once per selected claim via `parallelAgents`.
 * 6. `synthesize-report` writes the final report with `confidence` and `gaps`.
 *
 * Both fan-out stages are limited with `slice(0, maxFanOut)` (`maxFanOut` is 16),
 * and both are skipped (yielding `[]`) when the preceding list is empty. Only the
 * selected (capped) sources and claims are returned in `structuredOutput`.
 */
export const BUILT_IN_WORKFLOW_DEFINITIONS: readonly BuiltInWorkflowDefinition[] = [
  {
    name: "deep-research",
    description: "Coordinate delegated agents to research, verify, and synthesize a topic.",
    // The template literal below is program text, not TypeScript checked by this
    // file. Escapes are doubled (`\\n`) so the embedded program receives the
    // two-character sequence `\n` inside its own string literals, where it then
    // denotes a newline in the agent prompts.
    //
    // NOTE(review): normalizeDeepResearchTopic falls back to JSON.stringify(args),
    // which evaluates to undefined when args is undefined, so the prompts would
    // read "Topic: undefined" — confirm callers always supply a topic.
    source: `export default function deepResearch({ args, phase, log, agent, parallelAgents }) {
  const maxFanOut = 16;
  const topic = normalizeDeepResearchTopic(args);

  phase("scope", { topic });
  const scope = agent({
    id: "scope-topic",
    title: "Scope research topic",
    prompt:
      "Refine this deep research topic into a focused investigation. Return concise research questions and the refined topic.\\n\\nTopic: " +
      topic,
    outputSchema: {
      type: "object",
      required: ["refinedTopic", "questions"],
      additionalProperties: false,
      properties: {
        refinedTopic: { type: "string" },
        questions: { type: "array", items: { type: "string" } },
      },
    },
  });
  const refinedTopic = scope.structuredOutput.refinedTopic || topic;
  log("Scoped deep research topic", { refinedTopic });

  phase("source-discovery", { refinedTopic });
  const sources = agent({
    id: "discover-sources",
    title: "Discover high-signal sources",
    prompt:
      "Find high-signal primary or directly relevant sources for this research topic. Prefer repo files, specs, primary docs, and concrete evidence over summaries. Return sources with title, url/path, and relevance.\\n\\nTopic: " +
      refinedTopic +
      "\\nQuestions: " +
      scope.structuredOutput.questions.join("; "),
    outputSchema: {
      type: "object",
      required: ["sources"],
      additionalProperties: false,
      properties: {
        sources: {
          type: "array",
          items: {
            type: "object",
            required: ["title", "url", "relevance"],
            additionalProperties: false,
            properties: {
              title: { type: "string" },
              url: { type: "string" },
              relevance: { type: "string" },
            },
          },
        },
      },
    },
  });
  const discoveredSources = sources.structuredOutput.sources.slice(0, maxFanOut);
  log("Discovered sources", { count: sources.structuredOutput.sources.length, selectedCount: discoveredSources.length });

  phase("source-synthesis", { sourceCount: discoveredSources.length });
  const sourceSummaries = discoveredSources.length > 0
    ? parallelAgents(
        discoveredSources.map(function (source, index) {
          return {
            id: "summarize-source-" + index,
            title: "Read and summarize source " + (index + 1),
            prompt:
              "Read or inspect this discovered source and summarize the evidence relevant to the research questions.\\n\\nTopic: " +
              refinedTopic +
              "\\nSource: " +
              JSON.stringify(source),
            outputSchema: {
              type: "object",
              required: ["source", "summary"],
              additionalProperties: false,
              properties: {
                source: { type: "string" },
                summary: { type: "string" },
              },
            },
          };
        })
      )
    : [];
  const summaries = { structuredOutput: { summaries: sourceSummaries.map(function (summary) { return summary.structuredOutput; }) } };

  phase("claim-extraction", { summaryCount: summaries.structuredOutput.summaries.length });
  const claims = agent({
    id: "extract-claims",
    title: "Extract claims and support",
    prompt:
      "Extract the most important factual claims and supporting evidence from these source summaries. Return claims with support notes.\\n\\nTopic: " +
      refinedTopic +
      "\\nSummaries: " +
      JSON.stringify(summaries.structuredOutput.summaries),
    outputSchema: {
      type: "object",
      required: ["claims"],
      additionalProperties: false,
      properties: {
        claims: {
          type: "array",
          items: {
            type: "object",
            required: ["claim", "support"],
            additionalProperties: false,
            properties: {
              claim: { type: "string" },
              support: { type: "string" },
            },
          },
        },
      },
    },
  });

  const extractedClaims = claims.structuredOutput.claims.slice(0, maxFanOut);
  phase("adversarial-verification", { claimCount: extractedClaims.length });
  const verificationFindings = extractedClaims.length > 0
    ? parallelAgents(
        extractedClaims.map(function (claim, index) {
          return {
            id: "verify-claim-" + index,
            title: "Adversarially verify claim " + (index + 1),
            prompt:
              "Challenge this claim. Look for contradictions, missing evidence, overreach, and lower-confidence areas. Return verdict and risk.\\n\\nTopic: " +
              refinedTopic +
              "\\nClaim: " +
              JSON.stringify(claim),
            outputSchema: {
              type: "object",
              required: ["claim", "verdict", "risk"],
              additionalProperties: false,
              properties: {
                claim: { type: "string" },
                verdict: { type: "string", enum: ["supported", "mixed", "refuted", "unclear"] },
                risk: { type: "string", enum: ["low", "medium", "high"] },
              },
            },
          };
        })
      )
    : [];
  const verification = { structuredOutput: { findings: verificationFindings.map(function (finding) { return finding.structuredOutput; }) } };
  log("Verified claims", { count: verification.structuredOutput.findings.length });

  phase("final-synthesis", { topic: refinedTopic });
  const final = agent({
    id: "synthesize-report",
    title: "Synthesize final deep research report",
    prompt:
      "Write the final deep research report. Include key findings, citations/source references by title or path, uncertainty, and recommendations for follow-up. Return confidence and remaining gaps as structured output.\\n\\nTopic: " +
      refinedTopic +
      "\\nSources: " +
      JSON.stringify(discoveredSources) +
      "\\nClaims: " +
      JSON.stringify(extractedClaims) +
      "\\nVerification: " +
      JSON.stringify(verification.structuredOutput.findings),
    outputSchema: {
      type: "object",
      required: ["confidence", "gaps"],
      additionalProperties: false,
      properties: {
        confidence: { type: "string", enum: ["low", "medium", "high"] },
        gaps: { type: "array", items: { type: "string" } },
      },
    },
  });

  return {
    reportMarkdown: final.reportMarkdown,
    structuredOutput: {
      topic,
      refinedTopic,
      sources: discoveredSources,
      claims: extractedClaims,
      verification: verification.structuredOutput.findings,
      confidence: final.structuredOutput.confidence,
      gaps: final.structuredOutput.gaps,
    },
  };
}

function normalizeDeepResearchTopic(args) {
  if (typeof args === "string" && args.trim()) return args.trim();
  if (args && typeof args === "object") {
    if (typeof args.topic === "string" && args.topic.trim()) return args.topic.trim();
    if (typeof args.input === "string" && args.input.trim()) return args.input.trim();
    if (typeof args.query === "string" && args.query.trim()) return args.query.trim();
  }
  return JSON.stringify(args);
}
`,
  },
];
/**
 * Asserts that a replay-boundary primitive was given a usable step id.
 *
 * @param stepId - Identifier of the workflow step; must contain at least one
 *   non-whitespace character.
 * @param primitiveName - Name of the primitive being checked; it is only used
 *   as the prefix of the assertion message.
 */
export function assertWorkflowStepId(stepId: string, primitiveName: string): void {
  assert(
    stepId.trim().length > 0,
    `${primitiveName} replay boundary requires a stable id so completed workflow work can be reused`
  );
}

/**
 * Derives a replay key for a step from its id and its input.
 *
 * The input is canonicalized first, so two inputs that differ only in object
 * key order produce the same key. Array order remains significant.
 *
 * @param stepId - Non-blank step identifier (checked with {@link assertWorkflowStepId}).
 * @param input - JSON-compatible step input; see {@link canonicalizeWorkflowInput}
 *   for what is accepted.
 * @returns `"sha256:"` followed by the hex SHA-256 digest of
 *   `JSON.stringify({ stepId, input })` over the canonical input.
 */
export function hashWorkflowStepInput(stepId: string, input: unknown): string {
  assertWorkflowStepId(stepId, "workflow step");
  const canonical = JSON.stringify({ stepId, input: canonicalizeWorkflowInput(input) });
  return `sha256:${crypto.createHash("sha256").update(canonical).digest("hex")}`;
}

/**
 * Recursively rebuilds a JSON-compatible value with object keys in sorted
 * order, rejecting anything `JSON.stringify` would drop or rewrite.
 *
 * Accepted: `null`, strings, booleans, finite numbers, arrays of accepted
 * values, and plain objects (prototype is exactly `Object.prototype`) whose
 * values are accepted. A top-level `undefined` is returned unchanged, as before.
 *
 * Rejected (assertion failure): non-finite numbers, non-plain objects such as
 * `Date`, `undefined` object values, and `undefined` elements or holes in
 * arrays. Rejected (thrown `Error`): every other type, e.g. functions, symbols
 * and bigints.
 *
 * @param input - Value to canonicalize.
 * @returns A structurally equal value built from fresh arrays and plain objects.
 */
export function canonicalizeWorkflowInput(input: unknown): unknown {
  if (input == null || typeof input === "string" || typeof input === "boolean") {
    return input;
  }

  if (typeof input === "number") {
    assert(Number.isFinite(input), "Workflow replay input numbers must be finite");
    return input;
  }

  if (Array.isArray(input)) {
    // Array.from visits holes as undefined (Array.prototype.map skips them), so
    // sparse arrays are rejected too. Without this check JSON.stringify would
    // render undefined as null and [undefined] would collide with [null].
    return Array.from(input, (value: unknown) => {
      assert(
        value !== undefined,
        "Workflow replay inputs must not contain non-JSON value undefined"
      );
      return canonicalizeWorkflowInput(value);
    });
  }

  if (typeof input === "object") {
    assert(
      Object.getPrototypeOf(input) === Object.prototype,
      "Workflow replay inputs must be plain JSON objects/arrays"
    );

    const record = input as Record<string, unknown>;
    const entries = Object.keys(record)
      .sort()
      .map((key): [string, unknown] => {
        const value = record[key];
        assert(
          value !== undefined,
          "Workflow replay inputs must not contain non-JSON value undefined"
        );
        return [key, canonicalizeWorkflowInput(value)];
      });
    // Object.fromEntries defines own data properties. Plain assignment would hit
    // the inherited __proto__ setter and silently drop an own "__proto__" key
    // (which JSON.parse can produce), making distinct inputs hash the same.
    return Object.fromEntries(entries);
  }

  throw new Error(`Workflow replay inputs must be JSON values, got ${typeof input}`);
}