diff --git a/AGENTS.md b/AGENTS.md
index 4c8fd31b..bc495458 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -92,7 +92,20 @@ Every compiled pipeline runs as three sequential jobs:
│ │ │ ├── 0002_pool_object_form.rs # Legacy scalar pool → object form codemod
│ │ │ └── helpers.rs # take_key, insert_no_overwrite, rename_key, ConflictPolicy
│ │ ├── codemod_integration_test.rs # White-box rewrite-path tests (stub registry injection)
-│ │ └── types.rs # Front matter grammar and types
+│ │ ├── types.rs # Front matter grammar and types
+│ │ └── ir/ # Typed Azure DevOps pipeline IR (see docs/ir.md)
+│ │ ├── mod.rs # Pipeline / PipelineBody / PipelineShape root types
+│ │ ├── ids.rs # Typed StageId / JobId / StepId newtypes
+│ │ ├── step.rs # Step variants (Bash, Task, Checkout, Download, Publish, RawYaml)
+│ │ ├── job.rs # Job, Pool, TemplateContext, JobVariable
+│ │ ├── stage.rs # Stage + external-params wrap
+│ │ ├── env.rs # Typed EnvValue (Literal, AdoMacro, PipelineVar, Secret, StepOutput, Coalesce, Concat)
+│ │ ├── condition.rs # Typed Condition / Expr AST + codegen to ADO condition syntax
+│ │ ├── output.rs # OutputDecl / OutputRef + location-aware lowering
+│ │ ├── graph.rs # Dependency graph: validation, edge derivation, isOutput promotion, cycle detection
+│ │ ├── lower.rs # IR → serde_yaml::Value lowering
+│ │ ├── emit.rs # Thin `lower() + serde_yaml::to_string()` wrapper
+│ │ └── summary.rs # Public, serializable PipelineSummary / GraphSummary for agent-facing tooling (see docs/ir.md Public JSON summary)
│ ├── init.rs # Repository initialization for AI-first authoring
│ ├── execute.rs # Stage 3 safe output execution
│ ├── fuzzy_schedule.rs # Fuzzy schedule parsing
@@ -130,6 +143,10 @@ Every compiled pipeline runs as three sequential jobs:
│ │ ├── mod.rs
│ │ ├── console.rs # Human-readable console report
│ │ └── json.rs # Machine-readable AuditData JSON
+│ ├── inspect/ # `ado-aw inspect` / `graph` / (planned) `trace` / `whatif` / `lint` / `catalog` — read-only IR queries
+│ │ ├── mod.rs # Module entry; public re-exports of every dispatcher
+│ │ ├── cli.rs # Dispatchers (`dispatch_inspect`, `dispatch_graph`, …) and option structs
+│ │ └── graph_query.rs # Text/DOT renderers for the resolved dependency graph
│ ├── detect.rs # Agentic pipeline detection — discovers compiled pipelines; used by all lifecycle commands
│ ├── update_check.rs # Version update check — queries GitHub Releases and prints advisory when newer version is available
│ ├── ndjson.rs # NDJSON parsing utilities
@@ -276,7 +293,7 @@ index to jump to the right page.
### Compiler internals & operations
-- [`docs/ir.md`](docs/ir.md) — typed Azure DevOps pipeline IR (`Pipeline`, jobs/stages/steps, output refs, graph pass, lowering, and target builders).
+- [`docs/ir.md`](docs/ir.md) — typed Azure DevOps pipeline IR (`Pipeline`, jobs/stages/steps, output refs, graph pass, lowering, target builders, and the public JSON summary consumed by agent-facing tooling).
- [`docs/cli.md`](docs/cli.md) — `ado-aw` CLI commands (`init`, `compile`,
`check`, `mcp`, `mcp-http`, `execute`, `secrets`, `enable`, `disable`,
`remove`, `list`, `status`, `run`, `audit`; `configure` is a deprecated hidden alias).
@@ -285,6 +302,9 @@ index to jump to the right page.
report shape.
- [`docs/mcp.md`](docs/mcp.md) — MCP server configuration (stdio containers,
HTTP servers, env passthrough).
+- [`docs/mcp-author.md`](docs/mcp-author.md) — author-facing MCP server
+ (stdio); exposes workflow inspection, graph, what-if, lint, catalog,
+ trace, and build-audit tools over MCP for IDE/Copilot Chat agents.
- [`docs/mcpg.md`](docs/mcpg.md) — MCP Gateway architecture and pipeline
integration.
- [`docs/network.md`](docs/network.md) — AWF network isolation, default
diff --git a/docs/audit.md b/docs/audit.md
index d2c139d8..7e52944e 100644
--- a/docs/audit.md
+++ b/docs/audit.md
@@ -26,7 +26,7 @@ URL-encoded project segments are decoded before the ADO context is resolved. `t=
| Flag | Default | Behavior |
| --- | --- | --- |
-| `-o, --output ` | `./logs` | Directory under which `/build-/` is written. |
+| `-o, --output ` | `./logs` | Directory under which `/build-/` is written. Other entry points (`ado-aw trace` and the mcp-author tools) instead default to the shared `${TEMP}/ado-aw/audit` cache root so they do not scatter `./logs/` directories under arbitrary working directories. |
| `--json` | off | Emit the full `AuditData` as JSON to stdout (suppresses the trailing `Audit complete` stderr line). |
| `--org ` | auto | Azure DevOps organization override for bare build IDs. Full build URLs provide the host / org directly. |
| `--project ` | auto | Azure DevOps project override for bare build IDs. Full build URLs provide the project directly. |
@@ -86,9 +86,10 @@ Current top-level keys include the following. Optional sections are omitted from
| `rejected_safe_outputs` | Rollup of rejections by reason / threat flag. |
| `detection_analysis` | `threat-analysis.json`. |
| `mcp_server_health` | MCPG logs aggregated per server. |
+| `pipeline_graph` | Optional typed-IR `PipelineSummary` rebuilt from local source metadata (`aw_info.json.source`) for graph correlation. |
| `mcp_tool_usage` | MCPG logs aggregated per `(server, tool)`. |
| `mcp_failures` | MCPG `tool_error` / `server_error` events. |
-| `jobs` | ADO `/timeline` records filtered to `type: Job`. |
+| `jobs` | ADO `/timeline` records filtered to `type: Job`; when `pipeline_graph` is available, each entry may include `upstream_jobs` and `downstream_jobs` from IR job edges. |
| `firewall_analysis` | AWF Squid proxy logs aggregated by domain. |
| `policy_analysis` | AWF policy artifacts aggregated into allow / deny summaries. |
| `missing_tools` / `missing_data` / `noops` | NDJSON entries from the corresponding SafeOutputs MCP tools. |
@@ -109,6 +110,23 @@ Additionally, exactly one severity-`high` finding is emitted summarizing the gat
Per-item detection verdicts are not currently available. `threat-analysis.md` emits an aggregate verdict only; per-item verdicts are a follow-up that should stay aligned with gh-aw.
+## Pipeline graph correlation
+
+After the standard analyzers run, `audit` looks for
+`agent_outputs[_]/staging/aw_info.json` (falling back to the artifact
+top level) and resolves its `source` path relative to the current working
+directory. If that markdown source exists locally, the command rebuilds the
+typed IR with the same public summary shape emitted by `ado-aw inspect --json`
+and stores it under `pipeline_graph.summary`. The audit embeds the full
+`PipelineSummary` rather than a reduced subset so audit, inspect, graph, and
+trace consumers share one schema.
+
+When graph correlation succeeds, `jobs[]` entries also gain optional
+`upstream_jobs` and `downstream_jobs` arrays. These are omitted when empty or
+when the source markdown is unavailable locally. Failed jobs with downstream
+edges emit a medium-severity finding summarizing the downstream runtime
+classifications.
+
## Cache behavior
`/build-/run-summary.json` is written after a successful run. On subsequent invocations against the same build:
@@ -135,7 +153,7 @@ Per-item detection verdicts are not currently available. `threat-analysis.md` em
## Related Documentation
-- [CLI Commands](cli.md) — full CLI reference
+- [CLI Commands](cli.md) — full CLI reference, including `trace`
- [Front Matter](front-matter.md) — agent file format
- [Safe Outputs](safe-outputs.md) — what proposals look like
- [Network](network.md) — AWF firewall configuration
diff --git a/docs/cli.md b/docs/cli.md
index 2e75de4d..96a842f2 100644
--- a/docs/cli.md
+++ b/docs/cli.md
@@ -23,6 +23,7 @@ Global flags (apply to all subcommands): `--verbose, -v` (enable info-level logg
- Useful for CI checks to ensure pipelines are regenerated after source changes
- `mcp ` - Run SafeOutputs as a stdio MCP server
- `--enabled-tools ` - Restrict available tools to those named (repeatable)
+- `mcp-author` - Run the author-facing stdio MCP server for IDE/Copilot Chat integrations. See [`mcp-author.md`](mcp-author.md) for the full tool surface and trust model.
- `mcp-http ` - Run SafeOutputs as an HTTP MCP server (for MCPG integration)
- `--port ` - Port to listen on (default: 8100)
- `--api-key ` - API key for authentication (auto-generated if not provided)
@@ -134,7 +135,7 @@ Both `--all-repos` and `--source` route through `ado-aw`'s `discover_ado_aw_pipe
- `--dry-run` - Print the planned `templateParameters` body without calling the ADO API.
- `audit ` - Audit a single Azure DevOps build: download the known stage artifacts, run the audit analyzers, and render a structured console report or `AuditData` JSON.
- - `-o, --output ` - Output directory for downloaded artifacts and reports. Defaults to `./logs`; the run is stored under `/build-/`.
+ - `-o, --output ` - Output directory for downloaded artifacts and reports. Defaults to `./logs`; the run is stored under `/build-/`. Other entry points (`ado-aw trace` and the mcp-author tools) instead default to the shared `${TEMP}/ado-aw/audit` cache root so they do not scatter `./logs/` directories under arbitrary working directories.
- `--json` - Emit machine-readable JSON (`AuditData`) instead of the console report. Suppresses the trailing `Audit complete` stderr line.
- `--org ` - Override: Azure DevOps organization (used when the input is a bare build ID). Full build URLs provide the host / org directly.
- `--project ` - Override: Azure DevOps project name (used when the input is a bare build ID). Full build URLs provide the project directly.
@@ -143,6 +144,28 @@ Both `--all-repos` and `--source` route through `ado-aw`'s `discover_ado_aw_pipe
- `--no-cache` - Ignore `/build-/run-summary.json` and re-process the build.
- See [`audit.md`](audit.md) for accepted build-reference formats, output layout, cache semantics, and the `AuditData` report shape.
+- `trace [--step ] [--json]` - Query audit telemetry plus local typed-IR graph correlation to explain failed-job chains and downstream skip classifications. Downloads / caches under `${TEMP}/ado-aw/audit/build-/` (separate from `ado-aw audit`'s `./logs` default so the MCP server and IDE-driven traces do not scatter `./logs/` directories under arbitrary working dirs), and degrades to runtime-only output when the source markdown is not local.
+ - `--step ` - Focus the report on a named IR step and show the containing job's runtime status plus upstream/downstream job classifications.
+ - `--json` - Emit a structured `TraceReport`.
+ - `--org `, `--project `, `--pat ` / `AZURE_DEVOPS_EXT_PAT` - Same ADO context/auth passthroughs as `audit`.
+
+- `inspect [--json]` - Build the typed IR for an agent source file and emit a terse summary (jobs, stages, steps, output decls, derived `dependsOn` edges, isOutput-promoted outputs).
+ - `` - Path to the agent markdown file.
+ - `--json` - Emit the full [`PipelineSummary`](ir.md#public-json-summary-irsummary) as pretty-printed JSON instead of the human view.
+ - No YAML is written; this is a read-only query over the same IR the compiler builds.
+
+- `graph dump [--format text|json|dot]` - Print the resolved dependency graph (job edges, stage edges, step locations, outputs needing `isOutput=true`). The dump lives under an explicit `dump` subcommand so that `graph deps` and `graph outputs` can share the `graph` namespace.
+ - `` - Path to the agent markdown file.
+ - `--format text` - Default. Human-scannable plain text.
+ - `--format json` - Emit the [`GraphSummary`](ir.md#public-json-summary-irsummary) JSON.
+ - `--format dot` - Emit Graphviz DOT. Pipe to `dot -Tsvg -o pipeline.svg` to visualize.
+
+- `graph deps [--direction upstream|downstream] [--json]` - Traverse transitive job and step-output dependencies for one step. If `` names a job with no matching step, the command falls back to job-level traversal.
+
+- `graph outputs [--producer ] [--consumer ] [--json]` - Print declared step outputs and the steps that read them from `env` or `condition`.
+
+- `whatif --fail [--json]` - Statically classify downstream jobs that would be skipped, or would run anyway, if a step or job failed.
+
### Hidden Build-Time Tools
These commands are not shown in `--help` but are available for contributors working on the ado-aw compiler itself:
diff --git a/docs/ir.md b/docs/ir.md
index 0f289b42..40c4c67f 100644
--- a/docs/ir.md
+++ b/docs/ir.md
@@ -263,3 +263,84 @@ The production target wrappers are:
The canonical 5-job Setup → Agent → Detection → SafeOutputs → Teardown shape itself lives in `agentic_pipeline.rs` and is reused unchanged by every wrapper above; extensions plug into it via `Declarations` (steps, env, hosts, MCPG entries, and Agent-job condition clauses — see `Declarations::agent_conditions`).
When adding a target, follow the same pattern: parse and validate front matter, collect extension `Declarations`, build typed jobs/stages/steps, set the correct `PipelineShape`, and call the shared emit path.
+
+## Public JSON summary (`ir::summary`)
+
+The internal IR types (`Pipeline`, `Job`, `Step`, `Graph`, …) are
+intentionally tied to the compiler's lowering needs and are **not**
+public API. To give agent-facing tooling a stable view of a compiled
+pipeline, `src/compile/ir/summary.rs` defines a parallel
+**summary tree** with `#[derive(Serialize)]` that is consumed by:
+
+- `ado-aw inspect [--json]` — top-level pipeline summary.
+- `ado-aw graph dump [--format text|json|dot]` — resolved
+ dependency graph (subset of the summary).
+- `ado-aw graph deps ` and `ado-aw graph outputs
+ ` — focused graph queries over step dependencies and output
+ declaration/reference edges.
+- `ado-aw whatif --fail ` — static
+ downstream skip classification from graph reachability and rendered
+ conditions.
+- The `ado-aw audit` JSON (`AuditData.pipeline_graph`) and the
+ author-MCP server.
+
+### Stability contract
+
+`PipelineSummary::schema_version` (currently `1`) is the public schema
+version. **Bump** it when the JSON shape changes in a way a downstream
+consumer would notice (renamed field, removed variant, changed
+semantics). Additive changes like new optional fields do not require a
+bump. New enum variants currently do require a schema-version bump
+because the serialized enums do not have catch-all `Unknown` variants.
+
+The summary is the public schema. Internal IR types may change freely
+without bumping the summary version, as long as the summary lowering
+keeps the existing field set populated correctly.
+
+### Shape
+
+```jsonc
+{
+ "schema_version": 1,
+ "name": "",
+ "shape": "standalone" | "1es" | "job-template" | "stage-template",
+ "body": { "kind": "jobs", "jobs": [...] }
+ // OR
+ { "kind": "stages", "stages": [...] },
+ "graph": {
+ "step_locations": [{ "step", "stage?", "job", "outputs": [...] }],
+ "job_edges": [{ "consumer", "producer" }], // consumer dependsOn producer
+ "stage_edges": [{ "consumer", "producer" }],
+ "outputs_needing_is_output": [{ "step", "outputs": [...] }]
+ }
+}
+```
+
+Per-`JobSummary`: `id`, `stage?`, `display_name`, `depends_on`,
+`condition?` (lowered ADO condition string), `pool`, `steps`.
+
+Per-`StepSummary`: `id?`, `kind` (`bash` / `task` / `checkout` /
+`download` / `publish` / `raw_yaml`), `display_name?`, `task?`,
+`condition?`, `outputs[]` (`{name, is_secret, auto_is_output}`),
+`env_refs[]` (`{step, name}`), `condition_refs[]` (`{step, name}`).
+
+`condition?` is the lowered ADO condition string (e.g.
+`"eq(dependencies.Detection.outputs['threatAnalysis.SafeToProcess'], 'true')"`),
+not the typed AST — consumers don't need the AST to reason about
+"would this run if X failed?".
+
+### Construction
+
+```rust
+let (front_matter, pipeline) = ado_aw::compile::build_pipeline_ir(&source).await?;
+let summary = ado_aw::compile::ir::summary::PipelineSummary::from_pipeline(&pipeline)?;
+let json = serde_json::to_string_pretty(&summary)?;
+```
+
+`build_pipeline_ir` is the public read-only entry point: it parses
+and sanitises front matter, runs the same target dispatch as
+`compile_pipeline`, and returns the typed `Pipeline` without writing
+any YAML. `PipelineSummary::from_pipeline` runs the graph pass
+(reusing `graph::build_graph` for validation + edge derivation) and
+populates `auto_is_output` for any output that has at least one
+cross-step consumer — without mutating the input pipeline.
diff --git a/docs/mcp-author.md b/docs/mcp-author.md
new file mode 100644
index 00000000..5cc5ba3d
--- /dev/null
+++ b/docs/mcp-author.md
@@ -0,0 +1,72 @@
+# Author MCP Server
+
+_Part of the [ado-aw documentation](../AGENTS.md)._
+
+`ado-aw mcp-author` runs a local, author/debug-facing MCP server over stdio for
+IDE and Copilot Chat integrations. It exposes read-only workflow inspection,
+graph, lint, what-if, trace, and audit tools.
+
+It is **not** the SafeOutputs MCP server embedded in compiled pipelines. The
+pipeline SafeOutputs server records proposed mutations for Stage 3 execution;
+`mcp-author` is a local helper for humans and agents authoring or debugging
+workflows.
+
+## Tool surface
+
+| Tool | Description | Input shape |
+| --- | --- | --- |
+| `inspect_workflow` | Build and return the public `PipelineSummary`. | `{ "source_path": "agents/example.md" }` |
+| `graph_summary` | Return the resolved `GraphSummary`. | `{ "source_path": "agents/example.md" }` |
+| `graph_dump` | Render the graph as text or Graphviz DOT. | `{ "source_path": "...", "format": "text" \| "dot" }` |
+| `step_dependencies` | Traverse dependencies for a step or job id. | `{ "source_path": "...", "step_id": "Agent", "direction": "upstream" \| "downstream" }` |
+| `step_outputs` | List declared outputs and consumers. | `{ "source_path": "...", "producer": null, "consumer": null }` |
+| `trace_failure` | Trace a build's failed-job chain using audit data plus any local IR graph. | `{ "build_id_or_url": "123", "step": null, "org": null, "project": null, "pat": null }` |
+| `whatif` | Classify downstream jobs if a step or job fails. | `{ "source_path": "...", "failing_id": "Agent" }` |
+| `lint_workflow` | Run structural lint checks. | `{ "source_path": "agents/example.md" }` |
+| `catalog` | List safe-outputs, runtimes, tools, engines, and models. | `{ "kind": "safe-outputs" }` |
+| `audit_build` | Download and analyze a build; same shape as `ado-aw audit --json`. | `{ "build_id_or_url": "123", "org": null, "project": null, "pat": null, "artifacts": null, "no_cache": false }` |
+
+## Trust model
+
+`mcp-author` runs as the invoking local user. It has no bounding directory,
+sandbox, or pipeline-style filesystem restrictions. ADO-facing tools
+(`audit_build`, `trace_failure`) use the same `resolve_auth()` path as `ado-aw audit`: explicit PAT,
+environment, or Azure CLI fallback depending on local configuration.
+
+## IDE configuration
+
+### VS Code MCP
+
+```json
+{
+ "mcp": {
+ "servers": {
+ "ado-aw-author": {
+ "command": "ado-aw",
+ "args": ["mcp-author"]
+ }
+ }
+ }
+}
+```
+
+### Claude Desktop
+
+Add this to `claude_desktop_config.json`:
+
+```json
+{
+ "mcpServers": {
+ "ado-aw-author": {
+ "command": "ado-aw",
+ "args": ["mcp-author"]
+ }
+ }
+}
+```
+
+## Related references
+
+- [`docs/ir.md#public-json-summary-irsummary`](ir.md#public-json-summary-irsummary) — public summary schema contract.
+- [`docs/audit.md`](audit.md) — `audit_build` and `trace_failure` build reference and report details.
+- [`docs/cli.md`](cli.md) — CLI counterparts for every MCP tool.
diff --git a/prompts/debug-ado-agentic-workflow.md b/prompts/debug-ado-agentic-workflow.md
index 5c426bca..4a273322 100644
--- a/prompts/debug-ado-agentic-workflow.md
+++ b/prompts/debug-ado-agentic-workflow.md
@@ -79,6 +79,37 @@ The output JSON contains the full `AuditData` (see [What `ado-aw audit` extracts
If the CLI is not available, fall through to the MCP-based steps below.
+#### 2a-prime-bis. Pair `audit` with the IR (when you have local CLI access)
+
+`ado-aw audit` answers "what happened at runtime?". `ado-aw inspect` /
+`graph` / `whatif` answer "what *should* happen, and what depends on
+what?". Pair them when an audit finding points at a specific job /
+step:
+
+```bash
+# Get the typed-IR summary for the source the build came from
+ado-aw inspect path/to/agent.md --json > ir.json
+
+# Print the resolved dependency graph (text, JSON, or Graphviz DOT)
+ado-aw graph dump path/to/agent.md --format text
+ado-aw graph dump path/to/agent.md --format dot | dot -Tsvg -o pipeline.svg
+```
+
+Use these to answer questions the audit alone cannot:
+
+- "Detection failed — which jobs were going to consume its output?"
+ → `ado-aw inspect --json | jq '.graph.job_edges[] | select(.producer == "Detection")'`
+- "If `synthPr` failed, what skips downstream?"
+ → (when wired) `ado-aw whatif --fail synthPr`
+- "Which step produced the empty output the agent step couldn't read?"
+ → `ado-aw inspect --json` then locate the `env_refs` /
+ `outputs_needing_is_output` entry that matches.
+
+The IR view is **statically derived from the agent source**, so it
+reflects the pipeline shape the build was supposed to take. If the
+build's compiled `.lock.yml` diverged from what the current source
+would compile to, `ado-aw check ` will catch it.
+
#### 2a. Find the Pipeline Definition
Use `mcp_ado_pipelines_get_build_definitions` to locate the pipeline by name or definition ID.
diff --git a/prompts/update-ado-agentic-workflow.md b/prompts/update-ado-agentic-workflow.md
index 93fb0f03..a3082566 100644
--- a/prompts/update-ado-agentic-workflow.md
+++ b/prompts/update-ado-agentic-workflow.md
@@ -58,6 +58,26 @@ permissions → parameters
Run through the validation checklist (see below) before finalizing. Fix any issues and inform the user of corrections made.
+When you have local CLI access, two read-only commands give a quick
+structural sanity check **before** you recompile or hand off to the
+user:
+
+```bash
+# Compact summary of jobs, stages, steps, output decls, derived dependsOn
+ado-aw inspect path/to/agent.md
+
+# Resolved dependency graph (text by default; --format dot pipes to Graphviz)
+ado-aw graph dump path/to/agent.md
+```
+
+These build the typed IR from the source and answer "did my change
+add/remove the expected jobs?" and "did the output / dependency wiring
+end up where I expected?" without writing any YAML to disk. The audit
+docs in [`docs/audit.md`](../docs/audit.md) and the IR JSON contract
+in [`docs/ir.md`](../docs/ir.md#public-json-summary-irsummary) cover
+the underlying `PipelineSummary` schema if you want to script against
+the JSON form.
+
### Step 4 — Recompile (if needed)
After any **front matter** changes, the pipeline YAML must be regenerated:
diff --git a/site/src/content/docs/reference/audit.mdx b/site/src/content/docs/reference/audit.mdx
index c303b014..a581d740 100644
--- a/site/src/content/docs/reference/audit.mdx
+++ b/site/src/content/docs/reference/audit.mdx
@@ -29,7 +29,7 @@ URL-encoded project segments are decoded automatically. Both `t=` and `s=` are a
| Flag | Default | Behavior |
|---|---|---|
-| `-o, --output ` | `./logs` | Directory under which `/build-/` is written. |
+| `-o, --output ` | `./logs` | Directory under which `/build-/` is written. Other entry points (`ado-aw trace` and the mcp-author tools) default to the shared `${TEMP}/ado-aw/audit` cache root so they do not scatter `./logs/` directories under arbitrary working directories. |
| `--json` | off | Emit the full `AuditData` as JSON to stdout. Suppresses the trailing `Audit complete` stderr line. |
| `--org ` | auto | ADO organization override for bare build IDs. Full build URLs supply this directly. |
| `--project ` | auto | ADO project override for bare build IDs. Full build URLs supply this directly. |
diff --git a/site/src/content/docs/setup/cli.mdx b/site/src/content/docs/setup/cli.mdx
index c724a7a8..7b338b07 100644
--- a/site/src/content/docs/setup/cli.mdx
+++ b/site/src/content/docs/setup/cli.mdx
@@ -228,7 +228,7 @@ ado-aw audit [--json] [--output ] [--artifacts ]
Options:
- `--json` -- emit the full `AuditData` as JSON to stdout instead of the console report
-- `-o, --output ` -- local directory for downloaded artifacts and the cached report (default: `./logs`)
+- `-o, --output ` -- local directory for downloaded artifacts and the cached report (default: `./logs`; other entry points such as `ado-aw trace` and the mcp-author tools default to `${TEMP}/ado-aw/audit` instead)
- `--artifacts ` -- restrict download to `agent`, `detection`, and/or `safe-outputs`
- `--no-cache` -- re-process even when a cached `run-summary.json` already exists
- `--org`, `--project`, `--pat` -- same as `enable`
diff --git a/src/audit/analyzers/jobs.rs b/src/audit/analyzers/jobs.rs
index 6051c184..a95b6b25 100644
--- a/src/audit/analyzers/jobs.rs
+++ b/src/audit/analyzers/jobs.rs
@@ -82,6 +82,7 @@ fn record_to_job(record: &Value) -> Option {
started_at,
finished_at,
status,
+ ..Default::default()
})
}
diff --git a/src/audit/cli.rs b/src/audit/cli.rs
index c83eb404..5359ae54 100644
--- a/src/audit/cli.rs
+++ b/src/audit/cli.rs
@@ -15,6 +15,7 @@ use crate::audit::analyzers::{
use crate::audit::cache::{RunSummary, load_run_summary, save_run_summary};
use crate::audit::findings;
use crate::audit::model::{AuditData, ErrorInfo, FileInfo, OverviewData};
+use crate::audit::pipeline_graph;
use crate::audit::render;
use crate::audit::url::{ParsedBuildRef, parse_build_ref};
@@ -29,7 +30,47 @@ pub struct AuditOptions<'a> {
pub no_cache: bool,
}
+/// Canonical cache root for downloaded audit artifacts and the
+/// `run-summary.json` cache files.
+///
+/// Returns `${TEMP}/ado-aw/audit` (i.e. `std::env::temp_dir()` joined
+/// with `ado-aw/audit`). `ado-aw trace` and the mcp-author
+/// `audit_build` and `trace_failure` tools go through this helper so
+/// that runs invoked from different contexts share a single cache
+/// location and never silently scatter `./logs/` directories under
+/// whatever working directory the caller happened to inherit (most
+/// often the IDE's current project when the MCP server is started).
+/// The `ado-aw audit` CLI is documented as keeping its `./logs` default.
+///
+/// The audit layer creates a per-build subdirectory (`build-`)
+/// under this root, keyed on the build id, so concurrent runs against
+/// different builds are isolated. Callers that need full per-invocation
+/// isolation (e.g. `no_cache: true` audits run concurrently against
+/// the same build) should layer a unique tempdir on top of this root.
+pub fn default_cache_root() -> PathBuf {
+ std::env::temp_dir().join("ado-aw").join("audit")
+}
+
pub async fn dispatch(opts: AuditOptions<'_>) -> Result<()> {
+ let result = fetch_audit_data_inner(opts).await?;
+ render_audit(&result.audit, result.json)?;
+ if !result.json && !result.from_cache {
+ eprintln!("✓ Audit complete. Reports in {}", result.run_dir.display());
+ }
+ Ok(())
+}
+
+pub async fn fetch_audit_data(opts: AuditOptions<'_>) -> Result {
+ Ok(fetch_audit_data_inner(opts).await?.audit)
+}
+
+struct FetchAuditDataResult {
+ audit: AuditData,
+ run_dir: PathBuf,
+ json: bool,
+ from_cache: bool,
+}
+
+async fn fetch_audit_data_inner(opts: AuditOptions<'_>) -> Result {
let parsed = parse_build_ref(opts.build_id_or_url)?;
let artifact_filters = normalize_artifact_filters(opts.artifacts)?;
let cwd = tokio::fs::canonicalize(".")
@@ -52,8 +93,48 @@ pub async fn dispatch(opts: AuditOptions<'_>) -> Result<()> {
summary.processed_at.to_rfc3339()
);
}
- render_audit(&summary.audit_data, opts.json)?;
- return Ok(());
+ let mut audit = summary.audit_data;
+ let cached_audit_before_postprocess = audit.clone();
+ derive_post_processing(&mut audit, &run_dir).await;
+ // Persist recomputed pipeline_graph + findings back to the
+ // cached snapshot so subsequent runs see the same canonical
+ // AuditData shape; tooling that diffs successive outputs would
+ // otherwise observe drift between the saved file and the
+ // in-memory result.
+ //
+ // NOTE on concurrency: two concurrent `ado-aw audit` runs for
+ // the same build id can race on this `save_run_summary` write.
+ // We do not take a filesystem lock — the failure path is
+ // recorded as a warning (see below) rather than aborting the
+ // audit, and the worst case is that one writer's recomputed
+ // snapshot overwrites the other's. Both writers derive from
+ // the same on-disk artifacts, so the resulting summary is
+ // still internally consistent; only the `processed_at`
+ // timestamp may flip between them.
+ if audit != cached_audit_before_postprocess
+ && let Err(error) = save_run_summary(
+ &run_dir,
+ &RunSummary {
+ ado_aw_version: env!("CARGO_PKG_VERSION").to_string(),
+ build_id: parsed.build_id,
+ processed_at: Utc::now(),
+ audit_data: audit.clone(),
+ },
+ )
+ .await
+ {
+ warn_and_record(
+ &mut audit,
+ "audit::cli",
+ format!("failed to refresh cached run-summary.json: {error:#}"),
+ );
+ }
+ return Ok(FetchAuditDataResult {
+ audit,
+ run_dir,
+ json: opts.json,
+ from_cache: true,
+ });
}
let client = reqwest::Client::builder()
@@ -68,9 +149,16 @@ pub async fn dispatch(opts: AuditOptions<'_>) -> Result<()> {
};
let filters = artifact_filters.as_deref();
- let saw_artifact_auth_error =
- fetch_and_record_artifacts(&client, &ctx, &auth, parsed.build_id, filters, &run_dir, &mut audit)
- .await?;
+ let saw_artifact_auth_error = fetch_and_record_artifacts(
+ &client,
+ &ctx,
+ &auth,
+ parsed.build_id,
+ filters,
+ &run_dir,
+ &mut audit,
+ )
+ .await?;
if saw_artifact_auth_error && !has_any_local_artifacts(&run_dir).await {
anyhow::bail!(
@@ -79,12 +167,20 @@ pub async fn dispatch(opts: AuditOptions<'_>) -> Result<()> {
);
}
- run_analyzers(&client, &ctx, &auth, parsed.build_id, filters, &run_dir, &mut audit).await;
+ run_analyzers(
+ &client,
+ &ctx,
+ &auth,
+ parsed.build_id,
+ filters,
+ &run_dir,
+ &mut audit,
+ )
+ .await;
populate_performance_metrics(&mut audit);
audit.metrics.error_count = audit.errors.len() as u64;
- audit.metrics.warning_count = audit.warnings.len() as u64;
- findings::derive_findings(&mut audit);
+ derive_post_processing(&mut audit, &run_dir).await;
save_run_summary(
&run_dir,
@@ -97,11 +193,43 @@ pub async fn dispatch(opts: AuditOptions<'_>) -> Result<()> {
)
.await?;
- render_audit(&audit, opts.json)?;
- if !opts.json {
- eprintln!("✓ Audit complete. Reports in {}", run_dir.display());
+ Ok(FetchAuditDataResult {
+ audit,
+ run_dir,
+ json: opts.json,
+ from_cache: false,
+ })
+}
+
+/// Re-run the audit-time enrichment passes that depend on local state
+/// (pipeline-graph correlation, metric counters, derived findings).
+///
+/// Called both after a fresh download and after a cache load so that
+/// both code paths produce a structurally identical `AuditData`.
+/// `populate_pipeline_graph` failures are downgraded to warnings rather
+/// than aborting the audit.
+///
+/// ## Cache-hit behaviour
+///
+/// When invoked after a cache load this function correlates against
+/// the **current local source markdown**, not the source that was on
+/// disk when the build originally ran. That is intentional: the
+/// `pipeline_graph` section is meant to answer "how does this build's
+/// timeline map onto today's typed IR?", which is what an operator
+/// debugging an old failure with newly-rebased code actually wants.
+/// Do not "fix" this into using a cached graph snapshot — the
+/// downstream `findings::derive_findings` rules (e.g.
+/// downstream-impact) rely on the freshly-correlated graph.
+async fn derive_post_processing(audit: &mut AuditData, run_dir: &Path) {
+ if let Err(error) = pipeline_graph::populate_pipeline_graph(audit, run_dir).await {
+ warn_and_record(
+ audit,
+ "audit::pipeline_graph",
+ format!("pipeline graph correlation failed: {error:#}"),
+ );
}
- Ok(())
+ audit.metrics.warning_count = audit.warnings.len() as u64;
+ findings::derive_findings(audit);
}
/// Download all selected artifacts for the build, recording auth errors and
@@ -152,7 +280,10 @@ async fn fetch_and_record_artifacts(
warn_and_record(
audit,
"audit::artifacts",
- format!("failed to download artifact '{}': {:#}", artifact.name, error),
+ format!(
+ "failed to download artifact '{}': {:#}",
+ artifact.name, error
+ ),
);
}
}
@@ -170,8 +301,7 @@ async fn fetch_and_record_artifacts(
);
}
Err(error) => {
- return Err(error)
- .context(format!("failed to list artifacts for build {}", build_id));
+ return Err(error).context(format!("failed to list artifacts for build {}", build_id));
}
}
Ok(saw_artifact_auth_error)
@@ -891,7 +1021,8 @@ mod tests {
#[test]
fn validate_host_accepts_dev_azure_com_case_insensitively() {
- validate_audit_url_host("Dev.Azure.Com", None).expect("cloud host match is case-insensitive");
+ validate_audit_url_host("Dev.Azure.Com", None)
+ .expect("cloud host match is case-insensitive");
}
#[test]
diff --git a/src/audit/findings.rs b/src/audit/findings.rs
index bdb53d0b..827fd442 100644
--- a/src/audit/findings.rs
+++ b/src/audit/findings.rs
@@ -1,6 +1,6 @@
use std::collections::BTreeMap;
-use crate::audit::model::{AuditData, Finding, Recommendation, Severity};
+use crate::audit::model::{AuditData, Finding, JobData, Recommendation, Severity};
/// Aggregate findings + recommendations from every populated section
/// of `AuditData`. Pure function; does not mutate the input.
@@ -21,6 +21,7 @@ pub fn derive_findings(audit: &mut AuditData) {
add_missing_data_cluster(audit, &mut findings, &mut recommendations);
add_no_safe_outputs_proposed(audit, &mut findings, &mut recommendations);
add_error_count_findings(audit, &mut findings, &mut recommendations);
+ add_downstream_impact_findings(audit, &mut findings, &mut recommendations);
audit.key_findings = findings;
audit.recommendations = recommendations;
@@ -363,6 +364,98 @@ fn add_error_count_findings(
);
}
+fn add_downstream_impact_findings(
+ audit: &AuditData,
+ findings: &mut Vec,
+ recommendations: &mut Vec,
+) {
+ for job in &audit.jobs {
+ if !job.failed() || job.downstream_jobs.is_empty() {
+ continue;
+ }
+
+ // Gate: continue only when at least one downstream job was
+ // skipped or cancelled (or is absent from the timeline, which is
+ // treated as an expected skip). Jobs with bypass conditions like
+ // `always()` still appear in `job.downstream_jobs` because that
+ // field is populated from typed-IR edges; without this gate we
+ // would emit downstream-impact findings even when every
+ // downstream job successfully ran through the failure.
+ let any_actually_skipped = job.downstream_jobs.iter().any(|downstream_id| {
+ audit
+ .jobs
+ .iter()
+ .find(|candidate| candidate.matches_ir_id(downstream_id))
+ .map(is_skipped_or_cancelled)
+ // Absent from runtime timeline → typed-IR expected it
+ // to skip after the upstream failure.
+ .unwrap_or(true)
+ });
+ if !any_actually_skipped {
+ continue;
+ }
+
+ let downstream = job
+ .downstream_jobs
+ .iter()
+ .map(|downstream_job| {
+ let classification = audit
+ .jobs
+ .iter()
+ .find(|candidate| candidate.matches_ir_id(downstream_job))
+ .map(JobData::classification)
+ .unwrap_or_else(|| String::from("expected to skip"));
+ format!("{downstream_job}: {classification}")
+ })
+ .collect::>()
+ .join("; ");
+
+ push_finding(
+ findings,
+ Finding {
+ category: String::from("pipeline_graph"),
+ severity: Severity::Medium,
+ // Title intentionally says "potentially impacted" rather
+ // than "skipped": even with the `any_actually_skipped`
+ // gate above, some downstream jobs in this set may have
+ // bypassed the failure (e.g. via `always()`). The
+ // description embeds the real per-job classification.
+ title: format!("Downstream jobs potentially impacted by {} failure", job.name),
+ description: format!(
+ "The typed pipeline graph shows downstream impact from {}: {}.",
+ job.name, downstream
+ ),
+ impact: None,
+ },
+ );
+
+ push_recommendation(
+ recommendations,
+ Recommendation {
+ priority: String::from("high"),
+ action: format!(
+ "Inspect the {} job logs to identify the root cause; downstream jobs cannot succeed until this is resolved.",
+ job.name
+ ),
+ reason: format!(
+ "{} failed, which impacted {} downstream job(s).",
+ job.name,
+ job.downstream_jobs.len()
+ ),
+ example: None,
+ },
+ );
+ }
+}
+
+fn is_skipped_or_cancelled(job: &JobData) -> bool {
+ let result = job.result.as_deref().unwrap_or_default();
+ result.eq_ignore_ascii_case("skipped")
+ || result.eq_ignore_ascii_case("canceled")
+ || result.eq_ignore_ascii_case("cancelled")
+ || job.status.eq_ignore_ascii_case("skipped")
+}
+
fn push_finding(findings: &mut Vec, finding: Finding) {
if !findings.contains(&finding) {
findings.push(finding);
@@ -379,7 +472,7 @@ fn push_recommendation(recommendations: &mut Vec, recommendation
mod tests {
use super::derive_findings;
use crate::audit::model::{
- AuditData, DomainStat, Finding, FirewallAnalysis, MCPServerHealth, MCPServerStats,
+ AuditData, DomainStat, Finding, FirewallAnalysis, JobData, MCPServerHealth, MCPServerStats,
MetricsData, MissingDataReport, MissingToolReport, NoopReport, Recommendation,
SafeOutputSummary, Severity,
};
@@ -661,6 +754,78 @@ mod tests {
assert!(audit.recommendations.is_empty());
}
+ #[test]
+ fn downstream_impact_rule_emits_finding_for_failed_job() {
+ let mut audit = AuditData {
+ jobs: vec![
+ JobData {
+ name: String::from("Agent"),
+ status: String::from("completed"),
+ result: Some(String::from("failed")),
+ downstream_jobs: vec![String::from("Detection"), String::from("SafeOutputs")],
+ ..Default::default()
+ },
+ JobData {
+ name: String::from("Detection"),
+ status: String::from("completed"),
+ result: Some(String::from("skipped")),
+ ..Default::default()
+ },
+ ],
+ ..Default::default()
+ };
+
+ derive_findings(&mut audit);
+
+ let finding =
+ finding_by_title(&audit, "Downstream jobs potentially impacted by Agent failure");
+ assert_eq!(finding.severity, Severity::Medium);
+ assert!(finding.description.contains("Detection: skipped"));
+ assert!(
+ finding
+ .description
+ .contains("SafeOutputs: expected to skip")
+ );
+ }
+
+ #[test]
+ fn downstream_impact_rule_suppresses_when_all_downstream_jobs_ran_via_bypass() {
+ // Regression: previously the rule fired whenever an upstream
+ // job failed and had any IR-derived downstream — even when
+ // every downstream job successfully ran via an always()
+ // bypass. The "skipped" wording was then a lie. With the
+ // any_actually_skipped gate the finding is suppressed.
+ let mut audit = AuditData {
+ jobs: vec![
+ JobData {
+ name: String::from("Agent"),
+ status: String::from("completed"),
+ result: Some(String::from("failed")),
+ downstream_jobs: vec![String::from("Cleanup")],
+ ..Default::default()
+ },
+ JobData {
+ name: String::from("Cleanup"),
+ status: String::from("completed"),
+ result: Some(String::from("succeeded")),
+ ..Default::default()
+ },
+ ],
+ ..Default::default()
+ };
+
+ derive_findings(&mut audit);
+
+ assert!(
+ !audit
+ .key_findings
+ .iter()
+ .any(|f| f.title.contains("Downstream jobs potentially impacted")),
+ "must not emit downstream-impact finding when every downstream succeeded, got {:?}",
+ audit.key_findings
+ );
+ }
+
#[test]
fn combined_findings_are_appended_and_preserved_across_passes() {
let mut audit = AuditData {
diff --git a/src/audit/mod.rs b/src/audit/mod.rs
index 77778e0e..ff44b20b 100644
--- a/src/audit/mod.rs
+++ b/src/audit/mod.rs
@@ -7,10 +7,11 @@ pub mod cache;
pub mod cli;
pub mod findings;
pub mod model;
+pub mod pipeline_graph;
pub mod render;
pub mod url;
-pub use cli::{AuditOptions, dispatch};
+pub use cli::{AuditOptions, default_cache_root, dispatch, fetch_audit_data};
#[allow(unused_imports)]
pub use model::*;
@@ -26,7 +27,10 @@ pub use model::*;
/// compares numerically so the highest-numbered build wins.
pub(crate) fn cmp_numeric_suffix(a: &str, b: &str) -> std::cmp::Ordering {
fn suffix(s: &str) -> u64 {
- s.rsplit('_').next().and_then(|s| s.parse().ok()).unwrap_or(0)
+ s.rsplit('_')
+ .next()
+ .and_then(|s| s.parse().ok())
+ .unwrap_or(0)
}
suffix(a).cmp(&suffix(b)).then_with(|| a.cmp(b))
}
diff --git a/src/audit/model.rs b/src/audit/model.rs
index 0ff720fe..07b96931 100644
--- a/src/audit/model.rs
+++ b/src/audit/model.rs
@@ -4,6 +4,8 @@ use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::collections::BTreeMap;
+use crate::compile::ir::summary::PipelineSummary;
+
fn is_zero_u64(value: &u64) -> bool {
*value == 0
}
@@ -59,6 +61,9 @@ pub struct AuditData {
/// MCP server reliability and call health derived from gateway logs.
#[serde(skip_serializing_if = "Option::is_none")]
pub mcp_server_health: Option,
+ /// Optional typed-IR graph correlation, rebuilt from the current local copy of this build's pipeline source.
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub pipeline_graph: Option,
/// Job-level status data derived from the Azure DevOps build timeline.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
pub jobs: Vec,
@@ -300,6 +305,16 @@ pub struct AuditEngineConfig {
pub timeout_minutes: Option,
}
+/// Typed-IR graph correlation derived from the source markdown for this audited run.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct PipelineGraphSection {
+ /// Source markdown path used to rebuild the typed IR.
+ #[serde(default, skip_serializing_if = "String::is_empty")]
+ pub source_path: String,
+ /// Full public pipeline summary, matching `ado-aw inspect --json`.
+ pub summary: PipelineSummary,
+}
+
/// Job-level status information for one stage in the build timeline.
///
/// This is derived from Azure DevOps timeline records for the audited build.
@@ -324,6 +339,59 @@ pub struct JobData {
/// Job finish timestamp.
#[serde(skip_serializing_if = "Option::is_none")]
pub finished_at: Option,
+ /// Upstream job IDs from typed-IR graph correlation.
+ #[serde(skip_serializing_if = "Vec::is_empty", default)]
+ pub upstream_jobs: Vec,
+ /// Downstream job IDs from typed-IR graph correlation.
+ #[serde(skip_serializing_if = "Vec::is_empty", default)]
+ pub downstream_jobs: Vec,
+}
+
+impl JobData {
+ /// Returns true when this job ended in a failure-like state.
+ pub fn failed(&self) -> bool {
+ let result = self.result.as_deref().unwrap_or_default();
+ // Be defensive on US/UK spelling variants from upstream sources.
+ result.eq_ignore_ascii_case("failed")
+ || result.eq_ignore_ascii_case("canceled")
+ || result.eq_ignore_ascii_case("cancelled")
+ || self.status.eq_ignore_ascii_case("failed")
+ || self.status.eq_ignore_ascii_case("canceled")
+ || self.status.eq_ignore_ascii_case("cancelled")
+ }
+
+ /// Returns the best available status/result label for reporting.
+ pub fn classification(&self) -> String {
+ self.result
+ .as_deref()
+ .filter(|result| !result.trim().is_empty())
+ .unwrap_or(&self.status)
+ .to_string()
+ }
+
+ /// Returns `true` when this runtime job corresponds to the typed-IR
+ /// job id `ir_job_id`. Accepts either the bare id or a
+ /// `Stage.Job`-style qualified timeline name.
+ ///
+ /// Centralised so that `audit::findings` and `inspect::trace`
+ /// share one definition — a future typo or extension (e.g. handling
+ /// stage prefixes differently) only needs to change in one place.
+ ///
+ /// Only accepts a **single-level** `Stage.Job` qualifier. Strings
+ /// with two or more dots (e.g. `Stage1.SubStage.Agent`) are
+ /// rejected even when the trailing component matches `ir_job_id`,
+ /// because the old `rsplit('.').next()` form could attach IR edges
+ /// to the wrong runtime job in unusual pipeline shapes.
+ pub fn matches_ir_id(&self, ir_job_id: &str) -> bool {
+ if self.name == ir_job_id {
+ return true;
+ }
+ matches!(
+ self.name.rsplit_once('.'),
+ Some((prefix, suffix))
+ if suffix == ir_job_id && !prefix.contains('.')
+ )
+ }
}
/// Metadata about a file downloaded while assembling the audit.
@@ -942,6 +1010,7 @@ mod tests {
unreliable: true,
}],
}),
+ pipeline_graph: None,
jobs: vec![JobData {
name: String::from("Agent"),
status: String::from("completed"),
@@ -949,6 +1018,7 @@ mod tests {
duration: Some(String::from("4m")),
started_at: Some(String::from("2026-05-21T12:01:00Z")),
finished_at: Some(String::from("2026-05-21T12:05:00Z")),
+ ..Default::default()
}],
downloaded_files: vec![FileInfo {
path: String::from("logs\\build-42\\agent_outputs_42\\otel.jsonl"),
@@ -1068,4 +1138,48 @@ mod tests {
keys_sorted.sort();
assert_eq!(keys_sorted, vec!["downloaded_files", "metrics", "overview"]);
}
+
+ #[test]
+ fn matches_ir_id_accepts_bare_and_single_level_qualified_names() {
+ let bare = JobData {
+ name: "Agent".to_string(),
+ ..Default::default()
+ };
+ assert!(bare.matches_ir_id("Agent"));
+
+ let qualified = JobData {
+ name: "Pipeline.Agent".to_string(),
+ ..Default::default()
+ };
+ assert!(qualified.matches_ir_id("Agent"));
+ }
+
+ #[test]
+ fn matches_ir_id_rejects_multi_level_suffix() {
+ // Regression: the old `rsplit('.').next()` form matched the
+ // last component of any dotted path, which could attach IR
+ // edges to the wrong runtime job in unusual pipeline shapes.
+ let job = JobData {
+ name: "Stage1.SubStage.Agent".to_string(),
+ ..Default::default()
+ };
+
+ assert!(
+ !job.matches_ir_id("Agent"),
+ "multi-level dotted timeline names must not match against a bare id"
+ );
+ assert!(
+ !job.matches_ir_id("SubStage.Agent"),
+ "matches_ir_id must not match an arbitrary tail substring"
+ );
+ }
+
+ #[test]
+ fn matches_ir_id_rejects_unrelated_names() {
+ let job = JobData {
+ name: "Detection".to_string(),
+ ..Default::default()
+ };
+ assert!(!job.matches_ir_id("Agent"));
+ }
}
diff --git a/src/audit/pipeline_graph.rs b/src/audit/pipeline_graph.rs
new file mode 100644
index 00000000..cebf0552
--- /dev/null
+++ b/src/audit/pipeline_graph.rs
@@ -0,0 +1,428 @@
+//! Pipeline-IR graph correlation for `ado-aw audit`.
+
+use std::path::{Path, PathBuf};
+
+use anyhow::{Context, Result};
+
+use crate::audit::model::{AuditData, AwInfo, ErrorInfo, PipelineGraphSection};
+use crate::compile::ir::summary::{JobSummary, PipelineSummary};
+
+/// Populate `audit.pipeline_graph` and per-job upstream/downstream IR edges.
+///
+/// The source markdown is resolved from the runtime `aw_info.json` metadata
+/// emitted by the Agent job. Missing local sources are common when auditing an
+/// arbitrary build, so absence is a warning; only IR build/summarize failures return `Err`.
+pub async fn populate_pipeline_graph(audit: &mut AuditData, run_dir: &Path) -> Result<()> {
+ let source = match read_source_from_aw_info(run_dir).await {
+ Some(Ok(value)) if !value.trim().is_empty() => Some(value),
+ Some(Err(err)) => {
+ // Previously `transpose()?` propagated this as a hard
+ // error and aborted the audit. A corrupt aw_info.json
+ // from a bad run is a realistic scenario; downgrade to
+ // the same warn-and-continue path documented for
+ // resolve_source_path failures below.
+ record_warning(
+ audit,
+ "audit::pipeline_graph",
+ format!("failed to read aw_info.json: {err:#}; skipping IR graph correlation"),
+ );
+ return Ok(());
+ }
+ _ => audit
+ .overview
+ .aw_info
+ .as_ref()
+ .and_then(|info| info.source.clone()),
+ };
+ let Some(source) = source else {
+ record_warning(
+ audit,
+ "audit::pipeline_graph",
+ "could not locate aw_info.json source metadata; skipping IR graph correlation",
+ );
+ return Ok(());
+ };
+
+ let source_path = match resolve_source_path(&source).await {
+ Ok(path) => path,
+ Err(err) => {
+ record_warning(
+ audit,
+ "audit::pipeline_graph",
+ format!("could not resolve source path: {err:#}; skipping IR graph correlation"),
+ );
+ return Ok(());
+ }
+ };
+ if tokio::fs::metadata(&source_path).await.is_err() {
+ record_warning(
+ audit,
+ "audit::pipeline_graph",
+ format!(
+ "source markdown '{}' is not available locally; skipping IR graph correlation",
+ source_path.display()
+ ),
+ );
+ return Ok(());
+ }
+
+ let resolved_source_path = tokio::fs::canonicalize(&source_path)
+ .await
+ .unwrap_or_else(|_| source_path.clone());
+ let (_fm, pipeline) = crate::compile::build_pipeline_ir(&resolved_source_path)
+ .await
+ .with_context(|| format!("build IR for {}", resolved_source_path.display()))?;
+ let summary = PipelineSummary::from_pipeline(&pipeline)
+ .with_context(|| format!("summarize IR for {}", resolved_source_path.display()))?;
+
+ populate_job_edges(audit, &summary);
+ audit.pipeline_graph = Some(PipelineGraphSection {
+ source_path: resolved_source_path.display().to_string(),
+ summary,
+ });
+ Ok(())
+}
+
+fn populate_job_edges(audit: &mut AuditData, summary: &PipelineSummary) {
+ for job in &mut audit.jobs {
+ let Some(ir_job) = find_matching_job_summary(summary, &job.name) else {
+ continue;
+ };
+ let job_id = ir_job.id.as_str();
+ job.upstream_jobs = summary
+ .graph
+ .job_edges
+ .iter()
+ .filter(|edge| edge.consumer == job_id)
+ .map(|edge| edge.producer.clone())
+ .collect();
+ job.downstream_jobs = summary
+ .graph
+ .job_edges
+ .iter()
+ .filter(|edge| edge.producer == job_id)
+ .map(|edge| edge.consumer.clone())
+ .collect();
+ }
+}
+
+fn find_matching_job_summary<'a>(
+ summary: &'a PipelineSummary,
+ timeline_name: &str,
+) -> Option<&'a JobSummary> {
+ summary
+ .all_jobs()
+ .find(|job| timeline_name_matches_job(timeline_name, &job.id, job.stage.as_deref()))
+}
+
+pub(crate) fn timeline_name_matches_job(
+ timeline_name: &str,
+ job_id: &str,
+ stage: Option<&str>,
+) -> bool {
+ let timeline_name = timeline_name.trim();
+ if timeline_name == job_id {
+ return true;
+ }
+ if let Some(stage) = stage
+ && timeline_name == format!("{stage}.{job_id}")
+ {
+ return true;
+ }
+ // Fallback for timelines that emit a `Stage.Job` name whose stage
+ // part differs from (or was not supplied as) `stage`. NOTE(review):
+ // this ignores `stage`, so same-named jobs in different stages can
+ // cross-match — confirm. Only a *single-level* prefix is accepted:
+ // names like `Stage1.SubStage.Agent` are rejected even when the
+ // trailing component matches, because the old `rsplit('.').next()`
+ // form could attach IR edges to the wrong runtime job.
+ matches!(
+ timeline_name.rsplit_once('.'),
+ Some((prefix, suffix))
+ if suffix == job_id && !prefix.contains('.')
+ )
+}
+
+async fn read_source_from_aw_info(run_dir: &Path) -> Option> {
+ let agent_outputs = find_artifact_dir(run_dir, "agent_outputs").await?;
+ for path in [
+ agent_outputs.join("staging").join("aw_info.json"),
+ agent_outputs.join("aw_info.json"),
+ ] {
+ if tokio::fs::metadata(&path).await.is_err() {
+ continue;
+ }
+ let contents = match tokio::fs::read_to_string(&path).await {
+ Ok(contents) => contents,
+ Err(error) => return Some(Err(error).context(format!("read {}", path.display()))),
+ };
+ let aw_info = match serde_json::from_str::(&contents) {
+ Ok(aw_info) => aw_info,
+ Err(error) => return Some(Err(error).context(format!("parse {}", path.display()))),
+ };
+ return Some(Ok(aw_info.source.unwrap_or_default()));
+ }
+ None
+}
+
+/// Resolve the `source` string taken from a downloaded `aw_info.json`
+/// into an on-disk path.
+///
+/// Delegates the whole security contract to
+/// [`crate::compile::source_path_guard::validate_workflow_source_path`],
+/// which both this entry point and the mcp-author server share.
+/// See that module-level doc for the full list of mitigations.
+async fn resolve_source_path(source: &str) -> Result {
+ let validated = crate::compile::source_path_guard::validate_workflow_source_path(source)
+ .await
+ .with_context(|| "validate aw_info.json source string from audited build artifact")?;
+ Ok(validated.path)
+}
+
+async fn find_artifact_dir(run_dir: &Path, prefix: &str) -> Option {
+ let mut entries = tokio::fs::read_dir(run_dir).await.ok()?;
+ let mut hits: Vec<(String, PathBuf)> = Vec::new();
+ while let Ok(Some(entry)) = entries.next_entry().await {
+ if entry.file_type().await.map(|t| t.is_dir()).unwrap_or(false)
+ && let Some(name) = entry.file_name().to_str()
+ && (name == prefix || name.starts_with(&format!("{prefix}_")))
+ {
+ hits.push((name.to_string(), entry.path()));
+ }
+ }
+ hits.sort_by(|(a, _), (b, _)| crate::audit::cmp_numeric_suffix(a, b));
+ hits.pop().map(|(_, path)| path)
+}
+
+fn record_warning(audit: &mut AuditData, source: &str, message: impl Into) {
+ audit.warnings.push(ErrorInfo {
+ source: source.to_string(),
+ message: message.into(),
+ timestamp: None,
+ });
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::audit::model::JobData;
+
+ #[tokio::test]
+ async fn populate_pipeline_graph_correlates_jobs_from_aw_info_source() {
+ let temp_dir = tempfile::tempdir().expect("tempdir");
+ let run_dir = temp_dir.path().join("build-42");
+ let staging_dir = run_dir.join("agent_outputs_42").join("staging");
+ tokio::fs::create_dir_all(&staging_dir)
+ .await
+ .expect("create staging");
+
+ let source_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
+ .join("tests")
+ .join("safe-outputs")
+ .join("create-pull-request.md");
+ let aw_info = serde_json::json!({
+ "source": source_path.display().to_string(),
+ "target": "standalone"
+ });
+ tokio::fs::write(staging_dir.join("aw_info.json"), aw_info.to_string())
+ .await
+ .expect("write aw_info");
+
+ let mut audit = AuditData {
+ jobs: vec![
+ JobData {
+ name: "Agent".to_string(),
+ status: "completed".to_string(),
+ result: Some("succeeded".to_string()),
+ ..Default::default()
+ },
+ JobData {
+ name: "Detection".to_string(),
+ status: "completed".to_string(),
+ result: Some("succeeded".to_string()),
+ ..Default::default()
+ },
+ ],
+ ..Default::default()
+ };
+
+ populate_pipeline_graph(&mut audit, &run_dir)
+ .await
+ .expect("populate graph");
+
+ assert!(audit.pipeline_graph.is_some());
+ let agent = audit
+ .jobs
+ .iter()
+ .find(|job| job.name == "Agent")
+ .expect("agent job");
+ assert!(agent.downstream_jobs.iter().any(|job| job == "Detection"));
+ let detection = audit
+ .jobs
+ .iter()
+ .find(|job| job.name == "Detection")
+ .expect("detection job");
+ assert!(detection.upstream_jobs.iter().any(|job| job == "Agent"));
+ }
+
+ #[tokio::test]
+ async fn resolve_source_path_rejects_non_markdown_absolute_paths() {
+ // The exfiltration vector flagged by the PR reviewer: a malicious
+ // aw_info.json carries an absolute path to a non-`.md` file. The
+ // resolver must refuse before any file open happens.
+ assert!(
+ resolve_source_path("/home/user/.ssh/id_rsa").await.is_err(),
+ "expected resolver to reject non-markdown absolute path"
+ );
+ }
+
+ #[tokio::test]
+ async fn resolve_source_path_rejects_parent_traversal() {
+ assert!(
+ resolve_source_path("../../../etc/passwd.md")
+ .await
+ .is_err(),
+ "expected resolver to reject parent-dir components"
+ );
+ }
+
+ #[tokio::test]
+ async fn resolve_source_path_rejects_tilde_prefix() {
+ assert!(
+ resolve_source_path("~/secret.md").await.is_err(),
+ "expected resolver to reject tilde-prefixed path"
+ );
+ }
+
+ #[tokio::test]
+ async fn resolve_source_path_accepts_markdown_absolute_paths() {
+ // Legitimate compiled-elsewhere workflows: absolute `.md` paths must still work.
+ let path = if cfg!(windows) {
+ r"C:\workflows\foo.md"
+ } else {
+ "/repo/workflows/foo.md"
+ };
+ assert!(
+ resolve_source_path(path).await.is_ok(),
+ "expected absolute `.md` paths to be accepted"
+ );
+ }
+
+ #[cfg(unix)]
+ #[tokio::test]
+ async fn resolve_source_path_rejects_md_symlink_to_non_md_target() {
+ // Symlink-bypass regression: `foo.md` → `/etc/passwd` lexically
+ // satisfies the `.md` extension check but resolves to a
+ // non-markdown file. The post-canonicalize re-check must
+ // reject it.
+ let temp_dir = tempfile::tempdir().expect("tempdir");
+ let target = temp_dir.path().join("not_markdown.bin");
+ tokio::fs::write(&target, b"binary").await.expect("write target");
+ let link = temp_dir.path().join("evil.md");
+ tokio::fs::symlink(&target, &link)
+ .await
+ .expect("create symlink");
+
+ let err = resolve_source_path(link.to_str().unwrap())
+ .await
+ .expect_err("symlink to non-md target must be rejected");
+ let msg = format!("{err:#}");
+ assert!(
+ msg.contains("symlink resolves to non-`.md` target"),
+ "expected symlink-target rejection message, got: {msg}"
+ );
+ }
+
+ #[cfg(unix)]
+ #[tokio::test]
+ async fn resolve_source_path_accepts_md_symlink_to_md_target() {
+ // Legitimate `current.md` → `v1.md` style symlinks must still
+ // be accepted — the post-canonicalize re-check only rejects
+ // when the resolved target lacks the `.md` extension.
+ let temp_dir = tempfile::tempdir().expect("tempdir");
+ let target = temp_dir.path().join("v1.md");
+ tokio::fs::write(&target, b"# pipeline").await.expect("write target");
+ let link = temp_dir.path().join("current.md");
+ tokio::fs::symlink(&target, &link)
+ .await
+ .expect("create symlink");
+
+ let resolved = resolve_source_path(link.to_str().unwrap())
+ .await
+ .expect("md symlink to md target must be accepted");
+ assert_eq!(resolved, link);
+ }
+
+ #[tokio::test]
+ async fn populate_pipeline_graph_records_warning_on_malicious_source() {
+ let temp_dir = tempfile::tempdir().expect("tempdir");
+ let run_dir = temp_dir.path().join("build-99");
+ let staging_dir = run_dir.join("agent_outputs_99").join("staging");
+ tokio::fs::create_dir_all(&staging_dir)
+ .await
+ .expect("create staging");
+
+ let aw_info = serde_json::json!({
+ "source": "/home/user/.ssh/id_rsa",
+ "target": "standalone"
+ });
+ tokio::fs::write(staging_dir.join("aw_info.json"), aw_info.to_string())
+ .await
+ .expect("write aw_info");
+
+ let mut audit = AuditData::default();
+ populate_pipeline_graph(&mut audit, &run_dir)
+ .await
+ .expect("populate graph should not error on malicious source");
+
+ assert!(
+ audit.pipeline_graph.is_none(),
+ "malicious source must not populate pipeline_graph"
+ );
+ assert!(
+ audit
+ .warnings
+ .iter()
+ .any(|w| w.source == "audit::pipeline_graph"
+ && w.message.contains("could not resolve source path")),
+ "expected a warning recording the rejection, got {:?}",
+ audit.warnings
+ );
+ }
+
+ #[tokio::test]
+ async fn populate_pipeline_graph_records_warning_on_corrupt_aw_info_json() {
+ // Regression: previously `read_source_from_aw_info`'s
+ // Some(Err(_)) was propagated via `transpose()?` and aborted
+ // the entire audit. A corrupt aw_info.json from a bad run is
+ // a realistic scenario; it must degrade to a warning.
+ let temp_dir = tempfile::tempdir().expect("tempdir");
+ let run_dir = temp_dir.path().join("build-77");
+ let staging_dir = run_dir.join("agent_outputs_77").join("staging");
+ tokio::fs::create_dir_all(&staging_dir)
+ .await
+ .expect("create staging");
+ tokio::fs::write(staging_dir.join("aw_info.json"), b"{not valid json")
+ .await
+ .expect("write malformed aw_info");
+
+ let mut audit = AuditData::default();
+ populate_pipeline_graph(&mut audit, &run_dir)
+ .await
+ .expect("populate graph must not bail on corrupt aw_info.json");
+
+ assert!(
+ audit.pipeline_graph.is_none(),
+ "corrupt aw_info.json must not populate pipeline_graph"
+ );
+ assert!(
+ audit
+ .warnings
+ .iter()
+ .any(|w| w.source == "audit::pipeline_graph"
+ && w.message.contains("failed to read aw_info.json")),
+ "expected a warning recording the read failure, got {:?}",
+ audit.warnings
+ );
+ }
+}
diff --git a/src/audit/render/console.rs b/src/audit/render/console.rs
index eafb215f..0b33d855 100644
--- a/src/audit/render/console.rs
+++ b/src/audit/render/console.rs
@@ -1297,6 +1297,7 @@ By threat:
unreliable: true,
}],
}),
+ pipeline_graph: None,
jobs: vec![
JobData {
name: "Agent".to_string(),
@@ -1305,6 +1306,7 @@ By threat:
duration: Some("2m 30s".to_string()),
started_at: Some("2026-05-21T12:01:00Z".to_string()),
finished_at: Some("2026-05-21T12:03:30Z".to_string()),
+ ..Default::default()
},
JobData {
name: "Detection".to_string(),
@@ -1313,6 +1315,7 @@ By threat:
duration: Some("30s".to_string()),
started_at: Some("2026-05-21T12:03:30Z".to_string()),
finished_at: Some("2026-05-21T12:04:00Z".to_string()),
+ ..Default::default()
},
JobData {
name: "SafeOutputs".to_string(),
@@ -1321,6 +1324,7 @@ By threat:
duration: Some("12s".to_string()),
started_at: Some("2026-05-21T12:04:00Z".to_string()),
finished_at: Some("2026-05-21T12:04:12Z".to_string()),
+ ..Default::default()
},
],
downloaded_files: vec![FileInfo {
diff --git a/src/audit/render/json.rs b/src/audit/render/json.rs
index 94d6ff01..24b279d7 100644
--- a/src/audit/render/json.rs
+++ b/src/audit/render/json.rs
@@ -156,6 +156,7 @@ mod tests {
unreliable: true,
}],
}),
+ pipeline_graph: None,
jobs: vec![JobData {
name: String::from("Agent"),
status: String::from("completed"),
@@ -163,6 +164,7 @@ mod tests {
duration: Some(String::from("4m")),
started_at: Some(String::from("2026-05-21T12:01:00Z")),
finished_at: Some(String::from("2026-05-21T12:05:00Z")),
+ ..Default::default()
}],
downloaded_files: vec![FileInfo {
path: String::from("logs\\build-42\\agent_outputs_42\\otel.jsonl"),
diff --git a/src/compile/ir/mod.rs b/src/compile/ir/mod.rs
index 0d1ea28b..29a566f4 100644
--- a/src/compile/ir/mod.rs
+++ b/src/compile/ir/mod.rs
@@ -45,6 +45,7 @@ pub mod lower;
pub mod output;
pub mod stage;
pub mod step;
+pub mod summary;
pub mod tasks;
use ids::StageId;
diff --git a/src/compile/ir/summary.rs b/src/compile/ir/summary.rs
new file mode 100644
index 00000000..ba8485c2
--- /dev/null
+++ b/src/compile/ir/summary.rs
@@ -0,0 +1,732 @@
+//! Serializable, agent-facing summary of a typed [`Pipeline`].
+//!
+//! The internal IR (`Pipeline`, `Job`, `Step`, `Graph`, …) is rich and
+//! intentionally tied to the compiler's lowering needs. Exposing those
+//! shapes directly over MCP / JSON would lock us into every internal
+//! field rename. Instead, this module defines a parallel "summary"
+//! tree with `#[derive(Serialize)]` that captures the agent-relevant
+//! signals (ids, kinds, conditions, output declarations, output
+//! references, derived dependency edges) and intentionally **omits**
+//! internal-only bookkeeping (template wraps, 1ES templateContext,
+//! lowering hints).
+//!
+//! ## Stability contract
+//!
+//! [`PipelineSummary::schema_version`] is pinned. Bump it whenever
+//! the JSON shape changes in a way a downstream consumer would
+//! notice (renamed field, removed variant, changed semantics).
+//! Additive changes such as new optional fields do not require a bump.
+//! New enum variants currently require a schema-version bump so older
+//! consumers fail loudly instead of misinterpreting data.
+//!
+//! The summary is the **public** schema. The internal IR types
+//! (`super::Pipeline` and friends) are NOT public API and may change
+//! freely.
+
+use std::collections::BTreeSet;
+
+use anyhow::Result;
+use serde::{Deserialize, Serialize};
+
+use super::condition::{Condition, Expr, codegen::CondCodegenCtx, codegen::lower_condition};
+use super::env::EnvValue;
+use super::graph::{Graph, build_graph};
+use super::output::{OutputDecl, OutputRef};
+use super::step::Step;
+use super::{Pipeline, PipelineBody, PipelineShape};
+
+/// Current public schema version. Bump when the JSON shape changes
+/// in a backwards-incompatible way.
+pub const SCHEMA_VERSION: u32 = 1;
+
+/// Public, serializable summary of a compiled pipeline.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct PipelineSummary {
+ /// Public schema version; see [`SCHEMA_VERSION`].
+ pub schema_version: u32,
+ /// Top-level `name:` (the ADO build-number format string).
+ pub name: String,
+ /// Compile target: `"standalone"`, `"1es"`, `"job-template"`,
+ /// `"stage-template"`.
+ pub shape: String,
+ /// Either a flat list of jobs (`standalone`, `job-template`) or
+ /// a list of stages (`1es`, `stage-template`).
+ pub body: PipelineBodySummary,
+ /// Resolved dependency graph.
+ pub graph: GraphSummary,
+}
+
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[serde(tag = "kind", rename_all = "snake_case")]
+pub enum PipelineBodySummary {
+ Jobs { jobs: Vec },
+ Stages { stages: Vec },
+}
+
+impl PipelineSummary {
+ /// Iterate every job in the pipeline, regardless of whether the
+ /// body is `Jobs`-shaped or `Stages`-shaped.
+ ///
+ /// Single source of truth for body-shape iteration; both
+ /// `audit::pipeline_graph` and the `inspect` commands go through
+ /// this so that future shape additions (e.g. a new `Templates`
+ /// variant) only need to be handled in one place.
+ ///
+ /// Returns an `impl Iterator` rather than a `Vec` so hot paths
+ /// (`populate_job_edges`, `find_matching_job_summary`, the inspect
+ /// traversals) avoid a per-call heap allocation. Callers that
+ /// need a slice can `.collect::<Vec<_>>()` at the use site.
+ pub fn all_jobs(&self) -> impl Iterator- + '_ {
+ match &self.body {
+ PipelineBodySummary::Jobs { jobs } => AllJobsIter::Flat(jobs.iter()),
+ PipelineBodySummary::Stages { stages } => {
+ AllJobsIter::Stages(stages.iter().flat_map(stage_jobs))
+ }
+ }
+ }
+}
+
+fn stage_jobs(stage: &StageSummary) -> std::slice::Iter<'_, JobSummary> {
+ stage.jobs.iter()
+}
+
+/// Either-style iterator that yields the same `&JobSummary` element type
+/// for both pipeline body shapes without heap-allocating into a `Vec`.
+#[allow(clippy::type_complexity)]
+enum AllJobsIter<'a> {
+ Flat(std::slice::Iter<'a, JobSummary>),
+ Stages(
+ std::iter::FlatMap<
+ std::slice::Iter<'a, StageSummary>,
+ std::slice::Iter<'a, JobSummary>,
+ fn(&'a StageSummary) -> std::slice::Iter<'a, JobSummary>,
+ >,
+ ),
+}
+
+impl<'a> Iterator for AllJobsIter<'a> {
+ type Item = &'a JobSummary;
+
+ fn next(&mut self) -> Option
{
+ match self {
+ Self::Flat(iter) => iter.next(),
+ Self::Stages(iter) => iter.next(),
+ }
+ }
+}
+
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct StageSummary {
+ pub id: String,
+ pub display_name: String,
+ pub depends_on: Vec,
+ /// Lowered ADO condition string, when one is set on the stage.
+ pub condition: Option,
+ pub jobs: Vec,
+}
+
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct JobSummary {
+ pub id: String,
+ /// `None` for top-level jobs in a flat `Jobs` pipeline.
+ pub stage: Option,
+ pub display_name: String,
+ pub depends_on: Vec,
+ /// Lowered ADO condition string, when one is set on the job.
+ pub condition: Option,
+ pub pool: PoolSummary,
+ pub steps: Vec,
+}
+
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[serde(tag = "kind", rename_all = "snake_case")]
+pub enum PoolSummary {
+ VmImage {
+ image: String,
+ },
+ Named {
+ name: String,
+ image: Option,
+ os: Option,
+ },
+}
+
+/// A single step's public summary.
+///
+/// `kind` discriminates the step shape and the rest of the fields
+/// are populated per kind. `id` is the ADO step `name:` (required
+/// when other steps consume this step's outputs).
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct StepSummary {
+ pub id: Option,
+ pub kind: StepKind,
+ pub display_name: Option,
+ /// For `task` steps: the ADO task identifier (e.g. `"NodeTool@0"`).
+ pub task: Option,
+ /// Lowered ADO condition string, when one is set on the step.
+ pub condition: Option,
+ /// Step outputs **declared** by this step (`BashStep::outputs`).
+ pub outputs: Vec,
+ /// Other-step outputs **read** by this step's `env:` map.
+ pub env_refs: Vec,
+ /// Other-step outputs **read** by this step's `condition:`.
+ pub condition_refs: Vec,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum StepKind {
+ Bash,
+ Task,
+ Checkout,
+ Download,
+ Publish,
+ RawYaml,
+}
+
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct OutputDeclSummary {
+ pub name: String,
+ pub is_secret: bool,
+ /// `true` when at least one cross-step consumer reads this
+ /// output; the producer must emit `isOutput=true` in its
+ /// `##vso[task.setvariable …]` directive.
+ pub auto_is_output: bool,
+}
+
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct OutputRefSummary {
+ /// Producer step id.
+ pub step: String,
+ /// Output variable name (matches an `OutputDecl::name`).
+ pub name: String,
+}
+
+/// JSON-friendly view of the IR's typed [`Graph`].
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct GraphSummary {
+ /// Every step that carries an id, with its location and declared
+ /// outputs.
+ pub step_locations: Vec,
+ /// Derived job-level `dependsOn` edges (`consumer → producer`).
+ pub job_edges: Vec,
+ /// Derived stage-level `dependsOn` edges (`consumer → producer`).
+ pub stage_edges: Vec,
+ /// Producer-step outputs that need `isOutput=true` because at
+ /// least one cross-step consumer reads them.
+ pub outputs_needing_is_output: Vec,
+}
+
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct StepLocationEntry {
+ pub step: String,
+ pub stage: Option,
+ pub job: String,
+ pub outputs: Vec,
+}
+
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct EdgeEntry {
+ /// The job/stage that has the `dependsOn` entry.
+ pub consumer: String,
+ /// The job/stage being depended on.
+ pub producer: String,
+}
+
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct StepOutputsEntry {
+ pub step: String,
+ pub outputs: Vec,
+}
+
+impl PipelineSummary {
+ /// Build a public summary from a typed [`Pipeline`].
+ ///
+ /// Runs the graph pass to derive `depends_on` and validate
+ /// output references — same flow the YAML emit takes. Returns
+ /// the graph errors verbatim so summary callers see the same
+ /// errors a compile would surface.
+ pub fn from_pipeline(p: &Pipeline) -> Result {
+ let graph = build_graph(p)?;
+ let body = match &p.body {
+ PipelineBody::Jobs(jobs) => PipelineBodySummary::Jobs {
+ jobs: jobs
+ .iter()
+ .map(|j| summarize_job(None, j, &graph))
+ .collect(),
+ },
+ PipelineBody::Stages(stages) => PipelineBodySummary::Stages {
+ stages: stages
+ .iter()
+ .map(|s| {
+ let stage_id = s.id.as_str().to_string();
+ StageSummary {
+ id: stage_id.clone(),
+ display_name: s.display_name.clone(),
+ depends_on: s
+ .depends_on
+ .iter()
+ .map(|d| d.as_str().to_string())
+ .collect(),
+ condition: s.condition.as_ref().and_then(|c| {
+ // Conditions on a stage have no
+ // step-output context; render with
+ // the pipeline graph but no stage or
+ // job (`render_condition` supplies a
+ // placeholder job id). Stage-level
+ // conditions today never reference
+ // step outputs.
+ render_condition(c, &graph, None, None)
+ }),
+ jobs: s
+ .jobs
+ .iter()
+ .map(|j| summarize_job(Some(stage_id.clone()), j, &graph))
+ .collect(),
+ }
+ })
+ .collect(),
+ },
+ };
+
+ Ok(PipelineSummary {
+ schema_version: SCHEMA_VERSION,
+ name: p.name.clone(),
+ shape: shape_label(&p.shape).to_string(),
+ body,
+ graph: GraphSummary::from_graph(&graph),
+ })
+ }
+}
+
+fn shape_label(shape: &PipelineShape) -> &'static str {
+ match shape {
+ PipelineShape::Standalone => "standalone",
+ PipelineShape::OneEs { .. } => "1es",
+ PipelineShape::JobTemplate { .. } => "job-template",
+ PipelineShape::StageTemplate { .. } => "stage-template",
+ }
+}
+
+fn summarize_job(stage: Option, j: &super::job::Job, graph: &Graph) -> JobSummary {
+ let job_id_str = j.id.as_str().to_string();
+ let stage_clone = stage.clone();
+ let stage_for_render = stage_clone.as_deref();
+ JobSummary {
+ id: job_id_str.clone(),
+ stage,
+ display_name: j.display_name.clone(),
+ depends_on: j
+ .depends_on
+ .iter()
+ .map(|d| d.as_str().to_string())
+ .collect(),
+ condition: j
+ .condition
+ .as_ref()
+ .and_then(|c| render_condition(c, graph, stage_for_render, Some(&job_id_str))),
+ pool: summarize_pool(&j.pool),
+ steps: j
+ .steps
+ .iter()
+ .map(|s| summarize_step(s, graph, stage_for_render, &job_id_str))
+ .collect(),
+ }
+}
+
+fn summarize_pool(p: &super::job::Pool) -> PoolSummary {
+ match p {
+ super::job::Pool::VmImage(image) => PoolSummary::VmImage {
+ image: image.clone(),
+ },
+ super::job::Pool::Named { name, image, os } => PoolSummary::Named {
+ name: name.clone(),
+ image: image.clone(),
+ os: os.clone(),
+ },
+ }
+}
+
+fn summarize_step(step: &Step, graph: &Graph, stage: Option<&str>, job: &str) -> StepSummary {
+ let (id, kind, display_name, task, condition, mut outputs, env_refs, condition_refs) =
+ match step {
+ Step::Bash(b) => {
+ let env_refs = collect_env_refs(b.env.values());
+ let cond_refs = b
+ .condition
+ .as_ref()
+ .map(collect_condition_refs)
+ .unwrap_or_default();
+ (
+ b.id.as_ref().map(|i| i.as_str().to_string()),
+ StepKind::Bash,
+ Some(b.display_name.clone()),
+ None,
+ b.condition
+ .as_ref()
+ .and_then(|c| render_condition(c, graph, stage, Some(job))),
+ b.outputs
+ .iter()
+ .map(summarize_output_decl)
+ .collect::>(),
+ env_refs.into_iter().map(summarize_output_ref).collect(),
+ cond_refs.into_iter().map(summarize_output_ref).collect(),
+ )
+ }
+ Step::Task(t) => {
+ let env_refs = collect_env_refs(t.env.values());
+ let cond_refs = t
+ .condition
+ .as_ref()
+ .map(collect_condition_refs)
+ .unwrap_or_default();
+ (
+ t.id.as_ref().map(|i| i.as_str().to_string()),
+ StepKind::Task,
+ Some(t.display_name.clone()),
+ Some(t.task.clone()),
+ t.condition
+ .as_ref()
+ .and_then(|c| render_condition(c, graph, stage, Some(job))),
+ Vec::new(),
+ env_refs.into_iter().map(summarize_output_ref).collect(),
+ cond_refs.into_iter().map(summarize_output_ref).collect(),
+ )
+ }
+ Step::Checkout(_) => (
+ None,
+ StepKind::Checkout,
+ None,
+ None,
+ None,
+ Vec::new(),
+ Vec::new(),
+ Vec::new(),
+ ),
+ Step::Download(d) => {
+ let cond_refs = d
+ .condition
+ .as_ref()
+ .map(collect_condition_refs)
+ .unwrap_or_default();
+ (
+ None,
+ StepKind::Download,
+ Some(format!("download: {}", d.artifact)),
+ None,
+ d.condition
+ .as_ref()
+ .and_then(|c| render_condition(c, graph, stage, Some(job))),
+ Vec::new(),
+ Vec::new(),
+ cond_refs.into_iter().map(summarize_output_ref).collect(),
+ )
+ }
+ Step::Publish(p) => {
+ let cond_refs = p
+ .condition
+ .as_ref()
+ .map(collect_condition_refs)
+ .unwrap_or_default();
+ (
+ None,
+ StepKind::Publish,
+ Some(format!("publish: {}", p.artifact)),
+ None,
+ p.condition
+ .as_ref()
+ .and_then(|c| render_condition(c, graph, stage, Some(job))),
+ Vec::new(),
+ Vec::new(),
+ cond_refs.into_iter().map(summarize_output_ref).collect(),
+ )
+ }
+ Step::RawYaml(_) => (
+ None,
+ StepKind::RawYaml,
+ None,
+ None,
+ None,
+ Vec::new(),
+ Vec::new(),
+ Vec::new(),
+ ),
+ };
+ // Patch auto_is_output from the graph's outputs_needing_is_output
+ // index so it's accurate without requiring the caller to mutate
+ // the Pipeline via apply_auto_is_output.
+ if let Some(step_id) = id.as_deref()
+ && !outputs.is_empty()
+ {
+ let key = super::ids::StepId::new(step_id).ok();
+ if let Some(k) = key
+ && let Some(needs) = graph.outputs_needing_is_output.get(&k)
+ {
+ for o in outputs.iter_mut() {
+ if needs.contains(&o.name) {
+ o.auto_is_output = true;
+ }
+ }
+ }
+ }
+ StepSummary {
+ id,
+ kind,
+ display_name,
+ task,
+ condition,
+ outputs,
+ env_refs,
+ condition_refs,
+ }
+}
+
+fn summarize_output_decl(d: &OutputDecl) -> OutputDeclSummary {
+ OutputDeclSummary {
+ name: d.name.clone(),
+ is_secret: d.is_secret,
+ auto_is_output: d.auto_is_output,
+ }
+}
+
+fn summarize_output_ref(r: OutputRef) -> OutputRefSummary {
+ OutputRefSummary {
+ step: r.step.as_str().to_string(),
+ name: r.name,
+ }
+}
+
+fn render_condition(
+ c: &Condition,
+ graph: &Graph,
+ stage: Option<&str>,
+ job: Option<&str>,
+) -> Option {
+ // Build typed stage/job ids for the codegen context. If the
+ // caller is rendering a stage-level condition we synthesise a
+ // dummy job — stage-level conditions in the canonical pipeline
+ // never reference step outputs, but the codegen API still
+ // requires a `&JobId`, and a placeholder is fine because no
+ // `Expr::StepOutput` should reach this path.
+ let job_id = super::ids::JobId::new(job.unwrap_or("_stage_placeholder")).ok()?;
+ let stage_id = stage
+ .map(super::ids::StageId::new)
+ .transpose()
+ .ok()
+ .flatten();
+ let ctx = CondCodegenCtx {
+ graph,
+ stage: stage_id.as_ref(),
+ job: &job_id,
+ };
+ lower_condition(&ctx, c).ok()
+}
+
+fn collect_env_refs<'a, I: IntoIterator- >(values: I) -> Vec
{
+ let mut out = Vec::new();
+ for v in values {
+ walk_env(v, &mut out);
+ }
+ out
+}
+
+fn walk_env(v: &EnvValue, out: &mut Vec) {
+ match v {
+ EnvValue::StepOutput(r) => out.push(r.clone()),
+ EnvValue::Coalesce(parts) | EnvValue::Concat(parts) => {
+ for p in parts {
+ walk_env(p, out);
+ }
+ }
+ _ => {}
+ }
+}
+
+fn collect_condition_refs(c: &Condition) -> Vec {
+ let mut out = Vec::new();
+ walk_cond(c, &mut out);
+ out
+}
+
+fn walk_cond(c: &Condition, out: &mut Vec) {
+ match c {
+ Condition::And(parts) | Condition::Or(parts) => {
+ for p in parts {
+ walk_cond(p, out);
+ }
+ }
+ Condition::Not(inner) => walk_cond(inner, out),
+ Condition::Eq(a, b) | Condition::Ne(a, b) => {
+ walk_expr(a, out);
+ walk_expr(b, out);
+ }
+ _ => {}
+ }
+}
+
+fn walk_expr(e: &Expr, out: &mut Vec) {
+ if let Expr::StepOutput(r) = e {
+ out.push(r.clone());
+ }
+}
+
+impl GraphSummary {
+ fn from_graph(g: &Graph) -> Self {
+ let step_locations = g
+ .step_locations
+ .iter()
+ .map(|(step, loc)| StepLocationEntry {
+ step: step.as_str().to_string(),
+ stage: loc.stage.as_ref().map(|s| s.as_str().to_string()),
+ job: loc.job.as_str().to_string(),
+ outputs: loc.outputs.iter().cloned().collect(),
+ })
+ .collect();
+ let job_edges = g
+ .job_edges
+ .iter()
+ .map(|(c, p)| EdgeEntry {
+ consumer: c.as_str().to_string(),
+ producer: p.as_str().to_string(),
+ })
+ .collect();
+ let stage_edges = g
+ .stage_edges
+ .iter()
+ .map(|(c, p)| EdgeEntry {
+ consumer: c.as_str().to_string(),
+ producer: p.as_str().to_string(),
+ })
+ .collect();
+ let outputs_needing_is_output = g
+ .outputs_needing_is_output
+ .iter()
+ .map(|(step, outs)| StepOutputsEntry {
+ step: step.as_str().to_string(),
+ outputs: outs
+ .iter()
+ .cloned()
+ .collect::>()
+ .into_iter()
+ .collect(),
+ })
+ .collect();
+ GraphSummary {
+ step_locations,
+ job_edges,
+ stage_edges,
+ outputs_needing_is_output,
+ }
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::compile::ir::condition::{Condition, Expr};
+ use crate::compile::ir::env::EnvValue;
+ use crate::compile::ir::ids::{JobId, StepId};
+ use crate::compile::ir::job::{Job, Pool};
+ use crate::compile::ir::output::{OutputDecl, OutputRef};
+ use crate::compile::ir::step::{BashStep, Step};
+ use crate::compile::ir::{Pipeline, PipelineBody, PipelineShape, Resources, Triggers};
+
+ fn fixture_pipeline() -> Pipeline {
+ let producer = Step::Bash(
+ BashStep::new("setup", "echo hi")
+ .with_id(StepId::new("synthPr").unwrap())
+ .with_output(OutputDecl::new("AW_SYNTHETIC_PR_ID")),
+ );
+ let consumer = Step::Bash(
+ BashStep::new("run", "echo bye")
+ .with_env(
+ "PR_ID",
+ EnvValue::step_output(OutputRef::new(
+ StepId::new("synthPr").unwrap(),
+ "AW_SYNTHETIC_PR_ID",
+ )),
+ )
+ .with_condition(Condition::Eq(
+ Expr::StepOutput(OutputRef::new(
+ StepId::new("synthPr").unwrap(),
+ "AW_SYNTHETIC_PR_ID",
+ )),
+ Expr::Literal("42".into()),
+ )),
+ );
+
+ let setup = {
+ let mut j = Job::new(
+ JobId::new("Setup").unwrap(),
+ "Setup",
+ Pool::VmImage("ubuntu-22.04".into()),
+ );
+ j.steps.push(producer);
+ j
+ };
+ let agent = {
+ let mut j = Job::new(
+ JobId::new("Agent").unwrap(),
+ "Agent",
+ Pool::VmImage("ubuntu-22.04".into()),
+ );
+ j.steps.push(consumer);
+ j
+ };
+
+ Pipeline {
+ name: "T".into(),
+ parameters: Vec::new(),
+ resources: Resources::default(),
+ triggers: Triggers::default(),
+ variables: Vec::new(),
+ body: PipelineBody::Jobs(vec![setup, agent]),
+ shape: PipelineShape::Standalone,
+ }
+ }
+
+ #[test]
+ fn summary_schema_version_is_pinned() {
+ assert_eq!(SCHEMA_VERSION, 1);
+ }
+
+ #[test]
+ fn from_pipeline_round_trips_jobs_and_graph() {
+ let p = fixture_pipeline();
+ let s = PipelineSummary::from_pipeline(&p).unwrap();
+ assert_eq!(s.shape, "standalone");
+ let jobs = match s.body {
+ PipelineBodySummary::Jobs { jobs } => jobs,
+ _ => panic!("expected jobs body"),
+ };
+ assert_eq!(jobs.len(), 2);
+ let agent = jobs.iter().find(|j| j.id == "Agent").unwrap();
+ assert_eq!(agent.steps.len(), 1);
+ let step = &agent.steps[0];
+ assert_eq!(step.env_refs.len(), 1);
+ assert_eq!(step.env_refs[0].step, "synthPr");
+ assert_eq!(step.condition_refs.len(), 1);
+ // Graph derived a job edge Agent -> Setup
+ assert!(
+ s.graph
+ .job_edges
+ .iter()
+ .any(|e| e.consumer == "Agent" && e.producer == "Setup"),
+ "expected derived edge Agent -> Setup, got {:?}",
+ s.graph.job_edges
+ );
+ // Producer output is marked auto_is_output
+ let setup = jobs.iter().find(|j| j.id == "Setup").unwrap();
+ let prod_step = &setup.steps[0];
+ assert!(prod_step.outputs[0].auto_is_output);
+ }
+
+ #[test]
+ fn serializes_to_json_without_panicking() {
+ let p = fixture_pipeline();
+ let s = PipelineSummary::from_pipeline(&p).unwrap();
+ let json = serde_json::to_string(&s).unwrap();
+ assert!(json.contains("\"schema_version\":1"));
+ assert!(json.contains("\"shape\":\"standalone\""));
+ }
+}
diff --git a/src/compile/mod.rs b/src/compile/mod.rs
index eb5f6dd2..baafd694 100644
--- a/src/compile/mod.rs
+++ b/src/compile/mod.rs
@@ -21,6 +21,7 @@ mod job_ir;
mod onees;
mod onees_ir;
pub(crate) mod pr_filters;
+pub mod source_path_guard;
mod stage;
mod stage_ir;
mod standalone;
@@ -434,8 +435,7 @@ pub async fn compile_all_pipelines(skip_integrity: bool, debug_pipeline: bool) -
for pipeline in &detected {
let yaml_output_path = root.join(&pipeline.yaml_path);
- let source_path =
- resolve_pipeline_source_path(&yaml_output_path, &pipeline.source, root);
+ let source_path = resolve_pipeline_source_path(&yaml_output_path, &pipeline.source, root);
if !source_path.exists() {
eprintln!(
@@ -779,10 +779,7 @@ fn format_pipeline_version_status(version: &str, current_version: &str) -> Strin
/// Tries the path relative to the YAML file's directory first, then relative
/// to the scan root. This mirrors the lookup the ADO pipeline itself uses.
fn resolve_pipeline_source_path(yaml_output_path: &Path, source: &str, root: &Path) -> PathBuf {
- let candidate_from_yaml_dir = yaml_output_path
- .parent()
- .unwrap_or(root)
- .join(source);
+ let candidate_from_yaml_dir = yaml_output_path.parent().unwrap_or(root).join(source);
if candidate_from_yaml_dir.exists() {
candidate_from_yaml_dir
} else {
@@ -844,7 +841,6 @@ fn log_pipeline_metadata(front_matter: &FrontMatter) {
debug!("Repositories: {}", front_matter.repositories.len());
}
-/// Walk up from `start` to find the nearest directory containing `.git`.
/// Walk up from `start` looking for the nearest ancestor containing a
/// `.git` directory or file.
///
@@ -869,6 +865,91 @@ pub fn find_repo_root(start: &Path) -> Option {
}
}
+/// Public, read-only entry point that returns the typed [`ir::Pipeline`]
+/// for an agent source file **without** writing any YAML.
+///
+/// Mirrors [`compile_pipeline`]'s parse/sanitize/resolve-repos flow,
+/// then dispatches to the appropriate `build_*_pipeline` IR builder
+/// for the front-matter target. Used by commands that need to reason
+/// about a pipeline's structure (e.g. `ado-aw inspect`, `ado-aw graph`)
+/// rather than rebuild it.
+///
+/// Returns both the sanitized front matter and the typed pipeline so
+/// callers do not need to re-parse the source to get at high-level
+/// fields like `front_matter.target`.
+///
+/// **Codemods are applied in memory only**, matching `check_pipeline`'s
+/// behavior: this function never rewrites the source on disk.
+pub async fn build_pipeline_ir(input_path: &Path) -> Result<(FrontMatter, ir::Pipeline)> {
+ let content = tokio::fs::read_to_string(input_path)
+ .await
+ .with_context(|| format!("Failed to read input file: {}", input_path.display()))?;
+
+ let parsed = common::parse_markdown_detailed(&content)?;
+ let mut front_matter = parsed.front_matter;
+ let markdown_body = parsed.markdown_body;
+
+ use crate::sanitize::SanitizeConfig;
+ front_matter.sanitize_config_fields();
+
+ let (resolved_repos, resolved_checkout) = common::resolve_repos(&front_matter)?;
+ front_matter.repositories = resolved_repos;
+ front_matter.checkout = resolved_checkout;
+ common::validate_checkout_list(&front_matter.repositories, &front_matter.checkout)?;
+
+ // Inferred output path for the marker step. Defaults to
+ // `<stem>.lock.yml` next to the source, same default as
+ // `compile_pipeline` when `--output` is omitted.
+ let output_path = input_path.with_extension("lock.yml");
+
+ let extensions = extensions::collect_extensions(&front_matter);
+ let ctx = extensions::CompileContext::new(&front_matter, input_path).await?;
+
+ let pipeline = match front_matter.target {
+ CompileTarget::Standalone => standalone_ir::build_standalone_pipeline(
+ &front_matter,
+ &extensions,
+ &ctx,
+ input_path,
+ &output_path,
+ &markdown_body,
+ /* skip_integrity */ true,
+ /* debug_pipeline */ false,
+ )?,
+ CompileTarget::OneES => onees_ir::build_onees_pipeline(
+ &front_matter,
+ &extensions,
+ &ctx,
+ input_path,
+ &output_path,
+ &markdown_body,
+ /* skip_integrity */ true,
+ /* debug_pipeline */ false,
+ )?,
+ CompileTarget::Job => job_ir::build_job_pipeline(
+ &front_matter,
+ &extensions,
+ &ctx,
+ input_path,
+ &output_path,
+ &markdown_body,
+ /* skip_integrity */ true,
+ /* debug_pipeline */ false,
+ )?,
+ CompileTarget::Stage => stage_ir::build_stage_pipeline(
+ &front_matter,
+ &extensions,
+ &ctx,
+ input_path,
+ &output_path,
+ &markdown_body,
+ /* skip_integrity */ true,
+ /* debug_pipeline */ false,
+ )?,
+ };
+ Ok((front_matter, pipeline))
+}
+
/// Clean up spacing artifacts in generated YAML.
///
/// After template placeholder replacement, empty placeholders leave behind
@@ -1184,7 +1265,8 @@ description: "A test agent for directory output"
#[tokio::test]
async fn read_existing_pipeline_version_returns_none_for_missing_file() {
- let version = read_existing_pipeline_version(Path::new("/tmp/does-not-exist.lock.yml")).await;
+ let version =
+ read_existing_pipeline_version(Path::new("/tmp/does-not-exist.lock.yml")).await;
assert!(version.is_none(), "expected None for a non-existent file");
}
@@ -1200,7 +1282,10 @@ description: "A test agent for directory output"
));
std::fs::write(&temp, "# plain yaml\nname: foo\n").unwrap();
let version = read_existing_pipeline_version(&temp).await;
- assert!(version.is_none(), "expected None when file has no @ado-aw header");
+ assert!(
+ version.is_none(),
+ "expected None when file has no @ado-aw header"
+ );
let _ = std::fs::remove_file(&temp);
}
diff --git a/src/compile/source_path_guard.rs b/src/compile/source_path_guard.rs
new file mode 100644
index 00000000..252f73cf
--- /dev/null
+++ b/src/compile/source_path_guard.rs
@@ -0,0 +1,309 @@
+//! Validation for caller-supplied workflow source paths.
+//!
+//! Two entry points feed an untrusted string to
+//! [`crate::compile::build_pipeline_ir`]:
+//!
+//! 1. `audit::pipeline_graph` — `aw_info.json::source` from an
+//! audited build's artifact payload (the build itself may have
+//! been prompt-injected).
+//! 2. `mcp_author` — `source_path` MCP tool parameters supplied by
+//! an IDE/Copilot Chat agent that may be processing untrusted
+//! content (PR descriptions, issue comments, fetched pages).
+//!
+//! Both sites need the same defence: refuse non-markdown paths,
+//! refuse parent-directory traversal, refuse `~`-prefixed
+//! shell-style expansion, refuse `.md` symlinks that resolve to
+//! non-`.md` targets. This module centralises the guard so the two
+//! call sites cannot drift apart.
+//!
+//! See the function-level doc on [`validate_workflow_source_path`]
+//! for the full security contract.
+//!
+//! **Do not weaken any of the listed guards** without simultaneously
+//! adding a stronger containment check (e.g. canonicalize +
+//! prefix-against-cwd). Every existing audit and MCP entry point
+//! relies on this function as the primary gate against arbitrary
+//! file reads.
+
+use std::path::{Component, Path, PathBuf};
+
+use anyhow::Result;
+
+/// Outcome of validating a caller-supplied workflow source path.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct ValidatedSourcePath {
+ /// The validated path. Absolute paths are returned as-is (after
+ /// symlink target re-check); relative paths are returned joined
+ /// to the canonicalized current working directory.
+ pub path: PathBuf,
+ /// The trimmed + separator-normalised form of the original
+ /// input string. Suitable for embedding in user-facing error
+ /// messages without leaking trailing whitespace.
+ pub normalized: String,
+}
+
+/// Validate a caller-supplied workflow source path string.
+///
+/// **Security**: the input is untrusted. Mitigations applied (in
+/// order):
+///
+/// 1. Trim whitespace and normalise platform path separators —
+/// `\\` → `/` on Unix, `/` → `\\` on Windows. This step prevents
+/// a Linux caller from smuggling `..\\workflow.md` past the
+/// `ParentDir` check, since `PathBuf::from` would otherwise
+/// treat the whole string as one `Normal` component on Unix.
+/// 2. Require a `.md` extension — the only valid agentic workflow
+/// source extension. Closes the arbitrary-file-read vector
+/// against keys, `/etc/passwd`, etc.
+/// 3. Reject `..` components and a leading `~` on absolute and relative
+/// inputs alike (no directory traversal, no shell-style expansion).
+/// 4. For absolute paths, canonicalise and **re-check the
+/// extension on the resolved target** so a `foo.md → /etc/passwd`
+/// symlink does not satisfy the lexical check. Canonicalisation
+/// failures are tolerated (file may not exist locally — the
+/// caller upstream surfaces a clean read error in that case).
+/// 5. For relative paths, join to the canonicalised current working
+/// directory, then **apply the same symlink-target extension
+/// re-check** so a `workflows/evil.md` link to `/etc/passwd` cannot
+/// sneak through after a downstream `canonicalize` resolves it.
+pub async fn validate_workflow_source_path(source: &str) -> Result {
+ let normalized = normalize_separators(source.trim());
+ let path = PathBuf::from(&normalized);
+
+ if !has_md_extension(&path) {
+ anyhow::bail!(
+ "refusing source path '{normalized}': only `.md` files are valid agentic workflow sources"
+ );
+ }
+
+ // ParentDir + tilde checks apply to *both* absolute and relative
+ // inputs. The old layout gated them behind the relative branch,
+ // which let an adversarial absolute path like
+ // `/workspace/../../home/runner/.env.md` through unchecked even
+ // though the module's stated contract refuses parent-directory
+ // traversal. Run the check before splitting on `is_absolute()`.
+ if path
+ .components()
+ .any(|component| matches!(component, Component::ParentDir))
+ || normalized.starts_with('~')
+ {
+ anyhow::bail!(
+ "refusing source path '{normalized}': parent-directory components and `~` are not permitted"
+ );
+ }
+
+ if path.is_absolute() {
+ if let Ok(canonical) = tokio::fs::canonicalize(&path).await
+ && !has_md_extension(&canonical)
+ {
+ anyhow::bail!(
+ "refusing source path '{normalized}': symlink resolves to non-`.md` target '{}'",
+ canonical.display()
+ );
+ }
+ return Ok(ValidatedSourcePath {
+ path,
+ normalized,
+ });
+ }
+
+ let cwd = tokio::fs::canonicalize(".")
+ .await
+ .map_err(|err| anyhow::anyhow!("could not resolve current directory: {err}"))?;
+ let joined = cwd.join(&path);
+
+ // Mirror the absolute-path symlink check so a relative input like
+ // `workflows/evil.md` whose joined form symlinks to `/etc/passwd`
+ // is also rejected. Without this guard the lexical `.md` check on
+ // the link name passes and the eventual `canonicalize` inside
+ // `populate_pipeline_graph` resolves to the arbitrary target,
+ // narrowing the contract documented at module level.
+ if let Ok(canonical) = tokio::fs::canonicalize(&joined).await
+ && !has_md_extension(&canonical)
+ {
+ anyhow::bail!(
+ "refusing source path '{normalized}': symlink resolves to non-`.md` target '{}'",
+ canonical.display()
+ );
+ }
+
+ Ok(ValidatedSourcePath {
+ path: joined,
+ normalized,
+ })
+}
+
+/// Returns `true` when `path` carries a `.md` (case-insensitive)
+/// extension.
+fn has_md_extension(path: &Path) -> bool {
+ path.extension()
+ .and_then(|ext| ext.to_str())
+ .is_some_and(|ext| ext.eq_ignore_ascii_case("md"))
+}
+
+/// Normalise path separators so the platform-native `PathBuf`
+/// machinery treats `..` and similar components consistently.
+///
+/// `PathBuf::from("..\\foo.md")` on Unix produces a single
+/// `Normal("..\\foo.md")` component, which would otherwise sneak
+/// past the caller's `ParentDir` check. Mirrors the helper that used
+/// to live inside `audit::pipeline_graph::normalize_source_path`.
+fn normalize_separators(source: &str) -> String {
+ if std::path::MAIN_SEPARATOR == '/' {
+ source.replace('\\', "/")
+ } else {
+ source.replace('/', "\\")
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ /// A path whose extension is not `.md` is refused with the
+ /// "only `.md`" message.
+ #[tokio::test]
+ async fn rejects_non_markdown_extension() {
+ let err = validate_workflow_source_path("/etc/passwd")
+ .await
+ .expect_err("non-md path must be rejected");
+ assert!(
+ format!("{err}").contains("only `.md`"),
+ "expected non-md rejection message, got: {err}"
+ );
+ }
+
+ /// A relative path containing `..` with forward slashes is refused.
+ #[tokio::test]
+ async fn rejects_parent_traversal_with_unix_separators() {
+ let err = validate_workflow_source_path("../../../etc/passwd.md")
+ .await
+ .expect_err("`..` must be rejected");
+ assert!(
+ format!("{err}").contains("parent-directory components"),
+ "expected parent-dir rejection message, got: {err}"
+ );
+ }
+
+ /// A relative path containing `..` spelled with backslashes is
+ /// refused on every platform.
+ #[tokio::test]
+ async fn rejects_parent_traversal_with_backslash_separators() {
+ // Regression for the linux-side `..\\workflow.md` bypass: on
+ // Unix, `PathBuf::from("..\\workflow.md")` produces a single
+ // Normal component without the separator normalisation, so
+ // the `ParentDir` check would never fire.
+ let err = validate_workflow_source_path("..\\..\\workflow.md")
+ .await
+ .expect_err("backslash-encoded `..` must be rejected");
+ assert!(
+ format!("{err}").contains("parent-directory components"),
+ "expected parent-dir rejection message, got: {err}"
+ );
+ }
+
+ /// An absolute path that still contains `..` components is refused.
+ #[tokio::test]
+ async fn rejects_absolute_path_with_parent_dir_component() {
+ // Regression: the ParentDir check was previously inside the
+ // relative-path branch only, so an adversarial absolute path
+ // like `/workspace/../../home/runner/.env.md` slipped through
+ // even though the module documents `..` rejection without
+ // qualifying by absolute/relative.
+ let path = if cfg!(windows) {
+ r"C:\workspace\..\..\home\runner\.env.md"
+ } else {
+ "/workspace/../../home/runner/.env.md"
+ };
+ let err = validate_workflow_source_path(path)
+ .await
+ .expect_err("absolute path with `..` must be rejected");
+ assert!(
+ format!("{err}").contains("parent-directory components"),
+ "expected parent-dir rejection message, got: {err}"
+ );
+ }
+
+ /// A `~`-prefixed path is refused; either rejection message is
+ /// accepted by the assertion.
+ #[tokio::test]
+ async fn rejects_tilde_prefix() {
+ let err = validate_workflow_source_path("~/secret.md")
+ .await
+ .expect_err("tilde prefix must be rejected");
+ assert!(
+ format!("{err}").contains("parent-directory components")
+ || format!("{err}").contains("`~`"),
+ "expected tilde rejection message, got: {err}"
+ );
+ }
+
+ /// A plain relative `.md` path is accepted and comes back as an
+ /// absolute path; the file does not need to exist.
+ #[tokio::test]
+ async fn accepts_legitimate_relative_md() {
+ let result = validate_workflow_source_path("workflows/foo.md")
+ .await
+ .expect("plain relative .md path must be accepted");
+ assert!(result.path.is_absolute());
+ assert!(result.normalized.ends_with("foo.md"));
+ }
+
+ /// An absolute `.md` path (platform-specific spelling) is accepted.
+ #[tokio::test]
+ async fn accepts_absolute_markdown_path() {
+ let path = if cfg!(windows) {
+ r"C:\workflows\foo.md"
+ } else {
+ "/repo/workflows/foo.md"
+ };
+ let result = validate_workflow_source_path(path)
+ .await
+ .expect("absolute `.md` paths must be accepted");
+ assert!(result.path.is_absolute());
+ }
+
+ /// An absolute `.md` symlink whose target lacks a `.md` extension
+ /// is refused (Unix only, because it creates a real symlink).
+ #[cfg(unix)]
+ #[tokio::test]
+ async fn rejects_md_symlink_to_non_md_target() {
+ let temp_dir = tempfile::tempdir().expect("tempdir");
+ let target = temp_dir.path().join("binary.bin");
+ tokio::fs::write(&target, b"x").await.unwrap();
+ let link = temp_dir.path().join("evil.md");
+ tokio::fs::symlink(&target, &link).await.unwrap();
+
+ let err = validate_workflow_source_path(link.to_str().unwrap())
+ .await
+ .expect_err("symlink to non-md target must be rejected");
+ assert!(
+ format!("{err}").contains("symlink resolves to non-`.md` target"),
+ "expected symlink rejection message, got: {err}"
+ );
+ }
+
+ /// Same symlink check as above, but reached through a relative
+ /// input path resolved against the process cwd.
+ #[cfg(unix)]
+ #[tokio::test]
+ async fn rejects_relative_md_symlink_to_non_md_target() {
+ // Regression: the symlink target re-check originally only
+ // fired for absolute input paths, so a relative
+ // `workflows/evil.md` symlink pointing at /etc/passwd slipped
+ // past the lexical `.md` check and the eventual
+ // `canonicalize` inside `populate_pipeline_graph` would read
+ // the target. The guard now fires on the relative branch
+ // too. Switch the process cwd to a tempdir so the relative
+ // join lands on our symlink.
+ let temp_dir = tempfile::tempdir().expect("tempdir");
+ let target = temp_dir.path().join("binary.bin");
+ tokio::fs::write(&target, b"x").await.unwrap();
+ let workflows = temp_dir.path().join("workflows");
+ tokio::fs::create_dir_all(&workflows).await.unwrap();
+ let link = workflows.join("evil.md");
+ tokio::fs::symlink(&target, &link).await.unwrap();
+
+ // `std::env::set_current_dir` is process-global; serialise
+ // via a static mutex so concurrent tests do not stomp.
+ // NOTE(review): `CWD_LOCK` is declared inside this function,
+ // so it only serialises runs of this one test; other tests
+ // that read the cwd are not guarded by it. The guard is also
+ // held across the `.await` below — confirm that is acceptable
+ // for the test runtime in use.
+ static CWD_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());
+ let _guard = CWD_LOCK.lock().unwrap();
+ let original_cwd = std::env::current_dir().expect("save cwd");
+ std::env::set_current_dir(temp_dir.path()).expect("enter tempdir");
+
+ let result = validate_workflow_source_path("workflows/evil.md").await;
+
+ // Restore the cwd before asserting so a failed assertion does
+ // not leave the process inside the soon-to-be-deleted tempdir.
+ std::env::set_current_dir(&original_cwd).expect("restore cwd");
+
+ let err = result.expect_err("relative symlink to non-md target must be rejected");
+ assert!(
+ format!("{err}").contains("symlink resolves to non-`.md` target"),
+ "expected symlink rejection message, got: {err}"
+ );
+ }
+}
diff --git a/src/inspect/catalog.rs b/src/inspect/catalog.rs
new file mode 100644
index 00000000..19d5462f
--- /dev/null
+++ b/src/inspect/catalog.rs
@@ -0,0 +1,322 @@
+//! In-tree registry catalog for CLI consumers.
+
+use std::error::Error;
+use std::fmt;
+
+use serde::Serialize;
+
+use crate::engine::DEFAULT_COPILOT_MODEL;
+use crate::safeoutputs::{ALL_KNOWN_SAFE_OUTPUTS, ALWAYS_ON_TOOLS, DEBUG_ONLY_TOOLS};
+
+/// One safe-output tool as listed in the catalog.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
+pub struct SafeOutputCatalogEntry {
+ /// Tool name.
+ pub name: String,
+ /// Classification label; the built-in catalog uses `debug-only`,
+ /// `always-on`, or `opt-in`.
+ pub classification: String,
+ /// Short human-readable description of the tool.
+ pub description: String,
+}
+
+/// One runtime as listed in the catalog.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
+pub struct RuntimeCatalogEntry {
+ /// Runtime identifier (for example `python` or `node`).
+ pub id: String,
+ /// Default version label, if any; the text renderer prints
+ /// `none` when this is absent.
+ pub default_version: Option,
+ /// Short human-readable description of the runtime.
+ pub description: String,
+}
+
+/// One first-class tool as listed in the catalog.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
+pub struct ToolCatalogEntry {
+ /// Tool identifier (for example `bash` or `edit`).
+ pub id: String,
+ /// Short human-readable description of the tool.
+ pub description: String,
+}
+
+/// The in-tree registry catalog, grouped by section.
+///
+/// Every section is omitted from serialized output when it is empty,
+/// so a catalog filtered to a single kind serializes only that kind.
+#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize)]
+pub struct Catalog {
+ /// Safe-output tool entries.
+ #[serde(skip_serializing_if = "Vec::is_empty")]
+ pub safe_outputs: Vec,
+ /// Runtime entries.
+ #[serde(skip_serializing_if = "Vec::is_empty")]
+ pub runtimes: Vec,
+ /// First-class tool entries.
+ #[serde(skip_serializing_if = "Vec::is_empty")]
+ pub tools: Vec,
+ /// Engine identifiers.
+ #[serde(skip_serializing_if = "Vec::is_empty")]
+ pub engines: Vec,
+ /// Model identifiers.
+ #[serde(skip_serializing_if = "Vec::is_empty")]
+ pub models: Vec,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct UnknownCatalogKind {
+ pub kind: String,
+}
+
+impl fmt::Display for UnknownCatalogKind {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ write!(
+ f,
+ "unknown --kind '{}' (expected one of: safe-outputs, runtimes, tools, engines, models)",
+ self.kind
+ )
+ }
+}
+
+impl Error for UnknownCatalogKind {}
+
+/// The catalog sections that can be selected with `--kind`.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum CatalogKind {
+ /// Selected by `safe-outputs`.
+ SafeOutputs,
+ /// Selected by `runtimes`.
+ Runtimes,
+ /// Selected by `tools`.
+ Tools,
+ /// Selected by `engines`.
+ Engines,
+ /// Selected by `models`.
+ Models,
+}
+
+impl CatalogKind {
+ /// Parse a `--kind` value into a [`CatalogKind`].
+ ///
+ /// Matching is exact (case-sensitive, no trimming). Any other
+ /// input yields an [`UnknownCatalogKind`] carrying the original
+ /// string.
+ pub fn parse(kind: &str) -> Result {
+ match kind {
+ "safe-outputs" => Ok(Self::SafeOutputs),
+ "runtimes" => Ok(Self::Runtimes),
+ "tools" => Ok(Self::Tools),
+ "engines" => Ok(Self::Engines),
+ "models" => Ok(Self::Models),
+ other => Err(UnknownCatalogKind {
+ kind: other.to_string(),
+ }),
+ }
+ }
+}
+
+/// Assemble the complete catalog with every section populated.
+pub fn catalog() -> Catalog {
+    // Build the sections in declaration order, then move them into
+    // the struct via field-init shorthand.
+    let safe_outputs = safe_outputs();
+    let runtimes = runtimes();
+    let tools = tools();
+    let engines = engines();
+    let models = models();
+    Catalog {
+        safe_outputs,
+        runtimes,
+        tools,
+        engines,
+        models,
+    }
+}
+
+/// Build a catalog containing only the section named by `kind`.
+///
+/// All other sections are left at their `Default` (empty) value.
+///
+/// # Errors
+///
+/// Propagates the [`UnknownCatalogKind`] from [`CatalogKind::parse`]
+/// when `kind` is not a recognised section name.
+pub fn catalog_kind(kind: &str) -> Result {
+ let kind = CatalogKind::parse(kind)?;
+ Ok(match kind {
+ CatalogKind::SafeOutputs => Catalog {
+ safe_outputs: safe_outputs(),
+ ..Catalog::default()
+ },
+ CatalogKind::Runtimes => Catalog {
+ runtimes: runtimes(),
+ ..Catalog::default()
+ },
+ CatalogKind::Tools => Catalog {
+ tools: tools(),
+ ..Catalog::default()
+ },
+ CatalogKind::Engines => Catalog {
+ engines: engines(),
+ ..Catalog::default()
+ },
+ CatalogKind::Models => Catalog {
+ models: models(),
+ ..Catalog::default()
+ },
+ })
+}
+
+/// Render `catalog` as plain text for terminal output.
+///
+/// Sections appear in the fixed order safe outputs, runtimes, tools,
+/// engines, models. Empty sections are skipped entirely, and
+/// trailing whitespace is trimmed from the final string.
+pub fn render_text(catalog: &Catalog) -> String {
+ let mut out = String::new();
+ if !catalog.safe_outputs.is_empty() {
+ out.push_str("Safe outputs\n");
+ for item in &catalog.safe_outputs {
+ out.push_str(&format!(
+ " {} [{}] - {}\n",
+ item.name, item.classification, item.description
+ ));
+ }
+ // Blank line separating this section from the next one.
+ out.push('\n');
+ }
+ if !catalog.runtimes.is_empty() {
+ out.push_str("Runtimes\n");
+ for item in &catalog.runtimes {
+ // A runtime without a default version is shown as `none`.
+ let version = item.default_version.as_deref().unwrap_or("none");
+ out.push_str(&format!(
+ " {} [default: {}] - {}\n",
+ item.id, version, item.description
+ ));
+ }
+ out.push('\n');
+ }
+ if !catalog.tools.is_empty() {
+ out.push_str("Tools\n");
+ for item in &catalog.tools {
+ out.push_str(&format!(" {} - {}\n", item.id, item.description));
+ }
+ out.push('\n');
+ }
+ if !catalog.engines.is_empty() {
+ out.push_str("Engines\n");
+ for engine in &catalog.engines {
+ out.push_str(&format!(" {engine}\n"));
+ }
+ out.push('\n');
+ }
+ if !catalog.models.is_empty() {
+ out.push_str("Models\n");
+ for model in &catalog.models {
+ out.push_str(&format!(" {model}\n"));
+ }
+ }
+ // Drop the trailing newline(s) left by whichever section came last.
+ out.trim_end().to_string()
+}
+
+/// Build the safe-output section from `ALL_KNOWN_SAFE_OUTPUTS`
+/// followed by `DEBUG_ONLY_TOOLS`, attaching a classification and a
+/// description to each name.
+fn safe_outputs() -> Vec {
+ ALL_KNOWN_SAFE_OUTPUTS
+ .iter()
+ .chain(DEBUG_ONLY_TOOLS.iter())
+ .copied()
+ // NOTE(review): the intermediate collection type is not visible
+ // here; presumably a set used to de-duplicate names that appear
+ // in both registries — confirm against the real source.
+ .collect::>()
+ .into_iter()
+ .map(|name| SafeOutputCatalogEntry {
+ name: name.to_string(),
+ classification: safe_output_classification(name).to_string(),
+ description: safe_output_description(name).to_string(),
+ })
+ .collect()
+}
+
+/// Classify a safe-output tool name as `debug-only`, `always-on`, or
+/// `opt-in`.
+///
+/// Debug-only membership is tested first, so a name present in both
+/// registries is reported as `debug-only`. Any name in neither
+/// registry is `opt-in`.
+fn safe_output_classification(name: &str) -> &'static str {
+    if DEBUG_ONLY_TOOLS.contains(&name) {
+        return "debug-only";
+    }
+    if ALWAYS_ON_TOOLS.contains(&name) {
+        return "always-on";
+    }
+    "opt-in"
+}
+
+/// Look up the human-readable description for a safe-output tool.
+///
+/// The table is hand-maintained in this function; names without an
+/// entry fall back to the literal `(no description)`.
+fn safe_output_description(name: &str) -> &'static str {
+ match name {
+ "add-build-tag" => "Parameters for adding a tag to an Azure DevOps build",
+ "add-pr-comment" => "Parameters for adding a comment thread on a pull request",
+ "comment-on-work-item" => "Parameters for commenting on a work item",
+ "create-branch" => "Parameters for creating a branch",
+ "create-git-tag" => "Parameters for creating a git tag (agent-provided)",
+ "create-issue" => "Files a GitHub issue against an operator-configured target repository.",
+ "create-pull-request" => "Parameters for creating a pull request",
+ "create-wiki-page" => "Parameters for creating a wiki page (agent-provided)",
+ "create-work-item" => "Parameters for creating a work item",
+ "link-work-items" => "Parameters for linking two work items",
+ "missing-data" => "Parameters for reporting missing data",
+ "missing-tool" => "Parameters for reporting a missing tool",
+ "noop" => "Parameters for describing a no operation. Use this if there is no work to do.",
+ "queue-build" => "Parameters for queuing a build",
+ "reply-to-pr-comment" => {
+ "Parameters for replying to an existing review comment thread on a pull request"
+ }
+ "report-incomplete" => "Parameters for reporting that a task could not be completed",
+ "resolve-pr-thread" => "Parameters for resolving or reactivating a PR review thread",
+ "submit-pr-review" => "Parameters for submitting a pull request review",
+ "update-pr" => "Parameters for updating a pull request",
+ "update-wiki-page" => "Parameters for editing a wiki page (agent-provided)",
+ "update-work-item" => "Parameters for updating a work item",
+ "upload-build-attachment" => "Parameters for attaching a workspace file to an ADO build.",
+ "upload-pipeline-artifact" => {
+ "Parameters for publishing a workspace file as an ADO pipeline artifact."
+ }
+ "upload-workitem-attachment" => "Parameters for uploading an attachment to a work item",
+ // Fallback for names that have no description registered here.
+ _ => "(no description)",
+ }
+}
+
+/// Build the runtime section: a fixed, hand-written list of
+/// `lean`, `python`, `node`, and `dotnet`, each with a default
+/// version label.
+fn runtimes() -> Vec {
+ vec![
+ RuntimeCatalogEntry {
+ id: "lean".to_string(),
+ default_version: Some("stable".to_string()),
+ description: "Lean 4 runtime support for the ado-aw compiler.".to_string(),
+ },
+ RuntimeCatalogEntry {
+ id: "python".to_string(),
+ default_version: Some("3.x".to_string()),
+ description: "Python runtime support for the ado-aw compiler.".to_string(),
+ },
+ RuntimeCatalogEntry {
+ id: "node".to_string(),
+ default_version: Some("22.x".to_string()),
+ description: "Node.js runtime support for the ado-aw compiler.".to_string(),
+ },
+ RuntimeCatalogEntry {
+ id: "dotnet".to_string(),
+ default_version: Some("8.0.x".to_string()),
+ description: ".NET runtime support for the ado-aw compiler.".to_string(),
+ },
+ ]
+}
+
+/// Build the tool section: a fixed, hand-written list of `bash`,
+/// `edit`, `azure-devops`, and `cache-memory`.
+fn tools() -> Vec {
+ vec![
+ ToolCatalogEntry {
+ id: "bash".to_string(),
+ description: "Bash command access configured via tools.bash; omitted means unrestricted bash access.".to_string(),
+ },
+ ToolCatalogEntry {
+ id: "edit".to_string(),
+ description: "File writing configured via tools.edit; enabled by default.".to_string(),
+ },
+ ToolCatalogEntry {
+ id: "azure-devops".to_string(),
+ description: "Azure DevOps first-class tool.".to_string(),
+ },
+ ToolCatalogEntry {
+ id: "cache-memory".to_string(),
+ description: "Cache memory first-class tool.".to_string(),
+ },
+ ]
+}
+
+/// Build the engine section; currently the single hard-coded id
+/// `copilot`.
+fn engines() -> Vec {
+ // TODO: Switch to an enum-driven Engine::all_ids() API when engine.rs exposes one.
+ vec!["copilot".to_string()]
+}
+
+/// Build the model section: `DEFAULT_COPILOT_MODEL` first, followed
+/// by the hand-maintained extra entries below.
+fn models() -> Vec {
+ // No KNOWN_MODELS registry exists yet; keep this list aligned with
+ // prompts/create-ado-agentic-workflow.md step 2.
+ vec![
+ DEFAULT_COPILOT_MODEL.to_string(),
+ "claude-sonnet-4.5".to_string(),
+ ]
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ /// The unfiltered catalog populates every section.
+ #[test]
+ fn catalog_inspect_returns_non_empty_lists_for_every_category() {
+ let catalog = catalog();
+ assert!(!catalog.safe_outputs.is_empty());
+ assert!(!catalog.runtimes.is_empty());
+ assert!(!catalog.tools.is_empty());
+ assert!(!catalog.engines.is_empty());
+ assert!(!catalog.models.is_empty());
+ }
+
+ /// Every name in `ALWAYS_ON_TOOLS` appears in the `safe-outputs`
+ /// section.
+ #[test]
+ fn safe_outputs_inspect_catalog_kind_includes_always_on_tools() {
+ let catalog = catalog_kind("safe-outputs").unwrap();
+ let names: Vec<&str> = catalog
+ .safe_outputs
+ .iter()
+ .map(|e| e.name.as_str())
+ .collect();
+ for always_on in ALWAYS_ON_TOOLS {
+ assert!(
+ names.contains(always_on),
+ "safe-outputs catalog missing always-on tool {always_on}"
+ );
+ }
+ }
+
+ /// An unrecognised kind surfaces as an error that carries the
+ /// offending value in its `kind` field.
+ #[test]
+ fn unknown_inspect_catalog_kind_returns_typed_error() {
+ let err = catalog_kind("widgets").unwrap_err();
+ assert_eq!(err.kind, "widgets");
+ }
+}
diff --git a/src/inspect/cli.rs b/src/inspect/cli.rs
new file mode 100644
index 00000000..e330eeda
--- /dev/null
+++ b/src/inspect/cli.rs
@@ -0,0 +1,381 @@
+//! CLI dispatchers for the `inspect` family of subcommands.
+//!
+//! Each `dispatch_*` is the single entry point invoked from
+//! `src/main.rs`. Public option structs are by-reference / `Copy`
+//! where convenient so call sites stay terse.
+
+use std::path::Path;
+
+use anyhow::{Context, Result};
+
+use crate::audit::model::AuditData;
+use crate::compile::{
+ build_pipeline_ir,
+ ir::summary::{GraphSummary, PipelineSummary},
+};
+
+use super::{catalog, graph_deps, graph_outputs, graph_query, lint, trace, whatif};
+
+/// Options for the `ado-aw inspect` subcommand.
+#[derive(Debug)]
+pub struct InspectOptions<'a> {
+ /// Path to the agent `.md` to inspect.
+ pub source: &'a Path,
+ /// Emit machine-readable JSON to stdout when `true`; otherwise
+ /// render a terse human summary.
+ pub json: bool,
+}
+
+/// Print the public [`PipelineSummary`] of an agent source file to
+/// stdout.
+///
+/// With `opts.json` set, the complete summary is written as
+/// pretty-printed JSON. Otherwise a compact, terminal-oriented
+/// digest (counts plus a few cross-cutting facts) is rendered.
+pub async fn dispatch_inspect(opts: InspectOptions<'_>) -> Result<()> {
+    let summary = build_inspect(opts.source).await?;
+
+    // Text mode is the short path; handle it first and bail out.
+    if !opts.json {
+        print_text_inspect(&summary);
+        return Ok(());
+    }
+
+    println!("{}", serde_json::to_string_pretty(&summary)?);
+    Ok(())
+}
+
+/// Build the public [`PipelineSummary`] for an agent source file.
+///
+/// Compiles `source` to the pipeline IR via `build_pipeline_ir`
+/// (discarding the returned front matter) and converts the IR with
+/// `PipelineSummary::from_pipeline`.
+///
+/// # Errors
+///
+/// IR build failures are returned with the context
+/// `Failed to build IR for <source>`; summary conversion errors are
+/// returned as-is.
+pub async fn build_inspect(source: &Path) -> Result {
+ let (_fm, pipeline) = build_pipeline_ir(source)
+ .await
+ .with_context(|| format!("Failed to build IR for {}", source.display()))?;
+ PipelineSummary::from_pipeline(&pipeline)
+}
+
+/// Output format selector for `ado-aw graph`.
+// Variant notes below are plain `//` comments on purpose: `///` docs
+// on a `clap::ValueEnum` variant would become CLI help text.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum)]
+#[clap(rename_all = "lower")]
+pub enum GraphFormat {
+ // Rendered by `graph_query::render_text`.
+ Text,
+ // Pretty-printed JSON of the summary's `graph` field.
+ Json,
+ // Rendered by `graph_query::render_dot`.
+ Dot,
+}
+
+/// Options for the `ado-aw graph` subcommand.
+#[derive(Debug)]
+pub struct GraphOptions<'a> {
+ /// Path to the agent markdown source.
+ pub source: &'a Path,
+ /// Output format for the graph dump.
+ pub format: GraphFormat,
+}
+
+/// Print the resolved dependency graph of `opts.source` to stdout in
+/// the requested format. Rendering itself is delegated to
+/// [`graph_query`] through [`build_graph_dump`].
+pub async fn dispatch_graph(opts: GraphOptions<'_>) -> Result<()> {
+    let GraphOptions { source, format } = opts;
+    let rendered = build_graph_dump(source, format).await?;
+    println!("{}", rendered);
+    Ok(())
+}
+
+/// Build the resolved dependency graph summary for an agent source file.
+///
+/// This is the `graph` field of the summary returned by
+/// [`build_inspect`]; errors from that call are propagated.
+pub async fn build_graph_summary(source: &Path) -> Result {
+ Ok(build_inspect(source).await?.graph)
+}
+
+/// Render the resolved dependency graph for an agent source file.
+///
+/// Text and DOT output are produced from the whole summary by
+/// `graph_query`; JSON output serializes only `summary.graph`.
+///
+/// # Errors
+///
+/// Propagates errors from [`build_inspect`] and from JSON
+/// serialization.
+pub async fn build_graph_dump(source: &Path, format: GraphFormat) -> Result {
+ let summary = build_inspect(source).await?;
+ match format {
+ GraphFormat::Text => Ok(graph_query::render_text(&summary)),
+ GraphFormat::Json => serde_json::to_string_pretty(&summary.graph).map_err(Into::into),
+ GraphFormat::Dot => Ok(graph_query::render_dot(&summary)),
+ }
+}
+
+/// Options for the `ado-aw graph deps` subcommand.
+#[derive(Debug)]
+pub struct GraphDepsOptions<'a> {
+ /// Path to the agent markdown source.
+ pub source: &'a Path,
+ /// Step id to traverse from.
+ pub step: &'a str,
+ /// Traversal direction.
+ pub direction: graph_deps::GraphDepsDirection,
+ /// Emit machine-readable JSON instead of text.
+ pub json: bool,
+}
+
+/// Traverse the dependency graph from one named step and print the
+/// resulting report to stdout, as pretty JSON when `opts.json` is
+/// set and as text otherwise.
+pub async fn dispatch_graph_deps(opts: GraphDepsOptions<'_>) -> Result<()> {
+    let report = build_graph_deps(opts.source, opts.step, opts.direction).await?;
+
+    if !opts.json {
+        println!("{}", graph_deps::render_text(&report));
+        return Ok(());
+    }
+
+    println!("{}", serde_json::to_string_pretty(&report)?);
+    Ok(())
+}
+
+/// Build a dependency traversal report for one named step.
+///
+/// Builds the pipeline summary for `source`, then hands it to
+/// `graph_deps::analyze` together with `step` and `direction`.
+/// Errors from either stage are propagated.
+pub async fn build_graph_deps(
+ source: &Path,
+ step: &str,
+ direction: graph_deps::GraphDepsDirection,
+) -> Result {
+ let summary = build_inspect(source).await?;
+ graph_deps::analyze(&summary, step, direction)
+}
+
+/// Options for the `ado-aw graph outputs` subcommand.
+#[derive(Debug)]
+pub struct GraphOutputsOptions<'a> {
+ /// Path to the agent markdown source.
+ pub source: &'a Path,
+ /// Optional producer step id filter.
+ pub producer: Option<&'a str>,
+ /// Optional consumer step id filter.
+ pub consumer: Option<&'a str>,
+ /// Emit machine-readable JSON instead of text.
+ pub json: bool,
+}
+
+/// Print the table of declared outputs and the steps that consume
+/// them, as pretty JSON when `opts.json` is set and as text
+/// otherwise. The optional producer/consumer filters are forwarded
+/// to [`build_graph_outputs`].
+pub async fn dispatch_graph_outputs(opts: GraphOutputsOptions<'_>) -> Result<()> {
+    let edges = build_graph_outputs(opts.source, opts.producer, opts.consumer).await?;
+
+    if !opts.json {
+        println!("{}", graph_outputs::render_text(&edges));
+        return Ok(());
+    }
+
+    println!("{}", serde_json::to_string_pretty(&edges)?);
+    Ok(())
+}
+
+/// Build the declared-output table, optionally filtered by producer/consumer.
+///
+/// Builds the pipeline summary for `source` and delegates the
+/// filtering to `graph_outputs::output_edges`. Only the summary
+/// build can fail here.
+pub async fn build_graph_outputs(
+ source: &Path,
+ producer: Option<&str>,
+ consumer: Option<&str>,
+) -> Result> {
+ let summary = build_inspect(source).await?;
+ Ok(graph_outputs::output_edges(&summary, producer, consumer))
+}
+
+/// Options for the `ado-aw trace` subcommand.
+#[derive(Debug)]
+pub struct TraceOptions<'a> {
+ /// Build id or build URL identifying the run to trace.
+ pub build_id_or_url: &'a str,
+ /// Optional step to focus the trace on.
+ pub step: Option<&'a str>,
+ /// Emit machine-readable JSON instead of text.
+ pub json: bool,
+ /// Optional organisation, forwarded unchanged to the audit fetch.
+ pub org: Option<&'a str>,
+ /// Optional project, forwarded unchanged to the audit fetch.
+ pub project: Option<&'a str>,
+ /// Optional PAT, forwarded unchanged to the audit fetch.
+ pub pat: Option<&'a str>,
+ /// Cache root for downloaded build artifacts. When `None`,
+ /// [`build_trace`] anchors writes under
+ /// [`crate::audit::default_cache_root`]
+ /// (`${TEMP}/ado-aw/audit`) so CLI invocations, the mcp-author
+ /// `trace_failure` tool, and `ado-aw audit` all share a single
+ /// cache root — preventing `./logs/` directories from being
+ /// scattered under arbitrary IDE working directories.
+ pub output: Option<&'a Path>,
+}
+
+/// Trace a build by joining its audit telemetry with the local
+/// typed-IR graph, then print the result.
+///
+/// Two advisory warnings may be written to stderr before the report:
+/// one when the audit data carries no pipeline graph, and one when a
+/// step was requested but the report did not resolve it. The report
+/// itself goes to stdout, as pretty JSON when `opts.json` is set and
+/// as text otherwise.
+pub async fn dispatch_trace(opts: TraceOptions<'_>) -> Result<()> {
+    let (audit, report) = build_trace(&opts).await?;
+
+    let graph_missing = audit.pipeline_graph.is_none();
+    if graph_missing {
+        eprintln!("warning: source markdown was not available locally; trace is runtime-only");
+    }
+
+    let step_unresolved = opts.step.is_some() && report.step.is_none();
+    if step_unresolved {
+        eprintln!("warning: requested step was not found in the local IR graph");
+    }
+
+    if !opts.json {
+        // The text rendering is emitted with `print!`, so no extra
+        // trailing newline is added here.
+        print!("{}", trace::render_text(&audit, &report, opts.step));
+        return Ok(());
+    }
+
+    println!("{}", serde_json::to_string_pretty(&report)?);
+    Ok(())
+}
+
+/// Fetch the audit data for a build and derive the trace report
+/// from it.
+///
+/// Returns the raw audit data alongside the report so callers can
+/// render either. Errors from the audit fetch are propagated.
+pub async fn build_trace(opts: &TraceOptions<'_>) -> Result<(AuditData, trace::TraceReport)> {
+    // Unless the caller overrides it (e.g. in tests) via
+    // `opts.output`, use the canonical audit cache root that every
+    // other entry point shares (CLI `audit`, mcp-author
+    // `audit_build` / `trace_failure`).
+    let shared_root = crate::audit::default_cache_root();
+    let cache_dir = match opts.output {
+        Some(dir) => dir,
+        None => shared_root.as_path(),
+    };
+
+    let request = crate::audit::AuditOptions {
+        build_id_or_url: opts.build_id_or_url,
+        output: cache_dir,
+        json: true,
+        org: opts.org,
+        project: opts.project,
+        pat: opts.pat,
+        artifacts: None,
+        no_cache: false,
+    };
+    let audit = crate::audit::fetch_audit_data(request).await?;
+
+    let report = trace::build_trace_report(&audit, opts.step);
+    Ok((audit, report))
+}
+
+/// Options for the `ado-aw whatif` subcommand and its `--fail` argument.
+#[derive(Debug)]
+pub struct WhatIfOptions<'a> {
+ /// Path to the agent markdown source.
+ pub source: &'a Path,
+ /// Step id or job id that should be treated as failing.
+ pub fail: &'a str,
+ /// Emit machine-readable JSON instead of text.
+ pub json: bool,
+}
+
+/// Report which downstream jobs would skip if the given step or job
+/// failed, printing pretty JSON when `opts.json` is set and text
+/// otherwise.
+pub async fn dispatch_whatif(opts: WhatIfOptions<'_>) -> Result<()> {
+    let report = build_whatif(opts.source, opts.fail).await?;
+
+    if !opts.json {
+        println!("{}", whatif::render_text(&report));
+        return Ok(());
+    }
+
+    println!("{}", serde_json::to_string_pretty(&report)?);
+    Ok(())
+}
+
+/// Build a static reachability report for a failing step/job id.
+///
+/// Builds the pipeline summary for `source` and delegates to
+/// `whatif::analyze`; errors from either stage are propagated.
+pub async fn build_whatif(source: &Path, fail: &str) -> Result {
+ let summary = build_inspect(source).await?;
+ whatif::analyze(&summary, fail)
+}
+
+/// Options for the `ado-aw lint` subcommand.
+#[derive(Debug)]
+pub struct LintOptions<'a> {
+ /// Path to the agent markdown source.
+ pub source: &'a Path,
+ /// Emit machine-readable JSON instead of text.
+ pub json: bool,
+}
+
+/// Run structural lint checks over an agent source file.
+///
+/// Returns `true` when at least one error-severity finding was emitted so the
+/// CLI can translate that into exit code 1 without treating warnings/infos as
+/// hard failures.
+pub async fn dispatch_lint(opts: LintOptions<'_>) -> Result {
+ let report = build_lint(opts.source).await?;
+ // Capture the verdict up front; the report is printed either way.
+ let had_errors = report.summary.errors > 0;
+
+ if opts.json {
+ let json = serde_json::to_string_pretty(&report)?;
+ println!("{}", json);
+ } else {
+ println!("{}", lint::render_text(&report));
+ }
+
+ Ok(had_errors)
+}
+
+/// Build the structural lint report for an agent source file.
+///
+/// Builds the pipeline summary for `source` and passes it to
+/// `lint::report`. Only the summary build can fail here.
+pub async fn build_lint(source: &Path) -> Result {
+ let summary = build_inspect(source).await?;
+ Ok(lint::report(&summary))
+}
+
+/// Options for `ado-aw catalog`.
+#[derive(Debug)]
+pub struct CatalogOptions<'a> {
+ /// Optional section filter; `None` selects the whole catalog.
+ pub kind: Option<&'a str>,
+ /// Emit machine-readable JSON instead of text.
+ pub json: bool,
+}
+
+/// Print the in-tree registry catalog to stdout, optionally narrowed
+/// to one section via `opts.kind`, as pretty JSON when `opts.json`
+/// is set and as text otherwise.
+pub fn dispatch_catalog(opts: CatalogOptions<'_>) -> Result<()> {
+    let catalog = build_catalog(opts.kind)?;
+
+    let rendered = if opts.json {
+        serde_json::to_string_pretty(&catalog)?
+    } else {
+        catalog::render_text(&catalog)
+    };
+    println!("{}", rendered);
+    Ok(())
+}
+
+/// Build the in-tree registry catalog, optionally filtered by kind.
+///
+/// `None` yields the full catalog. `Some(kind)` yields only that
+/// section, and fails if `kind` is not a recognised section name.
+pub fn build_catalog(kind: Option<&str>) -> Result {
+ Ok(match kind {
+ Some(kind) => catalog::catalog_kind(kind)?,
+ None => catalog::catalog(),
+ })
+}
+
+/// Print the human-readable digest of a [`PipelineSummary`] to stdout.
+///
+/// Output is a header (name, target shape, schema version), then one
+/// line per job (grouped under their stage for staged pipelines),
+/// then a `Graph:` section of counts.
+fn print_text_inspect(s: &PipelineSummary) {
+ use crate::compile::ir::summary::PipelineBodySummary;
+
+ println!("Pipeline: {}", s.name);
+ println!("Target shape: {}", s.shape);
+ println!("Schema version: {}", s.schema_version);
+ println!();
+ match &s.body {
+ PipelineBodySummary::Jobs { jobs } => {
+ println!("Jobs ({}):", jobs.len());
+ for j in jobs {
+ print_job_summary_line(j);
+ }
+ }
+ PipelineBodySummary::Stages { stages } => {
+ println!("Stages ({}):", stages.len());
+ for st in stages {
+ let dep = format_depends(&st.depends_on);
+ println!("- {} ({}){}", st.id, st.display_name, dep);
+ for j in &st.jobs {
+ // Indent job lines beneath their stage line.
+ print!(" ");
+ print_job_summary_line(j);
+ }
+ }
+ }
+ }
+ println!();
+ println!("Graph:");
+ println!(
+ " step locations: {}",
+ s.graph.step_locations.len()
+ );
+ println!(" derived job edges: {}", s.graph.job_edges.len());
+ println!(" derived stage edges: {}", s.graph.stage_edges.len());
+ // Total number of outputs across all entries, not the number of entries.
+ let need_io: usize = s
+ .graph
+ .outputs_needing_is_output
+ .iter()
+ .map(|e| e.outputs.len())
+ .sum();
+ println!(" outputs needing isOutput: {}", need_io);
+}
+
+/// Print one summary line for a job: its id, its stage in brackets
+/// when it has one, the total step count, the number of steps that
+/// carry an id, and any `depends on` suffix.
+fn print_job_summary_line(j: &crate::compile::ir::summary::JobSummary) {
+ let dep = format_depends(&j.depends_on);
+ let stage = j
+ .stage
+ .as_deref()
+ .map(|s| format!(" [{}]", s))
+ .unwrap_or_default();
+ let step_count = j.steps.len();
+ // "Named" steps are those with an explicit `id`.
+ let id_step_count: usize = j.steps.iter().filter(|s| s.id.is_some()).count();
+ println!(
+ "- {}{} steps: {} ({} named){}",
+ j.id, stage, step_count, id_step_count, dep
+ );
+}
+
+/// Format a dependency list as a ` depends on: a, b` suffix, or as
+/// the empty string when there are no dependencies.
+fn format_depends(deps: &[String]) -> String {
+    match deps {
+        [] => String::new(),
+        _ => format!(" depends on: {}", deps.join(", ")),
+    }
+}
diff --git a/src/inspect/graph_deps.rs b/src/inspect/graph_deps.rs
new file mode 100644
index 00000000..44444417
--- /dev/null
+++ b/src/inspect/graph_deps.rs
@@ -0,0 +1,701 @@
+//! Step-centric dependency traversal for `ado-aw graph deps`.
+//!
+//! The compiler's public [`PipelineSummary`] already contains the
+//! resolved job/stage dependency graph plus per-step output references.
+//! This module answers one focused question over that stable summary:
+//! what sits upstream or downstream of a single named step?
+
+use std::collections::{BTreeMap, BTreeSet, VecDeque};
+use std::error::Error;
+use std::fmt;
+
+use anyhow::{Result, anyhow};
+use serde::Serialize;
+
+use crate::compile::ir::summary::{
+ EdgeEntry, JobSummary, PipelineBodySummary, PipelineSummary, StepLocationEntry, StepSummary,
+};
+
+/// Traversal direction for `ado-aw graph deps`.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum)]
+#[clap(rename_all = "lower")]
+pub enum GraphDepsDirection {
+ /// Walk producer-side dependencies.
+ Upstream,
+ /// Walk consumer-side dependents.
+ Downstream,
+}
+
+impl GraphDepsDirection {
+ /// Stable JSON/text label for the direction.
+ ///
+ /// Returns `"upstream"` or `"downstream"`; this is the value
+ /// written to `GraphDepsReport::direction`.
+ pub fn as_str(self) -> &'static str {
+ match self {
+ Self::Upstream => "upstream",
+ Self::Downstream => "downstream",
+ }
+ }
+}
+
+/// A transitive job reached by the query.
+///
+/// Serialized as part of `GraphDepsReport::transitive_jobs`.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
+pub struct JobDependency {
+ /// Job id.
+ pub job: String,
+ /// Containing stage id for staged pipelines.
+ pub stage: Option,
+}
+
+/// A transitive step reached by following output references.
+///
+/// Serialized as part of `GraphDepsReport::transitive_steps`.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
+pub struct StepDependency {
+ /// Step id, or a stable anonymous label for steps without `id`.
+ pub step: String,
+ /// Containing job id.
+ pub job: String,
+ /// Containing stage id for staged pipelines.
+ pub stage: Option,
+ /// Output edge that caused the step to be reached, when known.
+ pub via_output: Option,
+}
+
+/// JSON report emitted by `ado-aw graph deps --json`.
+///
+/// The same value feeds the text renderer in this module.
+#[derive(Debug, Clone, PartialEq, Serialize)]
+pub struct GraphDepsReport {
+ /// Traversal direction: `upstream` or `downstream`.
+ pub direction: String,
+ /// Input step id.
+ pub step: String,
+ /// Location of the input step in the pipeline.
+ pub step_location: StepLocationEntry,
+ /// Transitive jobs reached through job/stage dependencies.
+ pub transitive_jobs: Vec,
+ /// Transitive steps reached through output references.
+ pub transitive_steps: Vec,
+}
+
+/// Typed errors for graph dependency queries.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum GraphDepsError {
+ /// The requested step id is not present in `summary.graph.step_locations`.
+ StepNotFound {
+ /// Missing step id.
+ step: String,
+ /// Closest known step id, if one was available.
+ suggestion: Option,
+ },
+}
+
+impl fmt::Display for GraphDepsError {
+ /// Formats `graph deps: step '<id>' not found`, appending
+ /// ` (closest match: '<id>')` when a suggestion is present.
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ match self {
+ Self::StepNotFound { step, suggestion } => {
+ write!(f, "graph deps: step '{step}' not found")?;
+ if let Some(s) = suggestion {
+ write!(f, " (closest match: '{s}')")?;
+ }
+ Ok(())
+ }
+ }
+ }
+}
+
+impl Error for GraphDepsError {}
+
+/// Analyze transitive dependencies for a single named step.
+///
+/// If `step` does not match a step id but does match a job id, the
+/// query falls back to job-level traversal. That keeps the command
+/// useful for canonical jobs such as `SafeOutputs` that may contain no
+/// named step with the same id.
+///
+/// # Errors
+///
+/// Returns [`GraphDepsError::StepNotFound`] (wrapped in `anyhow`)
+/// when `step` matches neither a step id nor a job id. The error
+/// carries the closest known step-or-job id when one is found.
+pub fn analyze(
+ summary: &PipelineSummary,
+ step: &str,
+ direction: GraphDepsDirection,
+) -> Result {
+ let step_loc = summary
+ .graph
+ .step_locations
+ .iter()
+ .find(|loc| loc.step == step)
+ .cloned();
+ // Only look for a job of the same name when no step matched.
+ let job_loc = step_loc
+ .is_none()
+ .then(|| find_job(summary, step))
+ .flatten();
+ let loc = if let Some(loc) = step_loc {
+ loc
+ } else if let Some(job) = job_loc {
+ // Job fallback: synthesize a location that names the job and
+ // declares no outputs.
+ StepLocationEntry {
+ step: step.to_string(),
+ stage: job.stage.clone(),
+ job: job.id.clone(),
+ outputs: Vec::new(),
+ }
+ } else {
+ return Err(anyhow!(GraphDepsError::StepNotFound {
+ step: step.to_string(),
+ suggestion: closest(
+ step,
+ known_step_or_job_ids(summary).iter().map(String::as_str)
+ ),
+ }));
+ };
+
+ let transitive_jobs = transitive_jobs(summary, &loc, direction);
+ // In job-fallback mode the step walk starts from the whole job
+ // rather than from a single step id.
+ let transitive_steps = if job_loc.is_some() {
+ transitive_steps_for_job(summary, &loc.job, direction)
+ } else {
+ transitive_steps(summary, step, direction)
+ };
+
+ Ok(GraphDepsReport {
+ direction: direction.as_str().to_string(),
+ step: step.to_string(),
+ step_location: loc,
+ transitive_jobs,
+ transitive_steps,
+ })
+}
+
+/// Render a dependency report as terminal-friendly text.
+///
+/// The output has three parts: the queried step's location, the
+/// job-level edges, and the step-level output edges. An empty list
+/// is shown as `(none)`.
+pub fn render_text(report: &GraphDepsReport) -> String {
+ let mut out = String::new();
+ out.push_str(&format!(
+ "Graph dependencies for step '{}' ({})\n",
+ report.step, report.direction
+ ));
+ out.push_str("Step location\n");
+ out.push_str(&format!(
+ " {}\n",
+ qualified(
+ &report.step_location.stage,
+ &report.step_location.job,
+ &report.step_location.step
+ )
+ ));
+ out.push('\n');
+
+ out.push_str("Job-level edges\n");
+ if report.transitive_jobs.is_empty() {
+ out.push_str(" (none)\n");
+ } else {
+ for job in &report.transitive_jobs {
+ out.push_str(&format!(" - {}\n", qualified_job(&job.stage, &job.job)));
+ }
+ }
+ out.push('\n');
+
+ out.push_str("Step-level output edges\n");
+ if report.transitive_steps.is_empty() {
+ out.push_str(" (none)\n");
+ } else {
+ for step in &report.transitive_steps {
+ // Append the output that led here only when it is known.
+ let via = step
+ .via_output
+ .as_deref()
+ .map(|v| format!(" via {v}"))
+ .unwrap_or_default();
+ out.push_str(&format!(
+ " - {}{}\n",
+ qualified(&step.stage, &step.job, &step.step),
+ via
+ ));
+ }
+ }
+ out
+}
+
+/// Collect every job reachable from `loc` in the given direction.
+///
+/// The result is the union of jobs reached over the job edges and,
+/// when `loc` has a stage, all jobs in every stage reached over the
+/// stage edges. Entries are de-duplicated and come out in the
+/// `BTreeSet` order of their `(stage, job)` pair.
+fn transitive_jobs(
+ summary: &PipelineSummary,
+ loc: &StepLocationEntry,
+ direction: GraphDepsDirection,
+) -> Vec {
+ let mut seen: BTreeSet<(Option, String)> = BTreeSet::new();
+
+ for job in reachable_edges(&summary.graph.job_edges, &loc.job, direction) {
+ seen.insert((stage_for_job(summary, &job), job));
+ }
+
+ if let Some(stage) = &loc.stage {
+ // A reached stage contributes all of its jobs.
+ for reached_stage in reachable_edges(&summary.graph.stage_edges, stage, direction) {
+ for job in jobs_in_stage(summary, &reached_stage) {
+ seen.insert((Some(reached_stage.clone()), job));
+ }
+ }
+ }
+
+ seen.into_iter()
+ .map(|(stage, job)| JobDependency { job, stage })
+ .collect()
+}
+
+/// BFS-walk a directed edge list from `start`, returning every node
+/// reachable in the requested direction (transitive closure;
+/// `start` itself is not included unless cyclically reachable).
+///
+/// Shared with [`crate::inspect::whatif`] so the two failure
+/// reachability tools cannot drift apart on traversal semantics.
+pub fn reachable_edges(
+ edges: &[EdgeEntry],
+ start: &str,
+ direction: GraphDepsDirection,
+) -> BTreeSet {
+ // Adjacency map oriented for the walk: upstream follows
+ // consumer -> producer, downstream follows producer -> consumer.
+ let mut adjacency: BTreeMap> = BTreeMap::new();
+ for e in edges {
+ match direction {
+ GraphDepsDirection::Upstream => {
+ adjacency
+ .entry(e.consumer.clone())
+ .or_default()
+ .insert(e.producer.clone());
+ }
+ GraphDepsDirection::Downstream => {
+ adjacency
+ .entry(e.producer.clone())
+ .or_default()
+ .insert(e.consumer.clone());
+ }
+ }
+ }
+ let mut seen = BTreeSet::new();
+ // Seed the queue with the direct neighbours of `start`, not with
+ // `start` itself, so `start` only lands in `seen` via a cycle.
+ let mut queue: VecDeque = adjacency
+ .get(start)
+ .into_iter()
+ .flat_map(|next| next.iter().cloned())
+ .collect();
+ while let Some(node) = queue.pop_front() {
+ // Skip nodes already visited; this also terminates cycles.
+ if !seen.insert(node.clone()) {
+ continue;
+ }
+ if let Some(next) = adjacency.get(&node) {
+ queue.extend(next.iter().cloned());
+ }
+ }
+ seen
+}
+
+/// Compute the transitive step-level output dependencies of `step`.
+///
+/// `Upstream` returns the steps whose outputs `step` reads, directly
+/// or through intermediate steps; `Downstream` returns the steps that
+/// read `step`'s outputs, directly or through intermediate steps.
+fn transitive_steps(
+    summary: &PipelineSummary,
+    step: &str,
+    direction: GraphDepsDirection,
+) -> Vec<StepDependency> {
+    let nodes = step_nodes(summary);
+
+    match direction {
+        GraphDepsDirection::Upstream => {
+            // Only the upstream walk needs the by-label index, so build
+            // it here (moving the nodes in) instead of cloning every
+            // node on the downstream path as well.
+            let node_by_step: BTreeMap<String, StepNode> = nodes
+                .into_iter()
+                .map(|node| (node.step.clone(), node))
+                .collect();
+            upstream_steps(step, &node_by_step)
+        }
+        GraphDepsDirection::Downstream => downstream_steps(step, &nodes),
+    }
+}
+
+/// Compute the transitive step-level output dependencies of a whole job.
+///
+/// `Upstream` starts from every output reference made by any step of
+/// `job`. `Downstream` starts from every step of `job` listed in
+/// `summary.graph.step_locations` and merges the per-step results,
+/// keeping the first occurrence of each dependent step.
+fn transitive_steps_for_job(
+    summary: &PipelineSummary,
+    job: &str,
+    direction: GraphDepsDirection,
+) -> Vec<StepDependency> {
+    let nodes = step_nodes(summary);
+
+    match direction {
+        GraphDepsDirection::Upstream => {
+            let refs: Vec<StepReference> = nodes
+                .iter()
+                .filter(|node| node.job == job)
+                .flat_map(|node| node.refs.iter().cloned())
+                .collect();
+            // The by-label index is only needed on this path; build it
+            // by moving the nodes rather than cloning each one.
+            let node_by_step: BTreeMap<String, StepNode> = nodes
+                .into_iter()
+                .map(|node| (node.step.clone(), node))
+                .collect();
+            upstream_from_refs(refs, &node_by_step)
+        }
+        GraphDepsDirection::Downstream => {
+            let mut seen = BTreeSet::new();
+            let mut out = Vec::new();
+            let start_steps = summary
+                .graph
+                .step_locations
+                .iter()
+                .filter(|loc| loc.job == job);
+            for loc in start_steps {
+                for dep in downstream_steps(&loc.step, &nodes) {
+                    // Several start steps can reach the same dependent;
+                    // report it once.
+                    if seen.insert(dep.step.clone()) {
+                        out.push(dep);
+                    }
+                }
+            }
+            out
+        }
+    }
+}
+
+/// Walk upstream from a single step; an unknown `step` yields no
+/// dependencies.
+fn upstream_steps(step: &str, node_by_step: &BTreeMap) -> Vec {
+    match node_by_step.get(step) {
+        Some(start) => upstream_from_refs(start.refs.clone(), node_by_step),
+        None => Vec::new(),
+    }
+}
+
+/// Breadth-first walk over output references, returning every producer
+/// step reachable from the initial `refs`.
+///
+/// Each producer is reported at most once, in the order it is first
+/// reached; `via_output` names the `producer.output` pair of the
+/// reference that first reached it. A reference whose producer is not
+/// present in `node_by_step` is marked as seen but produces no entry.
+fn upstream_from_refs(
+ refs: Vec,
+ node_by_step: &BTreeMap,
+) -> Vec {
+ let mut seen = BTreeSet::new();
+ let mut out = Vec::new();
+ let mut queue: VecDeque = refs.into();
+
+ while let Some(reference) = queue.pop_front() {
+ let producer = reference.producer_step.clone();
+ // De-duplicate by producer step, not by (producer, output).
+ if !seen.insert(producer.clone()) {
+ continue;
+ }
+ if let Some(producer_node) = node_by_step.get(&producer) {
+ out.push(StepDependency {
+ step: producer.clone(),
+ job: producer_node.job.clone(),
+ stage: producer_node.stage.clone(),
+ via_output: Some(format!("{}.{}", producer, reference.output_name)),
+ });
+ // Continue the walk through the producer's own references.
+ queue.extend(producer_node.refs.iter().cloned());
+ }
+ }
+ out
+}
+
+/// Breadth-first walk from `step` to every step that reads one of its
+/// outputs, directly or through intermediate consumers.
+///
+/// Each consumer is reported at most once, in the order it is first
+/// reached; `via_output` names the `producer.output` pair of the
+/// reference that first reached it.
+fn downstream_steps(step: &str, nodes: &[StepNode]) -> Vec {
+ // Reverse index: producer step -> (consumer node, output name read).
+ let mut reverse: BTreeMap> = BTreeMap::new();
+ for node in nodes {
+ for reference in &node.refs {
+ reverse
+ .entry(reference.producer_step.clone())
+ .or_default()
+ .push((node.clone(), reference.output_name.clone()));
+ }
+ }
+
+ let mut seen = BTreeSet::new();
+ let mut out = Vec::new();
+ let mut queue = VecDeque::from([step.to_string()]);
+ while let Some(producer) = queue.pop_front() {
+ // A step nobody reads from has no entry in the reverse index.
+ let Some(consumers) = reverse.get(&producer) else {
+ continue;
+ };
+ for (consumer, output_name) in consumers {
+ // De-duplicate by consumer step; this also stops cycles.
+ if !seen.insert(consumer.step.clone()) {
+ continue;
+ }
+ out.push(StepDependency {
+ step: consumer.step.clone(),
+ job: consumer.job.clone(),
+ stage: consumer.stage.clone(),
+ via_output: Some(format!("{}.{}", producer, output_name)),
+ });
+ queue.push_back(consumer.step.clone());
+ }
+ }
+ out
+}
+
+/// One step of the pipeline, flattened for the step-level walks.
+#[derive(Debug, Clone)]
+struct StepNode {
+ /// Step label: the step id, or `<job>#<n>` (1-based) for a step without one.
+ step: String,
+ /// Id of the job that contains the step.
+ job: String,
+ /// Stage recorded on the containing job, if any.
+ stage: Option,
+ /// Output references read by the step (`env` refs followed by `condition` refs).
+ refs: Vec,
+}
+
+/// A single "this step reads `producer_step.output_name`" reference.
+#[derive(Debug, Clone)]
+struct StepReference {
+ /// Step that declares the referenced output.
+ producer_step: String,
+ /// Name of the referenced output variable.
+ output_name: String,
+}
+
+/// Flatten every step of the pipeline into a list of [`StepNode`]s, in
+/// declaration order (stage by stage, job by job, step by step).
+fn step_nodes(summary: &PipelineSummary) -> Vec {
+    let mut collected = Vec::new();
+    match &summary.body {
+        PipelineBodySummary::Jobs { jobs } => jobs
+            .iter()
+            .for_each(|job| push_job_step_nodes(&mut collected, job)),
+        PipelineBodySummary::Stages { stages } => stages
+            .iter()
+            .flat_map(|stage| stage.jobs.iter())
+            .for_each(|job| push_job_step_nodes(&mut collected, job)),
+    }
+    collected
+}
+
+/// Append one [`StepNode`] per step of `job` to `nodes`.
+fn push_job_step_nodes(nodes: &mut Vec, job: &JobSummary) {
+    nodes.extend(job.steps.iter().enumerate().map(|(position, step)| StepNode {
+        step: step_label(step, job, position),
+        job: job.id.clone(),
+        stage: job.stage.clone(),
+        refs: step_refs(step),
+    }));
+}
+
+/// Gather the output references of a step: all `env_refs` first, then
+/// all `condition_refs`.
+fn step_refs(step: &StepSummary) -> Vec {
+    let mut refs = Vec::new();
+    for source in [&step.env_refs, &step.condition_refs] {
+        for output_ref in source {
+            refs.push(StepReference {
+                producer_step: output_ref.step.clone(),
+                output_name: output_ref.name.clone(),
+            });
+        }
+    }
+    refs
+}
+
+/// Label for a step: its id when present, otherwise `<job id>#<n>`
+/// where `n` is the step's 1-based position within the job.
+fn step_label(step: &StepSummary, job: &JobSummary, idx: usize) -> String {
+    match &step.id {
+        Some(id) => id.clone(),
+        None => format!("{}#{}", job.id, idx + 1),
+    }
+}
+
+/// Stage recorded on the job named `job_id`; `None` when the job is
+/// unknown or carries no stage.
+fn stage_for_job(summary: &PipelineSummary, job_id: &str) -> Option {
+    let job = find_job(summary, job_id)?;
+    job.stage.clone()
+}
+
+/// Ids of the jobs in the first stage whose id equals `stage_id`.
+/// Empty for jobs-bodied pipelines and for unknown stages.
+fn jobs_in_stage(summary: &PipelineSummary, stage_id: &str) -> Vec {
+    match &summary.body {
+        PipelineBodySummary::Stages { stages } => {
+            let mut job_ids = Vec::new();
+            for stage in stages {
+                if stage.id == stage_id {
+                    job_ids.extend(stage.jobs.iter().map(|job| job.id.clone()));
+                    // Only the first matching stage counts.
+                    break;
+                }
+            }
+            job_ids
+        }
+        PipelineBodySummary::Jobs { .. } => Vec::new(),
+    }
+}
+
+/// Find the first job with id `job_id`, searching stages in order for
+/// stages-bodied pipelines.
+fn find_job<'a>(summary: &'a PipelineSummary, job_id: &str) -> Option<&'a JobSummary> {
+    match &summary.body {
+        PipelineBodySummary::Jobs { jobs } => {
+            jobs.iter().find(|candidate| candidate.id == job_id)
+        }
+        PipelineBodySummary::Stages { stages } => {
+            for stage in stages {
+                let hit = stage.jobs.iter().find(|candidate| candidate.id == job_id);
+                if hit.is_some() {
+                    return hit;
+                }
+            }
+            None
+        }
+    }
+}
+
+/// List every id a query may name: all step ids from
+/// `graph.step_locations`, followed by all job ids in declaration order.
+fn known_step_or_job_ids(summary: &PipelineSummary) -> Vec {
+    let mut ids = Vec::new();
+    for loc in &summary.graph.step_locations {
+        ids.push(loc.step.clone());
+    }
+    match &summary.body {
+        PipelineBodySummary::Jobs { jobs } => {
+            for job in jobs {
+                ids.push(job.id.clone());
+            }
+        }
+        PipelineBodySummary::Stages { stages } => {
+            for stage in stages {
+                for job in &stage.jobs {
+                    ids.push(job.id.clone());
+                }
+            }
+        }
+    }
+    ids
+}
+
+/// Dotted step name: `stage.job.step`, or `job.step` without a stage.
+fn qualified(stage: &Option, job: &str, step: &str) -> String {
+    if let Some(stage_id) = stage {
+        format!("{stage_id}.{job}.{step}")
+    } else {
+        format!("{job}.{step}")
+    }
+}
+
+/// Dotted job name: `stage.job`, or just `job` without a stage.
+fn qualified_job(stage: &Option, job: &str) -> String {
+    if let Some(stage_id) = stage {
+        format!("{stage_id}.{job}")
+    } else {
+        job.to_string()
+    }
+}
+
+/// Pick the candidate closest to `needle` for a "did you mean" hint.
+///
+/// Returns `None` when no candidate is within the distance threshold.
+/// Ties on distance are broken by choosing the lexicographically
+/// smallest candidate.
+fn closest<'a>(needle: &str, candidates: impl Iterator<Item = &'a str>) -> Option<String> {
+    // Same Levenshtein threshold as `inspect::whatif::closest`:
+    // suppress low-quality suggestions so an input like `xyzzy`
+    // does not get the lexicographically nearest match as its
+    // "did you mean" hint. Half the needle length + 2 keeps short
+    // single-typo cases (`Aget` → `Agent`) intact while rejecting
+    // genuinely unrelated input.
+    let needle_len = needle.chars().count();
+    let max_distance = needle_len / 2 + 2;
+    candidates
+        .map(|candidate| (levenshtein(needle, candidate), candidate))
+        .filter(|(distance, _)| *distance <= max_distance)
+        // Tuples order by distance first, then by candidate text, so a
+        // plain `min` gives the same winner without allocating a
+        // `String` key for every comparison.
+        .min()
+        .map(|(_, candidate)| candidate.to_string())
+}
+
+/// Levenshtein edit distance between `a` and `b`, counted in `char`s
+/// (Unicode scalar values): the minimum number of single-character
+/// insertions, deletions and substitutions turning `a` into `b`.
+fn levenshtein(a: &str, b: &str) -> usize {
+    // Decode `b` once instead of re-walking its UTF-8 for every
+    // character of `a`.
+    let b_chars: Vec<char> = b.chars().collect();
+    // `prev[j]` is the distance between the processed prefix of `a` and
+    // the first `j` chars of `b`; it starts as the all-insertions row.
+    let mut prev: Vec<usize> = (0..=b_chars.len()).collect();
+    let mut curr: Vec<usize> = Vec::with_capacity(prev.len());
+    for (i, ca) in a.chars().enumerate() {
+        // Reuse the row buffer rather than allocating one per iteration.
+        curr.clear();
+        curr.push(i + 1);
+        for (j, &cb) in b_chars.iter().enumerate() {
+            let cost = usize::from(ca != cb);
+            let insertion = curr[j] + 1;
+            let deletion = prev[j + 1] + 1;
+            let substitution = prev[j] + cost;
+            curr.push(insertion.min(deletion).min(substitution));
+        }
+        std::mem::swap(&mut prev, &mut curr);
+    }
+    prev[b_chars.len()]
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::compile::ir::summary::{
+ GraphSummary, OutputDeclSummary, OutputRefSummary, PipelineBodySummary, PoolSummary,
+ StepKind,
+ };
+
+ /// Build a jobs-bodied summary. `edges` are `(consumer, producer)`
+ /// job pairs; step locations are derived from the steps that have ids.
+ fn summary(jobs: Vec, edges: Vec<(&str, &str)>) -> PipelineSummary {
+ let step_locations = jobs
+ .iter()
+ .flat_map(|job| {
+ job.steps.iter().filter_map(|step| {
+ step.id.as_ref().map(|id| StepLocationEntry {
+ step: id.clone(),
+ stage: job.stage.clone(),
+ job: job.id.clone(),
+ outputs: step.outputs.iter().map(|o| o.name.clone()).collect(),
+ })
+ })
+ })
+ .collect();
+ PipelineSummary {
+ schema_version: 1,
+ name: "test".to_string(),
+ shape: "standalone".to_string(),
+ body: PipelineBodySummary::Jobs { jobs },
+ graph: GraphSummary {
+ step_locations,
+ job_edges: edges
+ .into_iter()
+ .map(|(consumer, producer)| EdgeEntry {
+ consumer: consumer.to_string(),
+ producer: producer.to_string(),
+ })
+ .collect(),
+ stage_edges: Vec::new(),
+ outputs_needing_is_output: Vec::new(),
+ },
+ }
+ }
+
+ /// Build a stage-less job with the given steps.
+ fn job(id: &str, steps: Vec) -> JobSummary {
+ JobSummary {
+ id: id.to_string(),
+ stage: None,
+ display_name: id.to_string(),
+ depends_on: Vec::new(),
+ condition: None,
+ pool: PoolSummary::VmImage {
+ image: "ubuntu-latest".to_string(),
+ },
+ steps,
+ }
+ }
+
+ /// Build a Bash step that declares `outputs` and reads `refs`
+ /// (`(producer step, output name)` pairs) through its env refs.
+ fn step(id: &str, outputs: &[&str], refs: &[(&str, &str)]) -> StepSummary {
+ StepSummary {
+ id: Some(id.to_string()),
+ kind: StepKind::Bash,
+ display_name: Some(id.to_string()),
+ task: None,
+ condition: None,
+ outputs: outputs
+ .iter()
+ .map(|name| OutputDeclSummary {
+ name: (*name).to_string(),
+ is_secret: false,
+ auto_is_output: false,
+ })
+ .collect(),
+ env_refs: refs
+ .iter()
+ .map(|(producer, name)| OutputRefSummary {
+ step: (*producer).to_string(),
+ name: (*name).to_string(),
+ })
+ .collect(),
+ condition_refs: Vec::new(),
+ }
+ }
+
+ #[test]
+ fn no_upstream_or_downstream_returns_empty_lists() {
+ let s = summary(vec![job("Solo", vec![step("A", &[], &[])])], vec![]);
+
+ let upstream = analyze(&s, "A", GraphDepsDirection::Upstream).unwrap();
+ let downstream = analyze(&s, "A", GraphDepsDirection::Downstream).unwrap();
+
+ assert!(upstream.transitive_jobs.is_empty());
+ assert!(upstream.transitive_steps.is_empty());
+ assert!(downstream.transitive_jobs.is_empty());
+ assert!(downstream.transitive_steps.is_empty());
+ }
+
+ #[test]
+ fn transitive_walk_crosses_multiple_hops() {
+ let s = summary(
+ vec![
+ job("Setup", vec![step("A", &["one"], &[])]),
+ job("Build", vec![step("B", &["two"], &[("A", "one")])]),
+ job("Test", vec![step("C", &[], &[("B", "two")])]),
+ ],
+ vec![("Build", "Setup"), ("Test", "Build")],
+ );
+
+ let report = analyze(&s, "C", GraphDepsDirection::Upstream).unwrap();
+
+ // Jobs come back sorted by (stage, job); steps come back in BFS order.
+ assert_eq!(
+ report
+ .transitive_jobs
+ .iter()
+ .map(|j| j.job.as_str())
+ .collect::>(),
+ vec!["Build", "Setup"]
+ );
+ assert_eq!(
+ report
+ .transitive_steps
+ .iter()
+ .map(|s| s.step.as_str())
+ .collect::>(),
+ vec!["B", "A"]
+ );
+ }
+
+ #[test]
+ fn step_not_found_returns_typed_error() {
+ let s = summary(vec![job("Solo", vec![step("A", &[], &[])])], vec![]);
+
+ let err = analyze(&s, "Missing", GraphDepsDirection::Upstream).unwrap_err();
+ assert!(err.downcast_ref::().is_some());
+ }
+
+ #[test]
+ fn bidirectional_symmetry_for_step_edges() {
+ let s = summary(
+ vec![
+ job("Setup", vec![step("A", &["one"], &[])]),
+ job("Build", vec![step("B", &[], &[("A", "one")])]),
+ ],
+ vec![("Build", "Setup")],
+ );
+
+ let b_upstream = analyze(&s, "B", GraphDepsDirection::Upstream).unwrap();
+ let a_downstream = analyze(&s, "A", GraphDepsDirection::Downstream).unwrap();
+
+ assert!(b_upstream.transitive_steps.iter().any(|s| s.step == "A"));
+ assert!(a_downstream.transitive_steps.iter().any(|s| s.step == "B"));
+ }
+}
diff --git a/src/inspect/graph_outputs.rs b/src/inspect/graph_outputs.rs
new file mode 100644
index 00000000..e937ecd3
--- /dev/null
+++ b/src/inspect/graph_outputs.rs
@@ -0,0 +1,317 @@
+//! Output declaration/reference table for `ado-aw graph outputs`.
+//!
+//! This module intentionally works from the public [`PipelineSummary`]
+//! instead of the compiler's internal graph. That keeps the command's
+//! JSON shape aligned with the stable inspect schema while still
+//! answering producer/consumer questions precisely.
+
+use std::collections::BTreeSet;
+
+use serde::Serialize;
+
+use crate::compile::ir::summary::{JobSummary, PipelineBodySummary, PipelineSummary, StepSummary};
+
+/// Source location of an output reference on a consumer step.
+///
+/// Serialized in snake_case (`env` / `condition`).
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
+#[serde(rename_all = "snake_case")]
+pub enum OutputConsumerSource {
+ /// Reference came from the step's `env:` map.
+ Env,
+ /// Reference came from the step's `condition:` expression.
+ Condition,
+}
+
+/// A step that reads a producer output.
+///
+/// One entry is emitted per matching reference, so a step that reads
+/// the same output from both `env` and `condition` appears twice.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
+pub struct OutputConsumer {
+ /// Consumer step id, or a stable anonymous label for steps without `id`
+ /// (`<job id>#<n>`, with `n` the step's 1-based position in its job).
+ pub step: String,
+ /// Whether the reference came from `env` or `condition`.
+ pub source: OutputConsumerSource,
+}
+
+/// Public output edge emitted by `ado-aw graph outputs --json`.
+///
+/// One edge describes one declared output and everything that reads it.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
+pub struct OutputEdge {
+ /// Step that declares the output.
+ pub producer_step: String,
+ /// Declared output variable name.
+ pub output_name: String,
+ /// Whether the output is marked secret.
+ pub is_secret: bool,
+ /// Whether the graph pass determined the output needs `isOutput=true`.
+ pub auto_is_output: bool,
+ /// Steps that read this output; empty (serialized as `[]`) when nothing reads it.
+ pub consumers: Vec,
+}
+
+/// Build the declared-output table, optionally filtering by producer and/or consumer.
+///
+/// One [`OutputEdge`] is produced per output declared by a step that has
+/// an id, in pipeline declaration order. Steps without an id are never
+/// treated as producers.
+///
+/// * `producer_filter` — keep only outputs declared by the step whose id
+///   equals this value exactly.
+/// * `consumer_filter` — keep only consumers whose step id equals this
+///   value exactly, and drop outputs left with no consumer. Because the
+///   comparison is against the step id, a step without an id can never
+///   match a consumer filter.
+///
+/// Without a consumer filter, outputs that nobody reads are kept with an
+/// empty `consumers` list.
+pub fn output_edges(
+ summary: &PipelineSummary,
+ producer_filter: Option<&str>,
+ consumer_filter: Option<&str>,
+) -> Vec {
+ let steps = step_records(summary);
+ let mut edges = Vec::new();
+
+ for producer in &steps {
+ // Only steps with an id are considered producers.
+ let Some(producer_step) = producer.id.as_deref() else {
+ continue;
+ };
+ if producer_filter.is_some_and(|filter| filter != producer_step) {
+ continue;
+ }
+
+ for output in &producer.step.outputs {
+ let mut consumers = Vec::new();
+ for consumer in &steps {
+ if consumer_filter.is_some_and(|filter| consumer.id.as_deref() != Some(filter)) {
+ continue;
+ }
+ // Env references are listed before condition references.
+ for r in &consumer.step.env_refs {
+ if r.step == producer_step && r.name == output.name {
+ consumers.push(OutputConsumer {
+ step: consumer.label.clone(),
+ source: OutputConsumerSource::Env,
+ });
+ }
+ }
+ for r in &consumer.step.condition_refs {
+ if r.step == producer_step && r.name == output.name {
+ consumers.push(OutputConsumer {
+ step: consumer.label.clone(),
+ source: OutputConsumerSource::Condition,
+ });
+ }
+ }
+ }
+
+ // With a consumer filter active, an output the filtered consumer
+ // does not read is irrelevant to the query.
+ if consumer_filter.is_some() && consumers.is_empty() {
+ continue;
+ }
+
+ edges.push(OutputEdge {
+ producer_step: producer_step.to_string(),
+ output_name: output.name.clone(),
+ is_secret: output.is_secret,
+ auto_is_output: output.auto_is_output,
+ consumers,
+ });
+ }
+ }
+
+ edges
+}
+
+/// Format output edges as one `producer.output → consumers: [...]`
+/// line each, listing every consumer step once in sorted order.
+/// An empty edge list renders as `(no declared outputs)`.
+pub fn render_text(edges: &[OutputEdge]) -> String {
+    if edges.is_empty() {
+        return "(no declared outputs)\n".to_string();
+    }
+
+    let mut text = String::new();
+    for edge in edges {
+        let steps = unique_consumer_steps(edge);
+        let listing = match steps.is_empty() {
+            true => "[]".to_string(),
+            false => format!("[{}]", steps.into_iter().collect::<Vec<_>>().join(", ")),
+        };
+        let line = format!(
+            "{}.{} → consumers: {}\n",
+            edge.producer_step, edge.output_name, listing
+        );
+        text.push_str(&line);
+    }
+    text
+}
+
+/// Sorted, de-duplicated set of the step labels that consume `edge`.
+fn unique_consumer_steps(edge: &OutputEdge) -> BTreeSet {
+    let mut steps = BTreeSet::new();
+    for consumer in &edge.consumers {
+        steps.insert(consumer.step.clone());
+    }
+    steps
+}
+
+/// A step paired with the names used to refer to it in the output table.
+#[derive(Clone)]
+struct StepRecord<'a> {
+ /// The step's own id, if it has one.
+ id: Option,
+ /// Display label: the id, or `<job id>#<n>` (1-based) for a step without one.
+ label: String,
+ /// Borrowed step summary the record was built from.
+ step: &'a StepSummary,
+}
+
+/// Flatten every step of the pipeline into [`StepRecord`]s, in
+/// declaration order (stage by stage, job by job, step by step).
+fn step_records(summary: &PipelineSummary) -> Vec> {
+    let mut collected = Vec::new();
+    match &summary.body {
+        PipelineBodySummary::Jobs { jobs } => jobs
+            .iter()
+            .for_each(|job| push_job_steps(&mut collected, job)),
+        PipelineBodySummary::Stages { stages } => stages
+            .iter()
+            .flat_map(|stage| stage.jobs.iter())
+            .for_each(|job| push_job_steps(&mut collected, job)),
+    }
+    collected
+}
+
+/// Append one [`StepRecord`] per step of `job` to `records`.
+fn push_job_steps<'a>(records: &mut Vec>, job: &'a JobSummary) {
+    records.extend(job.steps.iter().enumerate().map(|(position, step)| {
+        let id = step.id.clone();
+        // Steps without an id get a positional label (1-based).
+        let label = match &id {
+            Some(step_id) => step_id.clone(),
+            None => format!("{}#{}", job.id, position + 1),
+        };
+        StepRecord { id, label, step }
+    }));
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::compile::ir::summary::{
+ EdgeEntry, GraphSummary, OutputDeclSummary, OutputRefSummary, PoolSummary, StepKind,
+ };
+
+ /// Build a jobs-bodied summary with a single job `Job` holding `steps`.
+ /// The graph section is left empty; `output_edges` does not read it.
+ fn summary(steps: Vec) -> PipelineSummary {
+ let jobs = vec![JobSummary {
+ id: "Job".to_string(),
+ stage: None,
+ display_name: "Job".to_string(),
+ depends_on: Vec::new(),
+ condition: None,
+ pool: PoolSummary::VmImage {
+ image: "ubuntu-latest".to_string(),
+ },
+ steps,
+ }];
+ PipelineSummary {
+ schema_version: 1,
+ name: "test".to_string(),
+ shape: "standalone".to_string(),
+ body: PipelineBodySummary::Jobs { jobs },
+ graph: GraphSummary {
+ step_locations: Vec::new(),
+ job_edges: Vec::::new(),
+ stage_edges: Vec::new(),
+ outputs_needing_is_output: Vec::new(),
+ },
+ }
+ }
+
+ /// Build a Bash step that declares `outputs` and reads nothing.
+ fn producer(id: &str, outputs: &[&str]) -> StepSummary {
+ StepSummary {
+ id: Some(id.to_string()),
+ kind: StepKind::Bash,
+ display_name: Some(id.to_string()),
+ task: None,
+ condition: None,
+ outputs: outputs
+ .iter()
+ .map(|name| OutputDeclSummary {
+ name: (*name).to_string(),
+ is_secret: false,
+ auto_is_output: false,
+ })
+ .collect(),
+ env_refs: Vec::new(),
+ condition_refs: Vec::new(),
+ }
+ }
+
+ /// Build a Bash step that declares nothing and reads the given
+ /// `(producer step, output name)` pairs via env and condition refs.
+ fn consumer(
+ id: &str,
+ env_refs: &[(&str, &str)],
+ condition_refs: &[(&str, &str)],
+ ) -> StepSummary {
+ StepSummary {
+ id: Some(id.to_string()),
+ kind: StepKind::Bash,
+ display_name: Some(id.to_string()),
+ task: None,
+ condition: None,
+ outputs: Vec::new(),
+ env_refs: env_refs
+ .iter()
+ .map(|(step, name)| OutputRefSummary {
+ step: (*step).to_string(),
+ name: (*name).to_string(),
+ })
+ .collect(),
+ condition_refs: condition_refs
+ .iter()
+ .map(|(step, name)| OutputRefSummary {
+ step: (*step).to_string(),
+ name: (*name).to_string(),
+ })
+ .collect(),
+ }
+ }
+
+ #[test]
+ fn output_with_no_consumers_is_preserved() {
+ let s = summary(vec![producer("P", &["value"])]);
+
+ let edges = output_edges(&s, None, None);
+
+ assert_eq!(edges.len(), 1);
+ assert!(edges[0].consumers.is_empty());
+ // The empty list must still be present in the JSON output.
+ assert!(
+ serde_json::to_string(&edges)
+ .unwrap()
+ .contains("\"consumers\":[]")
+ );
+ }
+
+ #[test]
+ fn producer_filter_selects_matching_outputs() {
+ let s = summary(vec![producer("A", &["one"]), producer("B", &["two"])]);
+
+ let edges = output_edges(&s, Some("B"), None);
+
+ assert_eq!(edges.len(), 1);
+ assert_eq!(edges[0].producer_step, "B");
+ }
+
+ #[test]
+ fn consumer_filter_selects_outputs_read_by_consumer() {
+ let s = summary(vec![
+ producer("A", &["one"]),
+ producer("B", &["two"]),
+ consumer("C", &[("B", "two")], &[]),
+ ]);
+
+ let edges = output_edges(&s, None, Some("C"));
+
+ assert_eq!(edges.len(), 1);
+ assert_eq!(edges[0].producer_step, "B");
+ assert_eq!(edges[0].consumers[0].step, "C");
+ }
+
+ #[test]
+ fn consumers_include_env_and_condition_refs() {
+ let s = summary(vec![
+ producer("A", &["one"]),
+ consumer("Env", &[("A", "one")], &[]),
+ consumer("Cond", &[], &[("A", "one")]),
+ ]);
+
+ let edges = output_edges(&s, None, None);
+ let sources = edges[0]
+ .consumers
+ .iter()
+ .map(|consumer| match consumer.source {
+ OutputConsumerSource::Env => "env",
+ OutputConsumerSource::Condition => "condition",
+ })
+ .collect::>();
+
+ assert_eq!(sources, vec!["env", "condition"]);
+ }
+}
diff --git a/src/inspect/graph_query.rs b/src/inspect/graph_query.rs
new file mode 100644
index 00000000..8c7c713b
--- /dev/null
+++ b/src/inspect/graph_query.rs
@@ -0,0 +1,168 @@
+//! Graph-query rendering helpers.
+//!
+//! `cli::dispatch_graph` builds the [`PipelineSummary`] (which
+//! contains the resolved [`crate::compile::ir::summary::GraphSummary`])
+//! and asks this module to render it in the user-selected format.
+//!
+//! Text mode is human-scannable; JSON is the public schema (rendered
+//! by `cli::dispatch_graph` directly via serde); DOT is a tiny
+//! Graphviz adapter so users can pipe to `dot -Tsvg`.
+
+use crate::compile::ir::summary::{
+ EdgeEntry, GraphSummary, PipelineBodySummary, PipelineSummary, StepOutputsEntry,
+};
+
+/// Format a [`PipelineSummary`] as sectioned plain text for a terminal:
+/// step locations, job edges, stage edges, outputs needing
+/// `isOutput=true`, and a per-job step count.
+pub fn render_text(s: &PipelineSummary) -> String {
+    let mut text = format!("Pipeline: {} ({})\n", s.name, s.shape);
+    text.push('\n');
+
+    text.push_str("Step locations\n");
+    if s.graph.step_locations.is_empty() {
+        text.push_str(" (none)\n");
+    }
+    for loc in &s.graph.step_locations {
+        let stage_prefix = match loc.stage.as_deref() {
+            Some(stage) => format!("{}.", stage),
+            None => String::new(),
+        };
+        let outputs_suffix = match loc.outputs.is_empty() {
+            true => String::new(),
+            false => format!(" outputs=[{}]", loc.outputs.join(", ")),
+        };
+        let line = format!(
+            " {}{}.{}{}\n",
+            stage_prefix, loc.job, loc.step, outputs_suffix
+        );
+        text.push_str(&line);
+    }
+    text.push('\n');
+
+    text.push_str("Job edges (consumer -> producer)\n");
+    render_edges(&s.graph.job_edges, &mut text);
+    text.push('\n');
+
+    text.push_str("Stage edges (consumer -> producer)\n");
+    render_edges(&s.graph.stage_edges, &mut text);
+    text.push('\n');
+
+    text.push_str("Outputs needing isOutput=true\n");
+    render_step_outputs(&s.graph.outputs_needing_is_output, &mut text);
+
+    // Footer: how many steps each job carries, handy when comparing
+    // builds at a glance.
+    text.push('\n');
+    text.push_str("Job step counts\n");
+    match &s.body {
+        PipelineBodySummary::Jobs { jobs } => {
+            for job in jobs {
+                text.push_str(&format!(" {}: {}\n", job.id, job.steps.len()));
+            }
+        }
+        PipelineBodySummary::Stages { stages } => {
+            for stage in stages {
+                for job in &stage.jobs {
+                    let line = format!(" {}.{}: {}\n", stage.id, job.id, job.steps.len());
+                    text.push_str(&line);
+                }
+            }
+        }
+    }
+    text
+}
+
+/// Render a [`PipelineSummary`] in Graphviz DOT format.
+///
+/// Jobs-bodied pipelines emit one node per job. Stages-bodied
+/// pipelines emit one `cluster_<stage id>` subgraph per stage with
+/// that stage's jobs inside it, so `dot` lays them out together.
+/// Only `job_edges` are drawn (stage edges are not), and edges point
+/// from consumer to producer (matching the IR `depends_on` semantics).
+///
+/// NOTE(review): display names go through `escape_dot`, but job and
+/// stage ids are interpolated into quoted strings unescaped —
+/// presumably safe because ids are restricted identifiers; confirm.
+pub fn render_dot(s: &PipelineSummary) -> String {
+ let mut out = String::new();
+ out.push_str("digraph ado_aw_pipeline {\n");
+ out.push_str(" rankdir=LR;\n");
+ out.push_str(" node [shape=box, fontname=\"Helvetica\"];\n");
+
+ match &s.body {
+ PipelineBodySummary::Jobs { jobs } => {
+ for j in jobs {
+ out.push_str(&format!(
+ " \"{}\" [label=\"{}\\n({} steps)\"];\n",
+ j.id,
+ escape_dot(&j.display_name),
+ j.steps.len()
+ ));
+ }
+ }
+ PipelineBodySummary::Stages { stages } => {
+ for st in stages {
+ out.push_str(&format!(
+ " subgraph \"cluster_{}\" {{\n label=\"{}\";\n style=dashed;\n",
+ st.id,
+ escape_dot(&st.display_name),
+ ));
+ for j in &st.jobs {
+ out.push_str(&format!(
+ " \"{}.{}\" [label=\"{}\\n({} steps)\"];\n",
+ st.id,
+ j.id,
+ escape_dot(&j.display_name),
+ j.steps.len()
+ ));
+ }
+ out.push_str(" }\n");
+ }
+ }
+ }
+
+ for e in &s.graph.job_edges {
+ // Stages-bodied pipelines use `stage.job` as the node id so
+ // we don't collide on identical job ids across stages.
+ // Edges only carry the bare job id, so the lookup below resolves
+ // it to the FIRST stage containing a job with that id, and falls
+ // back to the bare id when no stage contains it.
+ let (cons, prod) = match &s.body {
+ PipelineBodySummary::Jobs { .. } => (e.consumer.clone(), e.producer.clone()),
+ PipelineBodySummary::Stages { stages } => {
+ let lookup = |job: &str| -> String {
+ for st in stages {
+ if st.jobs.iter().any(|j| j.id == job) {
+ return format!("{}.{}", st.id, job);
+ }
+ }
+ job.to_string()
+ };
+ (lookup(&e.consumer), lookup(&e.producer))
+ }
+ };
+ out.push_str(&format!(" \"{}\" -> \"{}\";\n", cons, prod));
+ }
+ out.push_str("}\n");
+ out
+}
+
+/// Append one `consumer -> producer` line per edge to `out`, or
+/// `(none)` when there are no edges.
+fn render_edges(edges: &[EdgeEntry], out: &mut String) {
+    if edges.is_empty() {
+        out.push_str(" (none)\n");
+        return;
+    }
+    for edge in edges {
+        let line = format!(" {} -> {}\n", edge.consumer, edge.producer);
+        out.push_str(&line);
+    }
+}
+
+/// Append one `step: out1, out2` line per entry to `out`, or `(none)`
+/// when there are no entries.
+fn render_step_outputs(entries: &[StepOutputsEntry], out: &mut String) {
+    if entries.is_empty() {
+        out.push_str(" (none)\n");
+        return;
+    }
+    for entry in entries {
+        let line = format!(" {}: {}\n", entry.step, entry.outputs.join(", "));
+        out.push_str(&line);
+    }
+}
+
+/// Escape `s` for use inside a double-quoted DOT string.
+///
+/// Backslashes are doubled first and double quotes are then prefixed
+/// with a backslash. Escaping the backslash matters: without it a name
+/// ending in `\` would escape the closing quote, and sequences such as
+/// `\n` inside a display name would be interpreted by Graphviz as
+/// label escapes instead of being shown literally.
+fn escape_dot(s: &str) -> String {
+    // Order matters: escape backslashes before adding the ones that
+    // protect the quotes, otherwise those would be doubled too.
+    s.replace('\\', "\\\\").replace('"', "\\\"")
+}
+
+/// Borrow the [`GraphSummary`] embedded in a [`PipelineSummary`].
+#[allow(dead_code)] // Re-export shorthand for future call sites.
+pub fn graph(s: &PipelineSummary) -> &GraphSummary {
+ &s.graph
+}
diff --git a/src/inspect/lint.rs b/src/inspect/lint.rs
new file mode 100644
index 00000000..79fcab34
--- /dev/null
+++ b/src/inspect/lint.rs
@@ -0,0 +1,489 @@
+//! Structural lint rules over [`PipelineSummary`].
+//!
+//! `build_pipeline_ir()` and [`PipelineSummary::from_pipeline`] already run the
+//! compile-time IR graph validation pass. These lint rules are intentionally
+//! lighter-weight, user-facing quality checks; a few are defensive guards for
+//! callers that might construct summaries without the normal graph pass.
+
+use std::collections::{BTreeMap, BTreeSet};
+
+use serde::{Deserialize, Serialize};
+
+use crate::compile::ir::summary::{JobSummary, PipelineSummary, StepSummary};
+
+/// Severity of a lint finding.
+///
+/// Serialized in lowercase (`error` / `warning` / `info`). The derived
+/// ordering follows declaration order, so `Error < Warning < Info`.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
+#[serde(rename_all = "lowercase")]
+pub enum LintSeverity {
+ /// Counted under `errors` in [`LintSummary`].
+ Error,
+ /// Counted under `warnings` in [`LintSummary`].
+ Warning,
+ /// Counted under `infos` in [`LintSummary`].
+ Info,
+}
+
+impl LintSeverity {
+ /// Lowercase label for the severity, as used in text output
+ /// (`"error"`, `"warning"` or `"info"`).
+ pub fn as_str(self) -> &'static str {
+ match self {
+ Self::Error => "error",
+ Self::Warning => "warning",
+ Self::Info => "info",
+ }
+ }
+}
+
+/// Where in the pipeline a finding applies. Every component is
+/// optional and is omitted from the serialized form when `None`.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub struct LintLocation {
+ /// Stage the finding applies to, if any.
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub stage: Option,
+ /// Job the finding applies to, if any.
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub job: Option,
+ /// Step the finding applies to, if any.
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub step: Option,
+}
+
+/// A single diagnostic produced by a lint rule.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub struct LintFinding {
+ /// How serious the finding is.
+ pub severity: LintSeverity,
+ /// Kebab-case rule identifier, e.g. `unused-output`.
+ pub code: String,
+ /// Human-readable description of the problem.
+ pub message: String,
+ /// Location of the finding; omitted from the serialized form when `None`.
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub location: Option,
+}
+
+/// Per-severity counts of the findings in a report.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+pub struct LintSummary {
+ /// Number of findings with severity `Error`.
+ pub errors: u32,
+ /// Number of findings with severity `Warning`.
+ pub warnings: u32,
+ /// Number of findings with severity `Info`.
+ pub infos: u32,
+}
+
+/// Full lint result: the individual findings plus their tally.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub struct LintReport {
+ /// Findings in the order the rules emitted them.
+ pub findings: Vec,
+ /// Per-severity counts of `findings`.
+ pub summary: LintSummary,
+}
+
+/// Apply all lint rules to a public pipeline summary and return their
+/// findings, grouped by rule in the fixed order listed below.
+pub fn lint(summary: &PipelineSummary) -> Vec {
+    let mut findings = Vec::new();
+    // Rule order determines the order of findings in the output.
+    for rule in [
+        rule_unused_output,
+        rule_missing_is_output,
+        rule_anonymous_producer,
+        rule_step_id_collisions,
+    ] {
+        rule(summary, &mut findings);
+    }
+    findings
+}
+
+/// Lint `summary` and bundle the findings with their per-severity tally.
+pub fn report(summary: &PipelineSummary) -> LintReport {
+    let findings = lint(summary);
+    LintReport {
+        // Tally first (borrows `findings`), then move the list in.
+        summary: summarize_findings(&findings),
+        findings,
+    }
+}
+
+/// Count `findings` by severity.
+pub fn summarize_findings(findings: &[LintFinding]) -> LintSummary {
+    let zero = LintSummary {
+        errors: 0,
+        warnings: 0,
+        infos: 0,
+    };
+    findings.iter().fold(zero, |mut tally, finding| {
+        let counter = match finding.severity {
+            LintSeverity::Error => &mut tally.errors,
+            LintSeverity::Warning => &mut tally.warnings,
+            LintSeverity::Info => &mut tally.infos,
+        };
+        *counter += 1;
+        tally
+    })
+}
+
+/// Format a lint report as text with one section per severity, in the
+/// order Errors, Warnings, Infos.
+pub fn render_text(report: &LintReport) -> String {
+    let sections = [
+        (LintSeverity::Error, "Errors"),
+        (LintSeverity::Warning, "Warnings"),
+        (LintSeverity::Info, "Infos"),
+    ];
+    let mut text = String::new();
+    for (severity, heading) in sections {
+        render_group(&mut text, severity, heading, &report.findings);
+    }
+    text
+}
+
+/// Append one section to `out`: the heading line, then one line per
+/// finding of the given `severity`, or `(none)` when there are none.
+fn render_group(out: &mut String, severity: LintSeverity, heading: &str, findings: &[LintFinding]) {
+    out.push_str(heading);
+    out.push('\n');
+
+    let mut matching = findings
+        .iter()
+        .filter(|finding| finding.severity == severity)
+        .peekable();
+    if matching.peek().is_none() {
+        out.push_str(" (none)\n");
+        return;
+    }
+    for finding in matching {
+        let line = format!(
+            "{} {}{}: {}\n",
+            finding.severity.as_str(),
+            finding.code,
+            format_location(finding.location.as_ref()),
+            finding.message
+        );
+        out.push_str(&line);
+    }
+}
+
+/// Format a location as ` [stage=… job=… step=…]`, listing only the
+/// components that are present. Returns an empty string when there is
+/// no location or none of its components is set.
+fn format_location(location: Option<&LintLocation>) -> String {
+    match location {
+        None => String::new(),
+        Some(loc) => {
+            let mut parts = Vec::new();
+            parts.extend(loc.stage.iter().map(|stage| format!("stage={stage}")));
+            parts.extend(loc.job.iter().map(|job| format!("job={job}")));
+            parts.extend(loc.step.iter().map(|step| format!("step={step}")));
+            match parts.is_empty() {
+                true => String::new(),
+                false => format!(" [{}]", parts.join(" ")),
+            }
+        }
+    }
+}
+
+/// Lint rule `unused-output`: warn about every output declared by a
+/// step with an id that no step reads.
+fn rule_unused_output(summary: &PipelineSummary, findings: &mut Vec) {
+    let consumed = consumed_outputs(summary);
+    for (job, step) in all_steps(summary) {
+        // Steps without an id cannot be referenced, so they are skipped.
+        if let Some(step_id) = step.id.as_deref() {
+            let unused = step.outputs.iter().filter(|output| {
+                !consumed.contains(&(step_id.to_string(), output.name.clone()))
+            });
+            for output in unused {
+                findings.push(LintFinding {
+                    severity: LintSeverity::Warning,
+                    code: "unused-output".to_string(),
+                    message: format!(
+                        "output '{}.{}' is declared but never read",
+                        step_id, output.name
+                    ),
+                    location: Some(location_for(job, Some(step_id))),
+                });
+            }
+        }
+    }
+}
+
+/// Lint rule: every output consumed across step boundaries must be
+/// declared with `isOutput=true` so ADO publishes it as a step output.
+///
+/// In the normal compile path `PipelineSummary::from_pipeline` already
+/// patches `auto_is_output = true` on every affected declaration based
+/// on the graph's `outputs_needing_is_output` set, so this rule will
+/// stay quiet for well-formed inputs. We still emit a finding when the
+/// flag is unset so that:
+///
+/// - Summaries constructed without going through `from_pipeline` (e.g.
+/// deserialised straight from disk) are still validated.
+/// - Future drift between the summary patcher and graph codegen — for
+/// instance a new declaration kind that the patcher forgets to touch
+/// — produces a real, surfaced finding instead of silently skipping.
+///
+/// Findings use code `missing-is-output` at `Info` severity. An entry
+/// of `outputs_needing_is_output` with no matching declaration is
+/// skipped without a finding.
+fn rule_missing_is_output(summary: &PipelineSummary, findings: &mut Vec) {
+ let declarations = output_declarations(summary);
+ for needed in &summary.graph.outputs_needing_is_output {
+ for output_name in &needed.outputs {
+ // Report only declarations that exist AND lack the flag.
+ if let Some((job, step, decl)) =
+ declarations.get(&(needed.step.clone(), output_name.clone()))
+ && !decl.auto_is_output
+ {
+ findings.push(LintFinding {
+ severity: LintSeverity::Info,
+ code: "missing-is-output".to_string(),
+ message: format!(
+ "output '{}.{}' is consumed across steps but is not marked isOutput=true",
+ needed.step, output_name
+ ),
+ location: Some(location_for(job, step.id.as_deref())),
+ });
+ }
+ }
+ }
+}
+
+/// Lint rule `anonymous-producer`: flag, as an error, every step that
+/// declares outputs but has no id.
+///
+/// The normal graph pass already rejects such steps before lint runs;
+/// this is a defensive check for callers that lint a `PipelineSummary`
+/// produced without `build_graph` validation.
+fn rule_anonymous_producer(summary: &PipelineSummary, findings: &mut Vec) {
+    for (job, step) in all_steps(summary) {
+        let declares_outputs = !step.outputs.is_empty();
+        if step.id.is_some() || !declares_outputs {
+            continue;
+        }
+        findings.push(LintFinding {
+            severity: LintSeverity::Error,
+            code: "anonymous-producer".to_string(),
+            message: "step declares outputs but has no step id/name".to_string(),
+            location: Some(location_for(job, None)),
+        });
+    }
+}
+
+/// Lint rule `step-id-collisions`: flag, as an error, every step id
+/// that has already been seen earlier in the pipeline.
+///
+/// One finding is emitted per repeated occurrence (so an id used three
+/// times yields two findings); each is located at the repeat and names
+/// the job where the id was first seen.
+fn rule_step_id_collisions(summary: &PipelineSummary, findings: &mut Vec) {
+ // Track the first-seen job for each step id, then emit one finding
+ // per collision that names BOTH the first-seen location (in the
+ // message) and the colliding step (as the finding's location) —
+ // otherwise the finding only points at the second occurrence and
+ // operators have to grep the rest of the pipeline to find the
+ // duplicate.
+ let mut first_seen: BTreeMap = BTreeMap::new();
+ for (job, step) in all_steps(summary) {
+ let Some(step_id) = step.id.as_deref() else {
+ continue;
+ };
+ // `producer` here is the job of the FIRST step that used this id.
+ if let Some(producer) = first_seen.get(step_id) {
+ // The normal graph pass rejects pipeline-wide duplicate step ids.
+ // Keep this defensive check for summaries that bypassed the graph.
+ let producer_location = match &producer.stage {
+ Some(stage) => format!("{stage}.{}", producer.id),
+ None => producer.id.clone(),
+ };
+ findings.push(LintFinding {
+ severity: LintSeverity::Error,
+ code: "step-id-collisions".to_string(),
+ message: format!(
+ "step id '{step_id}' is used more than once in the pipeline (also seen at {producer_location})"
+ ),
+ location: Some(location_for(job, Some(step_id))),
+ });
+ } else {
+ first_seen.insert(step_id.to_string(), job);
+ }
+ }
+}
+
+/// Collects every `(producer step id, output name)` pair that something in
+/// the pipeline reads.
+///
+/// Two sources are merged into one set:
+///
+/// - `graph.outputs_needing_is_output`, which (per the compiler's contract)
+///   lists the cross-step / cross-job consumers that get patched with
+///   `isOutput=true` and deliberately leaves out same-job consumers.
+/// - every step's `env_refs` and `condition_refs`, so references that stay
+///   inside one job are counted as well.
+fn consumed_outputs(summary: &PipelineSummary) -> BTreeSet<(String, String)> {
+    let mut consumed = BTreeSet::new();
+
+    // Outputs the graph already knows are read across a boundary.
+    for entry in &summary.graph.outputs_needing_is_output {
+        for output in &entry.outputs {
+            consumed.insert((entry.step.clone(), output.clone()));
+        }
+    }
+
+    // References recorded on the steps themselves, regardless of which job
+    // the producer lives in.
+    for (_, step) in all_steps(summary) {
+        let refs = step.env_refs.iter().chain(step.condition_refs.iter());
+        consumed.extend(refs.map(|r| (r.step.clone(), r.name.clone())));
+    }
+
+    consumed
+}
+
+/// Builds a lookup from `(step id, output name)` to the job, step and
+/// declaration that define that output.
+///
+/// Steps without an id contribute nothing. When the same key is declared
+/// more than once, the declaration visited last replaces earlier ones.
+fn output_declarations(
+    summary: &PipelineSummary,
+) -> BTreeMap<
+    (String, String),
+    (
+        &JobSummary,
+        &StepSummary,
+        &crate::compile::ir::summary::OutputDeclSummary,
+    ),
+> {
+    let mut by_key = BTreeMap::new();
+    for (job, step) in all_steps(summary) {
+        let Some(step_id) = step.id.as_deref() else {
+            continue;
+        };
+        for decl in step.outputs.iter() {
+            let key = (step_id.to_string(), decl.name.clone());
+            by_key.insert(key, (job, step, decl));
+        }
+    }
+    by_key
+}
+
+/// Flattens the pipeline into `(job, step)` pairs, in the order
+/// `all_jobs()` yields jobs and each job lists its steps.
+fn all_steps(summary: &PipelineSummary) -> Vec<(&JobSummary, &StepSummary)> {
+    let mut pairs = Vec::new();
+    for job in summary.all_jobs() {
+        for step in job.steps.iter() {
+            pairs.push((job, step));
+        }
+    }
+    pairs
+}
+
+/// Builds the [`LintLocation`] for a finding inside `job`, optionally
+/// narrowed to a step id.
+fn location_for(job: &JobSummary, step: Option<&str>) -> LintLocation {
+    let stage = job.stage.clone();
+    let job_id = job.id.clone();
+    let step = step.map(|id| id.to_string());
+    LintLocation {
+        stage,
+        job: Some(job_id),
+        step,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::compile::ir::summary::{
+        GraphSummary, OutputDeclSummary, OutputRefSummary, PipelineBodySummary, PoolSummary,
+        StepKind, StepOutputsEntry,
+    };
+
+    /// A declared output nobody reads yields exactly one warning.
+    #[test]
+    fn unused_output_produces_exactly_one_inspect_lint_finding() {
+        let summary =
+            summary_with_steps(vec![step_with_output("producer", "value", false)], vec![]);
+        let findings = lint(&summary);
+        assert_eq!(findings.len(), 1);
+        assert_eq!(findings[0].code, "unused-output");
+        assert_eq!(findings[0].severity, LintSeverity::Warning);
+    }
+
+    /// A pipeline with nothing to report produces an empty report.
+    #[test]
+    fn no_findings_inspect_lint_emits_empty_list_and_zero_errors() {
+        let summary = summary_with_steps(vec![plain_step("only")], vec![]);
+        let report = report(&summary);
+        assert!(report.findings.is_empty());
+        assert_eq!(report.summary.errors, 0);
+    }
+
+    /// Outputs listed in `outputs_needing_is_output` count as consumed.
+    #[test]
+    fn consumed_outputs_do_not_emit_unused_output_inspect_lint() {
+        let summary = summary_with_steps(
+            vec![step_with_output("producer", "pull_request_id", true)],
+            vec![StepOutputsEntry {
+                step: "producer".to_string(),
+                outputs: vec!["pull_request_id".to_string()],
+            }],
+        );
+        let findings = lint(&summary);
+        assert!(!findings.iter().any(|f| f.code == "unused-output"));
+    }
+
+    #[test]
+    fn same_job_env_ref_does_not_emit_unused_output_inspect_lint() {
+        // Regression: outputs consumed by a peer step **within the
+        // same job** (via env_refs / condition_refs) do not appear in
+        // graph.outputs_needing_is_output — ADO does not require
+        // isOutput=true for same-job reads. consumed_outputs must
+        // still treat them as consumed so we do not emit a
+        // false-positive `unused-output` finding.
+        let mut producer = step_with_output("producer", "value", false);
+        // `plain_step` already assigns this id; it is restated here so the
+        // producer id the consumer refers to is visible in the test body.
+        producer.id = Some("producer".to_string());
+        let mut consumer = plain_step("consumer");
+        consumer.env_refs.push(OutputRefSummary {
+            step: "producer".to_string(),
+            name: "value".to_string(),
+        });
+
+        let summary = summary_with_steps(vec![producer, consumer], vec![]);
+        let findings = lint(&summary);
+        assert!(
+            !findings.iter().any(|f| f.code == "unused-output"),
+            "same-job env_ref consumer must suppress unused-output, got {findings:?}"
+        );
+    }
+
+    /// Same as the env_ref regression above, but the reference lives in the
+    /// consumer's `condition_refs`.
+    #[test]
+    fn same_job_condition_ref_does_not_emit_unused_output_inspect_lint() {
+        let producer = step_with_output("producer", "value", false);
+        let mut consumer = plain_step("consumer");
+        consumer.condition_refs.push(OutputRefSummary {
+            step: "producer".to_string(),
+            name: "value".to_string(),
+        });
+
+        let summary = summary_with_steps(vec![producer, consumer], vec![]);
+        let findings = lint(&summary);
+        assert!(
+            !findings.iter().any(|f| f.code == "unused-output"),
+            "same-job condition_ref consumer must suppress unused-output, got {findings:?}"
+        );
+    }
+
+    /// End-to-end check against the checked-in create-pull-request fixture.
+    #[tokio::test]
+    async fn create_pull_request_fixture_has_no_unused_output_inspect_lint() {
+        let fixture = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
+            .join("tests")
+            .join("safe-outputs")
+            .join("create-pull-request.md");
+        let (_fm, pipeline) = crate::compile::build_pipeline_ir(&fixture)
+            .await
+            .unwrap();
+        let summary = PipelineSummary::from_pipeline(&pipeline).unwrap();
+        let findings = lint(&summary);
+        assert!(!findings.iter().any(|f| f.code == "unused-output"));
+    }
+
+    /// A finding survives a JSON serialize / deserialize round trip unchanged.
+    #[test]
+    fn lint_finding_json_serialization_round_trips_for_inspect() {
+        let finding = LintFinding {
+            severity: LintSeverity::Info,
+            code: "no-condition-references".to_string(),
+            message: "example".to_string(),
+            location: Some(LintLocation {
+                stage: Some("Stage".to_string()),
+                job: Some("Job".to_string()),
+                step: None,
+            }),
+        };
+        let json = serde_json::to_string(&finding).unwrap();
+        let round_trip: LintFinding = serde_json::from_str(&json).unwrap();
+        assert_eq!(round_trip, finding);
+    }
+
+    /// Builds a single-job, stage-less summary holding `steps`, with the
+    /// given `outputs_needing_is_output` and an otherwise empty graph.
+    fn summary_with_steps(
+        steps: Vec<StepSummary>,
+        outputs_needing_is_output: Vec<StepOutputsEntry>,
+    ) -> PipelineSummary {
+        PipelineSummary {
+            schema_version: 1,
+            name: "test".to_string(),
+            shape: "standalone".to_string(),
+            body: PipelineBodySummary::Jobs {
+                jobs: vec![JobSummary {
+                    id: "Job".to_string(),
+                    stage: None,
+                    display_name: "Job".to_string(),
+                    depends_on: vec![],
+                    condition: None,
+                    pool: PoolSummary::VmImage {
+                        image: "ubuntu-latest".to_string(),
+                    },
+                    steps,
+                }],
+            },
+            graph: GraphSummary {
+                step_locations: vec![],
+                job_edges: vec![],
+                stage_edges: vec![],
+                outputs_needing_is_output,
+            },
+        }
+    }
+
+    /// A Bash step with id and display name `id`, no outputs and no refs.
+    fn plain_step(id: &str) -> StepSummary {
+        StepSummary {
+            id: Some(id.to_string()),
+            kind: StepKind::Bash,
+            display_name: Some(id.to_string()),
+            task: None,
+            condition: None,
+            outputs: vec![],
+            env_refs: vec![],
+            condition_refs: vec![],
+        }
+    }
+
+    /// A [`plain_step`] that additionally declares one non-secret output.
+    fn step_with_output(id: &str, output: &str, auto_is_output: bool) -> StepSummary {
+        let mut step = plain_step(id);
+        step.outputs.push(OutputDeclSummary {
+            name: output.to_string(),
+            is_secret: false,
+            auto_is_output,
+        });
+        step
+    }
+}
diff --git a/src/inspect/mod.rs b/src/inspect/mod.rs
new file mode 100644
index 00000000..0857ad6c
--- /dev/null
+++ b/src/inspect/mod.rs
@@ -0,0 +1,42 @@
+//! Inspection commands: typed-IR queries over agent source files.
+//!
+//! This module is the home for every read-only command that loads an
+//! agent's `.md`, builds the typed [`crate::compile::ir::Pipeline`]
+//! IR, and answers a question about it without producing any YAML on
+//! disk.
+//!
+//! Layout follows `src/audit/`:
+//!
+//! - `cli.rs` — dispatchers for the public CLI subcommands.
+//! - `graph_query.rs` — the `ado-aw graph` family (text/json/dot).
+//! - `graph_deps.rs` — `ado-aw graph deps`: per-step upstream /
+//! downstream walks over the typed graph.
+//! - `graph_outputs.rs` — `ado-aw graph outputs`: producer/consumer
+//! relationships for declared step outputs.
+//! - `trace.rs` — `ado-aw trace`: joins build telemetry from
+//! [`crate::audit`] with the typed-IR graph for failure tracing.
+//! - `whatif.rs` — `ado-aw whatif`: static reachability ("which jobs
+//! skip if X fails?") from the typed `Condition` + `depends_on`.
+//! - `lint.rs` — `ado-aw lint`: structural checks layered on top of
+//! the compile-stage validators.
+//! - `catalog.rs` — `ado-aw catalog`: programmatic listing of
+//! in-tree registries (safe-outputs, runtimes, tools, engines,
+//! models).
+
+pub mod catalog;
+pub mod cli;
+pub mod graph_deps;
+pub mod graph_outputs;
+pub mod graph_query;
+pub mod lint;
+pub mod trace;
+pub mod whatif;
+
+pub use cli::{
+ CatalogOptions, GraphDepsOptions, GraphFormat, GraphOptions, GraphOutputsOptions,
+ InspectOptions, LintOptions, TraceOptions, WhatIfOptions, build_catalog, build_graph_deps,
+ build_graph_dump, build_graph_outputs, build_graph_summary, build_inspect, build_lint,
+ build_trace, build_whatif, dispatch_catalog, dispatch_graph, dispatch_graph_deps,
+ dispatch_graph_outputs, dispatch_inspect, dispatch_lint, dispatch_trace, dispatch_whatif,
+};
+pub use graph_deps::GraphDepsDirection;
diff --git a/src/inspect/trace.rs b/src/inspect/trace.rs
new file mode 100644
index 00000000..22a7f335
--- /dev/null
+++ b/src/inspect/trace.rs
@@ -0,0 +1,397 @@
+//! `ado-aw trace`: runtime audit data joined with typed-IR graph facts.
+
+use std::collections::BTreeSet;
+
+use serde::Serialize;
+
+use crate::audit::model::{AuditData, JobData};
+use crate::compile::ir::summary::StepLocationEntry;
+use crate::inspect::graph_deps::{self, GraphDepsDirection, StepDependency};
+
+/// Top-level result of `ado-aw trace` for a single build.
+#[derive(Debug, Clone, PartialEq, Serialize)]
+pub struct TraceReport {
+    /// Build id taken from the audit overview.
+    pub build_id: u64,
+    /// One report per failed job; left out of the serialized form when empty.
+    #[serde(skip_serializing_if = "Vec::is_empty", default)]
+    pub failing_jobs: Vec<TraceJobReport>,
+    /// Trace for the requested step; `None` when no step was requested or
+    /// the step could not be resolved.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub step: Option<TraceStepReport>,
+}
+
+/// A failed job together with the jobs related to it in each direction.
+#[derive(Debug, Clone, PartialEq, Serialize)]
+pub struct TraceJobReport {
+    /// Name of the job as recorded in the audit data.
+    pub job: String,
+    /// Stage the job belongs to, when one could be determined.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub stage: Option<String>,
+    /// Status text for the job.
+    pub status: String,
+    /// Related upstream jobs; left out of the serialized form when empty.
+    #[serde(skip_serializing_if = "Vec::is_empty", default)]
+    pub upstream: Vec<TraceUpstreamJob>,
+    /// Related downstream jobs; left out of the serialized form when empty.
+    #[serde(skip_serializing_if = "Vec::is_empty", default)]
+    pub downstream: Vec<TraceDownstreamJob>,
+}
+
+/// An upstream job related to a traced job or step.
+#[derive(Debug, Clone, PartialEq, Serialize)]
+pub struct TraceUpstreamJob {
+    /// Id of the upstream job.
+    pub job: String,
+    /// Runtime classification of the job, or `"unknown"` when no runtime
+    /// record was found for it.
+    pub status: String,
+}
+
+/// A downstream job related to a traced job or step.
+#[derive(Debug, Clone, PartialEq, Serialize)]
+pub struct TraceDownstreamJob {
+    /// Id of the downstream job.
+    pub job: String,
+    /// Runtime classification of the job, or `"expected to skip"` when no
+    /// runtime record was found for it.
+    pub classification: String,
+}
+
+/// Trace for one step: where it lives, how its job fared at runtime, and
+/// what it is connected to at both job and step level.
+#[derive(Debug, Clone, PartialEq, Serialize)]
+pub struct TraceStepReport {
+    /// Id of the traced step.
+    pub step: String,
+    /// Stage / job that contains the step according to the IR graph.
+    pub location: TraceStepLocation,
+    /// Runtime classification of the containing job, or `"unknown"` when
+    /// no runtime job matched the location.
+    pub status: String,
+    /// Jobs upstream of the containing job; omitted when empty.
+    #[serde(skip_serializing_if = "Vec::is_empty", default)]
+    pub upstream: Vec<TraceUpstreamJob>,
+    /// Jobs downstream of the containing job; omitted when empty.
+    #[serde(skip_serializing_if = "Vec::is_empty", default)]
+    pub downstream: Vec<TraceDownstreamJob>,
+    /// Transitive upstream steps from the dependency analysis; omitted
+    /// when empty.
+    #[serde(skip_serializing_if = "Vec::is_empty", default)]
+    pub upstream_steps: Vec<StepDependency>,
+    /// Transitive downstream steps from the dependency analysis; omitted
+    /// when empty.
+    #[serde(skip_serializing_if = "Vec::is_empty", default)]
+    pub downstream_steps: Vec<StepDependency>,
+}
+
+/// Stage / job coordinates of a step inside the pipeline.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
+pub struct TraceStepLocation {
+    /// Enclosing stage, when the location has one; omitted from the
+    /// serialized form otherwise.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub stage: Option<String>,
+    /// Id of the job that contains the step.
+    pub job: String,
+}
+
+/// Builds the trace for a build from its audit data.
+///
+/// Every job for which `failed()` is true gets a [`TraceJobReport`]. When
+/// `step` is given, a step trace is attempted as well; it stays `None` if
+/// the step cannot be resolved.
+pub fn build_trace_report(audit: &AuditData, step: Option<&str>) -> TraceReport {
+    let failing_jobs = audit
+        .jobs
+        .iter()
+        .filter_map(|job| job.failed().then(|| job_report(audit, job)))
+        .collect();
+
+    let step = step.and_then(|requested| build_step_report(audit, requested));
+    let build_id = audit.overview.build_id;
+
+    TraceReport {
+        build_id,
+        failing_jobs,
+        step,
+    }
+}
+
+/// Renders a [`TraceReport`] as human-readable text.
+///
+/// `audit` supplies the IR graph source path for the header line.
+/// `requested_step` controls whether the "Step trace" section is printed at
+/// all, so a requested step that is missing from the report is called out
+/// explicitly instead of being silently dropped.
+pub fn render_text(
+    audit: &AuditData,
+    report: &TraceReport,
+    requested_step: Option<&str>,
+) -> String {
+    let mut text = format!("Trace for build {}\n", report.build_id);
+    let graph_line = match audit.pipeline_graph.as_ref() {
+        Some(graph) => format!("IR graph: {}\n", graph.source_path),
+        None => String::from("IR graph: unavailable (runtime-only trace)\n"),
+    };
+    text.push_str(&graph_line);
+    text.push('\n');
+
+    text.push_str("Failing job chain\n");
+    if report.failing_jobs.is_empty() {
+        text.push_str(" (no failed jobs)\n");
+    }
+    for failed in &report.failing_jobs {
+        render_job_report(failed, &mut text);
+    }
+
+    // The step section only exists when the caller asked for a step.
+    if requested_step.is_none() {
+        return text;
+    }
+    text.push('\n');
+    text.push_str("Step trace\n");
+
+    let Some(traced) = report.step.as_ref() else {
+        text.push_str(" (step not found in local IR graph)\n");
+        return text;
+    };
+    let stage_prefix = match traced.location.stage.as_deref() {
+        Some(stage) => format!("{stage}."),
+        None => String::new(),
+    };
+    let headline = format!(
+        " {} in {}{}: {}\n",
+        traced.step, stage_prefix, traced.location.job, traced.status
+    );
+    text.push_str(&headline);
+    render_upstream(&traced.upstream, &mut text);
+    render_downstream(&traced.downstream, &mut text);
+    render_step_dependencies("upstream steps", &traced.upstream_steps, &mut text);
+    render_step_dependencies("downstream steps", &traced.downstream_steps, &mut text);
+
+    text
+}
+
+/// Appends one job's headline (name, optional `[stage]`, status) followed
+/// by its upstream and downstream lines to `out`.
+fn render_job_report(job: &TraceJobReport, out: &mut String) {
+    let stage_suffix = match job.stage.as_deref() {
+        Some(stage) => format!(" [{stage}]"),
+        None => String::new(),
+    };
+    let headline = format!(" {}{}: {}\n", job.job, stage_suffix, job.status);
+    out.push_str(&headline);
+    render_upstream(&job.upstream, out);
+    render_downstream(&job.downstream, out);
+}
+
+/// Appends the `upstream:` line to `out`.
+///
+/// Each job is written as `name (status)`, joined with `", "`; an empty
+/// slice is rendered as `(none)`.
+fn render_upstream(upstream: &[TraceUpstreamJob], out: &mut String) {
+    if upstream.is_empty() {
+        out.push_str(" upstream: (none)\n");
+    } else {
+        out.push_str(&format!(
+            " upstream: {}\n",
+            upstream
+                .iter()
+                .map(|job| format!("{} ({})", job.job, job.status))
+                // `join` is a slice method, so collect into a Vec first.
+                .collect::<Vec<_>>()
+                .join(", ")
+        ));
+    }
+}
+
+/// Appends the `downstream:` line to `out`.
+///
+/// Each job is written as `name (classification)`, joined with `", "`; an
+/// empty slice is rendered as `(none)`.
+fn render_downstream(downstream: &[TraceDownstreamJob], out: &mut String) {
+    if downstream.is_empty() {
+        out.push_str(" downstream: (none)\n");
+    } else {
+        out.push_str(&format!(
+            " downstream: {}\n",
+            downstream
+                .iter()
+                .map(|job| format!("{} ({})", job.job, job.classification))
+                // `join` is a slice method, so collect into a Vec first.
+                .collect::<Vec<_>>()
+                .join(", ")
+        ));
+    }
+}
+
+/// Appends a `{label}: ...` line listing step dependencies to `out`.
+///
+/// Nothing is written when `steps` is empty. Each entry is rendered as
+/// `[stage.]job.step`, with ` via <output>` appended when the dependency
+/// records the output it flows through.
+fn render_step_dependencies(label: &str, steps: &[StepDependency], out: &mut String) {
+    if steps.is_empty() {
+        return;
+    }
+    out.push_str(&format!(
+        " {label}: {}\n",
+        steps
+            .iter()
+            .map(|step| {
+                let stage = step
+                    .stage
+                    .as_deref()
+                    .map(|stage| format!("{stage}."))
+                    .unwrap_or_default();
+                match &step.via_output {
+                    Some(via) => format!("{}{}.{} via {}", stage, step.job, step.step, via),
+                    None => format!("{}{}.{}", stage, step.job, step.step),
+                }
+            })
+            // `join` is a slice method, so collect into a Vec first.
+            .collect::<Vec<_>>()
+            .join(", ")
+    ));
+}
+
+/// Builds the trace for a single step id.
+///
+/// Returns `None` when the audit carries no pipeline graph, or when the
+/// graph's step locations contain no entry for `step_id`. When the location
+/// is found but no runtime job matches it, the status is `"unknown"` and the
+/// job-level upstream / downstream lists are empty; the step-level lists
+/// still come from the dependency analysis and fall back to empty when that
+/// analysis yields no report.
+fn build_step_report(audit: &AuditData, step_id: &str) -> Option<TraceStepReport> {
+    let pipeline_graph = audit.pipeline_graph.as_ref()?;
+    let location = pipeline_graph
+        .summary
+        .graph
+        .step_locations
+        .iter()
+        .find(|location| location.step == step_id)?;
+    // Runtime job for the location, if there is one; reused by the three
+    // job-derived fields below.
+    let job = runtime_job_for_location(audit, location);
+    Some(TraceStepReport {
+        step: step_id.to_string(),
+        location: TraceStepLocation {
+            stage: location.stage.clone(),
+            job: location.job.clone(),
+        },
+        status: job
+            .map(JobData::classification)
+            .unwrap_or_else(|| String::from("unknown")),
+        upstream: job
+            .map(|job| upstream_reports(audit, job))
+            .unwrap_or_default(),
+        downstream: job
+            .map(|job| downstream_reports(audit, job))
+            .unwrap_or_default(),
+        upstream_steps: graph_deps::analyze(
+            &pipeline_graph.summary,
+            step_id,
+            GraphDepsDirection::Upstream,
+        )
+        .map(|report| report.transitive_steps)
+        .unwrap_or_default(),
+        downstream_steps: graph_deps::analyze(
+            &pipeline_graph.summary,
+            step_id,
+            GraphDepsDirection::Downstream,
+        )
+        .map(|report| report.transitive_steps)
+        .unwrap_or_default(),
+    })
+}
+
+/// Assembles the [`TraceJobReport`] for one runtime job: its name, stage,
+/// status, and the related jobs in both directions.
+fn job_report(audit: &AuditData, job: &JobData) -> TraceJobReport {
+    let name = job.name.clone();
+    let stage = stage_for_job(audit, job);
+    let status = job_status(job);
+    let upstream = upstream_reports(audit, job);
+    let downstream = downstream_reports(audit, job);
+    TraceJobReport {
+        job: name,
+        stage,
+        status,
+        upstream,
+        downstream,
+    }
+}
+
+/// Lists the jobs related to `job` in the upstream direction, each paired
+/// with its runtime classification.
+///
+/// A job id with no runtime record gets the status `"unknown"`.
+fn upstream_reports(audit: &AuditData, job: &JobData) -> Vec<TraceUpstreamJob> {
+    collect_related_jobs(audit, job, Direction::Upstream)
+        .into_iter()
+        .map(|job_id| TraceUpstreamJob {
+            // `status` borrows `job_id`, so it is computed before the id is
+            // moved into the `job` field.
+            status: find_runtime_job(audit, &job_id)
+                .map(JobData::classification)
+                .unwrap_or_else(|| String::from("unknown")),
+            job: job_id,
+        })
+        .collect()
+}
+
+/// Lists the jobs related to `job` in the downstream direction, each paired
+/// with its runtime classification.
+///
+/// A job id with no runtime record is classified as `"expected to skip"`.
+fn downstream_reports(audit: &AuditData, job: &JobData) -> Vec<TraceDownstreamJob> {
+    collect_related_jobs(audit, job, Direction::Downstream)
+        .into_iter()
+        .map(|job_id| TraceDownstreamJob {
+            // `classification` borrows `job_id`, so it is computed before
+            // the id is moved into the `job` field.
+            classification: find_runtime_job(audit, &job_id)
+                .map(JobData::classification)
+                .unwrap_or_else(|| String::from("expected to skip")),
+            job: job_id,
+        })
+        .collect()
+}
+
+/// Direction argument for `collect_related_jobs`.
+#[derive(Clone, Copy)]
+enum Direction {
+    /// Passed by `upstream_reports`.
+    Upstream,
+    /// Passed by `downstream_reports`.
+    Downstream,
+}
+
+/// Collects the ids of the jobs related to `job` in the given direction.
+///
+/// This is the entry point for `collect_related_jobs_inner`: it creates the
+/// `seen` set and the `ordered` result list, hands both to the inner
+/// function, and returns `ordered`.
+fn collect_related_jobs(audit: &AuditData, job: &JobData, direction: Direction) -> Vec<String> {
+    // NOTE(review): `seen` presumably de-duplicates visited jobs inside the
+    // inner walk — confirm against `collect_related_jobs_inner`.
+    let mut seen = BTreeSet::new();
+    let mut ordered = Vec::new();
+    collect_related_jobs_inner(audit, job, direction, &mut seen, &mut ordered);
+    ordered
+}
+
+fn collect_related_jobs_inner(
+ audit: &AuditData,
+ job: &JobData,
+ direction: Direction,
+ seen: &mut BTreeSet