diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json
index 76b3a07..c03099a 100644
--- a/.claude-plugin/marketplace.json
+++ b/.claude-plugin/marketplace.json
@@ -12,6 +12,14 @@
       "version": "0.1.0",
       "category": "ruby",
       "keywords": ["ruby", "bundler", "gem", "dependencies"]
+    },
+    {
+      "name": "security",
+      "source": "./plugins/security",
+      "description": "White-box, dynamically-verified security audit. /security:audit recons a repo, hunts OWASP Top 10:2025 vulnerabilities, proves them with live PoCs in isolated worktrees, and writes a high-signal senior-engineer report.",
+      "version": "0.1.0",
+      "category": "security",
+      "keywords": ["security", "pentest", "vulnerability", "audit", "owasp", "appsec"]
     }
   ]
 }
diff --git a/README.md b/README.md
index 5ee7465..0b9e3c1 100644
--- a/README.md
+++ b/README.md
@@ -25,6 +25,7 @@ If the plugin's commands don't show up in the `/` menu, run `/reload-plugins`.
 | Plugin | Description |
 | --- | --- |
 | [gem](plugins/gem) | Ruby gem helpers. Includes `/gem:bump` for changelog-rich dependency bumps. |
+| [security](plugins/security) | Dynamically-verified security audit. `/security:audit` proves vulnerabilities with live PoCs and writes a senior-engineer report. |
 
 ## Developing plugins
 
diff --git a/plugins/security/.claude-plugin/plugin.json b/plugins/security/.claude-plugin/plugin.json
new file mode 100644
index 0000000..0cfc554
--- /dev/null
+++ b/plugins/security/.claude-plugin/plugin.json
@@ -0,0 +1,13 @@
+{
+  "name": "security",
+  "version": "0.1.0",
+  "description": "White-box, dynamically-verified security audit. /security:audit recons a repo, hunts vulnerabilities across the OWASP Top 10:2025 classes, proves them with live PoCs in isolated worktrees, and writes a high-signal senior-engineer report.",
+  "author": {
+    "name": "84codes",
+    "url": "https://github.com/84codes"
+  },
+  "homepage": "https://github.com/84codes/claude-plugins/tree/main/plugins/security",
+  "repository": "https://github.com/84codes/claude-plugins",
+  "license": "MIT",
+  "keywords": ["security", "pentest", "vulnerability", "audit", "owasp", "appsec", "sast"]
+}
diff --git a/plugins/security/AGENTS.md b/plugins/security/AGENTS.md
new file mode 100644
index 0000000..1ab4295
--- /dev/null
+++ b/plugins/security/AGENTS.md
@@ -0,0 +1,199 @@
+# vuln-audit — agent & design spec
+
+A Claude Code **skill + workflow** that runs a white-box, dynamically-verified
+security audit of a target repository: a
+multi-phase pipeline (recon → triage → deep review → adversarial verify →
+dynamic repro → report) that produces **proven, high-signal findings with
+patches**, not speculative noise.
+
+> Read this file before touching the workflow or prompts. It is the source of
+> truth for the data contracts, taxonomy, severity model, and signal policy.
+
+## Invocation
+
+```
+/security:audit /path/to/target-repo [--no-dynamic] [--classes injection,ssrf] [--ref <git-ref>] [--out <dir>]
+```
+
+The skill (`skills/audit/SKILL.md`) is the agent-facing entry point. It parses the
+target, picks a writable `outDir`, preflights host capabilities, then calls the
+workflow (`workflows/vuln-audit.js`) with everything assembled in `args` —
+`toolRoot` = `${CLAUDE_PLUGIN_ROOT}` (read-only, holds the prompts), `outDir` =
+where the bundle is written.
+
+## Pipeline
+
+| Phase | What | Primitive |
+|-------|------|-----------|
+| 1. Recon | Detect stack, map attack surface & trust boundaries, pick run strategy, select relevant finder classes | single agent (`prompts/recon.md`) |
+| 2. Triage | One finder per vuln class scans its surface, emits candidate findings (all share `prompts/finder.md`; the workflow injects the class + its OWASP/CWE/ASVS) | `parallel()` finders |
+| 3. Dedup | Collapse same-root-cause findings across call sites | plain JS in the workflow |
+| 4. Deep review | Re-examine each candidate with surrounding context (callers, sanitizers, related files); confirm a reachable source→sink path | `pipeline()` stage |
+| 5. Adversarial verify | Independent skeptics, each a distinct lens, try to **refute** the finding; majority-refute kills it | `parallel()` skeptic panel |
+| 6. Dynamic repro | Survivors are built & run in an isolated git **worktree** (docker-first, via `prompts/playbook.md`); a real PoC is fired and impact observed | `agent()` per survivor; the playbook creates its own `git worktree` of the target (the `isolation:'worktree'` option is not used) |
+| 7. Report | Synthesize the senior-engineer report (`prompts/report-template.md`) | single agent |
+
+## Reference evaluation (why we adopt what we adopt)
+
+- **Anthropic security-guidance** (`code.claude.com/docs/en/security-guidance`)
+  — **adopt methodology.** Validates our core moves: (a) review independence —
+  the reviewer is a *fresh-context* agent, never the author, "instructed only to
+  find problems"; (b) read callers/sanitizers/related files before reporting to
+  keep false positives low. Our tool is the deepest layer: in-session plugin →
+  `/security-review` (branch) → Code Review (PR) → **vuln-audit (on-demand,
+  dynamically verified PoCs)**. We honor its extension convention: if the target
+  has a `.claude/claude-security-guidance.md`, we load it as extra threat-model
+  context.
+- **OWASP Top 10:2025** — **adopt as primary taxonomy.** Current edition; new
+  categories A03 Software Supply Chain Failures and A10 Mishandling of
+  Exceptional Conditions; SSRF folded into A01. Every finder maps to a 2025 ID.
+- **OWASP ASVS v5.0** (17 chapters, ~350 reqs) — **reference only, not a walked
+  checklist.** Walking 350 requirements is exactly the low-signal sidetrack we
+  avoid. Used two ways: (a) coverage map so the finder taxonomy has no blind
+  spots; (b) cite a requirement/chapter ID in findings as a terse, authoritative
+  reference for senior readers.
+- **OSSF Scorecard** — **partial adopt, code-exploitable checks only.** Scorecard
+  scores project *hygiene/posture* (Maintained, License, SBOM, Security-Policy,
+  Contributors) — out of scope for findings. But its CI/CD checks ARE real
+  exploitable issues and feed our `supply-chain` finder: Dangerous-Workflow
+  (`pull_request_target` + untrusted checkout, `${{ }}` script injection),
+  Token-Permissions (over-broad `GITHUB_TOKEN`), Pinned-Dependencies (unpinned
+  actions/deps), Vulnerabilities (known-vuln deps via OSV). Posture/process
+  checks are relegated to the Info appendix, never the high-priority body.
+
+## Vuln-class taxonomy (finders)
+
+Each maps to OWASP Top 10:2025 + CWE + an ASVS v5.0 chapter. The mapping is the
+`CLASS_META` table in `workflows/vuln-audit.js` (single source of truth); all
+classes share one method prompt, `prompts/finder.md`, with the per-class context
+injected by the workflow.
+
+| key | title | OWASP 2025 | ASVS |
+|-----|-------|-----------|------|
+| access-control | Broken Access Control & IDOR | A01 | V8 |
+| ssrf | Server-Side Request Forgery | A01 | V4 |
+| injection | Injection (SQL/NoSQL/OS/LDAP) | A05 | V1/V2 |
+| xss-ssti | XSS & Template Injection | A05 | V1/V3 |
+| auth-session | Authentication & Session | A07 | V6/V7/V9/V10 |
+| crypto | Cryptographic Failures | A04 | V11 |
+| deserialization | Insecure Deserialization & Integrity | A08 | V2/V15 |
+| path-file | Path Traversal & File Handling | A01 | V5 |
+| secrets | Hardcoded Secrets & Credentials | A02 | V14 |
+| misconfig | Security Misconfiguration | A02 | V13 |
+| supply-chain | Software Supply Chain & CI/CD | A03 | V15 |
+| logging-errors | Logging, Error & Exception Handling | A09/A10 | V16 |
+| dos-redos | Denial of Service & ReDoS | A06 | V2 |
+| csrf-cors | CSRF, CORS & Clickjacking | A01 | V3 |
+
+Insecure Design (A06) is cross-cutting and handled in recon/synthesis, not a
+grep-able finder.
+
+## Data contracts
+
+### Finding (finders + deep review)
+`id` · `title` · `vuln_class` · `owasp` (A0x:2025) · `cwe` · `asvs` ·
+`severity` (critical|high|medium|low|info) · `status` (confirmed|likely|triage) ·
+`confidence` (low|medium|high) · `file` · `line` · `end_line` ·
+`code_excerpt` · `source` (untrusted origin) · `sink` (dangerous op) ·
+`data_flow` (source→sink, sanitizers noted) · `sanitizers_checked` (mitigations
+verified absent/ineffective — the FP guard) · `rationale` · `exploit_sketch` ·
+`dynamic_poc_plan` · `proposed_fix` (high-level direction of the change, not a
+patch — implementation is left to whoever takes the issue).
+
+After the pipeline, each finding is also stamped with `fp` (stable fingerprint =
+`djb2(vuln_class | file | sink)`, the cross-scan dedup key), `display_id`
+(`<slug>-<CLASS>-<fp4>`, provisional until the courier swaps in the GitHub issue
+number), `status`, `kept`, `reject_reason`, `verdicts`, and `repro`.
+
+### Verdict (adversarial verify)
+`finding_id` · `lens` · `refuted` (bool) · `confidence` · `reasoning`.
+
+### Repro (dynamic verify)
+`finding_id` · `reproduced` (bool) · `method`
+(live-exploit|unit-test|build-only|static-poc) · `environment` ·
+`setup_commands` · `poc` · `observed` (evidence) · `impact` · `notes`.
+
+## Severity model (exploitability × impact)
+
+- **Critical** — remote, unauth → RCE / full data breach / auth bypass; reachable.
+- **High** — low barrier (authenticated or realistic conditions); significant
+  impact (priv-esc, sensitive data, injection with a real sink).
+- **Medium** — unusual conditions or limited impact, or partial mitigations.
+- **Low** — minor info leak, defense-in-depth gap, hard to exploit.
+- **Info** — hygiene/posture, no direct exploit path.
+
+`status` is orthogonal and drives report placement: **confirmed** (dynamically
+reproduced or statically proven + survived verify), **likely** (strong proof, no
+live repro), **triage** (unverified / split verdicts). Only confirmed+likely go
+in the report body; triage goes to an appendix.
+
+## Signal discipline (the anti-noise contract)
+
+The report is for senior engineers. Stay high-signal — enforced in deep review
+and verify:
+
+- Report only issues with a **reachable** path from untrusted input to a
+  dangerous sink. Check for sanitizers/validators/authz on the path first; if
+  present and effective, drop it.
+- No style/lint nits. No generic "defense-in-depth" without a concrete sink. No
+  unreachable/dead code.
+- Posture/process items (missing SECURITY.md, SBOM, license, maintainership) →
+  Info appendix only, never the body.
+- Dedup: one finding per root cause, list N locations.
+- Prefer few proven findings over many speculative ones. Every High+ finding
+  carries a PoC or an explicit source→sink trace.
+
+## Layout
+
+```
+.claude-plugin/plugin.json           # plugin manifest (name: security)
+skills/audit/SKILL.md                 # agent-facing orchestrator (/security:audit)
+workflows/vuln-audit.js              # the Workflow script (the engine)
+prompts/recon.md                     # phase-1 recon prompt
+prompts/finder.md                     # shared finder method (per-class context injected by the workflow)
+prompts/playbook.md                   # shared build/run/exploit repro playbook (stack-agnostic)
+prompts/report-template.md           # the report format (phase 7)
+docs/issue-tracking.md               # output bundle → GitHub issues + naming rules
+```
+(The output bundle is written to a writable `outDir`, NOT into the plugin root,
+which is read-only/ephemeral.)
+
+Schemas live inline in the workflow (the JS sandbox has no filesystem access at
+runtime); prose content lives in `prompts/` so it is editable without touching
+the script, and is passed into the workflow via `args`.
+
+## Output bundle (VM → courier handoff)
+
+The scan runs on a VM and emits a self-contained **bundle** at
+`<outDir>/<slug>/`; a separate "courier" agent SSHes in, fetches it, and files the
+issues (the courier holds the only GitHub creds — the VM holds none). Bundle:
+
+- `report.md` — the human report (findings referenced by `display_id`).
+- `findings.json` — the structured findings array, verbatim; the machine
+  interface the courier reconciles against, **keyed by `fp`**.
+- `manifest.json` — `{ tool, schema, repo (owner/repo), target_path, ref,
+  commit, slug, date, dynamic, classes_assessed, counts }`; `repo` tells the
+  courier where to file.
+- `evidence/` — optional captured PoC output (repro evidence also lives inline
+  in `findings.json`).
+
+**Issue tracking & the vulnerability ID/naming rules** (scan epic → finding
+sub-issues, reconcile by `fp`, `display_id` = `<slug>-<CLASS>-<issue#>`, the
+courier emitter, and what each host needs) live in
+[`docs/issue-tracking.md`](docs/issue-tracking.md) — the portable source of truth
+that travels with the repo.
+
+## Runtime notes (gotchas)
+
+- **`args` arrives as a JSON string.** The Workflow runtime delivers the `args`
+  payload to the script as a JSON *string*, not a parsed object (verified
+  empirically). `vuln-audit.js` normalizes it (`typeof args === 'string' ?
+  JSON.parse(args) : args`) before reading any input — do not remove this.
+- **Invoke by `scriptPath`, not `name`, mid-session.** Named-workflow discovery
+  only registers files that existed at session start.
+- **Subagents have full tools** (Read/Grep/Bash/Write/ast-grep, and web via
+  ToolSearch) and operate on the *target*; only the orchestration JS is
+  sandboxed. Dynamic repro creates its own `git worktree` of the target — the
+  `isolation:'worktree'` option is about the tool repo and is not used here.
+- **Host adaptivity:** pass `hostNotes` so recon picks a runnable strategy
+  (docker vs native) the host can actually execute.
diff --git a/plugins/security/README.md b/plugins/security/README.md
new file mode 100644
index 0000000..458961a
--- /dev/null
+++ b/plugins/security/README.md
@@ -0,0 +1,85 @@
+# security (vulnerability audit)
+
+A white-box, **dynamically-verified** security-audit plugin for internal
+pentests. `/security:audit` points at a repo you own, recons it, hunts
+vulnerabilities across the OWASP Top 10:2025 classes, **proves them with live
+PoCs in isolated git worktrees**, and writes a terse, senior-engineer report —
+proven findings with a high-level proposed fix, not speculative noise.
+
+## Install
+
+```
+/plugin marketplace add 84codes/claude-plugins
+/plugin install security@84codes
+```
+
+Then run `/reload-plugins` if the command doesn't appear.
+
+## Usage
+
+```
+/security:audit /abs/path/to/target-repo
+/security:audit /abs/path/to/target-repo --no-dynamic
+/security:audit /abs/path/to/target-repo --classes injection,ssrf,access-control --ref v1.2.0
+/security:audit /abs/path/to/target-repo --out /abs/writable/dir
+```
+
+The first argument is the path to the target repo (required). The flags:
+
+| Flag | Meaning |
+|------|---------|
+| `--no-dynamic` | Skip the build/run/PoC phase — static review + adversarial verify only. |
+| `--classes` | Comma-separated vuln-class keys to restrict the audit to (e.g. `injection,ssrf,access-control`; see [`AGENTS.md`](AGENTS.md) for the full taxonomy). Default: classes picked by recon. |
+| `--ref` | Git ref to audit. Default: `HEAD`. |
+| `--out` | Writable directory for the output bundle. Default: `<cwd>/vuln-audit-reports`. |
+
+The output **bundle** is written to `<out>/<slug>/`: `report.md` +
+`findings.json` + `manifest.json` (+ an optional `evidence/` directory).
+
+## How it works
+
+```
+recon → triage → consolidate → deep review → adversarial verify → dynamic PoC → report
+```
+
+| Phase | Purpose |
+|-------|---------|
+| Recon | Detect stack, map attack surface, pick relevant vuln classes + run strategy. |
+| Triage | One finder agent per relevant class emits candidates. |
+| Consolidate | Dedup by root cause, assign IDs, drop low-signal noise. |
+| Deep review | Confirm a reachable source→sink path with no mitigation. |
+| Adversarial verify | Independent skeptics try to refute each finding; majority kills it. |
+| Dynamic PoC | Build + run the target in an isolated worktree; fire a real exploit. |
+| Report | Senior-engineer report: severity-first, reference-backed, PoC-evidenced. |
+
+## Requirements
+
+- `git` (target must be a git repo for worktree isolation + the live-PoC phase).
+- `docker` for dynamic verification (works via `sudo` if the daemon needs it);
+  otherwise repro falls back to unit-test/static PoCs (`--no-dynamic` skips it).
+- No security scanners required — the tool is LLM-native and uses
+  `semgrep`/`gitleaks`/`trivy` only opportunistically if present.
+
+## Output & issue tracking
+
+Findings carry a stable fingerprint (`fp`) and a `display_id`
+(`<slug>-<CLASS>-<n>`). The bundle is designed to be filed to GitHub issues by a
+separate courier step (scan epic + per-finding sub-issues for Critical/High/
+Medium, reconciled by `fp`). See [`docs/issue-tracking.md`](docs/issue-tracking.md).
+
+## Design
+
+Full pipeline spec, vuln-class taxonomy (OWASP 2025 + CWE + ASVS), data
+contracts, and the signal-discipline policy are in
+[`AGENTS.md`](AGENTS.md).
+
+## Safety & scope
+
+Authorized testing only — audit repositories you own or are explicitly cleared
+to test. All PoC traffic is contained to local processes/containers; the tool
+never fires exploits at external hosts, uses real credentials, or exfiltrates
+data.
+
+## License
+
+MIT
diff --git a/plugins/security/docs/issue-tracking.md b/plugins/security/docs/issue-tracking.md
new file mode 100644
index 0000000..36e972b
--- /dev/null
+++ b/plugins/security/docs/issue-tracking.md
@@ -0,0 +1,118 @@
+# Output handling — findings → GitHub issues
+
+How a scan's findings become tracked, fixable, closeable GitHub issues. This is
+the source of truth for the **vulnerability ID / naming rules** and the
+scan→courier→GitHub pipeline. (Design locked 2026-06-02.)
+
+## Topology: scanner VM + courier
+
+Scans run on a **VM**; a separate **courier** agent SSHes in, fetches the scan's
+output, and files it to GitHub. The two run on different hosts on purpose:
+
+- The **VM** runs `/security:audit`, handles untrusted code and working exploits, and
+  holds **no GitHub credentials**.
+- The **courier** holds the only GitHub creds, fetches the bundle read-only over
+  SSH, and creates/updates issues. It is a *pure function of the bundle* — it
+  needs no access to the target source or the VM's git state.
+
+## The bundle (the scan→courier interface)
+
+Each scan drops a self-contained bundle at `<outDir>/<slug>/` on the VM (`outDir` is the scan's `--out` directory):
+
+| File | Purpose |
+|------|---------|
+| `report.md` | Human report (findings headed by `display_id`). |
+| `findings.json` | Structured findings array, **verbatim**; the machine interface, **keyed by `fp`**. |
+| `manifest.json` | `{ tool, schema, repo (owner/repo), target_path, ref, commit, slug, date, dynamic, classes_assessed, counts }`. `repo` tells the courier where to file. |
+| `evidence/` | Optional captured PoC output (repro evidence also lives inline in `findings.json`). |
+
+## Vulnerability ID / naming rules
+
+- **Fingerprint** `fp = djb2(vuln_class | file | sink)` (lowercased; line number
+  excluded to reduce churn). This is the **stable, cross-scan dedup key** — same
+  bug → same `fp`, computed identically on the VM and the courier with no shared
+  state. Stored on each issue as a `fp:<hash>` label.
+- **Display ID** `<slug>-<CLASS>-<n>` — e.g. `training-tool-AC-42`. `<slug>` is the
+  repo name, `<CLASS>` the short class code (AC, SSRF, INJ, XSS, AUTH, CRYPTO,
+  DESER, PATH, SEC, MISC, SUPPLY, LOG, DOS, CSRF), and **`<n>` is the GitHub issue
+  number**. So `training-tool-AC-42` *is* `84codes/training-tool#42` — one number,
+  both meanings, permanent (GitHub never reuses issue numbers).
+- **Provisional form** `<slug>-<CLASS>-<fp4>` (first 4 hex of `fp`, e.g.
+  `training-tool-AC-b4a0`) — used in the VM-side `report.md` *before* an issue
+  exists. The courier stamps the final `-<issue#>` ID into the issue at filing;
+  `fp` is the glue linking the two forms.
+- Numbers are **not contiguous per class** (GitHub shares the counter with PRs and
+  other issues) — that is fine; the class prefix carries the meaning.
+
+## Issue model
+
+- **Scan issue** (epic), one per run: holds the report (as a comment, see
+  **Report comment** below) + general comments; closes when all its finding
+  sub-issues close.
+- **Finding sub-issue**, one per **Critical / High / Medium** (confirmed+likely).
+  **Low/Info stay in the report appendix — never issues** (same high-signal
+  contract as the report).
+- **Title:** `[Critical] training-tool-AC-42: <short title> (access-control)`.
+- **Body:** the report's finding block (refs · location · PoC · impact ·
+  proposed fix) + backlink to the scan issue + the `fp` marker.
+- **Labels:** `security`, `security-scan` (epic), and `fp:<hash>` (the dedup
+  key). Severity and class aren't labels — they live in the title
+  (`[Critical] … (access-control)`) and the display ID, so a `sev:`/`vuln:`
+  label would just duplicate that text.
+- **Two distinct "statuses":** *verification* (confirmed/likely — a scan output,
+  carried as the finding's badge in the title/body) vs *lifecycle* (open/fixed —
+  owned entirely by the GitHub issue). The **report has no status table**; the
+  scan epic and its sub-issues are the live status.
+- **PoC handling:** repos are private/internal, so full PoC commands go in the
+  issues (the remediation is a high-level *proposed fix*, not a patch). If a
+  target were public, use GitHub Security Advisories for Critical/High instead.
+- **Report comment:** the full `report.md` is **embedded** in a comment on the
+  scan epic, wrapped in a `<details><summary>…</summary>` block (collapsed by
+  default) so the long report never buries the epic's sub-issue checklist. Always
+  embed the report text itself — **never** reference a local bundle path
+  (`reports/<slug>/…`) or any filesystem location, which is unreachable from
+  GitHub. The epic body points readers to this comment, not to disk.
+
+## Reconcile algorithm (idempotent, keyed by `fp`)
+
+For each Critical/High/Medium finding in `findings.json`, look up existing issues
+by the `fp:<hash>` label (`gh issue list --label "fp:<hash>" --state all`):
+
+- **no match** → create the finding issue, link it under the scan epic.
+- **open match** → comment "still present in scan `<id>`" (no duplicate).
+- **closed match that still reproduces** → reopen as a regression + comment.
+- **previously open, now absent / not reproduced** (dynamic re-verify) → comment +
+  close.
+
+The **report comment** on the epic is upserted the same way: tag it with a
+hidden marker (`<!-- vuln-audit:report -->`), then find-and-edit that comment
+on re-run instead of posting a new one — so the epic never accumulates
+duplicate report blocks.
+
+Re-running the courier on the same bundle is a no-op. The dynamic-repro phase
+doubles as the fix-verifier, so "everything closed when done" is provable, not
+manual.
+
+## Close loop
+
+Fix PRs use `Fixes #N` to auto-close the finding issue on merge; the next scan
+confirms via dynamic re-verify. When all finding sub-issues are closed, the scan
+epic closes.
+
+## Build status
+
+1. **Done (2026-06-02)** — the workflow emits the bundle and stamps `fp` +
+   provisional `display_id`. See `workflows/vuln-audit.js`.
+2. **Not built yet** — the `/security:track <bundle-dir>` courier skill +
+   `gh` emitter. Blocked on `gh` being installed + authed on the courier host.
+3. **Always gated** — creating real issues on a repo needs an explicit go-ahead.
+
+## What each host needs
+
+| Host | Role | Requirements |
+|------|------|--------------|
+| **VM (scanner)** | runs `/security:audit`, produces the bundle | Claude Code · this repo · `git` · `docker` · **no `gh`, no GitHub creds** |
+| **Courier** | fetches bundle, files issues | Claude Code · this repo (for `/security:track`) · **`gh` + `gh auth login`** (token: Issues read/write) · **SSH key to the VM** (`ssh`/`rsync`) · `jq` (optional) |
+
+Sub-issue linking uses GitHub's GraphQL API, which `gh api graphql` covers — no
+extra tooling.
diff --git a/plugins/security/prompts/finder.md b/plugins/security/prompts/finder.md
new file mode 100644
index 0000000..9dab008
--- /dev/null
+++ b/plugins/security/prompts/finder.md
@@ -0,0 +1,76 @@
+<!--
+FINDER METHOD — Phase 2 of vuln-audit. One fresh-context auditor runs this per
+vuln class (the workflow injects the class + its OWASP/CWE/ASVS + a focus hint).
+Read AGENTS.md for the data contracts, severity model, and the binding
+signal-discipline policy. You hunt ONE class; emit finding objects. Read-only.
+-->
+
+# Finder — method (one vuln class per run)
+
+The workflow tells you which class to hunt and gives its OWASP/CWE/ASVS mapping
+and a one-line focus. You know this class well — apply that knowledge; the focus
+hint scopes it, it is not an exhaustive checklist.
+
+## 1. Taint model (the only thing that makes a finding)
+
+A finding is a REACHABLE path from an untrusted SOURCE to a dangerous SINK with
+NO effective control on the path. Miss any of the three and it is not a finding.
+
+- SOURCE — untrusted input: HTTP query/body/path/header/cookie, JSON/multipart
+  fields & filenames, GraphQL args, queue/webhook payloads, parsed file
+  contents, and DB rows that were originally user-set (second-order). When in
+  doubt, treat input as untrusted until a boundary proves otherwise.
+- SINK — the dangerous operation for this class (the interpreter, renderer,
+  deserializer, file op, outbound call, authz decision, crypto primitive, ...).
+- PATH — the source must actually reach the sink at runtime given routing, auth
+  guards, and feature flags. Dead/unreachable code is not a finding.
+
+## 2. How to hunt
+
+1. Start from recon's prioritized surfaces for this class, then widen.
+2. Grep/ast-grep for this class's sinks; for each hit, trace backward to a
+   source and forward through any control. Read the surrounding code and callers,
+   not just the matched line.
+3. Use your own knowledge of the language/framework for the exact sink and safe
+   APIs — do not assume any list is complete. A sink you know but isn't named
+   anywhere is still a sink.
+
+## 3. False-positive guard (check BEFORE flagging)
+
+Before emitting, prove the control on the path is absent or ineffective. A
+finding survives only if there is no effective:
+
+- parameterization / prepared statement / structural builder (injection),
+- context-correct output encoding / auto-escaping (xss),
+- canonicalize-then-confined-root check (path),
+- allowlist / typed cast that drops dangerous values,
+- authn/authz/ownership check on the route (access-control, csrf),
+- safe deserializer / signature+integrity verification (deserialization),
+- destination allowlist + no-redirect + internal-range block (ssrf).
+
+A control that exists but is bypassable (denylist instead of allowlist, wrong
+context, escaping that misses an encoding, a cast that silently coerces) is NOT
+a mitigation — flag it and name the exact bypass. Record what you checked in
+`sanitizers_checked`; that field is the FP guard made explicit. Posture/process
+items, style nits, and defense-in-depth without a concrete sink are not findings
+(see AGENTS.md signal discipline).
+
+## 4. Severity & status
+
+Score per the AGENTS.md severity model (exploitability x impact): Critical =
+remote unauth high-impact reachable; down to Info = no direct exploit path.
+Set `status`: `likely` for a proven static source->sink trace, `confirmed` only
+after dynamic repro, `triage` if reachability or source is uncertain.
+
+## 5. Emit
+
+Return `{findings:[...]}` (or `{findings:[]}` if nothing real). One object per
+distinct root cause — dedup call sites into `locations[]`, note extras in
+`rationale`. The output schema is enforced by the workflow; fill it accurately.
+Set `owasp`/`cwe`/`asvs` from the class context the workflow gave you (pick the
+most specific CWE for the actual bug). `source`, `sink`, `data_flow`, and
+`sanitizers_checked` must be concrete and true — `data_flow` traces variables
+source->sink and states why no control stops it; `sanitizers_checked` names each
+control checked and why it is absent or bypassable. Include an `exploit_sketch`
+and a `dynamic_poc_plan` (the oracle that would prove it on a running instance),
+and a high-level `proposed_fix` (the direction of the change, not a patch).
diff --git a/plugins/security/prompts/playbook.md b/plugins/security/prompts/playbook.md
new file mode 100644
index 0000000..e176659
--- /dev/null
+++ b/plugins/security/prompts/playbook.md
@@ -0,0 +1,110 @@
+<!--
+REPRO PLAYBOOK — Phase 6 of vuln-audit. One agent per surviving finding builds,
+runs, and exploits the target to prove it with a real PoC. Stack-agnostic: recon
+gives you the stack, frameworks, run command, and port; you supply the
+language-specific build/run details. Docker-first. Keep ALL traffic local — no
+external hosts, no real credentials, no data exfiltration.
+-->
+
+# Repro playbook — build, run, prove (one finding)
+
+Conventions (substitute per finding so parallel repros never collide):
+
+- `FID` — the finding id; use it to make every name/port unique.
+- `WT=/tmp/va-$FID` — isolated git worktree. `IMG=va-$FID:repro` — image tag.
+  `CN=va-$FID` — container name. `PORT` — a free ephemeral host port.
+- The result must set `method` to one of:
+  `live-exploit | unit-test | build-only | static-poc`.
+
+## 1. Isolate
+
+Never touch the original tree. Create a throwaway worktree at the audited ref:
+
+```sh
+git -C <target> worktree add --detach /tmp/va-$FID "$REF"   # $REF default HEAD
+cd /tmp/va-$FID
+```
+
+If `<target>` is not a git repo (rare), `cp -a <target> /tmp/va-$FID` and note
+it. All build/run steps run from `WT`.
+
+## 2. Build & run (docker-first)
+
+Use `recon.run_strategy`, `recon.stack`, and recon's boot notes (run command,
+port, prerequisite services) as your starting point — don't re-derive what recon
+already found.
+
+1. Repo ships Docker → prefer it; it usually wires up DB/env/migrations:
+   `docker compose -p va-$FID up -d --build`, else `docker build -t $IMG .`.
+2. No Dockerfile → write a minimal one for the detected stack: a recent stable
+   base image for the language, install the build deps the native packages need,
+   restore dependencies from the lockfile EXACTLY (never upgrade — that changes
+   the audited dependency set), then run the app's own start command on
+   `0.0.0.0`.
+3. Can't containerize → run natively if the host has the runtime (see HOST
+   CONSTRAINTS passed by the workflow).
+
+Run detached, bound to loopback only, on a finding-keyed port:
+
+```sh
+PORT=$(python3 -c 'import socket;s=socket.socket();s.bind(("",0));print(s.getsockname()[1]);s.close()')
+docker run -d --name $CN -p 127.0.0.1:$PORT:<app-port> $IMG <start-command>
+```
+
+Bind the host port to `127.0.0.1` so the app is never exposed off-box. Poll for
+health, don't sleep blindly; on failure inspect `docker logs --tail 50 $CN`.
+
+## 3. Seed (only what the PoC needs)
+
+Create the minimum synthetic state — a throwaway user, a row, an auth session —
+using the app's own endpoints/console. Use only fake, local-only credentials;
+never reuse real secrets from the repo beyond what's strictly required to boot.
+
+## 4. Fire the PoC safely
+
+Send the exploit to the LOCAL instance only and capture concrete evidence.
+Tailor the oracle to the finding's source->sink path:
+
+- Injection — error/boolean/time oracle (unbalanced quote, `1=1` vs `1=2`,
+  `pg_sleep`/`SLEEP`), or an OS-command marker (`; sleep 5`, `| id`).
+- Path traversal / file read — pull a host file the app should never serve
+  (`?file=../../../../etc/passwd`).
+- SSRF — point at a CONTAINER-LOCAL canary listener you start, never a real host.
+- Deserialization / RCE — prove exec with a benign in-container side effect
+  (touch a sentinel file), then read it back; never run destructive commands.
+- XSS — confirm the payload is reflected unescaped in the response context.
+- Auth/access-control — perform the action as the wrong (or no) principal and
+  show it succeeds.
+
+Record for the result: the exact request (-> `poc`), the response/log line
+proving impact (leaked row, file contents, sentinel, reflected script, 500 with
+stack -> `observed`), and what it means for the target (-> `impact`). Set
+`reproduced: true`, `method: live-exploit`.
+
+Safety invariants (non-negotiable): traffic stays on `127.0.0.1` / inside `$CN`;
+no outbound connections; no real data; side effects are benign sentinels only.
+
+## 5. Teardown (always, even on failure)
+
+```sh
+docker rm -f $CN 2>/dev/null
+docker compose -p va-$FID down -v 2>/dev/null
+docker image rm -f $IMG 2>/dev/null
+cd /                                   # leave the worktree before removing it
+git -C <target> worktree remove --force /tmp/va-$FID
+git -C <target> worktree prune
+```
+
+## 6. Fallbacks
+
+If a live exploit isn't achievable, downgrade deliberately and set `method`;
+never claim `reproduced: true` without observed runtime evidence.
+
+1. Builds but won't serve (library, or boot blocked) → drive the vulnerable API
+   directly from a focused unit test in the container. `method: unit-test`.
+2. Image builds but the app can't start (missing DB/config) → record that deps
+   install and the vulnerable code is present and reachable, with the
+   source->sink trace as evidence. `method: build-only`, `reproduced: false`.
+3. Can't build at all (toolchain/network blocked) → construct a static PoC: the
+   exact crafted input plus the line-referenced source->sink path showing why it
+   triggers. `method: static-poc`, `reproduced: false`.
diff --git a/plugins/security/prompts/recon.md b/plugins/security/prompts/recon.md
new file mode 100644
index 0000000..97d0a48
--- /dev/null
+++ b/plugins/security/prompts/recon.md
@@ -0,0 +1,195 @@
+<!--
+RECON PROMPT — PHASE 1 of vuln-audit. You are a single fresh-context agent that
+runs BEFORE any finder. Your job is reconnaissance only: detect the stack, map
+the attack surface and trust boundaries, decide which finder classes are worth
+running and which to skip, pick a dynamic-verification strategy, and emit ONE
+structured recon summary the workflow forwards to every later phase. You do NOT
+report vulnerabilities here — you scope the hunt. Read AGENTS.md for the data
+contracts, taxonomy, and the binding signal-discipline policy. Read-only: do not
+mutate the target.
+-->
+
+# Recon — Phase 1 (stack, surface, scope, run strategy)
+
+Work the steps in order. Each step's output feeds the recon summary in step 7.
+Be fast and broad first, then precise. When a step is ambiguous, prefer the
+reading that EXPANDS attack surface (assume input is untrusted until proven
+otherwise) but NARROWS finder selection (skip a class only when you can justify
+it). Cite concrete file paths and line numbers for every claim — recon that
+points later phases at real code is worth ten of generic prose.
+
+## 1. Detect stack, frameworks, and build/run system
+
+Identify the primary language(s), frameworks, and how the target builds and runs.
+Record ONE normalized `stack` label (a hint for Phase 6 repro and the report):
+
+`crystal · ruby · node · python · go · php · java-jvm · rust · generic-docker · ci-iac`
+
+Detection signals (read-only; do not install anything):
+
+- **Manifests / lockfiles** — the ground truth for language + package manager:
+  - crystal: `shard.yml`, `shard.lock`
+  - ruby: `Gemfile`, `*.gemspec`, `Gemfile.lock`
+  - node: `package.json` (+ `package-lock.json`/`pnpm-lock.yaml`/`yarn.lock`), `tsconfig.json`
+  - python: `pyproject.toml`, `requirements*.txt`, `Pipfile`, `setup.py`, `poetry.lock`
+  - go: `go.mod`, `go.sum`
+  - php: `composer.json`, `composer.lock`
+  - java-jvm: `pom.xml`, `build.gradle(.kts)`, `settings.gradle`, `*.jar`
+  - rust: `Cargo.toml`, `Cargo.lock`
+- **Build/run tells** — `Dockerfile`, `docker-compose*.yml`, `Procfile`, `Makefile`,
+  `Taskfile.yml`, `bin/`, framework CLIs, and the manifest's scripts/tasks.
+- **Framework** — read deps + entry imports: web (Rails/Sinatra/Lucky/Kemal,
+  Express/Nest/Next/Fastify, Django/Flask/FastAPI, Gin/Echo/chi/net-http,
+  Laravel/Symfony/Slim, Spring/Quarkus/Micronaut, actix/axum/rocket), plus
+  ORMs, template engines, queue/worker libs, and serializers — note each, they
+  steer finder selection.
+
+**Stack label decision:**
+
+- A single dominant app language → that language label.
+- **Polyglot:** label the language that owns the primary attack surface (the
+  network-facing app), note the others in `notes`. A thin shell of one language
+  around a core of another → label the core.
+- No buildable app, just a `Dockerfile`/compose stack to run → `generic-docker`.
+- The repo's PRIMARY artifact is CI/CD pipelines or IaC (GitHub Actions/GitLab
+  CI/Forgejo workflows, Terraform/Pulumi/CloudFormation, k8s/Helm, Ansible) with
+  no app to run → `ci-iac`. (Note: an app repo that ALSO has workflows is labeled
+  by its app language; `ci-iac` is for infra-/pipeline-primary repos.)
+
+Record `stack` (the label) and `frameworks` (list).
+
+## 2. Map the attack surface and trust boundaries
+
+Enumerate every place untrusted input crosses into the system, and the dangerous
+sinks it could reach. For each, capture `file:line`, the kind, and the untrusted
+source. This is the map every finder navigates — be exhaustive on surface,
+precise on location.
+
+- **HTTP routes/handlers** — every route table, controller, middleware, and
+  handler. Capture method, path, auth requirement, and which params/body/headers
+  flow in. Note dynamic/wildcard routes and catch-alls.
+- **CLIs / entrypoints** — `main`/`bin`, argv parsing, subcommands, scripts run
+  with attacker-influenced args or stdin.
+- **Message/queue consumers** — AMQP/Kafka/SQS/Redis/NATS/cron/webhook handlers;
+  the payload is untrusted input.
+- **Deserialization points** — `JSON.parse`/`Marshal`/`pickle`/`yaml.load`/
+  `ObjectInputStream`/`unserialize`/`serde`/MessagePack/protobuf over untrusted
+  bytes; framework auto-binding/mass-assignment.
+- **File/path operations** — reads/writes/joins/globs/zips/uploads/temp files
+  where any path segment is caller-controlled (traversal, symlink, zip-slip).
+- **Outbound network calls** — every server-side HTTP/DB/SMTP/DNS/socket call
+  whose destination or content can be influenced by a caller (SSRF surface).
+- **Auth/authz boundaries** — login, session/token issuance & validation,
+  role/permission checks, tenant isolation, the line between
+  unauthenticated/authenticated/admin. Mark which routes sit on which side.
+- **Secrets/config loading** — env vars, config files, secret managers, key
+  material, connection strings; note defaults and committed values.
+- **Template/HTML rendering** — server-rendered views, string-built HTML, SSTI-
+  capable engines, `dangerouslySetInnerHTML`/`html_safe`/`|safe`/`v-html`.
+- **Trust boundaries** — draw the line for each: where does data go from
+  untrusted→trusted or low-priv→high-priv? An input is only interesting if it
+  reaches a sink ACROSS a boundary.
+
+For each surface entry note any **sanitizer/validator/authz/parameterization**
+already on the path — the FP guard. A sink fronted by an effective control is
+not a lead; record it so later phases don't re-chase it.
+
+## 3. Select relevant finder classes (and justify skips)
+
+For each of the 14 classes, decide RELEVANT or SKIPPED based on the surface from
+step 2. The 14 classes (the workflow injects each one's full context downstream):
+
+`access-control · ssrf · injection · xss-ssti · auth-session · crypto ·
+deserialization · path-file · secrets · misconfig · supply-chain ·
+logging-errors · dos-redos · csrf-cors`
+
+A class is RELEVANT when its source AND its sink both exist in the surface map —
+e.g. ssrf needs a caller-influenced outbound call; injection needs untrusted
+input reaching a SQL/NoSQL/OS/LDAP/XPath interpreter; xss-ssti needs HTML/template
+rendering of untrusted data; auth-session needs the app to issue/validate
+sessions or tokens; deserialization needs untrusted bytes hitting a deserializer.
+Apply the same source-and-sink test to the rest. `secrets` and `misconfig` are
+near-always worth a quick pass. For `supply-chain`, only code-exploitable CI/CD
+and dependency issues count (per AGENTS.md) — posture/SBOM/maintainership is not.
+
+Output two lists. For every RELEVANT class, add a one-line **priority pointer**:
+the specific surfaces/files from step 2 that finder should hit first. For every
+SKIPPED class, add a one-line **justification** (why no reachable source→sink).
+Default to RELEVANT when uncertain — skipping is a claim you must back.
+
+## 4. Decide the dynamic-verification strategy
+
+Determine how Phase 6 will reproduce findings. Docker-first.
+
+- **Runnable?** Check for `Dockerfile`/`docker-compose*.yml` first (preferred,
+  hermetic), then a native run path (manifest scripts, `Procfile`, `Makefile`
+  targets, framework server command).
+- **Entry command + port** — the exact command that starts the app and the port
+  it binds (read it from config/compose/scripts, don't guess; note env vars and
+  dependent services — DB/cache/queue — needed to boot).
+- **Health check** — how to know it's up (a route, a log line, a port listen).
+- **Not runnable** (library, no server, missing deps, infra-only) → repro falls
+  back to a focused **unit-test** that drives the sink, or a **static PoC** /
+  build-only proof. Say which and why.
+
+Record `run_strategy` as one of:
+`docker-compose | docker | native | unit-test | static-poc`,
+plus the entry command, port, and any boot prerequisites in `notes`.
+
+## 5. Fold in target threat-model guidance
+
+Check for `<target>/.claude/claude-security-guidance.md`. If present, read it and
+fold its threat model into scope: crown-jewel assets, known trust boundaries,
+in/out-of-scope paths, prior findings, and any class-specific guidance. Let it
+RAISE priority and tighten scope; it does NOT lower the signal bar. Summarize the
+relevant points in `notes` and reflect any scope/priority changes in steps 2–4.
+If absent, note that and proceed with defaults.
+
+## 6. Signal discipline (binding — carry it into every later phase)
+
+Recon's selections directly gate noise. Enforce the AGENTS.md contract:
+
+- A class is RELEVANT only when there is a plausible REACHABLE path from
+  untrusted input to a dangerous sink with no effective control already on it.
+  No class earns a slot on defense-in-depth grounds alone.
+- No posture/process items (missing SECURITY.md, SBOM, license, maintainership).
+  These never gate a finder; at most they land in the report's Info appendix.
+- No style/lint nits, no unreachable/dead code, no speculative surfaces.
+- Prefer a tight scope that proves a few real issues over a broad scope that
+  drowns them. When you skip a class, you are asserting there is no reachable
+  source→sink — make that assertion only when the surface map backs it.
+
+## 7. Emit the recon summary
+
+Output exactly ONE structured object (this is the phase deliverable; later
+phases consume it). Shape:
+
+```json
+{
+  "stack": "<one normalized stack label>",
+  "frameworks": ["<framework/orm/template/queue lib>", "..."],
+  "run_strategy": "docker-compose | docker | native | unit-test | static-poc",
+  "entrypoints": [
+    { "kind": "http|cli|queue|cron|webhook", "ref": "file:line",
+      "detail": "GET /x | subcommand | consumer", "auth": "none|user|admin" }
+  ],
+  "attack_surface": [
+    { "kind": "route|cli|consumer|deser|file|outbound|authz|secret|template",
+      "ref": "file:line", "source": "<untrusted origin>",
+      "sink": "<dangerous op>", "existing_control": "<sanitizer/authz or null>",
+      "classes": ["<finder keys this surface feeds>"] }
+  ],
+  "relevant_classes": [
+    { "class": "<key>", "priority_surfaces": ["file:line", "..."] }
+  ],
+  "skipped_classes": [
+    { "class": "<key>", "reason": "<why no reachable source->sink>" }
+  ],
+  "notes": "run command + port + boot prereqs; polyglot/key rationale; target security-guidance points; blind spots/auth-gated areas; anything Phase 6 needs to boot the app"
+}
+```
+
+Rules for the object: `stack` is exactly one normalized label; `relevant_classes` +
+`skipped_classes` together cover all 14, no overlap; every `attack_surface` entry
+has a real `file:line`; `entrypoints` is the subset of surfaces where untrusted
+input first enters. Keep `notes` operational — it is the bridge to Phase 6.
diff --git a/plugins/security/prompts/report-template.md b/plugins/security/prompts/report-template.md
new file mode 100644
index 0000000..a39b2e8
--- /dev/null
+++ b/plugins/security/prompts/report-template.md
@@ -0,0 +1,94 @@
+<!--
+AGENT GUIDANCE — read, do not emit this comment block.
+
+You are writing the final audit report for SENIOR ENGINEERS who understand
+security. Optimize for signal and brevity.
+
+Rules:
+- Lead with the worst. Sort findings by severity desc, then status.
+- Body = Critical/High/Medium that are `confirmed` or `likely` ONLY.
+  Low/Info and `triage` candidates go in the appendix. Never bury a Critical
+  under a pile of nits.
+- Per finding: 2–4 sentences of prose, max. Assume the reader knows what SSRF
+  is. Explain THIS instance, not the vuln class.
+- References do the heavy lifting instead of prose: cite terse linked IDs
+  (CWE-89, A05:2025, ASVS V1.2.4). One line of refs, not a paragraph.
+- Evidence is the point. Every confirmed finding shows the PoC command and the
+  observed result. A finding with neither evidence nor a source→sink trace does
+  not belong in the body — move it to triage.
+- Proposed fix = the high-level DIRECTION of the change (what must change and
+  why), 1–2 sentences. NOT a diff, exact code, or step-by-step patch — the actual
+  implementation is the next human/agent's job.
+- If there are zero Critical/High, say so plainly in the summary — that is a
+  good result, not a reason to inflate Mediums.
+- NO status/tally table. The GitHub scan issue and its sub-issues are the live
+  status (open/fixed); duplicating it here just goes stale. Weave the counts into
+  the summary prose ("one critical and one high, both confirmed"). Verification
+  status (confirmed/likely) stays as each finding's badge.
+- Render the commit SHA BARE — no backticks. The report lands in GitHub issues,
+  which auto-link a bare 7–40 char hex SHA to its commit page; backticks make it
+  inert code and kill the link. Same for any other bare commit hash you cite.
+- Omit empty sections.
+-->
+
+# Security Audit — {{target}} @ {{ref}}
+
+**Scope:** {{paths_in_scope}} · **Out of scope:** {{paths_excluded}}
+**Commit:** {{commit}} · **Date:** {{date}} · **Method:** static + dynamic (isolated worktree, live PoC) · **Tool:** vuln-audit {{version}}
+
+## Summary
+
+{{2–4 sentences: overall posture and the single most important thing to fix
+first. Name the dominant risk theme. Weave the counts into the prose (e.g. "one
+critical and one high, both confirmed; one medium") — no status table, the
+GitHub scan issue is the live status.}}
+
+## Findings
+
+<!-- One block per confirmed/likely Critical/High; Mediums go under "Lower severity" below as one-liners. Repeat. -->
+
+### [{{id}}] {{title}} · {{Severity}} · {{Confirmed|Likely}}
+
+**Class:** {{vuln_class}} · **Refs:** [{{CWE}}](https://cwe.mitre.org/data/definitions/{{n}}.html) · [{{A0x:2025}}](https://owasp.org/Top10/) · [ASVS {{Vx.y.z}}](https://github.com/OWASP/ASVS)
+**Location:** `{{file}}:{{line}}`{{ · +N other call sites}}
+
+{{2–4 sentences: the specific flaw, the untrusted source, the sink, and why the
+path is reachable (no effective sanitizer/authz). Senior audience — be direct.}}
+
+**PoC**
+```
+{{$ command that reproduced it}}
+{{observed output that proves impact}}
+```
+
+**Impact:** {{one line.}}
+**Proposed fix:** {{1–2 sentences — the high-level direction of the change needed
+and why (e.g. "resolve identity from a server-side session keyed by user id, not
+the client cookie"). NOT a diff or line-level patch — implementation is left to
+whoever picks up the issue.}}
+
+---
+
+## Lower severity (Medium)
+
+<!-- Confirmed/likely Mediums as one-liners. -->
+- `{{file:line}}` — {{one-line description}} — {{ref}} — **fix:** {{one-liner}}
+
+## Appendix
+
+### Low / Info
+| Location | Note | Ref |
+|----------|------|-----|
+| `{{file:line}}` | {{one line}} | {{ref}} |
+
+### Triage — not confirmed
+<!-- Candidates that did not survive verification or could not be reproduced.
+     Listed for transparency so reviewers can re-check; not asserted as bugs. -->
+- `{{file:line}}` — {{candidate}} — **why unconfirmed:** {{refuted by verify / could not reproduce / needs prod-like data}}
+
+### Coverage & method
+- **Classes assessed:** {{list}} · **Skipped (not applicable):** {{list}}
+- **ASVS chapters touched:** {{list}}
+- **Dynamic verification:** {{how the target was built/run; what was and wasn't reproducible and why}}
+- **Tools used:** {{semgrep/gitleaks/trivy if present, else "LLM-native"}}
+- **Blind spots:** {{anything not reachable by this audit — auth-gated areas, external services, etc.}}
diff --git a/plugins/security/skills/audit/SKILL.md b/plugins/security/skills/audit/SKILL.md
new file mode 100644
index 0000000..fe83f68
--- /dev/null
+++ b/plugins/security/skills/audit/SKILL.md
@@ -0,0 +1,84 @@
+---
+description: >-
+  Run a white-box, dynamically-verified security audit (internal pentest) of a
+  target code repository. Use when the user asks to audit/pentest a repo for
+  vulnerabilities, find security bugs with proof, or runs /security:audit.
+  Produces a terse, senior-engineer report of proven findings with live PoCs and
+  a high-level proposed fix per finding.
+---
+
+# security:audit
+
+Drives the bundled `vuln-audit` workflow: recon → triage → consolidate → deep
+review → adversarial verify → dynamic PoC → report. Design spec and data
+contracts are in `${CLAUDE_PLUGIN_ROOT}/AGENTS.md`; output handling / issue
+tracking in `${CLAUDE_PLUGIN_ROOT}/docs/issue-tracking.md`. Read the spec before
+changing anything.
+
+## Input
+
+```
+/security:audit <target-path> [--no-dynamic] [--classes a,b,c] [--ref <git-ref>] [--out <dir>]
+```
+
+`$ARGUMENTS` holds the target path and any flags.
+
+- `<target-path>` — absolute path to the repo to audit (required).
+- `--no-dynamic` — skip the build/run/PoC phase (static + adversarial verify only).
+- `--classes` — restrict to specific vuln-class keys (see `AGENTS.md` taxonomy).
+- `--ref` — git ref to audit (default `HEAD`).
+- `--out` — writable directory for the output bundle (default: `<cwd>/vuln-audit-reports`).
+
+## Steps
+
+1. **Parse `$ARGUMENTS`** into the target path + flags. The bundled tool root is
+   `${CLAUDE_PLUGIN_ROOT}` (expands to the plugin's install dir; it holds
+   `prompts/`, `workflows/`, and the docs — read-only).
+2. **Pick a writable `outDir`** — the plugin root is read-only/ephemeral, so the
+   bundle must go elsewhere. Use `--out` if given, else `<cwd>/vuln-audit-reports`
+   (absolute path). This is also where a courier later fetches the bundle from.
+3. **Validate the target** — confirm it exists and is a git repo
+   (`git -C <target> rev-parse --git-dir`). Worktree isolation and the live-PoC
+   phase need git. If it isn't a repo, warn and proceed with `--no-dynamic`.
+   Resolve the ref to a concrete commit with
+   `git -C <target> rev-parse --short '<ref or HEAD>^{commit}'` (peels annotated tags) so the run is pinned and
+   reproducible; carry both the ref name and the resolved SHA.
+4. **Preflight host capabilities** → assemble a `hostNotes` string: is `docker`
+   usable non-interactively (note if it needs `sudo`); which native runtimes are
+   present (`python3`, `node`, `ruby`, `go`, `crystal`, ...). If dynamic is on
+   but neither docker nor a usable native runtime exists, fall back to
+   `--no-dynamic` and say repro will be static/unit-test only.
+5. **Check target threat-model** — note whether
+   `<target>/.claude/claude-security-guidance.md` exists; recon folds it in.
+6. **Announce the run** — before invoking, print a one-line startup summary:
+   target name, the resolved commit (short SHA), and the absolute output
+   directory. Name the ref only when it isn't `HEAD` (e.g. `v1.2.0 a1b2c3d`);
+   for a plain `HEAD` run just show the SHA. Drop anything left at its default.
+7. **Invoke the workflow** (it runs in the background and notifies on completion):
+   ```
+   Workflow({ scriptPath: '${CLAUDE_PLUGIN_ROOT}/workflows/vuln-audit.js', args: {
+     toolRoot: '${CLAUDE_PLUGIN_ROOT}',
+     outDir: '<abs writable outDir>',
+     target: '<abs target-path>',
+     ref: '<ref or HEAD>',
+     dynamic: <true unless --no-dynamic>,
+     onlyClasses: <array or omit>,
+     scope: '<what is in/out of scope>',
+     hostNotes: '<from step 4>'
+   }})
+   ```
+8. **Present the result** — when it completes, read `report_path` and give a
+   tight summary: severity counts and the top 1–3 confirmed findings (title +
+   location + one-line impact). Point to the bundle dir; don't paste the whole
+   report. Surface anything that blocked dynamic verification.
+
+## Notes
+
+- High-signal is the contract: the workflow drops noise, posture/process items,
+  and unreachable findings on purpose. Don't reintroduce them in the summary.
+- The report's remediation is a **high-level proposed fix** (direction, not a
+  patch) — implementation is left to whoever takes the finding.
+- This is the deepest layer of defense-in-depth, complementing the in-session
+  security-guidance plugin, `/security-review`, and PR Code Review.
+- Authorized testing only: target repos you own or are explicitly cleared to
+  audit. All PoC traffic stays local; never fire exploits at external hosts.
diff --git a/plugins/security/workflows/vuln-audit.js b/plugins/security/workflows/vuln-audit.js
new file mode 100644
index 0000000..b7730d2
--- /dev/null
+++ b/plugins/security/workflows/vuln-audit.js
@@ -0,0 +1,246 @@
+export const meta = {
+  name: 'vuln-audit',
+  description: 'White-box, dynamically-verified security audit of a target repo (recon -> triage -> deep review -> adversarial verify -> dynamic PoC -> report)',
+  whenToUse: 'Invoked by the /vuln-audit skill. Runs a multi-phase, high-signal security audit and writes a senior-engineer report.',
+  phases: [
+    { title: 'Recon', detail: 'detect stack, map attack surface, pick run strategy' },
+    { title: 'Triage', detail: 'one finder per relevant vuln class' },
+    { title: 'Consolidate', detail: 'dedup + drop noise + stable IDs' },
+    { title: 'Review', detail: 'deep review: reachable source->sink with no mitigation' },
+    { title: 'Verify', detail: 'adversarial skeptic panel tries to refute' },
+    { title: 'Repro', detail: 'build + run + live PoC in an isolated worktree' },
+    { title: 'Report', detail: 'synthesize the senior-engineer report' },
+  ],
+}
+
+// ---- inputs (assembled by the skill) ----
+// The Workflow runtime may deliver `args` as a JSON string rather than a parsed
+// object; normalize so every input below is read from a real object.
+const A = (typeof args === 'string') ? JSON.parse(args) : (args && typeof args === 'object' ? args : {})
+if (!A.toolRoot || !A.target) {
+  throw new Error(`vuln-audit: missing required args (toolRoot, target). Got keys: ${Object.keys(A).join(', ') || 'none'}`)
+}
+const TOOL = String(A.toolRoot).replace(/\/+$/, '')   // this tool's repo (has prompts/, reports/)
+const TARGET = String(A.target).replace(/\/+$/, '')
+const REF = A.ref || 'HEAD'
+const DYNAMIC = A.dynamic !== false                    // dynamic verification on by default
+const ONLY = (() => {           // accept array, JSON-string-of-array, or comma-separated string
+  let v = A.onlyClasses
+  if (typeof v === 'string') { try { v = JSON.parse(v) } catch (_) { v = v.split(',') } }
+  if (Array.isArray(v)) { const a = v.map(s => String(s).trim()).filter(Boolean); return a.length ? a : null }
+  return null
+})()
+const TARGET_NAME = TARGET.split('/').pop() || 'target'
+const SCOPE = A.scope || TARGET
+const HOST = A.hostNotes || ''        // host capability/constraint notes (e.g. "docker needs sudo; python3 native available")
+const OUT = A.outDir ? String(A.outDir).replace(/\/+$/, '') : TOOL   // writable bundle output dir; defaults to toolRoot (standalone), but a plugin MUST pass a writable outDir — the plugin root is read-only/ephemeral
+
+// Vuln-class taxonomy — single source of truth (mirrors the AGENTS.md table).
+// `code` is the human-facing display-id token; `title`, owasp/cwe/asvs and `focus` are
+// interpolated into each finder prompt as that class's canonical mapping and hunt scope.
+const CLASS_META = {
+  'access-control': { code: 'AC', title: 'Broken Access Control & IDOR', owasp: 'A01:2025', cwe: 'CWE-639/862/863/601', asvs: 'V8', focus: 'missing/incorrect authz, IDOR by object key, tenant isolation, open redirect' },
+  ssrf: { code: 'SSRF', title: 'Server-Side Request Forgery', owasp: 'A01:2025', cwe: 'CWE-918', asvs: 'V4', focus: 'caller-influenced destination of a server-side outbound request reaching internal/metadata endpoints' },
+  injection: { code: 'INJ', title: 'Injection (SQL/NoSQL/OS/LDAP/XPath)', owasp: 'A05:2025', cwe: 'CWE-89/78/943/90/74', asvs: 'V1/V2', focus: 'untrusted input crossing into the command/query structure with no parameterization/escaping/allowlist' },
+  'xss-ssti': { code: 'XSS', title: 'XSS & Template Injection', owasp: 'A05:2025', cwe: 'CWE-79/1336/116', asvs: 'V1/V3', focus: 'untrusted data emitted into an HTML/JS context unescaped, or controlling template source (SSTI)' },
+  'auth-session': { code: 'AUTH', title: 'Authentication & Session', owasp: 'A07:2025', cwe: 'CWE-287/384/620/640/521', asvs: 'V6/V7/V9/V10', focus: 'login/session/token issuance & validation, password reset, MFA, JWT/OAuth flaws' },
+  crypto: { code: 'CRYPTO', title: 'Cryptographic Failures', owasp: 'A04:2025', cwe: 'CWE-327/328/326/330/916/295', asvs: 'V11', focus: 'weak/broken algos, fast or unsalted password hashing, weak RNG for security, missing cert validation' },
+  deserialization: { code: 'DESER', title: 'Insecure Deserialization & Integrity', owasp: 'A08:2025', cwe: 'CWE-502/494/345', asvs: 'V2/V15', focus: 'untrusted bytes to a native/object deserializer; unverified code/data integrity' },
+  'path-file': { code: 'PATH', title: 'Path Traversal & File Handling', owasp: 'A01:2025', cwe: 'CWE-22/98/73/434', asvs: 'V5', focus: 'caller-controlled path segment reaching a file op (traversal, RFI, zip-slip, dangerous upload)' },
+  secrets: { code: 'SEC', title: 'Hardcoded Secrets & Credentials', owasp: 'A02:2025', cwe: 'CWE-798/259/321/547', asvs: 'V14', focus: 'live credentials/keys/connection strings committed in code or config' },
+  misconfig: { code: 'MISC', title: 'Security Misconfiguration', owasp: 'A02:2025', cwe: 'CWE-16/614/942/1004/611', asvs: 'V13', focus: 'debug flags, permissive CORS, insecure cookie flags, exposed admin, XXE, default creds' },
+  'supply-chain': { code: 'SUPPLY', title: 'Software Supply Chain & CI/CD', owasp: 'A03:2025', cwe: 'CWE-1104/1357/829/506', asvs: 'V15', focus: 'dangerous CI workflows (pull_request_target + untrusted checkout, script injection, over-broad tokens, unpinned actions), malicious lifecycle scripts, known-vuln/typosquat deps — code-exploitable only' },
+  'logging-errors': { code: 'LOG', title: 'Logging, Error & Exception Handling', owasp: 'A09/A10:2025', cwe: 'CWE-532/209/755/703/396', asvs: 'V16', focus: 'sensitive data in logs, stack traces/state leaked in errors, fail-open exception handling, log injection' },
+  'dos-redos': { code: 'DOS', title: 'Denial of Service & ReDoS', owasp: 'A06:2025', cwe: 'CWE-1333/400/770/834', asvs: 'V2', focus: 'user input to a catastrophic-backtracking regex, unbounded alloc/loop, decompression bomb, expensive parse' },
+  'csrf-cors': { code: 'CSRF', title: 'CSRF, CORS & Clickjacking', owasp: 'A01:2025', cwe: 'CWE-352/1021/942', asvs: 'V3', focus: 'state-changing cookie-auth routes lacking CSRF defense, reflective/permissive CORS, missing framing protection' },
+}
+const ALL_CLASSES = Object.keys(CLASS_META)   // every valid class key; used to validate --classes and recon's relevant set
+const CLASS_CODE = Object.fromEntries(Object.entries(CLASS_META).map(([k, v]) => [k, v.code]))   // class key -> display-id token, e.g. 'ssrf' -> 'SSRF'
+// Stable cross-scan dedup key: a djb2-style hash over class|file|sink, rendered as 8 hex digits.
+// It depends only on the finding itself, so the VM and the courier compute the same value (no shared allocator needed).
+function fpHash(s) { return Array.from({ length: s.length }, (_, i) => s.charCodeAt(i)).reduce((acc, unit) => ((acc * 33) ^ unit) >>> 0, 5381).toString(16).padStart(8, '0') }
+function fingerprint(f) { const file = (f.file || '').toLowerCase(); const sink = (f.sink || '').toLowerCase(); return fpHash(`${f.vuln_class}|${file}|${sink}`) }
+
+const SIGNAL = 'SIGNAL DISCIPLINE: audience is senior engineers; stay high-signal. Only treat as real an issue with a REACHABLE path from untrusted input to a dangerous sink, with no effective sanitizer/validator/authz on the path. No style nits, no generic defense-in-depth without a concrete sink, no unreachable/dead code, no posture/process items. Prefer a few proven findings over many speculative ones.'   // shared noise policy appended verbatim to agent prompts (recon, finders, consolidate)
+
+const LENSES = {   // refutation angles keyed by lens name; each value states when a reviewer should refute (consumer is outside this chunk — presumably the Verify phase)
+  exploitability: 'Can a real attacker trigger this with realistic access, and is the impact as claimed? If it needs implausible preconditions, refute.',
+  reachability: 'Is the sink actually reachable from untrusted input at runtime given routing, auth guards, and feature flags? If the path is gated or dead, refute.',
+  correctness: 'Is the technical claim accurate — is this API/pattern genuinely dangerous here, or has the code been misread (safe wrapper, parameterized, framework-escaped)? If misread, refute.',
+}
+
+// ---- schemas ----
+const FINDING_PROPS = {   // fields of one finding; only the keys named in FINDING.required are mandatory
+  id: { type: 'string' },
+  title: { type: 'string' },
+  vuln_class: { type: 'string' },
+  owasp: { type: 'string' },
+  cwe: { type: 'string' },
+  asvs: { type: 'string' },
+  severity: { enum: ['critical', 'high', 'medium', 'low', 'info'] },
+  status: { enum: ['confirmed', 'likely', 'triage'] },
+  confidence: { enum: ['low', 'medium', 'high'] },
+  file: { type: 'string' },
+  line: { type: 'integer' },
+  code_excerpt: { type: 'string' },
+  source: { type: 'string' },
+  sink: { type: 'string' },
+  data_flow: { type: 'string' },
+  sanitizers_checked: { type: 'string' },
+  rationale: { type: 'string' },
+  exploit_sketch: { type: 'string' },
+  dynamic_poc_plan: { type: 'string' },
+  proposed_fix: { type: 'string' },
+  locations: { type: 'array', items: { type: 'string' } },   // extra call sites when consolidate collapses one root cause into a single finding
+}
+const FINDING = { type: 'object', properties: FINDING_PROPS, required: ['title', 'vuln_class', 'severity', 'file', 'rationale'], additionalProperties: true }
+const FINDINGS = { type: 'object', properties: { findings: { type: 'array', items: FINDING } }, required: ['findings'], additionalProperties: true }   // {findings:[...]} envelope returned by the finders and by consolidate
+// Matches the contract emitted by prompts/recon.md: `stack` is a label,
+// `run_strategy` is an enum string, `relevant_classes` is [{class, priority_surfaces}].
+const RECON = {
+  type: 'object',
+  properties: {
+    stack: { type: 'string' },
+    frameworks: { type: 'array', items: { type: 'string' } },
+    run_strategy: { enum: ['docker-compose', 'docker', 'native', 'unit-test', 'static-poc'] },
+    entrypoints: { type: 'array', items: { type: 'object', additionalProperties: true } },
+    attack_surface: { type: 'array', items: { type: 'object', additionalProperties: true } },
+    relevant_classes: { type: 'array', items: { type: 'object', properties: { class: { type: 'string' }, priority_surfaces: { type: 'array', items: { type: 'string' } } }, required: ['class'], additionalProperties: true } },
+    skipped_classes: { type: 'array', items: { type: 'object', additionalProperties: true } },
+    notes: { type: 'string' },
+  },
+  required: ['stack', 'run_strategy', 'relevant_classes'],   // the three fields read when selecting classes and deciding whether the target is runnable
+  additionalProperties: true,
+}
+const DEEP = { type: 'object', properties: { keep: { type: 'boolean' }, reject_reason: { type: 'string' }, finding: FINDING }, required: ['keep', 'finding'], additionalProperties: true }   // NOTE(review): consumer is outside this chunk — presumably the Review phase
+const VERDICT = { type: 'object', properties: { lens: { type: 'string' }, refuted: { type: 'boolean' }, confidence: { enum: ['low', 'medium', 'high'] }, reasoning: { type: 'string' } }, required: ['refuted', 'reasoning'], additionalProperties: true }   // NOTE(review): presumably one verdict per LENSES entry — confirm against the Verify phase
+const REPRO = { type: 'object', properties: { reproduced: { type: 'boolean' }, method: { enum: ['live-exploit', 'unit-test', 'build-only', 'static-poc'] }, environment: { type: 'string' }, setup_commands: { type: 'array', items: { type: 'string' } }, poc: { type: 'string' }, observed: { type: 'string' }, impact: { type: 'string' }, notes: { type: 'string' } }, required: ['reproduced', 'method'], additionalProperties: true }   // `method` values mirror the live-exploit / fallback ladder in the dynamic-verification prompt
+const SYNTH = { type: 'object', properties: { report: { type: 'string' }, path: { type: 'string' }, stats: { type: 'object' } }, required: ['report'], additionalProperties: true }   // NOTE(review): consumer is outside this chunk — presumably the Report phase
+
+const AGENT = { agentType: 'general-purpose' }   // spread into the options object of the agent() calls
+
+// ---- phase 1: recon ----
+phase('Recon')
+const recon = await agent(
+  `Follow the recon instructions in ${TOOL}/prompts/recon.md. Read that file first, then perform PHASE-1 recon on the target repository at ${TARGET} (ref ${REF}). Use Read/Grep/Bash/ast-grep to inspect it. Output the structured recon summary.\nHOST CONSTRAINTS (factor into run_strategy — do not pick a strategy the host can't execute): ${HOST || 'none noted'}.\n${SIGNAL}`,
+  { label: 'recon', phase: 'Recon', schema: RECON, ...AGENT },
+)
+
+// Explicit --classes is authoritative; otherwise use recon's relevant set (objects -> keys).
+let classes
+if (ONLY) {
+  classes = ONLY.filter(c => ALL_CLASSES.includes(c))
+} else {
+  classes = (recon.relevant_classes || []).map(c => (typeof c === 'string' ? c : c && c.class)).filter(c => ALL_CLASSES.includes(c))
+  if (!classes.length) classes = ALL_CLASSES
+}
+const runnable = DYNAMIC && ['docker-compose', 'docker', 'native'].includes(recon.run_strategy)
+log(`recon: ${recon.stack} | strategy: ${recon.run_strategy} | classes: ${classes.join(', ')} | dynamic: ${runnable ? 'yes' : 'no'}`)
+
+// ---- phase 2: triage finders ----
+phase('Triage')
+const finderResults = (await parallel(classes.map(k => () => agent(
+  `Hunt the "${k}" vulnerability class (${CLASS_META[k].title}) in the target. FIRST read the finder method at ${TOOL}/prompts/finder.md and follow it. Class context — OWASP ${CLASS_META[k].owasp}, CWE ${CLASS_META[k].cwe}, ASVS ${CLASS_META[k].asvs}; focus: ${CLASS_META[k].focus}. Target: ${TARGET} (ref ${REF}). Prioritize these surfaces surfaced by recon: ${JSON.stringify((recon.attack_surface || []).slice(0, 40))}. Inspect code with Read/Grep/Bash/ast-grep. ${SIGNAL} Return {findings:[...]}; each candidate must fill source, sink, data_flow, sanitizers_checked, and set owasp/cwe/asvs from the class context. Return {findings:[]} if nothing real.`,
+  { label: `find:${k}`, phase: 'Triage', schema: FINDINGS, ...AGENT },
+)))).filter(Boolean)
+const raw = finderResults.flatMap(r => (r && r.findings) || [])
+log(`triage: ${raw.length} raw candidates from ${classes.length} finders`)
+
+// ---- phase 3: consolidate (barrier: needs all candidates at once) ----
+phase('Consolidate')
+let consolidated = []
+if (raw.length) {
+  const c = await agent(
+    `You are the triage lead for a security audit of ${TARGET}. Raw candidate findings from per-class finders:\n${JSON.stringify(raw)}\n\nDeduplicate: collapse the same root cause across multiple call sites into ONE finding with a locations[] list. Assign stable ids by class (AC-1, SSRF-1, INJ-1, ...). Drop noise per the signal policy. Order by severity. ${SIGNAL} Return {findings:[...]}.`,
+    { label: 'consolidate', phase: 'Consolidate', schema: FINDINGS, ...AGENT },
+  )
+  consolidated = (c && c.findings) || []
+}
+log(`consolidated: ${consolidated.length} candidate findings`)
+
+// ---- phases 4-6: per-finding pipeline (deep review -> verify -> repro); skipped (processed = []) when consolidation left nothing ----
+const processed = consolidated.length ? await pipeline(
+  consolidated,
+  // 4. deep review: one agent per finding, answering with the DEEP shape (keep, optional reject_reason, finding)
+  (f) => agent(
+    `Deep-review this candidate against the target ${TARGET} (ref ${REF}). Finding:\n${JSON.stringify(f)}\n\nRead the surrounding code: the sink, its callers, any sanitizers/validators/authz on the path, and related files — as a careful reviewer would. Decide if there is a REACHABLE path from untrusted input to the sink with no effective mitigation. If it is a false positive, unreachable, mitigated, or out of scope, set keep=false with a short reject_reason. Otherwise keep=true and return the finding enriched with accurate severity, confidence, data_flow, sanitizers_checked, and a high-level proposed_fix (the DIRECTION of the change and why — not a diff or line-level patch; implementation is left to whoever takes the issue). ${SIGNAL}`,
+    { label: `review:${f.id || f.title}`, phase: 'Review', schema: DEEP, ...AGENT },
+  ),
+  // 5. adversarial verify (skeptic panel): one agent per LENSES key tries to refute the finding; 2+ refutations reject it
+  async (rev) => {
+    if (!rev || !rev.keep) return rev  // nothing to verify: the review produced no result or already rejected the finding
+    const votes = (await parallel(Object.keys(LENSES).map(lens => () => agent(
+      `You are an INDEPENDENT security skeptic. Try to REFUTE this finding for target ${TARGET}, using the "${lens}" lens. Read the actual code to check. Finding:\n${JSON.stringify(rev.finding)}\n\nLens: ${LENSES[lens]}\nDefault to refuted=true if you cannot establish a concrete, reachable exploit. Return your verdict.`,
+      { label: `verify:${rev.finding.id || 'f'}:${lens}`, phase: 'Verify', schema: VERDICT, ...AGENT },
+    )))).filter(Boolean)  // ignore skeptics that returned no verdict
+    const refutes = votes.filter(v => v.refuted).length
+    return { ...rev, keep: refutes < 2, refuted: refutes >= 2, verdicts: votes }  // fixed threshold of 2, independent of how many lenses voted
+  },
+  // 6. dynamic repro (only survivors, only if runnable); everything else is tagged repro: null
+  async (rev) => {
+    if (!rev) return rev
+    if (!rev.keep || !runnable) return { ...rev, repro: null }
+    const repro = await agent(
+      `Reproduce this finding dynamically against a RUNNING instance of the target, to prove it. Finding:\n${JSON.stringify(rev.finding)}\nRun strategy: ${recon.run_strategy}. Stack: ${recon.stack}; frameworks: ${JSON.stringify(recon.frameworks || [])}. Boot notes from recon: ${JSON.stringify(recon.notes || '')}.\nFollow the repro playbook at ${TOOL}/prompts/playbook.md. Create a git worktree of ${TARGET} at ${REF} so the original tree is untouched; build & run it (docker-first). Use a UNIQUE container name and an ephemeral host port keyed to "${rev.finding.id || 'f'}" to avoid collisions with parallel repros. Fire the PoC and capture the observed result as evidence. Keep ALL traffic local — no external targets, no real credentials, no exfiltration. Tear down containers/processes and the worktree when done. HOST CONSTRAINTS (honor when choosing how to run — e.g. if docker is unavailable, run the app natively instead): ${HOST || 'none noted'}. If it genuinely cannot run live, fall back to a unit-test or static PoC and set method accordingly. Return the repro result.`,
+      { label: `repro:${rev.finding.id || 'f'}`, phase: 'Repro', schema: REPRO, ...AGENT },
+    )
+    return { ...rev, repro }  // repro may be empty if the agent returned nothing; status normalisation below treats that as "not reproduced"
+  },
+) : []
+
+const results = processed.filter(Boolean)
+
+// normalize status: confirmed (live repro) > likely (kept, no repro) > triage (rejected)
+const finalFindings = results.map(r => {
+  const f = { ...r.finding }
+  if (!r.keep) f.status = 'triage'
+  else if (r.repro && r.repro.reproduced) f.status = 'confirmed'
+  else f.status = 'likely'
+  const fp = fingerprint(f)
+  const display_id = `${TARGET_NAME}-${CLASS_CODE[f.vuln_class] || 'GEN'}-${fp.slice(0, 4)}`  // provisional; courier swaps the suffix for the GitHub issue number
+  return { ...f, fp, display_id, kept: !!r.keep, reject_reason: r.reject_reason || null, verdicts: r.verdicts || null, repro: r.repro || null }
+})
+
+// tally
+const sevOrder = ['critical', 'high', 'medium', 'low', 'info']
+const counts = { bySeverity: {}, byStatus: { confirmed: 0, likely: 0, triage: 0 }, total: finalFindings.length }
+for (const s of sevOrder) counts.bySeverity[s] = 0
+for (const f of finalFindings) {
+  if (counts.bySeverity[f.severity] !== undefined) counts.bySeverity[f.severity]++
+  if (counts.byStatus[f.status] !== undefined) counts.byStatus[f.status]++
+}
+log(`results: ${counts.byStatus.confirmed} confirmed, ${counts.byStatus.likely} likely, ${counts.byStatus.triage} triage`)
+
+// ---- phase 7: synthesize the bundle (report.md + findings.json + manifest.json) ----
+phase('Report')
+const BUNDLE = `${OUT}/reports/${TARGET_NAME}`
+const synth = await agent(
+  `Produce the audit BUNDLE — the self-contained artifact a separate "courier" agent will fetch and file to GitHub. Create the directory ${BUNDLE}/ and write THREE files.
+
+SOURCE DATA — the finalized findings (each carries fp, display_id, severity, status, source/sink/data_flow, PoC via repro.observed, and proposed_fix):
+${JSON.stringify(finalFindings)}
+
+Severity tally: ${JSON.stringify(counts)}
+Recon: ${JSON.stringify({ stack: recon.stack, frameworks: recon.frameworks, run_strategy: recon.run_strategy, relevant_classes: classes, skipped: recon.skipped_classes })}
+
+1. ${BUNDLE}/report.md — the human report. FIRST read ${TOOL}/prompts/report-template.md and follow it EXACTLY. Use each finding's "display_id" as its [ID] in the headings. Target: ${TARGET} (ref ${REF}). Scope: ${SCOPE}. Fill {{commit}} with the resolved commit SHA (the same value you compute for manifest.json via git rev-parse) rendered BARE — no backticks — so GitHub auto-links it. Body = confirmed/likely Critical/High/Medium only; appendix = Low/Info + triage (with why) + coverage & method. Terse, senior-oriented; let CWE/OWASP-2025/ASVS refs carry the explanation; show PoC evidence for confirmed findings.
+
+2. ${BUNDLE}/findings.json — write the SOURCE DATA array above VERBATIM as JSON. Preserve every field and all PoC/observed/fix text exactly; do NOT summarize, reorder, or drop fields. This is the machine interface the courier reconciles against (keyed by "fp").
+
+3. ${BUNDLE}/manifest.json — a JSON object describing the scan. Get real values via Bash: \`date -u +%Y-%m-%dT%H:%M:%SZ\` for date; \`git -C ${TARGET} rev-parse HEAD\` for commit; \`git -C ${TARGET} remote get-url origin\` for the repo (normalize an SSH/HTTPS URL to "owner/repo"). Shape: { "tool": "vuln-audit", "schema": 1, "repo": "<owner/repo or null>", "target_path": "${TARGET}", "ref": "${REF}", "commit": "<full sha>", "slug": "${TARGET_NAME}", "date": "<utc iso8601>", "dynamic": ${runnable}, "classes_assessed": ${JSON.stringify(classes)}, "counts": ${JSON.stringify(counts)} }.
+
+Return {report: "<the full report.md content>", path: "${BUNDLE}/report.md", stats: ${JSON.stringify(counts)}}.`,
+  { label: 'synthesize', phase: 'Report', schema: SYNTH, ...AGENT },
+)
+
+return {
+  bundle_dir: BUNDLE,
+  report_path: (synth && synth.path) || `${BUNDLE}/report.md`,
+  findings_path: `${BUNDLE}/findings.json`,
+  manifest_path: `${BUNDLE}/manifest.json`,
+  report: synth && synth.report,
+  counts,
+  stack: recon.stack,
+  classes_assessed: classes,
+  runnable: !!runnable,
+}