diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json
index 6f29b6b..af34398 100644
--- a/.claude-plugin/marketplace.json
+++ b/.claude-plugin/marketplace.json
@@ -1670,6 +1670,23 @@
         "security",
         "compliance"
       ]
+    },
+    {
+      "name": "tandem",
+      "source": "./plugins/tandem",
+      "description": "Tandem — the Claude Code project-management plugin. Drives a full North Star → Done delivery lifecycle from slash commands, with enforced DoR/DoD gates, mandatory Story↔Testplan pairing, automatic ADRs, auto-filed bugs on test failure, and a self-generating live HTML Command Center.",
+      "version": "2.6.0",
+      "author": {
+        "name": "DATA-AI-XYZ",
+        "url": "https://github.com/DATA-AI-XYZ"
+      },
+      "category": "Project & Product Management",
+      "homepage": "https://github.com/ccplugins/awesome-claude-code-plugins/tree/main/plugins/tandem",
+      "keywords": [
+        "project-management",
+        "okr",
+        "agile"
+      ]
     }
   ]
-}
\ No newline at end of file
+}
diff --git a/README.md b/README.md
index e4de615..7ef4c36 100644
--- a/README.md
+++ b/README.md
@@ -177,6 +177,7 @@ Install or disable them dynamically with the `/plugin` command — enabling you
 - [project-shipper](./plugins/project-shipper)
 - [sprint-prioritizer](./plugins/sprint-prioritizer)
 - [studio-producer](./plugins/studio-producer)
+- [tandem](./plugins/tandem)
 - [tool-evaluator](./plugins/tool-evaluator)
 - [workflow-optimizer](./plugins/workflow-optimizer)
 
diff --git a/plugins/tandem/.claude-plugin/plugin.json b/plugins/tandem/.claude-plugin/plugin.json
new file mode 100644
index 0000000..576a377
--- /dev/null
+++ b/plugins/tandem/.claude-plugin/plugin.json
@@ -0,0 +1,23 @@
+{
+  "name": "tandem",
+  "description": "Tandem — the Claude Code project-management plugin. Drives a full North Star → OKRs → PRD → Epic → Feature → Story → Testplan → ship lifecycle from slash commands, with a closed-set status enum, mandatory Story↔Testplan pairing, Definition-of-Ready/Done gates, automatic ADRs, auto-filed bugs on test failure, and a self-generating interactive HTML Command Center. Plain Markdown, no external tools, stack-agnostic.",
+  "version": "2.6.0",
+  "author": {
+    "name": "DATA-AI-XYZ",
+    "url": "https://github.com/DATA-AI-XYZ"
+  },
+  "homepage": "https://github.com/DATA-AI-XYZ/Tandem",
+  "repository": "https://github.com/DATA-AI-XYZ/Tandem",
+  "license": "MIT",
+  "category": "Project & Product Management",
+  "keywords": [
+    "claude-code",
+    "claude-code-plugin",
+    "claude",
+    "project-management",
+    "agile",
+    "okr",
+    "developer-tools",
+    "slash-commands"
+  ]
+}
diff --git a/plugins/tandem/LICENSE b/plugins/tandem/LICENSE
new file mode 100644
index 0000000..ae11b86
--- /dev/null
+++ b/plugins/tandem/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2026 DATA-AI-XYZ
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/plugins/tandem/README.md b/plugins/tandem/README.md
new file mode 100644
index 0000000..60a5a92
--- /dev/null
+++ b/plugins/tandem/README.md
@@ -0,0 +1,149 @@
+<div align="center">
+
+# Tandem
+
+**Tandem — the Claude Code project-management plugin.** Your co-pilot for shipping ideas without the chaos.
+
+[![version](https://img.shields.io/badge/version-2.6.0-1A1714)](https://github.com/DATA-AI-XYZ/Tandem/releases)
+[![license](https://img.shields.io/badge/license-MIT-2D6CDF)](LICENSE)
+[![Claude Code plugin](https://img.shields.io/badge/Claude%20Code-plugin-D63031)](https://code.claude.com/docs/en/plugins)
+
+[**▶ Live demo — the Tandem Command Center**](https://data-ai-xyz.github.io/Tandem/)
+
+</div>
+
+---
+
+Tandem is a Claude Code plugin that takes you from idea to production — without the chaos. You drive the whole plan with slash commands; Tandem makes sure nothing slips: no stories go in-progress without a testplan, no work ships without passing its gates, no decision disappears into the chat log. The result is a team-quality delivery rhythm, at solo-founder pace.
+
+---
+
+## How it works
+
+Tandem installs a `_00-Project-Management/` scaffold into your project and registers a set of `/Tandem:*` skills that cover the full North Star → Done lifecycle. Hooks keep everything honest: a `PostToolUse` linter that runs on every PM file edit, and a `Stop` generator that rebuilds an interactive HTML **Command Center** whenever your plan changes; `hooks/hooks.json` also registers a `UserPromptSubmit` hook. All three hooks run a single stdlib-only Node entrypoint (`node ${CLAUDE_PLUGIN_ROOT}/_00-Project-Management/93-Scripts/hook.js`) directly — no `npm` step is involved.
+
+It's **stack-agnostic** — the bootstrap asks what you're building (web, mobile, CLI, library, backend, data-pipeline, Power Platform, or automation) and tailors the guidance to match.
+
+### Why it's different from "AI project management"
+
+Most "AI project management" is a chat log. Tandem is a contract:
+
+- **Closed-set status enum** — exactly nine statuses, never invented ad-hoc, so every board reads the same.
+- **Story ↔ Testplan pairing (enforced)** — you cannot create a Story without a paired Testplan where every acceptance criterion maps to a runnable test case. No "trust me, it works."
+- **DoR / DoD gates** — work can't enter *in-progress* without meeting Definition of Ready, and can't reach *done* without Definition of Done. The gates are checked, not assumed.
+- **ADR-on-the-spot** — every non-obvious decision becomes an Architecture Decision Record in the same edit, so the *why* is never lost.
+- **Auto bug-raising** — the moment a test case fails, a structured BUG file is filed before the failure is even reported back to you.
+- **A living Command Center** — a single self-contained HTML view of your entire plan, regenerated automatically. (That's the [live demo](https://data-ai-xyz.github.io/Tandem/) above.)
+
+## The lifecycle
+
+```mermaid
+flowchart LR
+  NS[North Star] --> OKR[OKRs]
+  OKR --> PRD[PRD]
+  PRD --> E[Epic]
+  E --> F[Feature]
+  F --> S[Story]
+  S --> TP[Testplan]
+  TP --> X[execute-story]
+  X --> R[run-testplan]
+  R --> C[close-out-story]
+  C --> M[(Command Center)]
+  M -. weekly / monthly review .-> OKR
+```
+
+Every arrow is a slash command. Every box is a markdown artefact in your repo.
+
+## Install
+
+```bash
+# 1. Add the Tandem marketplace
+/plugin marketplace add DATA-AI-XYZ/Tandem
+
+# 2. Install the plugin
+/plugin install Tandem@DATA-AI-XYZ
+
+# 3. Bootstrap it into your project (drops _00-Project-Management/, wires hooks, seeds CLAUDE.md)
+/Tandem:session-start
+```
+
+On install Tandem will:
+
+1. Drop the `_00-Project-Management/` scaffold into your project root (if absent).
+2. Register the `/Tandem:*` skills covering the full North Star → Done lifecycle.
+3. Enable three hooks — lint-on-edit (`PostToolUse`), Command-Center-regen-on-stop (`Stop`), and a `UserPromptSubmit` hook (see `hooks/hooks.json`).
+4. Insert a slim PM rules block into your root `CLAUDE.md` (idempotent, under a managed marker).
+
+> No plugin access? Tandem also ships a paste-prompt installer — see [`BOOTSTRAP-PROMPT.md`](BOOTSTRAP-PROMPT.md).
+
+## Slash commands
+
+| Command | Hat | When to use |
+|---|---|---|
+| `/Tandem:session-start` | any | Orient at the start of a session: read active work, recent ADRs, the board; announce the next step |
+| `/Tandem:draft-okrs` | Founder | Draft quarterly OKRs from a North Star |
+| `/Tandem:draft-prd` | Founder→PM | Draft a PRD from an OKR or raw notes |
+| `/Tandem:draft-epic` | PM | Draft an Epic from an OKR key result or PRD section |
+| `/Tandem:split-into-features` | PM | Decompose an Epic into Features |
+| `/Tandem:split-into-stories` | PM | Decompose a Feature into Stories + paired Testplans |
+| `/Tandem:refine-backlog` | PM | DoR gate — promote to *ready* or list the gaps; never silently promotes |
+| `/Tandem:execution-strategist` | PM | Plan how to execute an Epic — group stories into batches with lanes & sub-agents |
+| `/Tandem:execute-story` | Dev | Pull a *ready* Story into active work |
+| `/Tandem:execute-batch` | Dev | Run a whole strategy "batch" of stories end-to-end |
+| `/Tandem:run-testplan` | QA | Run every test case; auto-file BUGs on failure |
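+| `/Tandem:close-out-story` | QA→PM | DoD gate (incl. AI-code review) + board update |
+| `/Tandem:close-phase` | PM | Close out a whole phase once every story is *done*: retrospective, follow-up capture, board update, gated merge to `main` |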
+| `/Tandem:weekly-monitor` | PM | Friday weekly summary; flag stalls and blocks |
+| `/Tandem:monthly-retro` | Founder/PM | Monthly retrospective |
+| `/Tandem:fill-claude-md` | any | Author/refresh `CLAUDE.md` files across the codebase |
+| `/Tandem:reflect` | any | End-of-session reflection: propose improvements (you approve before applying) |
+| `/Tandem:core` | — | Force-load the core PM rules (usually auto-loaded) |
+
+Skills are model-invoked — Claude auto-loads them when your task matches — but explicit invocation always works.
+
+## The Command Center
+
+The headline feature. A single self-contained HTML file, regenerated from your markdown plan, with tabs for the plan tree, the monitor board, the execution strategy, and a glossary. It's built to be glanceable and stays current automatically (the Stop hook regenerates it whenever a PM file changes).
+
+**[▶ Open the live demo](https://data-ai-xyz.github.io/Tandem/)** — generated from a fabricated sample project (not real data), so it's safe to share and explore.
+
+<!-- Screenshots live under docs/ once generated:
+![Tandem Command Center — light](docs/screenshot-light.png)
+![Tandem Command Center — dark](docs/screenshot-dark.png)
+-->
+
+## What's inside
+
+```
+Tandem/
+├── .claude-plugin/
+│   ├── plugin.json            Manifest (name: Tandem)
+│   └── marketplace.json       DATA-AI-XYZ marketplace listing
+├── skills/                    The /Tandem:* skills (full lifecycle)
+├── hooks/                     PostToolUse (lint) + UserPromptSubmit + Stop (Command-Center regen)
+├── docs/                      Live-demo Command Center (GitHub Pages)
+├── BOOTSTRAP-PROMPT.md        Paste-prompt installer (no-plugin path)
+├── CONTRIBUTING.md · SECURITY.md · CHANGELOG.md · LICENSE
+└── README.md
+```
+
+## Project types supported
+
+`web-app` · `mobile` · `cli` · `library` · `backend-service` · `data-pipeline` · `power-platform` · `automation` — the bootstrap injects the matching gotchas and per-type guidance.
+
+## Contributing & security
+
+- [`CONTRIBUTING.md`](CONTRIBUTING.md) — how to propose changes.
+- [`SECURITY.md`](SECURITY.md) — responsible disclosure.
+
+## License
+
+[MIT](LICENSE) — provided **"as is"**, without warranty of any kind (see [LICENSE](LICENSE) and [NOTICE.md](NOTICE.md)).
+
+## Disclaimer
+
+"Claude" and "Claude Code" are trademarks of Anthropic, PBC. Tandem is an independent project and is **not affiliated with, endorsed by, or sponsored by Anthropic**. Tandem runs locally; its scripts and hooks make **no network calls** and collect **no telemetry**. It creates and edits files under your project's `_00-Project-Management/` tree — review what it does and use it at your own risk. See [NOTICE.md](NOTICE.md) for full details.
+
+## Contact
+
+- Web: <https://www.dataxyzconnect.com>
+- Email: info@dataxyzconnect.com · Maintained by DATA-AI-XYZ
diff --git a/plugins/tandem/hooks/hooks.json b/plugins/tandem/hooks/hooks.json
new file mode 100644
index 0000000..e944a65
--- /dev/null
+++ b/plugins/tandem/hooks/hooks.json
@@ -0,0 +1,35 @@
+{
+  "hooks": {
+    "PostToolUse": [
+      {
+        "matcher": "Write|Edit",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "node \"${CLAUDE_PLUGIN_ROOT}/_00-Project-Management/93-Scripts/hook.js\" post-tool-use"
+          }
+        ]
+      }
+    ],
+    "UserPromptSubmit": [
+      {
+        "hooks": [
+          {
+            "type": "command",
+            "command": "node \"${CLAUDE_PLUGIN_ROOT}/_00-Project-Management/93-Scripts/hook.js\" user-prompt-submit"
+          }
+        ]
+      }
+    ],
+    "Stop": [
+      {
+        "hooks": [
+          {
+            "type": "command",
+            "command": "node \"${CLAUDE_PLUGIN_ROOT}/_00-Project-Management/93-Scripts/hook.js\" stop"
+          }
+        ]
+      }
+    ]
+  }
+}
diff --git a/plugins/tandem/skills/close-out-story/SKILL.md b/plugins/tandem/skills/close-out-story/SKILL.md
new file mode 100644
index 0000000..29eef99
--- /dev/null
+++ b/plugins/tandem/skills/close-out-story/SKILL.md
@@ -0,0 +1,127 @@
+---
+name: close-out-story
+description: Close out a STORY through the Definition of Done gate. Use when the testplan is fully PASS, when the user asks to close a story, to finish a story, to mark a story done, or to run the DoD check. Runs the DoD checklist, flips status to done, updates MONITOR, regenerates the dashboard.
+---
+
+# Tandem: close-out-story (QA → PM hat)
+
+Operate as **QA hat** transitioning to **PM hat** for the MONITOR update. The testplan is `done` (all TCs PASS) and the story needs to gate through DoD.
+
+## Inputs needed
+
+- Story file path — try canonical (`_00-Project-Management/32-Stories/EPIC-NN/FEAT-NN.M/STORY-NN.M.PP-*.md`) then flattened (`_00-Project-Management/03-Stories/EPIC-NN/FEAT-NN.M/STORY-NN.M.PP-*.md`).
+- The paired testplan should already be `done`. If not, redirect to `/Tandem:run-testplan` first.
+
+## Load into context
+
+The canonical layout is the scaffold under `_00-Project-Management/` (12-Active, 32-Stories, 40-Decisions, 42-Monitor, 90-Standards). Older / flattened repos may use alternate names — accept any of the below as a match. If NONE of the candidates exist for a given role, note it in the DoD output (don't fabricate scaffolding) and degrade gracefully (e.g. skip the WIP-removal step if no ACTIVE.md exists; the `status: done` flip is the source-of-truth anyway).
+
+- **Story file** — at the resolved path from "Inputs needed" above.
+- **Paired testplan** — under `_00-Project-Management/33-Testplans/...` (canonical) OR `_00-Project-Management/05-Test/...` (flattened).
+- **SOP / DoD reference** — `_00-Project-Management/90-Standards/SOP.md` if present. If absent, fall back to project-root `CLAUDE.md` for DoD-equivalent rules.
+- **Project-wide quality gate definitions** — `_00-Project-Management/90-Standards/PROJECT-CONTEXT.md` if present. If absent, infer from `package.json` scripts (see "Lint (scoped)" under DoD checklist item 3).
+- **Monitor** — try in order:
+  - `_00-Project-Management/42-Monitor/MONITOR.md` (canonical)
+  - `_00-Project-Management/00-Monitor/MONITOR.md`
+  - `_00-Project-Management/00-Monitor/STORY-MONITOR.md` (older naming, e.g. Curated Lagos)
+- **Active WIP index** — `_00-Project-Management/12-Active/ACTIVE.md` (canonical) OR `_00-Project-Management/00-Active/ACTIVE.md`. If neither exists, skip the WIP-removal step.
+- **ADR folder** — `_00-Project-Management/40-Decisions/` (canonical) OR `_00-Project-Management/06-ADR/` (flattened). Required for DoD item 7's "ADRs present + linked" check.
+- **Project root `CLAUDE.md`** — always loaded for project-specific overrides.
+- **Prior HTML context (`html_context:`)** — if the story frontmatter carries a non-empty `html_context:` array, `Read` every repo-relative path it lists (explorations, annotated diffs, options-comparisons) into context **before** running the DoD AI-code review (item 6). These are the prior HTML artefacts the human reviewer read; the review agent reviews against the same architectural reasoning. Skip entries that don't resolve (validator R16 already flags missing/traversal paths at `pm:lint` — don't double-report, just note the skip). Treat the SOP §11 50 KB guideline as advisory: if a listed file is very large, summarise rather than reading it whole.
+
+Use `Read` / `Glob` to detect existence rather than assuming; treat missing files as "not present" rather than throwing.
+
+## Task — run the DoD checklist verbatim
+
+For each item, mark PASS / FAIL with evidence.
+
+### DoD checklist
+
+1. [ ] All AC checkboxes in the story file are ticked.
+2. [ ] All TCs in the testplan have `Result: PASS — YYYY-MM-DD` (or `FAIL` linked to a `wontfix`/accepted BUG with explicit user approval).
+3. [ ] Project quality commands pass (from `PROJECT-CONTEXT.md` if present, else inferred from `package.json` scripts) — **scoped to the changed area, not the full repo**:
+   - [ ] **Lint (scoped).** Resolution order: (a) `npm run lint` if defined in `package.json`; (b) `npx eslint <changed files>` IF a standalone ESLint config (`.eslintrc*`, `eslint.config.*`, or `eslintConfig` in `package.json`) exists at repo root; (c) if neither — most CRA/Vite/Next projects bundle ESLint into the build pipeline and have no standalone config — use `npm run build` as the lint substitute and note it explicitly in the DoD result row. NEVER report "lint skipped" silently; either it ran or it was substituted by build.
+   - [ ] Type check (scoped if possible).
+   - [ ] Unit tests (scoped to the modified module). For CRA projects, note that `npm test` only scans `src/`; tests outside `src/` (e.g. `firebase-functions/`, `scripts/`) need `npx jest <path> --testEnvironment=node`.
+   - [ ] Build.
+4. [ ] No new errors in the error tracker after smoke run (if applicable).
+5. [ ] If UI: visual contract tests green (per `PROJECT-CONTEXT.md`).
+6. [ ] **AI-code review pass.** First, if the story's `html_context:` array is non-empty, `Read` each listed prior HTML artefact into context (see "Load into context" above) so the review runs against the same architectural reasoning the human reviewer had. Then **delegate the code review to `/Tandem:peer-review`** — the canonical, reusable six-dimension, severity-ranked (blocker / major / minor) review contract (FEAT-05.2). Run it against the story's diff plus the prior HTML context; `peer-review` reviews for correctness, security, performance, maintainability, test coverage, and error paths, and **emits the AI-CODE-REVIEW HTML artefact** (see "AI-code-review artefact" below) — one canonical review path whether the review is invoked ad-hoc or here at the DoD gate, so the standalone skill and this gate never drift. Close-out then records the outcome in frontmatter and enforces the blocker gate below; the findings live in the artefact, not just the story body.
+
+   **When to run (closed list — both rules in force):**
+   - **Always** if the story ships test code (any new `*.test.*` / `*.spec.*` / `tests/rules/*` file, OR new `describe(...)` / `test.describe(...)` block in an existing test file), regardless of net-line count. **Reason:** test code that ships without an independent review tends to contain mock-vs-real coverage gaps, duplicate structural blocks (precedent: STORY-00.4.01 close-out 2026-05-23 H1), and assertion-shape brittleness that the dev pass misses. Three close-outs over 2026-05-22..2026-05-23 each surfaced 1+ HIGH finding under this rule.
+   - **Always** if the diff exceeds 50 net lines across >2 files (the original general-purpose threshold).
+   - **Skip** only for typo fixes, copy edits, one-line config tweaks, or pure frontmatter / status edits where no executable code or schema changed.
+
+   **AI-code-review artefact (when the review runs — SOP §7.1):**
+   1. Copy `_00-Project-Management/91-Templates/AI-CODE-REVIEW.template.html` to `_00-Project-Management/41-Reports/AI-CODE-REVIEW-<story-id>-<YYYY-MM-DD>.html` (today's date).
+   2. Interpolate the real unified diff into the diff slot (`data-slot="diff"`) and one `<article class="anno-card severity-<level>">` per finding into the annotation slot (`data-slot="annotations"`). Each annotation carries `data-severity` (`blocker` / `critical` / `warning` / `nit`), `data-file`, `data-line`, `data-category` (security / correctness / perf / style / dead-code), plus reasoning and a suggested fix. Render reasoning/fix as text only — never `innerHTML` (XSS-safe).
+   3. Write the artefact's repo-relative path into the story's `ai_review_artefact:` frontmatter, and set `ai_review: completed-YYYY-MM-DD` (today's date). **Set this token MECHANICALLY — never copy the review's verdict word.** The `ai_review:` field is a lifecycle marker, not a verdict: it is ALWAYS one of `completed-<today>` / `skipped-trivial` / `n-a`, regardless of whether the review's outcome was "APPROVE", "LGTM", "REJECT", or anything else. Copying a verdict word (e.g. `ai_review: approve`) is the exact defect BUG-20260608-01 recorded; validator **R14** now rejects any non-terminal `ai_review` on a `done` story, so a verdict word there will fail `pm:lint`.
+   4. **Blocker gate (hard rule, SOP §7.1):** count the `data-severity="blocker"` annotations. **If blocker count > 0, this DoD item FAILs** — do NOT flip `status: done`. Report the blockers, fix them, re-review, and regenerate the artefact until the blocker count is zero. critical / warning / nit findings do not block the flip but must be triaged.
+   5. For a **skipped** review (`skipped-trivial` / `n-a`), do NOT produce an artefact and leave `ai_review_artefact:` empty — validator R15b exempts those. Don't emit an empty placeholder artefact.
+7. [ ] All ADRs created during execution are present in the resolved ADR folder (`40-Decisions/` canonical, or `06-ADR/` flattened) and linked from the story's `decisions:` array.
+8. [ ] Any tech debt observed during this work has a corresponding BACKLOG-NNNN file.
+
+## If ANY item FAILs
+
+- STOP. Do not flip status to `done`.
+- Show the DoD result table.
+- List the gap clearly. For each gap, propose the smallest fix.
+- Ask the user before continuing.
+
+## If ALL items PASS
+
+1. Flip story status: `in-review` → `done`. Set `completed_at` to now (ISO 8601 + offset). **Atomic edit** — status + timestamp in the same write.
+
+2. Remove the story from the resolved ACTIVE.md (canonical `12-Active/ACTIVE.md` OR flattened variant). **Skip this step entirely if no ACTIVE.md exists** — the `status: done` flip on the story file is the canonical source of WIP truth; the ACTIVE.md is just a cache.
+
+3. Update the resolved MONITOR file in the **same response**:
+   - Increment the shipped count (and per-epic / per-feature counts) IF the monitor maintains numeric totals.
+   - Tick the bar character (░ → █) if MONITOR uses progress bars.
+   - Prepend a one-line entry to the revision history with today's ISO date and the story ID + short outcome.
+
+4. **Dashboard regeneration is project-specific.** Probe `package.json` for a `pm:dash` script (or equivalent — `dash`, `dashboard`, `monitor`). If it exists AND no Stop hook is configured for the project, run it. If neither the script nor a Stop hook exists, skip silently — not every project has a generated dashboard. Don't fabricate the command name.
+
+## Output rules
+
+- Show the DoD result table **before** flipping status — gives the user a chance to override.
+- DoD is non-negotiable. "Tests are flaky so I'll skip" is not allowed — file a BUG or BACKLOG entry instead, and don't close the story until that's addressed.
+- If a TC failed earlier but the bug was accepted as `wontfix` or pushed to BACKLOG, link the decision explicitly in the close-out note on MONITOR.
+
+## Non-negotiable rules from CLAUDE.md
+
+- Frontmatter timestamps (`completed_at` set in the SAME edit as `status: done`).
+- Status enum.
+- DoD gate is mandatory.
+- MONITOR update is in the same response as the status flip.
+- Dashboard regen is handled by the Stop hook IF one is configured for the project; only run manually if no hook is active AND a dashboard script exists.
+
+## End-of-close-out summary (always emit)
+
+- DoD result: PASS / FAIL with gaps
+- Story status now: `done` | still `in-review`
+- MONITOR updated: yes / no
+- Decisions captured: <list of ADRs>
+- Tech debt captured: <list of BACKLOG entries>
+- Uncommitted work: report the `git status --porcelain` line count; if > 0, note plainly that `done` means finished-and-verified-on-disk, NOT committed, and ask "commit now?". Do NOT auto-commit — commit only on explicit user request. (Stops `done` stories silently piling up uncommitted across back-to-back sessions.)
+- Next Ready story to pull: <suggestion>
+
+## Reset conversation Mode if this was the last story in the phase
+
+After the story is `done` and MONITOR is updated, check whether any other story in the
+SAME phase/feature set is still `in-progress`, `in-review`, or `ready`:
+
+- **If none remain** (this was the last open story in the phase) → reset the mode:
+  `node _00-Project-Management/93-Scripts/mode.js set neutral --by auto-neutral --session <session_id>`
+  Announce it: *"Last story in the phase closed — Tandem mode reset to Neutral."*
+- **If others remain** → leave the mode unchanged. Do NOT reset on every story close.
+
+Use the same status scan you already perform for the phase; reuse `npm run pm:monitor`
+output or the live frontmatter you just read. Never reset silently. Use the session ID
+from the session context as `<session_id>`.
+
+## Next command
+
+Next: `/Tandem:close-phase`
+
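+When every story in the phase is `done`, close the whole phase (retrospective + gated merge). If other stories in the phase are still open, pull the next *ready* story with `/Tandem:execute-story` instead.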
diff --git a/plugins/tandem/skills/close-phase/SKILL.md b/plugins/tandem/skills/close-phase/SKILL.md
new file mode 100644
index 0000000..a03b919
--- /dev/null
+++ b/plugins/tandem/skills/close-phase/SKILL.md
@@ -0,0 +1,218 @@
+---
+name: close-phase
+description: Close out a whole phase after an execution-strategist chat / execute-batch run finishes — gate on every phase story being done, compile a phase retrospective, capture follow-ups, update the board, then run a gated merge to main. Use when the user asks to close a phase, wrap up a phase, finish a batch/chat, run a phase retro, or integrate a finished phase.
+---
+
+# Tandem: close-phase (PM hat)
+
+Operate as **PM hat**. `close-phase` is the **phase-level analogue of `close-out-story`**: where
+`close-out-story` is the per-story Definition-of-Done gate, this is its per-phase counterpart —
+invoked after an `execution-strategist` chat / `execute-batch` run finishes, to wrap a whole
+**phase** up safely and integrate it.
+
+> **Opener counterpart:** `start-phase` is the opener this skill closes against. `start-phase`
+> **opens** a phase — it cuts the phase branch `phase/<phase-id>` off `main` per the shared
+> **phase-branch convention** (recorded in `40-Decisions/`, ADR-0045); `close-phase` **closes**
+> that same phase — it merges that branch back to `main` (Steps 6–7). Both skills obey the one
+> branch convention, so the branch the opener creates is exactly the one the closer merges.
+
+It runs in a fixed order, each step gated on the one before it:
+
+1. **Phase-scope detection** — resolve the set of stories in an explicit target.
+2. **Done-gate** — every phase story must be `done`, or abort and list the gaps.
+3. **Retrospective** — what shipped, what went well / what didn't, the phase metrics.
+4. **Follow-up capture** — file BACKLOG items + an ADR backstop for anything surfaced.
+5. **Board update** — write the phase report, update `MONITOR.md`, regenerate the dashboard.
+6. **Gated merge to `main`** — integrate only when the merge gate passes; never force-merge.
+
+> **Dry-run-until-gated.** Steps 3–5 only read and append artefacts; the merge step is hard-gated
+> and never force-merges (see "Integration"). If the done-gate fails, the skill stops and reports
+> — it never partially wraps up an incomplete phase.
+
+## Step 1 — Phase-scope detection (EXPLICIT target — never guess)
+
+Take an **explicit** phase / chat / epic **target** from the user — never infer which phase to
+close from ambient state. Accepted targets:
+
+- a **strategist phase** or **chat id** (e.g. `CHAT-02`) from an `EXECUTION-STRATEGY-*.json`
+  sidecar — resolve to the `stories[]` listed under that chat / phase;
+- an **`EPIC-NN`** (or a single `FEAT-NN.M`) — resolve the **set of stories** belonging to that
+  epic / feature by globbing `32-Stories/EPIC-NN/...`.
+
+**Resolve the set of stories in the target phase** before doing anything else, then echo the
+resolved list (id + status) back to the user so the scope is explicit and reviewable. If the
+target is ambiguous, missing, or resolves to zero stories, **stop and ask** for a concrete
+phase / chat / epic — do not guess which stories are in scope.
+
+## Step 2 — Done-gate (every phase story must be `done`)
+
+Verify **every** resolved phase story is `status: done`. This is a hard gate:
+
+- If **all** phase stories are `done` → proceed to the retrospective (Step 3).
+- If **any** are not `done` → **abort** and **list the not-done stories** (each `id` + its current
+  `status`), so the operator knows exactly which stories still block the close. Do **not** compile
+  a retro, capture follow-ups, update the board, or merge for an incomplete phase.
+
+This mirrors `close-out-story`'s gate-then-act discipline at the phase level: the gate is
+non-negotiable, and the abort-and-list path is the load-bearing behaviour — a half-closed phase
+is worse than an un-closed one.
+
+## Step 3 — Compile the phase retrospective
+
+Once the done-gate passes, compile a **phase retrospective** — **derived from the phase's own
+artefacts** (its stories, their paired testplans, and the `34-Bugs/` + `40-Decisions/` filed
+during the phase), never invented from memory. Three parts:
+
+- **What shipped** — list the phase's `done` **stories** and the **PASS** results of their paired
+  **testplans** (the TCs that verify each story). One line per story: what it delivered.
+- **What went well / what didn't** — a short, honest reflection: **what went well** this phase,
+  and **what didn't go well / what to improve** next phase. Keep it specific to this phase's work,
+  not generic platitudes.
+- **Metrics** — the phase's hard numbers, read straight from the artefacts: **bugs** filed
+  (`34-Bugs/`), **ADRs** created (`40-Decisions/`), and the execution **lanes** used (the
+  serial / parallel lanes from the `execution-strategist` strategy this phase ran under).
+
+Because every part is **sourced from the phase artefacts** — the stories' and testplans' statuses
+and results, the bugs and ADRs filed in the phase — the retro is reproducible and auditable, not a
+subjective recollection. This is **phase-cadence**, distinct from the time-cadence retros
+(`weekly-monitor` / `monthly-retro`): it closes one phase, not a calendar window.
+
+## Step 4 — Capture follow-ups (BACKLOG + ADR backstop)
+
+Before touching the board, sweep the phase for loose ends and **capture** them so nothing
+surfaced during the work is lost:
+
+- **Follow-up capture (BACKLOG)** — for any **tech-debt**, deferred **idea**, or **follow-up**
+  the phase surfaced, **file a BACKLOG item** (`11-Backlog/BACKLOG-NNNN-<slug>.md`, from
+  `91-Templates/BACKLOG.template.md`). This mirrors `reflect` / `refine-backlog`: a follow-up that
+  isn't filed is a follow-up that's lost.
+- **ADR backstop** — verify an **ADR exists** for every non-obvious decision the phase made (the
+  **ADR-on-the-spot** rule). If a decision was made during the phase but no ADR was filed, **file
+  the missing ADR** now (`40-Decisions/ADR-NNNN-<slug>.md`) as a backstop, so the phase's
+  decisions are all on record before the phase closes.
+
+## Step 5 — Update the board
+
+Write the phase up and refresh the live board:
+
+- **Phase report** — write the retrospective (Step 3) plus the captured follow-ups (Step 4) to the
+  **phase-report home**, `41-Reports/PHASE-<phase-id>-<YYYY-MM-DD>.md` (the home is fixed by a
+  recorded ADR — see "Recorded decisions" below).
+- **MONITOR** — update `42-Monitor/MONITOR.md`: a phase summary plus a one-line **revision-history**
+  entry dated today.
+- **Dashboard** — regenerate it with `npm run pm:dash` so `42-Monitor/DASHBOARD.html` reflects the
+  closed phase.
+
+### Recorded decisions
+
+The skill's **name + phase-granularity** and the **phase-report home** are settled once in an ADR
+(`40-Decisions/`), not re-litigated each phase. The home choice is **`41-Reports/PHASE-*`** (the
+phase report is a generated execution artefact alongside `EXECUTION-STRATEGY-*`), rather than
+`14-Retros/` (reserved for the time-cadence weekly/monthly retros).
+
+## Step 6 — Integration: merge the phase to `main` (gated)
+
+The integration step runs after the wrap-up — the retro, follow-up capture, and board update
+(Steps 3–5) all happen first; only then does the phase merge to `main`.
+
+### Step 6a — Already-merged detection (retro-only path)
+
+**Before** evaluating the merge gate, detect whether the phase branch has **already been merged**
+to `main` — the real order of events sometimes runs ahead of the board (this kit hit exactly this:
+a phase branch was fast-forwarded into `main` in git while the board still said "merge pending").
+Probe **true ancestry**, not a clean working tree:
+
+```bash
+git merge-base --is-ancestor phase/<phase-id> main && echo "ALREADY-MERGED" || echo "NOT-MERGED"
+```
+
+- **Already-merged** (`git merge-base --is-ancestor phase/<phase-id> main` exits **0** — the phase
+  branch tip is reachable from `main`): route to the **retro-only path**. Steps 2–5 (done-gate,
+  retrospective, follow-up capture, board + MONITOR + dashboard) **still run in full**; only the
+  merge itself (Step 6 gate + Step 7 mechanism) is **skipped and marked already-integrated** in the
+  phase report and the MONITOR revision-history line (e.g. "merge: already-integrated — phase branch
+  is an ancestor of `main`"). Nothing is force-merged or re-merged; the retro + board update are the
+  whole job.
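+- **Not-merged** (`--is-ancestor` exits **1**): the **normal path** — fall through to the merge
+  gate (below) and Step 7. Any other non-zero exit is a git error (e.g. the phase branch does not exist) — stop and report it; do not treat it as not-merged.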
+
+**Gate on true ancestry, never on a clean tree.** `--is-ancestor` is true only when the phase
+branch tip is *fully* reachable from `main`; a **partially-merged** branch (some commits in `main`,
+tip not yet) returns non-zero and therefore takes the normal merge path — a partial merge must
+**not** be mistaken for a complete one. A clean working tree alone says nothing about whether the
+phase was integrated, so it is **not** the signal used here.
+
+On the **not-merged** (normal) path, before anything reaches `main`, a hard **merge gate** must
+pass — **all four** items:
+
+- **All phase stories `done`** — re-confirm the Step-2 gate still holds for every story in the phase.
+- **`npm run pm:lint` green** — the PM artefacts validate.
+- **Build / tests green** — the project's build and tests pass per `PROJECT-CONTEXT.md`'s quality
+  commands (scoped to the area the phase changed).
+- **Clean working tree** — `git status --porcelain` is empty (no uncommitted changes).
+
+If **any** gate item is unmet, the skill **refuses to merge** and **reports which item failed** —
+it does not proceed. A blocked merge names the failing gate item so the operator knows exactly
+what to fix; it never merges a phase that hasn't cleared all four.
+
+## Step 7 — Merge mechanism: PR-default vs gated direct
+
+Once the merge gate (Step 6) passes, integrate via one of two mechanisms — never a force-merge:
+
+- **Open a PR** — the **review-friendly default**. Open a pull request from the phase branch
+  (`phase/<phase-id>` — the branch `start-phase` cut off `main` per the shared convention,
+  ADR-0045) to `main` and **surface the PR command / link** for the operator to review and merge.
+- **Gated direct merge** — for a solo / no-review workflow, a direct merge to `main` is allowed,
+  but only once the Step-6 gate has passed.
+
+In both cases the skill **surfaces the PR / merge command or link** rather than force-merging — it
+**never force-merges** and never bypasses the gate. No `gh` CLI is assumed: surface a
+copy-pasteable command or link; don't hard-call a host API. The PR-vs-gated-direct default and the
+gate composition are recorded in an ADR (`40-Decisions/`) so the integration path is settled once.
+
+### Step 7a — Log the approval (`10-Inbox/APPROVALS.md`)
+
+A merge to `main` is a **manual, gated approval** — when the operator **confirms** it (the gated
+direct merge, or merging the surfaced PR), **append a one-line approval entry** to
+`10-Inbox/APPROVALS.md` so the sign-off survives beyond the chat transcript (audit + handover).
+The same applies on the **already-integrated** retro-only path (Step 6a): record that the close was
+confirmed even though no fresh merge ran. Append (newest at the bottom):
+
+```
+- <ISO 8601 timestamp> — <what was approved> — by: <who> — gated: <artefact / command>
+```
+
+e.g. `- 2026-06-06T15:55:00+01:00 — merge phase/p1-outcome-contract to main — by: operator — gated: close-phase Step 7 (gated direct merge)`.
+
+- Use the **system clock** for the timestamp (ISO 8601 with offset), not the chat-stated date.
+- It's a **one-line append**, not a ceremony. `10-Inbox/` is not a linted artefact folder, so no
+  lint run will catch a malformed entry — keep the file valid plain markdown yourself.
+- If `10-Inbox/APPROVALS.md` does **not exist** yet, **create it** with a short header that
+  documents the one-line entry format shown above, then append the first entry. If it already
+  exists, follow the convention documented at the top of that file.
+
+## Non-negotiable rules (from CLAUDE.md)
+
+- Operates as **PM hat**; the phase-level analogue of `close-out-story`.
+- The **done-gate** (Step 2) and the **merge-gate** (Step 6) are hard gates — abort and report on
+  any unmet item; never partially close a phase and never force-merge.
+- **ADR-on-the-spot** for any non-obvious phase decision; **auto-raise a BUG** for any defect.
+- Status / timestamp flips, the MONITOR update, and the dashboard regen follow the kit's
+  "when you change a status" rule (atomic edit + same-response board update + `npm run pm:dash`).
+
+## End-of-session summary (always emit)
+
+- Phase target + resolved stories (done / not-done).
+- Done-gate: PASS / aborted (+ the not-done gaps).
+- Retro written (+ path); follow-ups captured (BACKLOG + ADR list); board updated (MONITOR + dashboard).
+- Merge gate: PASS / blocked (+ the failing item); mechanism: PR link or gated direct merge.
+- Next step: the surfaced PR / merge command, or the gate gap to fix.
+
+## Reset conversation Mode to Neutral
+
+A closed phase means the active frame's work is done. Always reset:
+
+`node _00-Project-Management/93-Scripts/mode.js set neutral --by auto-neutral --session <session_id>`
+
+Announce it: *"Phase closed — Tandem mode reset to Neutral."* This is also how `dual`
+returns to Neutral. Use the session ID from the session context.
diff --git a/plugins/tandem/skills/core/SKILL.md b/plugins/tandem/skills/core/SKILL.md
new file mode 100644
index 0000000..af52d7d
--- /dev/null
+++ b/plugins/tandem/skills/core/SKILL.md
@@ -0,0 +1,126 @@
+---
+name: core
+description: Core PM Operating Kit rules — closed-set status enum, frontmatter timestamps, Story-Testplan pairing, DoR/DoD gates, hat protocol, ADR-on-the-spot, bug-auto-raise. Use when working anywhere under _00-Project-Management/, when creating or modifying any artefact (epic/feature/story/testplan/bug/ADR), or when uncertain about the project's PM conventions.
+---
+
+# Tandem — core PM rules
+
+You are working in a project that uses the Greenfield PM Operating Kit. These rules are mandatory whenever you touch anything under `_00-Project-Management/`.
+
+## Lifecycle command chain (canonical order — single source of truth)
+
+This is the **one** place the lifecycle command order is recorded (ADR-0047). Every per-command `Next:` pointer in the lifecycle skills must agree with this chain; if a pointer ever disagrees, **this record wins** and the pointer is the bug.
+
+`/Tandem:draft-okrs` → `/Tandem:draft-prd` → `/Tandem:draft-epic` → `/Tandem:split-into-features` → `/Tandem:split-into-stories` → `/Tandem:refine-backlog` → `/Tandem:execution-strategist` → `/Tandem:execute-batch` → `/Tandem:run-testplan` → `/Tandem:close-out-story` → `/Tandem:close-phase`
+
+`close-phase` is terminal (no `Next:` pointer). Cadence / utility skills (`weekly-monitor`, `monthly-retro`, `reflect`, `session-start`, `critique`, `peer-review`, `document`, `curate-toolkit`, `fill-claude-md`, `execute-story`) are **not** chain members; `execute-story` is the single-story alternative to `execute-batch`.
+
+## Reference order — where to look
+
+1. Project root `CLAUDE.md` — pointers + critical gotchas
+2. `_00-Project-Management/CLAUDE.md` — folder semantics
+3. `_00-Project-Management/90-Standards/SOP.md` — full lifecycle, DoR, DoD, frontmatter contract, subagent policy (§18)
+4. `_00-Project-Management/90-Standards/PROJECT-CONTEXT.md` — this project's stack quirks
+5. `_00-Project-Management/90-Standards/DAILY-WORKFLOW.md` — rhythm + worked example
+6. `_00-Project-Management/90-Standards/CLAUDE-CODE-CONFIG.md` — how this plugin maps to Anthropic's Claude Code best practices
+7. Template in `_00-Project-Management/91-Templates/`
+
+## Non-negotiable rules
+
+### Frontmatter timestamps
+
+Every artefact has three timestamp fields:
+
+```yaml
+created_at: ''      # set on file create; ISO 8601 with offset, quoted string
+started_at: ''      # set when status → in-progress
+completed_at: ''    # set when status → done | wontfix | duplicate | archived
+```
+
+- Format: `YYYY-MM-DDTHH:MM:SS±HH:MM`. Always quoted.
+- Source of "now": system clock (`Get-Date -Format "yyyy-MM-ddTHH:mm:sszzz"` or, with GNU `date`, `date -u +"%Y-%m-%dT%H:%M:%S%:z"` — note `%:z`, because plain `%z` prints `+0000` without the required colon), **not** the chat-stated date.
+- Status flip + timestamp set = **same edit**, not separate.
+- Status revert (back to `not-started`) → clear `started_at` and `completed_at` to `''`.
+
+### Status enum — closed set, exactly 9 values
+
+`not-started | ready | in-progress | in-review | done | blocked | wontfix | duplicate | archived`
+
+Never invent values. Never use `open / shipped / completed / fixed / deferred / Planned`.
+
+### Folder layout (full | flattened | custom)
+
+The kit's skills and templates name PM sub-folders in the canonical **"full"** numbering: `30-Epics`, `31-Features`, `32-Stories`, `33-Testplans`, `34-Bugs`, `40-Decisions`, `42-Monitor`, `11-Backlog`, …
+
+A project may instead use a **"flattened"** layout (e.g. `01-EPIC`, `02-Features`, `03-Stories`, `05-Test`, `04-Bug`, `06-ADR`, `00-Monitor`) or a **custom** map.
+
+The scripts resolve the real folder names automatically via `_00-Project-Management/93-Scripts/lib/pm-paths.js`, driven by `.claude-pm-config.json` (`"layout": "full"|"flattened"` and/or a per-key `"paths"` override). `pm:install` pins this; `pm:doctor` reports it.
+
+**INSTRUCTION TO CLAUDE:** when a skill names a folder like `32-Stories`, do **not** assume that literal path — resolve it to the project's actual folder first (read `.claude-pm-config.json`, or list `_00-Project-Management/` and match by role: epics/features/stories/testplans/bugs/decisions/monitor/backlog). On a flattened project: `32-Stories`→`03-Stories`, `33-Testplans`→`05-Test`, `34-Bugs`→`04-Bug`, `40-Decisions`→`06-ADR`, `42-Monitor`→`00-Monitor`.
+
+### Story → Testplan pairing — MANDATORY
+
+When creating a STORY under `32-Stories/EPIC-NN/FEAT-NN.M/`, create the paired TESTPLAN at `33-Testplans/EPIC-NN/FEAT-NN.M/TESTPLAN-NN.M.PP-<slug>.md` **in the same response**.
+
+- Every AC checkbox in the story maps to ≥1 TC in the testplan.
+- Every TC has a runnable `Command:` (no manual steps, no "have a human verify").
+- See `91-Templates/STORY.template.md` + `91-Templates/TESTPLAN.template.md`.
+
+### Bug auto-raise on failure — MANDATORY
+
+Whenever a TC fails or you observe any defect during exploration/code review, file a BUG at `34-Bugs/EPIC-NN/FEAT-NN.M/BUG-<YYYYMMDD-NN>-<slug>.md` **in the same response**, before reporting in chat.
+
+- ID format: `BUG-YYYYMMDD-NN` where `NN` is the day's sequential counter within that FEAT folder.
+- Slug: kebab-case, ≤6 words, describing the symptom.
+- Body includes: reproduction steps, environment snapshot, first analysis hypothesis, suggested fix direction a junior dev can act on.
+- Use `91-Templates/BUG.template.md`.
+
+### DoR gate — before in-progress
+
+Before flipping a story `not-started`/`ready` → `in-progress`, verify the DoR checklist in `SOP.md` §6. If a DoR item is missing, **stop**, list the gap, ask.
+
+### DoD gate — before done
+
+Before flipping a story `in-review` → `done`, verify the DoD checklist in `SOP.md` §7. MONITOR.md update is part of the same response.
+
+### ADR on the spot — MANDATORY for non-obvious decisions
+
+On any non-obvious decision (library choice, schema field name, threshold setting, scope deferral, divergence from defaults), create `40-Decisions/ADR-<NNNN>-<slug>.md` **in the same response**. Number sequentially across the project. Link from the story's `decisions:` array.
+
+### Templates over memory
+
+Every new artefact starts from `91-Templates/<TYPE>.template.md`. Do not redraft section headings from memory.
+
+### Strategy linkage
+
+Every EPIC must have `okr:` or `prd_section:` in frontmatter. Reject epics without strategic linkage — ask "What business outcome does this move?" before writing.
+
+### Hat protocol
+
+State which hat at session start: **Founder · PM · Dev · QA**. Don't mix hats in one session.
+
+| Hat | Owns |
+|---|---|
+| Founder | Strategy, OKRs, epic approvals, sunset decisions |
+| PM | Inbox → Backlog refinement, MONITOR updates |
+| Dev | Code, tests, story status `ready` → `in-progress` → `in-review` |
+| QA | Testplan execution, bug raising, DoD sign-off |
+
+### Conversation Mode (the moat) — enforces the hats across chats
+
+A project-global **Mode** (`plan · dev · dual · neutral`) groups and enforces the hats across separate chats. It persists in `.tandem-mode.json` (repo root, git-ignored) and is injected each message by the `UserPromptSubmit` hook for **joined** chats. Plan groups Founder/PM/QA-planning; Dev groups Dev/QA-execution. On an out-of-mode request, **nudge** (switch / go Dual / one-off) — never hard-block. Set it with `/mode <plan|dev|dual|neutral>`; it auto-resets to Neutral on `close-out-story` (last story in phase) and `close-phase`. Full rules: the **`mode`** skill.
+
+### MONITOR + dashboard
+
+When a story flips to `done`, update `42-Monitor/MONITOR.md` in the same response (tick the bar, update shipped count, prepend revision-history one-liner). The dash hook (`Stop` event) regenerates `DASHBOARD.html` at session end — you don't need to run `npm run pm:dash` manually if the plugin is active.
+
+### Subagent delegation (SOP §18)
+
+- Editing / decisions / status flips → **main thread**.
+- "Where is X / which files reference Y" → **Explore agent** (read-only).
+- Multi-step research, running tests, anything producing noisy logs → **fresh agent**.
+- Never delegate understanding. Agents return evidence; main thread synthesises.
+
+## When in doubt
+
+Bring it to the user. Do not invent rules. Do not silently bend.
diff --git a/plugins/tandem/skills/critique/SKILL.md b/plugins/tandem/skills/critique/SKILL.md
new file mode 100644
index 0000000..14c987a
--- /dev/null
+++ b/plugins/tandem/skills/critique/SKILL.md
@@ -0,0 +1,177 @@
+---
+name: critique
+description: Advisory artefact quality review for planning artefacts (Epic, Feature, Story, Testplan). Use when the user asks to critique, review quality, check a planning artefact, audit a story or epic, or run a pre-refinement quality pass. Detects artefact type and runs the matching check set. Never rewrites the file.
+---
+
+# Tandem: critique (PM hat)
+
+Operate as **PM hat**. The user wants a quality critique of a planning artefact before it enters the DoR gate or is shared with the team. This skill is **advisory**: it detects the artefact's type, runs the matching check set, proposes improvements, and reports findings — it never silently rewrites the file and does not promote status.
+
+## Inputs needed
+
+- Artefact path — one of:
+  - Epic: `_00-Project-Management/30-Epics/EPIC-NN-<slug>.md` (canonical) or equivalent flattened path.
+  - Feature: `_00-Project-Management/31-Features/EPIC-NN/FEAT-NN.M-<slug>.md` or equivalent.
+  - Story: `_00-Project-Management/32-Stories/EPIC-NN/FEAT-NN.M/STORY-NN.M.PP-<slug>.md` or equivalent.
+  - Testplan: `_00-Project-Management/33-Testplans/EPIC-NN/FEAT-NN.M/TESTPLAN-NN.M.PP-<slug>.md` or equivalent.
+- If the user did not supply a path, ask: "Which artefact? Paste the file path."
+
+## Load into context
+
+Use `Read` / `Glob` to detect existence. Treat missing files as "not present" rather than throwing.
+
+- **Target artefact** — at the resolved path.
+- **Quality standards reference** — `_00-Project-Management/90-Standards/SOP.md`. This is the authoritative source for what "well-formed" means for every artefact type. Read the relevant sections at runtime; do not rely on a hardcoded copy in this skill.
+- **Artefact template** — the matching template from `_00-Project-Management/91-Templates/` (e.g. `EPIC.template.md`, `FEATURE.template.md`, `STORY.template.md`, `TESTPLAN.template.md`). Used to verify no required section is absent or still holds a placeholder value.
+- **Parent artefacts** (for Stories and Testplans) — load the parent Feature and Epic to verify strategic linkage and AC ancestry. For a Testplan, also load the paired Story.
+- **Project root `CLAUDE.md`** — always loaded for project-specific overrides.
+
+## Task — type detection and routing
+
+### Step 1 — detect and route
+
+Read the artefact's `type:` frontmatter field. Route to the matching check set:
+
+| `type:` value | Dispatch to |
+|---|---|
+| `epic` | Epic checks |
+| `feature` | Feature checks |
+| `story` | Story checks |
+| `testplan` | Testplan checks |
+
+If `type:` is absent or unrecognised, infer from the filename prefix (EPIC-, FEAT-, STORY-, TESTPLAN-). If inference is also ambiguous, stop and ask the user which type to treat the artefact as.
+
+### Step 2 — run the per-type checks
+
+Run every check for the detected type. For each check: mark **PASS**, **WARN**, or **FAIL** with a one-line reason citing the specific evidence found (or missing) in the artefact.
+
+---
+
+#### Epic checks
+
+1. **Strategic linkage** — does the Epic have a clear `okr:` or `prd_section:` reference? Strategic linkage strength: is it traceable to a measurable business outcome, or is it floating?
+2. **In/out-scope clarity** — does the Epic define what is explicitly in scope and what is out of scope? Ambiguous scope leads to scope creep. Check that both sides are stated.
+3. **Measurable success criteria** — does the Epic carry measurable success criteria (metrics, KPIs, or exit conditions), not just narrative intent? Vague "improve the experience" statements fail this check.
+4. **4-week split check** — is the Epic sized to be completed within roughly four weeks of team effort? Larger Epics must be split. If the Epic bundles more than one coherent strategic theme, flag it as an oversized Epic and propose a split.
+5. **Premise ↔ reality reconciliation** — if the Epic makes a status claim about, or proposes to retire/archive/delete/supersede, a named other artefact, resolve those ids and compare actual `status:` to the claim. A mismatch (or unresolvable id) is a **major** finding (see the Story-check definition for the tell + fixture). Advisory only.
+
+---
+
+#### Feature checks
+
+1. **Goal / user value clarity** — does the Feature have a well-stated goal and articulate the user value it delivers? "User value" must be explicit: who benefits, how, and why it matters.
+2. **AC testability** — are Acceptance Criteria written as machine-verifiable conditions? Subjective ACs ("feels fast", "looks good") fail this check. Each AC must be independently testable.
+3. **Dependency realism** — are dependencies on other Features, services, or external systems listed? For each listed dependency: is it done or scheduled? Floating "depends on TBD" is a FAIL.
+4. **Premise ↔ reality reconciliation** — if the Feature makes a status claim about, or proposes to retire/archive/delete/supersede, a named other artefact, resolve those ids and compare actual `status:` to the claim. A mismatch (or unresolvable id) is a **major** finding (see the Story-check definition for the tell + fixture). Advisory only.
+
+---
+
+#### Story checks
+
+1. **Machine-testable ACs** — every AC must be verifiable by a machine (CLI command, assertion, file check, API call). Subjective or manual-only ACs are a FAIL.
+2. **DoR-readiness** — does the Story satisfy the kit's Definition of Ready checklist? Check: AC checkboxes present; `feature:` and `epic:` frontmatter set; risks section non-empty; estimate set. A Story with DoR gaps should go through `/Tandem:refine-backlog` before being pulled to work — that gate, not this skill, is the authoritative promotion path.
+3. **Estimate sanity / XL → split** — is the estimate set to XS / S / M / L? If the estimate is `XL`, the Story is too large: flag it for splitting before promotion. A missing estimate is a WARN.
+4. **≤5 ACs** — Story ACs must number five or fewer (≤5 ACs). More than five ACs suggests the Story is too broad and should be split.
+5. **Paired-testplan AC↔TC coverage** — if a paired Testplan exists, verify that every AC maps to at least one TC (AC↔TC coverage). If coverage is incomplete, list the uncovered ACs. If no Testplan exists yet and the Story is not in `not-started`, flag the absence.
+6. **`type_of_work` set** — the `type_of_work:` frontmatter field must be set to a concrete discipline (`frontend`, `backend`, `infra`, `data`, `docs`). A missing or placeholder value is a FAIL.
+7. **Premise ↔ reality reconciliation** — when the Story names other artefact ids (`STORY-`/`FEAT-`/`EPIC-`/`ADR-`/`BACKLOG-`) **and** makes a status claim about them (*"X is never-started / superseded / done / obsolete"*) **or** proposes to **retire / archive / delete / supersede / mutate** them, resolve those ids and compare their **actual `status:`** to the claim. A mismatch (or an unresolvable id) is a **major** finding — the premise is empirically false. Advisory only: critique never rewrites; the blocking enforcement lives in `refine-backlog`'s DoR gate. _Fixture: STORY-15.1.02 claimed "STORY-04.6.01–05 are never-started/superseded" and proposed `archived`, but all five are `done` — this check flags that as a major._
+
+---
+
+#### Testplan checks
+
+1. **Every AC mapped to ≥1 TC** — the AC → TC coverage map must be complete. List any AC from the paired Story that has no corresponding TC entry.
+2. **Every TC has a runnable `Command:`** — each Test Case must carry a `Command:` that Claude (or a CI runner) can execute unattended. A TC whose `Command:` is blank, says "manual verification", or says "have a human check" is a FAIL. The Command: must be a runnable shell or CLI instruction — no placeholders, no prose descriptions.
+
+---
+
+### Step 3 — compile findings
+
+After running all checks, convert every non-PASS result into a **finding**. Assign each finding a severity (see §Severity model below), attach a concrete suggested fix, then emit the findings severity-ranked: blockers first, then majors, then minors.
+
+---
+
+## Severity model
+
+Every finding carries exactly one of three severity levels. Severity-ranked output means all findings are ordered blocker → major → minor in the report.
+
+| Level | Meaning | Typical check-verdict origin |
+|---|---|---|
+| **blocker** | Must fix before the DoR gate; leaving it in place will cause rework or invalidate the artefact. | FAIL on a gate-critical check (e.g. missing ACs, no `type_of_work:`, blank TC `Command:`, XL estimate un-split). |
+| **major** | Should fix; the artefact is functional but the gap degrades quality or traceability. | FAIL on a quality check that does not directly block promotion (e.g. weak strategic linkage, floating dependency), or a WARN that represents a meaningful quality risk. |
+| **minor** | Polish or nit; the artefact is DoR-ready but could be cleaner. | Soft WARN (e.g. a slightly vague AC that is still testable, a recommended-but-not-required field). |
+
+### Verdict → severity mapping
+
+Per-check verdicts (PASS / WARN / FAIL) map to finding severities as follows:
+
+- **FAIL** on a gate check → **blocker** (the severity table above lists the gate-critical examples; when in doubt, a FAIL is at least major).
+- **FAIL** on a quality/traceability check → **major**.
+- **WARN** with meaningful quality risk → **major**.
+- **WARN** that is advisory or polish → **minor**.
+- **PASS** → no finding; counted in the PASS total only.
+
+### Per-finding suggested fix
+
+Every finding (blocker, major, or minor) must include a **concrete suggested fix**: a specific, minimal, actionable edit — not a general directive. Examples of correct form:
+
+- "Add `type_of_work: frontend` to the frontmatter after the `estimate:` line."
+- "Replace AC-3 ('feels responsive') with a machine-testable condition: `p95 load time < 300 ms measured by Lighthouse CI`."
+- "Split into two stories: one for the API endpoint, one for the UI surface."
+
+"Improve this section" or "add more detail" are not acceptable suggested fixes.
+
+---
+
+## Output rules
+
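+- This skill is **advisory**. It proposes edits and improvements; it never silently rewrites the artefact or edits any existing file on disk (the only file it may create is the optional HTML critique report, and only on explicit operator request). All proposed changes are shown in the findings report as suggestions for the user to apply.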
+- This skill does not promote status. Artefact status (e.g. `not-started` → `ready`) stays the DoR gate's job. The `/Tandem:refine-backlog` skill owns that promotion path; this critique skill complements it by surfacing quality issues upstream. Running critique before refinement reduces DoR-gate failures but does not replace the gate.
+- Every check gets an explicit PASS / WARN / FAIL verdict — no silent omissions.
+- Proposed fixes are concrete and minimal: "Add a `type_of_work: frontend` line to frontmatter" not "fill in the missing fields".
+- If the artefact is high quality and all checks PASS, say so plainly — the skill is not obligated to find problems.
+
+### Report format
+
+Emit the following sections in order. Findings are severity-ranked (blockers → majors → minors).
+
+**Artefact:** `<path>` (`<type>`) — `<status>`
+**Critique summary:** B blocker / M major / N minor / K PASS
+
+**Blockers (must fix before DoR gate)**
+For each blocker finding: check name · one-line diagnosis · **Suggested fix:** `<specific, minimal, actionable edit>`.
+
+**Major findings (should fix)**
+For each major finding: check name · one-line diagnosis · **Suggested fix:** `<specific, minimal, actionable edit>`.
+
+**Minor findings (polish / nit)**
+For each minor finding: check name · one-line diagnosis · **Suggested fix:** `<specific, minimal, actionable edit>`.
+
+**All checks PASS** _(list count only — "K checks passed")_
+
+**Recommended next step:**
+- If blockers present → fix the gaps, then run `/Tandem:critique` again, then proceed to `/Tandem:refine-backlog`.
+- If only major / minor findings → user decides whether to address them; proceed to `/Tandem:refine-backlog` when ready.
+- If all PASS → artefact is critique-clean; proceed to `/Tandem:refine-backlog` for the official DoR gate.
+
+---
+
+## Optional HTML critique artefact
+
+The operator MAY request a persisted HTML critique artefact. This is **optional** — it is NOT produced by default. Only generate it when the operator explicitly asks (e.g. "save a critique report" or "write the HTML artefact").
+
+When requested, write the artefact to:
+
+`41-Reports/CRITIQUE-<artefact-id>-<YYYY-MM-DD>.html`
+
+Where `<artefact-id>` is the artefact's ID token (e.g. `STORY-05.1.02`) and `<YYYY-MM-DD>` is today's date.
+
+The HTML file must contain the full severity-ranked findings report (same content as the chat output), structured for human readability. The skill **describes** the artefact and writes its content; it does not bundle a renderer or stylesheet beyond basic inline HTML. Use semantic HTML elements (`<h1>`, `<h2>`, `<table>`, `<ul>`) — no external CSS dependencies.
+
+This follows the kit's `41-Reports/` HTML-output convention: all generated report files live under `41-Reports/` at the project root, named with a type prefix, artefact ID, and ISO date.
+
+## Non-negotiable rules from CLAUDE.md
+
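+- Read-only skill: this skill never modifies the artefact under review or any other existing file. Findings are chat-only unless the operator explicitly requests the optional HTML critique report under `41-Reports/`.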
+- Status enum: this skill never touches `status:` in any file — status promotion is not its role.
+- All checks run: no check is skipped silently. If a check cannot be evaluated (e.g. paired Testplan is absent), record it as "cannot evaluate — reason" rather than dropping it.
diff --git a/plugins/tandem/skills/curate-toolkit/SKILL.md b/plugins/tandem/skills/curate-toolkit/SKILL.md
new file mode 100644
index 0000000..27b9bbb
--- /dev/null
+++ b/plugins/tandem/skills/curate-toolkit/SKILL.md
@@ -0,0 +1,225 @@
+---
+name: curate-toolkit
+description: Rank installed AI tools (Skills, Agents, Commands, Plugins) by fit for this project and write relevance overlays under 97-AI-Reference/. Reads PROJECT-CONTEXT.md (project type / tech stack) plus the installed inventory and ranks each item HIGH / MED / LOW with a one-line rationale keyed to project type. Use when the user wants to rank or audit which installed tools are relevant vs. off-stack, or invokes /Tandem:curate-toolkit.
+---
+
+# Tandem: curate-toolkit (PM hat)
+
+Operate as **PM hat**. The user has an installed set of AI tools — Skills, Agents, Commands, Plugins — and needs to know which ones are actually relevant to *this* project, and which are off-stack noise that should be deprioritised or ignored.
+
+This skill reads the project's type and stack, enumerates the installed inventory, and produces a ranked relevance report, with a one-line rationale per item, written as overlays under `97-AI-Reference/`. The ranking is **judgment-led and non-deterministic** — this skill describes the *procedure* and *output shape*, not a fixed ranking.
+
+---
+
+## Load into context
+
+Use `Read` / `Glob` to detect file existence. Treat any missing file as "not present" — never hard-fail on absence.
+
+- **Project context** — `_00-Project-Management/90-Standards/PROJECT-CONTEXT.md`. Read the `## Project type` selector (which checkbox is ticked), `## Tech stack`, and `## Sub-agent mapping` table. This is the primary ranking signal: tools that match the project type / stack rank higher; off-stack tools rank lower.
+- **Sub-agent map** (if present) — the `## Sub-agent mapping` table in PROJECT-CONTEXT.md names the preferred sub-agents by `type_of_work`. Cross-reference with the inventory below.
+- **Installed inventory** — enumerate all four categories:
+  1. **Skills** — glob `skills/*/SKILL.md`; read each `name:` and `description:` from frontmatter.
+  2. **Agents** — glob `.claude/agents/*.md` (or the repo's configured agent path); read each agent's name and stated purpose.
+  3. **Commands** — glob `.claude/commands/*.md`; read each command's name and stated purpose.
+  4. **Plugins** — read `plugin.json` at the repo root (if present); list each plugin entry's `name` and `description`.
+- **Existing overlays** — glob `97-AI-Reference/curate-toolkit-*.md` to check whether a prior ranking already exists. If found, note the prior run date and whether a re-rank was requested.
+- **Project root `CLAUDE.md`** — for project-specific overrides or exclusions.
+
+---
+
+## Task
+
+### 1 · Resolve the project type and stack
+
+From PROJECT-CONTEXT.md, identify:
+- The selected **project type** (web-app, mobile, cli, library, backend-service, data-pipeline, power-platform, automation, other).
+- The **primary language(s)** and **framework(s)** from `## Tech stack`.
+- The **preferred sub-agents** from `## Sub-agent mapping`.
+
+If PROJECT-CONTEXT.md has not been filled in (all fields are still template placeholders), note this as a gap in the output and proceed with a best-effort ranking based on whatever stack signals are present in the repo.
+
+### 2 · Enumerate the installed inventory
+
+For each of the four inventory categories — **Skills / Agents / Commands / Plugins** — produce a flat list of every item found, noting:
+- Its name / identifier.
+- Its stated purpose (from frontmatter `description:` or equivalent).
+- Whether it was actually found on disk (present) or only referenced elsewhere (e.g. in `suggested_agents:` frontmatter, the sub-agent map, or a story file) but not installed.
+
+**Gap handling — never hard-fail:** An uninstalled or unknown agent, skill, command, or plugin referenced anywhere in the project (including in `suggested_agents:` frontmatter, the sub-agent map, or any story file) is reported as a **GAP** in the inventory section. A gap is informational — it never hard-fails the resolution or ranking pass. Per the kit's resolution order: a named item that isn't installed causes the executor to degrade gracefully to the next step (discipline fallback → `general-purpose`). This skill mirrors that behaviour: flag the gap, assign a rank of `LOW (gap — not installed)`, and continue. Do not abort.
+
+### 3 · Rank each inventory item
+
+For each item in the combined inventory, assign one of three tiers:
+
+| Tier | Meaning |
+|------|---------|
+| **HIGH** | Directly relevant to this project type / stack — reach for it routinely. |
+| **MED** | Conditionally useful — relevant for specific task types or phases, not every session. |
+| **LOW** | Off-stack or not applicable to this project type — deprioritise; may still be used if the need arises. |
+
+Ranking criteria (apply in order; earlier criteria are stronger signals):
+1. **Project-type match** — if the item's purpose is specific to a project type that differs from the current project (e.g. a React-specific skill in a `data-pipeline` project), prefer LOW.
+2. **Stack / language match** — if the item references a language, framework, or runtime not in the project's stack, prefer LOW or MED.
+3. **Sub-agent map alignment** — if the item matches a preferred sub-agent in the PROJECT-CONTEXT `## Sub-agent mapping` table, prefer HIGH.
+4. **General-purpose / cross-cutting** — skills, agents, or commands that apply regardless of stack (e.g. code-review, security-audit, commit-work) default to MED unless stack signals elevate them.
+5. **Not installed (gap)** — overrides criteria 1–4: forced LOW with `(gap — not installed)` note regardless of other signals.
+
+Provide a **one-line rationale** for each ranking, keyed to the project type and stack (e.g. "HIGH — Next.js project; this React skill maps directly to the primary framework").
+
+### 4 · Write relevance overlays under `97-AI-Reference/`
+
+Output the ranking as one or more relevance overlay files. The overlay schema and exact field names are defined in ADR-0029 / STORY-04.3.03 (see the "Overlay schema (v1 — ADR-0029 / STORY-04.3.03)" section at the end of this file) — write the overlay to conform to that schema whenever it is available. If the v1 schema is not available, write the overlay in the interim format below and mark the file with `schema: interim` so a later migration pass can upgrade it.
+
+**Interim overlay format** (use until ADR-0029 / STORY-04.3.03 schema lands):
+
+```
+---
+schema: interim
+generated_by: curate-toolkit
+generated_at: <ISO 8601 timestamp>
+project_type: <value from PROJECT-CONTEXT.md>
+---
+
+# Toolkit Relevance Overlay — <project name or repo>
+
+## Skills
+
+| Name | Tier | Rationale |
+|------|------|-----------|
+| <skill-name> | HIGH / MED / LOW | <one-line rationale keyed to project type> |
+
+## Agents
+
+| Name | Tier | Rationale |
+|------|------|-----------|
+
+## Commands
+
+| Name | Tier | Rationale |
+|------|------|-----------|
+
+## Plugins
+
+| Name | Tier | Rationale |
+|------|------|-----------|
+
+## Gaps (referenced but not installed)
+
+| Item | Type | Referenced in | Rationale |
+|------|------|---------------|-----------|
+| <name> | skill/agent/command/plugin | <file or table where referenced> | GAP — not installed; degrade to general-purpose fallback |
+```
+
+Write the overlay to `97-AI-Reference/curate-toolkit-<YYYYMMDD>.md`. If a file for today already exists, append a numeric suffix (e.g. `-2`).
+
+Create the `97-AI-Reference/` directory if it does not exist.
+
+### 5 · Report in chat
+
+After writing the overlay, report:
+- Total items ranked: N (broken down by category).
+- HIGH items: list names.
+- Gaps (not installed): list names and where they were referenced.
+- Path to the written overlay file.
+- Any caveats: e.g. "PROJECT-CONTEXT.md is unfilled — ranking used best-effort stack inference."
+
+---
+
+## Non-negotiable rules
+
+- **Judgment-led, non-deterministic** — do not hard-code a fixed ranking. Always re-derive from the current project context.
+- **Never hard-fail on a missing item** — gaps are informational; they never abort the ranking pass. Degrade gracefully: flag the gap, continue.
+- **No consumer project references** — this skill is self-contained. Do not reference specific client names, internal company names, or project-specific paths beyond the kit's standard layout.
+- **Overlay schema deferred to STORY-04.3.03** — do not define or extend the overlay schema fields here. Reference ADR-0029 / STORY-04.3.03 for the canonical definition; use the interim format above until it lands.
+- **Registration deferred to STORY-04.3.03** — do not wire this skill into the build manifest here. That is STORY-04.3.03's deliverable.
+
+---
+
+## End-of-session summary (always emit)
+
+- Inventory enumerated: Skills N, Agents N, Commands N, Plugins N.
+- HIGH: list.
+- MED: list.
+- LOW: list.
+- Gaps (not installed): list with source reference.
+- Overlay written to: `97-AI-Reference/curate-toolkit-<YYYYMMDD>.md`.
+- PROJECT-CONTEXT.md filled: yes / no (if no, ranking is best-effort).
+
+---
+
+## Next command
+
+`/Tandem:curate-toolkit` — re-run after updating PROJECT-CONTEXT.md or installing new tools to refresh the overlay.
+
+Or: `/Tandem:execute-story` — to begin executing a story, using the HIGH-ranked sub-agents as the preferred executor pool.
+
+---
+
+## Overlay schema (v1 — ADR-0029 / STORY-04.3.03)
+
+The canonical overlay schema is defined in ADR-0029. This section is the normative reference for
+overlay authors (this skill) and overlay consumers (e.g. FEAT-04.6 dashboard renderer).
+
+### Write location
+
+All overlays are written to `97-AI-Reference/` at the root of the consuming project.
+File naming: `curate-toolkit-<YYYYMMDD>.md`. Append `-2`, `-3`, etc. if a same-day file exists.
+Create the directory if absent. Never write overlays outside `97-AI-Reference/`.
+
+### Overlay frontmatter (required)
+
+```yaml
+schema: v1
+generated_by: curate-toolkit
+generated_at: <ISO 8601 timestamp>
+project_type: <value from PROJECT-CONTEXT.md § Project type>
+```
+
+### Per-item record fields (required for every ranked item)
+
+| Field | Type | Allowed values | Description |
+|-------|------|----------------|-------------|
+| `id` | string | — | The item's unique identifier (skill name, agent filename, command name, or plugin name). Primary key for consumer lookups. |
+| `kind` | string | `skill` / `agent` / `command` / `plugin` | Inventory category. |
+| `rank` | string | `HIGH` / `MED` / `LOW` | Relevance tier for this project. |
+| `rationale` | string | one-line prose (≤ 120 chars) | Reason for the assigned rank, keyed to project type and stack. |
+| `installed` | boolean | `true` / `false` | Whether the item was found on disk at overlay-generation time. |
+
+### Tier definitions
+
+| Rank | Meaning |
+|------|---------|
+| `HIGH` | Directly relevant to this project type / stack — reach for it routinely. |
+| `MED` | Conditionally useful — relevant for specific task types or phases, not every session. |
+| `LOW` | Off-stack or not applicable to this project type — deprioritise. |
+
+### Gap / uninstalled items — explicit gap marker
+
+An item referenced anywhere (in `suggested_agents:` frontmatter, the sub-agent map, a story
+file, or any other project artefact) but **not found on disk** is represented in the overlay as
+an **explicit gap marker**. It is never omitted.
+
+Gap marker convention:
+- `installed: false`
+- `rank: LOW`
+- `rationale` prefixed with `GAP — not installed;` (e.g. `GAP — not installed; degrade to general-purpose fallback`)
+
+Consumers must never infer the absence of an item as a ranking signal — the gap marker is the
+authoritative representation of an uninstalled or unknown item. The gap marker is informational
+and never aborts the overlay-generation pass.
+
+### Schema versioning and forward compatibility
+
+- Schema version is `v1` (defined in ADR-0029). Readers gate on the `schema:` frontmatter field.
+- Readers **must** ignore unrecognised fields (open-world assumption).
+- Adding optional fields is non-breaking (no version bump needed).
+- A breaking change requires bumping to `schema: v2` and a migration pass on existing overlays.
+- FEAT-04.6 may extend this schema (e.g. `display_group`, `badge_color`) without migration.
+
+### Registration / discoverability
+
+This skill is registered via the kit's auto-discovery model (ADR-0003): placing
+`skills/curate-toolkit/SKILL.md` in the `skills/` directory is sufficient — no `plugin.json`
+skills array entry is needed or added. The public Tandem build (`npm run build:tandem`, ADR-0028)
+copies the `skills/` tree and rewrites the name token to `/Tandem:curate-toolkit`; the scrub
+gate confirms no internal token survives. Do not add this skill to `plugin.json`.
diff --git a/plugins/tandem/skills/document/SKILL.md b/plugins/tandem/skills/document/SKILL.md
new file mode 100644
index 0000000..bee1bc7
--- /dev/null
+++ b/plugins/tandem/skills/document/SKILL.md
@@ -0,0 +1,93 @@
+---
+name: document
+description: Author the project's default markdown documentation set from accumulated PM knowledge. Use when the user asks to generate docs, write project documentation, create the documentation set, or invokes /Tandem:document. Reads PROJECT-CONTEXT.md, epics/features/stories, ADRs, and the codebase — then authors one markdown file per doc into the documentation/ folder. Authors markdown only; HTML rendering is a separate step.
+---
+
+# Tandem: document (Technical Writer hat)
+
+Operate as **Technical Writer hat**. The user wants a coherent, shareable documentation set synthesised from what Tandem already knows about the project — no separate doc-writing pass required.
+
+## Default doc set
+
+Author **exactly these five markdown files**, one per document, using these verbatim names as the output filenames:
+
+| # | Document | Output file |
+|---|----------|-------------|
+| 1 | Overview | `documentation/overview.md` |
+| 2 | Getting started | `documentation/getting-started.md` |
+| 3 | Architecture | `documentation/architecture.md` |
+| 4 | Decisions (digest) | `documentation/decisions.md` |
+| 5 | Features (& usage) | `documentation/features.md` |
+
+All output files are written into the **`documentation/` folder** at the project root, one `.md` per doc. Do not create subfolders inside `documentation/` — flat layout.
+
+## Sources (read before authoring)
+
+Read the following in order, resolving paths against the project root. Treat a missing file as "not present" (note the gap in the relevant section) rather than throwing.
+
+1. **`PROJECT-CONTEXT.md`** — canonical project identity: name, purpose, tech stack, audience, deployment. This drives the Overview and Getting started docs.
+2. **Epics** (`_00-Project-Management/30-Epics/EPIC-*.md`) — strategic scope. Skim titles + `## In scope` sections.
+3. **Features** (`_00-Project-Management/31-Features/**/*.md`) — feature-level capabilities. Drives the Features (& usage) doc.
+4. **Stories** (`_00-Project-Management/32-Stories/**/*.md`) — implementation detail and done/not-done status. Informs accuracy of the Getting started and Features docs.
+5. **ADRs** (`_00-Project-Management/40-Decisions/ADR-*.md`) — architectural decisions. Drives the Decisions (digest) and Architecture docs. Read all; summarise the most consequential ones.
+6. **Codebase** — the source tree itself. Read entry points, key modules, README fragments (if any). Drives the Architecture and Getting started docs. Limit scope: entry-point files, major module directories, config files — do not attempt to read every file.
+
+## Per-document authoring guide
+
+### 1 · Overview (`documentation/overview.md`)
+- What the project is, who it is for, and why it exists.
+- One-paragraph project statement sourced from PROJECT-CONTEXT.md.
+- Key capabilities list (3–7 bullets, sourced from epics/features).
+- Current project status (active / beta / archived) — infer from MONITOR if present.
+
+### 2 · Getting started (`documentation/getting-started.md`)
+- Prerequisites (runtime, env vars, credentials) — sourced from PROJECT-CONTEXT.md and codebase config files.
+- Install / setup steps — numbered list, runnable commands.
+- First run — the single command that proves the project is working.
+- Troubleshooting tips — at most 3 common failure modes from stories/bugs if present.
+
+### 3 · Architecture (`documentation/architecture.md`)
+- System diagram described in prose or Mermaid (prefer Mermaid if the structure is clear from the codebase).
+- Key components and their responsibilities — sourced from codebase + ADRs.
+- Data flow — how a request/event moves through the system.
+- External dependencies — services, APIs, storage — sourced from PROJECT-CONTEXT.md and config files.
+- Link to relevant ADRs inline (e.g. "see ADR-0003 for why X was chosen").
+
+### 4 · Decisions (digest) (`documentation/decisions.md`)
+- Introduction: what ADRs are and how to read them.
+- One row per ADR in a markdown table: `| ADR | Title | Status | Date | Summary (one line) |`.
+- Sort by ADR number descending (most recent first).
+- Source: all files matching `_00-Project-Management/40-Decisions/ADR-*.md`. If none exist yet, write a placeholder row.
+
+### 5 · Features (& usage) (`documentation/features.md`)
+- One `##` section per major feature, sourced from the Features files.
+- Each section: brief description, how to invoke / configure, example (code block or command).
+- Status note: in each feature's section, state whether the feature is shipped, in-progress, or planned — infer from story statuses.
+
+## Authoring rules
+
+- **Markdown only** — author `.md` files. Do not generate HTML, CSS, or any rendered output. HTML rendering is handled by a separate later step.
+- **Self-contained output** — each doc must be readable standalone. Cross-link between docs with relative markdown links (e.g. `[Architecture](architecture.md)`).
+- **Prose quality** — use plain English, active voice, present tense. No marketing filler.
+- **No invention** — if a fact is not in the sources, say "not yet documented" rather than guessing. Accuracy over completeness.
+- **SELF-CONTAINED SKILL** — this skill contains no references to any specific consumer project or company. Keep output project-neutral in structure; project-specific content comes entirely from the sources above.
+
+## Execution steps
+
+1. Read all sources listed above (parallelise reads where possible).
+2. For each of the five documents, draft content in memory, then write to `documentation/<filename>.md`.
+3. If the `documentation/` folder does not exist, create it before writing.
+4. After writing all five files, emit a short summary:
+   - Files written: list with relative paths.
+   - Sources read: list with any gaps noted.
+   - Sections marked "not yet documented": list, or "none".
+
+## Output rules
+
+- Write all five docs in a single response — do not ask for confirmation between docs.
+- If a source file is missing, note the gap inside the relevant doc section and continue — do not abort.
+- Do not modify any PM artefact (stories, ADRs, MONITOR) during this skill.
+
+## Next command
+
+Once `STORY-04.4.02` ships, `/Tandem:document-html` will render the `documentation/*.md` files as a styled HTML site. (Not yet available — markdown authoring is the final step for now.)
diff --git a/plugins/tandem/skills/draft-epic/SKILL.md b/plugins/tandem/skills/draft-epic/SKILL.md
new file mode 100644
index 0000000..e61ff07
--- /dev/null
+++ b/plugins/tandem/skills/draft-epic/SKILL.md
@@ -0,0 +1,70 @@
+---
+name: draft-epic
+description: Draft a new EPIC from an OKR key result or a PRD section. Use when the user asks to draft an epic, create an epic, write an epic, turn an OKR / PRD into an epic, or invokes /Tandem:draft-epic. Operates as PM hat. Reads the source OKR or PRD plus the EPIC template; produces 30-Epics/EPIC-NN-<slug>.md with mandatory strategic linkage (frontmatter okr: or prd_section:). Aborts if strategic linkage cannot be established.
+---
+
+# Tandem: draft-epic (PM hat)
+
+Operate as **PM hat**. The user has an approved strategic bet (OKR KR or PRD section) and needs to put it into the work graph as an Epic.
+
+This skill is the slash-command wrapper for the kit's canonical epic-drafting prompt. The prompt is the source of truth; this file is the entry point.
+
+## Source of truth
+
+@_00-Project-Management/92-Prompts/02-draft-epic-from-okr-or-prd.md
+
+Follow that prompt verbatim. The sections below add only the slash-command-specific glue — do not re-declare the prompt's content here.
+
+## Inputs needed
+
+- Path to the source OKR file (`_00-Project-Management/00-Strategy/OKR-YYYY-Qx.md`) or PRD file (`_00-Project-Management/20-Requirements/PRD-*.md`).
+- If the user didn't supply one, ask: "Which OKR or PRD am I drafting from? Paste the path or the KR/section text inline."
+
+## Load into context
+
+Use `Read` / `Glob` to detect existence. Treat missing files as "not present" rather than throwing.
+
+- **Source OKR / PRD** — at the resolved path above. If neither is provided and no `okr:` or `prd_section:` can be inferred from the user's inline text, **abort** — the kit's strategic-linkage rule forbids Epics without it. Tell the user: "I need an OKR or PRD reference before drafting. Which one moves this?"
+- **SOP** — `_00-Project-Management/90-Standards/SOP.md` for DoR, estimation, status enum.
+- **Project context** — `_00-Project-Management/90-Standards/PROJECT-CONTEXT.md`.
+- **EPIC template** — `_00-Project-Management/91-Templates/EPIC.template.md`. Use verbatim — do not redraft section headings from memory.
+- **Existing Epics** — glob `_00-Project-Management/30-Epics/EPIC-*.md` to find the next-free `EPIC-NN` (scan for the max NN, increment by 1, pad to 2 digits). Do **not** invent a number; do **not** reuse one.
+- **Project root `CLAUDE.md`** — for project-specific overrides.
+
+## Task
+
+1. Find next-free `EPIC-NN` by globbing `_00-Project-Management/30-Epics/EPIC-*.md` and computing `max(NN) + 1`. Pad to 2 digits. If no Epics exist yet, start at `EPIC-01`.
+2. Draft the Epic at `_00-Project-Management/30-Epics/EPIC-NN-<slug>.md` using `EPIC.template.md` verbatim.
+3. Fill every section, especially:
+   - **Strategic linkage** — must reference the source OKR KR or PRD section. Frontmatter `okr:` OR `prd_section:` is **mandatory** — if neither can be set with a real value, abort and tell the user the linkage is too weak to proceed (kit's strategy-linkage rule).
+   - **In scope / Out of scope** — explicit deferrals are how mid-epic creep is resisted.
+   - **Success criteria** — measurable, not vibes.
+   - **Dependencies, Data touched, Risks** — top 3 risks max, with one-line mitigations.
+4. Outline 3–7 Features at high level (title + one-line goal each) in the `## Features` section. **Do not create FEAT files** — `/Tandem:split-into-features` does that.
+5. Set frontmatter: `status: not-started`, `created_at: <ISO 8601 now from system clock>`, other timestamp fields empty strings.
+6. **Show the file tree of what you'll create before writing.** Wait for user approval.
+
+## Output rules
+
+- If the Business Outcome line can't be written ("what metric does this move and by how much?"), stop and ask. Don't proceed without a clear answer.
+- If the Epic feels > 4 weeks of solo work, propose splitting into two Epics.
+- Strategic-linkage rejection: if the user pushes back on the OKR/PRD requirement, point to `_00-Project-Management/90-Standards/SOP.md` ("Strategy linkage" rule). Do not write the file without `okr:` or `prd_section:` set.
+
+## Non-negotiable rules from CLAUDE.md
+
+- Frontmatter timestamps (quoted ISO 8601 with offset, from system clock).
+- Status enum (`not-started` on creation).
+- Strategy-linkage rule — `okr:` or `prd_section:` is MANDATORY. **Reject** the request and stop if neither can be set.
+- Templates rule — use `EPIC.template.md` verbatim.
+
+## End-of-session summary (always emit)
+
+- File written: `_00-Project-Management/30-Epics/EPIC-NN-<slug>.md`
+- Strategic linkage: `okr: <ref>` or `prd_section: <ref>`
+- Features outlined: X
+- Estimate: S | M | L | XL
+- Status: `not-started`
+
+## Next command
+
+Next: `/Tandem:split-into-features` — decompose this Epic into FEAT files.
diff --git a/plugins/tandem/skills/draft-okrs/SKILL.md b/plugins/tandem/skills/draft-okrs/SKILL.md
new file mode 100644
index 0000000..3b4ebcc
--- /dev/null
+++ b/plugins/tandem/skills/draft-okrs/SKILL.md
@@ -0,0 +1,74 @@
+---
+name: draft-okrs
+description: Draft quarterly OKRs from a North Star. Use when the user asks to draft OKRs, write OKRs, plan a quarter, set objectives, draft KRs / Key Results, or invokes /Tandem:draft-okrs. Operates as Founder hat. Reads 00-Strategy/NORTH-STAR.md, the previous quarter's OKRs, and recent retros; produces 00-Strategy/OKR-YYYY-Qx.md following the kit's strategy template.
+---
+
+# Tandem: draft-okrs (Founder hat)
+
+Operate as **Founder hat**. The user is starting a new quarter (or replacing OKRs that drifted from the work).
+
+This skill is the slash-command wrapper for the kit's canonical OKR-drafting prompt. The prompt is the source of truth; this file is the entry point.
+
+## Source of truth
+
+@_00-Project-Management/92-Prompts/01-draft-okrs-from-northstar.md
+
+Follow that prompt verbatim. The sections below add only the slash-command-specific glue (input resolution, layout detection, post-write handoff) — do not re-declare the prompt's content here.
+
+## Inputs needed
+
+- If the user didn't supply context, ask: "Are we drafting for a fresh quarter, or replacing OKRs that feel disconnected? And which quarter's filename should I use — e.g. OKR-2026-Q3?"
+- Quarter slug format: `YYYY-Qx` (e.g. `2026-Q3`). Used in the output filename.
+
+## Load into context
+
+The canonical layout is under `_00-Project-Management/`. Use `Read` / `Glob` to detect existence rather than assuming; treat missing files as "not present" rather than throwing.
+
+- **North Star** — `_00-Project-Management/00-Strategy/NORTH-STAR.md` if present. If absent, ask the user to paste their North Star text inline before drafting.
+- **Previous quarter's OKRs** — `_00-Project-Management/00-Strategy/OKR-*.md` (most recent by filename). If absent, treat this as the first OKR set.
+- **Customer journey** — `_00-Project-Management/00-Strategy/CUSTOMER-JOURNEY.md` if present (optional context).
+- **Recent retros** — `_00-Project-Management/14-Retros/*.md`, most recent 1–2 by filename (optional).
+- **SOP** — `_00-Project-Management/90-Standards/SOP.md` for OKR rules + frontmatter contract.
+- **OKR template** — `_00-Project-Management/91-Templates/OKRS.template.md` if present; if absent, scaffold from the structure described in the source prompt and note the gap in the end-of-session summary.
+
+## Task
+
+1. Re-read the North Star. If it has shifted from what last quarter assumed, **stop** and ask the user before drafting.
+2. Review the previous quarter's OKRs (if any): which KRs hit ≥70%, which missed, and for each miss whether it was wrong-target / wrong-action / wrong-quarter.
+3. Draft a new `OKR-YYYY-Qx.md` per the source prompt's rules:
+   - ≤ 3 Objectives.
+   - 2–3 KRs per Objective, each a measurable number or binary state.
+   - Confidence column (0–100%) per KR — honest probability of hitting.
+   - "What we are deliberately NOT doing this quarter" section.
+   - Explicit North Star linkage per Objective.
+4. **Show the draft to the user in chat before writing the file.** Wait for edits before saving.
+5. On save: set frontmatter `status: not-started`, `created_at: <ISO 8601 now from system clock>`, all other timestamp fields empty strings.
+6. Final question to the user before saving (ask it after step 4's review and before performing the save in step 5): **"Which of these will hurt to drop in 4 weeks?"** — if they can't answer, the set isn't focused enough; iterate.
+
+## Output rules
+
+- Three Objectives is the ceiling. Two or one is fine.
+- KRs must be numbers or binary states. "Improve X" is not a KR. "X reaches 100 by Sept 30" is.
+- KR confidence > 90% is not ambitious. < 30% is not realistic. Aim 50–70%.
+- Do NOT mark the OKR file `ready` or `in-progress` — Founder approval (next session) handles that.
+
+## Non-negotiable rules from CLAUDE.md
+
+- Frontmatter timestamps (quoted ISO 8601 with offset, from system clock).
+- Status enum (closed set of 9 — `not-started` on creation).
+- Templates rule — use `91-Templates/OKRS.template.md` verbatim if present; do not redraft section headings from memory.
+- Strategy linkage — every Objective must reference a North Star section.
+
+## End-of-session summary (always emit)
+
+- File written: `_00-Project-Management/00-Strategy/OKR-YYYY-Qx.md`
+- Objectives drafted: X
+- KRs drafted: Y
+- Mean KR confidence: Z%
+- Status: `not-started` (awaiting Founder approval)
+
+## Next command
+
+Next: `/Tandem:draft-prd` — turn one of these Objectives into a PRD.
+
+Or, if you're going straight to execution-shaped strategy: `/Tandem:draft-epic` — turn a KR directly into an Epic.
diff --git a/plugins/tandem/skills/draft-prd/SKILL.md b/plugins/tandem/skills/draft-prd/SKILL.md
new file mode 100644
index 0000000..d46fea6
--- /dev/null
+++ b/plugins/tandem/skills/draft-prd/SKILL.md
@@ -0,0 +1,113 @@
+---
+name: draft-prd
+description: Draft a Product Requirements Document from a North Star, an OKR Key Result, raw founder notes, or a BACKLOG entry that needs more spec. Use when the user asks to draft a PRD, write a PRD, write requirements, write a spec, turn notes into a PRD, or invokes /Tandem:draft-prd. Operates as Founder hat (synthesis) handing to PM hat (write). Reads strategy sources and produces 20-Requirements/PRD-<slug>.md following 91-Templates/PRD.template.md — 8 mandatory H2 sections, lightweight markdown, no frontmatter.
+---
+
+# Tandem: draft-prd (Founder → PM hat)
+
+Operate as **Founder hat** for the synthesis (problem framing, audience, goals), transitioning to **PM hat** for the write (requirements, constraints, open questions). PRDs sit upstream of the Epic/Feature/Story graph — they describe the **problem and desired end state**; the Epic encodes the **work commitment**.
+
+This skill is **net-new content**. There is no paste-prompt for it in `92-Prompts/` — the kit previously assumed PRDs existed but provided no skill to draft them. The synthesis flow below IS the source of truth for PRD drafting in this kit.
+
+## Why this skill exists (gap closed)
+
+The kit's pre-2026-05-23 lifecycle:
+
+```
+North Star  →  ??? (gap)  →  OKR or Epic  →  Feature  →  Story  →  Testplan  →  Done
+```
+
+`prompt 02` (`draft-epic-from-okr-or-prd`) accepts a PRD path as input, but before this skill existed no upstream skill produced one. `draft-prd` closes that gap so the chain is unbroken from North Star to shipped work.
+
+## Inputs needed
+
+- A source of strategic intent. Any of:
+  - A North Star file path (`_00-Project-Management/00-Strategy/NORTH-STAR.md`)
+  - An OKR Key Result (file path + which KR — e.g. "OKR-2026-Q3.md, O2 KR-1")
+  - A BACKLOG entry that needs more spec before becoming an Epic (path under `_00-Project-Management/11-Backlog/`)
+  - Raw founder notes pasted inline
+- The intended PRD slug (kebab-case, ≤6 words — e.g. `html-output-convention`).
+- If the user didn't supply input, ask: "What's the source? Paste a path or the notes themselves. What slug should the PRD use?"
+
+## Load into context
+
+Use `Read` / `Glob` to detect existence. Treat missing files as "not present" rather than throwing.
+
+- **Source artefact** at the resolved path (or the inline notes from the user).
+- **North Star** (`_00-Project-Management/00-Strategy/NORTH-STAR.md`) — for strategic linkage even when the source is a BACKLOG entry, so the PRD's `Source:` line can reference upstream intent.
+- **Existing OKRs** (`_00-Project-Management/00-Strategy/OKR-*.md`, most recent) — for the success-metrics section linkage if applicable.
+- **PRD template** — `_00-Project-Management/91-Templates/PRD.template.md`. Use verbatim — do not redraft section headings from memory. The 8 mandatory H2 sections are: `Problem`, `Audience`, `Goals`, `Non-goals`, `Success metrics`, `Key requirements`, `Constraints`, `Open questions`.
+- **Existing PRDs** — glob `_00-Project-Management/20-Requirements/PRD-*.md` (top-level + subdirs) to see established shape conventions and avoid duplicating an existing PRD.
+- **SOP** — `_00-Project-Management/90-Standards/SOP.md` for the strategy-linkage rule (every PRD-driven Epic must reference its PRD section).
+- **Project root `CLAUDE.md`** — for project-specific overrides.
+
+## Synthesis flow (the load-bearing part)
+
+This is what makes a `draft-prd` skill different from a plain `cat template > new-prd.md` operation. Follow these 5 steps in order — do not skip ahead. Steps 1, 2, and 4 are user checkpoints: stop and wait for the user's response before moving on.
+
+### Step 1 — Read & cluster
+
+Read all source material end-to-end. Extract every distinct concern, complaint, observation, or stated requirement into a flat list. Then **cluster** them into 5–8 themes by what they're really about. A theme that contains only 1 item is suspicious — either fold it into a neighbouring theme or interrogate whether it's a real concern.
+
+**Checkpoint:** show the user the clustered themes. Wait for confirmation before drafting.
+
+### Step 2 — Frame the problem (Founder hat)
+
+For each theme, write the **lived experience** in one sentence: who feels this, when, and what they do instead today. If you can't write this without naming a solution, the problem isn't framed yet — push back to Step 1. The Problem section is the foundation; everything downstream rots if it's vague.
+
+**Checkpoint:** read the draft Problem section to the user. Ask: "Does this name what hurts, not what we'd build?"
+
+### Step 3 — Draft the PRD body (PM hat)
+
+Open the template and fill all 8 sections in order:
+
+1. **Problem** — synthesised from Step 2.
+2. **Audience** — primary, secondary, explicit out-of-audience. Be specific about role + context.
+3. **Goals** — 3–5 outcomes phrased as desired end-state, not features to build.
+4. **Non-goals** — 3–5 deferrals. The mid-spec-creep firewall.
+5. **Success metrics** — quantitative where possible. Tie each metric back to the source OKR KR if applicable.
+6. **Key requirements** — numbered (R1, R2, R3…) so downstream Epics + Stories can reference them. Each requirement testable.
+7. **Constraints** — technical, business, operational. Make assumed constraints explicit.
+8. **Open questions** — what you don't know yet. Better to list honestly than fake certainty.
+
+### Step 4 — Confirm with the user
+
+Show the full draft PRD in chat **before writing the file**. Ask three questions:
+
+- "Are the non-goals strict enough? Anything stakeholders might assume is in scope?"
+- "Are success metrics measurable, or vibes?"
+- "What's missing? Open questions section is honest about that — but anything else?"
+
+Wait for edits before saving.
+
+### Step 5 — Save
+
+Write to `_00-Project-Management/20-Requirements/PRD-<slug>.md` using the template verbatim. Plain markdown — **no frontmatter** (matches the kit's existing PRD convention; validator does not scan `20-Requirements/`). Set the document `Status:` field at the top to `draft` and the `Date:` to today.
+
+**Auto-dispatch write-outcomes:** Before saving, spawn a sub-agent with the PRD's technical content (Problem through Open questions) plus the `write-outcomes` skill. Capture the returned single-line outcome (founder voice, no label or quotes) and insert it as a founder-facing summary immediately below the `Status:` line. It is one outcome sentence, not a re-digest of every section.
+
+## Output rules
+
+- The 8 H2 section headings are **mandatory and fixed**. Do not rename, reorder, or omit them — the `draft-prd` testplan grep is exact-match, and downstream skills look for them by exact string.
+- PRDs are markdown-only artefacts. **No YAML frontmatter** (no `type:`, `id:`, `status:` block). The header `**Status:** draft | reviewed | approved | superseded` near the top of the body is the status surface.
+- Use appendices for material that supports but doesn't belong inside the 8 sections (user research, competitor scans, glossary, new-artefact-type frontmatter contracts).
+- If during Step 3 you find a requirement that needs its own ADR-level decision (e.g. "should PRDs have frontmatter?"), file an ADR in the same response per the kit's "ADR on the spot" rule.
+
+## Non-negotiable rules from CLAUDE.md
+
+- Strategy linkage — the PRD's `Source:` header line must reference its upstream OKR KR, North Star section, or BACKLOG entry. PRDs without strategic linkage drift.
+- Templates rule — use `PRD.template.md` verbatim. Do not redraft the 8 H2 section headings from memory.
+- ADR on the spot — if the PRD requires a new artefact-type frontmatter contract or other non-obvious decision, file an ADR.
+
+## End-of-session summary (always emit)
+
+- File written: `_00-Project-Management/20-Requirements/PRD-<slug>.md`
+- Status: `draft` (awaiting user review → `approved`)
+- Themes clustered: X
+- Key requirements (R1..Rn): n
+- Open questions captured: y
+- Linked source: <OKR / North Star / BACKLOG ref>
+
+## Next command
+
+Next: `/Tandem:draft-epic` — turn this PRD into an Epic that commits the work.
diff --git a/plugins/tandem/skills/execute-batch-parallel/SKILL.md b/plugins/tandem/skills/execute-batch-parallel/SKILL.md
new file mode 100644
index 0000000..ecf98cc
--- /dev/null
+++ b/plugins/tandem/skills/execute-batch-parallel/SKILL.md
@@ -0,0 +1,202 @@
+---
+name: execute-batch-parallel
+description: Execute one "chat" from an Implementation Strategy by fanning out one scoped sub-agent per Ready story IN PARALLEL, then reconciling the board once on the main thread. Use when the user invokes /Tandem:execute-batch-parallel with a chat id (e.g. CHAT-02), or asks to run a batch of independent stories concurrently. Operates as Dev/QA hat. Refuses to fan out unless the batch is provably file-disjoint (pm:batch-check, ADR-0075) and every story is DoR-ready — otherwise falls back to serial execute-batch. This command DOES change story status.
+---
+
+# Tandem: execute-batch-parallel (Dev/QA hat)
+
+Operate as **Dev/QA hat** in a **fresh chat**. This is the **concurrent** sibling of
+`execute-batch`: where `execute-batch` runs a chat's stories one-at-a-time, `execute-batch-parallel`
+**fans out one sub-agent per story** so a batch of *independent* stories clears in a single chat,
+then folds every result back onto the board in **one serialised reconciliation pass** on the main
+thread.
+
+It is built **against the settled concurrency model in ADR-0075** — *sub-agents-with-merge under a
+disjoint-file precondition, main thread as sole writer of serialised state.* It composes the
+existing per-story skills (`execute-story`, `run-testplan`, `close-out-story`); it does **not**
+re-implement their logic, and it **never** relaxes the kit's gate contract (status enum, atomic
+flip, ADR-on-the-spot, BUG-on-defect).
+
+> **Default is still serial.** Parallel fan-out is *opt-in and earned by precise metadata*. If a
+> batch cannot be **proven** file-disjoint, this skill refuses and the operator runs the serial
+> `execute-batch` instead. A batch that cannot be proven safe is treated exactly like an unsafe one.
+
+## Inputs needed
+
+- **A chat id** — e.g. `CHAT-02`. If the user didn't supply one, ask, or list the chats in the
+  latest strategy.
+- **The strategy** — default to the latest `_00-Project-Management/41-Reports/EXECUTION-STRATEGY-*.json`.
+  Fall back to the paired `.md` report if the JSON is absent. The user may name a specific file.
+
+## Load into context
+
+Use `Read` / `Glob`; treat missing files as "not present", never throw.
+
+- **The named chat** from the latest `EXECUTION-STRATEGY-*.json`: its `stories` (with `ready`
+  flags), `lanes`, `verify` command, `sub_agents`, and `executed` flag.
+- **Each story + its paired testplan** under `32-Stories/` + `33-Testplans/`.
+- **SOP** — `90-Standards/SOP.md` (DoR/DoD, status enum, WIP limits §5, parallel-vs-serial rule).
+- **ADR-0075** — the concurrency model this skill obeys.
+
+## Step 1 — Safety gate (refuse BEFORE any fan-out)
+
+The gate runs **before a single sub-agent is dispatched**. All of the following must pass; on **any**
+failure, **do not fan out** — report the failing item and fall back to serial `execute-batch`:
+
+1. **DoR precheck.** Every story in the chat must be `status: ready`. If any story is `ready: false`
+   in the strategy or not `ready` in its frontmatter, **refuse** and name the unready story — it is
+   `unsafe` to start. (The operator runs `/Tandem:refine-backlog` first.)
+2. **Disjoint-file precondition (`pm:batch-check`, ADR-0075).** Run the static **batch-check** over
+   the batch's stories:
+
+   ```bash
+   node _00-Project-Management/93-Scripts/batch-check.js STORY-A STORY-B STORY-C
+   ```
+
+   It compares every pair's `files_touched:`. A `CONFLICT` (two stories write the same file, or a
+   file under another's directory) **or** any `UNKNOWN` (directory×directory, or empty/absent
+   `files_touched:` — *cannot prove disjoint*) is a **hard stop**: the batch is **not** fanned out.
+   Only a provably **disjoint** batch (exit 0) may run in parallel. `UNKNOWN` never silently passes.
+   **Fail closed:** any **non-zero exit** — a conflict, an unknown, **or a batch-check script error** —
+   blocks fan-out and falls back to serial `execute-batch`; the gate never reads a crash as "safe".
+
+The refusal path **precedes** the fan-out: the skill computes the gate, and only a clean gate
+unlocks Step 2.
+
+## Step 2 — Fan-out: one scoped sub-agent per story
+
+On a safe batch, dispatch **one sub-agent per story, concurrently** (a single message with one
+`Agent` call per story so they run in parallel). Each sub-agent is **tightly scoped** to its own
+story and is structurally unable to corrupt shared state:
+
+- **Scope = one story only.** The sub-agent's prompt contains *only* that story file, its paired
+  testplan, and that story's ACs. It implements that story's ACs (using the chat's resolved
+  sub-agent discipline for that story, e.g. `mcp-developer` / `build-engineer`), runs its testplan
+  TCs, and records AC pass/fail — and nothing about any sibling story.
+- **Board files are FORBIDDEN.** The sub-agent prompt explicitly says **never MONITOR / ACTIVE**: the
+  sub-agent must not read or write `MONITOR.md` / `ACTIVE.md`, and must not create ADR/BUG files or claim an
+  ADR/BUG number. Sub-agents **never** write serialised state — that is the main thread's job alone
+  (ADR-0075 single-writer). A sub-agent that thinks it needs an ADR/BUG **proposes the body** in its
+  handoff; it does **not** number or file it.
+- **No cross-story coordination.** Because the batch is provably file-disjoint, the sub-agents work
+  in the shared tree with no content collision; they do not need to talk to each other.
+
+### Sub-agent prompt template (the safety boundary)
+
+Every dispatched prompt is built from this template — the *exclusions are load-bearing*:
+
+```
+You are implementing exactly ONE story: STORY-NN.M.PP.
+Inputs (the ONLY files you own): <story path>, <paired testplan path>.
+Write ONLY the files listed in this story's files_touched: <files_touched list>.
+Do: implement each AC; run every testplan TC's Command; record PASS/FAIL per TC and per AC.
+Do NOT touch MONITOR.md or ACTIVE.md (forbidden);
+        do NOT create or number any ADR-*/BUG-* file; do NOT edit any sibling story.
+        If the work requires touching ANY file outside files_touched, STOP and report it in the
+        handoff — do not create or edit it (an undeclared write breaks the disjoint-file guarantee).
+Return: the structured handoff described below — raw data, not prose for a human.
+```
+
+## Step 3 — Structured handoff (sub-agents return data, not board writes)
+
+Each sub-agent returns a **structured handoff** that the main thread later reconciles (the
+reconciliation pass itself is defined in the next section). Returning data — rather than writing
+shared state — is what keeps fan-out race-free. The handoff shape per story:
+
+- `story_id` — the story it worked.
+- `status_flip_proposal` — proposed terminal status (`done`, or `in-progress` if a DoD/TC failed)
+  plus the `completed_at` it would stamp. The main thread, not the sub-agent, applies the flip.
+- `ac_verification_log` — each AC with PASS/FAIL and the evidence (the TC command output).
+- `testplan_results` — each TC id → PASS/FAIL (+ the failing output if any).
+- `proposed_adrs` — zero or more **draft ADR bodies** (title + content) for non-obvious decisions,
+  **un-numbered** (the main thread assigns the sequential `ADR-NNNN`).
+- `proposed_bugs` — zero or more **draft BUG bodies** for any TC failure / defect, **un-numbered**
+  (the main thread assigns the sequential `BUG-YYYYMMDD-NN`).
+- `files_changed` — the files the sub-agent actually wrote (for the reconciliation commit).
+
+The main thread collects every story's handoff and applies them in one ordered pass (next section).
+
+## Step 4 — Reconciliation (single-writer, serialised)
+
+After **all** sub-agents return, the **main thread alone** folds the handoffs onto the board in one
+ordered pass. Sub-agents proposed; the main thread disposes. This is the *single-writer* contract of
+ADR-0075 — there is exactly one writer of serialised state, so nothing races.
+
+### Reconciliation order (deterministic — apply in this exact order)
+
+Process the collected handoffs in **deterministic story order** (the chat's listed order), and within
+that, in these stages — **first** collect, **then** number, **then** flip, **then** write the board,
+**then** commit:
+
+1. **Collect** every story's structured handoff (from Step 3). Partition into **succeeded** (proposes
+   `done`, all ACs/TCs PASS) and **failed** (any AC/TC FAIL or DoD gap).
+2. **Assign sequential IDs (main-thread-only).** For every `proposed_adr` / `proposed_bug` body, the
+   main thread computes the **next free** number **at write time** by globbing the existing
+   `ADR-*` / `BUG-*` files — never a number pre-assigned during fan-out — so two stories can **never**
+   collide on the same `ADR-NNNN` / `BUG-YYYYMMDD-NN`. Numbers are allocated in story order, then the
+   files are written by the main thread.
+3. **Run the per-story DoD gate, then apply atomic status flips.** A story only flips to `done` once
+   its **Definition-of-Done gate passes** — the main thread runs the same DoD checks `close-out-story`
+   would (AC verification against the handoff's `ac_verification_log` + testplan PASS + the **R14
+   AI-code review** for that story's diff). Only the **board write** (stage 4) is batched to once; the
+   **DoD gate still runs per story**. For each story that passes, flip **`ready → done`** (the story
+   was `ready` on the board through fan-out — the in-progress work happened inside its sub-agent),
+   setting **both `started_at` and `completed_at`** in a **single edit** (atomic — status + both
+   timestamps together, never half-flipped, never a `done` story with an empty `started_at`). This
+   matches ADR-0077's `ready → in-progress → done` lifecycle, collapsed into one atomic write at
+   reconciliation. For each failed story (DoD gap, TC FAIL, or no usable handoff), see
+   partial-failure handling below.
+4. **Write the board once — `MONITOR.md` + `ACTIVE.md`.** A single (`once`) `MONITOR` + `ACTIVE`
+   write covers the whole batch — one revision-history block for every shipped story, one ACTIVE
+   update — not once per story. Only this board write is batched; the per-story DoD gate in stage 3
+   is **not**. Single writer + single write = no interleaving.
+5. **Per-story commit checkpoint.** Commit each finalised story (`STORY-NN.M.PP — <imperative>`) so a
+   crashed batch leaves completed stories committed and `done` (retained from `execute-batch`).
+
+### Partial-failure handling (honest reporting)
+
+A batch where some sub-agents fail must **not** abort the successful ones:
+
+- A **failed** story is flipped **`ready → in-progress`** (set `started_at`, leave `completed_at`
+  empty) and **left `in-progress`** (recoverable — never half-flipped, never silently `done`), with a
+  one-line note of why; its proposed BUG body is filed (numbered by the main thread). If more than
+  two stories fail, the WIP cap in Step 5 applies: the excess failures are set `blocked` instead.
+- A story whose sub-agent **returned nothing or a garbled handoff** (crash, timeout, or a terminal
+  API error after retries) is treated **exactly like a failed story** — never silently dropped:
+  left recoverable and named in the shipped-vs-not split below, so the operator always sees it.
+- The **succeeded** stories still finalise to `done` and commit — the failure of one does not roll
+  back the others.
+- The run **reports the split**: which stories **shipped** vs which did **not**, plus the chat id to
+  resume. Honest partial success beats an all-or-nothing abort.
+
+> **Two batches back-to-back** must not double-count MONITOR's shipped totals — the count blocks are
+> regenerated by `pm:monitor` from frontmatter (the source of truth), not incremented by hand.
+
+## Step 5 — Guardrails (WIP limit + DoR refusal)
+
+Batch mode must never quietly break the kit's own gate contract. Two guardrails sit on top of the
+safety gate (Step 1):
+
+- **DoR refusal (hard stop).** The skill **will not start** a batch if any story has unmet DoR — it
+  **refuses** and names the failing story **and** the specific missing DoR item (e.g. "STORY-17.1.09
+  — no paired testplan"). An unready story is never fanned out; the operator runs
+  `/Tandem:refine-backlog` on it first. (This is the Step 1 DoR precheck, restated
+  as a guardrail: refuse, don't degrade.)
+- **WIP-limit interaction (preserve, do not raise — ADR-0077).** Batch-parallel **preserves the SOP
+  §5 `in-progress` WIP limit (max 2) unchanged — it does not raise the limit.** This is safe at any
+  fan-out width because fan-out consumes **zero** WIP slots: sub-agents work in isolation and never
+  flip board status, so stories stay `ready` until the serialised reconciliation flips them one at a
+  time (board `in-progress` ≤ 1 during the pass). The only WIP risk is *failed* stories left
+  `in-progress`; to honour the cap, **at most §5-limit (2) failed stories stay `in-progress`; any
+  excess failures are set `blocked`** (recoverable, with a note). Batch **width** is bounded by the
+  soft batch-size limit (ADR-0026), and a batch beyond that bound **queues** — distinct from the WIP
+  cap. See SOP §5 + ADR-0077.
+
+## Output rules
+
+- Status changes are THIS command's job (it is **not** dry-run — that's `execution-strategist`).
+- Per-story commit messages: `STORY-NN.M.PP — <imperative>` (applied by the main thread in
+  reconciliation, not by the sub-agents).
+
+## Next command
+
+Next: `/Tandem:run-testplan`
diff --git a/plugins/tandem/skills/execute-batch/SKILL.md b/plugins/tandem/skills/execute-batch/SKILL.md
new file mode 100644
index 0000000..17277c5
--- /dev/null
+++ b/plugins/tandem/skills/execute-batch/SKILL.md
@@ -0,0 +1,115 @@
+---
+name: execute-batch
+description: Execute one "chat" from an Implementation Strategy in this fresh session — run its stories sequentially (execute-story → run-testplan → close-out-story) with atomic per-story finalisation, a context-budget guard, and clean failure recovery; then flip the chat's executed flag so the dashboard shows it done. Use when the user invokes /Tandem:execute-batch with a chat id (e.g. CHAT-01), or asks to run/execute a batch or chat from the execution-strategist's plan. Operates as Dev/QA hat. This command DOES change story status (unlike execution-strategist, which is dry-run).
+---
+
+# Tandem: execute-batch (Dev/QA hat)
+
+Operate as **Dev/QA hat** in a **fresh chat**. The user has an Implementation Strategy (from
+`/Tandem:execution-strategist`) and wants to clear one of its **chats** — a small
+set of stories grouped to run together — end to end, without re-warming context per story.
+
+A "chat" is the execution-strategist's batch unit (`CHAT-01`, `CHAT-02`, …). This skill runs the
+chat's stories **sequentially**, finalising each atomically before the next. It composes the three
+existing per-story skills; it does **not** re-implement their logic.
+
+## Inputs needed
+
+- **A chat id** — e.g. `CHAT-01`. If the user didn't supply one, ask, or list the chats in the
+  latest strategy.
+- **The strategy** — default to the latest `_00-Project-Management/41-Reports/EXECUTION-STRATEGY-*.json`
+  (the structured sidecar `execution-strategist` writes). Fall back to the paired `.md` report if the
+  JSON is absent. The user may name a specific strategy file.
+
+## Load into context
+
+Use `Read` / `Glob`; treat missing files as "not present", never throw.
+
+- **The named chat** from the latest `EXECUTION-STRATEGY-*.json`: its `stories` (with `ready`
+  flags), `lanes` (serial/parallel), `verify` command, `sub_agents`, and `executed` flag.
+- **Each story + its paired testplan** under `32-Stories/` + `33-Testplans/`.
+- **SOP** — `90-Standards/SOP.md` (DoR/DoD, status enum, WIP limits §5).
+- **The three sub-skills** this one delegates to: `execute-story`, `run-testplan`, `close-out-story`.
+
+## DoR precheck (MANDATORY — before running anything)
+
+A chat may include stories the strategy **flagged un-ready** (`ready: false`). **Do not execute an
+un-ready story.** If any story in the chat is not `ready`, STOP and report which — the user runs
+`/Tandem:refine-backlog` on those first. Only proceed when every story in the chat
+is `ready` (or the user explicitly drops the un-ready ones from this run).
+
+## Algorithm — sequential loop with atomic finalisation
+
+Order the chat's stories by its **lanes**: a `serial` lane runs in its listed order; `parallel`
+lanes are also run **sequentially here** (one at a time) — concurrent fan-out is out of scope for
+this skill (**BACKLOG-0020**; see the sibling `execute-batch-parallel` skill). Then, **for each
+story in order**:
+
+1. **execute-story** — verify DoR, flip `ready` → `in-progress` (atomic + `started_at`), implement
+   the ACs one at a time, file ADRs/BUGs as they arise. Use the chat's resolved **sub-agent** for
+   this story's discipline where appropriate.
+2. **run-testplan** — execute every TC's `Command`, mark PASS/FAIL, auto-file `BUG-YYYYMMDD-NN` for
+   any failure.
+3. **close-out-story** — run the DoD gate; flip to `done` (atomic + `completed_at`); update
+   `MONITOR.md`; regenerate the dashboard (`npm run pm:dash`).
+4. **Finalise atomically before the next story.** Do not advance until the current story has flipped
+   to `done` AND MONITOR is updated (status flip + `completed_at` + revision-history line) AND a
+   per-story **commit** has landed. The commit is the load-bearing recovery checkpoint — it is what
+   makes mid-batch failure recoverable (a crashed batch leaves every completed story committed and
+   `done`). The **dashboard regen (`npm run pm:dash`) may be batched to once at batch end** rather
+   than run per story: it is a generated read-view (the Stop hook regenerates it anyway), and on a
+   large board a 9 MB `pm:dash` per story is wasteful churn. Refresh MONITOR's generated count blocks
+   cheaply per story with `npm run pm:monitor`; reserve the heavy `pm:dash` for the end.
+
+### Context-budget guard
+
+**Before each story**, estimate the remaining context. If running the next story would push
+context **utilisation above ~80%**, **abort cleanly** (do not start it). A conservative threshold is
+deliberate: if unsure, abort. Completed stories are already `done` and safe; the batch can be
+resumed in a new chat.
+
+### Failure-recovery contract
+
+On any abort (context overflow, a story's DoD failing, a hard error):
+
+- **Completed stories stay `done`** (they finalised atomically).
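+- **The current story** (the one mid-flight, if any) goes to **`blocked`** with a one-line note in
+  its body explaining why; do not leave it half-flipped. A context-budget abort happens *before* the
+  next story starts, so in that case no story is mid-flight and none is blocked.
+- **Remaining stories stay `ready`** — untouched.
+- Report: N done, the blocked story (id, or none), M remaining `ready`, and the chat id to resume.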
+
+## On success — mark the chat executed
+
+When **all** the chat's stories reach `done`, set the chat's **`executed: true`** in the
+`EXECUTION-STRATEGY-*.json` sidecar (so the dashboard's Implementation Strategy view renders it as
+`AUTO-EXECUTED`), then regenerate the dashboard. Finally, run the chat's **verify-before-closing**
+command and report its result.
+
+## Output rules
+
+- Commit messages per story: `STORY-NN.M.PP — <imperative>` (close-out-story owns these).
+- Status changes are THIS command's job (it is **not** dry-run — that's `execution-strategist`).
+- Respect SOP §5 WIP: only one story is `in-progress` at a time (sequential loop), so the in-progress
+  limit is never exceeded by this skill.
+
+## End-of-session summary (always emit)
+
+- Chat: CHAT-NN (from EXECUTION-STRATEGY-YYYY-MM-DD)
+- Stories done: X / Y · blocked: <id or none> · remaining `ready`: <list or none>
+- Chat `executed` flag set: yes/no · dashboard regenerated: yes/no
+- Verify-before-closing: <command> → <result>
+
+## Non-negotiable rules from CLAUDE.md
+
+- **Atomic finalisation** per story (status flip + `completed_at` + MONITOR + per-story commit)
+  before the next — the recovery contract depends on it. The dashboard regen may be batched to batch
+  end.
+- Status enum is the closed set of nine; DoR gate before `in-progress`, DoD gate before `done`.
+- ADR-on-the-spot for non-obvious decisions; BUG auto-raise on any defect / TC failure.
+- Never execute an un-ready story (DoR precheck above).
+
+## Next command
+
+Next: `/Tandem:run-testplan`
+
+`/Tandem:weekly-monitor` — after a chat closes, fold the delta into the Friday
+cadence. Or re-run `/Tandem:execute-batch <next-chat-id>` for the next chat in the
+strategy (mind the chat's `depends_on` edges — run unlocked chats first).
diff --git a/plugins/tandem/skills/execute-story/SKILL.md b/plugins/tandem/skills/execute-story/SKILL.md
new file mode 100644
index 0000000..eef4f7e
--- /dev/null
+++ b/plugins/tandem/skills/execute-story/SKILL.md
@@ -0,0 +1,95 @@
+---
+name: execute-story
+description: Start work on a Ready STORY. Use when the user asks to begin a story, execute a story, pull a story, or work on a story file under the project's stories folder. Verifies Definition of Ready, flips status to in-progress, reads the paired testplan, implements ACs one at a time, files ADRs and BUGs as they arise.
+---
+
+# Tandem: execute-story (Dev hat)
+
+Operate as **Dev hat**. The user is pulling a Ready story into active work.
+
+> **Parallel-execution note.** When this story runs as part of a fanned-out parallel batch, the concurrency model in [ADR-0075](../../_00-Project-Management/40-Decisions/ADR-0075-parallel-execution-concurrency-model.md) governs: the batch must be provably file-disjoint (`pm:batch-check`), and the **main thread is the sole writer** of `MONITOR.md` / `ACTIVE.md` and the sole assigner of sequential `ADR-NNNN` / `BUG-YYYYMMDD-NN` IDs — a sub-agent running this skill proposes ADR/BUG bodies but never claims an ID or edits the board. Atomic status+timestamp flips are unchanged.
+
+## Pre-flight — refuse loudly if the kit isn't wired
+
+Before anything else, run the cheap wiring gate: `node _00-Project-Management/93-Scripts/doctor.js --gate` (npm: `npm run pm:doctor -- --gate`). It is **silent on success**; on an unwired project it exits non-zero and prints one line — **kit not wired — run `npm run pm:install`**. If it fails, **refuse and surface that message verbatim** rather than proceeding — a mis-wired kit must fail loudly, not silently no-op. (STORY-12.2.03)
+
+## Inputs needed
+
+- Story file path — resolve the `stories` folder via the path map (`node _00-Project-Management/93-Scripts/lib/pm-paths.js resolve stories`; the config is `90-Standards/pm-paths.json`), then glob for `EPIC-NN/FEAT-NN.M/STORY-NN.M.PP-*.md` under it. (E.g., it resolves to `03-Stories` in a flattened layout.)
+- If the user didn't supply it, ask: "Which story file? Or want me to list Ready stories?"
+
+## Load into context
+
+Folder locations are resolved through the path map (`pm-paths.js` / `pm-paths.json`) rather than hardcoded, ensuring consistent references across all skills. Use the resolver script `node _00-Project-Management/93-Scripts/lib/pm-paths.js resolve <role>` to determine physical folders for logical roles such as stories, testplans, bugs, decisions, active, and templates. If NONE of the candidates exist for a given role, note it in the output (don't fabricate scaffolding) and degrade gracefully (e.g. redraft from in-context examples rather than from a templates file that doesn't exist; flag the gap in the end-of-session summary).
+
+- **Story file** — at the resolved path from "Inputs needed" above.
+- **Paired testplan** — resolve the `testplans` folder via the path map; glob for `EPIC-NN/FEAT-NN.M/TESTPLAN-NN.M.PP-*.md` under it.
+- **Parent feature + epic** — resolve the `features` and `epics` folders via the path map; read the parent FEAT file and its parent EPIC file.
+- **SOP / DoR / DoD reference** — `_00-Project-Management/90-Standards/SOP.md` if present. If absent, fall back to project-root `CLAUDE.md` for DoD-equivalent rules.
+- **Project-wide stack quirks** — `_00-Project-Management/90-Standards/PROJECT-CONTEXT.md` if present. If absent, infer from project-root `CLAUDE.md` + `package.json` scripts.
+- **Templates folder** — resolve via the path map. If absent, file the gap as tech debt and redraft from sibling artefacts in the same EPIC/FEAT.
+- **ADR folder** — resolve the `decisions` folder via the path map.
+- **Bugs folder** — resolve the `bugs` folder via the path map.
+- **Active WIP index** — resolve the `active` folder via the path map and read `ACTIVE.md` from it. If the file doesn't exist, skip the `ACTIVE.md` update in Task step 2; the `status: in-progress` flip on the story file is the canonical source of WIP truth.
+- **Project root `CLAUDE.md`** — always loaded for project-specific overrides.
+
+Use `Read` / `Glob` to detect existence rather than assuming; treat missing files as "not present" rather than throwing.
+
+## Task
+
+1. **Verify the DoR is satisfied** (SOP.md §6). If a checklist item fails — STOP, list the gap, ask the user. Do not proceed.
+
+2. **Flip story status:** `ready` (or `not-started`) → `in-progress`. Set `started_at` to now (ISO 8601 + offset, from system clock). Atomic edit. Update the resolved ACTIVE.md if one exists — skip silently if it doesn't.
+
+3. **Implement the story:**
+   - **Resolve the sub-agent before implementing:** use the story's `suggested_agents:` if set, else the PROJECT-CONTEXT `type_of_work → sub-agent` map, else discipline-only / `general-purpose`. An unknown or uninstalled agent never blocks — degrade to the next step. Name the chosen sub-agent, and dispatch suitable implementation work to it (SOP §18). See SOP §11.3 / FEAT-03.1.
+   - Work the acceptance criteria one at a time.
+   - For each AC, write the code AND wire the corresponding TC's setup.
+   - **Tick the AC's checkbox in the story body in the SAME edit as the code/artefact change that satisfies it.** Do NOT defer body-checkbox ticking to the close-out pass — that creates a gap window where the story's frontmatter trends toward `done` while the body still says nothing's complete. For an AC resolved via spec-error exception (the AC was wrong, replaced by an ADR), still tick the box and append an inline ratification note (e.g., `- [x] <AC text> — ratified via ADR-NNNN §Verification (deferred per <trigger>)`). Strict DoD rule: "All AC checkboxes ticked" is non-negotiable. Precedent: FEAT-00.5 4-way close-out 2026-05-23 caught 20 unticked boxes across 4 stories at the explicit `/close-out-story` invocation because the body-tick step had been skipped through the execute-story cycle.
+   - Tests must run via the commands documented in the TESTPLAN — do not improvise commands during execution.
+   - For multi-file searches like "where is symbol X" — delegate to Explore agent (SOP §18). Don't paste grep results into main thread.
+   - **Before adding any new top-level structural element to an existing file** (jest `describe(...)` block, Playwright `test.describe(...)` block, Firestore rule `match /collection/{id}` block, GraphQL resolver, OpenAPI path, route module export, etc.), grep that file for the exact name to surface collisions. The Explore-agent inventory step at the start of "verify + harden" stories MUST include a "find existing structural-name collisions inside files we'll touch" pass — filename matching alone misses in-file duplicates (precedent: STORY-00.4.01 close-out 2026-05-23 caught a duplicate `describe('activity_logs collection', ...)` block only at the DoD code-review stage, after the testplan had already run with both blocks active).
+
+4. **Whenever you make a non-obvious decision** (library choice, threshold, schema field name, deferred sub-feature, divergence from defaults), STOP coding and:
+   - Find the next free `ADR-NNNN` (glob the resolved ADR folder).
+   - Create `<resolved-adr-folder>/ADR-NNNN-<slug>.md` using `ADR.template.md` if present, else redraft from a sibling ADR in the same folder.
+   - Add the ADR ID to the story's `decisions:` frontmatter array.
+   - Resume coding.
+
+5. **Whenever you observe a defect** (yours or pre-existing), STOP and:
+   - File a BUG at `<resolved-bugs-folder>/EPIC-NN/FEAT-NN.M/BUG-YYYYMMDD-NN-<slug>.md` using `BUG.template.md` if present, else redraft from a sibling BUG.
+   - Decide: fix-now (block this story) or fix-later (link from BACKLOG).
+   - Reference the bug in the story's body.
+   - **Spec-error exception:** if the failure indicates the AC is wrong (budget set without measurement, vocabulary that contradicts shipped code, etc.) rather than a code defect, file an **ADR** documenting the spec correction instead of a BUG. The ADR linked from the TC's `Result:` line IS the resolution. Cite the ADR in the story body. Don't file both for the same root cause.
+
+6. **Default stop-state:** when all ACs are implemented and self-review is done, flip status to `in-review` and STOP. Do **not** set `completed_at` yet — that's the DoD gate via `/Tandem:close-out-story`. Do **not** auto-run `/Tandem:run-testplan` or `/Tandem:close-out-story` — those are deliberate next invocations the user owns, giving a QA-as-second-pair-of-eyes break between dev work and DoD sign-off.
+
+   **Combine-into-one-cycle exception:** it IS appropriate to chain execute → run-testplan → close-out → MONITOR + dashboard sync all the way to `status: done` within a single execute-story invocation when:
+   - The user explicitly requests it ("execute and close out together", "do the full cycle", "4-way batch", etc.), OR
+   - The story is a **paperwork-only ratification spike** — no source-code change, only ADR-writing or status-flipping, where the QA pass adds no signal because there's nothing executable to QA beyond the static-analysis TCs the implementation step just satisfied (precedent: FEAT-00.5 4-way close-out 2026-05-23 — 4 spike stories closed in one cycle with 4 new ratification ADRs).
+
+   Default for code-touching stories is the staged flow (stop at `in-review`); combined-cycle is the exception, not the rule.
+
+## Output rules
+
+- Tick AC checkboxes in the story file as you complete them.
+- Update TC `Result:` lines in the TESTPLAN as you run them.
+- Commit messages: `STORY-NN.M.PP — <imperative>`.
+- If you hit a `blocked` situation, flip status to `blocked`, note the reason in the story body, return to user.
+
+## Non-negotiable rules from CLAUDE.md
+
+- Frontmatter timestamps (set `started_at` on in-progress; do **not** set `completed_at` yet).
+- Status enum (closed set of 9).
+- ADR on the spot for non-obvious decisions.
+- Bug auto-raise for any defect, in the same response as the observation.
+- Templates rule — never redraft section headings.
+
+## End-of-session summary (always emit)
+
+- ACs ticked: X / Y
+- TCs run: X / Y (PASS / FAIL counts)
+- ADRs created: <list of paths>
+- BUGs filed: <list of paths>
+- Status now: `in-progress` | `in-review` | `blocked`
+- Next step: `/Tandem:run-testplan` if in-review; resolve the blocker (then resume) if blocked
diff --git a/plugins/tandem/skills/execution-strategist/SKILL.md b/plugins/tandem/skills/execution-strategist/SKILL.md
new file mode 100644
index 0000000..ebb57c3
--- /dev/null
+++ b/plugins/tandem/skills/execution-strategist/SKILL.md
@@ -0,0 +1,228 @@
+---
+name: execution-strategist
+description: Plan how to execute a whole epic — group its stories into "chats" that are genuinely good to run together, the way a lead plans a sprint. Use when the user asks to plan execution, strategise an epic, group stories to run together, decide what to batch, or invokes /Tandem:execution-strategist (typically with an EPIC-NN). Operates as PM hat. Takes an epic, reads all its not-done stories + paired testplans, and writes an Implementation Strategy — phases → chats, each with execution lanes (serial/parallel), sub-agents, a paste-ready trigger, a verify-before-closing command, and depends/unlocks edges — as a markdown report PLUS a structured JSON sidecar. DRY-RUN: never modifies story status.
+---
+
+# Tandem: execution-strategist (PM hat)
+
+Operate as **PM hat**. After an epic has been planned, the user runs this skill to get the best
+way to clear that epic's stories in a small number of batched "chats" — reasoning about which
+stories are genuinely **good to do together** (a long view, like planning a sprint), not just
+mechanical matching. The output (an **Implementation Strategy**) is the input to
+`execute-batch` / `execute-story` and is rendered by the dashboard's Implementation Strategy view.
+
+This file IS the source of truth for the behaviour. Grouping is **judgment-led** (see ADR-0025):
+two runs may differ, and that is acceptable — the plan is a dry-run proposal you review before
+acting. Verification is therefore **structural** (does each chat carry lanes, sub-agents, a
+trigger, a verify line, valid phases?), not exact-output matching.
+
+## Dry-run contract (read-only — MANDATORY)
+
+- It **reads** story + testplan frontmatter/content and **writes exactly two artefacts**: the
+  Implementation Strategy report (`.md`) and its paired structured sidecar (`.json`), both under
+  `41-Reports/`.
+- It does **NOT modify story status**, does **NOT** flip anything to `in-progress`, does **NOT**
+  edit any story, testplan, MONITOR, or dashboard. Acting on the plan happens later via
+  `/Tandem:execute-batch`. If asked to "start"/"pull" a chat, stop and clarify —
+  that is `execute-batch`.
+
+## Inputs needed
+
+- **An epic identifier** — `EPIC-NN` (or a path under `32-Stories/EPIC-NN/`). If the user didn't
+  supply one, ask: "Which epic? e.g. `EPIC-100`." Optionally a target date for the filename
+  (default: today, system clock).
+
+## Load into context
+
+Use `Read` / `Glob` to detect existence; treat missing files as "not present", never throw.
+
+- **Epic-scoped stories** — glob `_00-Project-Management/32-Stories/EPIC-NN/FEAT-*/STORY-*.md` for
+  the named epic only. Keep every story that is **not-done** (exclude `done` / `wontfix` /
+  `duplicate` / `archived`). This is the corpus — the whole epic, not just `ready` ones.
+- **DoR flag** — a story whose `status` is not `ready` (e.g. `not-started`) is still **included**
+  but **flagged** with a visible DoR-gap marker, so the user knows that chat needs
+  `/Tandem:refine-backlog` before it can actually run. Never silently drop it.
+- **Paired testplans** — for each story, read `_00-Project-Management/33-Testplans/EPIC-NN/FEAT-*/TESTPLAN-NN.M.PP-*.md`
+  (used to compose the verify line — see step 6).
+- **Story frontmatter fields consumed:** `id`, `feature`, `estimate`, `priority`, `status`,
+  `type_of_work`, `suggested_agents` (FEAT-03.1), `depends_on`, `files_touched` (ADR-0020).
+- **PROJECT-CONTEXT.md** — `_00-Project-Management/90-Standards/PROJECT-CONTEXT.md` "Sub-agent
+  mapping" (the `type_of_work → sub-agent` default map) + "Quality commands" (the DoD fallback
+  for verify lines).
+- **SOP** — `90-Standards/SOP.md` (status/estimate enums, §11.3 sub-agent resolution).
+- **ADRs** — ADR-0020 (depends_on/files_touched), ADR-0023 (sub-agent metadata), ADR-0025
+  (judgment-led determinism stance), ADR-0026 (soft batch-size bounds).
+
+## Step 1 — Scope the epic
+
+Collect the epic's not-done stories (above). For each, note id, title, feature, status, estimate,
+priority, `suggested_agents`, `depends_on`, `files_touched`, and a `ready` boolean
+(`status == ready`). Flag any not-`ready` story for the DoR-gap marker.
+
+## Step 2 — Group into chats (JUDGMENT-LED)
+
+**Reason** about which stories are genuinely good to execute together in one fresh chat — shared
+domain, a dependency you'd want warm, the same files, a coherent slice of the epic. Think like a
+lead planning a sprint, not a mechanical matcher. The old affinity signals (same FEAT,
+`depends_on` chain, `files_touched` overlap) are **inputs to your judgment**, not the whole story.
+Each chat gets a **one-line rationale** explaining *why these belong together*.
+
+**Soft bounds (ADR-0026, amends ADR-0021):** aim for **2–5 stories** per chat. These are
+**guidance, not hard caps** — you MAY deviate (a justified single-story chat, or a cohesive
+6-story phase) when the reasoning warrants it, but you MUST record the deviation in that chat's
+rationale. A story with no good companion is its own chat (note "runs solo — no good co-batch").
+
+## Step 3 — Derive lanes (serial vs parallel)
+
+Within each chat, mark how its stories sequence:
+
+- **Serial** — stories linked by `depends_on` form an ordered chain (e.g. "serial (A → B)").
+- **Parallel** — two stories are parallel-**safe** ONLY when their `files_touched` lists are
+  provably **disjoint** (no shared path). Label e.g. "3 parallel (separate files)".
+- **Default to serial** when `files_touched` is missing or overlap is uncertain — be
+  **conservative**; never assert parallel you can't prove disjoint.
+
+This skill only **plans** lanes. Actually running stories concurrently (fan-out) is **BACKLOG-0020**,
+out of scope here.
+
+## Step 4 — Assign sub-agents (per chat)
+
+Resolve each story's sub-agent via the FEAT-03.1 order: the story's **`suggested_agents`** if set
+→ else the **PROJECT-CONTEXT** `type_of_work → sub-agent` **map** → else discipline-only /
+**`general-purpose`** **fallback**. An unknown/uninstalled agent never hard-fails — degrade to the
+next step. Aggregate the distinct resolved agents per chat **with counts** (e.g.
+`react-expert ×2, javascript-pro`) into the chat's `sub_agents`.
+
+## Step 5 — Compose the paste-trigger (per chat)
+
+A ready-to-paste prompt the user drops into a fresh chat. Template:
+
+> Execute STORY-A, STORY-B together. [lanes: serial chain A → B / N parallel, file-isolated].
+> Follow CLAUDE.md gates: paired testplan per story, auto-file BUG-* on any TC failure, atomic
+> status → done. Sub-agents: [resolved list].
+
+The trigger drives the existing `execute-batch` / `execute-story` flow — it does **not** invent a
+new runtime.
+
+## Step 6 — Compose the verify-before-closing line (per chat)
+
+From each constituent story's **paired testplan**, collect the **P0 + integration** TC `Command`s
+and join them with `&&` into one verify line (de-duplicate identical commands across the chat;
+skip `manual-review-by-claude` TCs — they have no runnable command). **Fallback:** when a story
+has no P0/integration TC, use the project **DoD quality gates** (lint / typecheck / test / build
+from PROJECT-CONTEXT) plus `npm run pm:lint`.
+
+**Exit-code gates only (MANDATORY — BUG-20260608-01).** Every gate in the verify line must rely on
+the command's **exit code**, never on a substring of its output. Emit `npm run pm:lint >/dev/null
+2>&1 && echo OK` (the script exits non-zero on any violation — let that gate). **Never** emit the
+`npm run pm:lint 2>&1 | grep -E "violations" | tail -1` shape: the summary reads `N violation(s)`
+(no bare `violations` substring) and a trailing `| tail` always exits 0, so that pipeline can never
+fail and would green-light a dirty corpus. The same rule applies to any `grep … | tail`/`| head`
+"gate" — a pipe into `tail`/`head` masks the real exit status.
+
+**Never re-emit `npm run pm:mirror`.** The scaffold-parity mirror gate was retired in ADR-0074 (the
+script no longer exists in `package.json`), so a verify line that calls it would now hard-fail on a
+missing script. Do not append `&& npm run pm:mirror` (or `npm run pm:mirror &&`) to any emitted
+`verify` block. Historical sidecars that still carry it are a one-time cleanup, not a live gate.
+
+## Step 6b — Generate chat and phase outcomes (via `write-outcomes` skill)
+
+For each **chat** and for each **phase**, dispatch a sub-agent with the `write-outcomes` skill to
+synthesize a **fresh, founder-facing outcome line** — a single plain-text sentence describing *what
+the founder will have* once that chat/phase lands (the new capability, not the implementation).
+**Critical nuance:** The outcome is a **fresh synthesis** of the grouped stories' collective value,
+**NOT** a concatenation or list of individual story outcomes. The sub-agent is handed the grouped
+stories' technical scope and writes one clean line. Capture the returned line verbatim (no
+markdown, no "Outcome:" label, no quotes) and write it into the JSON sidecar's `chats[].outcome`
+and `phases[].outcome` fields, and render it in the markdown report.
+
+**Ordering note (chats vs phases):** chats already exist by Step 2, so chat outcomes can be
+synthesized here. **Phase outcomes are synthesized once Step 7 has grouped chats into phases** — the
+phase set does not exist yet at this step, so run the phase-outcome dispatch after Step 7 (or treat
+this step's phase pass as deferred until the phases are known). Either way a phase outcome is a
+fresh synthesis of its constituent chats' collective value, not a concatenation of their lines.
+
+## Step 7 — Order into phases + edges
+
+Group chats into **ordered phases**, **foundation-first** (chats with no cross-chat dependency
+come first; then themed phases). Compute chat-level **`depends_on` / `unlocks`** edges from the
+cross-chat `depends_on` relationships between the constituent stories. Name phases by their theme
+(e.g. "Foundations", then feature-themed).
+
+## Output — Implementation Strategy (markdown report + JSON sidecar)
+
+Write **two** artefacts (today's date; on same-day re-run append `-02`, `-03`, … — never clobber):
+
+1. `_00-Project-Management/41-Reports/EXECUTION-STRATEGY-YYYY-MM-DD.md` — human-readable.
+2. `_00-Project-Management/41-Reports/EXECUTION-STRATEGY-YYYY-MM-DD.json` — structured; the
+   dashboard's Implementation Strategy view (FEAT-03.3) reads this. The per-chat `executed` flag
+   lives here (default `false`; later flipped by `execute-batch` / by hand, then `pm:dash`).
+
+### JSON sidecar schema
+
+```json
+{
+  "epic": "EPIC-NN",
+  "generated_at": "<ISO 8601>",
+  "phases": [
+    {
+      "name": "Foundations",
+      "outcome": "<optional: founder-facing 'what you'll have' once this phase lands>",
+      "chats": [
+        {
+          "id": "CHAT-01",
+          "title": "<short title>",
+          "outcome": "<optional: founder-facing 'what you'll have' once this chat lands>",
+          "stories": [{ "id": "STORY-NN.M.PP", "status": "ready", "ready": true }],
+          "lanes": [{ "type": "serial", "stories": ["STORY-...", "STORY-..."] }],
+          "sub_agents": ["react-expert ×2", "javascript-pro"],
+          "trigger": "<paste-ready prompt>",
+          "verify": "<&&-joined command>",
+          "depends_on": ["CHAT-..."],
+          "unlocks": ["CHAT-..."],
+          "estimate": "<rolled-up>",
+          "executed": false
+        }
+      ]
+    }
+  ]
+}
+```
+
+**`outcome` (optional, per `phase` and per `chat`)** — a single founder-facing sentence describing *what you'll have* once that phase/chat lands (the capability, not the implementation). Omit it (or use `""`) when there's nothing founder-facing to say; it never affects grouping or the dry-run contract. When present, the dashboard's Implementation Strategy view surfaces it on phase headers and chat cards (FEAT-14.2). This mirrors the optional `outcome:` field on Story/Feature frontmatter (SOP §11; nudged by the non-fatal W1 `pm:lint` warning, ADR-0061).
+
+### Markdown report
+
+Renders the same data human-readably: a `## Phase N · <name>` heading per phase, then one block
+per chat carrying its id, rolled-up estimate, title, **Stories** (DoR-gap flagged where
+not-`ready`), **Lanes**, **Sub-agents**, a fenced **paste-trigger**, a fenced
+**verify-before-closing** command, **Depends on / Unlocks**, and the phase/chat **outcome** line
+(if present, rendered after the phase heading and after the chat title respectively). End with the next-command stub
+`/Tandem:execute-batch <chat-id>`.
+
+### Empty case (handle gracefully — do NOT error)
+
+If the epic has **0** not-done stories, write a valid empty strategy (0 phases / 0 chats) stating
+there is nothing to execute — and, if the only stories are `done`, say the epic is complete.
+
+## End-of-session summary (always emit)
+
+- Artefacts written: `EXECUTION-STRATEGY-YYYY-MM-DD.md` + `.json`
+- Epic: EPIC-NN — stories scanned: N (R ready, U un-ready flagged)
+- Phases: P · Chats: C
+- Confirm: no story status was modified (dry-run).
+
+## Non-negotiable rules from CLAUDE.md
+
+- **Dry-run / read-only** — never changes a `status:` field; writes only the two report artefacts.
+- Grouping is **judgment-led** (ADR-0025) — reason, don't mechanically match; record a rationale.
+- Batch-size bounds are **soft guidance** (ADR-0026) — deviate only with a written rationale.
+- Parallel lanes require **provably-disjoint** `files_touched`; default serial.
+- Reports live in `41-Reports/`.
+
+## Next command
+
+Next: `/Tandem:execute-batch`
+
+`/Tandem:execute-batch <chat-id>` — pull one proposed chat into a fresh working
+chat. That command owns the status changes; this one does not.
diff --git a/plugins/tandem/skills/fill-claude-md/SKILL.md b/plugins/tandem/skills/fill-claude-md/SKILL.md
new file mode 100644
index 0000000..399c75a
--- /dev/null
+++ b/plugins/tandem/skills/fill-claude-md/SKILL.md
@@ -0,0 +1,140 @@
+---
+name: fill-claude-md
+description: Fill in or trim a CLAUDE.md so only non-discoverable, project-specific, broadly-relevant lines remain — the judgement layer applying the four content-economics tests over `pm:claude-scaffold`'s stubs. Use when a CLAUDE.md stub created by `pm:claude-scaffold` still carries `[auto — verify]` or `<fill in>` markers, when `pm:claude-audit` reports `incomplete`, or when an existing CLAUDE.md needs trimming.
+---
+
+# Tandem: fill-claude-md (Dev hat)
+
+You are completing the **judgement layer** of the kit's CLAUDE.md automation.
+The scaffold script (`pm:claude-scaffold`) wrote the *discoverable* starter lines
+and marked them `[auto — verify]`. Your job is the part automation cannot do:
+decide what non-discoverable, project-specific content earns a place, and confirm
+or remove the auto-detected starters.
+
+## The content-economics tests (CLAUDE-CODE-CONFIG §2.1.4)
+
+Before any line stays in a CLAUDE.md, it must pass ALL of these tests:
+
+1. **Applies broadly at this scope.** Root file → relevant to almost any task in
+   the repo. Subdir file → relevant to almost any task in that subdir. Narrower →
+   push it down the tree or into a skill.
+2. **Not discoverable by exploration.** If `grep`, `ls`, or reading the code would
+   reveal it, it does not belong. Encode what the code can't say: gotchas,
+   "looks like X but is actually Y", which command to use *here*, where to start.
+3. **Project-specific, not reusable expertise.** Advice that applies to any repo
+   is a *skill*, not CLAUDE.md.
+4. **Reference, don't duplicate.** If a line is reference material that already
+   has a canonical home — a frontmatter block or section skeleton living in
+   `91-Templates/`, a schema, a status enum, a command list owned by another
+   file — *point to that home*, don't inline a copy. Inlined duplication drifts
+   from its source and is paid for in context every session. Flag any inlined
+   copy of a template/schema/owned-command-list for replacement with a one-line
+   pointer (e.g. "Story frontmatter: see `91-Templates/STORY.template.md`").
+
+**Inclusion gate for every line:** "If I delete this line, what specifically goes wrong, and how often?" If the answer is "nothing, most of the time" — delete it.
+
+## Tiered layout (where a rule lives)
+
+CLAUDE.md is a tree, not one file. Place each rule by **cost-of-a-miss**: a
+serious or irreversible miss (data loss, a broken release, a security hole) earns
+a spot higher up; a recoverable, area-local miss lives lower down.
+
+- **Tier 1 — always-on root `CLAUDE.md`.** Loaded every session, so it costs
+  context on *every* task. Keep only rules relevant to **almost every** task here.
+  Keep it lean; it signposts the rest of the tree in plain words.
+- **Tier 2 — folder-scoped `CLAUDE.md`.** One per major area, auto-loaded by
+  Claude Code when a file in that folder is touched. **Additive** — it adds what's
+  true *only* in that folder; it is never a copy of root.
+- **Tier 3 — thin pointer file.** In a folder that only occasionally needs a rule
+  whose real home is elsewhere, a tiny `CLAUDE.md` that names the rule's single
+  home and is followed on demand — not a second copy of the rule.
+
+**Never use `@import`.** An `@import` (or `@path/to/file`) pulls the target into
+the always-loaded context eagerly, re-bloating Tier 1 with everything it points
+at — the opposite of the tiered model. Use a **plain-text signpost** ("when
+editing X, read `x/CLAUDE.md`") instead; Claude Code loads the folder file on
+demand when it's actually relevant.
+
+## Workflow
+
+```dot
+digraph fill {
+  "Find target stubs" [shape=box];
+  "For each stub: read it + its directory" [shape=box];
+  "Confirm [auto — verify] starters" [shape=box];
+  "Draft gotchas/conventions" [shape=box];
+  "Run each candidate through 4 tests" [shape=diamond];
+  "Write outside managed markers" [shape=box];
+  "Strip confirmed [auto — verify] markers" [shape=box];
+  "Re-audit" [shape=doublecircle];
+
+  "Find target stubs" -> "For each stub: read it + its directory";
+  "For each stub: read it + its directory" -> "Confirm [auto — verify] starters";
+  "Confirm [auto — verify] starters" -> "Draft gotchas/conventions";
+  "Draft gotchas/conventions" -> "Run each candidate through 4 tests";
+  "Run each candidate through 4 tests" -> "Write outside managed markers" [label="passes"];
+  "Run each candidate through 4 tests" -> "Draft gotchas/conventions" [label="fails: drop or move to skill"];
+  "Write outside managed markers" -> "Strip confirmed [auto — verify] markers";
+  "Strip confirmed [auto — verify] markers" -> "Re-audit";
+}
+```
+
+### Step 1 — Find the targets
+
+If given a path, use it. Otherwise run:
+`npm run pm:claude-audit -- --json` and take every entry with state `incomplete`
+(and any `gap` you intend to fill). Each maps to a `CLAUDE.md` file.
+
+### Step 2 — For each stub, confirm the `[auto — verify]` starters
+
+Read the file and the directory it governs. For each `[auto — verify]` line:
+- Verify the command actually exists and is the *right* one (e.g. check the
+  manifest's scripts; confirm `npm run test` isn't actually `pnpm test:unit`).
+- If correct → **remove the `[auto — verify]` suffix**, keep the line. Add a
+  disambiguating note only if the obvious command is wrong (e.g. "NOT `npm test`
+  — that runs e2e, ~20min").
+- If wrong → fix or delete it.
+
+These lines live inside the `<!-- PM-KIT:BEGIN managed:commands -->` block.
+Editing them in place is fine — but do NOT add new human prose inside that block;
+the scaffold rewrites its inner content on the next run.
+
+### Step 3 — Draft and filter gotchas + conventions
+
+Propose candidate lines for `## Critical gotchas` and `## Conventions`. For each,
+apply the four tests and the deletion gate. Concretely reject:
+- "Components live in `/components`" → discoverable by `ls`. **Drop.**
+- "Use TypeScript strict mode" → discoverable from `tsconfig.json`. **Drop.**
+- "Write good commit messages" → reusable expertise. **Move to a skill, not here.**
+- A pasted copy of the Story frontmatter block → has a canonical home in
+  `91-Templates/STORY.template.md`. **Replace with a pointer to it** (test 4).
+
+Concretely keep (when true for this repo):
+- "Migrations run from repo root, not `/services/*`, despite appearances."
+- "DB writes go through `/packages/db/writers/*` — don't call the ORM from routes."
+- "Dates are always `Temporal`, never `Date`; the lint rule misses some paths."
+
+Write these **outside** the managed markers (under the `## Critical gotchas` and
+`## Conventions` headings, in the human area). Keep each file lean — a subdir file
+should be additive to root, never a copy of it.
+
+### Step 4 — Trimming mode (existing bloated files)
+
+If asked to trim, run every existing line through the four tests in reverse.
+Produce a short list: "Recommend deleting — reason" per failing line (including
+"duplicates `<canonical home>` — replace with pointer" for test-4 failures). Make
+the edits only after the user confirms, unless they asked you to trim directly.
+
+### Step 5 — Re-audit
+
+Run `npm run pm:claude-audit -- --json` again. The files you completed should no
+longer be `incomplete` (no `[auto — verify]` / `<fill in>` left). Report what
+changed and any boundary you deliberately left `excluded`.
+
+## Guardrails
+
+- Never write content inside a `PM-KIT:BEGIN/END` managed block except the
+  command lines the scaffold owns — the scaffold overwrites managed inner content.
+- Never copy root content into a subdir file. Subdir files are additive.
+- When in doubt about a line, delete it. A lean file that's all signal beats a
+  complete file that's half noise. The context cost is paid every session.
diff --git a/plugins/tandem/skills/install/SKILL.md b/plugins/tandem/skills/install/SKILL.md
new file mode 100644
index 0000000..ff77dfb
--- /dev/null
+++ b/plugins/tandem/skills/install/SKILL.md
@@ -0,0 +1,51 @@
+---
+name: install
+description: Wire the Tandem PM kit into a project — materialize the full folder tree + seed files from the manifest, pin the folder layout, merge the pm:* scripts, guard-register hooks, and generate the Command Center dashboard. Use when the user asks to install Tandem, set up the PM kit, onboard a new repo, wire the kit, or invokes /Tandem:install. Thin wrapper over the canonical install.js (pm:install) — the script does the deterministic work; this skill handles the conversational choices (layout, PROJECT-CONTEXT) and confirms the result.
+---
+
+# Tandem: install (operator setup)
+
+Wire the PM kit into the current project (or a `--target`) so a fresh repo gets the complete
+`_00-Project-Management/` tree, seed files, wired `pm:*` scripts, and a working dashboard — in one
+command. This skill is the entry point; the deterministic work lives in the canonical
+`install.js` script (`pm:install`), so behaviour stays testable and identical whether invoked here
+or from the CLI.
+
+## Source of truth
+`_00-Project-Management/93-Scripts/install.js` (run as `npm run pm:install`). It is idempotent and
+additive — re-running it never overwrites an existing script or hook definition, nor a user-owned
+file; only kit-owned seed files are refreshed (see step 1 below). See
+ADR-0072 (manifest schema + kit/user ownership boundary) and ADR-0054 (canonical entrypoint).
+
+## What it does (delegated to install.js)
+1. **Materialize the tree** — create every folder declared in `lib/pm-manifest.json` and copy seed
+   files under the kit/user ownership rule (kit-owned overwritten; user-owned written only when
+   absent, so an operator's edits survive a re-install).
+2. **Pin the layout** — write `.claude-pm-config.json` (`layout`, `kitVersion`) and
+   `90-Standards/pm-paths.json` from the detected or chosen preset.
+3. **Wire scripts + hooks** — merge the `pm:*` scripts into the host `package.json` and
+   guard-register the Claude Code hooks (only when absent — ADR-0055).
+4. **Generate HTML** — run the dashboard generator so the Command Center opens with the project's
+   own name and working links.
+
+## How to run it
+- Default (this repo): `npm run pm:install`
+- Another project root: `node _00-Project-Management/93-Scripts/install.js --target <dir>`
+- Pin a layout instead of auto-detecting: `--layout full|flattened`
+- Preview without writing: `--dry-run`
+
+## Conversational steps this skill owns
+- **Layout choice** — if the project's layout is ambiguous, confirm `full` vs `flattened` with the
+  operator before pinning it (default: auto-detect → `full`).
+- **PROJECT-CONTEXT fill** — `90-Standards/PROJECT-CONTEXT.md` is seeded as a user-owned starting
+  point; offer to fill in the project's stack quirks / gotchas so later skills have real context.
+- **Confirm the result** — after install, surface the generated dashboard path and recommend
+  `npm run pm:doctor` to verify the wiring is healthy.
+
+## Non-negotiable rules
+- Adds **no** destructive behaviour beyond `install.js`. This skill never deletes or moves a user's
+  work; it only orchestrates the script and confirms.
+- Keep deterministic logic in the script — the skill only orchestrates + confirms.
+
+## Next
+Next: `/Tandem:session-start` (orient), then begin planning with `/Tandem:draft-okrs`. Pull kit
+improvements later with `/Tandem:update`.
diff --git a/plugins/tandem/skills/mode/SKILL.md b/plugins/tandem/skills/mode/SKILL.md
new file mode 100644
index 0000000..c550b27
--- /dev/null
+++ b/plugins/tandem/skills/mode/SKILL.md
@@ -0,0 +1,65 @@
+---
+name: mode
+description: Set and enforce the project's global conversation Mode (Plan · Dev · Dual · Neutral) — the persistent "moat" that frames every joined chat. Use when the user says "go to plan/dev/dual mode", "set the mode", "isolate this chat", "rejoin global mode", when a request is out-of-mode, or when uncertain which phase the project is in.
+---
+
+# Tandem conversation Mode — "the moat"
+
+A single project-global Mode frames every **joined** chat so suggestions stay in-phase. State lives in `.tandem-mode.json` at the repo root (git-ignored). Read/write it ONLY through `_00-Project-Management/93-Scripts/mode.js` (CLI or import) — never hand-edit the JSON.
+
+## The four modes
+
+| Mode | Meaning | Out-of-mode requests |
+|---|---|---|
+| **plan** | Planning phase. Founder/PM/QA-planning work. | Dev requests → nudge |
+| **dev** | Building phase. Dev/QA-execution work. | Plan requests → nudge |
+| **dual** | Both allowed (planning in one chat, building in another). | No nudge |
+| **neutral** | No active frame (default/resting). | No enforcement |
+
+## What is Plan vs Dev (anchored to lifecycle skills, not keyword-guessing)
+
+- **Plan:** draft-okrs, draft-prd, draft-epic, split-into-features, split-into-stories, refine-backlog, execution-strategist; *planning* a testplan.
+- **Dev:** execute-batch, execute-story, run-testplan, close-out-story; *running* a testplan.
+
+## Joining (opt-in) and isolation
+
+- A chat is **free** (no mode, no nudges) until it **joins**. Joining happens when `session-start` runs, or when the mode is set from within the chat.
+- The `UserPromptSubmit` hook injects a `Tandem mode: …` banner on each message for joined chats; a free chat gets nothing.
+- **"isolate this chat"** → free it again: `node _00-Project-Management/93-Scripts/mode.js leave --session <session_id>`.
+
+> Use the session ID from the hook payload / session context as `<session_id>` in every `mode.js` command in this skill (the `leave` command above and the `set` command below).
+
+## Setting the mode
+
+Triggered by `/mode <plan|dev|dual|neutral>` or natural language ("go to plan mode"):
+
+```
+node _00-Project-Management/93-Scripts/mode.js set <mode> --by user --session <session_id> [--context "<short label>"]
+```
+
+Setting from a chat also joins that chat. Confirm the change in one line and state what it now enforces.
+
+## The nudge (soft — never a hard block)
+
+When a JOINED chat's active mode disagrees with the request:
+
+- **In plan, asked for Dev work:**
+  > ⚠️ We're in **Plan** mode — that's Dev work. (a) switch to **Dev**, (b) go **Dual** so planning here + dev elsewhere both run, or (c) do it as a **one-off** without changing the mode?
+
+  On (c), do the single task; leave the flag on `plan` (no silent drift).
+- **In dev, asked for Plan work:** mirror image.
+- **In dual:** no nudge — both allowed.
+- **In neutral:** no enforcement; proceed. If a *sustained* run of one phase begins, you MAY offer once: "Want to set Plan mode for this?" — offer, never force.
+- **Free (un-joined) chat:** no banner, no nudge.
+
+## Auto-reset to Neutral (driven by the close skills)
+
+You do not flip to Neutral spontaneously. It happens via:
+- `close-out-story` — only when the closed story was the **last open story in the phase**.
+- `close-phase` — always (also how Dual exits).
+
+Each auto-reset is announced; it is written with `--by auto-neutral`.
+
+## When in doubt
+
+Bring it to the user. Never invent a fifth mode. Never hand-edit `.tandem-mode.json`.
diff --git a/plugins/tandem/skills/monthly-retro/SKILL.md b/plugins/tandem/skills/monthly-retro/SKILL.md
new file mode 100644
index 0000000..3a56721
--- /dev/null
+++ b/plugins/tandem/skills/monthly-retro/SKILL.md
@@ -0,0 +1,96 @@
+---
+name: monthly-retro
+description: Run the monthly retrospective. Use when the user asks for a monthly retro, monthly retrospective, end-of-month review, month-in-review, or invokes /Tandem:monthly-retro. Operates as Founder + PM hats jointly. Auto-detects the most recently completed full month, reads MONITOR + closed stories + ADRs + bugs from that window, and produces 14-Retros/RETRO-YYYY-MM.md from the kit's retro template.
+---
+
+# Tandem: monthly-retro (Founder + PM hats)
+
+Operate as **Founder + PM hats jointly** for a solo retro. The user is closing out a month and wants the retro filed in one slash command — without copy-pasting the prompt by hand.
+
+This skill is the slash-command wrapper for the kit's canonical monthly-retro prompt. The prompt is the source of truth; this file is the entry point.
+
+## Source of truth
+
+@_00-Project-Management/92-Prompts/10-monthly-retro.md
+
+Follow that prompt verbatim. The sections below add only the slash-command-specific glue (which month to retro, input resolution, output path, empty-month handling, post-write handoff) — do not re-declare the prompt's content here.
+
+## Which month — the prior-month window rule
+
+**Always retro the MOST RECENTLY COMPLETED FULL month — never a month that still has days ahead of it.** A month counts as completed once its last calendar day has arrived. Auto-detect this month from the system clock unless an argument overrides it.
+
+The rule, stated precisely with its boundary cases:
+
+- Run on **2026-06-01** → retro **2026-05**. June has just begun; May is the most recently completed full month.
+- Run on **2026-05-31** → **also retro 2026-05** (NOT June). On the last calendar day of May you are wrapping up May itself, so May counts as just closed and is the target. The point of the boundary is to never retro a month that is still partly ahead of you: a run on the final day of May targets **2026-05**, and a run on the very first day of June still targets **2026-05**.
+
+  In short: the target is the **last full month whose work is done** — the prior month relative to a fresh-of-the-month run, and the just-closed current month when you run on its final day. Either way you land on the most recently completed full month and never retro a month that still has days remaining after "today".
+
+- **Argument override:** if the user passes an explicit month (e.g. `2026-03` or "March"), retro that month instead of the auto-detected one. The argument always wins over the clock.
+
+Derive `YYYY-MM` for the target month and use it for both the output filename and the `period_start` / `period_end` frontmatter (`period_end` = last calendar day of the target month).
+
+## Inputs needed
+
+- If the user passed a month argument, use it. Otherwise auto-detect per the prior-month window rule above from the system clock.
+- If the user is ambiguous ("do the retro"), state which month you resolved to before drafting, so they can correct you in one line.
+
+## Load into context
+
+The canonical layout is under `_00-Project-Management/`. Use `Read` / `Glob` to detect existence rather than assuming; treat missing files as "not present" rather than throwing. Load the inputs the source prompt lists, scoped to the target month's window:
+
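+- **MONITOR** — `_00-Project-Management/42-Monitor/MONITOR.md` (the revision history covering the target month — roughly the last ~4 weeks on a default run; for an explicitly requested older month, read that month's entries instead).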
+- **Previous month's retro** — `_00-Project-Management/14-Retros/RETRO-*.md`, the one immediately before the target month, to check the carry-forward "One change".
+- **Current quarter's OKRs** — `_00-Project-Management/00-Strategy/OKR-*.md` (most recent) for the strategic check.
+- **Closed stories in-window** — stories under `_00-Project-Management/32-Stories/` whose `completed_at` falls inside the target month.
+- **ADRs in-window** — `_00-Project-Management/40-Decisions/ADR-*.md` created in the target month.
+- **Bugs in-window** — `_00-Project-Management/34-Bugs/` filed / fixed in the target month.
+- **Retro template** — `_00-Project-Management/91-Templates/RETRO.template.md`.
+- **SOP** — `_00-Project-Management/90-Standards/SOP.md` for retro rules + frontmatter contract.
+
+For a multi-folder scan of the in-window activity, delegate to an Explore agent (SOP §18) and ingest the summary, not the raw paths.
+
+## Task
+
+Run the source prompt verbatim. The mechanical points that matter for the slash command:
+
+1. Write the retro to **`14-Retros/RETRO-YYYY-MM.md`** (target month's `YYYY-MM`), using **`91-Templates/RETRO.template.md`** verbatim — do not redraft section headings from memory.
+2. Compute the objective metrics (stories shipped, bugs filed/fixed delta, ADRs created, average story cycle time, time in `blocked`) from the in-window data. Auto-draft the objective sections; leave "What worked / What hurt / Surprises" for the user to edit to their voice.
+3. Propose 2-3 candidate "One change" actions — propose only, the user picks one.
+4. Run the strategic check: did the month's work ladder into the current OKRs? Flag drift for the next quarterly review.
+5. **Show the draft in chat before saving.** Wait for the user's edits.
+
+## Empty month is valid output
+
+A month with **0 closed stories is valid** — still produce the retro file. Populate the metrics with zeros, note explicitly in "What worked / What hurt" that the month was quiet (and why, if known — e.g. holiday, founder offline, single long-running story not yet closed), and still propose a "One change" for the next month. Do NOT skip the file or error out on an empty window; early-kit months legitimately have no closed work.
+
+## Output rules
+
+- One retro file per month: `14-Retros/RETRO-YYYY-MM.md`.
+- Objective sections (Metrics, Action-from-last-retro carry-forward, Strategic check) are Claude's to write; subjective sections (What worked / What hurt / Surprises) are the user's voice — draft them as a starting point only.
+- The "One change" — propose 2-3, the user commits to one.
+- Honour the frontmatter contract: quoted ISO 8601 timestamps with offset from the system clock, canonical status enum, template used verbatim. The source prompt sets `status=done` + `completed_at` on save (this is the retro artefact's own lifecycle, not a kit story).
+
+## Non-negotiable rules from CLAUDE.md
+
+- Frontmatter timestamps (quoted ISO 8601 with offset, from the system clock).
+- Status enum (closed set of 9).
+- Templates rule — use `91-Templates/RETRO.template.md` verbatim; do not redraft section headings from memory.
+- Update `42-Monitor/MONITOR.md` with the one-line retro entry per the source prompt's end-of-retro step.
+
+## End-of-session summary (always emit)
+
+- Target month resolved: `YYYY-MM` (and whether from clock or argument).
+- File written: `_00-Project-Management/14-Retros/RETRO-YYYY-MM.md`.
+- Stories shipped in-window: N (0 is valid — empty month noted in the file).
+- Bugs filed / fixed: X / Y. ADRs created: Z.
+- "One change" candidates proposed: list (user to pick one).
+- Strategic drift: Yes / No.
+
+## Next-step guidance
+
+Review the draft with the **founder hat**; capture any decisions that surface as **ADRs**. The retro often exposes a structural choice ("we keep skipping DoR — should we?") worth recording.
+
+## Next command
+
+`/Tandem:weekly-monitor` — the retro's "One change" is tracked from here into next month; the weekly cadence keeps it alive. If the strategic check flagged drift, run `/Tandem:draft-okrs` next quarter to re-anchor.
diff --git a/plugins/tandem/skills/path-scope-example/SKILL.md b/plugins/tandem/skills/path-scope-example/SKILL.md
new file mode 100644
index 0000000..a50bcf1
--- /dev/null
+++ b/plugins/tandem/skills/path-scope-example/SKILL.md
@@ -0,0 +1,28 @@
+---
+name: path-scope-example
+description: Reference example of a PATH-SCOPED skill (ADR-0010). A light ADR-authoring reminder that auto-loads only when working inside the decisions folder — next free ADR-NNNN, the template to start from, and the same-response linking rule. Use when creating or editing an ADR under the decisions folder. Copy this skill's `paths:` frontmatter as the template for your own directory-scoped skills.
+paths:
+  - "_00-Project-Management/40-Decisions/**/*"
+---
+
+# path-scope-example (reference: directory-scoped skill)
+
+This skill is the kit's **worked example of `paths:` scoping** (ADR-0010, proven by STORY-17.2.01). Unlike every other Tandem skill — which is globally relevant and explicitly slash-invoked — this one is bound by its `paths:` frontmatter to **`_00-Project-Management/40-Decisions/`**, so Claude auto-loads it only while reading/writing files in that directory. Outside the decisions folder it stays silent.
+
+It is intentionally minimal and doubles as a genuine, directory-local helper: a quick ADR-authoring checklist.
+
+## When this activates
+
+Auto-loads when you touch a file under `_00-Project-Management/40-Decisions/` (e.g. creating `ADR-0077-*.md`). It does **not** auto-load when you're working in `32-Stories/`, `skills/`, or anywhere else — that's the whole point of path-scoping.
+
+## ADR-authoring reminder (the local concern)
+
+1. **Number sequentially across the whole project.** Glob `ADR-*.md` in this folder, take the max `NNNN`, add 1. No folder grouping.
+2. **Start from the template** — `91-Templates/ADR.template.md`. Don't redraft headings from memory.
+3. **Record the decision in the same response** as the work that forced it (ADR-on-the-spot).
+4. **Link back** — add the new `ADR-NNNN` to the originating story's `decisions:` frontmatter array.
+5. **Commit, don't hedge** — an ADR commits to one option; list the rejected ones under `## Alternatives considered`.
+
+## How to reuse this pattern
+
+To make one of your own skills directory-scoped, copy the `paths:` block above and point it at your directory's glob (repo-relative, `**` recursion, brace expansion — same format as `.claude/rules/` path-specific rules). Remember `paths:` is **additive** with `description:` — it constrains *where* a description-match may fire; it is not a security control. See `90-Standards/CLAUDE-CODE-CONFIG.md` §2.3.1 for when to path-scope vs description-match.
diff --git a/plugins/tandem/skills/path-scope-example/activation-test.md b/plugins/tandem/skills/path-scope-example/activation-test.md
new file mode 100644
index 0000000..8139cd6
--- /dev/null
+++ b/plugins/tandem/skills/path-scope-example/activation-test.md
@@ -0,0 +1,34 @@
+# Activation test — `path-scope-example`
+
+Verifies the `paths:` scoping declared in `SKILL.md` (`_00-Project-Management/40-Decisions/**/*`)
+per ADR-0010: the skill must **activate in-scope** and stay **silent out-of-scope**. This is a
+documented activation spec (the kit's static-analysis testing convention) — each case states the
+trigger, the expected auto-load behaviour, and how to confirm it.
+
+## Case A — in-scope → activates
+
+- **Trigger:** Claude reads or writes a file under `_00-Project-Management/40-Decisions/`
+  (e.g. creating `ADR-0077-some-decision.md`, or editing an existing `ADR-*.md`).
+- **Expected:** the skill **activates** (auto-loads) — the ADR-authoring reminder is available
+  because the touched file matches the `paths:` glob.
+- **Confirm:** the touched path matches `_00-Project-Management/40-Decisions/**/*`. A file at
+  `…/40-Decisions/ADR-0077-x.md` matches → in-scope, skill loads.
+
+## Case B — out-of-scope → does not activate (silent)
+
+- **Trigger:** Claude works anywhere outside the decisions folder — e.g. editing
+  `_00-Project-Management/32-Stories/EPIC-17/FEAT-17.2/STORY-17.2.01-*.md`, a file under `skills/`,
+  or a source file at the repo root.
+- **Expected:** the skill is **silent** — it does **not** auto-load, because no touched file
+  matches the `paths:` glob. (It remains explicitly invocable, but is not auto-surfaced.)
+- **Confirm:** none of `32-Stories/…`, `skills/…`, `package.json` match
+  `_00-Project-Management/40-Decisions/**/*` → out-of-scope, skill stays silent.
+
+## Boundary notes
+
+- `paths:` narrows **auto-load**, not invocation or permissions (ADR-0010, nuance 1): out-of-scope
+  the skill is silent but still reachable if explicitly invoked.
+- Trigger is **file-touch**, not cwd (ADR-0010, nuance 3): opening a decisions file from any working
+  directory activates Case A; merely `cd`-ing into the folder without touching a matching file does not.
+- `paths:` is **additive** with `description:` (nuance 2): in-scope, the description-match still
+  gates the final activation; out-of-scope, the description-match is suppressed.
diff --git a/plugins/tandem/skills/peer-review/SKILL.md b/plugins/tandem/skills/peer-review/SKILL.md
new file mode 100644
index 0000000..e387b78
--- /dev/null
+++ b/plugins/tandem/skills/peer-review/SKILL.md
@@ -0,0 +1,111 @@
+---
+name: peer-review
+description: On-demand code peer review of an explicitly-scoped target. Use when the user asks for a peer review, a code review, to review this diff, review a branch, review a PR, or review a file. Reviews across correctness, security, performance, maintainability, test coverage, and error paths, then returns severity-ranked findings (blocker / major / minor) each with a suggested fix.
+---
+
+# Tandem: peer-review (reviewer / QA hat)
+
+Operate as **reviewer / QA hat**. Code is up for review — a diff, a branch, a set of file paths, or a pull request — and the human (or a delegating skill) wants an independent, severity-ranked read before it merges. You are the second pair of eyes: skeptical, specific, and constructive. You do not change the code; you produce findings.
+
+## Inputs needed
+
+The review needs an **explicit, clear target**. Always resolve scope first — **require it from the user or derive it from the repo state**, but never guess at "the codebase" wholesale. The target is exactly **one** of:
+
+- **diff** — an unstaged/staged/working-tree diff (`git diff`, `git diff --staged`) or a diff against a base ref (`git diff <base>...HEAD`).
+- **branch** — a named branch; review its delta against its merge base with the default branch (`git merge-base main <branch>` → `git diff <base>...<branch>`).
+- **file path(s)** — one or more explicit files or globs to review whole, regardless of git state.
+- **PR / pull request** — a GitHub pull request; pull its diff via `gh pr diff <number>` (and `gh pr view <number>` for title/body/intent).
+
+Scope resolution order:
+
+1. If the user names a target explicitly (a diff, branch, file, or PR), use it verbatim.
+2. If not, **derive** a clear target from repo state — prefer the working-tree diff (`git status --porcelain` + `git diff`), then the current branch's delta vs. its merge base. State the derived scope in one line and proceed.
+3. If neither the user nor the repo yields an unambiguous target (e.g. clean tree, on the default branch, no PR given), **STOP and ask** which diff / branch / file / PR to review. Do not invent a scope or review the whole tree by default.
+
+Echo the resolved scope back in the first line of output so the human can confirm you reviewed what they meant.
+
+## Load into context
+
+Use `Read` / `Glob` / `Bash` (`git`, `gh`) to gather — treat missing files as "not present", degrade gracefully, never fabricate.
+
+- **The review target itself** — the resolved diff / branch delta / file contents / PR diff from "Inputs needed".
+- **Surrounding context for changed regions** — for a diff or PR, `Read` enough of each touched file around the hunks to judge the change in situ (callers, the function's other branches, the type it implements). A diff reviewed without its surroundings produces false positives.
+- **Project conventions** — `_00-Project-Management/90-Standards/PROJECT-CONTEXT.md` if present (lint rules, stack quirks, banned patterns); else infer from `package.json` / config files. Project-root `CLAUDE.md` — always loaded for project-specific overrides.
+- **Intent** — the paired STORY / PR description / commit messages, so you review against what the change is *supposed* to do, not just what it does.
+- **Prior HTML context (`html_context:`)** — if a paired story/testplan frontmatter carries a non-empty `html_context:` array, `Read` every repo-relative path it lists (explorations, annotated diffs, options-comparisons) into context **before** reviewing, so findings are grounded in the same architectural reasoning the human reviewer had. Skip entries that don't resolve. Treat the SOP §11 50 KB guideline as advisory — summarise very large files rather than reading them whole.
+
+## Task — review across the six dimensions
+
+Examine the resolved target against all six dimensions. Each dimension is a distinct lens; a single line of code can raise findings under several.
+
+1. **Correctness** — Does it do what it claims? Logic errors, off-by-one, wrong operator/comparison, inverted conditions, incorrect state transitions, broken invariants, race conditions and concurrency hazards, mishandled nulls/undefined, locale/timezone/encoding assumptions, and divergence from the stated intent (story / PR description).
+2. **Security** — Input validation and sanitisation, injection (SQL / command / template / path traversal), authn/authz checks (missing or bypassable), secrets and credentials in code, unsafe deserialisation, SSRF/XXE, XSS (especially `innerHTML` / `dangerouslySetInnerHTML` with untrusted data), weak or misused crypto, and vulnerable or over-permissive dependencies.
+3. **Performance** (perf) — Algorithmic complexity and accidental quadratic loops, N+1 queries, unbounded result sets, redundant work in hot paths, missing or wrong caching, unnecessary allocations / large copies, blocking calls on hot paths, and resource leaks (unclosed handles, leaked subscriptions/listeners). Flag a perf finding only when it is plausibly reachable at real scale, not as premature optimisation.
+4. **Maintainability** (maintainab) — Readability, naming, function/file size and cyclomatic complexity (flag complexity ≳ 10), duplication (DRY) and missing abstractions, SOLID violations, tight coupling / weak cohesion, dead code, leaky abstractions, magic numbers, and missing or stale documentation/comments where the code is non-obvious.
+5. **Test coverage** (test) — Are the changed paths actually tested? Missing tests for new branches and edge cases, assertions that don't assert (or assert the wrong thing), over-mocking that tests the mock instead of the code, missing negative/failure-path tests, flaky/time-dependent tests, and tests coupled to implementation detail rather than behaviour.
+6. **Error paths** (error) — Error handling and resilience: swallowed exceptions, bare `catch` with no rethrow/log, errors that lose context, missing handling for I/O / network / parse failures, partial-failure and rollback behaviour, retry/timeout/idempotency on external calls, and resource cleanup on the failure path (try/finally, `using`, context managers).
+
+For each issue you find, capture: the **file and line/region**, which **dimension** it falls under, the **severity** (below), a concise explanation of *why* it matters (the consequence, not just the symptom), and a concrete **suggested fix**.
+
+### Severity ranking — exactly three levels
+
+Rank every finding into exactly one of these three levels, and order the output by severity (blockers first):
+
+- **blocker** — must be fixed before merge. Correctness bugs that ship broken behaviour, security vulnerabilities, data loss/corruption, crashes on a realistic path, or a missing test for new risk-bearing logic. A non-empty blocker list means "do not merge".
+- **major** — should be fixed before merge or have an explicit, recorded decision to defer. Significant maintainability/perf problems, weak error handling on a real failure path, meaningful coverage gaps — not ship-breaking, but it will bite.
+- **minor** — nice to fix; non-blocking. Style, naming, small duplication, doc gaps, micro-optimisations. The author can accept or defer at their discretion.
+
+Every finding **requires a suggested fix** — a specific, actionable remedy (a corrected snippet, the missing test to add, the validation to insert), not "consider improving this". One **suggested fix per finding**. If you can't propose a fix, it's an open question, not a finding — label it as such.
+
+## Output rules
+
+- **First line:** echo the resolved review scope (what diff / branch / file / PR, against which base) so the human can confirm you reviewed the right thing.
+- **Then a one-line verdict:** `BLOCK` (≥1 blocker), `APPROVE-WITH-CHANGES` (≥1 major, no blockers), or `APPROVE-WITH-NITS` (no blockers or majors — only minor findings, or none at all) — plus the count per severity.
+- **Then findings, severity-ranked** (blockers → majors → minors). Each finding: `[severity] file:line (dimension)` — what's wrong → why it matters → suggested fix.
+- Be specific and cite real lines; "looks fragile" is not a finding. No finding without a location and a suggested fix.
+- Acknowledge genuinely good practices briefly — a review is feedback, not only a defect list.
+- Do **not** modify the code under review. This skill produces findings; fixing is the Dev hat's job in a separate step.
+- For a large diff that would flood the thread, spawn a fresh agent (SOP §18) to do the heavy read and return the ranked findings, rather than pasting the whole diff into the main thread.
+
+## Reusable review contract — delegated by other skills
+
+The six-dimension lens + three-level severity ranking + one-suggested-fix-per-finding defined here is the **canonical, reusable review contract** for this kit. Other skills delegate to it rather than re-deriving their own review logic:
+
+- **`/Tandem:close-out-story`** runs the Definition-of-Done gate, whose **R14 AI-code-review** pass (FEAT-05.3) delegates to this contract: it invokes `/Tandem:peer-review` against the story's diff, then treats the resulting **blocker** count as its hard gate (blockers > 0 ⇒ DoD R14 FAILs, story does not flip to `done`).
+- Keep the dimension set, the `blocker / major / minor` enum, and the suggested-fix-per-finding requirement stable here so close-out and any future caller stay in lockstep with one source of review truth.
+
+## Emit the AI-CODE-REVIEW artefact (durable HTML output)
+
+Beyond printing the severity-ranked findings to the thread, `peer-review` **emits a durable HTML artefact** so the review survives the conversation and feeds the DoD gate. Produce it from the kit template — do not hand-roll the markup.
+
+1. **Emit from the template.** Copy `_00-Project-Management/91-Templates/AI-CODE-REVIEW.template.html` and produce a populated artefact from it: write the resolved unified diff into the `data-slot="diff"` slot (one `.file-hunk` per file, one `.row` per line, `add`/`del` classes on +/- lines), and write **one `<article class="anno-card severity-<level>">` annotation card per finding** into the `data-slot="annotations"` slot. Render each finding's reasoning and suggested fix as **TEXT** (`textContent` / escaped template literals) into the `.reasoning` and `.fix` elements — **never `innerHTML`**, so untrusted diff/finding text can never execute (XSS-safe, per the template's security note). Keep each card's `data-severity` / `data-file` / `data-line` / `data-category` attributes in sync with the visible text, since the template's verdict, count strip, filters, and PR-comment exporter all read those attributes.
+
+2. **Output path.** Write the emitted artefact into `41-Reports/` named `AI-CODE-REVIEW-<scope-id>-<YYYY-MM-DD>.html` — i.e. the literal path `41-Reports/AI-CODE-REVIEW-<scope-id>-<YYYY-MM-DD>.html`, where `<scope-id>` identifies the reviewed target (story id, branch, or PR) and `<YYYY-MM-DD>` is the review date. This is the same location and naming convention `close-out-story` expects to find at R14 review time.
+
+3. **R15b conformance.** The emitted artefact MUST be **R15b**-conformant — it must satisfy the validator's **R15b** check (the AI-review-artefact presence + existence rule) so it passes the DoD gate. Because `peer-review` writes the **R15b**-conformant artefact to exactly the path `close-out-story` reads, the close-out gate finds the artefact it expects in the location it expects, with no second emission step.
+
+4. **Severity mapping (fixed by ADR-0035).** This skill's three-level `blocker / major / minor` findings **map onto** the template's four-level `data-severity` rubric, and onto its verdict banner and count strip, exactly as **ADR-0035** fixes them:
+
+   | peer-review finding | template `data-severity` token |
+   |---|---|
+   | `blocker` | `blocker` (`data-severity="blocker"`) |
+   | `major`   | `critical` (`data-severity="critical"`) |
+   | `minor`   | `nit` (`data-severity="nit"`) |
+
+   The template's `warning` token is **reserved** — it carries no direct peer-review finding, and is available only as an optional reviewer down-rank of a borderline `major`. The **blocker count** is the gating signal in both directions: the verdict banner reads **"blocked" iff blocker > 0**, which is precisely the DoD **R14** blocker-gate. So a finding ranked `blocker` here becomes a `data-severity="blocker"` annotation card, increments the template's blocker count, flips the verdict to "blocked", and (via R14) prevents the story flipping to `done`. Keep this mapping aligned with ADR-0035; do not re-derive it.
+
+## Non-negotiable rules from CLAUDE.md
+
+- Status enum and frontmatter timestamps apply to any story/testplan you touch — but note this skill is read-only over the *code*; it does not flip story status itself.
+- Never fabricate scaffolding or paths — if a context source is absent, say so and degrade gracefully.
+- Every finding has a location, a severity, and a suggested fix — a finding without all three is not reportable.
+
+## End-of-review summary (always emit)
+
+- Scope reviewed: <diff | branch | file(s) | PR — against which base>
+- Verdict: `BLOCK` | `APPROVE-WITH-CHANGES` | `APPROVE-WITH-NITS`
+- Findings: blocker X / major Y / minor Z
+- Top blockers (if any): <list>
+- Recommended next step:
+  - Blockers present → assign findings to Dev hat; re-review after fixes.
+  - Clean → safe to merge / proceed to `/Tandem:close-out-story`.
diff --git a/plugins/tandem/skills/refine-backlog/SKILL.md b/plugins/tandem/skills/refine-backlog/SKILL.md
new file mode 100644
index 0000000..9336ea3
--- /dev/null
+++ b/plugins/tandem/skills/refine-backlog/SKILL.md
@@ -0,0 +1,123 @@
+---
+name: refine-backlog
+description: Refine a BACKLOG item or not-started STORY by running the SOP §6 Definition of Ready checklist. Use when the user asks to refine the backlog, refine an item, promote to ready, run DoR, gate a story, do a Friday review, or invokes /Tandem:refine-backlog. Operates as PM hat. Either flips status to ready (if all DoR items pass) OR stops, lists gaps, and asks — never silently promotes.
+---
+
+# Tandem: refine-backlog (PM hat)
+
+Operate as **PM hat**. The user wants to take a BACKLOG entry or a `not-started` STORY through the Definition of Ready gate — either promoting it to `ready` (eligible to pull into work) or surfacing exactly what's missing.
+
+This skill is the slash-command wrapper for the kit's canonical refinement prompt. The prompt is the source of truth; this file is the entry point.
+
+## Source of truth
+
+@_00-Project-Management/92-Prompts/05-refine-backlog-to-ready.md
+
+Follow that prompt verbatim. The sections below add only the slash-command-specific glue + the DoR enforcement contract — do not re-declare the prompt's content here.
+
+## Definition of Ready (DoR) — what we're gating against
+
+Per SOP §6, a story is `ready` only when **all** of the following are true. The skill walks each item; missing items abort the promotion with a clear gap list.
+
+- [ ] **Linked to a Feature → Epic → OKR or PRD.** Story frontmatter `feature:` and `epic:` set; parent Epic has `okr:` or `prd_section:` populated.
+- [ ] **AC written as testable checkboxes.** Each AC verifiable by a machine, not by vibes.
+- [ ] **Paired TESTPLAN exists at the mirrored path.** `33-Testplans/EPIC-NN/FEAT-NN.M/TESTPLAN-NN.M.PP-<slug>.md`.
+- [ ] **Every AC maps to ≥1 TC in the TESTPLAN.** Coverage matrix complete.
+- [ ] **Every TC has a runnable `Command:`.** No "have a human verify" steps.
+- [ ] **Dependencies listed and either done or scheduled.** No floating "depends on TBD".
+- [ ] **Estimate set** (XS / S / M / L). `XL` means split before promoting.
+- [ ] **`type_of_work` set to a concrete discipline** (`frontend` / `backend` / `infra` / `data` / `docs`) — not the template placeholder. A missing or placeholder value is a DoR gap (it fuels `execution-strategist` sub-agent assignment; see SOP §11.3 / FEAT-03.1).
+- [ ] **Risks section non-empty.** "None — reviewed YYYY-MM-DD" counts; blank does not.
+- [ ] **Premise verified.** Any claim the item makes about **another artefact's state** (status / existence / supersession), or any intent to **retire / archive / delete / supersede / mutate** a named `STORY-`/`FEAT-`/`EPIC-`/`ADR-`/`BACKLOG-` artefact, has been **checked against that artefact's current frontmatter**. A claim that contradicts reality — or names an artefact that can't be resolved — is a DoR gap.
+
+## DoR enforcement contract (MANDATORY)
+
+This skill **never silently promotes**. The contract:
+
+1. Walk the DoR checklist verbatim — every item gets PASS / FAIL with a one-line reason.
+2. If **all** items pass → before flipping `status: not-started` → `ready`, perform a fill-if-missing check: if the story's `outcome:` frontmatter is empty or absent, dispatch a sub-agent with the `write-outcomes` skill — handing it the story's **title + acceptance criteria + technical notes** (the same dispatch input the other four FEAT-14.3 producers pass, per ADR-0059) — to auto-generate exactly one plain-text line, then write that line into `outcome:` (fill-if-missing only — never overwrite an existing outcome). Then flip the story (or BACKLOG entry, after conversion) `status: not-started` → `ready`. Atomic edit. Do **not** set `started_at` (that happens at `in-progress`, not `ready`). Show the user the result table.
+3. If **any** item fails → **stop**. Do **not** flip status. Show the gap list and the smallest fix for each. Ask the user before patching — the skill never auto-completes the missing pieces (e.g. don't invent ACs, don't fabricate TCs, don't guess at risks).
+4. **Sunset check** — per SOP §15, if a story has been `not-started` > 90 days, propose `wontfix` or `archived` instead of refinement. Stale items rot; the kit prefers honest sunsetting over false hope.
+
+### Premise resolution (the "Premise verified" DoR item)
+
+A story can be perfectly well-*formed* and still be *wrong*, because the other DoR items only check structure — never whether the item's stated premise is **true**. To walk the **Premise verified** item:
+
+1. Scan the item's body/ACs for the tell — a phrase shape that makes a **state claim about a named other artefact** or proposes to **mutate** one: *"X is never-started / not-started / superseded / done / blocked / obsolete / duplicate"* or *"retire / archive / delete / supersede / close X"*, where **X is a `STORY-`/`FEAT-`/`EPIC-`/`ADR-`/`BACKLOG-` id**. (Only ids named **with a status claim or mutation intent** — a bare reference in prose does not trip the gate.)
+2. **Resolve each named id** and read its current `status:` (and existence). If the artefact can't be found, or its real status contradicts the claim → **DoR FAILS**: do not promote; name the specific mismatch (claimed-vs-actual) and stop, consistent with the "never silently promote" rule above.
+
+**Worked example (the regression fixture this guards) — STORY-15.1.02:** it asserted "STORY-04.6.01–05 are five never-started, superseded stories" and proposed to flip them to `archived` — but all five are genuinely `status: done`. Executing it would have silently corrupted the board (done-count 154→149) while **every structural gate stayed green** (`archived` + `completed_at` is enum-legal; R21 doesn't fire on terminal status). The premise check resolves `STORY-04.6.01…05`, sees `done`, and fails the gate before promotion — catching systematically what was previously caught only by luck (execution happening to inspect the targets first).
+
+The gap-list path is the skill's load-bearing differentiator vs the plain paste-prompt. Silent or partial promotion would corrupt the kit's "Ready means Ready" invariant — every downstream skill (`execute-story` especially) relies on it.
+
+## Inputs needed
+
+- One of:
+  - A path to a BACKLOG entry (`_00-Project-Management/11-Backlog/BACKLOG-NNNN-<slug>.md`) — large items get refined into a Feature spec first; small ones promote directly to a Story+Testplan pair.
+  - A path to an existing `not-started` Story (`_00-Project-Management/32-Stories/EPIC-NN/FEAT-NN.M/STORY-NN.M.PP-<slug>.md`) — direct DoR gate.
+  - A short list of candidates ("top 5 by priority") if the user wants a batch review.
+- If the user didn't supply anything, ask: "Which item(s)? Paste a path, a list of paths, or 'pick from not-started' if you want me to choose."
+
+## Load into context
+
+Use `Read` / `Glob` to detect existence. Treat missing files as "not present" rather than throwing.
+
+- **Target item(s)** at the resolved path(s).
+- **SOP** — `_00-Project-Management/90-Standards/SOP.md` (specifically §6 DoR + §15 sunset rule). The DoR list above is sourced from §6; if §6 has drifted from this skill's list, **§6 wins** — flag the drift to the user.
+- **Project context** — `_00-Project-Management/90-Standards/PROJECT-CONTEXT.md` (for stack-specific runnable-command conventions when checking testplan TCs).
+- **MONITOR** — `_00-Project-Management/42-Monitor/MONITOR.md` for current WIP. After processing, the skill suggests "WIP is currently N in-progress + M in-review. You have capacity to pull K more."
+- **Parent artefacts** — for each target Story, also load its parent Feature + Epic to verify the strategic-linkage DoR item.
+- **Templates** — `_00-Project-Management/91-Templates/STORY.template.md` + `_00-Project-Management/91-Templates/TESTPLAN.template.md` (for the gap-fix proposals when ACs / testplans are incomplete).
+- **Project root `CLAUDE.md`** — for project-specific overrides.
+
+## Task
+
+1. Read each target item end-to-end.
+2. Walk the DoR checklist verbatim. Per item: PASS or FAIL, with a one-line reason citing what was checked.
+3. If the item is a BACKLOG entry (not a Story yet), additionally judge:
+   - **Story-sized?** Single coherent unit of work, fits an XS/S/M/L estimate → promote by creating STORY+TESTPLAN via `/Tandem:split-into-stories` (after refinement). Don't write them inside this skill.
+   - **Feature-sized?** Multi-story scope → suggest the user run `/Tandem:split-into-features` on the parent Epic, or draft a new Feature directly.
+   - **Sunset candidate?** Captured >90 days ago, no movement → propose `wontfix` or `archived`.
+4. If all DoR items PASS for a Story → run the fill-if-missing `outcome:` check from the DoR enforcement contract (step 2), then flip `status: not-started` → `ready`. Atomic edit. Do **not** set `started_at`.
+5. If any FAIL → **stop**. Show the gap list + smallest fix per gap. Ask the user before any patching action.
+6. Repeat per item. Process **serially** — one item at a time, user reviews each pass/fail decision before moving on.
+7. Emit the summary table at the end (see Output rules).
+
+## Output rules
+
+- **Never silently promote.** Every gap blocks the flip. No "I'll just fill that in for you" — the user owns the decision to patch or defer.
+- Process candidates **serially**, not in parallel — gives the user a beat to review each pass/fail call.
+- Show a summary table at the end:
+
+  | Item | DoR result | Notes |
+  |---|---|---|
+  | STORY-NN.M.PP | ✓ ready | Promoted 2026-MM-DD |
+  | STORY-NN.M.PP | ✗ fail | Missing: estimate, risks. Smallest fix: ... |
+  | BACKLOG-NNNN | ✗ sunset | Captured 2025-MM-DD (>90 days), no movement — propose wontfix |
+
+- Do **not** pull anything to `in-progress` here. That's `/Tandem:execute-story`.
+- After processing, surface capacity guidance: "WIP is currently N in-progress (max 2) + M in-review (max 3). You have capacity to pull K more. Top Ready candidate: STORY-NN.M.PP."
+
+## Non-negotiable rules from CLAUDE.md
+
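+- Frontmatter timestamps — flipping to `ready` touches `status:` only (plus `outcome:`, solely when the fill-if-missing check in the DoR enforcement contract finds it empty or absent). **Do not** modify `started_at` or `completed_at`.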
+- Status enum — `ready` is one of the closed 9 values; do not invent intermediate states.
+- DoR gate is **MANDATORY** — no shortcuts, no partial credit, no honour-system bypass.
+- Sunset rule (SOP §15) — items `not-started` > 90 days should be honestly retired rather than perpetually refined.
+
+## End-of-session summary (always emit)
+
+- Items processed: N (serial)
+- Promoted to `ready`: list of paths
+- Blocked on DoR gaps: list of paths + gap-count per item
+- Sunset proposed: list of paths
+- WIP after this session: <N in-progress> + <M in-review> (capacity for K more)
+- Top Ready candidate to pull next: STORY-NN.M.PP
+
+## Next command
+
+Next: `/Tandem:execution-strategist`
+
+`/Tandem:split-into-stories <feature-path>` — if a refined item turned out to be feature-sized.
+
+Or, if a Story is now `ready` and within WIP capacity: `/Tandem:execute-story <story-path>` — start the work.
diff --git a/plugins/tandem/skills/reflect/SKILL.md b/plugins/tandem/skills/reflect/SKILL.md
new file mode 100644
index 0000000..9286cf8
--- /dev/null
+++ b/plugins/tandem/skills/reflect/SKILL.md
@@ -0,0 +1,69 @@
+---
+name: reflect
+description: End-of-session reflection that proposes updates to CLAUDE.md / SOP.md / PROJECT-CONTEXT.md based on what happened. Use when the user asks to reflect on a session, capture lessons, propose CLAUDE.md updates, or invokes /Tandem:reflect. The blog's "stop hooks reflect on what happened and propose CLAUDE.md updates" recommendation, implemented as a manual skill (less noisy than an auto-firing Stop hook that triggers on every trivial session).
+---
+
+# Tandem: reflect (self-improvement)
+
+Use at the end of a substantive session (>30 min of real work) to capture lessons that should bleed back into the kit's rules. The blog's "self-improvement loop" pattern.
+
+## What to scan in this session
+
+1. **Decisions made** — did you create an ADR? Did you make a decision that *should* have become an ADR but didn't? (the rule says "any non-obvious decision" — be honest.)
+2. **Friction encountered** — did you have to explain the same convention to Claude twice? That's a `PROJECT-CONTEXT.md` candidate.
+3. **Workarounds applied** — did you bypass a rule or use a one-off command? Why? Should the rule change, or should there be a new entry in PROJECT-CONTEXT.md's "Known stack gotchas"?
+4. **Skills that didn't fire** — did Claude paste a prompt's content instead of loading a skill? The skill's `description:` may not match the trigger phrase the user actually used.
+5. **Tools used that surprised you** — was an agent invoked when a direct Read would have been faster? Was main-thread context bloated by grep results? Subagent policy adjustment needed?
+6. **Patterns that recurred** — did you tell Claude to do something three times that could be a hook?
+
+## What to output
+
+A proposal — **not** a commit. The user decides what lands.
+
+```
+🪞 Session reflection — <ISO date>
+
+What worked:
+  - <specific thing, ≤2 lines>
+
+What hurt:
+  - <specific thing, ≤2 lines>
+
+Proposed kit changes (review before applying):
+  1. [PROJECT-CONTEXT.md] Add to "Known stack gotchas": <symptom> — <fix>. Reason: <one line>.
+  2. [SOP.md §<N>] Tighten rule: <current text> → <proposed text>. Reason: <one line>.
+  3. [skills/<name>/SKILL.md] Update description to include trigger phrase: "<phrase user actually used>". Reason: <one line>.
+  4. [hooks/hooks.json] New hook candidate: <event> running <command>. Reason: <one line>.
+
+ADR backlog (decisions made this session without an ADR — file these now):
+  - <decision> made at <timestamp/commit>. Should be ADR-<NNNN>.
+
+Tech debt observed (file as BACKLOG entries):
+  - <observation>
+
+User confirmation needed to apply any of the above.
+```
+
+## Output rules
+
+- **Propose, don't commit.** No file edits during reflect. The user reviews and explicitly approves each item before it lands.
+- One proposal per finding — not bundled.
+- Cite evidence: "I noticed X at <approx point in session>" — concrete, not vague.
+- Skip the section entirely if there's no finding for that category. Don't pad.
+- ≤ 30 lines total. If the session yielded more than 5 findings, prioritise the top 3 and note the rest as "additional minor findings: <count>".
+
+## Anti-patterns
+
+- Proposing rule changes for one-off situations. The bar is "this would have helped me twice or more in the past month."
+- Suggesting new skills when a richer description on an existing skill would solve it.
+- Auto-applying proposals without user approval — even small ones. The user owns the kit.
+
+## Non-negotiable rules from CLAUDE.md
+
+- Read-only — reflect proposes; user applies.
+- Subagent delegation for multi-file scans of past sessions.
+- If a proposal touches `_00-Project-Management/90-Standards/SOP.md`, also bump the file's `version:` field in frontmatter once the user approves.
+
+## End-of-reflect summary
+
+A single line: "X proposals · Y ADRs to backfill · Z tech-debt items. Apply now? (y/n)"
diff --git a/plugins/tandem/skills/run-testplan/SKILL.md b/plugins/tandem/skills/run-testplan/SKILL.md
new file mode 100644
index 0000000..b9a1d18
--- /dev/null
+++ b/plugins/tandem/skills/run-testplan/SKILL.md
@@ -0,0 +1,93 @@
+---
+name: run-testplan
+description: Run every test case in a TESTPLAN. Use when the user asks to run a testplan, execute tests, verify a story, or work with a testplan file under the project's testplans folder. Runs each TC's Command verbatim, marks PASS/FAIL, and auto-files BUG-YYYYMMDD-NN files for every failure before reporting in chat.
+---
+
+# Tandem: run-testplan (QA hat)
+
+Operate as **QA hat**. A story has flipped to `in-review` and its paired testplan needs verification.
+
+## Pre-flight — refuse loudly if the kit isn't wired
+
+Before running any TC, run the cheap wiring gate: `node _00-Project-Management/93-Scripts/doctor.js --gate` (npm: `npm run pm:doctor -- --gate`). It is **silent on success**; on an unwired project it exits non-zero and prints one line — **kit not wired — run `npm run pm:install`**. If it fails, **refuse and surface that message verbatim** rather than proceeding — a mis-wired kit must fail loudly, not silently no-op. (STORY-12.2.03)
+
+## Inputs needed
+
+- Testplan file path — resolve the `testplans` folder via the path map (`node _00-Project-Management/93-Scripts/lib/pm-paths.js resolve testplans`; the config is `90-Standards/pm-paths.json`), then glob for `EPIC-NN/FEAT-NN.M/TESTPLAN-NN.M.PP-*.md` under it. (E.g., it resolves to `05-Test` in a flattened layout.)
+- If the user didn't supply it, infer from a recently-completed story or ask.
+
+## Load into context
+
+Folder locations are resolved through the path map (`pm-paths.js` / `pm-paths.json`) rather than hardcoded, ensuring consistent references across all skills. Use the resolver script to determine physical folders for logical roles such as stories, testplans, bugs, and templates. If the resolved folder for a given role does not exist, note it in the output (don't fabricate scaffolding) and degrade gracefully.
+
+- **Testplan file** — at the resolved path from "Inputs needed".
+- **Paired story** — resolve the `stories` folder via the path map and read the corresponding STORY file.
+- **SOP / DoD reference** — `_00-Project-Management/90-Standards/SOP.md` if present. If absent, fall back to project-root `CLAUDE.md`.
+- **Stack quirks** — `_00-Project-Management/90-Standards/PROJECT-CONTEXT.md` if present. If absent, infer from `package.json` + project-root `CLAUDE.md`.
+- **BUG template** — resolve the `templates` folder via the path map and read `BUG.template.md` if present. If absent, redraft from a sibling BUG in the same FEAT folder.
+- **BUGs folder** (for filing new ones) — resolve the `bugs` folder via the path map.
+- **Project root `CLAUDE.md`** — always loaded.
+- **Prior HTML context (`html_context:`)** — if the testplan (or its paired story) frontmatter carries a non-empty `html_context:` array, `Read` every repo-relative path it lists (explorations, annotated diffs, options-comparisons) into context **before** executing the test cases, so test interpretation is grounded in the same architectural reasoning the human reviewer had. Skip entries that don't resolve (validator R16 already flags missing/traversal paths at `pm:lint` — don't double-report, just note the skip). Treat the SOP §11 50 KB guideline as advisory: summarise very large files rather than reading them whole.
+
+Use `Read` / `Glob` to detect existence rather than assuming; treat missing files as "not present" rather than throwing.
+
+## Task
+
+1. **Flip the testplan's status:** `not-started` → `in-progress`. Set `started_at` to now. (Testplans don't use `in-review` — that's the story's status. If you find the testplan already at `in-review` from a prior skill misuse, flip it back to `in-progress` for this run, then on to `done` if all PASS.)
+
+2. **Verify all Preconditions** in the testplan are met. If not, STOP and report.
+
+   **Ingest prior HTML context first.** If the testplan (or its paired story) carries a non-empty `html_context:`, `Read` every listed prior HTML artefact into context now — before any TC runs (see "Load into context" above) — so the expected behaviour is interpreted against the same architectural reasoning the human reviewer had.
+
+3. **Run every TC in order:**
+   - Execute the `Command:` exactly as written. **No improvisation.** No "I'll try a slightly different command."
+     - **Runtime-on-PATH exception:** if a command fails only because its runtime isn't on the shell PATH (`exit 127` / "command not found" for `node`/`npm`/etc.), that's an environment gap, not a bad command — resolve it per `PROJECT-CONTEXT.md` "Known stack gotchas" (put the runtime on PATH or invoke its absolute binary), then re-run the **same** command verbatim. Resolving the runtime ≠ rewriting the `Command:`.
+   - Compare actual output to `Expected:`.
+     - **Confound caution (dashboard TCs):** if a TC asserts via a bare `grep <substring> DASHBOARD.html` (no `window.__DATA`/element qualifier), treat a PASS/FAIL with suspicion — the generator embeds every artefact body into `window.__DATA`, so the substring may match indexed *prose*, not the rendered feature (precedents: BUG-20260606-02, TESTPLAN-15.2.02/04). Still run the `Command:` verbatim, but if the result looks confounded, file a BUG against the testplan (not the product) and flag the TC for re-authoring to a payload-/element-scoped assertion (see `split-into-stories` testplan-authoring rule).
+   - Update the TC's `Result:` line:
+     - `PASS — YYYY-MM-DD` on success.
+     - `FAIL — see BUG-YYYYMMDD-NN` on failure (link the bug you file).
+
+4. **If a TC fails — IMMEDIATELY:**
+   - File a BUG at `<resolved-bugs-folder>/EPIC-NN/FEAT-NN.M/BUG-YYYYMMDD-NN-<slug>.md` using `BUG.template.md`. Include full repro, environment snapshot, first analysis hypothesis, suggested fix direction. A junior dev should be able to act on it without asking questions.
+   - Update the failed TC's `Result:` line to reference the bug.
+   - If the failure is critical and blocks the rest of the run, STOP further TC execution. Otherwise continue.
+   - A bug filed in chat-only is a process violation — the BUG file must exist on disk before you report the failure in your summary.
+   - **Spec-error exception:** if the failure indicates the AC / Expected line is wrong (budget set without measurement, expected vocabulary that contradicts shipped code, etc.) rather than a code defect, file an **ADR** documenting the spec correction in place of a BUG. The ADR linked from the TC's `Result:` line IS the resolution. Don't file both for the same root cause.
+
+5. After all TCs run, update the testplan's "Sign-off" section.
+
+6. **If ALL TCs PASS** — flip testplan status to `done`. Set `completed_at` to now. Recommend `/Tandem:close-out-story` next.
+
+7. **If ANY TC FAILed** — leave testplan as `in-progress`. Story stays `in-review`. The dev needs to fix the bugs and re-run the failed TCs.
+
+## Output rules
+
+- Run TCs serially, not in parallel — easier to attribute failures.
+- **Batched-pattern invocation is OK** when commands share a runner (e.g. running multiple jest TCs in one `--testPathPattern` invocation), AS LONG AS the runner's per-file PASS/FAIL output keeps each TC's `Result:` line independently attributable. Don't batch across runners (jest + Playwright + bash scripts in one call); those run separately.
+- For UI test commands: capture screenshot artifacts to the resolved reports folder (`41-Reports/` canonical or `_Reports/` flattened) if helpful.
+- Trim log output in BUG evidence sections to ≤30 lines, key frames preserved.
+- For long-running test suites that produce noisy logs: spawn a fresh agent (SOP §18) and get back the PASS/FAIL summary — don't paste 500 lines of stdout into the main thread.
+
+## Non-negotiable rules from CLAUDE.md
+
+- Frontmatter timestamps.
+- Status enum.
+- Bug auto-raise on failure — MANDATORY, in the same response as the failure observation.
+- Templates rule.
+
+## End-of-run summary (always emit)
+
+- TCs PASSED: X / Y
+- TCs FAILED: Y - X
+- BUGs filed: <list of paths>
+- Testplan status: `done` | `in-progress`
+- Recommended next step:
+  - All PASS → `/Tandem:close-out-story`
+  - Any FAIL → assign bugs to Dev hat; re-run testplan after fixes
+
+## Next command
+
+Next: `/Tandem:close-out-story`
+
+When every TC PASSes, close the story through the Definition of Done gate.
diff --git a/plugins/tandem/skills/session-start/SKILL.md b/plugins/tandem/skills/session-start/SKILL.md
new file mode 100644
index 0000000..929bfbb
--- /dev/null
+++ b/plugins/tandem/skills/session-start/SKILL.md
@@ -0,0 +1,94 @@
+---
+name: session-start
+description: Load active project context at the start of a Claude Code session. Use when the user opens a session and asks "what's going on", "what's next", "where did we leave off", or invokes /Tandem:session-start. Reads 12-Active/ACTIVE.md, the most recent ADRs, the MONITOR revision history, and any stories in `in-progress` or `blocked` — then announces the suggested hat and next step.
+---
+
+# Tandem: session-start (orientation)
+
+Use at the start of a working session to re-orient. The blog's "start hooks load team-specific context dynamically" recommendation, implemented as a manual skill (less noisy than a hook that fires on every session, including 2-message ones).
+
+## Pre-flight — is the kit wired?
+
+Before loading context, run the cheap wiring gate: `node _00-Project-Management/93-Scripts/doctor.js --gate` (npm: `npm run pm:doctor -- --gate`). It is **silent on success**; if it exits non-zero with **kit not wired — run `npm run pm:install`**, surface that first and recommend `npm run pm:install` before orienting — the kit isn't wired yet, so the folders/scripts this skill reads may be missing. (STORY-12.2.03)
+
+## What to load
+
+Folder locations are resolved through the path map (`pm-paths.js` / `pm-paths.json`) rather than hardcoded, ensuring consistent references across all skills regardless of whether the repo uses the canonical or flattened layout. The script `node _00-Project-Management/93-Scripts/lib/pm-paths.js resolve <role>` prints the physical folder for any logical role (e.g., `resolve stories`, `resolve decisions`, `resolve active`, `resolve monitor`).
+
+1. **Active WIP index** — resolve the `active` folder via the path map; it typically maps to `12-Active` (canonical) or `00-Active` (flattened). Read `ACTIVE.md` from the resolved folder.
+   - Falls back to: scan stories with `status: in-progress` directly (slower but always works).
+
+2. **Monitor / revision history** — resolve the `monitor` folder via the path map; it typically maps to `42-Monitor` (canonical) or `00-Monitor` (flattened). Read `MONITOR.md` from the resolved folder.
+   - If the file does not exist, note "no monitor file found".
+
+3. **Last 5 ADRs by filename**, sorted descending by NNNN — resolve the `decisions` folder via the path map; it maps to this repo's canonical decisions folder, or to `06-ADR` under a flattened layout. Glob `ADR-*.md` under that resolved folder.
+   - If the folder does not exist, note "no ADR folder yet — first ADR will need to create it."
+
+4. **Stories folder** — resolve the `stories` folder via the path map; it maps to this repo's canonical stories folder, or to `03-Stories` under a flattened layout. Glob `STORY-*.md` recursively under that resolved folder.
+   - From that folder, surface:
+     - `status: in-progress` — list paths + AC tick state.
+     - `status: blocked` — list paths + reason.
+     - `status: in-review` aged > 3 days — flag for close-out.
+
+For multi-file scans (step 4), delegate to an Explore agent (SOP §18) and ingest the summary — do not paste raw file contents into the main thread. **Use the Explore scan only for the in-progress / blocked / stale-in-review *list*, NOT for project-wide totals** — a broad fan-out scan can silently undercount or sample a subset of the stories folder. Take any shipped / total / blocked / in-progress *counts* from the resolved MONITOR (the maintained source of truth), per the tiebreaker rule in "Output rules" below. (Precedent: a session-start Explore scan once under-reported the story/epic totals and missed a blocked story vs the MONITOR's authoritative count — which is why project-wide counts come from the MONITOR and only the WIP list comes from the scan.)
+
+**Layout detection rule:** check existence with `Bash ls`, `Glob`, or `Read` (which returns an error for missing files — treat that as "not present" rather than throwing). Do not assume any single layout. The orientation must work whether the repo uses the canonical SOP scaffold OR a project-specific flattened variant.
+
+## What to output
+
+A short orientation block:
+
+```
+📌 Session-start orientation — <ISO date>
+
+Active WIP:
+  - STORY-NN.M.PP-<slug> (in-progress, 3/5 ACs ticked, started <date>)
+  - STORY-NN.M.PP-<slug> (in-review, all ACs ticked, awaiting testplan run)
+
+Blocked (1):
+  - STORY-NN.M.PP-<slug> — blocked on ADR-NNNN since <date>
+
+Stale in-review (1):
+  - STORY-NN.M.PP-<slug> — in-review for 5 days, run /Tandem:close-out-story
+
+Recent ADRs:
+  - ADR-NNNN — <title> (<date>)
+
+Last week (from MONITOR):
+  - <copy the most recent revision-history line>
+
+Suggested hat: <Dev | PM | QA | Founder>
+Suggested next step: <one specific action>
+```
+
+## Output rules
+
+- ≤ 25 lines total. This is orientation, not a status report.
+- Do not modify any artefact during session-start — read-only.
+- If the resolved ACTIVE / monitor file is empty (or none exists), say so and recommend the user pull a Ready story.
+- If the resolved monitor file hasn't been updated in > 7 days, flag it.
+- **MONITOR is the tiebreaker over the fan-out story scan.** If the step-4 `Explore` story scan disagrees with the resolved monitor file (different shipped/blocked/in-progress counts, missing a story the monitor lists as blocked, or status values that don't match the project's enum) AND the monitor was updated within 7 days, treat the **monitor as authoritative**, surface the discrepancy in one line, and base the "Blocked / In-progress" lists on the monitor. A broad fan-out scan can silently undercount or sample a subset; the monitor is the maintained source of truth.
+- If the repo's folder layout differs from the canonical scaffold, name the resolved paths in a short note at the top of the output (one line, e.g. "Layout: 00-Monitor + 03-Stories + 06-ADR (flattened variant)") so subsequent skills + the user know what's actually being read.
+
+## Non-negotiable rules from CLAUDE.md
+
+- Subagent delegation (SOP §18) for multi-file scans.
+- Status enum — never invent values when summarising.
+- Do not regenerate the dashboard at session start (that's the Stop hook's job).
+
+## Join this chat to the conversation Mode
+
+This chat opts in to the project's global Mode as part of session start:
+
+1. Join this session, then read the current state:
+   `node _00-Project-Management/93-Scripts/mode.js join --session <session_id>`
+   `node _00-Project-Management/93-Scripts/mode.js get --json`
+2. Lead the session announcement with the active mode, e.g.
+   *"Tandem mode: **DEV** (set by you, 2026-06-03). I'll nudge on planning requests."*
+   If mode is `neutral`, say so and note the user can set one with `/mode <plan|dev|dual|neutral>`.
+
+Use the session ID from the session context as `<session_id>`.
+
+## End-of-session-start
+
+Always end with a single concrete suggested next action — not a menu. The user can override; the default should be obvious.
diff --git a/plugins/tandem/skills/split-into-features/SKILL.md b/plugins/tandem/skills/split-into-features/SKILL.md
new file mode 100644
index 0000000..77a234b
--- /dev/null
+++ b/plugins/tandem/skills/split-into-features/SKILL.md
@@ -0,0 +1,74 @@
+---
+name: split-into-features
+description: Split an EPIC into Feature files. Use when the user asks to split / decompose / break down an epic into features, asks to create features under an epic, or invokes /Tandem:split-into-features. Operates as PM hat. Reads an Epic file and writes FEAT-NN.M-<slug>.md files in 31-Features/EPIC-NN/, then updates the Epic's Features section with relative links.
+---
+
+# Tandem: split-into-features (PM hat)
+
+Operate as **PM hat**. The user has an approved Epic and needs to decompose it into Features that can each ship in 1–2 weeks of solo work.
+
+This skill is the slash-command wrapper for the kit's canonical feature-decomposition prompt. The prompt is the source of truth; this file is the entry point.
+
+## Source of truth
+
+@_00-Project-Management/92-Prompts/03-split-epic-into-features.md
+
+Follow that prompt verbatim. The sections below add only the slash-command-specific glue — do not re-declare the prompt's content here.
+
+## Inputs needed
+
+- Path to the source Epic file under the `epics` folder (resolve via the path map: `node _00-Project-Management/93-Scripts/lib/pm-paths.js resolve epics`; e.g. `_00-Project-Management/30-Epics/EPIC-NN-<slug>.md` in a canonical layout).
+- If the user didn't supply one, ask: "Which Epic file should I decompose? Paste the path or the EPIC-NN id."
+
+## Load into context
+
+Use `Read` / `Glob` to detect existence. Treat missing files as "not present" rather than throwing. Folder locations are resolved through the path map (`pm-paths.js` / `pm-paths.json`) rather than hardcoded.
+
+- **Source Epic** — at the resolved path above. Read it fully — especially the `## Features` outline (high-level titles the Epic already proposed) and `## In scope` / `## Out of scope`.
+- **SOP** — `_00-Project-Management/90-Standards/SOP.md` for DoR + estimation rules.
+- **Project context** — `_00-Project-Management/90-Standards/PROJECT-CONTEXT.md`.
+- **Feature template** — resolve the `templates` folder via the path map and read `FEATURE.template.md`. Use verbatim — do not redraft section headings from memory.
+- **Existing Features under this Epic** — resolve the `features` folder via the path map and glob `EPIC-NN/FEAT-NN.M-*.md` to find the next-free `M` (scan for max M, increment by 1). Create the `EPIC-NN/` subfolder if it doesn't exist yet.
+- **Project root `CLAUDE.md`** — for project-specific overrides.
+
+## Task
+
+1. Read the source Epic's `## Features` outline. Each high-level title becomes one FEAT file (unless the user wants a different split — confirm split count up front to avoid under-decomposition).
+2. For each feature, find next-free `FEAT-NN.M` by scanning the target `EPIC-NN/` folder within the resolved `features` folder.
+3. Write each Feature file under the resolved `features` folder, organized as `EPIC-NN/FEAT-NN.M-<slug>.md`, using `FEATURE.template.md` verbatim. Number sequentially within the Epic (.1, .2, .3 …).
+   - **Dispatch `write-outcomes` for each Feature:** After writing each FEAT file, spawn a sub-agent with the `write-outcomes` skill, passing the Feature's title and technical body. Write the returned single line (plain text, no markdown) into the Feature's `outcome:` frontmatter field.
+4. Fill every section:
+   - **Goal** — the slice of the Epic this delivers.
+   - **User value** — one sentence on UX improvement.
+   - **Scope** — bulleted breakdown.
+   - **Acceptance criteria** — testable checkboxes. Each AC must map to ≥1 story later (don't write the stories — `/Tandem:split-into-stories` does that).
+   - **Dependencies, Data touched, Risks.**
+5. Update the Epic's `## Features` section with relative links to the new FEAT files (relative from the resolved `epics` folder to the resolved `features` folder, e.g. `../31-Features/EPIC-NN/FEAT-NN.M-<slug>.md` in a canonical layout).
+6. Set frontmatter on every Feature: `status: not-started`, `created_at: <ISO 8601 now from system clock>`, other timestamp fields empty strings, `epic: EPIC-NN`.
+7. **Show the file tree of what you'll create before writing.** Wait for user approval. This gate applies before any file write in steps 3–6, even though it is listed last.
+
+## Output rules
+
+- If a Feature feels > L estimate (1–2 weeks), propose splitting into two Features before writing.
+- If two Features overlap heavily, propose merging.
+- Under-decomposition risk: if you produce only 1 Feature when the Epic warrants several, **stop and confirm the split count with the user** before writing.
+- Do NOT create STORY files — that's the next phase.
+
+## Non-negotiable rules from CLAUDE.md
+
+- Frontmatter timestamps (quoted ISO 8601 with offset, from system clock).
+- Status enum (`not-started` on creation).
+- Templates rule — use `FEATURE.template.md` verbatim.
+- Epic linkage — every Feature's frontmatter `epic:` must point to the source Epic.
+
+## End-of-session summary (always emit)
+
+- Files written: list of Feature file paths under the resolved `features` folder (e.g. `_00-Project-Management/31-Features/EPIC-NN/FEAT-NN.M-<slug>.md`)
+- Total features: N
+- Estimated total: <weeks>
+- Suggested execution order: which to start with and why
+- Epic `## Features` section updated: yes / no
+
+## Next command
+
+Next: `/Tandem:split-into-stories` — decompose each Feature into Stories + paired Testplans.
diff --git a/plugins/tandem/skills/split-into-stories/SKILL.md b/plugins/tandem/skills/split-into-stories/SKILL.md
new file mode 100644
index 0000000..46f8a55
--- /dev/null
+++ b/plugins/tandem/skills/split-into-stories/SKILL.md
@@ -0,0 +1,97 @@
+---
+name: split-into-stories
+description: Split a Feature into Stories AND paired Testplans in the same response. Use when the user asks to split / decompose a feature into stories, asks to write stories for a feature, asks to create stories + testplans, or invokes /Tandem:split-into-stories. Operates as PM hat. Enforces the kit's MANDATORY Story → Testplan pairing rule — aborts (writes nothing) if it cannot produce both story and matching testplan in the same response.
+---
+
+# Tandem: split-into-stories (PM hat)
+
+Operate as **PM hat**. The user has an approved Feature and needs to decompose it into work-sized Stories. Each Story ships with its paired Testplan in the **same response** — this is the kit's most load-bearing rule.
+
+This skill is the slash-command wrapper for the kit's canonical story-decomposition prompt. The prompt is the source of truth; this file is the entry point.
+
+## Source of truth
+
+@_00-Project-Management/92-Prompts/04-split-feature-into-stories.md
+
+Follow that prompt verbatim. The sections below add only the slash-command-specific glue + the paired-testplan enforcement contract — do not re-declare the prompt's content here.
+
+## Paired-testplan enforcement (MANDATORY)
+
+Per SOP §11 — "Story → Testplan pairing — MANDATORY" — every Story file under the resolved `stories` folder (via `node _00-Project-Management/93-Scripts/lib/pm-paths.js resolve stories`), organized as `EPIC-NN/FEAT-NN.M/STORY-NN.M.PP-*.md`, requires a paired Testplan in the resolved `testplans` folder, organized as `EPIC-NN/FEAT-NN.M/TESTPLAN-NN.M.PP-<slug>.md`, written **in the same response**. Folder locations are resolved through the path map (`pm-paths.json`) rather than hardcoded.
+
+This skill enforces the rule structurally, not as an honour-system convention:
+
+1. Draft **both** the Story and its Testplan in memory first — every AC, every TC, every runnable `Command:`.
+2. Only when both drafts are complete and consistent (every AC maps to ≥1 TC, every TC has a real runnable command), **commit both files to disk in the same response**.
+3. If for ANY reason the paired Testplan cannot be drafted (AC is not machine-testable, command convention unknown, etc.), **abort — do not write the Story file**. Tell the user which AC blocked the pairing and ask them to either rewrite the AC to be testable or accept dropping it. Do not silently leave an orphan Story.
+4. Story + Testplan numbering is lockstep: `STORY-NN.M.PP` ↔ `TESTPLAN-NN.M.PP` for the same `PP`. The skill picks `PP` once and uses it for both files.
+
+This contract IS the differentiator vs. the plain paste-prompt. The prompt advises pairing; this skill rejects the violation.
+
+## Inputs needed
+
+- Path to the source Feature file under the `features` folder (resolve via the path map: `node _00-Project-Management/93-Scripts/lib/pm-paths.js resolve features`; e.g. `_00-Project-Management/31-Features/EPIC-NN/FEAT-NN.M-<slug>.md`).
+- If the user didn't supply one, ask: "Which Feature file should I decompose? Paste the path or the FEAT-NN.M id."
+
+## Load into context
+
+Use `Read` / `Glob` to detect existence. Treat missing files as "not present" rather than throwing. Folder locations are resolved through the path map (`pm-paths.js` / `pm-paths.json`) rather than hardcoded.
+
+- **Source Feature** — at the resolved path above. Read fully — especially the `## Acceptance criteria` checklist (each criterion ~ 1 story).
+- **Parent Epic** — resolve the `epics` folder via the path map and read `EPIC-NN-*.md` for upstream context.
+- **SOP** — `_00-Project-Management/90-Standards/SOP.md` for DoR, estimation, status enum, frontmatter contract.
+- **Project context** — `_00-Project-Management/90-Standards/PROJECT-CONTEXT.md` for runnable test command conventions (which test runner, what ports, how to invoke).
+- **Story + Testplan templates** — resolve the `templates` folder via the path map and read `STORY.template.md` + `TESTPLAN.template.md`. Use both verbatim — do not redraft section headings from memory.
+- **Existing Stories under this Feature** — resolve the `stories` folder via the path map and glob `EPIC-NN/FEAT-NN.M/STORY-NN.M.PP-*.md` to find next-free `PP` (zero-padded to 2 digits). Same `PP` is used for the paired Testplan. Create both subfolders if missing.
+- **Project root `CLAUDE.md`** — for project-specific overrides.
+
+## Task
+
+1. Read the source Feature's `## Acceptance criteria`. Each criterion is typically one Story. If a criterion implies > L work, split into multiple Stories.
+2. For each derived story, **draft both files in memory first** (Story + Testplan). Do not write to disk until both drafts are complete and consistent.
+3. Story file content:
+   - Use `STORY.template.md` verbatim.
+   - Fill `As / I want / So that`, AC checkboxes, technical notes, dependencies, references.
+   - Frontmatter: `status: not-started`, `created_at: <ISO 8601 now>`, other timestamps empty, `epic: EPIC-NN`, `feature: FEAT-NN.M`, `id: STORY-NN.M.PP`, `ai_review: pending`.
+   - Set `type_of_work:` to a concrete discipline (`frontend` / `backend` / `infra` / `data` / `docs`) — **never leave the template placeholder** (it's a DoR gate per SOP §6 and fuels sub-agent assignment). Add `suggested_agents: [agent, …]` only when a specific specialist clearly fits — it overrides the PROJECT-CONTEXT `type_of_work → sub-agent` map (SOP §11.3 / FEAT-03.1). Most stories need only `type_of_work`.
+4. Paired Testplan file content:
+   - Use `TESTPLAN.template.md` verbatim.
+   - One row in the AC→TC map per Story AC, ≥1 TC per AC.
+   - Every TC has a runnable `Command:` (no manual steps, no "have a human verify"). Use commands appropriate to the stack (see PROJECT-CONTEXT.md).
+   - **Never assert by grepping the whole `DASHBOARD.html` for a substring that could appear in indexed artefact prose.** The generator embeds every artefact's `bodyHtml` into the `window.__DATA` payload, so a bare `grep <substring> DASHBOARD.html` is confounded — the substring almost always also appears in some artefact's prose (including the very story/testplan that *discusses* the feature under test). This produced three false results (BUG-20260606-02, TESTPLAN-15.2.02, TESTPLAN-15.2.04). Instead: **(a)** parse the `window.__DATA` payload and assert the relevant array/field, **(b)** assert a **specific rendered element** (e.g. `grep '<h1 class="app-title">'`), or **(c)** use a sentinel that cannot appear in any artefact body. Extraction one-liner for (a): `node -e 'const D=JSON.parse(require("fs").readFileSync("…/DASHBOARD.html","utf8").match(/window\.__DATA = (\{[\s\S]*?\});<\/script>/)[1]); /* assert on D */'`.
+   - Frontmatter: `id: TESTPLAN-NN.M.PP`, `story: STORY-NN.M.PP`, `feature: FEAT-NN.M`, `epic: EPIC-NN`, `status: not-started`, `created_at: <ISO 8601 now>`.
+5. **Validate before writing**: every AC in the draft Story is covered by ≥1 TC in the draft Testplan; every TC has a real `Command:`. If validation fails, **abort** per the enforcement contract above.
+6. Commit both files to disk in the same response. Number stories sequentially within the Feature (.01, .02, .03 …).
+   - As each Story file is written, dispatch the `write-outcomes` skill via a sub-agent, passing the story's title, acceptance criteria, and technical content. Write the returned single-line outcome (verbatim, no markdown) into that Story's `outcome:` frontmatter field — in the same response.
+7. Update the Feature's `## Stories` section with relative links to the new Story files.
+8. **Show the file tree of what you'll create before writing.** Wait for user approval. This gate applies before any file write in steps 6–7, even though it is listed last.
+
+## Output rules
+
+- If any AC is not machine-testable, **stop and flag**: "AC-3 'looks good' is not testable. Rewrite as e.g. 'matches mockup screenshot within 5% pixel delta', or accept dropping it." Do not invent a fake TC to hide the gap.
+- If a Story estimates to XL, propose splitting before writing.
+- If a Story has > 5 ACs, propose splitting (AC explosion → TC bloat).
+- Do **not** mark any Story `ready` — that requires DoR gate via a separate refine step.
+
+## Non-negotiable rules from CLAUDE.md
+
+- Frontmatter timestamps (quoted ISO 8601 with offset, from system clock).
+- Status enum (`not-started` on creation).
+- **Story → Testplan pairing — MANDATORY** (enforced above; reject violations, don't bend).
+- Templates rule — use `STORY.template.md` and `TESTPLAN.template.md` verbatim.
+- Project-specific test conventions from `PROJECT-CONTEXT.md` (ports, commands, etc.).
+
+## End-of-session summary (always emit)
+
+- Stories created: N (list paths)
+- Testplans created: N (list paths)
+- Pairing verified: yes / no (must be yes — otherwise the writes shouldn't have happened)
+- ACs flagged as not machine-testable: list, or "none"
+- Estimated total: <days/weeks>
+- Suggested first story to pull: STORY-NN.M.PP (lowest dependency, smallest)
+
+## Next command
+
+Next: `/Tandem:refine-backlog` — promote selected stories `not-started` → `ready` after DoR gate.
+
+Or, if a story is obviously DoR-clean already: `/Tandem:execute-story <story-path>` — start work directly.
diff --git a/plugins/tandem/skills/start-phase/SKILL.md b/plugins/tandem/skills/start-phase/SKILL.md
new file mode 100644
index 0000000..f5bf782
--- /dev/null
+++ b/plugins/tandem/skills/start-phase/SKILL.md
@@ -0,0 +1,110 @@
+---
+name: start-phase
+description: Open a phase — the opener counterpart to close-phase. Use when the user asks to start a phase, open a phase, begin a phase, kick off a phase / batch / chat, or cut the phase branch before an execute-batch run. Resolves an explicit phase target, gates the entry state (clean working tree + currently on main + every phase story DoR-ready), then cuts the phase branch off main and records the open on the board.
+---
+
+# Tandem: start-phase (PM hat)
+
+Operate as **PM hat**. `start-phase` is the **opener counterpart to `close-phase`**: where
+`close-phase` is the per-phase Definition-of-Done gate that wraps a finished phase up and merges it
+to `main`, `start-phase` is its mirror at the front — it **opens** a phase safely before an
+`execution-strategist` chat / `execute-batch` run begins. The two skills share one phase-branch
+convention (`40-Decisions/`, ADR-0045): `start-phase` **cuts** `phase/<phase-id>` off `main`,
+`close-phase` later **merges** that same branch back to `main`.
+
+It runs in a fixed order, each step gated on the one before it:
+
+1. **Phase-scope detection** — resolve the set of stories in an explicit target, and echo them.
+2. **Entry gate** — clean working tree + currently on `main` + every phase story DoR-ready, or
+   **abort and list** the specific gap.
+3. **Branch creation** — cut `phase/<phase-id>` off `main` per the convention; surface the
+   checkout command (never force, no `gh` assumed).
+4. **Board update** — record the open on `MONITOR.md` + `ACTIVE.md` and regenerate the dashboard.
+
+> **Gate-then-act.** Step 1 only reads; Step 2 is a hard gate; once it passes, Steps 3–4 act. If the
+> entry gate fails, `start-phase` **stops and reports the failing item** — it never branches from a
+> dirty tree, off the wrong base, or for a phase whose stories aren't ready. This mirrors
+> `close-phase`'s done-gate discipline, inverted for entry: the abort-and-list path is the
+> load-bearing behaviour.
+
+## Step 1 — Phase-scope detection (EXPLICIT target — never guess)
+
+Take an **explicit** phase / chat / epic **target** from the user — never infer which phase to
+open from ambient state (mirrors `close-phase` Step 1). Accepted targets:
+
+- a **strategist phase** or **chat id** (e.g. `CHAT-01`) from an `EXECUTION-STRATEGY-*.json`
+  sidecar — resolve to the `stories[]` listed under that chat / phase;
+- an **`EPIC-NN`** (or a single **`FEAT-NN.M`**) — resolve the **set of stories** belonging to that
+  epic / feature by globbing `32-Stories/EPIC-NN/...`.
+
+**Resolve the set of stories** in the target, then **echo the resolved list** (each `id` +
+`status`) back to the user so the scope is explicit and reviewable before anything is created. If
+the target is **ambiguous, missing, or zero-story**, **stop and ask** for a concrete
+phase / chat / epic — do not guess which stories are in scope. A target that resolves to no
+stories is treated as a missing target: abort rather than open an empty phase.
+
+## Step 2 — Entry gate (abort-and-list — never open from a bad state)
+
+Before creating anything, verify **all three** entry conditions. This is a hard gate:
+
+- **Clean working tree** — `git status --porcelain` is empty (no uncommitted changes); a phase
+  must not be opened on top of unrelated dirty edits.
+- **Currently on `main`** — the phase branch is cut from `main` (ADR-0045), so the entry branch
+  must be `main` (`git branch --show-current` → `main`). Opening from another branch would give
+  the phase branch the wrong base.
+- **Every phase story DoR-ready** — each resolved story carries `status: ready` in its frontmatter
+  (the kit's Definition-of-Ready gate). A `not-started` / un-refined story must go through
+  `/Tandem:refine-backlog` first.
+
+If **all three** pass → proceed (Step 3, branch creation). If **any** fails → **abort and list the
+specific gap** — name exactly which item failed (the dirty paths, the current branch if not
+`main`, or each not-ready story `id` + its `status`) so the operator knows precisely what to fix.
+`start-phase` never opens a phase from a dirty tree, off a non-`main` base, or with stories that
+aren't ready; a half-opened phase is worse than an un-opened one.
+
+## Step 3 — Create the phase branch off `main` (surface the command — never force)
+
+Once the entry gate passes, create the phase branch per the shared convention (`40-Decisions/`,
+ADR-0045): name **`phase/<phase-id>`**, **cut from `main`**.
+
+- **Branch off `main`.** The phase branch is created **from `main`** (the entry gate already
+  confirmed you're on `main`), giving it the clean base `close-phase` will later merge back.
+  `<phase-id>` is the explicit target's id (e.g. `phase/chat-01`, `phase/epic-10`), lowercased,
+  exactly per the ADR-0045 convention.
+- **Surface a copy-pasteable command — never force, no `gh` assumed.** `start-phase` **surfaces**
+  the branch-create command for the operator rather than force-creating it:
+
+  ```bash
+  git switch -c phase/<phase-id> main      # equivalently: git checkout -b phase/<phase-id> main
+  ```
+
+  The command is **never a force operation** — `start-phase` **never force-creates** or clobbers a
+  branch (no `--force`, no `-B` reset), and it assumes **no `gh` CLI** (a plain `git` command /
+  link is surfaced, not a host API call). This mirrors `close-phase` Step 7 — surface the command,
+  never force — inverted for branch creation. If a `phase/<phase-id>` branch already exists, **stop
+  and surface that** rather than overwrite it.
+
+## Step 4 — Record the open on the board (MONITOR + ACTIVE) + regenerate the dashboard
+
+With the phase branch created, record that the phase is now **open** so the live board reflects it
+the moment it opens — the front-end mirror of `close-phase` Step 5 (which records the close):
+
+- **MONITOR** — update `42-Monitor/MONITOR.md`: note the phase's stories are now **in-flight**,
+  plus a one-line **revision-history** entry dated today recording that the phase opened.
+- **ACTIVE** — add the now-in-progress story pointers to `12-Active/ACTIVE.md` (the live WIP index
+  of `in-progress` items), per the kit's "when you change a status" rule — the moment the phase
+  opens, its stories appear on the board.
+- **Dashboard** — regenerate it with `npm run pm:dash` so `42-Monitor/DASHBOARD.html` reflects the
+  opened phase. The MONITOR/ACTIVE edits and the dashboard regen happen in the same step (the kit's
+  atomic status-change rule).
+
+## Non-negotiable rules (from CLAUDE.md)
+
+- Operates as **PM hat**; the phase-level opener analogue of `close-phase`.
+- Scope detection takes an **explicit** target (never guesses); the **entry gate** (Step 2) is a
+  hard gate — abort and list the failing item, never open from a bad entry state.
+- Branch creation (Step 3) **surfaces** the `git switch -c … main` command and **never forces**,
+  assuming **no `gh`** — per the shared phase-branch convention (ADR-0045) and the project root
+  `CLAUDE.md` git rules (branch first; no force without authorisation).
+- Board update (Step 4) follows the kit's "when you change a status" rule — the `MONITOR.md` +
+  `ACTIVE.md` edits and the `npm run pm:dash` regen happen together when the phase opens.
diff --git a/plugins/tandem/skills/update/SKILL.md b/plugins/tandem/skills/update/SKILL.md
new file mode 100644
index 0000000..c10dff1
--- /dev/null
+++ b/plugins/tandem/skills/update/SKILL.md
@@ -0,0 +1,42 @@
+---
+name: update
+description: Pull Tandem kit improvements into an already-installed project without touching your work — refresh only kit-owned files (templates, standards, tooling) + regenerate the dashboard, never the operator's artefacts or folder structure. Use when the user asks to update Tandem, upgrade the PM kit, pull kit changes, refresh tooling, or invokes /Tandem:update. Thin wrapper over the canonical update.js (pm:update); shows the change diff and confirms before applying.
+---
+
+# Tandem: update (non-destructive refresh)
+
+Refresh an already-installed project's kit to the current version **without ever touching the
+operator's work**. This skill is the entry point; the deterministic, safety-critical work lives in
+the canonical `update.js` script (`pm:update`).
+
+## Source of truth
+`_00-Project-Management/93-Scripts/update.js` (run as `npm run pm:update`). See ADR-0073 (update is
+manually triggered; `pm:doctor` only reports drift) and ADR-0072 (kit/user ownership boundary).
+
+## The non-destructive contract (load-bearing)
+- Refreshes **only** `ownership:kit` content — manifest kit seed files (templates, standards, the
+  PM-folder CLAUDE.md) + the `93-Scripts/` tooling.
+- **Never** writes an `ownership:user` path (PROJECT-CONTEXT.md, MONITOR.md, ACTIVE.md, settings.json)
+  or any work folder (epics/stories/…) — they stay byte-identical.
+- **Never** creates, removes, or moves a folder — the folder set is unchanged.
+
+## How to run it
+- Preview the change diff first (recommended): `node _00-Project-Management/93-Scripts/update.js --dry-run`
+- Apply: `npm run pm:update`. To target another project root, pass `--target <dir>` — via npm the flag goes after `--`: `npm run pm:update -- --target <dir>`.
+- Check for drift first: `npm run pm:doctor` prints `update available` when the installed
+  `kitVersion` is behind the shipped one (read-only — it writes nothing).
+
+## Conversational steps this skill owns
+- **Show the diff** — run `--dry-run` and present the list of kit files that *would* refresh + the
+  `kitVersion` bump, so the operator sees exactly what changes.
+- **Confirm before applying** — only run the real `pm:update` after the operator confirms the diff.
+- **Report** — after applying, surface the count of refreshed files, the new `kitVersion`, and the
+  regenerated dashboard path.
+
+## Non-negotiable rules
+- Adds **no** destructive behaviour beyond `update.js`. Never deletes or moves the operator's work.
+- Keep deterministic logic in the script — the skill only previews (diff), confirms, and reports.
+
+## Next
+Next: `/Tandem:session-start` to re-orient after a refresh, or `/Tandem:weekly-monitor` to fold the
+change into the Friday cadence.
diff --git a/plugins/tandem/skills/weekly-monitor/SKILL.md b/plugins/tandem/skills/weekly-monitor/SKILL.md
new file mode 100644
index 0000000..264cf60
--- /dev/null
+++ b/plugins/tandem/skills/weekly-monitor/SKILL.md
@@ -0,0 +1,73 @@
+---
+name: weekly-monitor
+description: Weekly MONITOR update (Friday cadence). Use when the user asks for a weekly review, end-of-week summary, monitor update, or weekly cadence. Computes the 7-day delta, updates MONITOR.md with revision history, flags stalled/blocked stories, runs backlog hygiene, regenerates the dashboard.
+---
+
+# Tandem: weekly-monitor (PM hat)
+
+Operate as **PM hat**. Friday 30-minute weekly review.
+
+## Load into context
+
+- `_00-Project-Management/42-Monitor/MONITOR.md` (current state)
+- `_00-Project-Management/12-Active/ACTIVE.md`
+- Last 7 days of activity — scan which files under `30-Epics/`, `31-Features/`, `32-Stories/`, `34-Bugs/`, `40-Decisions/` have `created_at`, `started_at`, or `completed_at` within the last 7 days (`created_at` is what surfaces newly filed BUGs and new ADRs). For a multi-folder scan, delegate to an Explore agent (SOP §18) and ingest the summary, not the raw paths.
+- `_00-Project-Management/14-Retros/` (most recent retro's "One change" — did it happen this week?)
+- `_00-Project-Management/90-Standards/SOP.md`
+
+## Task
+
+### 1. Compute the weekly delta
+
+- Stories shipped (status → `done` in the last 7 days)
+- Stories started (status → `in-progress` in the last 7 days)
+- Stories currently `blocked`
+- BUGs filed
+- BUGs fixed
+- ADRs created
+
+### 2. Update `42-Monitor/MONITOR.md`
+
+- Per-epic and per-feature counts (shipped / total).
+- Progress bars (if used).
+- "Last updated" date.
+- Prepend a revision-history entry dated today, summarising the week in 3-5 lines.
+
+### 3. Audit currents
+
+- Any story `in-progress` for > 5 days? Flag as stall risk.
+- Any story `blocked` for > 5 days? Escalate — Founder hat decision needed?
+- Any story `in-review` for > 3 days? Push to close-out (`/Tandem:close-out-story`).
+
+### 4. Backlog hygiene
+
+- Anything in `not-started` for > 90 days? Propose `wontfix` or `archived` (SOP §15 sunset rule).
+- Inbox count > 20? Propose a quick triage pass.
+
+### 5. Dashboard regen
+
+- Run `npm run pm:all` (validator + dashboard together).
+- Confirm `42-Monitor/DASHBOARD.html` reflects the week's changes.
+- If validator fails, fix the violations BEFORE the dashboard run.
+- If the `Stop` hook is active, the dashboard will also regen at session end — but running it explicitly here is cheap insurance.
+
+## Output rules
+
+- Single revision-history entry per week, prepended to MONITOR.
+- Format: `**YYYY-MM-DD — week summary.** <3-5 lines>.`
+- Be specific. Not "shipped some stories", but "shipped STORY-01.2.07 + STORY-01.3.01, closing FEAT-01.2 except for the sortable-headers AC".
+- Flag carry-forwards: "carrying STORY-02.1.04 into next week — blocked on ADR-0012."
+
+## Non-negotiable rules from CLAUDE.md
+
+- Frontmatter timestamps — do NOT modify any artefact's timestamps as part of this update. The only dates written are in MONITOR itself: its "Last updated" line and the new revision-history entry.
+- Status enum — use canonical statuses in your summary, never synonyms.
+- Dashboard regen at end of update.
+
+## End-of-update summary (always emit)
+
+- Shipped this week: N
+- Stalled: <list of stories in-progress > 5 days>
+- Blocked: <list with reasons>
+- Carry-forward to next week: <list>
+- Suggestion for Founder hat: <if any strategic drift detected>
diff --git a/plugins/tandem/skills/write-outcomes/SKILL.md b/plugins/tandem/skills/write-outcomes/SKILL.md
new file mode 100644
index 0000000..25d7f83
--- /dev/null
+++ b/plugins/tandem/skills/write-outcomes/SKILL.md
@@ -0,0 +1,98 @@
+---
+name: write-outcomes
+description: Dispatched by producer skills via a sub-agent to turn an artefact's technical content into one plain-English founder-outcome line; it is not a manually-invoked lifecycle command.
+---
+
+# Tandem: write-outcomes (dispatch-only)
+
+This skill is **dispatch-only**. Producer skills (via FEAT-14.3 wirings) spawn a sub-agent and hand it an artefact's technical scope; this skill then transforms that scope into a single plain-English outcome line for founder-facing communication. A human never invokes this directly; it is not part of the lifecycle command chain.
+
+An "outcome line" is the what-you-can-now-do summary: what a founder will *have* or *be able to do* after this artefact ships. It strips internals and surfaces value.
+
+## Voice
+
+The outcome voice is one sentence, plain English, second-person or capability-framed. Hard bans (literal — keep this phrasing intentional and verbatim; TESTPLAN-14.1.01 TC-02 asserts these markers): **no internal IDs**, **no command names**, **no shell**, no jargon. Apply these strict rules:
+
+- **DO:** Write what the founder will have or be able to do (`You can now…`, `You have…`, or `Teams can…`).
+- **DON'T:** Use internal IDs (e.g., PROJ-12, RFC-44, TICKET-318).
+- **DON'T:** Name commands or implementation (e.g., slash-commands, build scripts, CLI flags).
+- **DON'T:** Include shell syntax, jargon, or tool names (e.g., "webhook payload," "API v3.2").
+- **DO:** Keep it under ~20 words — one line, scannable.
+
+## Template
+
+```
+You can now <capability> — <the value it unlocks>.
+```
+
+Or: `You have <artefact> enabling <outcome>.` Adapt as needed; keep it a single line.
+
+## Examples
+
+Each pair is labelled by the artefact type whose technical body the producer hands in. The *Bad* lines use invented placeholders (a fictional product, made-up `PROJ-`/`TICKET-`/`RFC-`/`FEAT-` ids) — they illustrate the failure mode without referencing this kit's real internals.
+
+**Story:**
+
+**Good:** You can now auto-generate dashboards from your requirements — ship faster without wiring widgets by hand.
+
+**Bad:** PROJ-12 implements the dashboard-generator workflow via the build step (see RFC-44, TICKET-318).
+*Why bad:* Invented internal IDs (PROJ-12, RFC-44, TICKET-318) and build jargon leak; no founder value.
+
+---
+
+**Feature:**
+
+**Good:** Your team can spin up a release pipeline that scrubs gated content before publishing.
+
+**Bad:** Run the publish build on the release branch, then execute the gated workflow per the FEAT-9 wiring.
+*Why bad:* Command/branch references and an invented feature ID (FEAT-9) assume technical knowledge.
+
+---
+
+**Chat:**
+
+**Good:** You can bulk-run a folder of work items in dependency order and track the full cycle to completion.
+
+**Bad:** Batch-execute a work folder via the folder pointer + sub-agent fan-out in dependency order (TICKET-91).
+*Why bad:* Mechanism jargon (batch-execute, folder pointer, sub-agents) and an invented id (TICKET-91) leak.
+
+---
+
+**Phase:**
+
+**Good:** Documentation now stays accurate by testing that every example still works.
+
+**Bad:** The docs-automation feature bundles four tickets into a scripts-plus-skill design for auto-managing the layer.
+*Why bad:* Internal jargon (docs-automation feature, scripts-plus-skill, auto-managing) with no founder-facing outcome.
+
+---
+
+**PRD:**
+
+**Good:** Your monitoring app now updates its alert rules without rebuilding the entire codebase.
+
+**Bad:** Refactored the alert-rules module to hot-reload config per the Q3 polish milestone (see RFC-90).
+*Why bad:* Mechanism leak (refactored, hot-reload config) and an invented decision id (RFC-90) assume background knowledge.
+
+---
+
+**Story:**
+
+**Good:** You can deploy changes across all your sites without manually syncing dashboard copies.
+
+**Bad:** Dashboard-generator dev/sync rules via symlinks and copy-drift gotchas per the reference-fork integration.
+*Why bad:* Mechanism details (symlinks, copy-drift gotchas, dev/sync rules) and tooling jargon; no founder value statement.
+
+## Input / Output Contract
+
+**INPUT:** The artefact's technical content — title, acceptance criteria, scope, technical notes — passed by the producer skill.
+
+**OUTPUT:** Exactly one single line of plain text, no markdown formatting, no leading label (no "Outcome:" prefix), no surrounding quotes. Length-bounded to ~20 words (at most 120 characters — ≤120 char). This bound is agent-enforced (advisory); it is not linted by `pm:lint` and will not cause a build failure. The producer persists this verbatim to the artefact's outcome field.
+
+**Thin / empty input → return an empty line.** When the input describes no user-visible capability yet — a stub, empty or placeholder acceptance criteria, scaffolding-only scope, or a TODO with nothing shipped — return an empty line (no text). The producer then leaves the outcome field blank rather than inventing one. Never fabricate value the artefact does not yet deliver: a blank outcome is correct and honest; a plausible-sounding but unearned promise is not. This keeps the five FEAT-14.3 producers consistent — none should synthesise an outcome from an artefact that has no shipped capability to describe.
+
+## How Producers Dispatch This
+
+A producer skill (e.g., the FEAT-14.3 wirings for dashboard-generator or CLAUDE.md automation) spawns a sub-agent, hands it the artefact's technical body + this skill file, and receives the one-line outcome back to store. The sub-agent runs this skill in isolation, not as part of the lifecycle chain. The outcome line is persisted by the caller, not by this skill.
+
+This skill is dispatch-only and does not execute in the main lifecycle.