From 1d3ef1a846102a8c68ae5640b347f7c09418ae13 Mon Sep 17 00:00:00 2001
From: Constantinos <41453723+constk@users.noreply.github.com>
Date: Thu, 30 Apr 2026 02:03:29 +1000
Subject: [PATCH 1/9] =?UTF-8?q?chore:=20container=20hardening=20=E2=80=94?=
 =?UTF-8?q?=20read-only=20root=20FS=20+=20tmpfs=20/tmp=20+=20Python=20env?=
 =?UTF-8?q?=20(#119,=20#120,=20#170)=20(#68)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Dockerfile         | 9 ++++++++-
 docker-compose.yml | 9 +++++++++
 docs/SECURITY.md   | 8 ++++++++
 3 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index d5135d8..0bdf8f7 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -53,7 +53,14 @@ COPY --chown=app:app src/ src/
 USER app
 
 # Put the venv on PATH so `uvicorn` resolves without `uv run` indirection.
-ENV PATH="/app/.venv/bin:${PATH}"
+# PYTHONDONTWRITEBYTECODE=1 stops Python from attempting `.pyc` writes under
+# `__pycache__/` on cold start — they would EROFS-fail under the read-only
+# root FS configured in docker-compose.yml. PYTHONUNBUFFERED=1 keeps
+# uvicorn's stdout from being held behind line-buffering when running under
+# non-TTY container stdio.
+ENV PATH="/app/.venv/bin:${PATH}" \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1
 
 EXPOSE 8000
 
diff --git a/docker-compose.yml b/docker-compose.yml
index 55fb5ce..9224151 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -23,6 +23,15 @@ services:
       - OTEL_EXPORTER_OTLP_ENDPOINT=http://jaeger:4317
       - OTEL_EXPORTER_OTLP_PROTOCOL=grpc
       - OTEL_SERVICE_NAME=harness-python-react
+    # Container hardening. The root FS is read-only at the kernel level so a
+    # post-exploit shell can't modify /app, persist binaries, or fill disk
+    # under the `app` user's ownership. /tmp is the only writable path —
+    # tmpfs-mounted with a 64 MB ceiling so it can't be abused as unbounded
+    # storage. Verified: `touch /app/foo` → EROFS; `touch /tmp/foo` succeeds;
+    # healthcheck reports healthy. See docs/SECURITY.md "Container Security".
+    read_only: true
+    tmpfs:
+      - /tmp:size=64m,mode=1777
 
   frontend:
     build: ./frontend
diff --git a/docs/SECURITY.md b/docs/SECURITY.md
index 416d7f4..e88a93b 100644
--- a/docs/SECURITY.md
+++ b/docs/SECURITY.md
@@ -61,8 +61,16 @@ Tag v*.*.*       ──►  release.yml: build image, push to ghcr.io, generate
 - **Builder** — runs `uv sync --frozen --no-dev`. Has uv, pip cache, build tools.
 - **Runtime** — `python:3.14-slim`, copies only `.venv` + `src/` from the builder, runs as non-root user `app`. No uv, no pip cache, no build tools, no dev deps.
 
+Runtime stage env: `PYTHONDONTWRITEBYTECODE=1` (no `.pyc` writes — would EROFS-fail under the read-only root FS) and `PYTHONUNBUFFERED=1` (uvicorn stdout flushed immediately).
+
+`docker-compose.yml`'s `app` service runs with `read_only: true` and a `tmpfs: /tmp:size=64m,mode=1777` mount. The kernel rejects writes to every path except the 64 MB tmpfs, so a post-exploit shell under the `app` user cannot modify `/app`, persist binaries, or fill the host's disk under `app`'s ownership. Verified locally: `touch /app/foo` → `Read-only file system`; `touch /tmp/foo` succeeds; healthcheck reports `healthy`.
+
 Healthcheck uses stdlib `urllib.request` so curl isn't in the image.
 
+### Distroless evaluation — deferred
+
+`gcr.io/distroless/python3-debian12` ships Python at `/usr/bin/python3` while the current builder stage materialises a venv whose `pyvenv.cfg` and interpreter symlinks reference `/usr/local/bin/python3.14` (Dockerfile comment makes this constraint explicit). Migrating requires either matching Python paths between stages (no distroless variant matches slim's `/usr/local`) or rebuilding the venv inside the runtime stage (distroless has no `pip` / `uv`). Either route adds engineering risk and operational friction (no `docker exec ... sh`) that outweighs the marginal attack-surface reduction now that read-only-FS + non-root + no-build-tools + trivy-scanning are all in place. Revisit when distroless ships a `/usr/local` variant or when the venv-in-runtime cost shrinks.
+
 ## What's intentionally out of scope (scaffold)
 
 - **WAF / DDoS** — deployment-environment concerns, not template concerns.

From ec96aecbdc56d53bc409d653e570158cabc3a707 Mon Sep 17 00:00:00 2001
From: Constantinos <41453723+constk@users.noreply.github.com>
Date: Thu, 30 Apr 2026 02:33:13 +1000
Subject: [PATCH 2/9] =?UTF-8?q?chore:=20file/function=20length=20caps=20?=
 =?UTF-8?q?=E2=80=94=20ruff=20PLR0915/PLR0912=20+=20check=5Ffile=5Flength.?=
 =?UTF-8?q?py=20(#124)=20(#69)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .github/branch-protection/develop.json |   1 +
 .github/branch-protection/main.json    |   1 +
 .github/scripts/check_file_length.py   | 114 +++++++++++++++++++++++++
 .github/workflows/ci.yml               |  14 +++
 pyproject.toml                         |  14 +++
 5 files changed, 144 insertions(+)
 create mode 100644 .github/scripts/check_file_length.py

diff --git a/.github/branch-protection/develop.json b/.github/branch-protection/develop.json
index 4412a3f..a603ec4 100644
--- a/.github/branch-protection/develop.json
+++ b/.github/branch-protection/develop.json
@@ -8,6 +8,7 @@
       "Coverage",
       "Architecture (import-linter)",
       "Pre-commit",
+      "File length",
       "Frontend Build",
       "Frontend Quality",
       "Branch-protection contexts sync",
diff --git a/.github/branch-protection/main.json b/.github/branch-protection/main.json
index d20d7b6..d476437 100644
--- a/.github/branch-protection/main.json
+++ b/.github/branch-protection/main.json
@@ -8,6 +8,7 @@
       "Coverage",
       "Architecture (import-linter)",
       "Pre-commit",
+      "File length",
       "Frontend Build",
       "Frontend Quality",
       "Branch-protection contexts sync",
diff --git a/.github/scripts/check_file_length.py b/.github/scripts/check_file_length.py
new file mode 100644
index 0000000..b9e4483
--- /dev/null
+++ b/.github/scripts/check_file_length.py
@@ -0,0 +1,114 @@
+#!/usr/bin/env python3
+"""Enforce the per-file line-count cap from `CLAUDE.md`.
+
+`CLAUDE.md` *Code standards*: *"No file over 300 lines, no function
+over ~50 lines."* The function-length half is enforced by ruff's
+`PLR0915` / `PLR0912` rules (`pyproject.toml [tool.ruff.lint].select`).
+This script enforces the file-length half.
+
+Behaviour:
+
+- Walks `src/`, `tests/`, `eval/`, `.github/scripts/` for `*.py` files.
+- For each file, counts lines (newline-terminated and final-line-without-
+  newline both count as one line).
+- Fails when any file exceeds `THRESHOLD = 300`.
+
+There is **no exemption mechanism**. Per `feedback_no_noqa`, an
+allowlist that records "current offenders with a tracker ticket" is a
+non-blocking deferral by another name — the team gets used to seeing
+the offenders listed, the tracker ticket sits open, and the rule never
+fully bites. Pre-existing offenders were refactored in #144 before this
+gate landed (six files / two functions split into helpers).
+
+If a file legitimately should not be capped (generated code, vendored
+sources), put it in a directory this script does not walk — and document
+the exemption as a structural decision in `docs/DEVELOPMENT.md`, not as
+an inline allowlist entry.
+
+Exit codes:
+  0 — every walked file is at or under `THRESHOLD`
+  1 — at least one file exceeds the cap
+  2 — script-level error (no walk-target directories at all)
+
+Usage (from repo root):
+
+    python .github/scripts/check_file_length.py
+"""
+
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+THRESHOLD = 300
+
+# Directories walked. Each is project-owned Python code subject to the
+# `CLAUDE.md` cap. Adding a new walk root requires a code change here
+# (and a comment naming the rationale) — there is deliberately no
+# environment-variable / CLI-flag override.
+ROOTS: tuple[str, ...] = (
+    "src",
+    "tests",
+    "eval",
+    ".github/scripts",
+)
+
+
+def count_lines(path: Path) -> int:
+    """Count newline-terminated lines plus a final un-terminated line, if any."""
+    text = path.read_text(encoding="utf-8")
+    if not text:
+        return 0
+    # `splitlines()` discards a trailing empty token, mirroring `wc -l + 1`
+    # for files without a trailing newline.
+    return len(text.splitlines())
+
+
+def _normalised(path: Path) -> str:
+    """Return the path with forward slashes (Windows / POSIX parity)."""
+    return path.as_posix()
+
+
+def main() -> int:
+    walked: list[Path] = []
+    failures: list[str] = []
+
+    for root_name in ROOTS:
+        root = Path(root_name)
+        if not root.is_dir():
+            # Directory may not exist yet (e.g. eval/ in a new fork). Skip cleanly.
+            continue
+        for path in sorted(root.rglob("*.py")):
+            walked.append(path)
+            lines = count_lines(path)
+            if lines > THRESHOLD:
+                failures.append(
+                    f"::error file={_normalised(path)}::{lines} lines > "
+                    f"{THRESHOLD} (per `CLAUDE.md`). Split the file or "
+                    "refactor — there is no exemption mechanism, see the "
+                    "module docstring."
+                )
+
+    if not walked:
+        print(f"::error::no walk targets exist; checked {ROOTS!r}")
+        return 2
+
+    if failures:
+        for line in failures:
+            print(line)
+        print(
+            f"\n{len(failures)} file(s) exceed the cap. Refactor in this PR — "
+            "splitting into single-responsibility modules or extracting helpers "
+            "is the same shape #144 used for the original offenders."
+        )
+        return 1
+
+    print(
+        f"File-length audit OK — {len(walked)} file(s) checked across "
+        f"{len(ROOTS)} root(s), threshold {THRESHOLD}."
+    )
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index baad7ec..99b29e5 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -91,6 +91,20 @@ jobs:
       - run: uv sync --frozen --extra dev
       - run: uv run pre-commit run --all-files --show-diff-on-failure
 
+  file-length:
+    name: File length
+    runs-on: ubuntu-latest
+    # CLAUDE.md: "no file over 300 lines, no function over ~50 lines". Ruff
+    # PLR0915 / PLR0912 enforce the function-half (run by `Lint & Format`);
+    # this job enforces the file-half. No exemption mechanism — pre-existing
+    # offenders should be split before this job lands, not allowlisted.
+    steps:
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
+      - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065  # v5
+        with:
+          python-version: "3.14"
+      - run: python .github/scripts/check_file_length.py
+
   frontend-build:
     name: Frontend Build
     runs-on: ubuntu-latest
diff --git a/pyproject.toml b/pyproject.toml
index 1d4574a..1789d98 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -82,6 +82,14 @@ select = [
     "TCH",  # flake8-type-checking
     "S",    # flake8-bandit (security checks — SQL injection, hardcoded crypto, etc.)
     "RUF",  # ruff-specific rules
+    # PLR0915 (too-many-statements) + PLR0912 (too-many-branches) enforce the
+    # function-complexity half of CLAUDE.md *Code standards*. Limits pinned
+    # explicitly in [tool.ruff.lint.pylint] below to defend against an
+    # upstream ruff default change silently widening the cap. The file-half
+    # of the rule is enforced by .github/scripts/check_file_length.py
+    # (300-line cap, no exemption mechanism).
+    "PLR0915",
+    "PLR0912",
 ]
 
 [tool.ruff.lint.per-file-ignores]
@@ -90,6 +98,12 @@ select = [
 "tests/**" = ["S101"]  # pytest asserts are idiomatic (rewritten for rich errors)
 ".claude/hooks/**" = ["S603", "S607"]  # hook scripts intentionally run git/uv/npx from PATH
 
+[tool.ruff.lint.pylint]
+# Pinned explicitly — ruff's defaults can drift between versions. Matches
+# CLAUDE.md "no function over ~50 lines".
+max-statements = 50
+max-branches = 12
+
 [tool.ruff.lint.isort]
 known-first-party = ["src"]
 

From caf93d3543125d497e877b20de6ccb44ce74a7bb Mon Sep 17 00:00:00 2001
From: Constantinos <41453723+constk@users.noreply.github.com>
Date: Thu, 30 Apr 2026 12:40:38 +1000
Subject: [PATCH 3/9] =?UTF-8?q?chore:=20three=20CI=20gates=20=E2=80=94=20v?=
 =?UTF-8?q?ersion=20bump,=20action=20pinning,=20tests=20required=20(#122,?=
 =?UTF-8?q?=20#123,=20#125)=20(#70)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .github/branch-protection/develop.json    |   3 +
 .github/branch-protection/main.json       |   3 +
 .github/scripts/check_action_pins.py      | 245 ++++++++++++++++++++
 .github/scripts/check_tests_present.py    | 187 +++++++++++++++
 .github/scripts/check_version_bump.py     | 174 ++++++++++++++
 .github/workflows/ci.yml                  |  46 ++++
 docs/DEVELOPMENT.md                       |  22 +-
 pyproject.toml                            |   2 +-
 tests/test_check_action_pins.py           | 270 ++++++++++++++++++++++
 tests/test_check_action_pins_composite.py | 186 +++++++++++++++
 tests/test_check_tests_present.py         | 219 ++++++++++++++++++
 tests/test_check_version_bump.py          | 241 +++++++++++++++++++
 uv.lock                                   |   2 +-
 13 files changed, 1597 insertions(+), 3 deletions(-)
 create mode 100644 .github/scripts/check_action_pins.py
 create mode 100644 .github/scripts/check_tests_present.py
 create mode 100644 .github/scripts/check_version_bump.py
 create mode 100644 tests/test_check_action_pins.py
 create mode 100644 tests/test_check_action_pins_composite.py
 create mode 100644 tests/test_check_tests_present.py
 create mode 100644 tests/test_check_version_bump.py

diff --git a/.github/branch-protection/develop.json b/.github/branch-protection/develop.json
index a603ec4..6187955 100644
--- a/.github/branch-protection/develop.json
+++ b/.github/branch-protection/develop.json
@@ -9,6 +9,9 @@
       "Architecture (import-linter)",
       "Pre-commit",
       "File length",
+      "Version bump check",
+      "Action pinning audit",
+      "Tests required",
       "Frontend Build",
       "Frontend Quality",
       "Branch-protection contexts sync",
diff --git a/.github/branch-protection/main.json b/.github/branch-protection/main.json
index d476437..fecf6fd 100644
--- a/.github/branch-protection/main.json
+++ b/.github/branch-protection/main.json
@@ -9,6 +9,9 @@
       "Architecture (import-linter)",
       "Pre-commit",
       "File length",
+      "Version bump check",
+      "Action pinning audit",
+      "Tests required",
       "Frontend Build",
       "Frontend Quality",
       "Branch-protection contexts sync",
diff --git a/.github/scripts/check_action_pins.py b/.github/scripts/check_action_pins.py
new file mode 100644
index 0000000..6490fd0
--- /dev/null
+++ b/.github/scripts/check_action_pins.py
@@ -0,0 +1,245 @@
+#!/usr/bin/env python3
+"""Audit GitHub Actions pin shapes against the project policy.
+
+Policy from `docs/DEVELOPMENT.md#action-pinning-policy`:
+
+- **First-party** (`actions/*`, `github/*`) — pin to **major tag**
+  (`@v\\d+`). SHA + trailing `# vN.M.P` comment is also accepted (stricter
+  posture; used in elevated-permissions workflows like `release.yml`).
+
+- **`astral-sh/setup-uv`** — pin to **latest patch tag**
+  (`@vN.M.P`). The maintainers do not publish a floating major tag for
+  the v8 series; `@v8` would not resolve. SHA + comment also accepted.
+
+- **Third-party** (anything else, including `aquasecurity/*`, `gitleaks/*`,
+  `amannn/*`, `release-drafter/*`) — pin to a **40-hex-char SHA** with a
+  trailing `# vN.M.P` comment naming the resolved tag. A moving branch
+  or re-tagged release in a supply-chain workflow defeats the point of
+  the scan/automation.
+
+The script walks every YAML file under `.github/workflows/`, extracts
+each `uses:` line with its trailing comment if any, and validates the
+pin against the policy bucket the action falls into.
+
+Exit codes:
+  0 — every action invocation matches policy
+  1 — one or more violations
+  2 — script-level error (no workflow files found, parse failure)
+
+Usage (from repo root):
+
+    python .github/scripts/check_action_pins.py
+
+Bumping a third-party action: open a focused PR that updates the SHA
+*and* the trailing comment. Dependabot's `github-actions` ecosystem
+opens those PRs automatically when new SHAs land.
+"""
+
+from __future__ import annotations
+
+import re
+import sys
+from dataclasses import dataclass
+from pathlib import Path
+
+WORKFLOWS_DIR = Path(".github/workflows")
+# Composite actions live under `.github/actions/<name>/action.yml` and can
+# carry their own `uses:` invocations of third-party actions in their
+# `runs.steps` block. Walk this dir alongside workflows (#137).
+ACTIONS_DIR = Path(".github/actions")
+
+# Captures `uses: <action>@<ref>` lines, optionally with a trailing comment.
+# Tolerant of the leading `- ` (list item) or no leading dash, and any
+# whitespace indent.
+_USES_RE = re.compile(r"^\s*-?\s*uses:\s*(?P<ref>\S+)\s*(?P<comment>#.*)?$")
+
+_SHA_RE = re.compile(r"^[0-9a-f]{40}$")
+_MAJOR_TAG_RE = re.compile(r"^v\d+$")
+_PATCH_TAG_RE = re.compile(r"^v\d+\.\d+\.\d+$")
+# A `# vN.M.P` annotation in the trailing comment. We accept partial
+# semver (`# v5`, `# v4.2`) because some upstream tags are minor-only,
+# but we require at least the leading `v\d+`.
+_VERSION_COMMENT_RE = re.compile(r"v\d+(\.\d+)*")
+
+
+@dataclass(frozen=True)
+class ActionRef:
+    """One `uses:` invocation: file:line, the @ref, and trailing comment."""
+
+    file: Path
+    line: int
+    action: str  # part before @, e.g. "actions/checkout"
+    pin: str  # part after @, e.g. "v4" or "11bd71..."
+    comment: str | None  # raw trailing comment incl. `#`, or None
+
+    def loc(self) -> str:
+        return f"{self.file}:{self.line}"
+
+
+def parse_workflow(path: Path) -> list[ActionRef]:
+    """Return every action invocation in a workflow or composite-action file.
+
+    Walks `uses:` lines, including those inside composite-action `runs.steps`
+    blocks (#137). Skips local-path references (`uses: ./...`) — those point
+    at repo-internal files (reusable workflows / composite actions in the
+    same repo) and don't carry a third-party-action pin to validate.
+    """
+    refs: list[ActionRef] = []
+    for line_num, line in enumerate(
+        path.read_text(encoding="utf-8").splitlines(), start=1
+    ):
+        match = _USES_RE.match(line)
+        if not match:
+            continue
+        ref_value = match.group("ref")
+        # Skip local-path references — these point at repo-internal files
+        # (reusable workflows or composite actions in this same repo) and
+        # don't carry an external pin shape we should validate. The path
+        # may have a `@ref` suffix (reusable workflows) or none (composite
+        # actions); either is fine.
+        if ref_value.startswith(("./", "../")):
+            continue
+        if "@" not in ref_value:
+            # Malformed — surface as a special error in validate_ref.
+            refs.append(
+                ActionRef(
+                    file=path,
+                    line=line_num,
+                    action=ref_value,
+                    pin="",
+                    comment=match.group("comment"),
+                )
+            )
+            continue
+        action, pin = ref_value.split("@", 1)
+        refs.append(
+            ActionRef(
+                file=path,
+                line=line_num,
+                action=action,
+                pin=pin,
+                comment=match.group("comment"),
+            )
+        )
+    return refs
+
+
+def classify(action: str) -> str:
+    """Return the pin-shape policy bucket for a given action.
+
+    - "patch-tag"        — astral-sh/setup-uv (no floating major tag)
+    - "major-tag"        — actions/* and github/* (first-party)
+    - "third-party-sha"  — everyone else (default for unknown)
+    """
+    if action == "astral-sh/setup-uv":
+        return "patch-tag"
+    if action.startswith(("actions/", "github/")):
+        return "major-tag"
+    return "third-party-sha"
+
+
+def validate_ref(ref: ActionRef) -> str | None:
+    """Return an error message if `ref` violates policy, else None.
+
+    SHA pins are always accepted (most-strict shape) for any action,
+    provided the trailing comment names the resolved version. Tag pins
+    are accepted only for the buckets the policy documents.
+    """
+    if not ref.pin:
+        return f"missing @ref on `uses: {ref.action}`"
+
+    bucket = classify(ref.action)
+
+    # SHA pins are stricter than tag pins; accept everywhere as long
+    # as a trailing version comment is present. The comment ties the
+    # opaque hash back to a human-reviewable changelog.
+    if _SHA_RE.match(ref.pin):
+        if not ref.comment or not _VERSION_COMMENT_RE.search(ref.comment):
+            return (
+                f"{ref.action}@{ref.pin[:8]}… — SHA pin missing trailing "
+                "`# vN.M.P` comment"
+            )
+        return None
+
+    # Tag pins — must match the bucket.
+    if bucket == "patch-tag":
+        if not _PATCH_TAG_RE.match(ref.pin):
+            return (
+                f"{ref.action}@{ref.pin} — astral-sh/setup-uv has no floating "
+                "major tag for the v8 series; pin to a patch tag (e.g. "
+                "`@v8.0.0`) or a SHA + trailing `# vN.M.P` comment"
+            )
+        return None
+
+    if bucket == "major-tag":
+        if not _MAJOR_TAG_RE.match(ref.pin):
+            return (
+                f"{ref.action}@{ref.pin} — first-party action requires major "
+                "tag (`@v4`) or SHA + trailing `# vN.M.P` comment"
+            )
+        return None
+
+    # third-party-sha — only SHA + comment is acceptable.
+    return (
+        f"{ref.action}@{ref.pin} — third-party action requires SHA pin "
+        "(40 hex chars) with trailing `# vN.M.P` comment, not a tag"
+    )
+
+
+def _collect_yaml_files() -> list[Path]:
+    """Workflow files + composite-action files. Composite dir is optional."""
+    files: list[Path] = []
+    files.extend(sorted(WORKFLOWS_DIR.glob("*.yml")))
+    files.extend(sorted(WORKFLOWS_DIR.glob("*.yaml")))
+    if ACTIONS_DIR.is_dir():
+        # `.github/actions/<name>/action.yml` (or `action.yaml`). Recursive
+        # glob so nested composite actions are picked up.
+        files.extend(sorted(ACTIONS_DIR.glob("**/action.yml")))
+        files.extend(sorted(ACTIONS_DIR.glob("**/action.yaml")))
+    return files
+
+
+def main() -> int:
+    if not WORKFLOWS_DIR.is_dir():
+        print(f"::error::workflows dir not found: {WORKFLOWS_DIR}")
+        return 2
+
+    yml_files = _collect_yaml_files()
+    if not yml_files:
+        print(f"::error::no workflow files in {WORKFLOWS_DIR}")
+        return 2
+
+    refs: list[ActionRef] = []
+    for path in yml_files:
+        refs.extend(parse_workflow(path))
+
+    if not refs:
+        print(f"::error::no `uses:` lines found across {len(yml_files)} workflow files")
+        return 2
+
+    failed = False
+    for ref in refs:
+        problem = validate_ref(ref)
+        if problem is None:
+            continue
+        failed = True
+        # GitHub Actions error annotation: surfaces inline on the file in PR review.
+        print(f"::error file={ref.file},line={ref.line}::{problem}")
+
+    if failed:
+        print(
+            "\nSee docs/DEVELOPMENT.md#action-pinning-policy for the rule. "
+            "Dependabot's `github-actions` ecosystem opens SHA-bump PRs "
+            "automatically when new tags ship."
+        )
+        return 1
+
+    print(
+        f"Action pins audit OK — {len(refs)} pins, {len(yml_files)} files "
+        f"(workflows + composite actions)."
+    )
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/.github/scripts/check_tests_present.py b/.github/scripts/check_tests_present.py
new file mode 100644
index 0000000..3c689b1
--- /dev/null
+++ b/.github/scripts/check_tests_present.py
@@ -0,0 +1,187 @@
+#!/usr/bin/env python3
+"""Verify behaviour-changing PRs ship tests alongside the src/ change.
+
+`docs/DEVELOPMENT.md` *Testing Policy*: every PR that adds or changes
+behaviour must include tests. Today only the 75% coverage gate
+approximates this — a PR can drop coverage from 95% to 76% on
+uncovered new code and still pass. This gate adds direct enforcement.
+
+Behaviour:
+
+- Reads `git diff --name-only origin/<base>...HEAD` for `src/**/*.py`
+  and `tests/**/*.py`.
+- Determines the PR's commit-type prefix from the event payload's
+  `pull_request.title` (same source as `check_version_bump.py`).
+- If `src/**/*.py` files changed AND no `tests/**/*.py` file was
+  touched in the same diff:
+  - **Block (exit 1)** when the prefix is `feat:` or `fix:` — these
+    types declare a behaviour change that the policy says must come
+    with tests.
+  - **Warn (exit 0)** for `chore:`, `docs:`, `refactor:`, `test:`,
+    `release:` — these may legitimately touch `src/` without new
+    tests (config rename, internal-only refactor, doc-string fix).
+
+Exit codes:
+  0 — pass (or warn-only)
+  1 — block: behaviour-change PR shipped without tests
+  2 — script-level error (missing env, parse failure)
+
+Usage (CI, on pull_request events):
+
+    python .github/scripts/check_tests_present.py
+
+Required env (set automatically by GitHub Actions):
+- ``GITHUB_BASE_REF``   — base branch name (e.g. "develop")
+- ``GITHUB_EVENT_PATH`` — path to event JSON (for PR title)
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import subprocess
+import sys
+from pathlib import Path
+
+# Prefixes that declare a behaviour change → tests required.
+BLOCKING_PREFIXES: frozenset[str] = frozenset({"feat", "fix"})
+
+# Prefixes that may legitimately touch src/ without new tests → warn-only.
+WARN_ONLY_PREFIXES: frozenset[str] = frozenset(
+    {"chore", "docs", "refactor", "test", "release"}
+)
+
+
+def pr_title_from_event() -> str | None:
+    """Read the PR title from the GitHub Actions event payload, if present."""
+    event_path = os.environ.get("GITHUB_EVENT_PATH")
+    if not event_path:
+        return None
+    try:
+        data = json.loads(Path(event_path).read_text(encoding="utf-8"))
+    except OSError, json.JSONDecodeError:
+        return None
+    pr = data.get("pull_request")
+    if not isinstance(pr, dict):
+        return None
+    title = pr.get("title")
+    return title if isinstance(title, str) else None
+
+
+def commit_type_prefix(title: str | None) -> str | None:
+    """Extract the conventional-commit type prefix from a PR title.
+
+    Examples: "feat: add tool" → "feat", "chore(api): rename" → "chore",
+    "release: v1.2.3" → "release". Returns None if the title doesn't
+    contain a colon (i.e. doesn't match the `type: subject` shape).
+    """
+    if not title or ":" not in title:
+        return None
+    head = title.lstrip()
+    # Type ends at the first of "(", "!", ":" — whichever comes first.
+    # The colon is mandatory (already checked above), so we always find one.
+    earliest = len(head)
+    for terminator in ("(", "!", ":"):
+        idx = head.find(terminator)
+        if 0 < idx < earliest:
+            earliest = idx
+    return head[:earliest].lower() if earliest > 0 else None
+
+
+def changed_files(base_ref: str) -> list[str]:
+    """Return paths changed between `origin/<base_ref>` and HEAD."""
+    try:
+        out = subprocess.check_output(  # noqa: S603 - args are CI-trusted
+            [  # noqa: S607 - git on PATH
+                "git",
+                "diff",
+                "--name-only",
+                f"origin/{base_ref}...HEAD",
+            ],
+            text=True,
+            stderr=subprocess.PIPE,
+        )
+    except subprocess.CalledProcessError as e:
+        msg = (
+            f"git diff origin/{base_ref}...HEAD failed: "
+            f"{e.stderr.strip() or e.returncode}"
+        )
+        raise RuntimeError(msg) from e
+    return [line for line in out.splitlines() if line.strip()]
+
+
+def _is_under(path: str, dirname: str) -> bool:
+    """True when `path` is inside `<dirname>/`."""
+    return path.startswith(dirname + "/") or path == dirname
+
+
+def main() -> int:
+    base_ref = os.environ.get("GITHUB_BASE_REF")
+    if not base_ref:
+        print("::warning::GITHUB_BASE_REF not set; skipping (not a PR run).")
+        return 0
+
+    title = pr_title_from_event()
+    prefix = commit_type_prefix(title)
+
+    try:
+        files = changed_files(base_ref)
+    except RuntimeError as e:
+        print(f"::error::{e}")
+        return 2
+
+    src_py = [f for f in files if _is_under(f, "src") and f.endswith(".py")]
+    tests_py = [f for f in files if _is_under(f, "tests") and f.endswith(".py")]
+
+    if not src_py:
+        print("No `src/**/*.py` changes — gate not applicable.")
+        return 0
+
+    if tests_py:
+        print(
+            f"src/ changed ({len(src_py)} file(s)); tests/ also touched "
+            f"({len(tests_py)} file(s)). OK."
+        )
+        return 0
+
+    # src/ changed, tests/ untouched — decide based on prefix.
+    src_summary = ", ".join(src_py[:5]) + (
+        f" (+{len(src_py) - 5} more)" if len(src_py) > 5 else ""
+    )
+
+    if prefix in BLOCKING_PREFIXES:
+        print(
+            f"::error::PR title prefix `{prefix}:` declares a behaviour "
+            "change but the diff touches `src/` without any "
+            "`tests/**/*.py` change. Per `docs/DEVELOPMENT.md` Testing "
+            "Policy, every behaviour-change PR ships tests.\n"
+            f"  src/ files changed: {src_summary}\n"
+            "  Fix: add or update a test in `tests/`, or restructure the "
+            "PR if the change is genuinely test-exempt (rename, comment-only, "
+            "internal refactor) — and use a different commit-type prefix "
+            f"({sorted(WARN_ONLY_PREFIXES)})."
+        )
+        return 1
+
+    if prefix in WARN_ONLY_PREFIXES:
+        print(
+            f"::warning::PR title prefix `{prefix}:` touches `src/` "
+            "without `tests/` changes. Allowed for this prefix; "
+            "double-check that no behaviour change slipped in.\n"
+            f"  src/ files changed: {src_summary}"
+        )
+        return 0
+
+    # Unknown / missing prefix — fall through to warn-only. The
+    # `Lint PR title` and `Commit-type sync` gates already enforce
+    # the 7-prefix list elsewhere; this script doesn't duplicate that.
+    print(
+        f"::warning::PR title prefix not recognised ({prefix!r}); "
+        "tests-required gate skipped. The `Lint PR title` job is the "
+        "source of truth for prefix validation."
+    )
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/.github/scripts/check_version_bump.py b/.github/scripts/check_version_bump.py
new file mode 100644
index 0000000..4510c0f
--- /dev/null
+++ b/.github/scripts/check_version_bump.py
@@ -0,0 +1,174 @@
+#!/usr/bin/env python3
+"""Verify pyproject version was bumped on this PR.
+
+Per `docs/DEVELOPMENT.md`: every non-`release:` PR bumps `[project] version`
+in `pyproject.toml`. `release:` PRs are exempt — the dev version IS the
+release version. The gate does not enforce semver direction (feat → MINOR
+vs fix → PATCH); it only enforces that *some* bump happened, since
+direction is conventionally documented but mechanically ambiguous.
+
+Sibling check: `uv.lock`'s `[[package]] name = "harness-python-react"`
+block must show the same version. Hand-edit the lockfile to avoid silent
+transitive-dep upgrades that `uv lock` would pull in.
+
+Usage (CI, on pull_request events):
+
+    python .github/scripts/check_version_bump.py
+
+Required env (set by GitHub Actions on `pull_request`):
+
+- `GITHUB_BASE_REF`   — base branch name (e.g. "develop")
+- `GITHUB_EVENT_PATH` — path to event JSON (used to read PR title)
+
+Exit codes:
+  0 — version bumped (or `release:` PR, exempt)
+  1 — version not bumped, or pyproject + uv.lock disagree
+  2 — script-level error (missing env, parse failure)
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import re
+import subprocess
+import sys
+import tomllib
+from pathlib import Path
+
+PYPROJECT = Path("pyproject.toml")
+UV_LOCK = Path("uv.lock")
+PACKAGE_NAME = "harness-python-react"
+
+# Match the project's self-version block in uv.lock:
+#
+#     [[package]]
+#     name = "harness-python-react"
+#     version = "0.1.0"
+#
+# Tolerant of whitespace variation across uv versions.
+_LOCK_BLOCK_RE = re.compile(
+    r"\[\[package\]\]\s*\n"
+    rf'name\s*=\s*"{re.escape(PACKAGE_NAME)}"\s*\n'
+    r'version\s*=\s*"([^"]+)"',
+)
+
+
+def pyproject_version(toml_text: str) -> str:
+    """Extract `[project] version` from a pyproject.toml string."""
+    data = tomllib.loads(toml_text)
+    version = data.get("project", {}).get("version")
+    if not isinstance(version, str) or not version:
+        msg = "[project] version not found in pyproject.toml"
+        raise ValueError(msg)
+    return version
+
+
+def uv_lock_self_version(lock_text: str) -> str:
+    """Extract the project self-version from a uv.lock string."""
+    match = _LOCK_BLOCK_RE.search(lock_text)
+    if not match:
+        msg = (
+            f'Could not find [[package]] name = "{PACKAGE_NAME}" block in '
+            "uv.lock. Has the project name changed?"
+        )
+        raise ValueError(msg)
+    return match.group(1)
+
+
+def git_show_at_base(path: Path, base_ref: str) -> str:
+    """Read a tracked file's content at `origin/<base_ref>` via git.
+
+    `base_ref` is sourced from `GITHUB_BASE_REF` which GitHub Actions sets to
+    a validated branch name; `path` is a static `Path` constant in this module.
+    Neither is user input, so the bandit S603/S607 warnings on the subprocess
+    call are suppressed.
+    """
+    try:
+        return subprocess.check_output(  # noqa: S603 - args are CI-trusted
+            ["git", "show", f"origin/{base_ref}:{path}"],  # noqa: S607 - git on PATH
+            text=True,
+            stderr=subprocess.PIPE,
+        )
+    except subprocess.CalledProcessError as e:
+        msg = (
+            f"git show origin/{base_ref}:{path} failed: "
+            f"{e.stderr.strip() or e.returncode}"
+        )
+        raise RuntimeError(msg) from e
+
+
+def pr_title_from_event() -> str | None:
+    """Read the PR title from the GitHub Actions event payload, if present."""
+    event_path = os.environ.get("GITHUB_EVENT_PATH")
+    if not event_path:
+        return None
+    try:
+        data = json.loads(Path(event_path).read_text(encoding="utf-8"))
+    except OSError, json.JSONDecodeError:
+        return None
+    pr = data.get("pull_request")
+    if not isinstance(pr, dict):
+        return None
+    title = pr.get("title")
+    return title if isinstance(title, str) else None
+
+
+def is_release_pr(title: str | None) -> bool:
+    """A `release:` PR is exempt from the bump requirement."""
+    if not title:
+        return False
+    return title.lstrip().lower().startswith("release:")
+
+
+def main() -> int:
+    base_ref = os.environ.get("GITHUB_BASE_REF")
+    if not base_ref:
+        print("::warning::GITHUB_BASE_REF not set; skipping (not a PR run).")
+        return 0
+
+    title = pr_title_from_event()
+    if is_release_pr(title):
+        print(f"release: PR — version bump not required ({title!r})")
+        return 0
+
+    try:
+        head_pp = pyproject_version(PYPROJECT.read_text(encoding="utf-8"))
+        head_lock = uv_lock_self_version(UV_LOCK.read_text(encoding="utf-8"))
+        base_pp = pyproject_version(git_show_at_base(PYPROJECT, base_ref))
+    except (OSError, ValueError, RuntimeError) as e:
+        print(f"::error::{e}")
+        return 2
+
+    print(f"Base ({base_ref}) pyproject version: {base_pp}")
+    print(f"HEAD pyproject version:               {head_pp}")
+    print(f"HEAD uv.lock self-version:            {head_lock}")
+
+    failed = False
+
+    if head_pp == base_pp:
+        print(
+            f"::error::pyproject version unchanged from {base_ref} "
+            f"({head_pp}). Per docs/DEVELOPMENT.md every non-release: PR "
+            "bumps the version (PATCH for fix/refactor/test/docs/chore; "
+            "MINOR for feat). Hand-edit the self-version line in uv.lock too."
+        )
+        failed = True
+    else:
+        print(f"Version bumped: {base_pp} -> {head_pp} OK")
+
+    if head_pp != head_lock:
+        print(
+            f"::error::pyproject ({head_pp}) and uv.lock ({head_lock}) "
+            "self-version disagree. Hand-edit the uv.lock "
+            f'[[package]] name = "{PACKAGE_NAME}" block to match pyproject.toml.'
+        )
+        failed = True
+    else:
+        print("pyproject + uv.lock self-version match OK")
+
+    return 1 if failed else 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 99b29e5..fcda20a 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -105,6 +105,52 @@ jobs:
           python-version: "3.14"
       - run: python .github/scripts/check_file_length.py
 
+  version-bump:
+    name: Version bump check
+    runs-on: ubuntu-latest
+    # Every non-`release:` PR bumps [project] version in pyproject.toml AND
+    # the matching [[package]] block in uv.lock. Closes the bump-miss class
+    # that the 75 % coverage gate cannot detect on its own.
+    if: github.event_name == 'pull_request'
+    steps:
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
+        with:
+          fetch-depth: 0  # full history so `git show origin/<base>:` resolves
+      - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065  # v5
+        with:
+          python-version: "3.14"
+      - run: python .github/scripts/check_version_bump.py
+
+  action-pinning:
+    name: Action pinning audit
+    runs-on: ubuntu-latest
+    # Validates every `uses:` line in .github/workflows/ + .github/actions/
+    # against the policy in docs/DEVELOPMENT.md#action-pinning-policy.
+    # First-party = major tag; astral-sh/setup-uv = patch tag; third-party
+    # = SHA + trailing `# vN.M.P` comment.
+    steps:
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
+      - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065  # v5
+        with:
+          python-version: "3.14"
+      - run: python .github/scripts/check_action_pins.py
+
+  tests-required:
+    name: Tests required
+    runs-on: ubuntu-latest
+    # `feat:` / `fix:` PRs that touch `src/` must touch `tests/` too.
+    # Per docs/DEVELOPMENT.md Testing Policy. Other prefixes get a warn-only
+    # `::warning::` if src/ is touched without tests/.
+    if: github.event_name == 'pull_request'
+    steps:
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
+        with:
+          fetch-depth: 0  # full history so the diff resolves
+      - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065  # v5
+        with:
+          python-version: "3.14"
+      - run: python .github/scripts/check_tests_present.py
+
   frontend-build:
     name: Frontend Build
     runs-on: ubuntu-latest
diff --git a/docs/DEVELOPMENT.md b/docs/DEVELOPMENT.md
index b42f235..c844ffc 100644
--- a/docs/DEVELOPMENT.md
+++ b/docs/DEVELOPMENT.md
@@ -92,7 +92,7 @@ Subject is **lowercase after the colon** (Title Case is rejected unless it's an
 
 | Workflow | Triggers | Required? |
 |---|---|---|
-| `ci.yml` | push/PR to develop+main | Yes — 8 backend + 2 frontend jobs |
+| `ci.yml` | push/PR to develop+main | Yes — backend + frontend gates + meta-gates + version/action/tests audits |
 | `security.yml` | push/PR + weekly schedule | Yes — 4 jobs (gitleaks, pip-audit, npm audit, trivy) |
 | `pr-title.yml` | PR open/edit/sync | Yes — conventional-commit lint |
 | `release.yml` | tag `v*.*.*` | No — tag-triggered |
@@ -102,6 +102,26 @@ Subject is **lowercase after the colon** (Title Case is rejected unless it's an
 | `eval-nightly.yml` | `workflow_dispatch` only by default | No |
 | `codeql.yml` | `workflow_dispatch` only (placeholder) | No |
 
+### Action-pinning policy
+
+Audited by the `Action pinning audit` CI job (`.github/scripts/check_action_pins.py`). Three buckets:
+
+- **First-party** (`actions/*`, `github/*`) — pin to **major tag** (`@v4`). SHA + trailing `# vN.M.P` comment also accepted; that's the form used in elevated-permissions workflows (release, branch-protection).
+- **`astral-sh/setup-uv`** — pin to **latest patch tag** (`@v8.0.0`) or SHA. astral-sh does not publish a floating major tag for v8; `@v8` would not resolve.
+- **Third-party** (anything else) — pin to a **40-hex-char SHA** with a trailing `# vN.M.P` comment naming the resolved tag. A moving branch in a supply-chain workflow defeats the point of the scan.
+
+When bumping a third-party action, update the SHA *and* the trailing comment in the same PR. Dependabot's `github-actions` ecosystem opens those PRs automatically.
+
+### Version-bump policy
+
+Audited by the `Version bump check` CI job (`.github/scripts/check_version_bump.py`). Every PR bumps `[project] version` in `pyproject.toml` AND the matching `[[package]]` block in `uv.lock`. The bump direction follows commitizen's `bump_map` in `pyproject.toml` — `feat:` is MINOR, everything else is PATCH. `release:` PRs are exempt because the dev version IS the release version.
+
+The `uv.lock` self-version is hand-edited (one line); avoid `uv lock` mid-PR because it would re-resolve transitive deps and pull in unintended upgrades. The `Version bump check` gate enforces both halves.
+
+### Testing policy
+
+Audited by the `Tests required` CI job (`.github/scripts/check_tests_present.py`). `feat:` and `fix:` PRs that touch `src/` MUST also touch `tests/`. Other prefixes get a `::warning::` if `src/` is touched without tests but don't block. The 75 % coverage gate alone doesn't catch behaviour-change-without-test (a single new line on already-covered code can pass), which is why this is a separate axis.
+
 ## Local agent hook setup
 
 The `.claude/hooks/` scripts enforce the harness from the LLM-coder side: blocking `--no-verify`, scanning staged diffs for secrets, formatting after every Write/Edit. Opt in by copying the example settings file:
diff --git a/pyproject.toml b/pyproject.toml
index 1789d98..27e014a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "harness-python-react"
-version = "0.1.0"
+version = "0.2.0"
 description = "Production-quality LLM-driven coding harness — Python (FastAPI) backend, Vite + React + TypeScript frontend."
 readme = "README.md"
 requires-python = ">=3.14"
diff --git a/tests/test_check_action_pins.py b/tests/test_check_action_pins.py
new file mode 100644
index 0000000..9167dc0
--- /dev/null
+++ b/tests/test_check_action_pins.py
@@ -0,0 +1,270 @@
+"""Tests for `.github/scripts/check_action_pins.py`.
+
+Cover the policy buckets, the `parse_workflow` line extractor (including
+trailing comments and missing `@ref`), and `main()` end-to-end against
+fabricated workflow files in `tmp_path`.
+"""
+
+from __future__ import annotations
+
+import importlib.util
+import sys
+from pathlib import Path
+from typing import Any
+
+import pytest
+
+REPO_ROOT = Path(__file__).resolve().parent.parent
+SCRIPT_PATH = REPO_ROOT / ".github" / "scripts" / "check_action_pins.py"
+
+
+def _load_script() -> Any:
+    spec = importlib.util.spec_from_file_location("check_action_pins", SCRIPT_PATH)
+    if spec is None or spec.loader is None:
+        msg = f"Could not load script at {SCRIPT_PATH}"
+        raise RuntimeError(msg)
+    module = importlib.util.module_from_spec(spec)
+    sys.modules[spec.name] = module
+    spec.loader.exec_module(module)
+    return module
+
+
+cap = _load_script()
+
+
+# ---------- classify ----------
+
+
+@pytest.mark.parametrize(
+    "action,bucket",
+    [
+        ("astral-sh/setup-uv", "patch-tag"),
+        ("actions/checkout", "major-tag"),
+        ("actions/setup-python", "major-tag"),
+        ("actions/setup-node", "major-tag"),
+        ("actions/upload-artifact", "major-tag"),
+        ("github/codeql-action", "major-tag"),
+        ("github/codeql-action/init", "major-tag"),
+        ("github/codeql-action/analyze", "major-tag"),
+        ("aquasecurity/trivy-action", "third-party-sha"),
+        ("gitleaks/gitleaks-action", "third-party-sha"),
+        ("amannn/action-semantic-pull-request", "third-party-sha"),
+        ("release-drafter/release-drafter", "third-party-sha"),
+        ("some-other-org/random-action", "third-party-sha"),
+    ],
+)
+def test_classify(action: str, bucket: str) -> None:
+    assert cap.classify(action) == bucket
+
+
+# ---------- validate_ref ----------
+
+
+def _ref(action: str, pin: str, comment: str | None = None) -> Any:
+    return cap.ActionRef(
+        file=Path("dummy.yml"), line=1, action=action, pin=pin, comment=comment
+    )
+
+
+def test_validate_first_party_major_tag_ok() -> None:
+    assert cap.validate_ref(_ref("actions/checkout", "v4")) is None
+
+
+def test_validate_first_party_patch_tag_rejected() -> None:
+    err = cap.validate_ref(_ref("actions/checkout", "v4.2.2"))
+    assert err is not None
+    assert "first-party" in err and "major tag" in err
+
+
+def test_validate_first_party_sha_with_comment_ok() -> None:
+    sha = "11bd71901bbe5b1630ceea73d27597364c9af683"
+    assert cap.validate_ref(_ref("actions/checkout", sha, "# v4.2.2")) is None
+
+
+def test_validate_first_party_sha_without_comment_rejected() -> None:
+    sha = "11bd71901bbe5b1630ceea73d27597364c9af683"
+    err = cap.validate_ref(_ref("actions/checkout", sha))
+    assert err is not None and "missing trailing" in err
+
+
+def test_validate_setup_uv_patch_tag_ok() -> None:
+    assert cap.validate_ref(_ref("astral-sh/setup-uv", "v8.0.0")) is None
+
+
+def test_validate_setup_uv_major_tag_rejected() -> None:
+    err = cap.validate_ref(_ref("astral-sh/setup-uv", "v8"))
+    assert err is not None
+    assert "no floating major tag" in err
+
+
+def test_validate_setup_uv_sha_with_comment_ok() -> None:
+    sha = "cec208311dfd045dd5311c1add060b2062131d57"
+    assert cap.validate_ref(_ref("astral-sh/setup-uv", sha, "# v8.0.0")) is None
+
+
+def test_validate_third_party_tag_rejected() -> None:
+    err = cap.validate_ref(_ref("release-drafter/release-drafter", "v6"))
+    assert err is not None
+    assert "third-party" in err and "SHA pin" in err
+
+
+def test_validate_third_party_sha_with_comment_ok() -> None:
+    sha = "48f256284bd46cdaab1048c3721360e808335d50"
+    assert (
+        cap.validate_ref(_ref("amannn/action-semantic-pull-request", sha, "# v6.1.1"))
+        is None
+    )
+
+
+def test_validate_third_party_sha_without_comment_rejected() -> None:
+    sha = "48f256284bd46cdaab1048c3721360e808335d50"
+    err = cap.validate_ref(_ref("amannn/action-semantic-pull-request", sha))
+    assert err is not None and "missing trailing" in err
+
+
+def test_validate_partial_version_comment_accepted() -> None:
+    # `# v5` is accepted — some upstream tags are major-only.
+    sha = "a26af69be951a213d495a4c3e4e4022e16d87065"
+    assert cap.validate_ref(_ref("actions/setup-python", sha, "# v5")) is None
+
+
+def test_validate_comment_without_version_rejected() -> None:
+    sha = "a26af69be951a213d495a4c3e4e4022e16d87065"
+    err = cap.validate_ref(_ref("actions/setup-python", sha, "# pinned for security"))
+    assert err is not None and "missing trailing" in err
+
+
+def test_validate_missing_at_ref_rejected() -> None:
+    err = cap.validate_ref(_ref("actions/checkout", ""))
+    assert err is not None and "missing @ref" in err
+
+
+def test_validate_third_party_short_hex_rejected() -> None:
+    # Looks like a SHA but isn't 40 chars; falls through to tag-shape check.
+    err = cap.validate_ref(_ref("aquasecurity/trivy-action", "abc123", "# v0.36.0"))
+    assert err is not None  # third-party with non-SHA pin
+
+
+# ---------- parse_workflow ----------
+
+
+def test_parse_workflow_extracts_refs(tmp_path: Path) -> None:
+    yml = tmp_path / "wf.yml"
+    setup_python_sha = "a26af69be951a213d495a4c3e4e4022e16d87065"
+    yml.write_text(
+        "jobs:\n"
+        "  build:\n"
+        "    steps:\n"
+        "      - uses: actions/checkout@v4\n"
+        "      - uses: astral-sh/setup-uv@v8.0.0\n"
+        f"      - uses: actions/setup-python@{setup_python_sha}  # v5\n"
+        "      - run: echo hi\n",
+        encoding="utf-8",
+    )
+    refs = cap.parse_workflow(yml)
+    assert len(refs) == 3
+    assert refs[0].action == "actions/checkout"
+    assert refs[0].pin == "v4"
+    assert refs[0].comment is None
+    assert refs[1].action == "astral-sh/setup-uv"
+    assert refs[1].pin == "v8.0.0"
+    assert refs[2].action == "actions/setup-python"
+    assert refs[2].comment == "# v5"
+
+
+def test_parse_workflow_handles_missing_at_ref(tmp_path: Path) -> None:
+    yml = tmp_path / "wf.yml"
+    yml.write_text(
+        "jobs:\n"
+        "  build:\n"
+        "    steps:\n"
+        "      - uses: actions/checkout\n",  # malformed — no @ref
+        encoding="utf-8",
+    )
+    refs = cap.parse_workflow(yml)
+    assert len(refs) == 1
+    assert refs[0].action == "actions/checkout"
+    assert refs[0].pin == ""
+
+
+# ---------- main() end-to-end ----------
+
+
+def _setup_workflows_dir(tmp_path: Path, files: dict[str, str]) -> Path:
+    """Create a tmp `.github/workflows/` and chdir into the parent."""
+    wf_dir = tmp_path / ".github" / "workflows"
+    wf_dir.mkdir(parents=True)
+    for name, content in files.items():
+        (wf_dir / name).write_text(content, encoding="utf-8")
+    return tmp_path
+
+
+def test_main_clean_passes(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
+) -> None:
+    _setup_workflows_dir(
+        tmp_path,
+        {
+            "ci.yml": (
+                "jobs:\n  build:\n    steps:\n"
+                "      - uses: actions/checkout@v4\n"
+                "      - uses: astral-sh/setup-uv@v8.0.0\n"
+            ),
+        },
+    )
+    monkeypatch.chdir(tmp_path)
+    assert cap.main() == 0
+    assert "Action pins audit OK" in capsys.readouterr().out
+
+
+def test_main_third_party_tag_fails(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
+) -> None:
+    _setup_workflows_dir(
+        tmp_path,
+        {
+            "release-drafter.yml": (
+                "jobs:\n  draft:\n    steps:\n"
+                "      - uses: release-drafter/release-drafter@v6\n"
+            ),
+        },
+    )
+    monkeypatch.chdir(tmp_path)
+    assert cap.main() == 1
+    out = capsys.readouterr().out
+    assert "::error file=" in out
+    assert "third-party" in out
+
+
+def test_main_setup_uv_major_tag_fails(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
+) -> None:
+    _setup_workflows_dir(
+        tmp_path,
+        {
+            "ci.yml": (
+                "jobs:\n  build:\n    steps:\n      - uses: astral-sh/setup-uv@v8\n"
+            ),
+        },
+    )
+    monkeypatch.chdir(tmp_path)
+    assert cap.main() == 1
+    out = capsys.readouterr().out
+    assert "no floating major tag" in out
+
+
+def test_main_no_workflow_files_exits_2(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
+) -> None:
+    (tmp_path / ".github" / "workflows").mkdir(parents=True)
+    monkeypatch.chdir(tmp_path)
+    assert cap.main() == 2
+    assert "no workflow files" in capsys.readouterr().out
+
+
+def test_main_no_workflows_dir_exits_2(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
+) -> None:
+    monkeypatch.chdir(tmp_path)
+    assert cap.main() == 2
+    assert "workflows dir not found" in capsys.readouterr().out
diff --git a/tests/test_check_action_pins_composite.py b/tests/test_check_action_pins_composite.py
new file mode 100644
index 0000000..b3790fb
--- /dev/null
+++ b/tests/test_check_action_pins_composite.py
@@ -0,0 +1,186 @@
+"""Tests for the composite-action + local-path coverage in check_action_pins (#137).
+
+Sibling to `tests/test_check_action_pins.py` (which is at the line-cap
+edge). Covers the `parse_workflow` extensions added in #137:
+
+- `.github/actions/<name>/action.yml` files are walked alongside workflows.
+- Local-path `uses: ./...` references are skipped (not flagged as
+  missing-`@`).
+
+`test_check_action_pins.py` covers the workflow case + policy buckets;
+this file covers the composite + local-path cases.
+"""
+
+from __future__ import annotations
+
+import importlib.util
+import sys
+from pathlib import Path
+from typing import Any
+from unittest.mock import patch
+
+REPO_ROOT = Path(__file__).resolve().parent.parent
+SCRIPT_PATH = REPO_ROOT / ".github" / "scripts" / "check_action_pins.py"
+
+
+def _load_script() -> Any:
+    spec = importlib.util.spec_from_file_location("check_action_pins", SCRIPT_PATH)
+    if spec is None or spec.loader is None:
+        msg = f"Could not load script at {SCRIPT_PATH}"
+        raise RuntimeError(msg)
+    module = importlib.util.module_from_spec(spec)
+    sys.modules[spec.name] = module
+    spec.loader.exec_module(module)
+    return module
+
+
+cap = _load_script()
+
+
+# ---------- parse_workflow on a composite-action file ----------
+
+
+def test_parse_walks_composite_action_steps(tmp_path: Path) -> None:
+    """`.github/actions/<name>/action.yml` files surface their `uses:` lines."""
+    action_yaml = tmp_path / "action.yml"
+    action_yaml.write_text(
+        "name: 'fake composite'\n"
+        "runs:\n"
+        "  using: composite\n"
+        "  steps:\n"
+        "    - uses: actions/checkout@v4\n"
+        "    - uses: aquasecurity/trivy-action@" + ("a" * 40) + "  # v0.36.0\n",
+        encoding="utf-8",
+    )
+    refs = cap.parse_workflow(action_yaml)
+    actions = [r.action for r in refs]
+    assert actions == ["actions/checkout", "aquasecurity/trivy-action"]
+
+
+def test_parse_skips_local_path_uses(tmp_path: Path) -> None:
+    """`uses: ./...` references resolve in-repo and aren't third-party pins."""
+    workflow = tmp_path / "fake.yml"
+    workflow.write_text(
+        "jobs:\n"
+        "  reuse:\n"
+        "    uses: ./.github/workflows/composite.yml@main\n"
+        "  call-composite:\n"
+        "    steps:\n"
+        "      - uses: ./.github/actions/local\n"
+        "      - uses: actions/checkout@v4\n",
+        encoding="utf-8",
+    )
+    refs = cap.parse_workflow(workflow)
+    actions = [r.action for r in refs]
+    # Both local-path entries skipped; only the third-party one survives.
+    assert actions == ["actions/checkout"]
+
+
+def test_parse_skips_dotdot_local_path(tmp_path: Path) -> None:
+    """`uses: ../action` (rare but legal) is also a local-path skip."""
+    workflow = tmp_path / "fake.yml"
+    workflow.write_text(
+        "jobs:\n  j:\n    steps:\n      - uses: ../shared/action\n",
+        encoding="utf-8",
+    )
+    assert cap.parse_workflow(workflow) == []
+
+
+# ---------- _collect_yaml_files end-to-end ----------
+
+
+def test_collect_includes_composite_actions(tmp_path: Path) -> None:
+    """`_collect_yaml_files` walks both workflows and `.github/actions/`."""
+    workflows = tmp_path / "workflows"
+    workflows.mkdir()
+    (workflows / "ci.yml").write_text(
+        "jobs:\n  j:\n    steps:\n      - uses: actions/checkout@v4\n",
+        encoding="utf-8",
+    )
+    actions_dir = tmp_path / "actions"
+    composite = actions_dir / "publish"
+    composite.mkdir(parents=True)
+    (composite / "action.yml").write_text(
+        "name: 'publish'\nruns:\n  using: composite\n  steps:\n"
+        "    - uses: actions/setup-node@v5\n",
+        encoding="utf-8",
+    )
+    with (
+        patch.object(cap, "WORKFLOWS_DIR", workflows),
+        patch.object(cap, "ACTIONS_DIR", actions_dir),
+    ):
+        collected = cap._collect_yaml_files()
+    names = sorted(p.name for p in collected)
+    assert names == ["action.yml", "ci.yml"]
+
+
+def test_collect_handles_missing_actions_dir(tmp_path: Path) -> None:
+    """Repo without `.github/actions/` (today's shape) still scans workflows."""
+    workflows = tmp_path / "workflows"
+    workflows.mkdir()
+    (workflows / "ci.yml").write_text(
+        "jobs:\n  j:\n    steps:\n      - uses: actions/checkout@v4\n",
+        encoding="utf-8",
+    )
+    missing_actions = tmp_path / "no-such-dir"
+    with (
+        patch.object(cap, "WORKFLOWS_DIR", workflows),
+        patch.object(cap, "ACTIONS_DIR", missing_actions),
+    ):
+        collected = cap._collect_yaml_files()
+    assert [p.name for p in collected] == ["ci.yml"]
+
+
+# ---------- main() end-to-end through composite + local-path ----------
+
+
+def test_main_flags_violation_in_composite_action(tmp_path: Path, capsys: Any) -> None:
+    """A bad pin inside a composite action's steps fails the audit."""
+    workflows = tmp_path / "workflows"
+    workflows.mkdir()
+    (workflows / "ci.yml").write_text(
+        "jobs:\n  j:\n    steps:\n      - uses: actions/checkout@v4\n",
+        encoding="utf-8",
+    )
+    actions_dir = tmp_path / "actions"
+    bad = actions_dir / "bad"
+    bad.mkdir(parents=True)
+    # Tag pin on a third-party action — violates third-party-sha policy.
+    (bad / "action.yml").write_text(
+        "name: 'bad'\nruns:\n  using: composite\n  steps:\n"
+        "    - uses: aquasecurity/trivy-action@v0.36.0\n",
+        encoding="utf-8",
+    )
+    with (
+        patch.object(cap, "WORKFLOWS_DIR", workflows),
+        patch.object(cap, "ACTIONS_DIR", actions_dir),
+    ):
+        assert cap.main() == 1
+    out = capsys.readouterr().out
+    assert "aquasecurity/trivy-action" in out
+    assert "third-party action requires SHA pin" in out
+
+
+def test_main_passes_with_clean_composite_action(tmp_path: Path, capsys: Any) -> None:
+    """A clean composite action with a SHA + comment passes the audit."""
+    workflows = tmp_path / "workflows"
+    workflows.mkdir()
+    (workflows / "ci.yml").write_text(
+        "jobs:\n  j:\n    steps:\n      - uses: actions/checkout@v4\n",
+        encoding="utf-8",
+    )
+    actions_dir = tmp_path / "actions"
+    good = actions_dir / "good"
+    good.mkdir(parents=True)
+    (good / "action.yml").write_text(
+        "name: 'good'\nruns:\n  using: composite\n  steps:\n"
+        "    - uses: aquasecurity/trivy-action@" + ("a" * 40) + "  # v0.36.0\n",
+        encoding="utf-8",
+    )
+    with (
+        patch.object(cap, "WORKFLOWS_DIR", workflows),
+        patch.object(cap, "ACTIONS_DIR", actions_dir),
+    ):
+        assert cap.main() == 0
+    out = capsys.readouterr().out
+    assert "Action pins audit OK" in out
diff --git a/tests/test_check_tests_present.py b/tests/test_check_tests_present.py
new file mode 100644
index 0000000..c050d43
--- /dev/null
+++ b/tests/test_check_tests_present.py
@@ -0,0 +1,219 @@
+"""Tests for `.github/scripts/check_tests_present.py`."""
+
+from __future__ import annotations
+
+import importlib.util
+import json
+import sys
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+
+import pytest
+
+if TYPE_CHECKING:
+    from collections.abc import Iterable
+
+REPO_ROOT = Path(__file__).resolve().parent.parent
+SCRIPT_PATH = REPO_ROOT / ".github" / "scripts" / "check_tests_present.py"
+
+
+def _load_script() -> Any:
+    spec = importlib.util.spec_from_file_location("check_tests_present", SCRIPT_PATH)
+    if spec is None or spec.loader is None:
+        msg = f"Could not load script at {SCRIPT_PATH}"
+        raise RuntimeError(msg)
+    module = importlib.util.module_from_spec(spec)
+    sys.modules[spec.name] = module
+    spec.loader.exec_module(module)
+    return module
+
+
+ctp = _load_script()
+
+
+# ---------- commit_type_prefix ----------
+
+
+@pytest.mark.parametrize(
+    "title,expected",
+    [
+        ("feat: add tool", "feat"),
+        ("fix: handle empty list", "fix"),
+        ("chore(api): rename module", "chore"),
+        ("docs: update readme", "docs"),
+        ("test: add coverage", "test"),
+        ("refactor: split module", "refactor"),
+        ("release: v1.2.3", "release"),
+        ("feat!: breaking change", "feat"),
+        ("FEAT: case-insensitive", "feat"),
+        ("  feat: leading-spaces", "feat"),
+        ("no-prefix subject only", None),
+        ("", None),
+        (None, None),
+    ],
+)
+def test_commit_type_prefix(title: str | None, expected: str | None) -> None:
+    assert ctp.commit_type_prefix(title) == expected
+
+
+# ---------- changed_files (via stub) ----------
+
+
+def test_changed_files_filters_blank_lines(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Empty lines in git output are filtered out."""
+
+    def fake_check_output(*_: Any, **__: Any) -> str:
+        return "src/api/main.py\n\ntests/test_api.py\n"
+
+    monkeypatch.setattr(ctp.subprocess, "check_output", fake_check_output)
+    assert ctp.changed_files("develop") == ["src/api/main.py", "tests/test_api.py"]
+
+
+def test_changed_files_propagates_git_failure(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """A git error becomes a RuntimeError with a context-rich message."""
+
+    def fake_check_output(*_: Any, **__: Any) -> str:
+        raise ctp.subprocess.CalledProcessError(
+            128, ["git"], stderr="bad object origin/develop"
+        )
+
+    monkeypatch.setattr(ctp.subprocess, "check_output", fake_check_output)
+    with pytest.raises(RuntimeError, match="bad object origin/develop"):
+        ctp.changed_files("develop")
+
+
+# ---------- main() end-to-end ----------
+
+
+def _set_event_payload(
+    monkeypatch: pytest.MonkeyPatch, tmp_path: Path, title: str
+) -> None:
+    event_file = tmp_path / "event.json"
+    event_file.write_text(
+        json.dumps({"pull_request": {"title": title}}), encoding="utf-8"
+    )
+    monkeypatch.setenv("GITHUB_EVENT_PATH", str(event_file))
+
+
+def _stub_changed_files(monkeypatch: pytest.MonkeyPatch, files: Iterable[str]) -> None:
+    monkeypatch.setattr(ctp, "changed_files", lambda _base_ref: list(files))
+
+
+def test_main_no_src_changes_passes(
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    monkeypatch.setenv("GITHUB_BASE_REF", "develop")
+    _set_event_payload(monkeypatch, tmp_path, "feat: docs-only change")
+    _stub_changed_files(monkeypatch, ["docs/README.md", ".github/workflows/ci.yml"])
+
+    assert ctp.main() == 0
+    assert "gate not applicable" in capsys.readouterr().out
+
+
+def test_main_src_and_tests_changed_passes(
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    monkeypatch.setenv("GITHUB_BASE_REF", "develop")
+    _set_event_payload(monkeypatch, tmp_path, "feat: new tool")
+    _stub_changed_files(monkeypatch, ["src/tools/new.py", "tests/test_new.py"])
+
+    assert ctp.main() == 0
+    out = capsys.readouterr().out
+    assert "tests/ also touched" in out
+
+
+@pytest.mark.parametrize("prefix_title", ["feat: x", "fix: y", "FIX: z"])
+def test_main_feat_or_fix_without_tests_blocks(
+    prefix_title: str,
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    monkeypatch.setenv("GITHUB_BASE_REF", "develop")
+    _set_event_payload(monkeypatch, tmp_path, prefix_title)
+    _stub_changed_files(monkeypatch, ["src/api/routes.py"])
+
+    assert ctp.main() == 1
+    err = capsys.readouterr().out
+    assert "::error::" in err
+    assert "Testing Policy" in err
+
+
+@pytest.mark.parametrize(
+    "prefix_title", ["chore: x", "docs: y", "refactor: z", "test: a", "release: b"]
+)
+def test_main_warn_only_prefixes_pass(
+    prefix_title: str,
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    monkeypatch.setenv("GITHUB_BASE_REF", "develop")
+    _set_event_payload(monkeypatch, tmp_path, prefix_title)
+    _stub_changed_files(monkeypatch, ["src/api/routes.py"])
+
+    assert ctp.main() == 0
+    out = capsys.readouterr().out
+    assert "::warning::" in out
+
+
+def test_main_unknown_prefix_warns(
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    """No conventional prefix → warn-only (Lint PR title is the prefix gate)."""
+    monkeypatch.setenv("GITHUB_BASE_REF", "develop")
+    _set_event_payload(monkeypatch, tmp_path, "no prefix at all")
+    _stub_changed_files(monkeypatch, ["src/api/routes.py"])
+
+    assert ctp.main() == 0
+    out = capsys.readouterr().out
+    assert "prefix not recognised" in out
+
+
+def test_main_excludes_non_python_src_changes(
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    """Non-Python files under src/ shouldn't trigger the gate."""
+    monkeypatch.setenv("GITHUB_BASE_REF", "develop")
+    _set_event_payload(monkeypatch, tmp_path, "feat: copy a fixture")
+    _stub_changed_files(monkeypatch, ["src/api/README.md", "src/data/sample.csv"])
+
+    assert ctp.main() == 0
+    assert "gate not applicable" in capsys.readouterr().out
+
+
+def test_main_no_base_ref_skips(
+    monkeypatch: pytest.MonkeyPatch,
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    monkeypatch.delenv("GITHUB_BASE_REF", raising=False)
+    monkeypatch.delenv("GITHUB_EVENT_PATH", raising=False)
+    assert ctp.main() == 0
+    assert "skipping" in capsys.readouterr().out
+
+
+def test_main_git_failure_exits_2(
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    monkeypatch.setenv("GITHUB_BASE_REF", "develop")
+    _set_event_payload(monkeypatch, tmp_path, "feat: x")
+
+    def boom(_base_ref: str) -> list[str]:
+        msg = "git fetch failed"
+        raise RuntimeError(msg)
+
+    monkeypatch.setattr(ctp, "changed_files", boom)
+    assert ctp.main() == 2
+    assert "::error::" in capsys.readouterr().out
diff --git a/tests/test_check_version_bump.py b/tests/test_check_version_bump.py
new file mode 100644
index 0000000..a1d8dc6
--- /dev/null
+++ b/tests/test_check_version_bump.py
@@ -0,0 +1,241 @@
+"""Tests for `.github/scripts/check_version_bump.py`.
+
+Covers the parsing helpers and end-to-end `main()` behaviour by stubbing
+the `git show` subprocess call and the GitHub Actions event payload.
+"""
+
+from __future__ import annotations
+
+import importlib.util
+import json
+import os
+import sys
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+
+import pytest
+
+if TYPE_CHECKING:
+    from collections.abc import Iterator
+
+REPO_ROOT = Path(__file__).resolve().parent.parent
+SCRIPT_PATH = REPO_ROOT / ".github" / "scripts" / "check_version_bump.py"
+
+
+def _load_script() -> Any:
+    """Load the check_version_bump module by file path (it lives outside src/)."""
+    spec = importlib.util.spec_from_file_location("check_version_bump", SCRIPT_PATH)
+    if spec is None or spec.loader is None:
+        msg = f"Could not load script at {SCRIPT_PATH}"
+        raise RuntimeError(msg)
+    module = importlib.util.module_from_spec(spec)
+    sys.modules[spec.name] = module
+    spec.loader.exec_module(module)
+    return module
+
+
+cvb = _load_script()
+
+
+# ---------- pyproject_version ----------
+
+
+def test_pyproject_version_extracts_string() -> None:
+    text = '[project]\nname = "harness-python-react"\nversion = "1.6.5"\n'
+    assert cvb.pyproject_version(text) == "1.6.5"
+
+
+def test_pyproject_version_missing_raises() -> None:
+    with pytest.raises(ValueError, match="version not found"):
+        cvb.pyproject_version('[project]\nname = "harness-python-react"\n')
+
+
+def test_pyproject_version_empty_raises() -> None:
+    with pytest.raises(ValueError, match="version not found"):
+        cvb.pyproject_version('[project]\nversion = ""\n')
+
+
+# ---------- uv_lock_self_version ----------
+
+
+def test_uv_lock_self_version_extracts() -> None:
+    lock = (
+        '[[package]]\nname = "annotated-doc"\nversion = "0.0.4"\n\n'
+        '[[package]]\nname = "harness-python-react"\nversion = "1.6.5"\n'
+        'source = { editable = "." }\n'
+    )
+    assert cvb.uv_lock_self_version(lock) == "1.6.5"
+
+
+def test_uv_lock_self_version_missing_raises() -> None:
+    with pytest.raises(ValueError, match="harness-python-react"):
+        cvb.uv_lock_self_version('[[package]]\nname = "fastapi"\nversion = "0.100"\n')
+
+
+# ---------- is_release_pr ----------
+
+
+@pytest.mark.parametrize(
+    "title,expected",
+    [
+        ("release: v1.6.5 — harness rollout", True),
+        ("Release: v1.6.5", True),  # leading-cap accepted
+        ("RELEASE: yes", True),
+        ("  release: leading-spaces", True),
+        ("feat: add a thing", False),
+        ("fix: something", False),
+        ("chore: release notes update", False),  # 'release' substring, wrong prefix
+        ("", False),
+        (None, False),
+    ],
+)
+def test_is_release_pr(title: str | None, expected: bool) -> None:
+    assert cvb.is_release_pr(title) is expected
+
+
+# ---------- main() end-to-end ----------
+
+
+@pytest.fixture
+def fake_repo(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Iterator[Path]:
+    """Set up a fake repo root with pyproject.toml + uv.lock and chdir into it."""
+    (tmp_path / "pyproject.toml").write_text(
+        '[project]\nname = "harness-python-react"\nversion = "1.6.6"\n',
+        encoding="utf-8",
+    )
+    (tmp_path / "uv.lock").write_text(
+        '[[package]]\nname = "harness-python-react"\nversion = "1.6.6"\n'
+        'source = { editable = "." }\n',
+        encoding="utf-8",
+    )
+    monkeypatch.chdir(tmp_path)
+    yield tmp_path
+
+
+def _set_event_payload(
+    monkeypatch: pytest.MonkeyPatch, tmp_path: Path, title: str
+) -> None:
+    event_file = tmp_path / "event.json"
+    event_file.write_text(
+        json.dumps({"pull_request": {"title": title}}), encoding="utf-8"
+    )
+    monkeypatch.setenv("GITHUB_EVENT_PATH", str(event_file))
+
+
+def _stub_git_show(monkeypatch: pytest.MonkeyPatch, base_pyproject: str) -> None:
+    def fake(path: Path, base_ref: str) -> str:
+        assert str(path) == "pyproject.toml"
+        assert base_ref == "develop"
+        return base_pyproject
+
+    monkeypatch.setattr(cvb, "git_show_at_base", fake)
+
+
+def test_main_bumped_pass(
+    fake_repo: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
+) -> None:
+    monkeypatch.setenv("GITHUB_BASE_REF", "develop")
+    _set_event_payload(monkeypatch, fake_repo, "feat: add thing")
+    _stub_git_show(
+        monkeypatch, '[project]\nname = "harness-python-react"\nversion = "1.6.5"\n'
+    )
+
+    assert cvb.main() == 0
+    out = capsys.readouterr().out
+    assert "Version bumped: 1.6.5 -> 1.6.6" in out
+
+
+def test_main_unchanged_fails(
+    fake_repo: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
+) -> None:
+    monkeypatch.setenv("GITHUB_BASE_REF", "develop")
+    _set_event_payload(monkeypatch, fake_repo, "feat: forgot to bump")
+    _stub_git_show(
+        monkeypatch, '[project]\nname = "harness-python-react"\nversion = "1.6.6"\n'
+    )
+
+    assert cvb.main() == 1
+    err_out = capsys.readouterr().out
+    assert "::error::" in err_out
+    assert "unchanged from develop" in err_out
+
+
+def test_main_release_prefix_exempt(
+    fake_repo: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
+) -> None:
+    monkeypatch.setenv("GITHUB_BASE_REF", "main")
+    _set_event_payload(monkeypatch, fake_repo, "release: v1.6.6 — harness")
+    # Even though pyproject == base, release: PRs short-circuit before we
+    # inspect git history.
+    _stub_git_show(
+        monkeypatch, '[project]\nname = "harness-python-react"\nversion = "1.6.6"\n'
+    )
+
+    assert cvb.main() == 0
+    assert "release: PR" in capsys.readouterr().out
+
+
+def test_main_uv_lock_mismatch_fails(
+    tmp_path: Path,
+    monkeypatch: pytest.MonkeyPatch,
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    (tmp_path / "pyproject.toml").write_text(
+        '[project]\nname = "harness-python-react"\nversion = "1.6.6"\n',
+        encoding="utf-8",
+    )
+    (tmp_path / "uv.lock").write_text(
+        '[[package]]\nname = "harness-python-react"\nversion = "1.6.5"\n'  # stale!
+        'source = { editable = "." }\n',
+        encoding="utf-8",
+    )
+    monkeypatch.chdir(tmp_path)
+    monkeypatch.setenv("GITHUB_BASE_REF", "develop")
+    _set_event_payload(monkeypatch, tmp_path, "feat: stale lock")
+    _stub_git_show(
+        monkeypatch, '[project]\nname = "harness-python-react"\nversion = "1.6.5"\n'
+    )
+
+    assert cvb.main() == 1
+    err = capsys.readouterr().out
+    assert "self-version disagree" in err
+
+
+def test_main_no_base_ref_skips(
+    fake_repo: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
+) -> None:
+    # Outside a PR run there is no GITHUB_BASE_REF; the gate degrades green.
+    monkeypatch.delenv("GITHUB_BASE_REF", raising=False)
+    monkeypatch.delenv("GITHUB_EVENT_PATH", raising=False)
+    assert cvb.main() == 0
+    assert "skipping" in capsys.readouterr().out
+
+
+def test_main_parse_error_exits_2(
+    tmp_path: Path,
+    monkeypatch: pytest.MonkeyPatch,
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    (tmp_path / "pyproject.toml").write_text(
+        '[project]\nname = "harness-python-react"\n# version missing\n',
+        encoding="utf-8",
+    )
+    (tmp_path / "uv.lock").write_text(
+        '[[package]]\nname = "harness-python-react"\nversion = "1.6.6"\n',
+        encoding="utf-8",
+    )
+    monkeypatch.chdir(tmp_path)
+    monkeypatch.setenv("GITHUB_BASE_REF", "develop")
+    _set_event_payload(monkeypatch, tmp_path, "feat: x")
+    _stub_git_show(
+        monkeypatch, '[project]\nname = "harness-python-react"\nversion = "1.6.5"\n'
+    )
+
+    assert cvb.main() == 2
+    assert "::error::" in capsys.readouterr().out
+
+
+# Ensure the loader didn't accidentally leave os.environ polluted.
+def test_environ_unaffected_by_loader() -> None:
+    # Sanity: nothing the script does at import time touches os.environ.
+    assert "CHECK_VERSION_BUMP_LOADED" not in os.environ
diff --git a/uv.lock b/uv.lock
index b6c0c1e..9ca9a26 100644
--- a/uv.lock
+++ b/uv.lock
@@ -328,7 +328,7 @@ wheels = [
 
 [[package]]
 name = "harness-python-react"
-version = "0.1.0"
+version = "0.2.0"
 source = { virtual = "." }
 dependencies = [
     { name = "fastapi" },

From 843c2537fa2f29399909e8db5eedfebe6fa94995 Mon Sep 17 00:00:00 2001
From: Constantinos <41453723+constk@users.noreply.github.com>
Date: Thu, 30 Apr 2026 20:24:41 +1000
Subject: [PATCH 4/9] chore: src/ README audit gate + 5 package READMEs (#126,
 #152) (#71)

---
 .github/branch-protection/develop.json |   1 +
 .github/branch-protection/main.json    |   1 +
 .github/scripts/check_src_readmes.py   | 142 +++++++++++++++++++++++++
 .github/workflows/ci.yml               |  13 +++
 pyproject.toml                         |   2 +-
 src/api/README.md                      |  18 ++++
 src/eval/README.md                     |  22 ++++
 src/models/README.md                   |  17 +++
 src/observability/README.md            |  20 ++++
 src/tools/README.md                    |  19 ++++
 uv.lock                                |   2 +-
 11 files changed, 255 insertions(+), 2 deletions(-)
 create mode 100644 .github/scripts/check_src_readmes.py
 create mode 100644 src/api/README.md
 create mode 100644 src/eval/README.md
 create mode 100644 src/models/README.md
 create mode 100644 src/observability/README.md
 create mode 100644 src/tools/README.md

diff --git a/.github/branch-protection/develop.json b/.github/branch-protection/develop.json
index 6187955..9543e69 100644
--- a/.github/branch-protection/develop.json
+++ b/.github/branch-protection/develop.json
@@ -12,6 +12,7 @@
       "Version bump check",
       "Action pinning audit",
       "Tests required",
+      "src/ README audit",
       "Frontend Build",
       "Frontend Quality",
       "Branch-protection contexts sync",
diff --git a/.github/branch-protection/main.json b/.github/branch-protection/main.json
index fecf6fd..d7bafaa 100644
--- a/.github/branch-protection/main.json
+++ b/.github/branch-protection/main.json
@@ -12,6 +12,7 @@
       "Version bump check",
       "Action pinning audit",
       "Tests required",
+      "src/ README audit",
       "Frontend Build",
       "Frontend Quality",
       "Branch-protection contexts sync",
diff --git a/.github/scripts/check_src_readmes.py b/.github/scripts/check_src_readmes.py
new file mode 100644
index 0000000..9394271
--- /dev/null
+++ b/.github/scripts/check_src_readmes.py
@@ -0,0 +1,142 @@
+#!/usr/bin/env python3
+"""Enforce the per-package README rule from `CLAUDE.md`.
+
+`CLAUDE.md` *Code standards*: *"Each `src/` directory has a README
+explaining its purpose and key interfaces."* As `src/` grows, READMEs
+go missing silently — and even when one exists, it can degrade into
+unstructured prose that no contributor reads. This script audits both
+shape and substance.
+
+Behaviour:
+
+- Walks every subdirectory under `src/` (recursive, skipping
+  `__pycache__`).
+- A subdirectory must have a `README.md` when it contains at least one
+  `.py` file other than `__init__.py`. Empty directories and
+  `__init__.py`-only namespace packages are exempt — they have no
+  surface to document.
+- The README must be non-trivial: at least `MIN_BYTES = 200` bytes
+  after stripping leading/trailing whitespace. Catches the empty-stub
+  failure mode (#126).
+- The README must contain a `## Key interfaces` heading (or its
+  documented synonym `## Public surface`). #152 promoted the gate
+  from presence + size to presence + structure: a 200-byte unrelated
+  paragraph passed pre-#152, but doesn't actually document the
+  package's public surface. The Purpose statement is encoded
+  positionally as the H1 heading + immediately-following paragraph
+  rather than as an explicit `## Purpose` heading — that's the
+  existing convention across all seven packages and the natural
+  markdown shape; a separate `## Purpose` heading would duplicate
+  what the H1 already establishes.
+
+There is **no exemption mechanism**, mirroring `check_file_length.py`
+(see `feedback_no_noqa`). If a future package legitimately needs a
+shorter README (e.g. a single-file helper with a self-explanatory
+filename), restructure rather than carve out an allowlist entry.
+
+Exit codes:
+  0 — every package with code has a non-trivial, structured README
+  1 — at least one package is missing a README, has a stub one, or
+      lacks the required heading
+  2 — `src/` does not exist (run from the wrong directory?)
+
+Usage (from repo root):
+
+    python .github/scripts/check_src_readmes.py
+"""
+
+from __future__ import annotations
+
+import re
+import sys
+from pathlib import Path
+
+SRC_ROOT = Path("src")
+MIN_BYTES = 200
+
+# Required heading shapes. Each tuple is "any-of" — a README satisfies the
+# rule if it contains at least one matching heading. Match is anchored to
+# line start, case-insensitive, allows `#` levels 2-4 so a deeply nested
+# subsection still counts.
+KEY_INTERFACES_HEADING = re.compile(
+    r"^#{2,4}\s+(Key interfaces|Public surface)\b",
+    re.IGNORECASE | re.MULTILINE,
+)
+
+
+def _normalised(path: Path) -> str:
+    return path.as_posix()
+
+
+def _has_documentable_code(directory: Path) -> bool:
+    """True when the directory has at least one `.py` file beyond `__init__.py`."""
+    for entry in directory.iterdir():
+        if entry.is_file() and entry.suffix == ".py" and entry.name != "__init__.py":
+            return True
+    return False
+
+
+def _readme_failure(directory: Path) -> str | None:
+    """Return an error message if the directory's README is missing or stub-sized."""
+    readme = directory / "README.md"
+    if not readme.is_file():
+        return (
+            f"::error file={_normalised(directory)}::missing README.md. "
+            "`CLAUDE.md` requires every `src/` package to document its "
+            "purpose and key interfaces."
+        )
+    body = readme.read_text(encoding="utf-8").strip()
+    if len(body.encode("utf-8")) < MIN_BYTES:
+        return (
+            f"::error file={_normalised(readme)}::README.md is shorter than "
+            f"{MIN_BYTES} bytes after stripping whitespace. Add purpose + "
+            "key-interfaces text — a single heading does not satisfy the rule."
+        )
+    if not KEY_INTERFACES_HEADING.search(body):
+        return (
+            f"::error file={_normalised(readme)}::README.md missing a "
+            "`## Key interfaces` heading (or the synonym `## Public surface`). "
+            "Per CLAUDE.md the README must document the package's public "
+            "surface; the heading anchors that section so contributors can "
+            "find it. Add the heading and list the package's exported names."
+        )
+    return None
+
+
+def main() -> int:
+    if not SRC_ROOT.is_dir():
+        print(f"::error::{SRC_ROOT.as_posix()} not found; run from repo root")
+        return 2
+
+    audited: list[Path] = []
+    failures: list[str] = []
+
+    for directory in sorted(p for p in SRC_ROOT.rglob("*") if p.is_dir()):
+        if directory.name == "__pycache__":
+            continue
+        if not _has_documentable_code(directory):
+            continue
+        audited.append(directory)
+        message = _readme_failure(directory)
+        if message is not None:
+            failures.append(message)
+
+    if failures:
+        for line in failures:
+            print(line)
+        print(
+            f"\n{len(failures)} package(s) failed the README audit. "
+            "Fix in this PR — there is no exemption mechanism, see the "
+            "module docstring."
+        )
+        return 1
+
+    print(
+        f"src/ README audit OK — {len(audited)} package(s) documented "
+        f"(min {MIN_BYTES} bytes, `## Key interfaces` heading required)."
+    )
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index fcda20a..2f0b2cb 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -151,6 +151,19 @@ jobs:
           python-version: "3.14"
       - run: python .github/scripts/check_tests_present.py
 
+  src-readmes:
+    name: src/ README audit
+    runs-on: ubuntu-latest
+    # CLAUDE.md: every `src/` package documents its purpose + key
+    # interfaces. The audit checks shape (presence + min 200 bytes) and
+    # structure (`## Key interfaces` heading). No exemption mechanism.
+    steps:
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
+      - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065  # v5
+        with:
+          python-version: "3.14"
+      - run: python .github/scripts/check_src_readmes.py
+
   frontend-build:
     name: Frontend Build
     runs-on: ubuntu-latest
diff --git a/pyproject.toml b/pyproject.toml
index 27e014a..2e06350 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "harness-python-react"
-version = "0.2.0"
+version = "0.2.1"
 description = "Production-quality LLM-driven coding harness — Python (FastAPI) backend, Vite + React + TypeScript frontend."
 readme = "README.md"
 requires-python = ">=3.14"
diff --git a/src/api/README.md b/src/api/README.md
new file mode 100644
index 0000000..9bda5e9
--- /dev/null
+++ b/src/api/README.md
@@ -0,0 +1,18 @@
+# `src/api/`
+
+FastAPI HTTP surface for the harness scaffold. Owns request handling, route registration, lifespan hooks, and the in-memory session store. Imports from `src/observability/` (instrumentation), `src/models/` (Pydantic contracts) — never the other way around (enforced by `import-linter`).
+
+## Key interfaces
+
+- **`main.app`** — the `FastAPI()` instance with CORS middleware, OTel auto-instrumentation, and the v1 router mounted. Exposed for `uvicorn src.api.main:app` and the FastAPI `TestClient` fixtures in `tests/conftest.py`.
+- **`main.lifespan(app)`** — async context manager that runs `setup_tracing` → `setup_logging` → `instrument_httpx` → `instrument_fastapi` once on startup, then constructs the `SessionStore`. Single point of "what is process-globally configured?"
+- **`routes.router`** — `APIRouter(prefix="/api/v1")`. Every route a real client should hit lives here; un-versioned routes live on `app` directly (only the FastAPI auto-generated docs at `/docs` and `/redoc` qualify).
+- **`routes.health`** — `GET /api/v1/health` → `HealthResponse` (status + `importlib.metadata.version`-derived build version).
+- **`routes.echo`** — `GET /api/v1/echo?msg=...` → `EchoResponse`. Demonstrates the request/response contract pattern that real domain endpoints follow.
+- **`sessions.SessionStore`** — single-process in-memory dict mapping session id → conversation history. Plumbed onto `app.state.session_store` in lifespan; replace with a Redis or DB-backed implementation for multi-process deployments.
+
+## Conventions
+
+- Every route returns a `StrictModel` subclass — never a raw dict. The `extra="forbid"` config makes typos and renamed fields fail at construction, not three calls deep.
+- Routes are versioned under `/api/v1/`. Adding a v2 endpoint = a new router, never a breaking change to v1.
+- The CORS policy is wide-open in the scaffold so the Vite dev server proxy works on first run; tighten via config when a real auth layer arrives.
diff --git a/src/eval/README.md b/src/eval/README.md
new file mode 100644
index 0000000..088a8db
--- /dev/null
+++ b/src/eval/README.md
@@ -0,0 +1,22 @@
+# `src/eval/`
+
+Provider-agnostic evaluation harness — golden QA cases driven by a caller-supplied `answer_fn`, three tolerance modes (exact / numeric / semantic), and a `Protocol`-based LLM-judge seam so no SDK leaks into this layer. Designed to be runnable on a fresh clone without LLM credentials (the example case uses `exact_match`); the nightly workflow opt-in is documented in `docs/EVAL_HARNESS.md`.
+
+## Key interfaces
+
+- **`models.EvalCase`** — Pydantic shape of an entry in `eval/golden_qa.json`: `id`, `question`, `expected_answer`, `tolerance`, plus optional `category` / `difficulty` / `expected_tools` / `notes`.
+- **`models.EvalResult`** — per-case outcome: actual answer, latency, pass/fail, tolerance score, failure reason.
+- **`runner.load_golden_dataset(path=None)`** — pure JSON loader. Standalone so call sites can introspect the dataset without paying for an `EvalRunner`.
+- **`runner.EvalRunner(answer_fn, judge_client=None, judge_model="")`** — the orchestrator. `answer_fn: Callable[[str], str]` is the single seam — wire your agent loop, direct LLM client, or stub. `judge_client` implements the `judge.LLMClient` Protocol and is only consulted for `semantic_similar` cases.
+  - **`evaluate(case)`** — run one case; returns `EvalResult`.
+  - **`evaluate_all()`** — load the golden dataset, evaluate every case, return the list.
+- **`judge.LLMClient`** — `Protocol` with one method: `complete_json(*, model, prompt) -> str`. Concrete adapters live in your agent code; the harness never imports OpenAI/Anthropic/Azure SDKs.
+- **`judge.evaluate_semantic_similarity(question, expected, actual, client, model)`** — calls the judge, returns `(score, explanation)`. Returns `(None, "no LLM client configured")` when `client is None` (inconclusive — runner treats it as pass with an explanatory note rather than a hard fail).
+- **`report.generate_report(results)`** — markdown summary: overall accuracy, per-category, per-difficulty, failure analysis with reason text.
+- **`__main__`** — `python -m src.eval` runs the dataset with an identity `answer_fn` (echoes the question) and prints the markdown report.
+
+## Conventions
+
+- **`coverage` `omit`s this package** — the eval suite is exercised by `eval/test_golden_qa.py`, not by `tests/`. Counting it would inflate misses on every PR that touches behaviour without re-running the eval workflow.
+- **No domain coupling.** The harness ships one trivial echo case; real users replace `eval/golden_qa.json` with their domain dataset and wire `answer_fn` to their agent.
+- **Tolerance picks happen per case**, not per runner. A dataset can mix `exact_match`, `numeric_close` (within 1%), and `semantic_similar` (LLM judge ≥ 0.8) cases freely.
diff --git a/src/models/README.md b/src/models/README.md
new file mode 100644
index 0000000..321e68d
--- /dev/null
+++ b/src/models/README.md
@@ -0,0 +1,17 @@
+# `src/models/`
+
+Pydantic contracts that cross module or process seams. Import-linter enforces that nothing outside this package matters here — `src.models` depends on **nothing in `src/`** (forbidden contract in `pyproject.toml`). The point: schema bugs surface at construction with clear `ValidationError`s, not as `AttributeError`s deep in the request path.
+
+## Key interfaces
+
+- **`_base.StrictModel`** — `BaseModel` subclass with `model_config = ConfigDict(extra="forbid")`. Inherit this for any contract that crosses a boundary; opt into stricter type coercion with `class Foo(StrictModel, strict=True)` when JSON coercion (UUID, int) is undesirable.
+- **`health.HealthResponse`** — `status: Literal["ok"]` + `version: str`. Returned by `GET /api/v1/health`.
+- **`session.SessionCreate`** / **`session.SessionInfo`** — the create-empty / public-info shapes used by `src.api.sessions.SessionStore`.
+- **`config.Settings`** — `pydantic_settings.BaseSettings` reading the four `LLM_*` env vars (`LLM_PROVIDER`, `LLM_API_KEY`, `LLM_BASE_URL`, `LLM_MODEL`). Provider-pluggable seam — wire OpenAI, Anthropic, Azure, or vLLM by swapping the values without touching this code.
+- **`config.get_settings()`** — fresh `Settings` constructor (deliberately unmemoised so tests can re-construct after `monkeypatch.setenv`). Real callers should hold a single instance at startup.
+
+## Conventions
+
+- One module per logical contract group — `health`, `session`, `config`. Add a new file rather than appending to an existing one.
+- Per-class `strict=True` is the call site for any model that should reject `"3.14"` for a float; HTTP-boundary models often skip it because JSON requires the coercion.
+- `pyproject.toml` `[tool.ruff.lint.per-file-ignores]` adds `"src/models/**" = ["TCH003"]` because Pydantic needs runtime imports for type annotations.
diff --git a/src/observability/README.md b/src/observability/README.md
new file mode 100644
index 0000000..d621674
--- /dev/null
+++ b/src/observability/README.md
@@ -0,0 +1,20 @@
+# `src/observability/`
+
+OpenTelemetry SDK setup, OTLP exporter wiring, structured-JSON logging with trace correlation, and span helpers built around the OTel GenAI semantic conventions. Wired once from `src.api.main.lifespan` so the rest of the codebase calls into a configured tracer / logger without orchestrating startup.
+
+## Key interfaces
+
+- **`tracing.setup_tracing()`** — creates the global `TracerProvider`, attaches a `BatchSpanProcessor` with the OTLP gRPC exporter (or `ConsoleSpanExporter` when `OTEL_EXPORTER=console`). Reads `OTEL_SERVICE_NAME` (default `harness-python-react`) and `OTEL_EXPORTER_OTLP_ENDPOINT` (default `http://localhost:4317`).
+- **`tracing.instrument_fastapi(app)`** + **`tracing.instrument_httpx()`** — auto-instrumentation hooks. Each FastAPI request and outbound httpx call gets a span with the standard semantic-convention attributes.
+- **`tracing.get_tracer(name)`** — retrieve a tracer by module name when manual instrumentation is needed.
+- **`logging.setup_logging(level=None)`** — configures stdlib `logging` to emit single-line JSON via `_JSONFormatter`. Reads `LOG_LEVEL` env when no argument is passed. Loops in `LoggingInstrumentor` so the `otelTraceID` / `otelSpanID` fields land on every record.
+- **`logging._JSONFormatter`** — the formatter; tested directly in `tests/test_observability.py` for the trace-correlation contract.
+- **`spans.agent_span(name, attributes=None)`** — context manager that opens a span, sets initial attributes from a `Mapping`, yields the live span for further mutation. Use this rather than calling `tracer.start_as_current_span` directly so the attribute-shape stays consistent.
+- **`spans.set_span_attributes(span, **kwargs)`** — set multiple attributes; `None` values are silently skipped.
+- **`spans.GENAI_*` / `DB_*` constants** — exported attribute-key constants for the OTel GenAI + database semantic conventions. Use these instead of raw strings so a typo is a `NameError`, not a silently-different attribute.
+
+## Conventions
+
+- **Semconv keys only.** The constants at the top of `spans.py` are the single source of attribute names. Adding a custom `agent.foo` attribute is a hard no — extend the semconv list (or wait for upstream OTel to bless one).
+- **Provider-agnostic exporter.** `OTEL_EXPORTER_OTLP_ENDPOINT` is the standard variable; `docker-compose.yml` sets it to `http://jaeger:4317` in the compose network.
+- **Lifespan-only setup.** Don't call `setup_tracing()` from anywhere except `src.api.main.lifespan` — duplicate calls trigger OTel's "Overriding of current TracerProvider" warning, and tests rely on the test-fixture variant that attaches an in-memory exporter to whatever provider exists.
diff --git a/src/tools/README.md b/src/tools/README.md
new file mode 100644
index 0000000..ad54658
--- /dev/null
+++ b/src/tools/README.md
@@ -0,0 +1,19 @@
+# `src/tools/`
+
+Generic tool registry — the dispatcher pattern an LLM-driven agent uses to call typed tools. Each tool is a function `StrictModel -> StrictModel`; the registry maps a name to `(input_schema, callable)` and validates the dict-shaped input against the schema before dispatch. Layer-wise sits below `agent` / `api` / `eval`, above `data` / `models` (enforced by `import-linter`).
+
+## Key interfaces
+
+- **`registry.Registry`** — instance class. Construct one per agent/test fixture; the module also exposes a global `registry` singleton that the example `echo_tool` self-registers into at import time.
+  - **`register(name, input_schema)`** — decorator factory. Wraps a function and inserts the `(input_schema, fn)` pair under `name`. Raises `ValueError` on duplicate registration.
+  - **`dispatch(name, raw_input)`** — `name`-lookup → `input_schema.model_validate(raw_input)` → call. Pydantic `ValidationError` propagates on bad input (wrong type or unknown keys via `StrictModel.extra="forbid"`).
+  - **`names()`** — sorted list of registered tool names; cheap introspection for the agent's tool-listing prompt.
+- **`registry.UnknownToolError`** — `KeyError` subclass raised by `dispatch` when *name* isn't registered. Caught and rendered as a tool-call-error event by the agent loop.
+- **`registry.registry`** — the module-global `Registry` instance. Real agents use this so the `echo_tool` is reachable from any consumer; tests construct private `Registry()` instances to avoid cross-test contamination.
+- **`registry.echo_tool`** + **`EchoToolInput`** + **`EchoToolOutput`** — example tool demonstrating the layer. Ships pre-registered under name `"echo"` to give a working dispatch path on a fresh clone.
+
+## Conventions
+
+- **Inputs and outputs are `StrictModel`s.** Avoid raw `dict`s — the `extra="forbid"` posture is the entire reason this pattern beats hand-rolled `if/elif` dispatch over JSON.
+- **Tools are pure functions** of their input. State the tool needs (DB connection, HTTP client) is injected via partial / closure at registration time, not via module globals.
+- **Add a tool = add a module.** Real tools beyond the example get their own file under `src/tools/` and self-register the same way `echo_tool` does. Keep `registry.py` itself focused on the dispatcher.
diff --git a/uv.lock b/uv.lock
index 9ca9a26..df07bc8 100644
--- a/uv.lock
+++ b/uv.lock
@@ -328,7 +328,7 @@ wheels = [
 
 [[package]]
 name = "harness-python-react"
-version = "0.2.0"
+version = "0.2.1"
 source = { virtual = "." }
 dependencies = [
     { name = "fastapi" },

From 461cab6277997717e26a6c2c66bf2e78bbd135bb Mon Sep 17 00:00:00 2001
From: Constantinos <41453723+constk@users.noreply.github.com>
Date: Thu, 30 Apr 2026 22:19:29 +1000
Subject: [PATCH 5/9] chore: aspirational-ticket gate + INVARIANTS marker
 convention (#133, #153) (#72)

---
 .github/branch-protection/develop.json        |   1 +
 .github/branch-protection/main.json           |   1 +
 .github/scripts/check_aspirational_tickets.py | 183 ++++++++++++++++++
 .github/workflows/ci.yml                      |  15 ++
 docs/INVARIANTS.md                            |  14 ++
 pyproject.toml                                |   2 +-
 uv.lock                                       |   2 +-
 7 files changed, 216 insertions(+), 2 deletions(-)
 create mode 100644 .github/scripts/check_aspirational_tickets.py

diff --git a/.github/branch-protection/develop.json b/.github/branch-protection/develop.json
index 9543e69..54cbbe7 100644
--- a/.github/branch-protection/develop.json
+++ b/.github/branch-protection/develop.json
@@ -13,6 +13,7 @@
       "Action pinning audit",
       "Tests required",
       "src/ README audit",
+      "Aspirational ticket cite",
       "Frontend Build",
       "Frontend Quality",
       "Branch-protection contexts sync",
diff --git a/.github/branch-protection/main.json b/.github/branch-protection/main.json
index d7bafaa..455b74a 100644
--- a/.github/branch-protection/main.json
+++ b/.github/branch-protection/main.json
@@ -13,6 +13,7 @@
       "Action pinning audit",
       "Tests required",
       "src/ README audit",
+      "Aspirational ticket cite",
       "Frontend Build",
       "Frontend Quality",
       "Branch-protection contexts sync",
diff --git a/.github/scripts/check_aspirational_tickets.py b/.github/scripts/check_aspirational_tickets.py
new file mode 100644
index 0000000..ded7013
--- /dev/null
+++ b/.github/scripts/check_aspirational_tickets.py
@@ -0,0 +1,183 @@
+#!/usr/bin/env python3
+"""Enforce the *aspirational invariants are ticketed* rule from `docs/HARNESS.md`.
+
+`docs/HARNESS.md`: *"Do not merge an 'aspirational' invariant without a
+follow-up issue."* `docs/INVARIANTS.md` documents this with the
+`*Aspirational:*` marker line per invariant. Until #133 this rule was
+enforced by reviewer memory only.
+
+Behaviour:
+
+- Walks `docs/INVARIANTS.md`, reading line by line.
+- A line is treated as an *aspirational marker* when it starts with
+  `*Aspirational` (one leading asterisk) or `**Aspirational**` (two).
+  Mid-sentence prose like "items marked *aspirational* are…" is
+  ignored — the marker shape is documented in
+  `docs/INVARIANTS.md`.
+- For each marker line, requires at least one `#NNN` reference on the
+  same line; fails when none is present.
+- When `GITHUB_TOKEN` and `GITHUB_REPOSITORY` are set, fetches each
+  cited issue via the REST API and emits a `::warning::` (not
+  failure) when the ticket is closed. API failures (network, rate
+  limit, 404) downgrade to a warning — the gate's job is to catch
+  drift in the doc, not to be a transient-CI tripwire.
+- When `ASPIRATIONAL_STRICT=1` is set, a closed-ticket cite is
+  promoted from `::warning::` to a hard failure (#153). Default off.
+  This is the toggle to flip when the project decides "an aspirational
+  invariant whose cite is closed must be promoted to enforced or
+  refiled in this PR" — useful as the harness matures and accumulated
+  closed-cite drift would otherwise sit unread. API failures still
+  downgrade to a warning even under strict mode (the gate is still
+  not a transient-CI tripwire — only documented closed state hits).
+
+There is **no exemption mechanism** (see `feedback_no_noqa`). If an
+entry should not be flagged, it is not aspirational — reword it with
+a different marker (e.g. `**Production note:**` for future product
+evolution that's not a yet-to-be-enforced rule).
+
+Exit codes:
+  0 — every aspirational marker cites at least one open / unverified ticket
+  1 — at least one marker has no ticket cite, or (strict mode) has a
+      closed cite
+  2 — script-level error (`docs/INVARIANTS.md` not found)
+
+Usage (from repo root):
+
+    python .github/scripts/check_aspirational_tickets.py
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import re
+import sys
+import urllib.error
+import urllib.request
+from pathlib import Path
+
+INVARIANTS_DOC = Path("docs/INVARIANTS.md")
+
+# A marker line *starts* with one or two asterisks immediately followed by
+# `Aspirational` and a word boundary. Avoids picking up mid-sentence prose
+# such as `Items marked *aspirational* are rules…` (lower-case + non-anchored).
+_MARKER_RE = re.compile(r"^\*{1,2}Aspirational\b")
+_TICKET_RE = re.compile(r"#(\d+)")
+
+
+def _find_markers(text: str) -> list[tuple[int, str]]:
+    """Return (1-indexed line number, line text) for each aspirational marker."""
+    found: list[tuple[int, str]] = []
+    for index, line in enumerate(text.splitlines(), start=1):
+        if _MARKER_RE.match(line.lstrip()):
+            found.append((index, line.strip()))
+    return found
+
+
+def _issue_state(repo: str, number: str, token: str) -> str | None:
+    """Return the issue state ("open" / "closed") or None on any API failure."""
+    url = f"https://api.github.com/repos/{repo}/issues/{number}"
+    req = urllib.request.Request(  # noqa: S310 — fixed api.github.com host
+        url,
+        headers={
+            "Authorization": f"Bearer {token}",
+            "Accept": "application/vnd.github+json",
+            "X-GitHub-Api-Version": "2022-11-28",
+        },
+    )
+    try:
+        with urllib.request.urlopen(req, timeout=5) as response:  # noqa: S310
+            payload = json.loads(response.read().decode("utf-8"))
+    except urllib.error.URLError, TimeoutError, json.JSONDecodeError:
+        return None
+    state = payload.get("state")
+    return state if isinstance(state, str) else None
+
+
+def _check_closed_cites(
+    line_number: int,
+    tickets: list[str],
+    repo: str,
+    token: str,
+    *,
+    strict: bool,
+) -> list[str]:
+    """Return failure messages (strict only); print warnings otherwise."""
+    extra_failures: list[str] = []
+    for ticket in tickets:
+        state = _issue_state(repo, ticket, token)
+        if state != "closed":
+            continue
+        severity = "error" if strict else "warning"
+        message = (
+            f"::{severity} file={INVARIANTS_DOC.as_posix()},line={line_number}::"
+            f"cited ticket #{ticket} is closed. Promote the invariant to "
+            "enforced (and remove the marker), or refile with a fresh ticket."
+        )
+        if strict:
+            extra_failures.append(message)
+        else:
+            print(message)
+    return extra_failures
+
+
+def main() -> int:
+    if not INVARIANTS_DOC.is_file():
+        print(f"::error::{INVARIANTS_DOC.as_posix()} not found; run from repo root")
+        return 2
+
+    text = INVARIANTS_DOC.read_text(encoding="utf-8")
+    markers = _find_markers(text)
+
+    if not markers:
+        print("No aspirational markers found in docs/INVARIANTS.md.")
+        return 0
+
+    failures: list[str] = []
+    repo = os.environ.get("GITHUB_REPOSITORY", "")
+    token = os.environ.get("GITHUB_TOKEN", "")
+    can_check_state = bool(repo and token)
+    strict_closed = os.environ.get("ASPIRATIONAL_STRICT", "") == "1"
+
+    for line_number, line in markers:
+        tickets = _TICKET_RE.findall(line)
+        if not tickets:
+            failures.append(
+                f"::error file={INVARIANTS_DOC.as_posix()},line={line_number}::"
+                "aspirational marker has no `#NNN` ticket reference. Add one or "
+                "reword (see the doc's *How to add an invariant* footer)."
+            )
+            continue
+        print(
+            f"{INVARIANTS_DOC.as_posix()}:{line_number} — "
+            f"aspirational, cites: {', '.join('#' + t for t in tickets)}"
+        )
+        if can_check_state:
+            failures.extend(
+                _check_closed_cites(
+                    line_number, tickets, repo, token, strict=strict_closed
+                )
+            )
+
+    if failures:
+        for line in failures:
+            print(line)
+        print(
+            f"\n{len(failures)} aspirational marker(s) missing a ticket cite. "
+            "Fix in this PR — there is no exemption mechanism, see the "
+            "module docstring."
+        )
+        return 1
+
+    suffix_parts: list[str] = []
+    if can_check_state:
+        suffix_parts.append("with state lookup")
+    if strict_closed:
+        suffix_parts.append("strict closed-cite mode")
+    suffix = f" ({', '.join(suffix_parts)})" if suffix_parts else ""
+    print(f"Aspirational-ticket audit OK — {len(markers)} marker(s) checked{suffix}.")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 2f0b2cb..0c9bf08 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -151,6 +151,21 @@ jobs:
           python-version: "3.14"
       - run: python .github/scripts/check_tests_present.py
 
+  aspirational-tickets:
+    name: Aspirational ticket cite
+    runs-on: ubuntu-latest
+    # docs/INVARIANTS.md: every `*Aspirational` / `**Aspirational**` marker
+    # line cites a `#NNN` ticket; closed cites warn (or fail under
+    # ASPIRATIONAL_STRICT=1). GITHUB_TOKEN enables ticket-state lookup.
+    steps:
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
+      - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065  # v5
+        with:
+          python-version: "3.14"
+      - env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: python .github/scripts/check_aspirational_tickets.py
+
   src-readmes:
     name: src/ README audit
     runs-on: ubuntu-latest
diff --git a/docs/INVARIANTS.md b/docs/INVARIANTS.md
index 093305b..74c3010 100644
--- a/docs/INVARIANTS.md
+++ b/docs/INVARIANTS.md
@@ -48,3 +48,17 @@ Add invariants below as your domain stabilises. Each entry should describe:
 - *Enforced by:* test, review, or specific CI job.
 
 Examples of the kind of invariant that earns a slot here: a domain-specific data contract that must validate at ingestion, a security boundary that must not log PII, a tool-call protocol that the agent must follow before the LLM emits a final response.
+
+---
+
+## How to add an invariant
+
+Add a new `## N. <rule>` section at the bottom of slots 6+. Each entry has three lines:
+
+- The rule, one sentence.
+- **Where:** module / config file path.
+- **Enforced by:** test name, CI job, or `review` if no automated check exists yet.
+
+If the invariant is not yet automated, add a marker line whose first non-whitespace characters are `*Aspirational` or `**Aspirational**` — both shapes are recognised by the `Aspirational ticket cite` CI job (`.github/scripts/check_aspirational_tickets.py`). The marker line MUST cite at least one `#NNN` ticket; the gate fails CI otherwise. When the cited ticket closes, promote the invariant to enforced in the same PR (delete the marker line, fill in `Enforced by:`). Set `ASPIRATIONAL_STRICT=1` on the gate's CI job to escalate closed-cite drift from `::warning::` to a hard failure.
+
+Use `**Production note:**` (not `**Aspirational**`) for forward-looking product evolution that is NOT a future-enforced rule — e.g. "this changes when multi-tenant lands". Production notes are not picked up by the gate.
diff --git a/pyproject.toml b/pyproject.toml
index 2e06350..1f84568 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "harness-python-react"
-version = "0.2.1"
+version = "0.2.2"
 description = "Production-quality LLM-driven coding harness — Python (FastAPI) backend, Vite + React + TypeScript frontend."
 readme = "README.md"
 requires-python = ">=3.14"
diff --git a/uv.lock b/uv.lock
index df07bc8..e4cbcb4 100644
--- a/uv.lock
+++ b/uv.lock
@@ -328,7 +328,7 @@ wheels = [
 
 [[package]]
 name = "harness-python-react"
-version = "0.2.1"
+version = "0.2.2"
 source = { virtual = "." }
 dependencies = [
     { name = "fastapi" },

From 6375817ac51aaf31477b541e5246a150a9cfdb2b Mon Sep 17 00:00:00 2001
From: Constantinos <41453723+constk@users.noreply.github.com>
Date: Fri, 1 May 2026 00:24:39 +1000
Subject: [PATCH 6/9] =?UTF-8?q?chore:=20subject-case=20sync=20extension=20?=
 =?UTF-8?q?=E2=80=94=20commitizen=20+=20pr-title=20(#128,=20#154)=20(#73)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .github/scripts/check_commit_types.py    | 202 ++++++++++++++----
 pyproject.toml                           |   4 +-
 tests/test_check_commit_types_subject.py | 250 +++++++++++++++++++++++
 uv.lock                                  |   2 +-
 4 files changed, 410 insertions(+), 48 deletions(-)
 create mode 100644 tests/test_check_commit_types_subject.py

diff --git a/.github/scripts/check_commit_types.py b/.github/scripts/check_commit_types.py
index c3d9ec9..b1fe7a7 100644
--- a/.github/scripts/check_commit_types.py
+++ b/.github/scripts/check_commit_types.py
@@ -1,22 +1,30 @@
 #!/usr/bin/env python3
-"""Verify the commit-type allowlist stays in sync across two configs.
+"""Verify the commit-type allowlist + subject-case rule stay in sync.
 
-Seven prefixes are allowed on commits and PR titles: feat, fix, docs,
-test, refactor, chore, release. Two places enforce that list today:
+Two configs hand-encode the same conventional-commit policy:
 
 1. ``[tool.commitizen].customize.schema_pattern`` in ``pyproject.toml`` —
    the commitizen regex (commit-msg hook, local).
 2. ``.github/workflows/pr-title.yml`` ``types:`` input to the
-   ``amannn/action-semantic-pull-request`` step — the PR-title CI check.
+   ``amannn/action-semantic-pull-request`` step plus its ``subjectPattern``
+   — the PR-title CI check.
 
 Both are hand-maintained. Add a type in one, forget the other, and the
 two layers drift: commits fail locally but PR titles pass (or vice
 versa). ``docs/DEVELOPMENT.md`` explicitly warns these must stay in
 sync, but prose warnings drift too.
 
-This script mirrors the ``check_required_contexts.py`` pattern from #72
-for this second drift class. Fails CI when the two sets disagree in
-either direction.
+This script enforces sync on **two axes**:
+
+- **Type allowlist** — the seven prefixes (feat, fix, docs, test,
+  refactor, chore, release). Mirrors the ``check_required_contexts.py``
+  pattern from #72.
+- **Subject-case rule** — the negative-lookahead constraint that rejects
+  Title-Case subjects (``feat: Add thing`` → reject; ``feat: add thing``
+  / ``feat: CI failure`` → accept). Added in #128 so commitizen rejects
+  Title Case at commit-msg time, not just at the CI gate.
+
+Fails CI when either axis disagrees in either direction.
 
 Usage (from repo root):
 
@@ -48,6 +56,19 @@
 # extraction when a future type contained digits or hyphens.
 _SCHEMA_ALTERNATION_RE = re.compile(r"\^\(([a-z0-9\-|]+)\)")
 
+# Matches the subject-case constraint between `:\s` and the trailing `.+`
+# in the commitizen schema_pattern. Tolerates three shapes seen across
+# revisions:
+#   :\s    — original #128 shape (single-space, susceptible to the
+#            `feat:  Add thing` double-space bypass).
+#   :\s+   — naive widening (still backtracks on Title-Case input).
+#   :\s++  — possessive quantifier (#154); the schema we want long-term
+#            because it forbids the lookahead-bypass via backtracking.
+# All three encode the same "after `:` then whitespace, then this lookahead"
+# semantics; the regex captures the lookahead chunk regardless.
+# Returns "" if no subject constraint is present (commitizen pre-#128 shape).
+_SCHEMA_SUBJECT_RE = re.compile(r":\\s\+{0,2}(.*?)\.\+$")
+
 
 def commitizen_types() -> set[str]:
     """Return the set of types allowed by the commitizen schema regex."""
@@ -83,28 +104,87 @@ def commitizen_types() -> set[str]:
     return types
 
 
+def commitizen_subject_pattern() -> str:
+    """Extract the subject-case constraint from commitizen's schema_pattern.
+
+    The schema_pattern shape (post-#128):
+        ^(feat|fix|...)(\\([\\w\\-]+\\))?!?:\\s(?![A-Z][a-z]).+
+
+    Returns the chunk between ``:\\s`` and the trailing ``.+`` — i.e. the
+    negative-lookahead constraint on the subject. Returns "" when no
+    subject constraint is present (commitizen pre-#128 shape).
+    """
+    data = tomllib.loads(PYPROJECT.read_text(encoding="utf-8"))
+    schema: str = (
+        data.get("tool", {})
+        .get("commitizen", {})
+        .get("customize", {})
+        .get("schema_pattern", "")
+    )
+    if not schema:
+        # Same error commitizen_types() raises — caller already enforces.
+        return ""
+    match = _SCHEMA_SUBJECT_RE.search(schema)
+    if not match:
+        return ""
+    return match.group(1)
+
+
 def pr_title_types() -> set[str]:
     """Return the set of types declared in the pr-title workflow."""
+    return _pr_title_field("types", _parse_types)  # type: ignore[return-value]
+
+
+def pr_title_subject_pattern() -> str:
+    """Return the subject-case constraint declared in the pr-title workflow.
+
+    Strips the leading ``^`` anchor and the trailing ``.+$`` from the
+    YAML ``subjectPattern`` field so the comparison with commitizen's
+    constraint is normalised. Returns "" when the field is absent.
+    """
+    raw: str = _pr_title_field("subjectPattern", lambda v: v or "", required=False)  # type: ignore[assignment]
+    if not raw:
+        return ""
+    pattern = re.sub(r"^\^", "", raw)
+    pattern = re.sub(r"\.\+\$$", "", pattern)
+    return pattern
+
+
+def _parse_types(value: str) -> set[str]:
+    """Parse the YAML ``types`` field (newline-separated string) into a set."""
+    types = {line.strip() for line in value.splitlines() if line.strip()}
+    if not types:
+        msg = (
+            f"`types:` block in {PR_TITLE_YML} is empty or "
+            "whitespace-only. Expected at least one commit type per line."
+        )
+        raise ValueError(msg)
+    return types
+
+
+def _pr_title_field(
+    name: str,
+    parse: object,
+    *,
+    required: bool = True,
+) -> object:
+    """Extract a single field from the action-semantic-pull-request step."""
     data = yaml.safe_load(PR_TITLE_YML.read_text(encoding="utf-8"))
     for job in data.get("jobs", {}).values():
         for step in job.get("steps", []):
             uses = step.get("uses", "")
             if "action-semantic-pull-request" in uses:
-                types_block: str = step.get("with", {}).get("types", "")
-                types = {
-                    line.strip() for line in types_block.splitlines() if line.strip()
-                }
-                # An empty or whitespace-only `types:` block would return an
-                # empty set and trivially match an empty commitizen set —
-                # masking a real config error. Fail loudly instead (#92).
-                if not types:
-                    msg = (
-                        f"`types:` block in {PR_TITLE_YML} is empty or "
-                        "whitespace-only. Expected at least one commit type "
-                        "per line."
-                    )
-                    raise ValueError(msg)
-                return types
+                value = step.get("with", {}).get(name)
+                if value is None:
+                    if required:
+                        msg = (
+                            f"`with.{name}` not found in the "
+                            "action-semantic-pull-request step. Update this "
+                            "script if the action's input names changed."
+                        )
+                        raise ValueError(msg)
+                    return ""
+                return parse(value)  # type: ignore[operator]
     msg = (
         "Could not find an `amannn/action-semantic-pull-request` step in "
         f"{PR_TITLE_YML}. If the action was renamed or the file moved, "
@@ -114,38 +194,70 @@ def pr_title_types() -> set[str]:
 
 
 def main() -> int:
-    cz = commitizen_types()
-    pr = pr_title_types()
+    cz_types = commitizen_types()
+    pr_types = pr_title_types()
+    cz_subject = commitizen_subject_pattern()
+    pr_subject = pr_title_subject_pattern()
+
+    failed = False
 
     # Belt-and-braces safety net: both extractors raise on empty, but guard
     # against a future refactor that drops the raise (#92).
-    if not cz or not pr:
+    if not cz_types or not pr_types:
         print(
             "::error::One or both extractors returned empty; sync check cannot "
-            "proceed. commitizen_types() empty: "
-            f"{not cz}; pr_title_types() empty: {not pr}."
+            f"proceed. commitizen_types() empty: {not cz_types}; "
+            f"pr_title_types() empty: {not pr_types}."
         )
         return 1
 
-    if cz == pr:
-        print(f"Commit types in sync ({len(cz)} types): {sorted(cz)}")
-        return 0
+    if cz_types == pr_types:
+        print(f"Commit types in sync ({len(cz_types)} types): {sorted(cz_types)}")
+    else:
+        failed = True
+        print(
+            "::error::[tool.commitizen].customize.schema_pattern and "
+            ".github/workflows/pr-title.yml types are out of sync"
+        )
+        for name in sorted(cz_types - pr_types):
+            print(f"::error::  + in commitizen only: {name!r}")
+        for name in sorted(pr_types - cz_types):
+            print(f"::error::  - in pr-title.yml only: {name!r}")
+        print(
+            "\nFix: update both the schema_pattern in pyproject.toml AND "
+            "the `types` list in .github/workflows/pr-title.yml so they "
+            "contain the same type names. See docs/DEVELOPMENT.md#commit-messages."
+        )
+
+    if cz_subject == pr_subject:
+        if cz_subject:
+            print(f"Subject-case constraint in sync: {cz_subject!r}")
+        else:
+            # Both empty — older shape, before #128's subject-case landed in
+            # commitizen. Don't fail here; the `Lint PR title` workflow remains
+            # the single layer if commitizen drops back. Surface as a warning.
+            print(
+                "::warning::Both commitizen and pr-title.yml have empty "
+                "subject-case constraints. Per docs/DEVELOPMENT.md the rule "
+                "should be enforced at both layers — re-add `(?![A-Z][a-z])` "
+                "to commitizen's schema_pattern after `:\\s`."
+            )
+    else:
+        failed = True
+        print(
+            "::error::commitizen schema_pattern subject-case constraint "
+            "and pr-title.yml `subjectPattern` are out of sync"
+        )
+        print(f"::error::  commitizen extracted: {cz_subject!r}")
+        print(f"::error::  pr-title.yml extracted: {pr_subject!r}")
+        print(
+            "\nFix: keep both regexes equivalent after stripping anchors. "
+            "Commitizen's chunk lives between `:\\s` and `.+` in "
+            "schema_pattern; pr-title.yml's lives in the `subjectPattern` "
+            "field stripped of `^` and `.+$`."
+        )
 
-    print(
-        "::error::[tool.commitizen].customize.schema_pattern and "
-        ".github/workflows/pr-title.yml types are out of sync"
-    )
-    for name in sorted(cz - pr):
-        print(f"::error::  + in commitizen only: {name!r}")
-    for name in sorted(pr - cz):
-        print(f"::error::  - in pr-title.yml only: {name!r}")
-    print(
-        "\nFix: update both the schema_pattern in pyproject.toml AND "
-        "the `types` list in .github/workflows/pr-title.yml so they "
-        "contain the same type names. See docs/DEVELOPMENT.md#commit-messages "
-        "for the current allowed list."
-    )
-    return 1
+    return 1 if failed else 0
 
 
 if __name__ == "__main__":
diff --git a/pyproject.toml b/pyproject.toml
index 1f84568..ab267f3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "harness-python-react"
-version = "0.2.2"
+version = "0.2.3"
 description = "Production-quality LLM-driven coding harness — Python (FastAPI) backend, Vite + React + TypeScript frontend."
 readme = "README.md"
 requires-python = ">=3.14"
@@ -165,7 +165,7 @@ name = "cz_customize"
 
 [tool.commitizen.customize]
 schema = "<type>(<scope>): <subject>"
-schema_pattern = '^(feat|fix|docs|test|refactor|chore|release)(\([\w\-]+\))?!?:\s.+'
+schema_pattern = '^(feat|fix|docs|test|refactor|chore|release)(\([\w\-]+\))?!?:\s++(?![A-Z][a-z]).+'
 bump_pattern = '^(feat|fix|refactor)'
 bump_map = {feat = "MINOR", fix = "PATCH", refactor = "PATCH", chore = "PATCH", docs = "PATCH", test = "PATCH"}
 example = "feat: add an example feature"
diff --git a/tests/test_check_commit_types_subject.py b/tests/test_check_commit_types_subject.py
new file mode 100644
index 0000000..e1a8f11
--- /dev/null
+++ b/tests/test_check_commit_types_subject.py
@@ -0,0 +1,250 @@
+"""Tests for the subject-case sync extension to ``check_commit_types.py``.
+
+#128 added a second axis to the existing commit-type sync gate: the
+subject-case constraint must match between commitizen's ``schema_pattern``
+and pr-title.yml's ``subjectPattern``. Tests for the type-allowlist axis
+(#91 / #92) live in ``test_check_commit_types.py``; this sibling file
+isolates the subject-pattern tests so neither file approaches the
+300-line cap.
+"""
+
+from __future__ import annotations
+
+import importlib.util
+import re
+from pathlib import Path
+from typing import TYPE_CHECKING
+from unittest.mock import patch
+
+if TYPE_CHECKING:
+    from types import ModuleType
+
+    import pytest
+
+
+def _load_script() -> ModuleType:
+    script_path = (
+        Path(__file__).parent.parent / ".github" / "scripts" / "check_commit_types.py"
+    )
+    spec = importlib.util.spec_from_file_location("check_commit_types", script_path)
+    assert spec is not None
+    assert spec.loader is not None
+    module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(module)
+    return module
+
+
+cct = _load_script()
+
+
+# ---------------------------------------------------------------------------
+# commitizen_subject_pattern() — extracts the lookahead chunk from
+# pyproject.toml's schema_pattern between `:\s` and the trailing `.+`.
+# ---------------------------------------------------------------------------
+
+
+class TestCommitizenSubjectExtraction:
+    """commitizen_subject_pattern() extracts the right chunk from pyproject."""
+
+    def test_extracts_negative_lookahead_when_present(self, tmp_path: Path) -> None:
+        pyproject = tmp_path / "pyproject.toml"
+        pyproject.write_text(
+            "[tool.commitizen.customize]\n"
+            "schema_pattern = "
+            r"'^(feat|fix)(\([\w\-]+\))?!?:\s(?![A-Z][a-z]).+'"
+            "\n",
+            encoding="utf-8",
+        )
+        with patch.object(cct, "PYPROJECT", pyproject):
+            assert cct.commitizen_subject_pattern() == "(?![A-Z][a-z])"
+
+    def test_returns_empty_when_no_subject_constraint(self, tmp_path: Path) -> None:
+        """Pre-#128 schema shape (just `.+` after `:\\s`) returns empty."""
+        pyproject = tmp_path / "pyproject.toml"
+        pyproject.write_text(
+            "[tool.commitizen.customize]\n"
+            "schema_pattern = "
+            r"'^(feat|fix)(\([\w\-]+\))?!?:\s.+'"
+            "\n",
+            encoding="utf-8",
+        )
+        with patch.object(cct, "PYPROJECT", pyproject):
+            assert cct.commitizen_subject_pattern() == ""
+
+    def test_returns_empty_when_schema_absent(self, tmp_path: Path) -> None:
+        pyproject = tmp_path / "pyproject.toml"
+        pyproject.write_text("[tool.other]\nfoo = 1\n", encoding="utf-8")
+        with patch.object(cct, "PYPROJECT", pyproject):
+            assert cct.commitizen_subject_pattern() == ""
+
+
+# ---------------------------------------------------------------------------
+# pr_title_subject_pattern() — extracts the YAML subjectPattern field and
+# strips `^` / `.+$` anchors so the comparison with commitizen normalises.
+# ---------------------------------------------------------------------------
+
+
+class TestPrTitleSubjectExtraction:
+    """pr_title_subject_pattern() parses YAML and strips anchors."""
+
+    def test_extracts_and_strips_anchors(self, tmp_path: Path) -> None:
+        yml = tmp_path / "pr-title.yml"
+        yml.write_text(
+            "jobs:\n"
+            "  lint:\n"
+            "    steps:\n"
+            "      - uses: amannn/action-semantic-pull-request@abc\n"
+            "        with:\n"
+            "          types: |\n"
+            "            feat\n"
+            "          subjectPattern: '^(?![A-Z][a-z]).+$'\n",
+            encoding="utf-8",
+        )
+        with patch.object(cct, "PR_TITLE_YML", yml):
+            assert cct.pr_title_subject_pattern() == "(?![A-Z][a-z])"
+
+    def test_returns_empty_when_field_absent(self, tmp_path: Path) -> None:
+        yml = tmp_path / "pr-title.yml"
+        yml.write_text(
+            "jobs:\n"
+            "  lint:\n"
+            "    steps:\n"
+            "      - uses: amannn/action-semantic-pull-request@abc\n"
+            "        with:\n"
+            "          types: |\n"
+            "            feat\n",
+            encoding="utf-8",
+        )
+        with patch.object(cct, "PR_TITLE_YML", yml):
+            assert cct.pr_title_subject_pattern() == ""
+
+
+# ---------------------------------------------------------------------------
+# main() with subject-pattern axis
+# ---------------------------------------------------------------------------
+
+
+def _setup_both(
+    tmp_path: Path,
+    cz_subject: str,
+    pr_subject: str,
+) -> tuple[Path, Path]:
+    """Stub PYPROJECT + PR_TITLE_YML with the given subject-case patterns."""
+    cz_extra = cz_subject if cz_subject else ""
+    cz_pattern = (
+        r"^(feat|fix|docs|test|refactor|chore|release)(\([\w\-]+\))?!?:\s"
+        f"{cz_extra}.+"
+    )
+    pyproject = tmp_path / "pyproject.toml"
+    pyproject.write_text(
+        f"[tool.commitizen.customize]\nschema_pattern = '{cz_pattern}'\n",
+        encoding="utf-8",
+    )
+    pr_field = f"          subjectPattern: '^{pr_subject}.+$'\n" if pr_subject else ""
+    yml = tmp_path / "pr-title.yml"
+    yml.write_text(
+        "jobs:\n"
+        "  lint:\n"
+        "    steps:\n"
+        "      - uses: amannn/action-semantic-pull-request@abc\n"
+        "        with:\n"
+        "          types: |\n"
+        "            feat\n            fix\n            docs\n            test\n"
+        "            refactor\n            chore\n            release\n"
+        f"{pr_field}",
+        encoding="utf-8",
+    )
+    return pyproject, yml
+
+
+def test_main_subjects_match_passes(
+    tmp_path: Path, capsys: pytest.CaptureFixture[str]
+) -> None:
+    pyproject, yml = _setup_both(tmp_path, "(?![A-Z][a-z])", "(?![A-Z][a-z])")
+    with (
+        patch.object(cct, "PYPROJECT", pyproject),
+        patch.object(cct, "PR_TITLE_YML", yml),
+    ):
+        assert cct.main() == 0
+    out = capsys.readouterr().out
+    assert "Subject-case constraint in sync" in out
+
+
+def test_main_subject_drift_fails(
+    tmp_path: Path, capsys: pytest.CaptureFixture[str]
+) -> None:
+    """commitizen has the constraint, pr-title.yml doesn't → fail."""
+    pyproject, yml = _setup_both(tmp_path, "(?![A-Z][a-z])", "")
+    with (
+        patch.object(cct, "PYPROJECT", pyproject),
+        patch.object(cct, "PR_TITLE_YML", yml),
+    ):
+        assert cct.main() == 1
+    err = capsys.readouterr().out
+    assert "subject-case constraint" in err
+    assert "out of sync" in err
+
+
+def test_main_both_subjects_empty_warns_not_fails(
+    tmp_path: Path, capsys: pytest.CaptureFixture[str]
+) -> None:
+    """Both empty (pre-#128 shape) → warn, don't block."""
+    pyproject, yml = _setup_both(tmp_path, "", "")
+    with (
+        patch.object(cct, "PYPROJECT", pyproject),
+        patch.object(cct, "PR_TITLE_YML", yml),
+    ):
+        assert cct.main() == 0
+    out = capsys.readouterr().out
+    assert "::warning::" in out
+    assert "subject-case" in out
+
+
+# ---------------------------------------------------------------------------
+# #154 — schema rejects double-space-then-Title-Case (`feat:  Add thing`)
+# ---------------------------------------------------------------------------
+#
+# The schema_pattern landed in #128 used `:\s` (single whitespace), which the
+# negative lookahead `(?![A-Z][a-z])` then anchors to the *first* non-space
+# character. With `\s` consuming exactly one space, `feat:  Add thing` (two
+# spaces) bypassed the constraint: `\s` ate the first space, the lookahead
+# saw the second space (not `[A-Z]`), succeeded, and `.+` swallowed the
+# Title-Case subject. #154 widens to `\s+` so the lookahead anchors to the
+# first non-whitespace character regardless of run length.
+#
+# These tests exercise the LIVE pyproject.toml schema_pattern (not a fixture)
+# so a future regression that drops back to `\s` fails here.
+
+
+def _live_schema_pattern() -> re.Pattern[str]:
+    import tomllib  # local import — stdlib, only this test class needs it
+
+    data = tomllib.loads(cct.PYPROJECT.read_text(encoding="utf-8"))
+    schema: str = data["tool"]["commitizen"]["customize"]["schema_pattern"]
+    return re.compile(schema)
+
+
+class TestSchemaRejectsTitleCaseRegardlessOfSpaceCount:
+    """The negative lookahead must anchor to the first non-whitespace char."""
+
+    def test_single_space_lowercase_subject_passes(self) -> None:
+        assert _live_schema_pattern().fullmatch("feat: add thing")
+
+    def test_single_space_title_case_subject_fails(self) -> None:
+        assert not _live_schema_pattern().fullmatch("feat: Add thing")
+
+    def test_double_space_lowercase_subject_passes(self) -> None:
+        """Liberal on input shape — multi-space after `:` is unusual but not banned."""
+        assert _live_schema_pattern().fullmatch("feat:  add thing")
+
+    def test_double_space_title_case_subject_fails(self) -> None:
+        """The #154 regression — pre-fix this passed silently."""
+        assert not _live_schema_pattern().fullmatch("feat:  Add thing")
+
+    def test_triple_space_title_case_subject_fails(self) -> None:
+        """Liberal `\\s+` keeps anchoring regardless of run length."""
+        assert not _live_schema_pattern().fullmatch("feat:   Add thing")
+
+    def test_subject_starting_with_uppercase_acronym_passes(self) -> None:
+        """`CI failure` is two-uppercase-letters — lookahead `[A-Z][a-z]` ignores it."""
+        assert _live_schema_pattern().fullmatch("feat: CI failure")
diff --git a/uv.lock b/uv.lock
index e4cbcb4..5edabc5 100644
--- a/uv.lock
+++ b/uv.lock
@@ -328,7 +328,7 @@ wheels = [
 
 [[package]]
 name = "harness-python-react"
-version = "0.2.2"
+version = "0.2.3"
 source = { virtual = "." }
 dependencies = [
     { name = "fastapi" },

From 5b8284dfe07de974bf45ed8051b5a81177de7136 Mon Sep 17 00:00:00 2001
From: Constantinos <41453723+constk@users.noreply.github.com>
Date: Fri, 1 May 2026 09:00:00 +1000
Subject: [PATCH 7/9] chore: post-release CHANGELOG rollup automation (#150,
 #166, #167) (#74)

---
 .github/scripts/check_required_contexts.py |   4 +
 .github/scripts/rollup_changelog.py        | 268 +++++++++++++++++++++
 .github/workflows/changelog-rollup.yml     | 155 ++++++++++++
 CHANGELOG.md                               |  17 +-
 pyproject.toml                             |   2 +-
 tests/test_rollup_changelog.py             | 204 ++++++++++++++++
 uv.lock                                    |   2 +-
 7 files changed, 645 insertions(+), 7 deletions(-)
 create mode 100644 .github/scripts/rollup_changelog.py
 create mode 100644 .github/workflows/changelog-rollup.yml
 create mode 100644 tests/test_rollup_changelog.py

diff --git a/.github/scripts/check_required_contexts.py b/.github/scripts/check_required_contexts.py
index 518d946..0ff657a 100644
--- a/.github/scripts/check_required_contexts.py
+++ b/.github/scripts/check_required_contexts.py
@@ -51,6 +51,10 @@
         "Tag-triggered (v*.*.*); builds image, generates SBOM, publishes"
         " release. Never appears on PR check sets."
     ),
+    "changelog-rollup.yml": (
+        "workflow_run-triggered after release.yml + workflow_dispatch only;"
+        " opens its own roll-up PR (which goes through ci.yml as normal)."
+    ),
 }
 
 
diff --git a/.github/scripts/rollup_changelog.py b/.github/scripts/rollup_changelog.py
new file mode 100644
index 0000000..db3c5fc
--- /dev/null
+++ b/.github/scripts/rollup_changelog.py
@@ -0,0 +1,268 @@
+#!/usr/bin/env python3
+"""Roll up the CHANGELOG `[Unreleased]` section under a `[<version>]` heading.
+
+Triggered automatically by `.github/workflows/changelog-rollup.yml` after a
+successful `release.yml` run. Performs the mechanical edits that would
+otherwise be hand-rolled per release.
+
+Edits to `CHANGELOG.md`:
+
+1. Insert `## [<version>] - <date>` heading immediately after `## [Unreleased]`.
+2. Update `[Unreleased]: …/compare/<old>...HEAD` → `…/compare/<tag>...HEAD`.
+3. Insert `[<version>]: …/compare/<prior-tag>...<tag>` link in the footer.
+
+Edits to `pyproject.toml` and `uv.lock`:
+
+4. Bump `[project].version` PATCH (e.g. `0.2.10` → `0.2.11`). The release
+   tag's version IS the current dev version (release: PRs don't bump),
+   so the rollup PR is what advances develop into the next cycle.
+5. Sync the `[[package]] name = "harness-python-react"` self-version line
+   in `uv.lock` to match `pyproject.toml`. Same hand-edit pattern as
+   the version-bump gate enforces on regular PRs.
+
+Idempotency:
+
+- If a `## [<version>]` heading already exists, step 1 is skipped (the
+  rollup PR can be re-run without duplicating sections).
+- If a `[<version>]:` footer link already exists, step 3 is skipped.
+- The version-bump steps are idempotent only if pyproject's version still
+  equals the released tag; running twice on an already-bumped develop
+  would push it forward again. Workflow uses a fresh checkout so this
+  doesn't compound across re-runs.
+
+Modes:
+
+- Default: full roll-up (CHANGELOG edits + version bump). This is what
+  `changelog-rollup.yml` does after a release tag is cut.
+- `--no-bump`: CHANGELOG edits only, skip the version bump. Use when
+  pre-staging the CHANGELOG before a release PR is opened — develop's
+  current version is the about-to-be-released version, and the
+  post-release rollup is what advances develop into the next cycle.
+
+Edge case — no prior tag (first release):
+
+- `--prior-tag ""` produces a footer link of shape
+  `[<version>]: …/releases/tag/<tag>` (mirrors the existing `[1.0.0]`
+  / first-tag entry shape).
+
+Exit codes:
+  0 — file edits applied (or already-rolled-up; idempotent path)
+  1 — argument validation failure (bad version shape, etc.)
+  2 — file not found / write error / TOML parse error
+
+Usage:
+
+    python .github/scripts/rollup_changelog.py \\
+        --tag v0.3.0 --prior-tag v0.2.5 --date 2026-05-01
+"""
+
+from __future__ import annotations
+
+import argparse
+import re
+import sys
+import tomllib
+from pathlib import Path
+
+CHANGELOG = Path("CHANGELOG.md")
+PYPROJECT = Path("pyproject.toml")
+UV_LOCK = Path("uv.lock")
+
+REPO_PATH = "constk/harness-python-react"
+PACKAGE_NAME = "harness-python-react"
+COMPARE_URL_BASE = f"https://github.com/{REPO_PATH}/compare"
+RELEASES_TAG_BASE = f"https://github.com/{REPO_PATH}/releases/tag"
+
+_SEMVER_RE = re.compile(r"^v?(\d+)\.(\d+)\.(\d+)$")
+_DATE_RE = re.compile(r"^\d{4}-\d{2}-\d{2}$")
+
+
+def _strip_v(tag: str) -> str:
+    return tag[1:] if tag.startswith("v") else tag
+
+
+def _bump_patch(version: str) -> str:
+    """`0.2.10` → `0.2.11`. Raises ValueError on non-semver input."""
+    match = _SEMVER_RE.fullmatch(version)
+    if not match:
+        msg = f"unsupported semver shape: {version!r}"
+        raise ValueError(msg)
+    major, minor, patch = match.groups()
+    return f"{major}.{minor}.{int(patch) + 1}"
+
+
+def rollup_changelog_text(
+    text: str,
+    tag: str,
+    prior_tag: str,
+    date: str,
+) -> str:
+    """Pure-string transform — `text` → updated `text`. Idempotent."""
+    version = _strip_v(tag)
+    heading_re = re.compile(
+        rf"^## \[{re.escape(version)}\]\s+-\s+\d{{4}}-\d{{2}}-\d{{2}}",
+        re.MULTILINE,
+    )
+    if not heading_re.search(text):
+        # Trailing `\n\n` keeps the blank line between the new heading and
+        # its first subsection that every existing release section has.
+        # Without it the rendered diff reads `## [v0.3.0] - …\n### Features`
+        # which Keep-a-Changelog tolerates but is cosmetically inconsistent.
+        text = re.sub(
+            r"^## \[Unreleased\]\s*\n",
+            f"## [Unreleased]\n\n## [{version}] - {date}\n\n",
+            text,
+            count=1,
+            flags=re.MULTILINE,
+        )
+
+    text = re.sub(
+        r"^\[Unreleased\]:\s+(.*?/compare/)\S+?\.\.\.HEAD\s*$",
+        rf"[Unreleased]: \1{tag}...HEAD",
+        text,
+        count=1,
+        flags=re.MULTILINE,
+    )
+
+    if f"[{version}]:" not in text:
+        if prior_tag:
+            new_link = f"[{version}]: {COMPARE_URL_BASE}/{prior_tag}...{tag}"
+        else:
+            new_link = f"[{version}]: {RELEASES_TAG_BASE}/{tag}"
+        text = re.sub(
+            r"^(\[Unreleased\]:.*)$",
+            rf"\1\n{new_link}",
+            text,
+            count=1,
+            flags=re.MULTILINE,
+        )
+    return text
+
+
+def bump_pyproject_text(text: str, current: str, new: str) -> str:
+    """Replace `version = "<current>"` with `version = "<new>"` once."""
+    pattern = re.compile(rf'^version\s*=\s*"{re.escape(current)}"', re.MULTILINE)
+    if not pattern.search(text):
+        msg = (
+            f'pyproject.toml version line `version = "{current}"` not found; '
+            "either the rollup ran out-of-order or the file shape changed."
+        )
+        raise ValueError(msg)
+    return pattern.sub(f'version = "{new}"', text, count=1)
+
+
+def bump_uv_lock_text(text: str, current: str, new: str) -> str:
+    """Replace the project's self-version line in `uv.lock` once."""
+    pattern = re.compile(
+        rf'^name = "{re.escape(PACKAGE_NAME)}"\nversion = "{re.escape(current)}"\n',
+        re.MULTILINE,
+    )
+    if not pattern.search(text):
+        msg = (
+            f'uv.lock self-version line `version = "{current}"` not found '
+            f'under the `[[package]] name = "{PACKAGE_NAME}"` block.'
+        )
+        raise ValueError(msg)
+    return pattern.sub(f'name = "{PACKAGE_NAME}"\nversion = "{new}"\n', text, count=1)
+
+
+def _read_pyproject_version() -> str:
+    data = tomllib.loads(PYPROJECT.read_text(encoding="utf-8"))
+    version = data.get("project", {}).get("version", "")
+    if not isinstance(version, str) or not _SEMVER_RE.fullmatch(version):
+        msg = f"unable to read [project].version from {PYPROJECT}: {version!r}"
+        raise ValueError(msg)
+    return version
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser(description=__doc__.splitlines()[0])
+    parser.add_argument("--tag", required=True, help="released tag, e.g. v0.3.0")
+    parser.add_argument(
+        "--prior-tag",
+        default="",
+        help="previous tag for compare link; empty for first release",
+    )
+    parser.add_argument("--date", required=True, help="release date YYYY-MM-DD (UTC)")
+    parser.add_argument(
+        "--no-bump",
+        action="store_true",
+        help=(
+            "skip the pyproject.toml + uv.lock version bump — used when "
+            "pre-staging the CHANGELOG before the release tag is cut; the "
+            "post-release rollup is what advances develop into the next cycle"
+        ),
+    )
+    args = parser.parse_args()
+
+    if not _SEMVER_RE.fullmatch(args.tag):
+        print(f"::error::--tag must be vMAJOR.MINOR.PATCH; got {args.tag!r}")
+        return 1
+    if args.prior_tag and not _SEMVER_RE.fullmatch(args.prior_tag):
+        print(f"::error::--prior-tag must be vX.Y.Z or empty; got {args.prior_tag!r}")
+        return 1
+    if not _DATE_RE.fullmatch(args.date):
+        print(f"::error::--date must be YYYY-MM-DD; got {args.date!r}")
+        return 1
+
+    released_version = _strip_v(args.tag)
+    next_version = _bump_patch(released_version)
+
+    try:
+        current_version = _read_pyproject_version()
+    except (FileNotFoundError, ValueError) as exc:
+        print(f"::error::{exc}")
+        return 2
+    # Version-vs-tag sanity. Skipped under --no-bump because the prestage
+    # runs *before* the release tag is cut: develop's current version IS
+    # the about-to-be-released version, which matches `released_version`,
+    # but we don't want to bump it (the post-release rollup does that).
+    if not args.no_bump and current_version != released_version:
+        print(
+            f"::error::pyproject.toml version is {current_version!r} but tag is "
+            f"{args.tag!r} (expected {released_version!r}). The rollup workflow "
+            "must run against develop *as the release was cut from*; if develop "
+            "moved on, replay manually after rebasing."
+        )
+        return 1
+
+    try:
+        new_changelog = rollup_changelog_text(
+            CHANGELOG.read_text(encoding="utf-8"),
+            args.tag,
+            args.prior_tag,
+            args.date,
+        )
+        if not args.no_bump:
+            new_pyproject = bump_pyproject_text(
+                PYPROJECT.read_text(encoding="utf-8"),
+                current_version,
+                next_version,
+            )
+            new_uv_lock = bump_uv_lock_text(
+                UV_LOCK.read_text(encoding="utf-8"),
+                current_version,
+                next_version,
+            )
+    except (FileNotFoundError, ValueError) as exc:
+        print(f"::error::{exc}")
+        return 2
+
+    CHANGELOG.write_text(new_changelog, encoding="utf-8")
+    if not args.no_bump:
+        PYPROJECT.write_text(new_pyproject, encoding="utf-8")
+        UV_LOCK.write_text(new_uv_lock, encoding="utf-8")
+        print(
+            f"Rolled up CHANGELOG [Unreleased] under [{released_version}] - "
+            f"{args.date}; bumped {current_version} -> {next_version}."
+        )
+    else:
+        print(
+            f"Pre-staged CHANGELOG [Unreleased] under [{released_version}] - "
+            f"{args.date}; version bump deferred to the post-release rollup."
+        )
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/.github/workflows/changelog-rollup.yml b/.github/workflows/changelog-rollup.yml
new file mode 100644
index 0000000..3851077
--- /dev/null
+++ b/.github/workflows/changelog-rollup.yml
@@ -0,0 +1,155 @@
+name: Roll up CHANGELOG after release
+
+# Triggered automatically after `release.yml` succeeds. Performs the
+# mechanical CHANGELOG roll-up (move `[Unreleased]` entries under a
+# `[<version>]` heading, update footer compare links, bump pyproject.toml +
+# uv.lock to the next PATCH). Opens a `chore:` PR against develop for
+# owner review + admin-squash-merge.
+#
+# The script logic + edge cases live in
+# `.github/scripts/rollup_changelog.py` with full unit-test coverage in
+# `tests/test_rollup_changelog.py`.
+
+on:
+  workflow_run:
+    workflows: [Release]
+    types: [completed]
+  workflow_dispatch:
+    inputs:
+      tag:
+        description: "Released tag (e.g. v0.3.0) — required only for manual replay"
+        required: true
+        type: string
+
+permissions:
+  contents: write
+  pull-requests: write
+
+jobs:
+  rollup:
+    name: Open rollup PR
+    runs-on: ubuntu-latest
+    # `workflow_run` fires regardless of conclusion; gate so we only act on
+    # successful release runs (skip when release.yml itself failed).
+    if: >
+      github.event_name == 'workflow_dispatch' ||
+      github.event.workflow_run.conclusion == 'success'
+    steps:
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
+        with:
+          ref: develop
+          # full history needed for `git describe --abbrev=0 --tags <ref>^`
+          # to resolve the prior tag.
+          fetch-depth: 0
+          # Prefer RELEASE_BOT_TOKEN (a non-GITHUB_TOKEN identity) so the
+          # resulting branch push fires `pull_request` workflows on the
+          # auto-PR. Falls back to GITHUB_TOKEN when the secret isn't set —
+          # the auto-PR still opens, but its CI doesn't run until a user
+          # pushes a commit on top.
+          token: ${{ secrets.RELEASE_BOT_TOKEN || secrets.GITHUB_TOKEN }}
+
+      - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065  # v5
+        with:
+          python-version: "3.14"
+
+      - name: Resolve tags + date
+        id: resolve
+        run: |
+          set -euo pipefail
+          if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
+            TAG="${{ inputs.tag }}"
+          else
+            # release.yml runs on tag pushes; the head_branch on the
+            # workflow_run payload carries the tag name in that case.
+            TAG="${{ github.event.workflow_run.head_branch }}"
+          fi
+          if [ -z "${TAG}" ]; then
+            echo "::error::could not resolve released tag from trigger payload"
+            exit 1
+          fi
+          # Resolve the tag's prior tag. `git describe ... <tag>^` errors when
+          # there is no prior — catch and treat as empty (first release).
+          if PRIOR=$(git describe --abbrev=0 --tags --match 'v*.*.*' "${TAG}^" 2>/dev/null); then
+            echo "prior tag: ${PRIOR}"
+          else
+            PRIOR=""
+            echo "no prior tag (first release)"
+          fi
+          # Resolve the release date from the released tag's CHANGELOG.md.
+          # The release PR is the authoritative source — using `date -u` at
+          # workflow run time can drift if the workflow fires in a different
+          # UTC day than the release PR was crafted. Falls back to date -u
+          # with a warning when the heading isn't found, so a degenerate
+          # CHANGELOG shape doesn't block the workflow.
+          VERSION="${TAG#v}"
+          if DATE=$(git show "${TAG}":CHANGELOG.md \
+              | grep -oE "^## \[${VERSION}\] - [0-9]{4}-[0-9]{2}-[0-9]{2}" \
+              | head -1 \
+              | sed 's/.* - //'); then
+            if [ -z "${DATE}" ]; then
+              echo "::warning::could not extract date for ${VERSION} from CHANGELOG.md; falling back to date -u"
+              DATE=$(date -u +%Y-%m-%d)
+            else
+              echo "date from CHANGELOG: ${DATE}"
+            fi
+          else
+            echo "::warning::git show ${TAG}:CHANGELOG.md failed; falling back to date -u"
+            DATE=$(date -u +%Y-%m-%d)
+          fi
+          echo "tag=${TAG}" >> "$GITHUB_OUTPUT"
+          echo "prior=${PRIOR}" >> "$GITHUB_OUTPUT"
+          echo "date=${DATE}" >> "$GITHUB_OUTPUT"
+          echo "branch=chore/changelog-rollup-${TAG}" >> "$GITHUB_OUTPUT"
+
+      - name: Run rollup script
+        run: |
+          python .github/scripts/rollup_changelog.py \
+            --tag "${{ steps.resolve.outputs.tag }}" \
+            --prior-tag "${{ steps.resolve.outputs.prior }}" \
+            --date "${{ steps.resolve.outputs.date }}"
+
+      - name: Open rollup PR
+        env:
+          GH_TOKEN: ${{ secrets.RELEASE_BOT_TOKEN || secrets.GITHUB_TOKEN }}
+        run: |
+          set -euo pipefail
+          BRANCH="${{ steps.resolve.outputs.branch }}"
+          TAG="${{ steps.resolve.outputs.tag }}"
+          DATE="${{ steps.resolve.outputs.date }}"
+          # Idempotent: if the branch already exists from a previous replay,
+          # bail rather than force-push (the existing PR is the source of truth).
+          if git ls-remote --exit-code --heads origin "${BRANCH}" >/dev/null 2>&1; then
+            echo "::warning::branch ${BRANCH} already exists; skipping push to avoid clobbering an in-flight rollup PR"
+            exit 0
+          fi
+          git config user.name "github-actions[bot]"
+          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
+          git checkout -b "${BRANCH}"
+          git add CHANGELOG.md pyproject.toml uv.lock
+          git commit -m "chore: roll up CHANGELOG [Unreleased] under [${TAG#v}] - ${DATE}"
+          git push origin "${BRANCH}"
+          gh pr create \
+            --base develop \
+            --head "${BRANCH}" \
+            --title "chore: roll up CHANGELOG [Unreleased] under [${TAG#v}] - ${DATE}" \
+            --body "$(cat <<EOF
+          Auto-opened by [.github/workflows/changelog-rollup.yml](.github/workflows/changelog-rollup.yml) after the ${TAG} release.
+
+          ## What this PR does
+
+          - Inserts \`## [${TAG#v}] - ${DATE}\` heading after \`## [Unreleased]\` in CHANGELOG.md.
+          - Updates \`[Unreleased]: ...compare/${TAG}...HEAD\` footer link.
+          - Adds \`[${TAG#v}]: ...compare/...${TAG}\` footer link.
+          - Bumps \`pyproject.toml\` \`[project].version\` and \`uv.lock\` self-version one PATCH (so develop's \`[Unreleased]\` can accumulate again).
+
+          See [docs/DEVELOPMENT.md](docs/DEVELOPMENT.md) for the cycle.
+
+          ## Review
+
+          - [ ] CHANGELOG diff matches the released tag's published notes.
+          - [ ] Version bump is the next PATCH after \`${TAG#v}\` (no skipped bumps).
+          - [ ] CI green.
+
+          After review: \`gh pr merge <N> --admin --squash\`.
+          EOF
+          )"
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0e359f1..0c22c50 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,20 +2,27 @@
 
 All notable changes to this project will be documented in this file.
 
-The format is loosely based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) and the project adheres to [Semantic Versioning](https://semver.org/).
 
-Released versions are drafted automatically by [release-drafter](https://github.com/release-drafter/release-drafter); see `.github/release-drafter.yml` and `.github/workflows/release-drafter.yml`. Each entry on the GitHub Releases page corresponds to a tag of the form `vX.Y.Z`.
+Future entries are drafted automatically by [release-drafter](.github/release-drafter.yml) on every merge to `main`. After a release tag is cut, the post-release rollup workflow (`.github/workflows/changelog-rollup.yml`) opens a PR that moves the `[Unreleased]` block under a versioned heading and bumps `pyproject.toml` + `uv.lock` to the next PATCH. Categories map to conventional-commit prefixes:
 
-## Unreleased
+- `feat:` → **Features**
+- `fix:` → **Fixes**
+- `chore:` / `docs:` / `refactor:` / `test:` / `release:` → **Housekeeping**
+
+## [Unreleased]
 
 ### Added
 
 - Initial harness scaffold (Python 3.14 + FastAPI + Pydantic v2 + OpenTelemetry; React 19.2 + Vite + TypeScript strict).
-- 15 required CI status checks (lint, typecheck, tests, coverage ≥ 75 %, import-linter, pre-commit, frontend build/quality, security suite, two meta-gates, PR-title lint).
-- Release pipeline: tag-triggered build, push to GHCR, CycloneDX SBOM, GitHub Release publish.
+- 21 required CI status checks (lint, typecheck, tests, coverage ≥ 75 %, import-linter, pre-commit, frontend build/quality, file-length cap, version-bump, action-pinning audit, tests-required, src/ README audit, aspirational-ticket cite, security suite, two meta-gates, PR-title lint).
+- Release pipeline: tag-triggered build, push to GHCR, CycloneDX SBOM, GitHub Release publish, post-release CHANGELOG rollup automation.
 - Eval harness scaffold (provider-agnostic runner + LLM-judge Protocol + 1 example golden case + workflow_dispatch nightly).
 - `.claude/` agent integration (3 hooks, 6 auto-activating skills, settings example).
+- Container hardening: read-only root FS, tmpfs `/tmp`, `PYTHONDONTWRITEBYTECODE` / `PYTHONUNBUFFERED` env in runtime stage.
 
 ### Notes
 
 - This template was extracted from a prior LLM-coded project and generalised. The harness is the product; the scaffold exists so every gate has something to operate on.
+
+[Unreleased]: https://github.com/constk/harness-python-react/compare/v0.0.0...HEAD
diff --git a/pyproject.toml b/pyproject.toml
index ab267f3..a143b0d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "harness-python-react"
-version = "0.2.3"
+version = "0.2.4"
 description = "Production-quality LLM-driven coding harness — Python (FastAPI) backend, Vite + React + TypeScript frontend."
 readme = "README.md"
 requires-python = ">=3.14"
diff --git a/tests/test_rollup_changelog.py b/tests/test_rollup_changelog.py
new file mode 100644
index 0000000..01dbb21
--- /dev/null
+++ b/tests/test_rollup_changelog.py
@@ -0,0 +1,204 @@
+"""Tests for `.github/scripts/rollup_changelog.py` (#150)."""
+
+from __future__ import annotations
+
+import importlib.util
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+import pytest
+
+if TYPE_CHECKING:
+    from types import ModuleType
+
+
+def _load_script() -> ModuleType:
+    script_path = (
+        Path(__file__).parent.parent / ".github" / "scripts" / "rollup_changelog.py"
+    )
+    spec = importlib.util.spec_from_file_location("rollup_changelog", script_path)
+    assert spec is not None
+    assert spec.loader is not None
+    module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(module)
+    return module
+
+
+rc = _load_script()
+
+
+# ---------------------------------------------------------------------------
+# _bump_patch / _strip_v — semver helpers.
+# ---------------------------------------------------------------------------
+
+
+class TestBumpPatch:
+    def test_basic_increment(self) -> None:
+        assert rc._bump_patch("1.9.10") == "1.9.11"
+
+    def test_zero_patch(self) -> None:
+        assert rc._bump_patch("0.1.0") == "0.1.1"
+
+    def test_strips_v_prefix(self) -> None:
+        assert rc._bump_patch(rc._strip_v("v2.5.7")) == "2.5.8"
+
+    def test_rejects_non_semver(self) -> None:
+        with pytest.raises(ValueError, match="unsupported semver"):
+            rc._bump_patch("1.9")
+
+
+# ---------------------------------------------------------------------------
+# rollup_changelog_text — pure-string transform.
+# ---------------------------------------------------------------------------
+
+
+_BASE_CHANGELOG = """# Changelog
+
+## [Unreleased]
+
+### Tests
+
+- Some test entry. ([#999](https://github.com/x/y/issues/999))
+
+## [1.9.4] - 2026-04-29
+
+### Features
+
+- Old feature.
+
+[Unreleased]: https://github.com/constk/harness-python-react/compare/v1.9.4...HEAD
+[1.9.4]: https://github.com/constk/harness-python-react/compare/v1.4.13...v1.9.4
+"""
+
+
+def test_rollup_inserts_version_heading() -> None:
+    out = rc.rollup_changelog_text(_BASE_CHANGELOG, "v1.9.10", "v1.9.4", "2026-05-15")
+    assert "## [1.9.10] - 2026-05-15" in out
+    assert "## [Unreleased]\n\n## [1.9.10] - 2026-05-15" in out
+
+
+def test_rollup_heading_followed_by_blank_line() -> None:
+    """#166 — the new heading must have a blank line before the next section.
+
+    Pre-#166 the rollup produced `## [v1.9.10] - …\n### Tests`, missing the
+    blank line that every existing release section has. Cosmetic, but the
+    kind of inconsistency that accumulates as the file grows.
+    """
+    out = rc.rollup_changelog_text(_BASE_CHANGELOG, "v1.9.10", "v1.9.4", "2026-05-15")
+    assert "## [1.9.10] - 2026-05-15\n\n### Tests" in out
+    assert "## [1.9.10] - 2026-05-15\n### Tests" not in out
+
+
+def test_rollup_updates_unreleased_compare_link() -> None:
+    out = rc.rollup_changelog_text(_BASE_CHANGELOG, "v1.9.10", "v1.9.4", "2026-05-15")
+    assert (
+        "[Unreleased]: https://github.com/constk/harness-python-react/"
+        "compare/v1.9.10...HEAD" in out
+    )
+    assert "compare/v1.9.4...HEAD" not in out
+
+
+def test_rollup_inserts_new_version_footer_link() -> None:
+    out = rc.rollup_changelog_text(_BASE_CHANGELOG, "v1.9.10", "v1.9.4", "2026-05-15")
+    assert (
+        "[1.9.10]: https://github.com/constk/harness-python-react/"
+        "compare/v1.9.4...v1.9.10" in out
+    )
+
+
+def test_rollup_preserves_existing_version_links() -> None:
+    out = rc.rollup_changelog_text(_BASE_CHANGELOG, "v1.9.10", "v1.9.4", "2026-05-15")
+    assert "[1.9.4]: https://github.com/constk/harness-python-react/" in out
+
+
+def test_rollup_idempotent_on_existing_heading() -> None:
+    """Re-running on already-rolled-up text doesn't duplicate the heading."""
+    once = rc.rollup_changelog_text(_BASE_CHANGELOG, "v1.9.10", "v1.9.4", "2026-05-15")
+    twice = rc.rollup_changelog_text(once, "v1.9.10", "v1.9.4", "2026-05-15")
+    assert once.count("## [1.9.10] - 2026-05-15") == 1
+    assert twice.count("## [1.9.10] - 2026-05-15") == 1
+
+
+def test_rollup_idempotent_on_existing_footer_link() -> None:
+    once = rc.rollup_changelog_text(_BASE_CHANGELOG, "v1.9.10", "v1.9.4", "2026-05-15")
+    assert once.count("[1.9.10]:") == 1
+    twice = rc.rollup_changelog_text(once, "v1.9.10", "v1.9.4", "2026-05-15")
+    assert twice.count("[1.9.10]:") == 1
+
+
+def test_rollup_first_release_uses_releases_tag_url() -> None:
+    """No prior tag → footer link points at /releases/tag/<tag> not /compare/."""
+    initial = """# Changelog
+
+## [Unreleased]
+
+[Unreleased]: https://github.com/constk/harness-python-react/compare/HEAD...HEAD
+"""
+    out = rc.rollup_changelog_text(initial, "v0.1.0", "", "2026-01-01")
+    assert (
+        "[0.1.0]: https://github.com/constk/harness-python-react/"
+        "releases/tag/v0.1.0" in out
+    )
+    # Compare-style link not used when prior_tag is empty.
+    assert "compare/...v0.1.0" not in out
+
+
+def test_rollup_handles_two_digit_minor_and_patch() -> None:
+    """v1.10.42 has two-digit numbers — common after enough patch cycles."""
+    out = rc.rollup_changelog_text(_BASE_CHANGELOG, "v1.10.42", "v1.9.4", "2026-12-31")
+    assert "## [1.10.42] - 2026-12-31" in out
+    assert "compare/v1.9.4...v1.10.42" in out
+
+
+# ---------------------------------------------------------------------------
+# bump_pyproject_text / bump_uv_lock_text — line-level edits.
+# ---------------------------------------------------------------------------
+
+
+def test_bump_pyproject_basic() -> None:
+    text = '[project]\nname = "harness-python-react"\nversion = "1.9.10"\n'
+    out = rc.bump_pyproject_text(text, "1.9.10", "1.9.11")
+    assert 'version = "1.9.11"' in out
+    assert 'version = "1.9.10"' not in out
+
+
+def test_bump_pyproject_only_replaces_project_version_line() -> None:
+    """A `version = "..."` line elsewhere (e.g. a dependency) isn't touched."""
+    text = (
+        '[project]\nversion = "1.9.10"\n\n'
+        '[tool.dummy]\nversion = "0.0.1"  # unrelated\n'
+    )
+    out = rc.bump_pyproject_text(text, "1.9.10", "1.9.11")
+    assert out.count('version = "1.9.11"') == 1
+    assert 'version = "0.0.1"' in out
+
+
+def test_bump_pyproject_raises_when_current_missing() -> None:
+    with pytest.raises(ValueError, match="not found"):
+        rc.bump_pyproject_text('version = "9.9.9"\n', "1.9.10", "1.9.11")
+
+
+def test_bump_uv_lock_basic() -> None:
+    text = (
+        '[[package]]\nname = "harness-python-react"\nversion = "1.9.10"\n'
+        'source = { editable = "." }\n'
+    )
+    out = rc.bump_uv_lock_text(text, "1.9.10", "1.9.11")
+    assert 'version = "1.9.11"' in out
+
+
+def test_bump_uv_lock_does_not_touch_other_packages() -> None:
+    """Only the self-version block's version line is replaced."""
+    text = (
+        '[[package]]\nname = "fastapi"\nversion = "1.9.10"\n\n'
+        '[[package]]\nname = "harness-python-react"\nversion = "1.9.10"\n'
+    )
+    out = rc.bump_uv_lock_text(text, "1.9.10", "1.9.11")
+    # FastAPI's coincidentally-same version stays put.
+    assert out.count('name = "fastapi"\nversion = "1.9.10"') == 1
+    assert out.count('name = "harness-python-react"\nversion = "1.9.11"') == 1
+
+
+# main() integration tests live in `tests/test_rollup_changelog_main.py` —
+# split out so neither file approaches the 300-line cap (same pattern as
+# `test_check_commit_types_subject.py`'s sibling-file split for #128).
diff --git a/uv.lock b/uv.lock
index 5edabc5..a444275 100644
--- a/uv.lock
+++ b/uv.lock
@@ -328,7 +328,7 @@ wheels = [
 
 [[package]]
 name = "harness-python-react"
-version = "0.2.3"
+version = "0.2.4"
 source = { virtual = "." }
 dependencies = [
     { name = "fastapi" },

From 4de2a0790cbb3e3cd83b62f25946e1e8b19bec01 Mon Sep 17 00:00:00 2001
From: Constantinos <41453723+constk@users.noreply.github.com>
Date: Fri, 1 May 2026 23:58:41 +1000
Subject: [PATCH 8/9] test: route-versioning audit + OTel semconv backstop
 tests (#130, #132, #147) (#75)

---
 pyproject.toml                 |   2 +-
 tests/test_otel_semconv.py     | 225 +++++++++++++++++++++++++++++++++
 tests/test_route_versioning.py | 105 +++++++++++++++
 uv.lock                        |   2 +-
 4 files changed, 332 insertions(+), 2 deletions(-)
 create mode 100644 tests/test_otel_semconv.py
 create mode 100644 tests/test_route_versioning.py

diff --git a/pyproject.toml b/pyproject.toml
index a143b0d..3fafc5a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "harness-python-react"
-version = "0.2.4"
+version = "0.2.5"
 description = "Production-quality LLM-driven coding harness — Python (FastAPI) backend, Vite + React + TypeScript frontend."
 readme = "README.md"
 requires-python = ">=3.14"
diff --git a/tests/test_otel_semconv.py b/tests/test_otel_semconv.py
new file mode 100644
index 0000000..f318818
--- /dev/null
+++ b/tests/test_otel_semconv.py
@@ -0,0 +1,225 @@
+"""Assertions that emitted spans use OTel semantic-convention attribute names.
+
+Test-side enforcement of the rule "use the official OTel semconv attribute
+names where one exists" (see `src/observability/spans.py` and
+`docs/INVARIANTS.md`).
+
+The scaffold ships only semconv-defined keys (no custom ones); the
+`ALLOWED_CUSTOM_KEYS` set below is intentionally empty. When a project
+that extends this template needs a custom attribute (e.g.
+`agent.tool_calls_count`), it MUST add a constant to
+`src/observability/spans.py` AND register it in `ALLOWED_CUSTOM_KEYS` —
+the registry-closure test fails otherwise. That's the forcing function
+that keeps custom keys to their documented set rather than letting them
+proliferate as one-off raw strings.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pytest
+from opentelemetry import trace
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export import SimpleSpanProcessor
+from opentelemetry.sdk.trace.export.in_memory_span_exporter import (
+    InMemorySpanExporter,
+)
+
+from src.observability import spans as spans_module
+from src.observability.spans import (
+    DB_QUERY_TEXT,
+    DB_RESPONSE_RETURNED_ROWS,
+    GENAI_CONVERSATION_ID,
+    GENAI_OPERATION_NAME,
+    GENAI_PROVIDER_NAME,
+    GENAI_REQUEST_MODEL,
+    GENAI_TOOL_CALL_ARGUMENTS,
+    GENAI_TOOL_CALL_RESULT,
+    GENAI_TOOL_NAME,
+    GENAI_USAGE_INPUT_TOKENS,
+    GENAI_USAGE_OUTPUT_TOKENS,
+    agent_span,
+)
+
+if TYPE_CHECKING:
+    from collections.abc import Sequence
+
+    from opentelemetry.sdk.trace import ReadableSpan
+
+
+SEMCONV_KEYS: frozenset[str] = frozenset(
+    {
+        GENAI_CONVERSATION_ID,
+        GENAI_REQUEST_MODEL,
+        GENAI_OPERATION_NAME,
+        GENAI_PROVIDER_NAME,
+        GENAI_USAGE_INPUT_TOKENS,
+        GENAI_USAGE_OUTPUT_TOKENS,
+        GENAI_TOOL_NAME,
+        GENAI_TOOL_CALL_ARGUMENTS,
+        GENAI_TOOL_CALL_RESULT,
+        DB_QUERY_TEXT,
+        DB_RESPONSE_RETURNED_ROWS,
+    }
+)
+
+# Custom keys allowed because no semconv equivalent exists. Empty in the
+# scaffold — projects that extend this template add entries here as they
+# add custom span attributes (and constants in `src/observability/spans.py`).
+ALLOWED_CUSTOM_KEYS: frozenset[str] = frozenset()
+
+ACCEPTED_KEYS: frozenset[str] = SEMCONV_KEYS | ALLOWED_CUSTOM_KEYS
+
+
+@pytest.fixture
+def memory_exporter(monkeypatch: pytest.MonkeyPatch) -> InMemorySpanExporter:
+    """Install a TracerProvider that records spans into an in-memory exporter.
+
+    Uses `monkeypatch.setattr` to swap `trace.get_tracer_provider` for the
+    test's lifetime instead of `trace.set_tracer_provider` — the OTel SDK
+    refuses to override the global provider once any other test has called
+    `set_tracer_provider`, which would silently route spans to the wrong
+    exporter.
+
+    Uses `SimpleSpanProcessor` so spans are visible to `get_finished_spans()`
+    immediately after the span context exits — no flush race.
+    """
+    exporter = InMemorySpanExporter()
+    provider = TracerProvider()
+    provider.add_span_processor(SimpleSpanProcessor(exporter))
+    monkeypatch.setattr(trace, "get_tracer_provider", lambda: provider)
+    return exporter
+
+
+def _span_by_name(spans: Sequence[ReadableSpan], name: str) -> ReadableSpan:
+    """Return the unique span with the given name (assert exactly one)."""
+    matches = [s for s in spans if s.name == name]
+    assert len(matches) == 1, (
+        f"expected exactly one span named {name!r}, got {len(matches)} "
+        f"(all names: {[s.name for s in spans]})"
+    )
+    return matches[0]
+
+
+def test_tool_call_span_carries_genai_tool_attributes(
+    memory_exporter: InMemorySpanExporter,
+) -> None:
+    """A tool-call span carries the documented `gen_ai.tool.*` keys."""
+    tool_attrs: dict[str, str | int] = {
+        GENAI_TOOL_NAME: "echo",
+        GENAI_TOOL_CALL_ARGUMENTS: '{"msg": "hi"}',
+        GENAI_TOOL_CALL_RESULT: '{"echoed": "hi"}',
+    }
+    with agent_span("tool.echo", tool_attrs):
+        pass
+
+    spans = memory_exporter.get_finished_spans()
+    tool_span = _span_by_name(spans, "tool.echo")
+    assert tool_span.attributes is not None
+    keys = set(tool_span.attributes)
+
+    assert GENAI_TOOL_NAME in keys
+    assert GENAI_TOOL_CALL_ARGUMENTS in keys
+    assert GENAI_TOOL_CALL_RESULT in keys
+    assert tool_span.attributes[GENAI_TOOL_NAME] == "echo"
+
+
+def test_llm_call_span_carries_genai_attributes(
+    memory_exporter: InMemorySpanExporter,
+) -> None:
+    """An LLM-style span carries the documented `gen_ai.*` keys + token usage."""
+    llm_attrs: dict[str, str | int] = {
+        GENAI_CONVERSATION_ID: "session-test-1",
+        GENAI_REQUEST_MODEL: "gpt-4o-mini",
+        GENAI_OPERATION_NAME: "chat",
+        GENAI_PROVIDER_NAME: "openai",
+        GENAI_USAGE_INPUT_TOKENS: 42,
+        GENAI_USAGE_OUTPUT_TOKENS: 100,
+    }
+    with agent_span("agent.llm_call", llm_attrs):
+        pass
+
+    spans = memory_exporter.get_finished_spans()
+    llm_span = _span_by_name(spans, "agent.llm_call")
+    assert llm_span.attributes is not None
+
+    for key, value in llm_attrs.items():
+        assert key in llm_span.attributes, f"missing {key}"
+        assert llm_span.attributes[key] == value
+
+
+def test_db_query_span_carries_db_attributes(
+    memory_exporter: InMemorySpanExporter,
+) -> None:
+    """A DB-query span carries `db.query.text` + `db.response.returned_rows`."""
+    db_attrs: dict[str, str | int] = {
+        DB_QUERY_TEXT: "SELECT 1",
+        DB_RESPONSE_RETURNED_ROWS: 1,
+    }
+    with agent_span("db.example_query", db_attrs):
+        pass
+
+    spans = memory_exporter.get_finished_spans()
+    db_span = _span_by_name(spans, "db.example_query")
+    assert db_span.attributes is not None
+    keys = set(db_span.attributes)
+
+    assert DB_QUERY_TEXT in keys
+    assert DB_RESPONSE_RETURNED_ROWS in keys
+
+
+def test_no_unregistered_attribute_keys_emitted(
+    memory_exporter: InMemorySpanExporter,
+) -> None:
+    """Every emitted attribute key is in `SEMCONV_KEYS` or `ALLOWED_CUSTOM_KEYS`."""
+    with agent_span(
+        "tool.echo",
+        {
+            GENAI_TOOL_NAME: "echo",
+            GENAI_TOOL_CALL_ARGUMENTS: "{}",
+            GENAI_TOOL_CALL_RESULT: "{}",
+        },
+    ):
+        pass
+
+    spans = memory_exporter.get_finished_spans()
+
+    rogue: list[tuple[str, str]] = []
+    for span in spans:
+        if span.attributes is None:
+            continue
+        for key in span.attributes:
+            if key not in ACCEPTED_KEYS:
+                rogue.append((span.name, key))
+
+    assert not rogue, (
+        "Span attributes emitted with keys outside the documented registry: "
+        f"{rogue}. Either use a semconv name from `src/observability/spans.py` "
+        "or add the new key to both `spans.py` (with a comment explaining why "
+        "no semconv equivalent applies) and `ALLOWED_CUSTOM_KEYS` in this test."
+    )
+
+
+def test_spans_module_constants_are_registered() -> None:
+    """Every uppercase `str` constant in `spans.py` belongs to the test allowlist.
+
+    The allowlist is `SEMCONV_KEYS | ALLOWED_CUSTOM_KEYS`. Catches the
+    "added a new constant in `spans.py` without updating the test" drift class.
+    If this fails, either:
+
+    - the new constant is a semconv name → add it to `SEMCONV_KEYS`.
+    - the new constant is a custom name → add it to `ALLOWED_CUSTOM_KEYS`
+      AND ensure the comment in `spans.py` explains why no semconv applies.
+    """
+    declared = {
+        getattr(spans_module, name)
+        for name in dir(spans_module)
+        if name.isupper() and isinstance(getattr(spans_module, name), str)
+    }
+    unregistered = declared - ACCEPTED_KEYS
+    assert not unregistered, (
+        f"`spans.py` defines constants not in the test allowlist: "
+        f"{sorted(unregistered)!r}. Update `SEMCONV_KEYS` or "
+        "`ALLOWED_CUSTOM_KEYS` in this test."
+    )
diff --git a/tests/test_route_versioning.py b/tests/test_route_versioning.py
new file mode 100644
index 0000000..450c736
--- /dev/null
+++ b/tests/test_route_versioning.py
@@ -0,0 +1,105 @@
+"""Audit every FastAPI route is `/api/v*/...` or in the documented allowlist.
+
+Backstops invariant 7 in `docs/INVARIANTS.md` (was *aspirational on test*).
+A future contributor adding `@app.get("/foo")` instead of registering the
+endpoint on the versioned `APIRouter(prefix="/api/v1")` would silently
+introduce an un-versioned route — breaking the API-versioning contract from
+`docs/DEVELOPMENT.md` without any gate firing. This test is the gate.
+"""
+
+from __future__ import annotations
+
+import re
+
+from src.api.main import app
+
+VERSIONED_PREFIX = re.compile(r"^/api/v\d+/")
+
+# Routes deliberately NOT under `/api/v*/`. Add an entry here only with a
+# comment naming the structural reason — never just "we needed an exception".
+UNVERSIONED_ALLOWLIST: frozenset[str] = frozenset(
+    {
+        # FastAPI auto-generated documentation surface. Disabled by passing
+        # docs_url=None / redoc_url=None to FastAPI() if needed; the
+        # scaffold keeps them enabled at the unversioned root.
+        "/openapi.json",
+        "/docs",
+        "/docs/oauth2-redirect",
+        "/redoc",
+    }
+)
+
+
+def test_every_route_is_versioned_or_allowlisted() -> None:
+    """Every routable path is `/api/v*/` or in the explicit allowlist."""
+    offenders: list[str] = []
+    for route in app.routes:
+        path = getattr(route, "path", None)
+        if path is None:
+            # Not an APIRoute (e.g. Mount). Skip cleanly — `path` is the
+            # discriminator we care about.
+            continue
+        if path in UNVERSIONED_ALLOWLIST:
+            continue
+        if not VERSIONED_PREFIX.match(path):
+            offenders.append(path)
+
+    assert not offenders, (
+        f"un-versioned route(s) detected in app.routes: {offenders!r}. Per "
+        "docs/DEVELOPMENT.md API Versioning, every routable path must match "
+        "`/api/v*/<…>` or be added to UNVERSIONED_ALLOWLIST in this test "
+        "with a comment naming the structural reason. Common entry points "
+        'for an un-versioned path: a router wired without `prefix="/api/vN"`, '
+        "an `@app.<verb>` decorator on the FastAPI app instead of a versioned "
+        "APIRouter, a manual `app.add_api_route(...)` call, or a Mount that "
+        "exposes paths at the unversioned root. Fix at the wiring site or, "
+        "if the path is genuinely unversioned by design (cf. `/api/health`), "
+        "add it to the allowlist."
+    )
+
+
+def test_allowlist_has_no_dead_entries() -> None:
+    """Every UNVERSIONED_ALLOWLIST entry must correspond to a real route.
+
+    Without this assertion, removing a docs surface (e.g. constructing
+    FastAPI with `docs_url=None`) leaves `/docs` in the allowlist as cruft;
+    over time the allowlist becomes a graveyard that masks what is
+    *actually* deliberately unversioned. Any drop-out fails fast and forces
+    a deletion in the same PR.
+    """
+    actual_paths = {getattr(r, "path", None) for r in app.routes}
+    actual_paths.discard(None)
+    dead = sorted(UNVERSIONED_ALLOWLIST - actual_paths)
+    assert not dead, (
+        f"UNVERSIONED_ALLOWLIST entries no longer present in app.routes: "
+        f"{dead}. Remove the dead entry — keeping it is allowlist cruft. "
+        "If the path was intentionally removed (e.g. FastAPI docs disabled "
+        "via `docs_url=None`), drop the corresponding allowlist line in "
+        "this same PR so the allowlist tracks reality."
+    )
+
+
+# ---------- Regex sanity checks ----------
+#
+# Belt-and-braces against the allowlist swallowing a typo that the route
+# walk wouldn't catch (e.g. someone mutating the regex to be too permissive).
+
+
+def test_regex_accepts_v1_routes() -> None:
+    assert VERSIONED_PREFIX.match("/api/v1/chat")
+    assert VERSIONED_PREFIX.match("/api/v1/sessions")
+    assert VERSIONED_PREFIX.match("/api/v1/sessions/{session_id}")
+
+
+def test_regex_accepts_future_major_versions() -> None:
+    """`/api/v2/...` and beyond are accepted; the rule isn't v1-only."""
+    assert VERSIONED_PREFIX.match("/api/v2/chat")
+    assert VERSIONED_PREFIX.match("/api/v10/some-endpoint")
+
+
+def test_regex_rejects_unversioned_paths() -> None:
+    assert not VERSIONED_PREFIX.match("/foo")
+    assert not VERSIONED_PREFIX.match("/api/foo")
+    assert not VERSIONED_PREFIX.match("/api/health")  # in allowlist, not regex
+    assert not VERSIONED_PREFIX.match("/api/v/chat")  # missing version digit
+    assert not VERSIONED_PREFIX.match("/api/version1/chat")  # wrong prefix shape
diff --git a/uv.lock b/uv.lock
index a444275..bd15374 100644
--- a/uv.lock
+++ b/uv.lock
@@ -328,7 +328,7 @@ wheels = [
 
 [[package]]
 name = "harness-python-react"
-version = "0.2.4"
+version = "0.2.5"
 source = { virtual = "." }
 dependencies = [
     { name = "fastapi" },

From 578ef8a92ad652b06af776b940234ef76146f90e Mon Sep 17 00:00:00 2001
From: Constantinos <41453723+constk@users.noreply.github.com>
Date: Sat, 2 May 2026 12:21:00 +1000
Subject: [PATCH 9/9] docs: HARNESS_PRIMER + INVARIANTS marker for
 route-versioning enforcement (#130, #132) (#76)

---
 docs/HARNESS.md        |   4 +
 docs/HARNESS_PRIMER.md | 375 +++++++++++++++++++++++++++++++++++++++++
 docs/INVARIANTS.md     |   2 +-
 pyproject.toml         |   2 +-
 uv.lock                |   2 +-
 5 files changed, 382 insertions(+), 3 deletions(-)
 create mode 100644 docs/HARNESS_PRIMER.md

diff --git a/docs/HARNESS.md b/docs/HARNESS.md
index 0a8ecc6..e24007c 100644
--- a/docs/HARNESS.md
+++ b/docs/HARNESS.md
@@ -32,6 +32,10 @@ The harness is independent of the project's domain. The included `src/api/echo`
 
 ## Reading order
 
+For someone who hasn't worked in a harness like this before, start with [`docs/HARNESS_PRIMER.md`](HARNESS_PRIMER.md) — it's the same surface as this doc but written for non-engineers.
+
+For an engineer setting up the template:
+
 1. **`docs/INVARIANTS.md`** — the load-bearing rules. Every PR is checked against them.
 2. **`docs/BOUNDARIES.md`** — module layering and the import-linter contracts.
 3. **`docs/DEVELOPMENT.md`** — local setup, the `justfile`, the CI pipeline.
diff --git a/docs/HARNESS_PRIMER.md b/docs/HARNESS_PRIMER.md
new file mode 100644
index 0000000..7d9606d
--- /dev/null
+++ b/docs/HARNESS_PRIMER.md
@@ -0,0 +1,375 @@
+# harness-python-react — Harness Primer
+
+> A plain-English companion to [HARNESS.md](HARNESS.md). If `HARNESS.md` is the
+> map, this is the guided tour: written for someone who understands modern AI
+> coding conceptually but is not the engineer who would set up a harness from
+> scratch. You should be able to read this end-to-end without opening any other
+> file — though every claim links back to a source-of-truth file you can verify.
+
+---
+
+## Table of contents
+
+1. [What a harness is, and why this template has one](#1-what-a-harness-is-and-why-this-template-has-one)
+2. [The three principles, with code](#2-the-three-principles-with-code)
+3. [The numbered invariants — at a glance](#3-the-numbered-invariants--at-a-glance)
+4. [Pre-commit — the local checkpoint](#4-pre-commit--the-local-checkpoint)
+5. [CI gates — the cloud checkpoint](#5-ci-gates--the-cloud-checkpoint)
+6. [Agent-level hooks — opt-in per developer](#6-agent-level-hooks--opt-in-per-developer)
+7. [Skills — guidance, not enforcement](#7-skills--guidance-not-enforcement)
+8. [The evaluation harness — a one-page summary](#8-the-evaluation-harness--a-one-page-summary)
+9. [Process-as-code](#9-process-as-code)
+10. [Defence in depth — two worked examples](#10-defence-in-depth--two-worked-examples)
+11. [Operating within the harness — a survival guide](#11-operating-within-the-harness--a-survival-guide)
+12. [Setup checklist for your harness maintainer](#12-setup-checklist-for-your-harness-maintainer)
+13. [Glossary](#13-glossary)
+14. [Where to go next](#14-where-to-go-next)
+
+---
+
+## 1. What a harness is, and why this template has one
+
+A **harness**, in this codebase, is the set of automated rails that catch bad changes *before* they reach the `main` branch. Think of it like the safety systems in a modern car: the seatbelt warning that beeps at you, the interlock that stops the engine starting in gear, and the crash-rated chassis that tolerates the mistakes the first two missed. None of those systems are the driver — but together they make a single bad input survivable.
+
+The template's harness has the same shape, in three layers:
+
+| Layer | What it does | Car analogy |
+|---|---|---|
+| **Prompts describe** | `CLAUDE.md` and the `docs/` files tell a human or AI agent what the project values. Advisory only. | The seatbelt warning beep — informative, ignorable. |
+| **Skills guide** | Topic-specific briefings the agent loads when relevant (`.claude/skills/`). Shape style and approach but reject nothing. | Lane-keep assist — nudges you back, doesn't lock the wheel. |
+| **Hooks and CI enforce** | Mechanical checks: pre-commit, agent-level hooks, GitHub Actions. A violation here blocks the change. | The crash-rated chassis and the interlock — non-negotiable. |
+
+The principle: **anything that matters is enforced mechanically.** Prose can drift across model versions, sessions, and reviewers; a regex in a CI job cannot. Where this primer says "the system requires X," it means a script somewhere will refuse to let X be violated — and this doc tells you which script.
+
+A second harness is the **evaluation harness** ([§8](#8-the-evaluation-harness--a-one-page-summary)) — tests whether the *agent* answers questions correctly, not whether the *code* compiles.
+
+> **If `HARNESS.md` is the map, this is the guided tour.** Engineers reach for the map. Everyone else can start here.
+
+---
+
+## 2. The three principles, with code
+
+### 2.1 Invariants — *what cannot be true*
+
+An **invariant** is a numbered rule the system maintains, with a pointer to the code that enforces it. "An invariant in a validator is a fact; an invariant in prose is a suggestion." The repo treats unenforced invariants as bugs — every aspirational entry must cite a tracking issue (`Aspirational ticket cite` CI job).
+
+→ Source of truth: [docs/INVARIANTS.md](INVARIANTS.md).
+
+### 2.2 Contracts — *the shape at every seam*
+
+Every piece of data that crosses a module boundary or process boundary is a Pydantic model that inherits from a strict base class. The base class refuses unknown fields at construction. A typo fails at the seam, not three calls deeper.
+
+```python
+# src/models/_base.py
+from pydantic import BaseModel, ConfigDict
+
+
+class StrictModel(BaseModel):
+    """Base for every contract that crosses a module or process seam."""
+
+    model_config = ConfigDict(extra="forbid")
+```
+
+**What you'll see if you violate it.** A typo like `HealthResponse(status="ok", versoin="0.1.0")` raises:
+
+```text
+pydantic_core._pydantic_core.ValidationError: 1 validation error for HealthResponse
+versoin
+  Extra inputs are not permitted [type=extra_forbidden, input_value='0.1.0', input_type=str]
+```
+
+The rejection happens at the API boundary, in the FastAPI handler, before any business logic runs.
+
+### 2.3 Boundaries — *who depends on whom*
+
+`src/` is a one-way dependency graph:
+
+```
+   src.api ─┐         ┌─ src.eval     (HTTP / golden dataset, siblings)
+            ▼         ▼
+            src.agent                 (the tool-calling loop)
+                ▼
+            src.tools                 (typed tool registry)
+                ▼
+            src.data                  (DB / external systems)
+                ▼
+            src.observability         (OTel spans, logging)
+                ▼
+            src.models                (Pydantic contracts; depends on nothing)
+```
+
+Enforced by `import-linter` ([pyproject.toml](../pyproject.toml) `[tool.importlinter]`).
+
+**What you'll see if you violate it.** Adding `from src.api.routes import router` to `src/models/foo.py` fails the `Architecture (import-linter)` CI job:
+
+```text
+src.models is not allowed to import src.api:
+
+-   src.models.foo -> src.api.routes (l.4)
+
+Contracts: 0 kept, 2 broken.
+```
+
+There is no override flag.
+
+---
+
+## 3. The numbered invariants — at a glance
+
+| # | Rule | Where enforced |
+|---|---|---|
+| 1 | Every contract crossing a module seam forbids unknown keys | `StrictModel` in [src/models/_base.py](../src/models/_base.py) + [tests/test_models.py](../tests/test_models.py) |
+| 2 | API endpoints live under `/api/v1/` and return typed responses | Router prefix in [src/api/routes.py](../src/api/routes.py) + route-walk test in [tests/test_route_versioning.py](../tests/test_route_versioning.py) |
+| 3 | Layer flow is one-way | `import-linter` contracts in [pyproject.toml](../pyproject.toml) + the `Architecture` CI job |
+| 4 | Coverage ≥ 75 % on `src/` | `[tool.coverage.report].fail_under = 75` + the `Coverage` CI job |
+| 5 | No secret leaves the repo unscanned | Three-layer scan: `.claude/hooks/pretooluse_bash.py` → pre-commit gitleaks → `Secret scan (gitleaks)` CI job |
+
+→ Full detail: [docs/INVARIANTS.md](INVARIANTS.md). Slots 6+ are reserved for project-specific invariants the team adds as the domain stabilises; the `Aspirational ticket cite` gate enforces that any aspirational marker line cites a `#NNN` ticket.
+
+---
+
+## 4. Pre-commit — the local checkpoint
+
+Pre-commit runs on your machine before each commit is created. Failures here cost zero CI minutes.
+
+→ Source of truth: [.pre-commit-config.yaml](../.pre-commit-config.yaml).
+
+| Hook | What it does |
+|---|---|
+| **ruff** (`--fix`) | Lints Python and auto-fixes what it can. |
+| **ruff-format** | Applies the project's Python code style. |
+| **check-yaml / check-toml / check-json** | Parses every YAML/TOML/JSON file you're committing. |
+| **check-merge-conflict** | Refuses to commit a file with `<<<<<<<` markers. |
+| **check-added-large-files** | Blocks any new file over 500 KB. |
+| **end-of-file-fixer / trailing-whitespace / mixed-line-ending** | Hygiene. |
+| **gitleaks** | Scans the diff for secrets — first of three independent scan layers. |
+| **commitizen** | At `commit-msg` stage. Refuses commit messages that don't match `<type>(<scope>): <subject>` with one of the seven allowed types and a non-Title-Case subject. |
+| **mypy** (`--strict`) | Type-checks all of `src/` and `tests/`. |
+
+Wire once with `uv run pre-commit install --hook-type pre-commit --hook-type commit-msg`.
+
+---
+
+## 5. CI gates — the cloud checkpoint
+
+Every push and every PR triggers GitHub Actions. **21 required contexts** must pass before merge.
+
+→ Source of truth: [.github/workflows/](../.github/workflows/).
+
+### 5.1 Code quality
+
+| Job | What it does |
+|---|---|
+| `Lint & Format` | `ruff check .` + `ruff format --check .`. Zero tolerance. |
+| `Type Check` | `mypy --strict src/ tests/`. |
+| `Pre-commit` | Re-runs `pre-commit run --all-files` in CI. |
+| `Lint PR title` | Conventional-commit prefix + lowercase-or-initialism subject. |
+| `Commit-type sync` | Asserts commitizen schema and `pr-title.yml` agree on both the type allowlist and the subject-case constraint. |
+| `Branch-protection contexts sync` | Asserts every workflow job is listed in `.github/branch-protection/{develop,main}.json` (or in the script's `EXEMPT_WORKFLOWS` map). |
+| `Version bump check` | `pyproject.toml` `[project].version` differs from base; `uv.lock` self-version matches. `release:` PRs exempt. |
+| `Tests required` | `feat:`/`fix:` PRs that touch `src/` must touch `tests/`. Other prefixes get a warn-only. |
+| `File length` | `*.py` files in `src/`, `tests/`, `eval/`, `.github/scripts/` capped at 300 lines. Function caps via ruff `PLR0915`/`PLR0912` (50 stmts / 12 branches). |
+| `src/ README audit` | Every `src/` package with code has a README ≥ 200 bytes containing `## Key interfaces` (or `## Public surface`). |
+| `Aspirational ticket cite` | Lines starting with `*Aspirational` in `docs/INVARIANTS.md` must cite at least one `#NNN` ticket; closed cites warn (or fail under `ASPIRATIONAL_STRICT=1`). |
+
+### 5.2 Correctness
+
+| Job | What it does |
+|---|---|
+| `Unit tests` | `pytest tests/ -m "not integration"` — fast feedback. Includes doc-vs-code drift backstops (`test_route_versioning`, `test_otel_semconv`, etc.). |
+| `Coverage` | `pytest tests/ --cov=src` with `fail_under = 75`. |
+
+### 5.3 Architecture
+
+| Job | What it does |
+|---|---|
+| `Architecture (import-linter)` | Runs `import-linter` against the two contracts in `pyproject.toml`. |
+| `Frontend Build` | `npm run build` from `frontend/`. |
+| `Frontend Quality` | eslint (`max-warnings 0`) + prettier `--check` + `tsc --noEmit` + vitest. |
+
+### 5.4 Security
+
+| Job | What it does |
+|---|---|
+| `Secret scan (gitleaks)` | Repo-wide secret scan on every push and PR. Third independent layer. |
+| `Python deps (pip-audit)` | CVE scan against `uv.lock`. Per-CVE ignore list at `.github/security/pip-audit-ignore.txt`. |
+| `Frontend deps (npm audit)` | `--audit-level=high`. |
+| `Container image scan (trivy)` | OS- and library-level CVE scan on the built image. |
+| `Action pinning audit` | Walks every `.github/workflows/*.yml`. First-party = major tag; `astral-sh/setup-uv` = patch tag; third-party = SHA + `# vN.M.P` comment. |
+
+### 5.5 Operations (workflows on a schedule / trigger)
+
+| Workflow | When | What it does |
+|---|---|---|
+| `branch-protection.yml` | Weekly + dispatch | Re-applies `.github/branch-protection/*.json` via `gh api`. Drift re-assertion. |
+| `eval-nightly.yml` | `workflow_dispatch` only by default | Runs `pytest eval/` against the configured LLM. Documented opt-in for `schedule:`. |
+| `artifact-cleanup.yml` | Weekly + dispatch | Prunes Actions artifacts older than 7 days. |
+| `release-drafter.yml` | Push to main + PR label events | Updates a draft GitHub Release under `v$RESOLVED_VERSION`. |
+| `release.yml` | On tag `v*.*.*` | Builds the image, pushes to GHCR, generates a CycloneDX SBOM, publishes the release. |
+| `changelog-rollup.yml` | After release.yml succeeds + dispatch | Opens a `chore: roll up CHANGELOG …` PR against `develop` — moves `[Unreleased]` entries under the released version's heading + bumps `pyproject.toml` + `uv.lock` PATCH. |
+| `codeql.yml` | `workflow_dispatch` only (placeholder) | Static analysis. Activate when the repo is public or has GHAS. |
+
+---
+
+## 6. Agent-level hooks — opt-in per developer
+
+`.claude/hooks/` scripts run *around* the LLM agent's own actions — pre-commit and CI run when *code* moves, hooks run when the *agent* moves.
+
+→ Source of truth: [.claude/hooks/](../.claude/hooks/) (3 Python scripts) + [.claude/settings.local.json.example](../.claude/settings.local.json.example).
+
+| Script | Purpose |
+|---|---|
+| `pretooluse_bash.py` | (1) Refuses Bash commands containing bypass flags (`git --no-verify`, `--no-hooks`, `--no-gpg-sign`); (2) on `git commit`, scans the staged diff for AWS/sk-/ghp_/PEM/Slack patterns and blocks if any match; (3) appends every Bash invocation to `.claude/bash-log.txt` (gitignored) for forensics. |
+| `posttooluse_writeedit.py` | After every Write/Edit, dispatches the right formatter — ruff for `.py`, prettier for `.ts/.tsx/.js/.jsx/.css/.json/.html/.md`. |
+| `sessionstart.py` | At session start, injects current branch + `git status --short` as `additionalContext` so the agent is grounded in the actual repo state. |
+
+Wire once: `cp .claude/settings.local.json.example .claude/settings.local.json`.
+
+Until you wire it, *your local agent's actions are not policed by these scripts* — but pre-commit and CI still are, so nothing slips through to `main`.
+
+---
+
+## 7. Skills — guidance, not enforcement
+
+A **skill** is a markdown briefing the agent loads when its topic matches the current task. Skills shape *style*; they reject nothing.
+
+→ Source of truth: [.claude/skills/](../.claude/skills/).
+
+| Skill | When it activates |
+|---|---|
+| `architect` | Architecture decisions, module boundaries, tech-stack choices, API contracts. |
+| `code-reviewer` | After code is written or edited. 10-point review checklist. |
+| `devops` | Docker, docker-compose, CI/CD, `pyproject.toml`, observability config. |
+| `frontend` | React/TS work in `frontend/`. |
+| `qa-engineer` | Tests, eval harness, golden-dataset changes. |
+| `technical-writer` | Docs, READMEs, inline documentation. |
+
+A skill is a librarian who hands you a relevant book before you start; a hook is a gate guard who refuses to let you leave with the wrong book.
+
+---
+
+## 8. The evaluation harness — a one-page summary
+
+Distinct from the **build harness** (everything above), the **evaluation harness** tests whether the agent answers questions correctly.
+
+→ Source of truth: [docs/EVAL_HARNESS.md](EVAL_HARNESS.md), with the dataset in [eval/golden_qa.json](../eval/golden_qa.json) and the runner in [eval/test_golden_qa.py](../eval/test_golden_qa.py).
+
+**Shape:**
+
+- A **golden dataset** of question / expected-answer pairs (one trivial echo case ships; replace with your domain dataset).
+- **Three tolerance modes**: `exact_match`, `numeric_close` (within 1 %), `semantic_similar` (LLM judge ≥ 0.8).
+- **Provider-agnostic** — wire your concrete LLM client via the `LLMClient` Protocol in `src/eval/judge.py`.
+- **Disabled-by-default nightly** ([eval-nightly.yml](../.github/workflows/eval-nightly.yml)) — `workflow_dispatch` only, opt-in to `schedule:` after configuring secrets.
+
+---
+
+## 9. Process-as-code
+
+| Concern | File / mechanism |
+|---|---|
+| PR template | [.github/pull_request_template.md](../.github/pull_request_template.md). |
+| Issue templates | [.github/ISSUE_TEMPLATE/](../.github/ISSUE_TEMPLATE/): `bug.md`, `feature.md`, `eval-regression.md`. Blank issues disabled. |
+| Code ownership | [.github/CODEOWNERS](../.github/CODEOWNERS). |
+| Branch protection | [.github/branch-protection/{main,develop}.json](../.github/branch-protection/) declarative configs, re-applied weekly by [branch-protection.yml](../.github/workflows/branch-protection.yml). |
+| Commit message shape | Commitizen, configured in `pyproject.toml`. |
+| Branching model | `feat/*` → `develop` → `main`. No direct commits. |
+
+The 7 allowed conventional-commit prefixes:
+
+```text
+feat | fix | docs | test | refactor | chore | release
+```
+
+`release:` is project-specific — `develop → main` release PRs only.
+
+---
+
+## 10. Defence in depth — two worked examples
+
+### 10.1 A secret is accidentally committed
+
+| Layer | What happens |
+|---|---|
+| **1. Pre-commit `gitleaks`** | Local hook refuses the commit; never makes a commit object. |
+| **2. Agent-level secret-scan hook** *(opt-in)* | If the agent runs `git commit`, `pretooluse_bash.py` scans the staged diff itself and blocks. |
+| **3. CI `Secret scan (gitleaks)`** | If layers 1–2 were bypassed, the GitHub Action runs on push and fails the PR. |
+| **4. Manual review via CODEOWNERS** | Reviewers see the diff. |
+
+For the secret to land on `main`, **all four** have to fail or be bypassed on the same change.
+
+### 10.2 A boundary violation is introduced
+
+A developer adds `from src.api.routes import router` inside `src/models/foo.py`.
+
+| Layer | What happens |
+|---|---|
+| **1. Local `lint-imports`** | If the developer runs `just architecture` (or has it in their editor), they see the error at their desk. |
+| **2. CI `Architecture (import-linter)`** | Runs on every push. `import-linter` reports the forbidden chain and exits non-zero. PR cannot merge. |
+
+The error names the offending module, line, and contract — no guessing.
+
+---
+
+## 11. Operating within the harness — a survival guide
+
+| Error you see | What it means | What to do |
+|---|---|---|
+| `pydantic_core ... extra_forbidden` | A contract got a field name it doesn't recognise. Often a typo. | Open the model under `src/models/`. Fix the field name. |
+| CI `Lint & Format` failed | Ruff caught a style or correctness issue. | `uv run ruff check . --fix` locally. |
+| CI `Type Check` failed | mypy strict caught an untyped function or unsafe coercion. | The error names file + line. Add the missing annotation. |
+| CI `Architecture (import-linter)` failed | An import crossed a layer boundary. | Read the chain. Move the code or delete the import. |
+| CI `Lint PR title` failed | Title doesn't start with one of the 7 prefixes, or starts with Title Case. | Edit the PR title. Lowercase verb after the colon, or all-caps initialism. |
+| CI `Version bump check` failed | `pyproject.toml` `[project].version` is unchanged from the base branch. | Bump pyproject.toml + the matching `[[package]]` block in `uv.lock`. |
+| CI `Tests required` failed | `feat:`/`fix:` PR touched `src/` without touching `tests/`. | Add a test. If genuinely test-exempt, use `chore:` / `refactor:` instead. |
+| CI `File length` failed | A `.py` file > 300 lines or a function > 50 stmts / 12 branches. | Split. There is no `# noqa` exemption. |
+| CI `src/ README audit` failed | A package missing README, < 200 bytes, or missing `## Key interfaces`. | Add or extend the README. |
+| CI `Aspirational ticket cite` failed | A `*Aspirational` line in `docs/INVARIANTS.md` cites no ticket. | File the ticket and add the cite, or remove the marker line if the rule is now enforced. |
+| CI `Action pinning audit` failed | A `uses:` line doesn't match the bucket policy. | First-party = major tag; setup-uv = patch tag; third-party = SHA + `# vN.M.P` comment. |
+| Local commit rejected by commitizen | Commit message doesn't match `<type>(<scope>): <subject>`. | Re-run with the correct shape. |
+| `Coverage` failed | Test coverage on `src/` dropped below 75 %. | Add tests for the lines flagged in the report. |
+
+> **Rule of thumb.** If a check rejects a change, the rejection message names the file, the line, and the rule. Read it carefully before reaching for an override flag — there is almost always a real bug under it.
+
+---
+
+## 12. Setup checklist for your harness maintainer
+
+- [ ] **Install dependencies.** `uv sync --extra dev`.
+- [ ] **Wire pre-commit.** `uv run pre-commit install --hook-type pre-commit --hook-type commit-msg`.
+- [ ] **(Opt-in) Wire agent-level hooks.** `cp .claude/settings.local.json.example .claude/settings.local.json`.
+- [ ] **Verify CI is green on a no-op PR** before merging real work.
+- [ ] **Apply branch protection** by setting a `BRANCH_PROTECTION_TOKEN` secret with `admin:repo` scope and triggering `branch-protection.yml`. The default `GITHUB_TOKEN` cannot edit branch protection on the repo it runs in.
+- [ ] **(Opt-in) Set `LLM_*` secrets** for the eval harness when you flip `eval-nightly.yml` to `schedule:`.
+- [ ] **(Opt-in) Provision `RELEASE_BOT_TOKEN`** so `changelog-rollup.yml`'s auto-PR triggers its own CI matrix on creation. Falls back to `GITHUB_TOKEN` (auto-PR opens, only the auto-CI is lost).
+
+---
+
+## 13. Glossary
+
+| Term | What it is |
+|---|---|
+| **Pydantic** | Python library for runtime data validation. The basis of every contract in this repo. |
+| **mypy** | Static type checker. *Strict mode* refuses missing/implicit annotations. |
+| **ruff** | Fast Rust-based Python linter and formatter. Catches style + dead code + security smells. |
+| **import-linter** | Checks the import graph against declared *contracts* (layered, forbidden). |
+| **pre-commit** | Framework that registers itself as a git hook so configured checks run before every commit. |
+| **GitHub Actions** | GitHub's built-in CI. Workflows under `.github/workflows/` run on triggers. |
+| **Conventional commits** | `<type>(<scope>): <subject>` shape with a fixed type set (this repo allows 7). |
+| **Semantic versioning** | `MAJOR.MINOR.PATCH` — each component conveys the kind of change. |
+| **OpenTelemetry (OTel)** | Vendor-neutral standard for traces, metrics, logs. The repo follows `gen_ai.*` and `db.*` semantic conventions for attribute names. |
+| **CycloneDX** | An SBOM format. Generated per release and attached to the GitHub Release. |
+| **gitleaks** | Pattern-based secret scanner. |
+
+---
+
+## 14. Where to go next
+
+| Doc | What it covers |
+|---|---|
+| [HARNESS.md](HARNESS.md) | The engineer-facing map — same surface, terser, more linkable. |
+| [INVARIANTS.md](INVARIANTS.md) | The numbered rules in full. |
+| [BOUNDARIES.md](BOUNDARIES.md) | The dependency graph + the layer rules. |
+| [ARCHITECTURE.md](ARCHITECTURE.md) | The system design — components, request flow. |
+| [SECURITY.md](SECURITY.md) | Threat model + defence-in-depth mapping. |
+| [EVAL_HARNESS.md](EVAL_HARNESS.md) | The eval flywheel. |
+| [DEVELOPMENT.md](DEVELOPMENT.md) | Local setup, branching, releases. |
diff --git a/docs/INVARIANTS.md b/docs/INVARIANTS.md
index 74c3010..8935b09 100644
--- a/docs/INVARIANTS.md
+++ b/docs/INVARIANTS.md
@@ -14,7 +14,7 @@ Pydantic with `extra="forbid"` raises on unknown keys at construction. That kill
 A versioned prefix means future breaking changes ship at `/api/v2/` without coordinated client deploys. Typed responses mean an OpenAPI schema is correct by construction.
 
 - **Where:** `src/api/routes.py`
-- **Enforced by:** route review; FastAPI's response model inference.
+- **Enforced by:** `tests/test_route_versioning.py` walks `app.routes` and asserts every path matches `^/api/v\d+/` or is in the explicit `UNVERSIONED_ALLOWLIST` (FastAPI's auto-doc routes only); FastAPI's response model inference checks the typed-response side at request time.
 
 ## 3. Layer flow is one-way
 
diff --git a/pyproject.toml b/pyproject.toml
index 3fafc5a..1d80f8d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "harness-python-react"
-version = "0.2.5"
+version = "0.2.6"
 description = "Production-quality LLM-driven coding harness — Python (FastAPI) backend, Vite + React + TypeScript frontend."
 readme = "README.md"
 requires-python = ">=3.14"
diff --git a/uv.lock b/uv.lock
index bd15374..c20d4d6 100644
--- a/uv.lock
+++ b/uv.lock
@@ -328,7 +328,7 @@ wheels = [
 
 [[package]]
 name = "harness-python-react"
-version = "0.2.5"
+version = "0.2.6"
 source = { virtual = "." }
 dependencies = [
     { name = "fastapi" },