From 1d3ef1a846102a8c68ae5640b347f7c09418ae13 Mon Sep 17 00:00:00 2001 From: Constantinos <41453723+constk@users.noreply.github.com> Date: Thu, 30 Apr 2026 02:03:29 +1000 Subject: [PATCH 1/9] =?UTF-8?q?chore:=20container=20hardening=20=E2=80=94?= =?UTF-8?q?=20read-only=20root=20FS=20+=20tmpfs=20/tmp=20+=20Python=20env?= =?UTF-8?q?=20(#119,=20#120,=20#170)=20(#68)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Dockerfile | 9 ++++++++- docker-compose.yml | 9 +++++++++ docs/SECURITY.md | 8 ++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index d5135d8..0bdf8f7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -53,7 +53,14 @@ COPY --chown=app:app src/ src/ USER app # Put the venv on PATH so `uvicorn` resolves without `uv run` indirection. -ENV PATH="/app/.venv/bin:${PATH}" +# PYTHONDONTWRITEBYTECODE=1 stops Python from attempting `.pyc` writes under +# `__pycache__/` on cold start — they would EROFS-fail under the read-only +# root FS configured in docker-compose.yml. PYTHONUNBUFFERED=1 keeps +# uvicorn's stdout from being held behind line-buffering when running under +# non-TTY container stdio. +ENV PATH="/app/.venv/bin:${PATH}" \ + PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 EXPOSE 8000 diff --git a/docker-compose.yml b/docker-compose.yml index 55fb5ce..9224151 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -23,6 +23,15 @@ services: - OTEL_EXPORTER_OTLP_ENDPOINT=http://jaeger:4317 - OTEL_EXPORTER_OTLP_PROTOCOL=grpc - OTEL_SERVICE_NAME=harness-python-react + # Container hardening. The root FS is read-only at the kernel level so a + # post-exploit shell can't modify /app, persist binaries, or fill disk + # under the `app` user's ownership. /tmp is the only writable path — + # tmpfs-mounted with a 64 MB ceiling so it can't be abused as unbounded + # storage. Verified: `touch /app/foo` → EROFS; `touch /tmp/foo` succeeds; + # healthcheck reports healthy. See docs/SECURITY.md "Container Security". + read_only: true + tmpfs: + - /tmp:size=64m,mode=1777 frontend: build: ./frontend diff --git a/docs/SECURITY.md b/docs/SECURITY.md index 416d7f4..e88a93b 100644 --- a/docs/SECURITY.md +++ b/docs/SECURITY.md @@ -61,8 +61,16 @@ Tag v*.*.* ──► release.yml: build image, push to ghcr.io, generate - **Builder** — runs `uv sync --frozen --no-dev`. Has uv, pip cache, build tools. - **Runtime** — `python:3.14-slim`, copies only `.venv` + `src/` from the builder, runs as non-root user `app`. No uv, no pip cache, no build tools, no dev deps. +Runtime stage env: `PYTHONDONTWRITEBYTECODE=1` (no `.pyc` writes — would EROFS-fail under the read-only root FS) and `PYTHONUNBUFFERED=1` (uvicorn stdout flushed immediately). + +`docker-compose.yml`'s `app` service runs with `read_only: true` and a `tmpfs: /tmp:size=64m,mode=1777` mount. The kernel rejects writes to every path except the 64 MB tmpfs, so a post-exploit shell under the `app` user cannot modify `/app`, persist binaries, or fill the host's disk under `app`'s ownership. Verified locally: `touch /app/foo` → `Read-only file system`; `touch /tmp/foo` succeeds; healthcheck reports `healthy`. + Healthcheck uses stdlib `urllib.request` so curl isn't in the image. +### Distroless evaluation — deferred + +`gcr.io/distroless/python3-debian12` ships Python at `/usr/bin/python3` while the current builder stage materialises a venv whose `pyvenv.cfg` and interpreter symlinks reference `/usr/local/bin/python3.14` (Dockerfile comment makes this constraint explicit). Migrating requires either matching Python paths between stages (no distroless variant matches slim's `/usr/local`) or rebuilding the venv inside the runtime stage (distroless has no `pip` / `uv`). Either route adds engineering risk and operational friction (no `docker exec ... sh`) that outweighs the marginal attack-surface reduction now that read-only-FS + non-root + no-build-tools + trivy-scanning are all in place. Revisit when distroless ships a `/usr/local` variant or when the venv-in-runtime cost shrinks. + ## What's intentionally out of scope (scaffold) - **WAF / DDoS** — deployment-environment concerns, not template concerns. From ec96aecbdc56d53bc409d653e570158cabc3a707 Mon Sep 17 00:00:00 2001 From: Constantinos <41453723+constk@users.noreply.github.com> Date: Thu, 30 Apr 2026 02:33:13 +1000 Subject: [PATCH 2/9] =?UTF-8?q?chore:=20file/function=20length=20caps=20?= =?UTF-8?q?=E2=80=94=20ruff=20PLR0915/PLR0912=20+=20check=5Ffile=5Flength.?= =?UTF-8?q?py=20(#124)=20(#69)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/branch-protection/develop.json | 1 + .github/branch-protection/main.json | 1 + .github/scripts/check_file_length.py | 114 +++++++++++++++++++++++++ .github/workflows/ci.yml | 14 +++ pyproject.toml | 14 +++ 5 files changed, 144 insertions(+) create mode 100644 .github/scripts/check_file_length.py diff --git a/.github/branch-protection/develop.json b/.github/branch-protection/develop.json index 4412a3f..a603ec4 100644 --- a/.github/branch-protection/develop.json +++ b/.github/branch-protection/develop.json @@ -8,6 +8,7 @@ "Coverage", "Architecture (import-linter)", "Pre-commit", + "File length", "Frontend Build", "Frontend Quality", "Branch-protection contexts sync", diff --git a/.github/branch-protection/main.json b/.github/branch-protection/main.json index d20d7b6..d476437 100644 --- a/.github/branch-protection/main.json +++ b/.github/branch-protection/main.json @@ -8,6 +8,7 @@ "Coverage", "Architecture (import-linter)", "Pre-commit", + "File length", "Frontend Build", "Frontend Quality", "Branch-protection contexts sync", diff --git a/.github/scripts/check_file_length.py b/.github/scripts/check_file_length.py new file mode 100644 index 0000000..b9e4483 --- /dev/null +++ b/.github/scripts/check_file_length.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python3 +"""Enforce the per-file line-count cap from `CLAUDE.md`. + +`CLAUDE.md` *Code standards*: *"No file over 300 lines, no function +over ~50 lines."* The function-length half is enforced by ruff's +`PLR0915` / `PLR0912` rules (`pyproject.toml [tool.ruff.lint].select`). +This script enforces the file-length half. + +Behaviour: + +- Walks `src/`, `tests/`, `eval/`, `.github/scripts/` for `*.py` files. +- For each file, counts lines (newline-terminated and final-line-without- + newline both count as one line). +- Fails when any file exceeds `THRESHOLD = 300`. + +There is **no exemption mechanism**. Per `feedback_no_noqa`, an +allowlist that records "current offenders with a tracker ticket" is a +non-blocking deferral by another name — the team gets used to seeing +the offenders listed, the tracker ticket sits open, and the rule never +fully bites. Pre-existing offenders were refactored in #144 before this +gate landed (six files / two functions split into helpers). + +If a file legitimately should not be capped (generated code, vendored +sources), put it in a directory this script does not walk — and document +the exemption as a structural decision in `docs/DEVELOPMENT.md`, not as +an inline allowlist entry. + +Exit codes: + 0 — every walked file is at or under `THRESHOLD` + 1 — at least one file exceeds the cap + 2 — script-level error (no walk-target directories at all) + +Usage (from repo root): + + python .github/scripts/check_file_length.py +""" + +from __future__ import annotations + +import sys +from pathlib import Path + +THRESHOLD = 300 + +# Directories walked. Each is project-owned Python code subject to the +# `CLAUDE.md` cap. Adding a new walk root requires a code change here +# (and a comment naming the rationale) — there is deliberately no +# environment-variable / CLI-flag override. +ROOTS: tuple[str, ...] = ( + "src", + "tests", + "eval", + ".github/scripts", +) + + +def count_lines(path: Path) -> int: + """Count newline-terminated lines plus a final un-terminated line, if any.""" + text = path.read_text(encoding="utf-8") + if not text: + return 0 + # `splitlines()` discards a trailing empty token, mirroring `wc -l + 1` + # for files without a trailing newline. + return len(text.splitlines()) + + +def _normalised(path: Path) -> str: + """Return the path with forward slashes (Windows / POSIX parity).""" + return path.as_posix() + + +def main() -> int: + walked: list[Path] = [] + failures: list[str] = [] + + for root_name in ROOTS: + root = Path(root_name) + if not root.is_dir(): + # Directory may not exist yet (e.g. eval/ in a new fork). Skip cleanly. + continue + for path in sorted(root.rglob("*.py")): + walked.append(path) + lines = count_lines(path) + if lines > THRESHOLD: + failures.append( + f"::error file={_normalised(path)}::{lines} lines > " + f"{THRESHOLD} (per `CLAUDE.md`). Split the file or " + "refactor — there is no exemption mechanism, see the " + "module docstring." + ) + + if not walked: + print(f"::error::no walk targets exist; checked {ROOTS!r}") + return 2 + + if failures: + for line in failures: + print(line) + print( + f"\n{len(failures)} file(s) exceed the cap. Refactor in this PR — " + "splitting into single-responsibility modules or extracting helpers " + "is the same shape #144 used for the original offenders." + ) + return 1 + + print( + f"File-length audit OK — {len(walked)} file(s) checked across " + f"{len(ROOTS)} root(s), threshold {THRESHOLD}." + ) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index baad7ec..99b29e5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -91,6 +91,20 @@ jobs: - run: uv sync --frozen --extra dev - run: uv run pre-commit run --all-files --show-diff-on-failure + file-length: + name: File length + runs-on: ubuntu-latest + # CLAUDE.md: "no file over 300 lines, no function over ~50 lines". Ruff + # PLR0915 / PLR0912 enforce the function-half (run by `Lint & Format`); + # this job enforces the file-half. No exemption mechanism — pre-existing + # offenders should be split before this job lands, not allowlisted. + steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + with: + python-version: "3.14" + - run: python .github/scripts/check_file_length.py + frontend-build: name: Frontend Build runs-on: ubuntu-latest diff --git a/pyproject.toml b/pyproject.toml index 1d4574a..1789d98 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -82,6 +82,14 @@ select = [ "TCH", # flake8-type-checking "S", # flake8-bandit (security checks — SQL injection, hardcoded crypto, etc.) "RUF", # ruff-specific rules + # PLR0915 (too-many-statements) + PLR0912 (too-many-branches) enforce the + # function-complexity half of CLAUDE.md *Code standards*. Limits pinned + # explicitly in [tool.ruff.lint.pylint] below to defend against an + # upstream ruff default change silently widening the cap. The file-half + # of the rule is enforced by .github/scripts/check_file_length.py + # (300-line cap, no exemption mechanism). + "PLR0915", + "PLR0912", ] [tool.ruff.lint.per-file-ignores] @@ -90,6 +98,12 @@ select = [ "tests/**" = ["S101"] # pytest asserts are idiomatic (rewritten for rich errors) ".claude/hooks/**" = ["S603", "S607"] # hook scripts intentionally run git/uv/npx from PATH +[tool.ruff.lint.pylint] +# Pinned explicitly — ruff's defaults can drift between versions. Matches +# CLAUDE.md "no function over ~50 lines". +max-statements = 50 +max-branches = 12 + [tool.ruff.lint.isort] known-first-party = ["src"] From caf93d3543125d497e877b20de6ccb44ce74a7bb Mon Sep 17 00:00:00 2001 From: Constantinos <41453723+constk@users.noreply.github.com> Date: Thu, 30 Apr 2026 12:40:38 +1000 Subject: [PATCH 3/9] =?UTF-8?q?chore:=20three=20CI=20gates=20=E2=80=94=20v?= =?UTF-8?q?ersion=20bump,=20action=20pinning,=20tests=20required=20(#122,?= =?UTF-8?q?=20#123,=20#125)=20(#70)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/branch-protection/develop.json | 3 + .github/branch-protection/main.json | 3 + .github/scripts/check_action_pins.py | 245 ++++++++++++++++++++ .github/scripts/check_tests_present.py | 187 +++++++++++++++ .github/scripts/check_version_bump.py | 174 ++++++++++++++ .github/workflows/ci.yml | 46 ++++ docs/DEVELOPMENT.md | 22 +- pyproject.toml | 2 +- tests/test_check_action_pins.py | 270 ++++++++++++++++++++++ tests/test_check_action_pins_composite.py | 186 +++++++++++++++ tests/test_check_tests_present.py | 219 ++++++++++++++++++ tests/test_check_version_bump.py | 241 +++++++++++++++++++ uv.lock | 2 +- 13 files changed, 1597 insertions(+), 3 deletions(-) create mode 100644 .github/scripts/check_action_pins.py create mode 100644 .github/scripts/check_tests_present.py create mode 100644 .github/scripts/check_version_bump.py create mode 100644 tests/test_check_action_pins.py create mode 100644 tests/test_check_action_pins_composite.py create mode 100644 tests/test_check_tests_present.py create mode 100644 tests/test_check_version_bump.py diff --git a/.github/branch-protection/develop.json b/.github/branch-protection/develop.json index a603ec4..6187955 100644 --- a/.github/branch-protection/develop.json +++ b/.github/branch-protection/develop.json @@ -9,6 +9,9 @@ "Architecture (import-linter)", "Pre-commit", "File length", + "Version bump check", + "Action pinning audit", + "Tests required", "Frontend Build", "Frontend Quality", "Branch-protection contexts sync", diff --git a/.github/branch-protection/main.json b/.github/branch-protection/main.json index d476437..fecf6fd 100644 --- a/.github/branch-protection/main.json +++ b/.github/branch-protection/main.json @@ -9,6 +9,9 @@ "Architecture (import-linter)", "Pre-commit", "File length", + "Version bump check", + "Action pinning audit", + "Tests required", "Frontend Build", "Frontend Quality", "Branch-protection contexts sync", diff --git a/.github/scripts/check_action_pins.py b/.github/scripts/check_action_pins.py new file mode 100644 index 0000000..6490fd0 --- /dev/null +++ b/.github/scripts/check_action_pins.py @@ -0,0 +1,245 @@ +#!/usr/bin/env python3 +"""Audit GitHub Actions pin shapes against the project policy. + +Policy from `docs/DEVELOPMENT.md#action-pinning-policy`: + +- **First-party** (`actions/*`, `github/*`) — pin to **major tag** + (`@v\\d+`). SHA + trailing `# vN.M.P` comment is also accepted (stricter + posture; used in elevated-permissions workflows like `release.yml`). + +- **`astral-sh/setup-uv`** — pin to **latest patch tag** + (`@vN.M.P`). The maintainers do not publish a floating major tag for + the v8 series; `@v8` would not resolve. SHA + comment also accepted. + +- **Third-party** (anything else, including `aquasecurity/*`, `gitleaks/*`, + `amannn/*`, `release-drafter/*`) — pin to a **40-hex-char SHA** with a + trailing `# vN.M.P` comment naming the resolved tag. A moving branch + or re-tagged release in a supply-chain workflow defeats the point of + the scan/automation. + +The script walks every YAML file under `.github/workflows/`, extracts +each `uses:` line with its trailing comment if any, and validates the +pin against the policy bucket the action falls into. + +Exit codes: + 0 — every action invocation matches policy + 1 — one or more violations + 2 — script-level error (no workflow files found, parse failure) + +Usage (from repo root): + + python .github/scripts/check_action_pins.py + +Bumping a third-party action: open a focused PR that updates the SHA +*and* the trailing comment. Dependabot's `github-actions` ecosystem +opens those PRs automatically when new SHAs land. +""" + +from __future__ import annotations + +import re +import sys +from dataclasses import dataclass +from pathlib import Path + +WORKFLOWS_DIR = Path(".github/workflows") +# Composite actions live under `.github/actions//action.yml` and can +# carry their own `uses:` invocations of third-party actions in their +# `runs.steps` block. Walk this dir alongside workflows (#137). +ACTIONS_DIR = Path(".github/actions") + +# Captures `uses: @` lines, optionally with a trailing comment. +# Tolerant of the leading `- ` (list item) or no leading dash, and any +# whitespace indent. +_USES_RE = re.compile(r"^\s*-?\s*uses:\s*(?P\S+)\s*(?P#.*)?$") + +_SHA_RE = re.compile(r"^[0-9a-f]{40}$") +_MAJOR_TAG_RE = re.compile(r"^v\d+$") +_PATCH_TAG_RE = re.compile(r"^v\d+\.\d+\.\d+$") +# A `# vN.M.P` annotation in the trailing comment. We accept partial +# semver (`# v5`, `# v4.2`) because some upstream tags are minor-only, +# but we require at least the leading `v\d+`. +_VERSION_COMMENT_RE = re.compile(r"v\d+(\.\d+)*") + + +@dataclass(frozen=True) +class ActionRef: + """One `uses:` invocation: file:line, the @ref, and trailing comment.""" + + file: Path + line: int + action: str # part before @, e.g. "actions/checkout" + pin: str # part after @, e.g. "v4" or "11bd71..." + comment: str | None # raw trailing comment incl. `#`, or None + + def loc(self) -> str: + return f"{self.file}:{self.line}" + + +def parse_workflow(path: Path) -> list[ActionRef]: + """Return every action invocation in a workflow or composite-action file. + + Walks `uses:` lines, including those inside composite-action `runs.steps` + blocks (#137). Skips local-path references (`uses: ./...`) — those point + at repo-internal files (reusable workflows / composite actions in the + same repo) and don't carry a third-party-action pin to validate. + """ + refs: list[ActionRef] = [] + for line_num, line in enumerate( + path.read_text(encoding="utf-8").splitlines(), start=1 + ): + match = _USES_RE.match(line) + if not match: + continue + ref_value = match.group("ref") + # Skip local-path references — these point at repo-internal files + # (reusable workflows or composite actions in this same repo) and + # don't carry an external pin shape we should validate. The path + # may have a `@ref` suffix (reusable workflows) or none (composite + # actions); either is fine. + if ref_value.startswith(("./", "../")): + continue + if "@" not in ref_value: + # Malformed — surface as a special error in validate_ref. + refs.append( + ActionRef( + file=path, + line=line_num, + action=ref_value, + pin="", + comment=match.group("comment"), + ) + ) + continue + action, pin = ref_value.split("@", 1) + refs.append( + ActionRef( + file=path, + line=line_num, + action=action, + pin=pin, + comment=match.group("comment"), + ) + ) + return refs + + +def classify(action: str) -> str: + """Return the pin-shape policy bucket for a given action. + + - "patch-tag" — astral-sh/setup-uv (no floating major tag) + - "major-tag" — actions/* and github/* (first-party) + - "third-party-sha" — everyone else (default for unknown) + """ + if action == "astral-sh/setup-uv": + return "patch-tag" + if action.startswith(("actions/", "github/")): + return "major-tag" + return "third-party-sha" + + +def validate_ref(ref: ActionRef) -> str | None: + """Return an error message if `ref` violates policy, else None. + + SHA pins are always accepted (most-strict shape) for any action, + provided the trailing comment names the resolved version. Tag pins + are accepted only for the buckets the policy documents. + """ + if not ref.pin: + return f"missing @ref on `uses: {ref.action}`" + + bucket = classify(ref.action) + + # SHA pins are stricter than tag pins; accept everywhere as long + # as a trailing version comment is present. The comment ties the + # opaque hash back to a human-reviewable changelog. + if _SHA_RE.match(ref.pin): + if not ref.comment or not _VERSION_COMMENT_RE.search(ref.comment): + return ( + f"{ref.action}@{ref.pin[:8]}… — SHA pin missing trailing " + "`# vN.M.P` comment" + ) + return None + + # Tag pins — must match the bucket. + if bucket == "patch-tag": + if not _PATCH_TAG_RE.match(ref.pin): + return ( + f"{ref.action}@{ref.pin} — astral-sh/setup-uv has no floating " + "major tag for the v8 series; pin to a patch tag (e.g. " + "`@v8.0.0`) or a SHA + trailing `# vN.M.P` comment" + ) + return None + + if bucket == "major-tag": + if not _MAJOR_TAG_RE.match(ref.pin): + return ( + f"{ref.action}@{ref.pin} — first-party action requires major " + "tag (`@v4`) or SHA + trailing `# vN.M.P` comment" + ) + return None + + # third-party-sha — only SHA + comment is acceptable. + return ( + f"{ref.action}@{ref.pin} — third-party action requires SHA pin " + "(40 hex chars) with trailing `# vN.M.P` comment, not a tag" + ) + + +def _collect_yaml_files() -> list[Path]: + """Workflow files + composite-action files. Composite dir is optional.""" + files: list[Path] = [] + files.extend(sorted(WORKFLOWS_DIR.glob("*.yml"))) + files.extend(sorted(WORKFLOWS_DIR.glob("*.yaml"))) + if ACTIONS_DIR.is_dir(): + # `.github/actions//action.yml` (or `action.yaml`). Recursive + # glob so nested composite actions are picked up. + files.extend(sorted(ACTIONS_DIR.glob("**/action.yml"))) + files.extend(sorted(ACTIONS_DIR.glob("**/action.yaml"))) + return files + + +def main() -> int: + if not WORKFLOWS_DIR.is_dir(): + print(f"::error::workflows dir not found: {WORKFLOWS_DIR}") + return 2 + + yml_files = _collect_yaml_files() + if not yml_files: + print(f"::error::no workflow files in {WORKFLOWS_DIR}") + return 2 + + refs: list[ActionRef] = [] + for path in yml_files: + refs.extend(parse_workflow(path)) + + if not refs: + print(f"::error::no `uses:` lines found across {len(yml_files)} workflow files") + return 2 + + failed = False + for ref in refs: + problem = validate_ref(ref) + if problem is None: + continue + failed = True + # GitHub Actions error annotation: surfaces inline on the file in PR review. + print(f"::error file={ref.file},line={ref.line}::{problem}") + + if failed: + print( + "\nSee docs/DEVELOPMENT.md#action-pinning-policy for the rule. " + "Dependabot's `github-actions` ecosystem opens SHA-bump PRs " + "automatically when new tags ship." + ) + return 1 + + print( + f"Action pins audit OK — {len(refs)} pins, {len(yml_files)} files " + f"(workflows + composite actions)." + ) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/.github/scripts/check_tests_present.py b/.github/scripts/check_tests_present.py new file mode 100644 index 0000000..3c689b1 --- /dev/null +++ b/.github/scripts/check_tests_present.py @@ -0,0 +1,187 @@ +#!/usr/bin/env python3 +"""Verify behaviour-changing PRs ship tests alongside the src/ change. + +`docs/DEVELOPMENT.md` *Testing Policy*: every PR that adds or changes +behaviour must include tests. Today only the 75% coverage gate +approximates this — a PR can drop coverage from 95% to 76% on +uncovered new code and still pass. This gate adds direct enforcement. + +Behaviour: + +- Reads `git diff --name-only origin/...HEAD` for `src/**/*.py` + and `tests/**/*.py`. +- Determines the PR's commit-type prefix from the event payload's + `pull_request.title` (same source as `check_version_bump.py`). +- If `src/**/*.py` files changed AND no `tests/**/*.py` file was + touched in the same diff: + - **Block (exit 1)** when the prefix is `feat:` or `fix:` — these + types declare a behaviour change that the policy says must come + with tests. + - **Warn (exit 0)** for `chore:`, `docs:`, `refactor:`, `test:`, + `release:` — these may legitimately touch `src/` without new + tests (config rename, internal-only refactor, doc-string fix). + +Exit codes: + 0 — pass (or warn-only) + 1 — block: behaviour-change PR shipped without tests + 2 — script-level error (missing env, parse failure) + +Usage (CI, on pull_request events): + + python .github/scripts/check_tests_present.py + +Required env (set automatically by GitHub Actions): +- ``GITHUB_BASE_REF`` — base branch name (e.g. "develop") +- ``GITHUB_EVENT_PATH`` — path to event JSON (for PR title) +""" + +from __future__ import annotations + +import json +import os +import subprocess +import sys +from pathlib import Path + +# Prefixes that declare a behaviour change → tests required. +BLOCKING_PREFIXES: frozenset[str] = frozenset({"feat", "fix"}) + +# Prefixes that may legitimately touch src/ without new tests → warn-only. +WARN_ONLY_PREFIXES: frozenset[str] = frozenset( + {"chore", "docs", "refactor", "test", "release"} +) + + +def pr_title_from_event() -> str | None: + """Read the PR title from the GitHub Actions event payload, if present.""" + event_path = os.environ.get("GITHUB_EVENT_PATH") + if not event_path: + return None + try: + data = json.loads(Path(event_path).read_text(encoding="utf-8")) + except OSError, json.JSONDecodeError: + return None + pr = data.get("pull_request") + if not isinstance(pr, dict): + return None + title = pr.get("title") + return title if isinstance(title, str) else None + + +def commit_type_prefix(title: str | None) -> str | None: + """Extract the conventional-commit type prefix from a PR title. + + Examples: "feat: add tool" → "feat", "chore(api): rename" → "chore", + "release: v1.2.3" → "release". Returns None if the title doesn't + contain a colon (i.e. doesn't match the `type: subject` shape). + """ + if not title or ":" not in title: + return None + head = title.lstrip() + # Type ends at the first of "(", "!", ":" — whichever comes first. + # The colon is mandatory (already checked above), so we always find one. + earliest = len(head) + for terminator in ("(", "!", ":"): + idx = head.find(terminator) + if 0 < idx < earliest: + earliest = idx + return head[:earliest].lower() if earliest > 0 else None + + +def changed_files(base_ref: str) -> list[str]: + """Return paths changed between `origin/` and HEAD.""" + try: + out = subprocess.check_output( # noqa: S603 - args are CI-trusted + [ # noqa: S607 - git on PATH + "git", + "diff", + "--name-only", + f"origin/{base_ref}...HEAD", + ], + text=True, + stderr=subprocess.PIPE, + ) + except subprocess.CalledProcessError as e: + msg = ( + f"git diff origin/{base_ref}...HEAD failed: " + f"{e.stderr.strip() or e.returncode}" + ) + raise RuntimeError(msg) from e + return [line for line in out.splitlines() if line.strip()] + + +def _is_under(path: str, dirname: str) -> bool: + """True when `path` is inside `/`.""" + return path.startswith(dirname + "/") or path == dirname + + +def main() -> int: + base_ref = os.environ.get("GITHUB_BASE_REF") + if not base_ref: + print("::warning::GITHUB_BASE_REF not set; skipping (not a PR run).") + return 0 + + title = pr_title_from_event() + prefix = commit_type_prefix(title) + + try: + files = changed_files(base_ref) + except RuntimeError as e: + print(f"::error::{e}") + return 2 + + src_py = [f for f in files if _is_under(f, "src") and f.endswith(".py")] + tests_py = [f for f in files if _is_under(f, "tests") and f.endswith(".py")] + + if not src_py: + print("No `src/**/*.py` changes — gate not applicable.") + return 0 + + if tests_py: + print( + f"src/ changed ({len(src_py)} file(s)); tests/ also touched " + f"({len(tests_py)} file(s)). OK." + ) + return 0 + + # src/ changed, tests/ untouched — decide based on prefix. + src_summary = ", ".join(src_py[:5]) + ( + f" (+{len(src_py) - 5} more)" if len(src_py) > 5 else "" + ) + + if prefix in BLOCKING_PREFIXES: + print( + f"::error::PR title prefix `{prefix}:` declares a behaviour " + "change but the diff touches `src/` without any " + "`tests/**/*.py` change. Per `docs/DEVELOPMENT.md` Testing " + "Policy, every behaviour-change PR ships tests.\n" + f" src/ files changed: {src_summary}\n" + " Fix: add or update a test in `tests/`, or restructure the " + "PR if the change is genuinely test-exempt (rename, comment-only, " + "internal refactor) — and use a different commit-type prefix " + f"({sorted(WARN_ONLY_PREFIXES)})." + ) + return 1 + + if prefix in WARN_ONLY_PREFIXES: + print( + f"::warning::PR title prefix `{prefix}:` touches `src/` " + "without `tests/` changes. Allowed for this prefix; " + "double-check that no behaviour change slipped in.\n" + f" src/ files changed: {src_summary}" + ) + return 0 + + # Unknown / missing prefix — fall through to warn-only. The + # `Lint PR title` and `Commit-type sync` gates already enforce + # the 7-prefix list elsewhere; this script doesn't duplicate that. + print( + f"::warning::PR title prefix not recognised ({prefix!r}); " + "tests-required gate skipped. The `Lint PR title` job is the " + "source of truth for prefix validation." + ) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/.github/scripts/check_version_bump.py b/.github/scripts/check_version_bump.py new file mode 100644 index 0000000..4510c0f --- /dev/null +++ b/.github/scripts/check_version_bump.py @@ -0,0 +1,174 @@ +#!/usr/bin/env python3 +"""Verify pyproject version was bumped on this PR. + +Per `docs/DEVELOPMENT.md`: every non-`release:` PR bumps `[project] version` +in `pyproject.toml`. `release:` PRs are exempt — the dev version IS the +release version. The gate does not enforce semver direction (feat → MINOR +vs fix → PATCH); it only enforces that *some* bump happened, since +direction is conventionally documented but mechanically ambiguous. + +Sibling check: `uv.lock`'s `[[package]] name = "harness-python-react"` +block must show the same version. Hand-edit the lockfile to avoid silent +transitive-dep upgrades that `uv lock` would pull in. + +Usage (CI, on pull_request events): + + python .github/scripts/check_version_bump.py + +Required env (set by GitHub Actions on `pull_request`): + +- `GITHUB_BASE_REF` — base branch name (e.g. "develop") +- `GITHUB_EVENT_PATH` — path to event JSON (used to read PR title) + +Exit codes: + 0 — version bumped (or `release:` PR, exempt) + 1 — version not bumped, or pyproject + uv.lock disagree + 2 — script-level error (missing env, parse failure) +""" + +from __future__ import annotations + +import json +import os +import re +import subprocess +import sys +import tomllib +from pathlib import Path + +PYPROJECT = Path("pyproject.toml") +UV_LOCK = Path("uv.lock") +PACKAGE_NAME = "harness-python-react" + +# Match the project's self-version block in uv.lock: +# +# [[package]] +# name = "harness-python-react" +# version = "0.1.0" +# +# Tolerant of whitespace variation across uv versions. +_LOCK_BLOCK_RE = re.compile( + r"\[\[package\]\]\s*\n" + rf'name\s*=\s*"{re.escape(PACKAGE_NAME)}"\s*\n' + r'version\s*=\s*"([^"]+)"', +) + + +def pyproject_version(toml_text: str) -> str: + """Extract `[project] version` from a pyproject.toml string.""" + data = tomllib.loads(toml_text) + version = data.get("project", {}).get("version") + if not isinstance(version, str) or not version: + msg = "[project] version not found in pyproject.toml" + raise ValueError(msg) + return version + + +def uv_lock_self_version(lock_text: str) -> str: + """Extract the project self-version from a uv.lock string.""" + match = _LOCK_BLOCK_RE.search(lock_text) + if not match: + msg = ( + f'Could not find [[package]] name = "{PACKAGE_NAME}" block in ' + "uv.lock. Has the project name changed?" + ) + raise ValueError(msg) + return match.group(1) + + +def git_show_at_base(path: Path, base_ref: str) -> str: + """Read a tracked file's content at `origin/` via git. + + `base_ref` is sourced from `GITHUB_BASE_REF` which GitHub Actions sets to + a validated branch name; `path` is a static `Path` constant in this module. + Neither is user input, so the bandit S603/S607 warnings on the subprocess + call are suppressed. + """ + try: + return subprocess.check_output( # noqa: S603 - args are CI-trusted + ["git", "show", f"origin/{base_ref}:{path}"], # noqa: S607 - git on PATH + text=True, + stderr=subprocess.PIPE, + ) + except subprocess.CalledProcessError as e: + msg = ( + f"git show origin/{base_ref}:{path} failed: " + f"{e.stderr.strip() or e.returncode}" + ) + raise RuntimeError(msg) from e + + +def pr_title_from_event() -> str | None: + """Read the PR title from the GitHub Actions event payload, if present.""" + event_path = os.environ.get("GITHUB_EVENT_PATH") + if not event_path: + return None + try: + data = json.loads(Path(event_path).read_text(encoding="utf-8")) + except OSError, json.JSONDecodeError: + return None + pr = data.get("pull_request") + if not isinstance(pr, dict): + return None + title = pr.get("title") + return title if isinstance(title, str) else None + + +def is_release_pr(title: str | None) -> bool: + """A `release:` PR is exempt from the bump requirement.""" + if not title: + return False + return title.lstrip().lower().startswith("release:") + + +def main() -> int: + base_ref = os.environ.get("GITHUB_BASE_REF") + if not base_ref: + print("::warning::GITHUB_BASE_REF not set; skipping (not a PR run).") + return 0 + + title = pr_title_from_event() + if is_release_pr(title): + print(f"release: PR — version bump not required ({title!r})") + return 0 + + try: + head_pp = pyproject_version(PYPROJECT.read_text(encoding="utf-8")) + head_lock = uv_lock_self_version(UV_LOCK.read_text(encoding="utf-8")) + base_pp = pyproject_version(git_show_at_base(PYPROJECT, base_ref)) + except (OSError, ValueError, RuntimeError) as e: + print(f"::error::{e}") + return 2 + + print(f"Base ({base_ref}) pyproject version: {base_pp}") + print(f"HEAD pyproject version: {head_pp}") + print(f"HEAD uv.lock self-version: {head_lock}") + + failed = False + + if head_pp == base_pp: + print( + f"::error::pyproject version unchanged from {base_ref} " + f"({head_pp}). Per docs/DEVELOPMENT.md every non-release: PR " + "bumps the version (PATCH for fix/refactor/test/docs/chore; " + "MINOR for feat). Hand-edit the self-version line in uv.lock too." + ) + failed = True + else: + print(f"Version bumped: {base_pp} -> {head_pp} OK") + + if head_pp != head_lock: + print( + f"::error::pyproject ({head_pp}) and uv.lock ({head_lock}) " + "self-version disagree. Hand-edit the uv.lock " + f'[[package]] name = "{PACKAGE_NAME}" block to match pyproject.toml.' + ) + failed = True + else: + print("pyproject + uv.lock self-version match OK") + + return 1 if failed else 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 99b29e5..fcda20a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -105,6 +105,52 @@ jobs: python-version: "3.14" - run: python .github/scripts/check_file_length.py + version-bump: + name: Version bump check + runs-on: ubuntu-latest + # Every non-`release:` PR bumps [project] version in pyproject.toml AND + # the matching [[package]] block in uv.lock. Closes the bump-miss class + # that the 75 % coverage gate cannot detect on its own. + if: github.event_name == 'pull_request' + steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + with: + fetch-depth: 0 # full history so `git show origin/:` resolves + - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + with: + python-version: "3.14" + - run: python .github/scripts/check_version_bump.py + + action-pinning: + name: Action pinning audit + runs-on: ubuntu-latest + # Validates every `uses:` line in .github/workflows/ + .github/actions/ + # against the policy in docs/DEVELOPMENT.md#action-pinning-policy. + # First-party = major tag; astral-sh/setup-uv = patch tag; third-party + # = SHA + trailing `# vN.M.P` comment. + steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + with: + python-version: "3.14" + - run: python .github/scripts/check_action_pins.py + + tests-required: + name: Tests required + runs-on: ubuntu-latest + # `feat:` / `fix:` PRs that touch `src/` must touch `tests/` too. + # Per docs/DEVELOPMENT.md Testing Policy. Other prefixes get a warn-only + # `::warning::` if src/ is touched without tests/. + if: github.event_name == 'pull_request' + steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + with: + fetch-depth: 0 # full history so the diff resolves + - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + with: + python-version: "3.14" + - run: python .github/scripts/check_tests_present.py + frontend-build: name: Frontend Build runs-on: ubuntu-latest diff --git a/docs/DEVELOPMENT.md b/docs/DEVELOPMENT.md index b42f235..c844ffc 100644 --- a/docs/DEVELOPMENT.md +++ b/docs/DEVELOPMENT.md @@ -92,7 +92,7 @@ Subject is **lowercase after the colon** (Title Case is rejected unless it's an | Workflow | Triggers | Required? | |---|---|---| -| `ci.yml` | push/PR to develop+main | Yes — 8 backend + 2 frontend jobs | +| `ci.yml` | push/PR to develop+main | Yes — backend + frontend gates + meta-gates + version/action/tests audits | | `security.yml` | push/PR + weekly schedule | Yes — 4 jobs (gitleaks, pip-audit, npm audit, trivy) | | `pr-title.yml` | PR open/edit/sync | Yes — conventional-commit lint | | `release.yml` | tag `v*.*.*` | No — tag-triggered | @@ -102,6 +102,26 @@ Subject is **lowercase after the colon** (Title Case is rejected unless it's an | `eval-nightly.yml` | `workflow_dispatch` only by default | No | | `codeql.yml` | `workflow_dispatch` only (placeholder) | No | +### Action-pinning policy + +Audited by the `Action pinning audit` CI job (`.github/scripts/check_action_pins.py`). Three buckets: + +- **First-party** (`actions/*`, `github/*`) — pin to **major tag** (`@v4`). SHA + trailing `# vN.M.P` comment also accepted; that's the form used in elevated-permissions workflows (release, branch-protection). +- **`astral-sh/setup-uv`** — pin to **latest patch tag** (`@v8.0.0`) or SHA. astral-sh does not publish a floating major tag for v8; `@v8` would not resolve. +- **Third-party** (anything else) — pin to a **40-hex-char SHA** with a trailing `# vN.M.P` comment naming the resolved tag. A moving branch in a supply-chain workflow defeats the point of the scan. + +When bumping a third-party action, update the SHA *and* the trailing comment in the same PR. Dependabot's `github-actions` ecosystem opens those PRs automatically. + +### Version-bump policy + +Audited by the `Version bump check` CI job (`.github/scripts/check_version_bump.py`). Every PR bumps `[project] version` in `pyproject.toml` AND the matching `[[package]]` block in `uv.lock`. The bump direction follows commitizen's `bump_map` in `pyproject.toml` — `feat:` is MINOR, everything else is PATCH. `release:` PRs are exempt because the dev version IS the release version. + +The `uv.lock` self-version is hand-edited (one line); avoid `uv lock` mid-PR because it would re-resolve transitive deps and pull in unintended upgrades. The `Version bump check` gate enforces both halves. + +### Testing policy + +Audited by the `Tests required` CI job (`.github/scripts/check_tests_present.py`). `feat:` and `fix:` PRs that touch `src/` MUST also touch `tests/`. Other prefixes get a `::warning::` if `src/` is touched without tests but don't block. The 75 % coverage gate alone doesn't catch behaviour-change-without-test (a single new line on already-covered code can pass), which is why this is a separate axis. + ## Local agent hook setup The `.claude/hooks/` scripts enforce the harness from the LLM-coder side: blocking `--no-verify`, scanning staged diffs for secrets, formatting after every Write/Edit. Opt in by copying the example settings file: diff --git a/pyproject.toml b/pyproject.toml index 1789d98..27e014a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "harness-python-react" -version = "0.1.0" +version = "0.2.0" description = "Production-quality LLM-driven coding harness — Python (FastAPI) backend, Vite + React + TypeScript frontend." readme = "README.md" requires-python = ">=3.14" diff --git a/tests/test_check_action_pins.py b/tests/test_check_action_pins.py new file mode 100644 index 0000000..9167dc0 --- /dev/null +++ b/tests/test_check_action_pins.py @@ -0,0 +1,270 @@ +"""Tests for `.github/scripts/check_action_pins.py`. + +Cover the policy buckets, the `parse_workflow` line extractor (including +trailing comments and missing `@ref`), and `main()` end-to-end against +fabricated workflow files in `tmp_path`. +""" + +from __future__ import annotations + +import importlib.util +import sys +from pathlib import Path +from typing import Any + +import pytest + +REPO_ROOT = Path(__file__).resolve().parent.parent +SCRIPT_PATH = REPO_ROOT / ".github" / "scripts" / "check_action_pins.py" + + +def _load_script() -> Any: + spec = importlib.util.spec_from_file_location("check_action_pins", SCRIPT_PATH) + if spec is None or spec.loader is None: + msg = f"Could not load script at {SCRIPT_PATH}" + raise RuntimeError(msg) + module = importlib.util.module_from_spec(spec) + sys.modules[spec.name] = module + spec.loader.exec_module(module) + return module + + +cap = _load_script() + + +# ---------- classify ---------- + + +@pytest.mark.parametrize( + "action,bucket", + [ + ("astral-sh/setup-uv", "patch-tag"), + ("actions/checkout", "major-tag"), + ("actions/setup-python", "major-tag"), + ("actions/setup-node", "major-tag"), + ("actions/upload-artifact", "major-tag"), + ("github/codeql-action", "major-tag"), + ("github/codeql-action/init", "major-tag"), + ("github/codeql-action/analyze", "major-tag"), + ("aquasecurity/trivy-action", "third-party-sha"), + ("gitleaks/gitleaks-action", "third-party-sha"), + ("amannn/action-semantic-pull-request", "third-party-sha"), + ("release-drafter/release-drafter", "third-party-sha"), + ("some-other-org/random-action", "third-party-sha"), + ], +) +def test_classify(action: str, bucket: str) -> None: + assert cap.classify(action) == bucket + + +# ---------- validate_ref ---------- + + +def _ref(action: str, pin: str, comment: str | None = None) -> Any: + return cap.ActionRef( + file=Path("dummy.yml"), line=1, action=action, pin=pin, comment=comment + ) + + +def test_validate_first_party_major_tag_ok() -> None: + assert cap.validate_ref(_ref("actions/checkout", "v4")) is None + + +def test_validate_first_party_patch_tag_rejected() -> None: + err = cap.validate_ref(_ref("actions/checkout", "v4.2.2")) + assert err is not None + assert "first-party" in err and "major tag" in err + + +def test_validate_first_party_sha_with_comment_ok() -> None: + sha = "11bd71901bbe5b1630ceea73d27597364c9af683" + assert cap.validate_ref(_ref("actions/checkout", sha, "# v4.2.2")) is None + + +def test_validate_first_party_sha_without_comment_rejected() -> None: + sha = "11bd71901bbe5b1630ceea73d27597364c9af683" + err = cap.validate_ref(_ref("actions/checkout", sha)) + assert err is not None and "missing trailing" in err + + +def test_validate_setup_uv_patch_tag_ok() -> None: + assert cap.validate_ref(_ref("astral-sh/setup-uv", "v8.0.0")) is None + + +def test_validate_setup_uv_major_tag_rejected() -> None: + err = cap.validate_ref(_ref("astral-sh/setup-uv", "v8")) + assert err is not None + assert "no floating major tag" in err + + +def test_validate_setup_uv_sha_with_comment_ok() -> None: + sha = "cec208311dfd045dd5311c1add060b2062131d57" + assert cap.validate_ref(_ref("astral-sh/setup-uv", sha, "# v8.0.0")) is None + + +def test_validate_third_party_tag_rejected() -> None: + err = cap.validate_ref(_ref("release-drafter/release-drafter", "v6")) + assert err is not None + assert "third-party" in err and "SHA pin" in err + + +def test_validate_third_party_sha_with_comment_ok() -> None: + sha = "48f256284bd46cdaab1048c3721360e808335d50" + assert ( + cap.validate_ref(_ref("amannn/action-semantic-pull-request", sha, "# v6.1.1")) + is None + ) + + +def test_validate_third_party_sha_without_comment_rejected() -> None: + sha = "48f256284bd46cdaab1048c3721360e808335d50" + err = cap.validate_ref(_ref("amannn/action-semantic-pull-request", sha)) + assert err is not None and "missing trailing" in err + + +def test_validate_partial_version_comment_accepted() -> None: + # `# v5` is accepted — some upstream tags are major-only. + sha = "a26af69be951a213d495a4c3e4e4022e16d87065" + assert cap.validate_ref(_ref("actions/setup-python", sha, "# v5")) is None + + +def test_validate_comment_without_version_rejected() -> None: + sha = "a26af69be951a213d495a4c3e4e4022e16d87065" + err = cap.validate_ref(_ref("actions/setup-python", sha, "# pinned for security")) + assert err is not None and "missing trailing" in err + + +def test_validate_missing_at_ref_rejected() -> None: + err = cap.validate_ref(_ref("actions/checkout", "")) + assert err is not None and "missing @ref" in err + + +def test_validate_third_party_short_hex_rejected() -> None: + # Looks like a SHA but isn't 40 chars; falls through to tag-shape check. + err = cap.validate_ref(_ref("aquasecurity/trivy-action", "abc123", "# v0.36.0")) + assert err is not None # third-party with non-SHA pin + + +# ---------- parse_workflow ---------- + + +def test_parse_workflow_extracts_refs(tmp_path: Path) -> None: + yml = tmp_path / "wf.yml" + setup_python_sha = "a26af69be951a213d495a4c3e4e4022e16d87065" + yml.write_text( + "jobs:\n" + " build:\n" + " steps:\n" + " - uses: actions/checkout@v4\n" + " - uses: astral-sh/setup-uv@v8.0.0\n" + f" - uses: actions/setup-python@{setup_python_sha} # v5\n" + " - run: echo hi\n", + encoding="utf-8", + ) + refs = cap.parse_workflow(yml) + assert len(refs) == 3 + assert refs[0].action == "actions/checkout" + assert refs[0].pin == "v4" + assert refs[0].comment is None + assert refs[1].action == "astral-sh/setup-uv" + assert refs[1].pin == "v8.0.0" + assert refs[2].action == "actions/setup-python" + assert refs[2].comment == "# v5" + + +def test_parse_workflow_handles_missing_at_ref(tmp_path: Path) -> None: + yml = tmp_path / "wf.yml" + yml.write_text( + "jobs:\n" + " build:\n" + " steps:\n" + " - uses: actions/checkout\n", # malformed — no @ref + encoding="utf-8", + ) + refs = cap.parse_workflow(yml) + assert len(refs) == 1 + assert refs[0].action == "actions/checkout" + assert refs[0].pin == "" + + +# ---------- main() end-to-end ---------- + + +def _setup_workflows_dir(tmp_path: Path, files: dict[str, str]) -> Path: + """Create a tmp `.github/workflows/` and chdir into the parent.""" + wf_dir = tmp_path / ".github" / "workflows" + wf_dir.mkdir(parents=True) + for name, content in files.items(): + (wf_dir / name).write_text(content, encoding="utf-8") + return tmp_path + + +def test_main_clean_passes( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] +) -> None: + _setup_workflows_dir( + tmp_path, + { + "ci.yml": ( + "jobs:\n build:\n steps:\n" + " - uses: actions/checkout@v4\n" + " - uses: astral-sh/setup-uv@v8.0.0\n" + ), + }, + ) + monkeypatch.chdir(tmp_path) + assert cap.main() == 0 + assert "Action pins audit OK" in capsys.readouterr().out + + +def test_main_third_party_tag_fails( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] +) -> None: + _setup_workflows_dir( + tmp_path, + { + "release-drafter.yml": ( + "jobs:\n draft:\n steps:\n" + " - uses: release-drafter/release-drafter@v6\n" + ), + }, + ) + monkeypatch.chdir(tmp_path) + assert cap.main() == 1 + out = capsys.readouterr().out + assert "::error file=" in out + assert "third-party" in out + + +def test_main_setup_uv_major_tag_fails( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] +) -> None: + _setup_workflows_dir( + tmp_path, + { + "ci.yml": ( + "jobs:\n build:\n steps:\n - uses: astral-sh/setup-uv@v8\n" + ), + }, + ) + monkeypatch.chdir(tmp_path) + assert cap.main() == 1 + out = capsys.readouterr().out + assert "no floating major tag" in out + + +def test_main_no_workflow_files_exits_2( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] +) -> None: + (tmp_path / ".github" / "workflows").mkdir(parents=True) + monkeypatch.chdir(tmp_path) + assert cap.main() == 2 + assert "no workflow files" in capsys.readouterr().out + + +def test_main_no_workflows_dir_exits_2( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] +) -> None: + monkeypatch.chdir(tmp_path) + assert cap.main() == 2 + assert "workflows dir not found" in capsys.readouterr().out diff --git a/tests/test_check_action_pins_composite.py b/tests/test_check_action_pins_composite.py new file mode 100644 index 0000000..b3790fb --- /dev/null +++ b/tests/test_check_action_pins_composite.py @@ -0,0 +1,186 @@ +"""Tests for the composite-action + local-path coverage in check_action_pins (#137). + +Sibling to `tests/test_check_action_pins.py` (which is at the line-cap +edge). Covers the `parse_workflow` extensions added in #137: + +- `.github/actions//action.yml` files are walked alongside workflows. +- Local-path `uses: ./...` references are skipped (not flagged as + missing-`@`). + +`test_check_action_pins.py` covers the workflow case + policy buckets; +this file covers the composite + local-path cases. +""" + +from __future__ import annotations + +import importlib.util +import sys +from pathlib import Path +from typing import Any +from unittest.mock import patch + +REPO_ROOT = Path(__file__).resolve().parent.parent +SCRIPT_PATH = REPO_ROOT / ".github" / "scripts" / "check_action_pins.py" + + +def _load_script() -> Any: + spec = importlib.util.spec_from_file_location("check_action_pins", SCRIPT_PATH) + if spec is None or spec.loader is None: + msg = f"Could not load script at {SCRIPT_PATH}" + raise RuntimeError(msg) + module = importlib.util.module_from_spec(spec) + sys.modules[spec.name] = module + spec.loader.exec_module(module) + return module + + +cap = _load_script() + + +# ---------- parse_workflow on a composite-action file ---------- + + +def test_parse_walks_composite_action_steps(tmp_path: Path) -> None: + """`.github/actions//action.yml` files surface their `uses:` lines.""" + action_yaml = tmp_path / "action.yml" + action_yaml.write_text( + "name: 'fake composite'\n" + "runs:\n" + " using: composite\n" + " steps:\n" + " - uses: actions/checkout@v4\n" + " - uses: aquasecurity/trivy-action@" + ("a" * 40) + " # v0.36.0\n", + encoding="utf-8", + ) + refs = cap.parse_workflow(action_yaml) + actions = [r.action for r in refs] + assert actions == ["actions/checkout", "aquasecurity/trivy-action"] + + +def test_parse_skips_local_path_uses(tmp_path: Path) -> None: + """`uses: ./...` references resolve in-repo and aren't third-party pins.""" + workflow = tmp_path / "fake.yml" + workflow.write_text( + "jobs:\n" + " reuse:\n" + " uses: ./.github/workflows/composite.yml@main\n" + " call-composite:\n" + " steps:\n" + " - uses: ./.github/actions/local\n" + " - uses: actions/checkout@v4\n", + encoding="utf-8", + ) + refs = cap.parse_workflow(workflow) + actions = [r.action for r in refs] + # Both local-path entries skipped; only the third-party one survives. + assert actions == ["actions/checkout"] + + +def test_parse_skips_dotdot_local_path(tmp_path: Path) -> None: + """`uses: ../action` (rare but legal) is also a local-path skip.""" + workflow = tmp_path / "fake.yml" + workflow.write_text( + "jobs:\n j:\n steps:\n - uses: ../shared/action\n", + encoding="utf-8", + ) + assert cap.parse_workflow(workflow) == [] + + +# ---------- _collect_yaml_files end-to-end ---------- + + +def test_collect_includes_composite_actions(tmp_path: Path) -> None: + """`_collect_yaml_files` walks both workflows and `.github/actions/`.""" + workflows = tmp_path / "workflows" + workflows.mkdir() + (workflows / "ci.yml").write_text( + "jobs:\n j:\n steps:\n - uses: actions/checkout@v4\n", + encoding="utf-8", + ) + actions_dir = tmp_path / "actions" + composite = actions_dir / "publish" + composite.mkdir(parents=True) + (composite / "action.yml").write_text( + "name: 'publish'\nruns:\n using: composite\n steps:\n" + " - uses: actions/setup-node@v5\n", + encoding="utf-8", + ) + with ( + patch.object(cap, "WORKFLOWS_DIR", workflows), + patch.object(cap, "ACTIONS_DIR", actions_dir), + ): + collected = cap._collect_yaml_files() + names = sorted(p.name for p in collected) + assert names == ["action.yml", "ci.yml"] + + +def test_collect_handles_missing_actions_dir(tmp_path: Path) -> None: + """Repo without `.github/actions/` (today's shape) still scans workflows.""" + workflows = tmp_path / "workflows" + workflows.mkdir() + (workflows / "ci.yml").write_text( + "jobs:\n j:\n steps:\n - uses: actions/checkout@v4\n", + encoding="utf-8", + ) + missing_actions = tmp_path / "no-such-dir" + with ( + patch.object(cap, "WORKFLOWS_DIR", workflows), + patch.object(cap, "ACTIONS_DIR", missing_actions), + ): + collected = cap._collect_yaml_files() + assert [p.name for p in collected] == ["ci.yml"] + + +# ---------- main() end-to-end through composite + local-path ---------- + + +def test_main_flags_violation_in_composite_action(tmp_path: Path, capsys: Any) -> None: + """A bad pin inside a composite action's steps fails the audit.""" + workflows = tmp_path / "workflows" + workflows.mkdir() + (workflows / "ci.yml").write_text( + "jobs:\n j:\n steps:\n - uses: actions/checkout@v4\n", + encoding="utf-8", + ) + actions_dir = tmp_path / "actions" + bad = actions_dir / "bad" + bad.mkdir(parents=True) + # Tag pin on a third-party action — violates third-party-sha policy. + (bad / "action.yml").write_text( + "name: 'bad'\nruns:\n using: composite\n steps:\n" + " - uses: aquasecurity/trivy-action@v0.36.0\n", + encoding="utf-8", + ) + with ( + patch.object(cap, "WORKFLOWS_DIR", workflows), + patch.object(cap, "ACTIONS_DIR", actions_dir), + ): + assert cap.main() == 1 + out = capsys.readouterr().out + assert "aquasecurity/trivy-action" in out + assert "third-party action requires SHA pin" in out + + +def test_main_passes_with_clean_composite_action(tmp_path: Path, capsys: Any) -> None: + """A clean composite action with a SHA + comment passes the audit.""" + workflows = tmp_path / "workflows" + workflows.mkdir() + (workflows / "ci.yml").write_text( + "jobs:\n j:\n steps:\n - uses: actions/checkout@v4\n", + encoding="utf-8", + ) + actions_dir = tmp_path / "actions" + good = actions_dir / "good" + good.mkdir(parents=True) + (good / "action.yml").write_text( + "name: 'good'\nruns:\n using: composite\n steps:\n" + " - uses: aquasecurity/trivy-action@" + ("a" * 40) + " # v0.36.0\n", + encoding="utf-8", + ) + with ( + patch.object(cap, "WORKFLOWS_DIR", workflows), + patch.object(cap, "ACTIONS_DIR", actions_dir), + ): + assert cap.main() == 0 + out = capsys.readouterr().out + assert "Action pins audit OK" in out diff --git a/tests/test_check_tests_present.py b/tests/test_check_tests_present.py new file mode 100644 index 0000000..c050d43 --- /dev/null +++ b/tests/test_check_tests_present.py @@ -0,0 +1,219 @@ +"""Tests for `.github/scripts/check_tests_present.py`.""" + +from __future__ import annotations + +import importlib.util +import json +import sys +from pathlib import Path +from typing import TYPE_CHECKING, Any + +import pytest + +if TYPE_CHECKING: + from collections.abc import Iterable + +REPO_ROOT = Path(__file__).resolve().parent.parent +SCRIPT_PATH = REPO_ROOT / ".github" / "scripts" / "check_tests_present.py" + + +def _load_script() -> Any: + spec = importlib.util.spec_from_file_location("check_tests_present", SCRIPT_PATH) + if spec is None or spec.loader is None: + msg = f"Could not load script at {SCRIPT_PATH}" + raise RuntimeError(msg) + module = importlib.util.module_from_spec(spec) + sys.modules[spec.name] = module + spec.loader.exec_module(module) + return module + + +ctp = _load_script() + + +# ---------- commit_type_prefix ---------- + + +@pytest.mark.parametrize( + "title,expected", + [ + ("feat: add tool", "feat"), + ("fix: handle empty list", "fix"), + ("chore(api): rename module", "chore"), + ("docs: update readme", "docs"), + ("test: add coverage", "test"), + ("refactor: split module", "refactor"), + ("release: v1.2.3", "release"), + ("feat!: breaking change", "feat"), + ("FEAT: case-insensitive", "feat"), + (" feat: leading-spaces", "feat"), + ("no-prefix subject only", None), + ("", None), + (None, None), + ], +) +def test_commit_type_prefix(title: str | None, expected: str | None) -> None: + assert ctp.commit_type_prefix(title) == expected + + +# ---------- changed_files (via stub) ---------- + + +def test_changed_files_filters_blank_lines(monkeypatch: pytest.MonkeyPatch) -> None: + """Empty lines in git output are filtered out.""" + + def fake_check_output(*_: Any, **__: Any) -> str: + return "src/api/main.py\n\ntests/test_api.py\n" + + monkeypatch.setattr(ctp.subprocess, "check_output", fake_check_output) + assert ctp.changed_files("develop") == ["src/api/main.py", "tests/test_api.py"] + + +def test_changed_files_propagates_git_failure( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """A git error becomes a RuntimeError with a context-rich message.""" + + def fake_check_output(*_: Any, **__: Any) -> str: + raise ctp.subprocess.CalledProcessError( + 128, ["git"], stderr="bad object origin/develop" + ) + + monkeypatch.setattr(ctp.subprocess, "check_output", fake_check_output) + with pytest.raises(RuntimeError, match="bad object origin/develop"): + ctp.changed_files("develop") + + +# ---------- main() end-to-end ---------- + + +def _set_event_payload( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path, title: str +) -> None: + event_file = tmp_path / "event.json" + event_file.write_text( + json.dumps({"pull_request": {"title": title}}), encoding="utf-8" + ) + monkeypatch.setenv("GITHUB_EVENT_PATH", str(event_file)) + + +def _stub_changed_files(monkeypatch: pytest.MonkeyPatch, files: Iterable[str]) -> None: + monkeypatch.setattr(ctp, "changed_files", lambda _base_ref: list(files)) + + +def test_main_no_src_changes_passes( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, + capsys: pytest.CaptureFixture[str], +) -> None: + monkeypatch.setenv("GITHUB_BASE_REF", "develop") + _set_event_payload(monkeypatch, tmp_path, "feat: docs-only change") + _stub_changed_files(monkeypatch, ["docs/README.md", ".github/workflows/ci.yml"]) + + assert ctp.main() == 0 + assert "gate not applicable" in capsys.readouterr().out + + +def test_main_src_and_tests_changed_passes( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, + capsys: pytest.CaptureFixture[str], +) -> None: + monkeypatch.setenv("GITHUB_BASE_REF", "develop") + _set_event_payload(monkeypatch, tmp_path, "feat: new tool") + _stub_changed_files(monkeypatch, ["src/tools/new.py", "tests/test_new.py"]) + + assert ctp.main() == 0 + out = capsys.readouterr().out + assert "tests/ also touched" in out + + +@pytest.mark.parametrize("prefix_title", ["feat: x", "fix: y", "FIX: z"]) +def test_main_feat_or_fix_without_tests_blocks( + prefix_title: str, + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, + capsys: pytest.CaptureFixture[str], +) -> None: + monkeypatch.setenv("GITHUB_BASE_REF", "develop") + _set_event_payload(monkeypatch, tmp_path, prefix_title) + _stub_changed_files(monkeypatch, ["src/api/routes.py"]) + + assert ctp.main() == 1 + err = capsys.readouterr().out + assert "::error::" in err + assert "Testing Policy" in err + + +@pytest.mark.parametrize( + "prefix_title", ["chore: x", "docs: y", "refactor: z", "test: a", "release: b"] +) +def test_main_warn_only_prefixes_pass( + prefix_title: str, + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, + capsys: pytest.CaptureFixture[str], +) -> None: + monkeypatch.setenv("GITHUB_BASE_REF", "develop") + _set_event_payload(monkeypatch, tmp_path, prefix_title) + _stub_changed_files(monkeypatch, ["src/api/routes.py"]) + + assert ctp.main() == 0 + out = capsys.readouterr().out + assert "::warning::" in out + + +def test_main_unknown_prefix_warns( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, + capsys: pytest.CaptureFixture[str], +) -> None: + """No conventional prefix → warn-only (Lint PR title is the prefix gate).""" + monkeypatch.setenv("GITHUB_BASE_REF", "develop") + _set_event_payload(monkeypatch, tmp_path, "no prefix at all") + _stub_changed_files(monkeypatch, ["src/api/routes.py"]) + + assert ctp.main() == 0 + out = capsys.readouterr().out + assert "prefix not recognised" in out + + +def test_main_excludes_non_python_src_changes( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, + capsys: pytest.CaptureFixture[str], +) -> None: + """Non-Python files under src/ shouldn't trigger the gate.""" + monkeypatch.setenv("GITHUB_BASE_REF", "develop") + _set_event_payload(monkeypatch, tmp_path, "feat: copy a fixture") + _stub_changed_files(monkeypatch, ["src/api/README.md", "src/data/sample.csv"]) + + assert ctp.main() == 0 + assert "gate not applicable" in capsys.readouterr().out + + +def test_main_no_base_ref_skips( + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + monkeypatch.delenv("GITHUB_BASE_REF", raising=False) + monkeypatch.delenv("GITHUB_EVENT_PATH", raising=False) + assert ctp.main() == 0 + assert "skipping" in capsys.readouterr().out + + +def test_main_git_failure_exits_2( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, + capsys: pytest.CaptureFixture[str], +) -> None: + monkeypatch.setenv("GITHUB_BASE_REF", "develop") + _set_event_payload(monkeypatch, tmp_path, "feat: x") + + def boom(_base_ref: str) -> list[str]: + msg = "git fetch failed" + raise RuntimeError(msg) + + monkeypatch.setattr(ctp, "changed_files", boom) + assert ctp.main() == 2 + assert "::error::" in capsys.readouterr().out diff --git a/tests/test_check_version_bump.py b/tests/test_check_version_bump.py new file mode 100644 index 0000000..a1d8dc6 --- /dev/null +++ b/tests/test_check_version_bump.py @@ -0,0 +1,241 @@ +"""Tests for `.github/scripts/check_version_bump.py`. + +Covers the parsing helpers and end-to-end `main()` behaviour by stubbing +the `git show` subprocess call and the GitHub Actions event payload. +""" + +from __future__ import annotations + +import importlib.util +import json +import os +import sys +from pathlib import Path +from typing import TYPE_CHECKING, Any + +import pytest + +if TYPE_CHECKING: + from collections.abc import Iterator + +REPO_ROOT = Path(__file__).resolve().parent.parent +SCRIPT_PATH = REPO_ROOT / ".github" / "scripts" / "check_version_bump.py" + + +def _load_script() -> Any: + """Load the check_version_bump module by file path (it lives outside src/).""" + spec = importlib.util.spec_from_file_location("check_version_bump", SCRIPT_PATH) + if spec is None or spec.loader is None: + msg = f"Could not load script at {SCRIPT_PATH}" + raise RuntimeError(msg) + module = importlib.util.module_from_spec(spec) + sys.modules[spec.name] = module + spec.loader.exec_module(module) + return module + + +cvb = _load_script() + + +# ---------- pyproject_version ---------- + + +def test_pyproject_version_extracts_string() -> None: + text = '[project]\nname = "harness-python-react"\nversion = "1.6.5"\n' + assert cvb.pyproject_version(text) == "1.6.5" + + +def test_pyproject_version_missing_raises() -> None: + with pytest.raises(ValueError, match="version not found"): + cvb.pyproject_version('[project]\nname = "harness-python-react"\n') + + +def test_pyproject_version_empty_raises() -> None: + with pytest.raises(ValueError, match="version not found"): + cvb.pyproject_version('[project]\nversion = ""\n') + + +# ---------- uv_lock_self_version ---------- + + +def test_uv_lock_self_version_extracts() -> None: + lock = ( + '[[package]]\nname = "annotated-doc"\nversion = "0.0.4"\n\n' + '[[package]]\nname = "harness-python-react"\nversion = "1.6.5"\n' + 'source = { editable = "." }\n' + ) + assert cvb.uv_lock_self_version(lock) == "1.6.5" + + +def test_uv_lock_self_version_missing_raises() -> None: + with pytest.raises(ValueError, match="harness-python-react"): + cvb.uv_lock_self_version('[[package]]\nname = "fastapi"\nversion = "0.100"\n') + + +# ---------- is_release_pr ---------- + + +@pytest.mark.parametrize( + "title,expected", + [ + ("release: v1.6.5 — harness rollout", True), + ("Release: v1.6.5", True), # leading-cap accepted + ("RELEASE: yes", True), + (" release: leading-spaces", True), + ("feat: add a thing", False), + ("fix: something", False), + ("chore: release notes update", False), # 'release' substring, wrong prefix + ("", False), + (None, False), + ], +) +def test_is_release_pr(title: str | None, expected: bool) -> None: + assert cvb.is_release_pr(title) is expected + + +# ---------- main() end-to-end ---------- + + +@pytest.fixture +def fake_repo(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Iterator[Path]: + """Set up a fake repo root with pyproject.toml + uv.lock and chdir into it.""" + (tmp_path / "pyproject.toml").write_text( + '[project]\nname = "harness-python-react"\nversion = "1.6.6"\n', + encoding="utf-8", + ) + (tmp_path / "uv.lock").write_text( + '[[package]]\nname = "harness-python-react"\nversion = "1.6.6"\n' + 'source = { editable = "." }\n', + encoding="utf-8", + ) + monkeypatch.chdir(tmp_path) + yield tmp_path + + +def _set_event_payload( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path, title: str +) -> None: + event_file = tmp_path / "event.json" + event_file.write_text( + json.dumps({"pull_request": {"title": title}}), encoding="utf-8" + ) + monkeypatch.setenv("GITHUB_EVENT_PATH", str(event_file)) + + +def _stub_git_show(monkeypatch: pytest.MonkeyPatch, base_pyproject: str) -> None: + def fake(path: Path, base_ref: str) -> str: + assert str(path) == "pyproject.toml" + assert base_ref == "develop" + return base_pyproject + + monkeypatch.setattr(cvb, "git_show_at_base", fake) + + +def test_main_bumped_pass( + fake_repo: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] +) -> None: + monkeypatch.setenv("GITHUB_BASE_REF", "develop") + _set_event_payload(monkeypatch, fake_repo, "feat: add thing") + _stub_git_show( + monkeypatch, '[project]\nname = "harness-python-react"\nversion = "1.6.5"\n' + ) + + assert cvb.main() == 0 + out = capsys.readouterr().out + assert "Version bumped: 1.6.5 -> 1.6.6" in out + + +def test_main_unchanged_fails( + fake_repo: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] +) -> None: + monkeypatch.setenv("GITHUB_BASE_REF", "develop") + _set_event_payload(monkeypatch, fake_repo, "feat: forgot to bump") + _stub_git_show( + monkeypatch, '[project]\nname = "harness-python-react"\nversion = "1.6.6"\n' + ) + + assert cvb.main() == 1 + err_out = capsys.readouterr().out + assert "::error::" in err_out + assert "unchanged from develop" in err_out + + +def test_main_release_prefix_exempt( + fake_repo: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] +) -> None: + monkeypatch.setenv("GITHUB_BASE_REF", "main") + _set_event_payload(monkeypatch, fake_repo, "release: v1.6.6 — harness") + # Even though pyproject == base, release: PRs short-circuit before we + # inspect git history. + _stub_git_show( + monkeypatch, '[project]\nname = "harness-python-react"\nversion = "1.6.6"\n' + ) + + assert cvb.main() == 0 + assert "release: PR" in capsys.readouterr().out + + +def test_main_uv_lock_mismatch_fails( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + (tmp_path / "pyproject.toml").write_text( + '[project]\nname = "harness-python-react"\nversion = "1.6.6"\n', + encoding="utf-8", + ) + (tmp_path / "uv.lock").write_text( + '[[package]]\nname = "harness-python-react"\nversion = "1.6.5"\n' # stale! + 'source = { editable = "." }\n', + encoding="utf-8", + ) + monkeypatch.chdir(tmp_path) + monkeypatch.setenv("GITHUB_BASE_REF", "develop") + _set_event_payload(monkeypatch, tmp_path, "feat: stale lock") + _stub_git_show( + monkeypatch, '[project]\nname = "harness-python-react"\nversion = "1.6.5"\n' + ) + + assert cvb.main() == 1 + err = capsys.readouterr().out + assert "self-version disagree" in err + + +def test_main_no_base_ref_skips( + fake_repo: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] +) -> None: + # Outside a PR run there is no GITHUB_BASE_REF; the gate degrades green. + monkeypatch.delenv("GITHUB_BASE_REF", raising=False) + monkeypatch.delenv("GITHUB_EVENT_PATH", raising=False) + assert cvb.main() == 0 + assert "skipping" in capsys.readouterr().out + + +def test_main_parse_error_exits_2( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + (tmp_path / "pyproject.toml").write_text( + '[project]\nname = "harness-python-react"\n# version missing\n', + encoding="utf-8", + ) + (tmp_path / "uv.lock").write_text( + '[[package]]\nname = "harness-python-react"\nversion = "1.6.6"\n', + encoding="utf-8", + ) + monkeypatch.chdir(tmp_path) + monkeypatch.setenv("GITHUB_BASE_REF", "develop") + _set_event_payload(monkeypatch, tmp_path, "feat: x") + _stub_git_show( + monkeypatch, '[project]\nname = "harness-python-react"\nversion = "1.6.5"\n' + ) + + assert cvb.main() == 2 + assert "::error::" in capsys.readouterr().out + + +# Ensure the loader didn't accidentally leave os.environ polluted. +def test_environ_unaffected_by_loader() -> None: + # Sanity: nothing the script does at import time touches os.environ. + assert "CHECK_VERSION_BUMP_LOADED" not in os.environ diff --git a/uv.lock b/uv.lock index b6c0c1e..9ca9a26 100644 --- a/uv.lock +++ b/uv.lock @@ -328,7 +328,7 @@ wheels = [ [[package]] name = "harness-python-react" -version = "0.1.0" +version = "0.2.0" source = { virtual = "." } dependencies = [ { name = "fastapi" }, From 843c2537fa2f29399909e8db5eedfebe6fa94995 Mon Sep 17 00:00:00 2001 From: Constantinos <41453723+constk@users.noreply.github.com> Date: Thu, 30 Apr 2026 20:24:41 +1000 Subject: [PATCH 4/9] chore: src/ README audit gate + 5 package READMEs (#126, #152) (#71) --- .github/branch-protection/develop.json | 1 + .github/branch-protection/main.json | 1 + .github/scripts/check_src_readmes.py | 142 +++++++++++++++++++++++++ .github/workflows/ci.yml | 13 +++ pyproject.toml | 2 +- src/api/README.md | 18 ++++ src/eval/README.md | 22 ++++ src/models/README.md | 17 +++ src/observability/README.md | 20 ++++ src/tools/README.md | 19 ++++ uv.lock | 2 +- 11 files changed, 255 insertions(+), 2 deletions(-) create mode 100644 .github/scripts/check_src_readmes.py create mode 100644 src/api/README.md create mode 100644 src/eval/README.md create mode 100644 src/models/README.md create mode 100644 src/observability/README.md create mode 100644 src/tools/README.md diff --git a/.github/branch-protection/develop.json b/.github/branch-protection/develop.json index 6187955..9543e69 100644 --- a/.github/branch-protection/develop.json +++ b/.github/branch-protection/develop.json @@ -12,6 +12,7 @@ "Version bump check", "Action pinning audit", "Tests required", + "src/ README audit", "Frontend Build", "Frontend Quality", "Branch-protection contexts sync", diff --git a/.github/branch-protection/main.json b/.github/branch-protection/main.json index fecf6fd..d7bafaa 100644 --- a/.github/branch-protection/main.json +++ b/.github/branch-protection/main.json @@ -12,6 +12,7 @@ "Version bump check", "Action pinning audit", "Tests required", + "src/ README audit", "Frontend Build", "Frontend Quality", "Branch-protection contexts sync", diff --git a/.github/scripts/check_src_readmes.py b/.github/scripts/check_src_readmes.py new file mode 100644 index 0000000..9394271 --- /dev/null +++ b/.github/scripts/check_src_readmes.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python3 +"""Enforce the per-package README rule from `CLAUDE.md`. + +`CLAUDE.md` *Code standards*: *"Each `src/` directory has a README +explaining its purpose and key interfaces."* As `src/` grows, READMEs +go missing silently — and even when one exists, it can degrade into +unstructured prose that no contributor reads. This script audits both +shape and substance. + +Behaviour: + +- Walks every subdirectory under `src/` (recursive, skipping + `__pycache__`). +- A subdirectory must have a `README.md` when it contains at least one + `.py` file other than `__init__.py`. Empty directories and + `__init__.py`-only namespace packages are exempt — they have no + surface to document. +- The README must be non-trivial: at least `MIN_BYTES = 200` bytes + after stripping leading/trailing whitespace. Catches the empty-stub + failure mode (#126). +- The README must contain a `## Key interfaces` heading (or its + documented synonym `## Public surface`). #152 promoted the gate + from presence + size to presence + structure: a 200-byte unrelated + paragraph passed pre-#152, but doesn't actually document the + package's public surface. The Purpose statement is encoded + positionally as the H1 heading + immediately-following paragraph + rather than as an explicit `## Purpose` heading — that's the + existing convention across all seven packages and the natural + markdown shape; a separate `## Purpose` heading would duplicate + what the H1 already establishes. + +There is **no exemption mechanism**, mirroring `check_file_length.py` +(see `feedback_no_noqa`). If a future package legitimately needs a +shorter README (e.g. a single-file helper with a self-explanatory +filename), restructure rather than carve out an allowlist entry. + +Exit codes: + 0 — every package with code has a non-trivial, structured README + 1 — at least one package is missing a README, has a stub one, or + lacks the required heading + 2 — `src/` does not exist (run from the wrong directory?) + +Usage (from repo root): + + python .github/scripts/check_src_readmes.py +""" + +from __future__ import annotations + +import re +import sys +from pathlib import Path + +SRC_ROOT = Path("src") +MIN_BYTES = 200 + +# Required heading shapes. Each tuple is "any-of" — a README satisfies the +# rule if it contains at least one matching heading. Match is anchored to +# line start, case-insensitive, allows `#` levels 2-4 so a deeply nested +# subsection still counts. +KEY_INTERFACES_HEADING = re.compile( + r"^#{2,4}\s+(Key interfaces|Public surface)\b", + re.IGNORECASE | re.MULTILINE, +) + + +def _normalised(path: Path) -> str: + return path.as_posix() + + +def _has_documentable_code(directory: Path) -> bool: + """True when the directory has at least one `.py` file beyond `__init__.py`.""" + for entry in directory.iterdir(): + if entry.is_file() and entry.suffix == ".py" and entry.name != "__init__.py": + return True + return False + + +def _readme_failure(directory: Path) -> str | None: + """Return an error message if the directory's README is missing or stub-sized.""" + readme = directory / "README.md" + if not readme.is_file(): + return ( + f"::error file={_normalised(directory)}::missing README.md. " + "`CLAUDE.md` requires every `src/` package to document its " + "purpose and key interfaces." + ) + body = readme.read_text(encoding="utf-8").strip() + if len(body.encode("utf-8")) < MIN_BYTES: + return ( + f"::error file={_normalised(readme)}::README.md is shorter than " + f"{MIN_BYTES} bytes after stripping whitespace. Add purpose + " + "key-interfaces text — a single heading does not satisfy the rule." + ) + if not KEY_INTERFACES_HEADING.search(body): + return ( + f"::error file={_normalised(readme)}::README.md missing a " + "`## Key interfaces` heading (or the synonym `## Public surface`). " + "Per CLAUDE.md the README must document the package's public " + "surface; the heading anchors that section so contributors can " + "find it. Add the heading and list the package's exported names." + ) + return None + + +def main() -> int: + if not SRC_ROOT.is_dir(): + print(f"::error::{SRC_ROOT.as_posix()} not found; run from repo root") + return 2 + + audited: list[Path] = [] + failures: list[str] = [] + + for directory in sorted(p for p in SRC_ROOT.rglob("*") if p.is_dir()): + if directory.name == "__pycache__": + continue + if not _has_documentable_code(directory): + continue + audited.append(directory) + message = _readme_failure(directory) + if message is not None: + failures.append(message) + + if failures: + for line in failures: + print(line) + print( + f"\n{len(failures)} package(s) failed the README audit. " + "Fix in this PR — there is no exemption mechanism, see the " + "module docstring." + ) + return 1 + + print( + f"src/ README audit OK — {len(audited)} package(s) documented " + f"(min {MIN_BYTES} bytes, `## Key interfaces` heading required)." + ) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fcda20a..2f0b2cb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -151,6 +151,19 @@ jobs: python-version: "3.14" - run: python .github/scripts/check_tests_present.py + src-readmes: + name: src/ README audit + runs-on: ubuntu-latest + # CLAUDE.md: every `src/` package documents its purpose + key + # interfaces. The audit checks shape (presence + min 200 bytes) and + # structure (`## Key interfaces` heading). No exemption mechanism. + steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + with: + python-version: "3.14" + - run: python .github/scripts/check_src_readmes.py + frontend-build: name: Frontend Build runs-on: ubuntu-latest diff --git a/pyproject.toml b/pyproject.toml index 27e014a..2e06350 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "harness-python-react" -version = "0.2.0" +version = "0.2.1" description = "Production-quality LLM-driven coding harness — Python (FastAPI) backend, Vite + React + TypeScript frontend." readme = "README.md" requires-python = ">=3.14" diff --git a/src/api/README.md b/src/api/README.md new file mode 100644 index 0000000..9bda5e9 --- /dev/null +++ b/src/api/README.md @@ -0,0 +1,18 @@ +# `src/api/` + +FastAPI HTTP surface for the harness scaffold. Owns request handling, route registration, lifespan hooks, and the in-memory session store. Imports from `src/observability/` (instrumentation), `src/models/` (Pydantic contracts) — never the other way around (enforced by `import-linter`). + +## Key interfaces + +- **`main.app`** — the `FastAPI()` instance with CORS middleware, OTel auto-instrumentation, and the v1 router mounted. Exposed for `uvicorn src.api.main:app` and the FastAPI `TestClient` fixtures in `tests/conftest.py`. +- **`main.lifespan(app)`** — async context manager that runs `setup_tracing` → `setup_logging` → `instrument_httpx` → `instrument_fastapi` once on startup, then constructs the `SessionStore`. Single point of "what is process-globally configured?" +- **`routes.router`** — `APIRouter(prefix="/api/v1")`. Every route a real client should hit lives here; un-versioned routes live on `app` directly (only the FastAPI auto-generated docs at `/docs` and `/redoc` qualify). +- **`routes.health`** — `GET /api/v1/health` → `HealthResponse` (status + `importlib.metadata.version`-derived build version). +- **`routes.echo`** — `GET /api/v1/echo?msg=...` → `EchoResponse`. Demonstrates the request/response contract pattern that real domain endpoints follow. +- **`sessions.SessionStore`** — single-process in-memory dict mapping session id → conversation history. Plumbed onto `app.state.session_store` in lifespan; replace with a Redis or DB-backed implementation for multi-process deployments. + +## Conventions + +- Every route returns a `StrictModel` subclass — never a raw dict. The `extra="forbid"` config makes typos and renamed fields fail at construction, not three calls deep. +- Routes are versioned under `/api/v1/`. Adding a v2 endpoint = a new router, never a breaking change to v1. +- The CORS policy is wide-open in the scaffold so the Vite dev server proxy works on first run; tighten via config when a real auth layer arrives. diff --git a/src/eval/README.md b/src/eval/README.md new file mode 100644 index 0000000..088a8db --- /dev/null +++ b/src/eval/README.md @@ -0,0 +1,22 @@ +# `src/eval/` + +Provider-agnostic evaluation harness — golden QA cases driven by a caller-supplied `answer_fn`, three tolerance modes (exact / numeric / semantic), and a `Protocol`-based LLM-judge seam so no SDK leaks into this layer. Designed to be runnable on a fresh clone without LLM credentials (the example case uses `exact_match`); the nightly workflow opt-in is documented in `docs/EVAL_HARNESS.md`. + +## Key interfaces + +- **`models.EvalCase`** — Pydantic shape of an entry in `eval/golden_qa.json`: `id`, `question`, `expected_answer`, `tolerance`, plus optional `category` / `difficulty` / `expected_tools` / `notes`. +- **`models.EvalResult`** — per-case outcome: actual answer, latency, pass/fail, tolerance score, failure reason. +- **`runner.load_golden_dataset(path=None)`** — pure JSON loader. Standalone so call sites can introspect the dataset without paying for an `EvalRunner`. +- **`runner.EvalRunner(answer_fn, judge_client=None, judge_model="")`** — the orchestrator. `answer_fn: Callable[[str], str]` is the single seam — wire your agent loop, direct LLM client, or stub. `judge_client` implements the `judge.LLMClient` Protocol and is only consulted for `semantic_similar` cases. + - **`evaluate(case)`** — run one case; returns `EvalResult`. + - **`evaluate_all()`** — load the golden dataset, evaluate every case, return the list. +- **`judge.LLMClient`** — `Protocol` with one method: `complete_json(*, model, prompt) -> str`. Concrete adapters live in your agent code; the harness never imports OpenAI/Anthropic/Azure SDKs. +- **`judge.evaluate_semantic_similarity(question, expected, actual, client, model)`** — calls the judge, returns `(score, explanation)`. Returns `(None, "no LLM client configured")` when `client is None` (inconclusive — runner treats it as pass with an explanatory note rather than a hard fail). +- **`report.generate_report(results)`** — markdown summary: overall accuracy, per-category, per-difficulty, failure analysis with reason text. +- **`__main__`** — `python -m src.eval` runs the dataset with an identity `answer_fn` (echoes the question) and prints the markdown report. + +## Conventions + +- **`coverage` `omit`s this package** — the eval suite is exercised by `eval/test_golden_qa.py`, not by `tests/`. Counting it would inflate misses on every PR that touches behaviour without re-running the eval workflow. +- **No domain coupling.** The harness ships one trivial echo case; real users replace `eval/golden_qa.json` with their domain dataset and wire `answer_fn` to their agent. +- **Tolerance picks happen per case**, not per runner. A dataset can mix `exact_match`, `numeric_close` (within 1%), and `semantic_similar` (LLM judge ≥ 0.8) cases freely. diff --git a/src/models/README.md b/src/models/README.md new file mode 100644 index 0000000..321e68d --- /dev/null +++ b/src/models/README.md @@ -0,0 +1,17 @@ +# `src/models/` + +Pydantic contracts that cross module or process seams. Import-linter enforces that nothing outside this package matters here — `src.models` depends on **nothing in `src/`** (forbidden contract in `pyproject.toml`). The point: schema bugs surface at construction with clear `ValidationError`s, not as `AttributeError`s deep in the request path. + +## Key interfaces + +- **`_base.StrictModel`** — `BaseModel` subclass with `model_config = ConfigDict(extra="forbid")`. Inherit this for any contract that crosses a boundary; opt into stricter type coercion with `class Foo(StrictModel, strict=True)` when JSON coercion (UUID, int) is undesirable. +- **`health.HealthResponse`** — `status: Literal["ok"]` + `version: str`. Returned by `GET /api/v1/health`. +- **`session.SessionCreate`** / **`session.SessionInfo`** — the create-empty / public-info shapes used by `src.api.sessions.SessionStore`. +- **`config.Settings`** — `pydantic_settings.BaseSettings` reading the four `LLM_*` env vars (`LLM_PROVIDER`, `LLM_API_KEY`, `LLM_BASE_URL`, `LLM_MODEL`). Provider-pluggable seam — wire OpenAI, Anthropic, Azure, or vLLM by swapping the values without touching this code. +- **`config.get_settings()`** — fresh `Settings` constructor (deliberately unmemoised so tests can re-construct after `monkeypatch.setenv`). Real callers should hold a single instance at startup. + +## Conventions + +- One module per logical contract group — `health`, `session`, `config`. Add a new file rather than appending to an existing one. +- Per-class `strict=True` is the call site for any model that should reject `"3.14"` for a float; HTTP-boundary models often skip it because JSON requires the coercion. +- `pyproject.toml` `[tool.ruff.lint.per-file-ignores]` adds `"src/models/**" = ["TCH003"]` because Pydantic needs runtime imports for type annotations. diff --git a/src/observability/README.md b/src/observability/README.md new file mode 100644 index 0000000..d621674 --- /dev/null +++ b/src/observability/README.md @@ -0,0 +1,20 @@ +# `src/observability/` + +OpenTelemetry SDK setup, OTLP exporter wiring, structured-JSON logging with trace correlation, and span helpers built around the OTel GenAI semantic conventions. Wired once from `src.api.main.lifespan` so the rest of the codebase calls into a configured tracer / logger without orchestrating startup. + +## Key interfaces + +- **`tracing.setup_tracing()`** — creates the global `TracerProvider`, attaches a `BatchSpanProcessor` with the OTLP gRPC exporter (or `ConsoleSpanExporter` when `OTEL_EXPORTER=console`). Reads `OTEL_SERVICE_NAME` (default `harness-python-react`) and `OTEL_EXPORTER_OTLP_ENDPOINT` (default `http://localhost:4317`). +- **`tracing.instrument_fastapi(app)`** + **`tracing.instrument_httpx()`** — auto-instrumentation hooks. Each FastAPI request and outbound httpx call gets a span with the standard semantic-convention attributes. +- **`tracing.get_tracer(name)`** — retrieve a tracer by module name when manual instrumentation is needed. +- **`logging.setup_logging(level=None)`** — configures stdlib `logging` to emit single-line JSON via `_JSONFormatter`. Reads `LOG_LEVEL` env when no argument is passed. Loops in `LoggingInstrumentor` so the `otelTraceID` / `otelSpanID` fields land on every record. +- **`logging._JSONFormatter`** — the formatter; tested directly in `tests/test_observability.py` for the trace-correlation contract. +- **`spans.agent_span(name, attributes=None)`** — context manager that opens a span, sets initial attributes from a `Mapping`, yields the live span for further mutation. Use this rather than calling `tracer.start_as_current_span` directly so the attribute-shape stays consistent. +- **`spans.set_span_attributes(span, **kwargs)`** — set multiple attributes; `None` values are silently skipped. +- **`spans.GENAI_*` / `DB_*` constants** — exported attribute-key constants for the OTel GenAI + database semantic conventions. Use these instead of raw strings so a typo is a `NameError`, not a silently-different attribute. + +## Conventions + +- **Semconv keys only.** The constants at the top of `spans.py` are the single source of attribute names. Adding a custom `agent.foo` attribute is a hard no — extend the semconv list (or wait for upstream OTel to bless one). +- **Provider-agnostic exporter.** `OTEL_EXPORTER_OTLP_ENDPOINT` is the standard variable; `docker-compose.yml` sets it to `http://jaeger:4317` in the compose network. +- **Lifespan-only setup.** Don't call `setup_tracing()` from anywhere except `src.api.main.lifespan` — duplicate calls trigger OTel's "Overriding of current TracerProvider" warning, and tests rely on the test-fixture variant that attaches an in-memory exporter to whatever provider exists. diff --git a/src/tools/README.md b/src/tools/README.md new file mode 100644 index 0000000..ad54658 --- /dev/null +++ b/src/tools/README.md @@ -0,0 +1,19 @@ +# `src/tools/` + +Generic tool registry — the dispatcher pattern an LLM-driven agent uses to call typed tools. Each tool is a function `StrictModel -> StrictModel`; the registry maps a name to `(input_schema, callable)` and validates the dict-shaped input against the schema before dispatch. Layer-wise sits below `agent` / `api` / `eval`, above `data` / `models` (enforced by `import-linter`). + +## Key interfaces + +- **`registry.Registry`** — instance class. Construct one per agent/test fixture; the module also exposes a global `registry` singleton that the example `echo_tool` self-registers into at import time. + - **`register(name, input_schema)`** — decorator factory. Wraps a function and inserts the `(input_schema, fn)` pair under `name`. Raises `ValueError` on duplicate registration. + - **`dispatch(name, raw_input)`** — `name`-lookup → `input_schema.model_validate(raw_input)` → call. Pydantic `ValidationError` propagates on bad input (wrong type or unknown keys via `StrictModel.extra="forbid"`). + - **`names()`** — sorted list of registered tool names; cheap introspection for the agent's tool-listing prompt. +- **`registry.UnknownToolError`** — `KeyError` subclass raised by `dispatch` when *name* isn't registered. Caught and rendered as a tool-call-error event by the agent loop. +- **`registry.registry`** — the module-global `Registry` instance. Real agents use this so the `echo_tool` is reachable from any consumer; tests construct private `Registry()` instances to avoid cross-test contamination. +- **`registry.echo_tool`** + **`EchoToolInput`** + **`EchoToolOutput`** — example tool demonstrating the layer. Ships pre-registered under name `"echo"` to give a working dispatch path on a fresh clone. + +## Conventions + +- **Inputs and outputs are `StrictModel`s.** Avoid raw `dict`s — the `extra="forbid"` posture is the entire reason this pattern beats hand-rolled `if/elif` dispatch over JSON. +- **Tools are pure functions** of their input. State the tool needs (DB connection, HTTP client) is injected via partial / closure at registration time, not via module globals. +- **Add a tool = add a module.** Real tools beyond the example get their own file under `src/tools/` and self-register the same way `echo_tool` does. Keep `registry.py` itself focused on the dispatcher. diff --git a/uv.lock b/uv.lock index 9ca9a26..df07bc8 100644 --- a/uv.lock +++ b/uv.lock @@ -328,7 +328,7 @@ wheels = [ [[package]] name = "harness-python-react" -version = "0.2.0" +version = "0.2.1" source = { virtual = "." } dependencies = [ { name = "fastapi" }, From 461cab6277997717e26a6c2c66bf2e78bbd135bb Mon Sep 17 00:00:00 2001 From: Constantinos <41453723+constk@users.noreply.github.com> Date: Thu, 30 Apr 2026 22:19:29 +1000 Subject: [PATCH 5/9] chore: aspirational-ticket gate + INVARIANTS marker convention (#133, #153) (#72) --- .github/branch-protection/develop.json | 1 + .github/branch-protection/main.json | 1 + .github/scripts/check_aspirational_tickets.py | 183 ++++++++++++++++++ .github/workflows/ci.yml | 15 ++ docs/INVARIANTS.md | 14 ++ pyproject.toml | 2 +- uv.lock | 2 +- 7 files changed, 216 insertions(+), 2 deletions(-) create mode 100644 .github/scripts/check_aspirational_tickets.py diff --git a/.github/branch-protection/develop.json b/.github/branch-protection/develop.json index 9543e69..54cbbe7 100644 --- a/.github/branch-protection/develop.json +++ b/.github/branch-protection/develop.json @@ -13,6 +13,7 @@ "Action pinning audit", "Tests required", "src/ README audit", + "Aspirational ticket cite", "Frontend Build", "Frontend Quality", "Branch-protection contexts sync", diff --git a/.github/branch-protection/main.json b/.github/branch-protection/main.json index d7bafaa..455b74a 100644 --- a/.github/branch-protection/main.json +++ b/.github/branch-protection/main.json @@ -13,6 +13,7 @@ "Action pinning audit", "Tests required", "src/ README audit", + "Aspirational ticket cite", "Frontend Build", "Frontend Quality", "Branch-protection contexts sync", diff --git a/.github/scripts/check_aspirational_tickets.py b/.github/scripts/check_aspirational_tickets.py new file mode 100644 index 0000000..ded7013 --- /dev/null +++ b/.github/scripts/check_aspirational_tickets.py @@ -0,0 +1,183 @@ +#!/usr/bin/env python3 +"""Enforce the *aspirational invariants are ticketed* rule from `docs/HARNESS.md`. + +`docs/HARNESS.md`: *"Do not merge an 'aspirational' invariant without a +follow-up issue."* `docs/INVARIANTS.md` documents this with the +`*Aspirational:*` marker line per invariant. Until #133 this rule was +enforced by reviewer memory only. + +Behaviour: + +- Walks `docs/INVARIANTS.md`, reading line by line. +- A line is treated as an *aspirational marker* when it starts with + `*Aspirational` (one leading asterisk) or `**Aspirational**` (two). + Mid-sentence prose like "items marked *aspirational* are…" is + ignored — the marker shape is documented in + `docs/INVARIANTS.md`. +- For each marker line, requires at least one `#NNN` reference on the + same line; fails when none is present. +- When `GITHUB_TOKEN` and `GITHUB_REPOSITORY` are set, fetches each + cited issue via the REST API and emits a `::warning::` (not + failure) when the ticket is closed. API failures (network, rate + limit, 404) downgrade to a warning — the gate's job is to catch + drift in the doc, not to be a transient-CI tripwire. +- When `ASPIRATIONAL_STRICT=1` is set, a closed-ticket cite is + promoted from `::warning::` to a hard failure (#153). Default off. + This is the toggle to flip when the project decides "an aspirational + invariant whose cite is closed must be promoted to enforced or + refiled in this PR" — useful as the harness matures and accumulated + closed-cite drift would otherwise sit unread. API failures still + downgrade to a warning even under strict mode (the gate is still + not a transient-CI tripwire — only documented closed state hits). + +There is **no exemption mechanism** (see `feedback_no_noqa`). If an +entry should not be flagged, it is not aspirational — reword it with +a different marker (e.g. `**Production note:**` for future product +evolution that's not a yet-to-be-enforced rule). + +Exit codes: + 0 — every aspirational marker cites at least one open / unverified ticket + 1 — at least one marker has no ticket cite, or (strict mode) has a + closed cite + 2 — script-level error (`docs/INVARIANTS.md` not found) + +Usage (from repo root): + + python .github/scripts/check_aspirational_tickets.py +""" + +from __future__ import annotations + +import json +import os +import re +import sys +import urllib.error +import urllib.request +from pathlib import Path + +INVARIANTS_DOC = Path("docs/INVARIANTS.md") + +# A marker line *starts* with one or two asterisks immediately followed by +# `Aspirational` and a word boundary. Avoids picking up mid-sentence prose +# such as `Items marked *aspirational* are rules…` (lower-case + non-anchored). +_MARKER_RE = re.compile(r"^\*{1,2}Aspirational\b") +_TICKET_RE = re.compile(r"#(\d+)") + + +def _find_markers(text: str) -> list[tuple[int, str]]: + """Return (1-indexed line number, line text) for each aspirational marker.""" + found: list[tuple[int, str]] = [] + for index, line in enumerate(text.splitlines(), start=1): + if _MARKER_RE.match(line.lstrip()): + found.append((index, line.strip())) + return found + + +def _issue_state(repo: str, number: str, token: str) -> str | None: + """Return the issue state ("open" / "closed") or None on any API failure.""" + url = f"https://api.github.com/repos/{repo}/issues/{number}" + req = urllib.request.Request( # noqa: S310 — fixed api.github.com host + url, + headers={ + "Authorization": f"Bearer {token}", + "Accept": "application/vnd.github+json", + "X-GitHub-Api-Version": "2022-11-28", + }, + ) + try: + with urllib.request.urlopen(req, timeout=5) as response: # noqa: S310 + payload = json.loads(response.read().decode("utf-8")) + except urllib.error.URLError, TimeoutError, json.JSONDecodeError: + return None + state = payload.get("state") + return state if isinstance(state, str) else None + + +def _check_closed_cites( + line_number: int, + tickets: list[str], + repo: str, + token: str, + *, + strict: bool, +) -> list[str]: + """Return failure messages (strict only); print warnings otherwise.""" + extra_failures: list[str] = [] + for ticket in tickets: + state = _issue_state(repo, ticket, token) + if state != "closed": + continue + severity = "error" if strict else "warning" + message = ( + f"::{severity} file={INVARIANTS_DOC.as_posix()},line={line_number}::" + f"cited ticket #{ticket} is closed. Promote the invariant to " + "enforced (and remove the marker), or refile with a fresh ticket." + ) + if strict: + extra_failures.append(message) + else: + print(message) + return extra_failures + + +def main() -> int: + if not INVARIANTS_DOC.is_file(): + print(f"::error::{INVARIANTS_DOC.as_posix()} not found; run from repo root") + return 2 + + text = INVARIANTS_DOC.read_text(encoding="utf-8") + markers = _find_markers(text) + + if not markers: + print("No aspirational markers found in docs/INVARIANTS.md.") + return 0 + + failures: list[str] = [] + repo = os.environ.get("GITHUB_REPOSITORY", "") + token = os.environ.get("GITHUB_TOKEN", "") + can_check_state = bool(repo and token) + strict_closed = os.environ.get("ASPIRATIONAL_STRICT", "") == "1" + + for line_number, line in markers: + tickets = _TICKET_RE.findall(line) + if not tickets: + failures.append( + f"::error file={INVARIANTS_DOC.as_posix()},line={line_number}::" + "aspirational marker has no `#NNN` ticket reference. Add one or " + "reword (see the doc's *How to add an invariant* footer)." + ) + continue + print( + f"{INVARIANTS_DOC.as_posix()}:{line_number} — " + f"aspirational, cites: {', '.join('#' + t for t in tickets)}" + ) + if can_check_state: + failures.extend( + _check_closed_cites( + line_number, tickets, repo, token, strict=strict_closed + ) + ) + + if failures: + for line in failures: + print(line) + print( + f"\n{len(failures)} aspirational marker(s) missing a ticket cite. " + "Fix in this PR — there is no exemption mechanism, see the " + "module docstring." + ) + return 1 + + suffix_parts: list[str] = [] + if can_check_state: + suffix_parts.append("with state lookup") + if strict_closed: + suffix_parts.append("strict closed-cite mode") + suffix = f" ({', '.join(suffix_parts)})" if suffix_parts else "" + print(f"Aspirational-ticket audit OK — {len(markers)} marker(s) checked{suffix}.") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2f0b2cb..0c9bf08 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -151,6 +151,21 @@ jobs: python-version: "3.14" - run: python .github/scripts/check_tests_present.py + aspirational-tickets: + name: Aspirational ticket cite + runs-on: ubuntu-latest + # docs/INVARIANTS.md: every `*Aspirational` / `**Aspirational**` marker + # line cites a `#NNN` ticket; closed cites warn (or fail under + # ASPIRATIONAL_STRICT=1). GITHUB_TOKEN enables ticket-state lookup. + steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + with: + python-version: "3.14" + - env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: python .github/scripts/check_aspirational_tickets.py + src-readmes: name: src/ README audit runs-on: ubuntu-latest diff --git a/docs/INVARIANTS.md b/docs/INVARIANTS.md index 093305b..74c3010 100644 --- a/docs/INVARIANTS.md +++ b/docs/INVARIANTS.md @@ -48,3 +48,17 @@ Add invariants below as your domain stabilises. Each entry should describe: - *Enforced by:* test, review, or specific CI job. Examples of the kind of invariant that earns a slot here: a domain-specific data contract that must validate at ingestion, a security boundary that must not log PII, a tool-call protocol that the agent must follow before the LLM emits a final response. + +--- + +## How to add an invariant + +Add a new `## N. ` section at the bottom of slots 6+. Each entry has three lines: + +- The rule, one sentence. +- **Where:** module / config file path. +- **Enforced by:** test name, CI job, or `review` if no automated check exists yet. + +If the invariant is not yet automated, add a marker line whose first non-whitespace characters are `*Aspirational` or `**Aspirational**` — both shapes are recognised by the `Aspirational ticket cite` CI job (`.github/scripts/check_aspirational_tickets.py`). The marker line MUST cite at least one `#NNN` ticket; the gate fails CI otherwise. When the cited ticket closes, promote the invariant to enforced in the same PR (delete the marker line, fill in `Enforced by:`). Set `ASPIRATIONAL_STRICT=1` on the gate's CI job to escalate closed-cite drift from `::warning::` to a hard failure. + +Use `**Production note:**` (not `**Aspirational**`) for forward-looking product evolution that is NOT a future-enforced rule — e.g. "this changes when multi-tenant lands". Production notes are not picked up by the gate. diff --git a/pyproject.toml b/pyproject.toml index 2e06350..1f84568 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "harness-python-react" -version = "0.2.1" +version = "0.2.2" description = "Production-quality LLM-driven coding harness — Python (FastAPI) backend, Vite + React + TypeScript frontend." readme = "README.md" requires-python = ">=3.14" diff --git a/uv.lock b/uv.lock index df07bc8..e4cbcb4 100644 --- a/uv.lock +++ b/uv.lock @@ -328,7 +328,7 @@ wheels = [ [[package]] name = "harness-python-react" -version = "0.2.1" +version = "0.2.2" source = { virtual = "." } dependencies = [ { name = "fastapi" }, From 6375817ac51aaf31477b541e5246a150a9cfdb2b Mon Sep 17 00:00:00 2001 From: Constantinos <41453723+constk@users.noreply.github.com> Date: Fri, 1 May 2026 00:24:39 +1000 Subject: [PATCH 6/9] =?UTF-8?q?chore:=20subject-case=20sync=20extension=20?= =?UTF-8?q?=E2=80=94=20commitizen=20+=20pr-title=20(#128,=20#154)=20(#73)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/scripts/check_commit_types.py | 202 ++++++++++++++---- pyproject.toml | 4 +- tests/test_check_commit_types_subject.py | 250 +++++++++++++++++++++++ uv.lock | 2 +- 4 files changed, 410 insertions(+), 48 deletions(-) create mode 100644 tests/test_check_commit_types_subject.py diff --git a/.github/scripts/check_commit_types.py b/.github/scripts/check_commit_types.py index c3d9ec9..b1fe7a7 100644 --- a/.github/scripts/check_commit_types.py +++ b/.github/scripts/check_commit_types.py @@ -1,22 +1,30 @@ #!/usr/bin/env python3 -"""Verify the commit-type allowlist stays in sync across two configs. +"""Verify the commit-type allowlist + subject-case rule stay in sync. -Seven prefixes are allowed on commits and PR titles: feat, fix, docs, -test, refactor, chore, release. Two places enforce that list today: +Two configs hand-encode the same conventional-commit policy: 1. ``[tool.commitizen].customize.schema_pattern`` in ``pyproject.toml`` — the commitizen regex (commit-msg hook, local). 2. ``.github/workflows/pr-title.yml`` ``types:`` input to the - ``amannn/action-semantic-pull-request`` step — the PR-title CI check. + ``amannn/action-semantic-pull-request`` step plus its ``subjectPattern`` + — the PR-title CI check. Both are hand-maintained. Add a type in one, forget the other, and the two layers drift: commits fail locally but PR titles pass (or vice versa). ``docs/DEVELOPMENT.md`` explicitly warns these must stay in sync, but prose warnings drift too. -This script mirrors the ``check_required_contexts.py`` pattern from #72 -for this second drift class. Fails CI when the two sets disagree in -either direction. +This script enforces sync on **two axes**: + +- **Type allowlist** — the seven prefixes (feat, fix, docs, test, + refactor, chore, release). Mirrors the ``check_required_contexts.py`` + pattern from #72. +- **Subject-case rule** — the negative-lookahead constraint that rejects + Title-Case subjects (``feat: Add thing`` → reject; ``feat: add thing`` + / ``feat: CI failure`` → accept). Added in #128 so commitizen rejects + Title Case at commit-msg time, not just at the CI gate. + +Fails CI when either axis disagrees in either direction. Usage (from repo root): @@ -48,6 +56,19 @@ # extraction when a future type contained digits or hyphens. _SCHEMA_ALTERNATION_RE = re.compile(r"\^\(([a-z0-9\-|]+)\)") +# Matches the subject-case constraint between `:\s` and the trailing `.+` +# in the commitizen schema_pattern. Tolerates three shapes seen across +# revisions: +# :\s — original #128 shape (single-space, susceptible to the +# `feat: Add thing` double-space bypass). +# :\s+ — naive widening (still backtracks on Title-Case input). +# :\s++ — possessive quantifier (#154); the schema we want long-term +# because it forbids the lookahead-bypass via backtracking. +# All three encode the same "after `:` then whitespace, then this lookahead" +# semantics; the regex captures the lookahead chunk regardless. +# Returns "" if no subject constraint is present (commitizen pre-#128 shape). +_SCHEMA_SUBJECT_RE = re.compile(r":\\s\+{0,2}(.*?)\.\+$") + def commitizen_types() -> set[str]: """Return the set of types allowed by the commitizen schema regex.""" @@ -83,28 +104,87 @@ def commitizen_types() -> set[str]: return types +def commitizen_subject_pattern() -> str: + """Extract the subject-case constraint from commitizen's schema_pattern. + + The schema_pattern shape (post-#128): + ^(feat|fix|...)(\\([\\w\\-]+\\))?!?:\\s(?![A-Z][a-z]).+ + + Returns the chunk between ``:\\s`` and the trailing ``.+`` — i.e. the + negative-lookahead constraint on the subject. Returns "" when no + subject constraint is present (commitizen pre-#128 shape). + """ + data = tomllib.loads(PYPROJECT.read_text(encoding="utf-8")) + schema: str = ( + data.get("tool", {}) + .get("commitizen", {}) + .get("customize", {}) + .get("schema_pattern", "") + ) + if not schema: + # Same error commitizen_types() raises — caller already enforces. + return "" + match = _SCHEMA_SUBJECT_RE.search(schema) + if not match: + return "" + return match.group(1) + + def pr_title_types() -> set[str]: """Return the set of types declared in the pr-title workflow.""" + return _pr_title_field("types", _parse_types) # type: ignore[return-value] + + +def pr_title_subject_pattern() -> str: + """Return the subject-case constraint declared in the pr-title workflow. + + Strips the leading ``^`` anchor and the trailing ``.+$`` from the + YAML ``subjectPattern`` field so the comparison with commitizen's + constraint is normalised. Returns "" when the field is absent. + """ + raw: str = _pr_title_field("subjectPattern", lambda v: v or "", required=False) # type: ignore[assignment] + if not raw: + return "" + pattern = re.sub(r"^\^", "", raw) + pattern = re.sub(r"\.\+\$$", "", pattern) + return pattern + + +def _parse_types(value: str) -> set[str]: + """Parse the YAML ``types`` field (newline-separated string) into a set.""" + types = {line.strip() for line in value.splitlines() if line.strip()} + if not types: + msg = ( + f"`types:` block in {PR_TITLE_YML} is empty or " + "whitespace-only. Expected at least one commit type per line." + ) + raise ValueError(msg) + return types + + +def _pr_title_field( + name: str, + parse: object, + *, + required: bool = True, +) -> object: + """Extract a single field from the action-semantic-pull-request step.""" data = yaml.safe_load(PR_TITLE_YML.read_text(encoding="utf-8")) for job in data.get("jobs", {}).values(): for step in job.get("steps", []): uses = step.get("uses", "") if "action-semantic-pull-request" in uses: - types_block: str = step.get("with", {}).get("types", "") - types = { - line.strip() for line in types_block.splitlines() if line.strip() - } - # An empty or whitespace-only `types:` block would return an - # empty set and trivially match an empty commitizen set — - # masking a real config error. Fail loudly instead (#92). - if not types: - msg = ( - f"`types:` block in {PR_TITLE_YML} is empty or " - "whitespace-only. Expected at least one commit type " - "per line." - ) - raise ValueError(msg) - return types + value = step.get("with", {}).get(name) + if value is None: + if required: + msg = ( + f"`with.{name}` not found in the " + "action-semantic-pull-request step. Update this " + "script if the action's input names changed." + ) + raise ValueError(msg) + return "" + return parse(value) # type: ignore[operator] msg = ( "Could not find an `amannn/action-semantic-pull-request` step in " f"{PR_TITLE_YML}. If the action was renamed or the file moved, " @@ -114,38 +194,70 @@ def pr_title_types() -> set[str]: def main() -> int: - cz = commitizen_types() - pr = pr_title_types() + cz_types = commitizen_types() + pr_types = pr_title_types() + cz_subject = commitizen_subject_pattern() + pr_subject = pr_title_subject_pattern() + + failed = False # Belt-and-braces safety net: both extractors raise on empty, but guard # against a future refactor that drops the raise (#92). - if not cz or not pr: + if not cz_types or not pr_types: print( "::error::One or both extractors returned empty; sync check cannot " - "proceed. commitizen_types() empty: " - f"{not cz}; pr_title_types() empty: {not pr}." + f"proceed. commitizen_types() empty: {not cz_types}; " + f"pr_title_types() empty: {not pr_types}." ) return 1 - if cz == pr: - print(f"Commit types in sync ({len(cz)} types): {sorted(cz)}") - return 0 + if cz_types == pr_types: + print(f"Commit types in sync ({len(cz_types)} types): {sorted(cz_types)}") + else: + failed = True + print( + "::error::[tool.commitizen].customize.schema_pattern and " + ".github/workflows/pr-title.yml types are out of sync" + ) + for name in sorted(cz_types - pr_types): + print(f"::error:: + in commitizen only: {name!r}") + for name in sorted(pr_types - cz_types): + print(f"::error:: - in pr-title.yml only: {name!r}") + print( + "\nFix: update both the schema_pattern in pyproject.toml AND " + "the `types` list in .github/workflows/pr-title.yml so they " + "contain the same type names. See docs/DEVELOPMENT.md#commit-messages." + ) + + if cz_subject == pr_subject: + if cz_subject: + print(f"Subject-case constraint in sync: {cz_subject!r}") + else: + # Both empty — older shape, before #128's subject-case landed in + # commitizen. Don't fail here; the `Lint PR title` workflow remains + # the single layer if commitizen drops back. Surface as a warning. + print( + "::warning::Both commitizen and pr-title.yml have empty " + "subject-case constraints. Per docs/DEVELOPMENT.md the rule " + "should be enforced at both layers — re-add `(?![A-Z][a-z])` " + "to commitizen's schema_pattern after `:\\s`." + ) + else: + failed = True + print( + "::error::commitizen schema_pattern subject-case constraint " + "and pr-title.yml `subjectPattern` are out of sync" + ) + print(f"::error:: commitizen extracted: {cz_subject!r}") + print(f"::error:: pr-title.yml extracted: {pr_subject!r}") + print( + "\nFix: keep both regexes equivalent after stripping anchors. " + "Commitizen's chunk lives between `:\\s` and `.+` in " + "schema_pattern; pr-title.yml's lives in the `subjectPattern` " + "field stripped of `^` and `.+$`." + ) - print( - "::error::[tool.commitizen].customize.schema_pattern and " - ".github/workflows/pr-title.yml types are out of sync" - ) - for name in sorted(cz - pr): - print(f"::error:: + in commitizen only: {name!r}") - for name in sorted(pr - cz): - print(f"::error:: - in pr-title.yml only: {name!r}") - print( - "\nFix: update both the schema_pattern in pyproject.toml AND " - "the `types` list in .github/workflows/pr-title.yml so they " - "contain the same type names. See docs/DEVELOPMENT.md#commit-messages " - "for the current allowed list." - ) - return 1 + return 1 if failed else 0 if __name__ == "__main__": diff --git a/pyproject.toml b/pyproject.toml index 1f84568..ab267f3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "harness-python-react" -version = "0.2.2" +version = "0.2.3" description = "Production-quality LLM-driven coding harness — Python (FastAPI) backend, Vite + React + TypeScript frontend." readme = "README.md" requires-python = ">=3.14" @@ -165,7 +165,7 @@ name = "cz_customize" [tool.commitizen.customize] schema = "(): " -schema_pattern = '^(feat|fix|docs|test|refactor|chore|release)(\([\w\-]+\))?!?:\s.+' +schema_pattern = '^(feat|fix|docs|test|refactor|chore|release)(\([\w\-]+\))?!?:\s++(?![A-Z][a-z]).+' bump_pattern = '^(feat|fix|refactor)' bump_map = {feat = "MINOR", fix = "PATCH", refactor = "PATCH", chore = "PATCH", docs = "PATCH", test = "PATCH"} example = "feat: add an example feature" diff --git a/tests/test_check_commit_types_subject.py b/tests/test_check_commit_types_subject.py new file mode 100644 index 0000000..e1a8f11 --- /dev/null +++ b/tests/test_check_commit_types_subject.py @@ -0,0 +1,250 @@ +"""Tests for the subject-case sync extension to ``check_commit_types.py``. + +#128 added a second axis to the existing commit-type sync gate: the +subject-case constraint must match between commitizen's ``schema_pattern`` +and pr-title.yml's ``subjectPattern``. Tests for the type-allowlist axis +(#91 / #92) live in ``test_check_commit_types.py``; this sibling file +isolates the subject-pattern tests so neither file approaches the +300-line cap. +""" + +from __future__ import annotations + +import importlib.util +import re +from pathlib import Path +from typing import TYPE_CHECKING +from unittest.mock import patch + +if TYPE_CHECKING: + from types import ModuleType + + import pytest + + +def _load_script() -> ModuleType: + script_path = ( + Path(__file__).parent.parent / ".github" / "scripts" / "check_commit_types.py" + ) + spec = importlib.util.spec_from_file_location("check_commit_types", script_path) + assert spec is not None + assert spec.loader is not None + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +cct = _load_script() + + +# --------------------------------------------------------------------------- +# commitizen_subject_pattern() — extracts the lookahead chunk from +# pyproject.toml's schema_pattern between `:\s` and the trailing `.+`. +# --------------------------------------------------------------------------- + + +class TestCommitizenSubjectExtraction: + """commitizen_subject_pattern() extracts the right chunk from pyproject.""" + + def test_extracts_negative_lookahead_when_present(self, tmp_path: Path) -> None: + pyproject = tmp_path / "pyproject.toml" + pyproject.write_text( + "[tool.commitizen.customize]\n" + "schema_pattern = " + r"'^(feat|fix)(\([\w\-]+\))?!?:\s(?![A-Z][a-z]).+'" + "\n", + encoding="utf-8", + ) + with patch.object(cct, "PYPROJECT", pyproject): + assert cct.commitizen_subject_pattern() == "(?![A-Z][a-z])" + + def test_returns_empty_when_no_subject_constraint(self, tmp_path: Path) -> None: + """Pre-#128 schema shape (just `.+` after `:\\s`) returns empty.""" + pyproject = tmp_path / "pyproject.toml" + pyproject.write_text( + "[tool.commitizen.customize]\n" + "schema_pattern = " + r"'^(feat|fix)(\([\w\-]+\))?!?:\s.+'" + "\n", + encoding="utf-8", + ) + with patch.object(cct, "PYPROJECT", pyproject): + assert cct.commitizen_subject_pattern() == "" + + def test_returns_empty_when_schema_absent(self, tmp_path: Path) -> None: + pyproject = tmp_path / "pyproject.toml" + pyproject.write_text("[tool.other]\nfoo = 1\n", encoding="utf-8") + with patch.object(cct, "PYPROJECT", pyproject): + assert cct.commitizen_subject_pattern() == "" + + +# --------------------------------------------------------------------------- +# pr_title_subject_pattern() — extracts the YAML subjectPattern field and +# strips `^` / `.+$` anchors so the comparison with commitizen normalises. +# --------------------------------------------------------------------------- + + +class TestPrTitleSubjectExtraction: + """pr_title_subject_pattern() parses YAML and strips anchors.""" + + def test_extracts_and_strips_anchors(self, tmp_path: Path) -> None: + yml = tmp_path / "pr-title.yml" + yml.write_text( + "jobs:\n" + " lint:\n" + " steps:\n" + " - uses: amannn/action-semantic-pull-request@abc\n" + " with:\n" + " types: |\n" + " feat\n" + " subjectPattern: '^(?![A-Z][a-z]).+$'\n", + encoding="utf-8", + ) + with patch.object(cct, "PR_TITLE_YML", yml): + assert cct.pr_title_subject_pattern() == "(?![A-Z][a-z])" + + def test_returns_empty_when_field_absent(self, tmp_path: Path) -> None: + yml = tmp_path / "pr-title.yml" + yml.write_text( + "jobs:\n" + " lint:\n" + " steps:\n" + " - uses: amannn/action-semantic-pull-request@abc\n" + " with:\n" + " types: |\n" + " feat\n", + encoding="utf-8", + ) + with patch.object(cct, "PR_TITLE_YML", yml): + assert cct.pr_title_subject_pattern() == "" + + +# --------------------------------------------------------------------------- +# main() with subject-pattern axis +# --------------------------------------------------------------------------- + + +def _setup_both( + tmp_path: Path, + cz_subject: str, + pr_subject: str, +) -> tuple[Path, Path]: + """Stub PYPROJECT + PR_TITLE_YML with the given subject-case patterns.""" + cz_extra = cz_subject if cz_subject else "" + cz_pattern = ( + r"^(feat|fix|docs|test|refactor|chore|release)(\([\w\-]+\))?!?:\s" + f"{cz_extra}.+" + ) + pyproject = tmp_path / "pyproject.toml" + pyproject.write_text( + f"[tool.commitizen.customize]\nschema_pattern = '{cz_pattern}'\n", + encoding="utf-8", + ) + pr_field = f" subjectPattern: '^{pr_subject}.+$'\n" if pr_subject else "" + yml = tmp_path / "pr-title.yml" + yml.write_text( + "jobs:\n" + " lint:\n" + " steps:\n" + " - uses: amannn/action-semantic-pull-request@abc\n" + " with:\n" + " types: |\n" + " feat\n fix\n docs\n test\n" + " refactor\n chore\n release\n" + f"{pr_field}", + encoding="utf-8", + ) + return pyproject, yml + + +def test_main_subjects_match_passes( + tmp_path: Path, capsys: pytest.CaptureFixture[str] +) -> None: + pyproject, yml = _setup_both(tmp_path, "(?![A-Z][a-z])", "(?![A-Z][a-z])") + with ( + patch.object(cct, "PYPROJECT", pyproject), + patch.object(cct, "PR_TITLE_YML", yml), + ): + assert cct.main() == 0 + out = capsys.readouterr().out + assert "Subject-case constraint in sync" in out + + +def test_main_subject_drift_fails( + tmp_path: Path, capsys: pytest.CaptureFixture[str] +) -> None: + """commitizen has the constraint, pr-title.yml doesn't → fail.""" + pyproject, yml = _setup_both(tmp_path, "(?![A-Z][a-z])", "") + with ( + patch.object(cct, "PYPROJECT", pyproject), + patch.object(cct, "PR_TITLE_YML", yml), + ): + assert cct.main() == 1 + err = capsys.readouterr().out + assert "subject-case constraint" in err + assert "out of sync" in err + + +def test_main_both_subjects_empty_warns_not_fails( + tmp_path: Path, capsys: pytest.CaptureFixture[str] +) -> None: + """Both empty (pre-#128 shape) → warn, don't block.""" + pyproject, yml = _setup_both(tmp_path, "", "") + with ( + patch.object(cct, "PYPROJECT", pyproject), + patch.object(cct, "PR_TITLE_YML", yml), + ): + assert cct.main() == 0 + out = capsys.readouterr().out + assert "::warning::" in out + assert "subject-case" in out + + +# --------------------------------------------------------------------------- +# #154 — schema rejects double-space-then-Title-Case (`feat: Add thing`) +# --------------------------------------------------------------------------- +# +# The schema_pattern landed in #128 used `:\s` (single whitespace), which the +# negative lookahead `(?![A-Z][a-z])` then anchors to the *first* non-space +# character. With `\s` consuming exactly one space, `feat: Add thing` (two +# spaces) bypassed the constraint: `\s` ate the first space, the lookahead +# saw the second space (not `[A-Z]`), succeeded, and `.+` swallowed the +# Title-Case subject. #154 widens to `\s+` so the lookahead anchors to the +# first non-whitespace character regardless of run length. +# +# These tests exercise the LIVE pyproject.toml schema_pattern (not a fixture) +# so a future regression that drops back to `\s` fails here. + + +def _live_schema_pattern() -> re.Pattern[str]: + import tomllib # local import — stdlib, only this test class needs it + + data = tomllib.loads(cct.PYPROJECT.read_text(encoding="utf-8")) + schema: str = data["tool"]["commitizen"]["customize"]["schema_pattern"] + return re.compile(schema) + + +class TestSchemaRejectsTitleCaseRegardlessOfSpaceCount: + """The negative lookahead must anchor to the first non-whitespace char.""" + + def test_single_space_lowercase_subject_passes(self) -> None: + assert _live_schema_pattern().fullmatch("feat: add thing") + + def test_single_space_title_case_subject_fails(self) -> None: + assert not _live_schema_pattern().fullmatch("feat: Add thing") + + def test_double_space_lowercase_subject_passes(self) -> None: + """Liberal on input shape — multi-space after `:` is unusual but not banned.""" + assert _live_schema_pattern().fullmatch("feat: add thing") + + def test_double_space_title_case_subject_fails(self) -> None: + """The #154 regression — pre-fix this passed silently.""" + assert not _live_schema_pattern().fullmatch("feat: Add thing") + + def test_triple_space_title_case_subject_fails(self) -> None: + """Liberal `\\s+` keeps anchoring regardless of run length.""" + assert not _live_schema_pattern().fullmatch("feat: Add thing") + + def test_subject_starting_with_uppercase_acronym_passes(self) -> None: + """`CI failure` is two-uppercase-letters — lookahead `[A-Z][a-z]` ignores it.""" + assert _live_schema_pattern().fullmatch("feat: CI failure") diff --git a/uv.lock b/uv.lock index e4cbcb4..5edabc5 100644 --- a/uv.lock +++ b/uv.lock @@ -328,7 +328,7 @@ wheels = [ [[package]] name = "harness-python-react" -version = "0.2.2" +version = "0.2.3" source = { virtual = "." } dependencies = [ { name = "fastapi" }, From 5b8284dfe07de974bf45ed8051b5a81177de7136 Mon Sep 17 00:00:00 2001 From: Constantinos <41453723+constk@users.noreply.github.com> Date: Fri, 1 May 2026 09:00:00 +1000 Subject: [PATCH 7/9] chore: post-release CHANGELOG rollup automation (#150, #166, #167) (#74) --- .github/scripts/check_required_contexts.py | 4 + .github/scripts/rollup_changelog.py | 268 +++++++++++++++++++++ .github/workflows/changelog-rollup.yml | 155 ++++++++++++ CHANGELOG.md | 17 +- pyproject.toml | 2 +- tests/test_rollup_changelog.py | 204 ++++++++++++++++ uv.lock | 2 +- 7 files changed, 645 insertions(+), 7 deletions(-) create mode 100644 .github/scripts/rollup_changelog.py create mode 100644 .github/workflows/changelog-rollup.yml create mode 100644 tests/test_rollup_changelog.py diff --git a/.github/scripts/check_required_contexts.py b/.github/scripts/check_required_contexts.py index 518d946..0ff657a 100644 --- a/.github/scripts/check_required_contexts.py +++ b/.github/scripts/check_required_contexts.py @@ -51,6 +51,10 @@ "Tag-triggered (v*.*.*); builds image, generates SBOM, publishes" " release. Never appears on PR check sets." ), + "changelog-rollup.yml": ( + "workflow_run-triggered after release.yml + workflow_dispatch only;" + " opens its own roll-up PR (which goes through ci.yml as normal)." + ), } diff --git a/.github/scripts/rollup_changelog.py b/.github/scripts/rollup_changelog.py new file mode 100644 index 0000000..db3c5fc --- /dev/null +++ b/.github/scripts/rollup_changelog.py @@ -0,0 +1,268 @@ +#!/usr/bin/env python3 +"""Roll up the CHANGELOG `[Unreleased]` section under a `[]` heading. + +Triggered automatically by `.github/workflows/changelog-rollup.yml` after a +successful `release.yml` run. Performs the mechanical edits that would +otherwise be hand-rolled per release. + +Edits to `CHANGELOG.md`: + +1. Insert `## [] - ` heading immediately after `## [Unreleased]`. +2. Update `[Unreleased]: …/compare/...HEAD` → `…/compare/...HEAD`. +3. Insert `[]: …/compare/...` link in the footer. + +Edits to `pyproject.toml` and `uv.lock`: + +4. Bump `[project].version` PATCH (e.g. `0.2.10` → `0.2.11`). The release + tag's version IS the current dev version (release: PRs don't bump), + so the rollup PR is what advances develop into the next cycle. +5. Sync the `[[package]] name = "harness-python-react"` self-version line + in `uv.lock` to match `pyproject.toml`. Same hand-edit pattern as + the version-bump gate enforces on regular PRs. + +Idempotency: + +- If a `## []` heading already exists, step 1 is skipped (the + rollup PR can be re-run without duplicating sections). +- If a `[]:` footer link already exists, step 3 is skipped. +- The version-bump steps are idempotent only if pyproject's version still + equals the released tag; running twice on an already-bumped develop + would push it forward again. Workflow uses a fresh checkout so this + doesn't compound across re-runs. + +Modes: + +- Default: full roll-up (CHANGELOG edits + version bump). This is what + `changelog-rollup.yml` does after a release tag is cut. +- `--no-bump`: CHANGELOG edits only, skip the version bump. Use when + pre-staging the CHANGELOG before a release PR is opened — develop's + current version is the about-to-be-released version, and the + post-release rollup is what advances develop into the next cycle. + +Edge case — no prior tag (first release): + +- `--prior-tag ""` produces a footer link of shape + `[]: …/releases/tag/` (mirrors the existing `[1.0.0]` + / first-tag entry shape). + +Exit codes: + 0 — file edits applied (or already-rolled-up; idempotent path) + 1 — argument validation failure (bad version shape, etc.) + 2 — file not found / write error / TOML parse error + +Usage: + + python .github/scripts/rollup_changelog.py \\ + --tag v0.3.0 --prior-tag v0.2.5 --date 2026-05-01 +""" + +from __future__ import annotations + +import argparse +import re +import sys +import tomllib +from pathlib import Path + +CHANGELOG = Path("CHANGELOG.md") +PYPROJECT = Path("pyproject.toml") +UV_LOCK = Path("uv.lock") + +REPO_PATH = "constk/harness-python-react" +PACKAGE_NAME = "harness-python-react" +COMPARE_URL_BASE = f"https://github.com/{REPO_PATH}/compare" +RELEASES_TAG_BASE = f"https://github.com/{REPO_PATH}/releases/tag" + +_SEMVER_RE = re.compile(r"^v?(\d+)\.(\d+)\.(\d+)$") +_DATE_RE = re.compile(r"^\d{4}-\d{2}-\d{2}$") + + +def _strip_v(tag: str) -> str: + return tag[1:] if tag.startswith("v") else tag + + +def _bump_patch(version: str) -> str: + """`0.2.10` → `0.2.11`. Raises ValueError on non-semver input.""" + match = _SEMVER_RE.fullmatch(version) + if not match: + msg = f"unsupported semver shape: {version!r}" + raise ValueError(msg) + major, minor, patch = match.groups() + return f"{major}.{minor}.{int(patch) + 1}" + + +def rollup_changelog_text( + text: str, + tag: str, + prior_tag: str, + date: str, +) -> str: + """Pure-string transform — `text` → updated `text`. Idempotent.""" + version = _strip_v(tag) + heading_re = re.compile( + rf"^## \[{re.escape(version)}\]\s+-\s+\d{{4}}-\d{{2}}-\d{{2}}", + re.MULTILINE, + ) + if not heading_re.search(text): + # Trailing `\n\n` keeps the blank line between the new heading and + # its first subsection that every existing release section has. + # Without it the rendered diff reads `## [v0.3.0] - …\n### Features` + # which Keep-a-Changelog tolerates but is cosmetically inconsistent. + text = re.sub( + r"^## \[Unreleased\]\s*\n", + f"## [Unreleased]\n\n## [{version}] - {date}\n\n", + text, + count=1, + flags=re.MULTILINE, + ) + + text = re.sub( + r"^\[Unreleased\]:\s+(.*?/compare/)\S+?\.\.\.HEAD\s*$", + rf"[Unreleased]: \1{tag}...HEAD", + text, + count=1, + flags=re.MULTILINE, + ) + + if f"[{version}]:" not in text: + if prior_tag: + new_link = f"[{version}]: {COMPARE_URL_BASE}/{prior_tag}...{tag}" + else: + new_link = f"[{version}]: {RELEASES_TAG_BASE}/{tag}" + text = re.sub( + r"^(\[Unreleased\]:.*)$", + rf"\1\n{new_link}", + text, + count=1, + flags=re.MULTILINE, + ) + return text + + +def bump_pyproject_text(text: str, current: str, new: str) -> str: + """Replace `version = ""` with `version = ""` once.""" + pattern = re.compile(rf'^version\s*=\s*"{re.escape(current)}"', re.MULTILINE) + if not pattern.search(text): + msg = ( + f'pyproject.toml version line `version = "{current}"` not found; ' + "either the rollup ran out-of-order or the file shape changed." + ) + raise ValueError(msg) + return pattern.sub(f'version = "{new}"', text, count=1) + + +def bump_uv_lock_text(text: str, current: str, new: str) -> str: + """Replace the project's self-version line in `uv.lock` once.""" + pattern = re.compile( + rf'^name = "{re.escape(PACKAGE_NAME)}"\nversion = "{re.escape(current)}"\n', + re.MULTILINE, + ) + if not pattern.search(text): + msg = ( + f'uv.lock self-version line `version = "{current}"` not found ' + f'under the `[[package]] name = "{PACKAGE_NAME}"` block.' + ) + raise ValueError(msg) + return pattern.sub(f'name = "{PACKAGE_NAME}"\nversion = "{new}"\n', text, count=1) + + +def _read_pyproject_version() -> str: + data = tomllib.loads(PYPROJECT.read_text(encoding="utf-8")) + version = data.get("project", {}).get("version", "") + if not isinstance(version, str) or not _SEMVER_RE.fullmatch(version): + msg = f"unable to read [project].version from {PYPROJECT}: {version!r}" + raise ValueError(msg) + return version + + +def main() -> int: + parser = argparse.ArgumentParser(description=__doc__.splitlines()[0]) + parser.add_argument("--tag", required=True, help="released tag, e.g. v0.3.0") + parser.add_argument( + "--prior-tag", + default="", + help="previous tag for compare link; empty for first release", + ) + parser.add_argument("--date", required=True, help="release date YYYY-MM-DD (UTC)") + parser.add_argument( + "--no-bump", + action="store_true", + help=( + "skip the pyproject.toml + uv.lock version bump — used when " + "pre-staging the CHANGELOG before the release tag is cut; the " + "post-release rollup is what advances develop into the next cycle" + ), + ) + args = parser.parse_args() + + if not _SEMVER_RE.fullmatch(args.tag): + print(f"::error::--tag must be vMAJOR.MINOR.PATCH; got {args.tag!r}") + return 1 + if args.prior_tag and not _SEMVER_RE.fullmatch(args.prior_tag): + print(f"::error::--prior-tag must be vX.Y.Z or empty; got {args.prior_tag!r}") + return 1 + if not _DATE_RE.fullmatch(args.date): + print(f"::error::--date must be YYYY-MM-DD; got {args.date!r}") + return 1 + + released_version = _strip_v(args.tag) + next_version = _bump_patch(released_version) + + try: + current_version = _read_pyproject_version() + except (FileNotFoundError, ValueError) as exc: + print(f"::error::{exc}") + return 2 + # Version-vs-tag sanity. Skipped under --no-bump because the prestage + # runs *before* the release tag is cut: develop's current version IS + # the about-to-be-released version, which matches `released_version`, + # but we don't want to bump it (the post-release rollup does that). + if not args.no_bump and current_version != released_version: + print( + f"::error::pyproject.toml version is {current_version!r} but tag is " + f"{args.tag!r} (expected {released_version!r}). The rollup workflow " + "must run against develop *as the release was cut from*; if develop " + "moved on, replay manually after rebasing." + ) + return 1 + + try: + new_changelog = rollup_changelog_text( + CHANGELOG.read_text(encoding="utf-8"), + args.tag, + args.prior_tag, + args.date, + ) + if not args.no_bump: + new_pyproject = bump_pyproject_text( + PYPROJECT.read_text(encoding="utf-8"), + current_version, + next_version, + ) + new_uv_lock = bump_uv_lock_text( + UV_LOCK.read_text(encoding="utf-8"), + current_version, + next_version, + ) + except (FileNotFoundError, ValueError) as exc: + print(f"::error::{exc}") + return 2 + + CHANGELOG.write_text(new_changelog, encoding="utf-8") + if not args.no_bump: + PYPROJECT.write_text(new_pyproject, encoding="utf-8") + UV_LOCK.write_text(new_uv_lock, encoding="utf-8") + print( + f"Rolled up CHANGELOG [Unreleased] under [{released_version}] - " + f"{args.date}; bumped {current_version} -> {next_version}." + ) + else: + print( + f"Pre-staged CHANGELOG [Unreleased] under [{released_version}] - " + f"{args.date}; version bump deferred to the post-release rollup." + ) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/.github/workflows/changelog-rollup.yml b/.github/workflows/changelog-rollup.yml new file mode 100644 index 0000000..3851077 --- /dev/null +++ b/.github/workflows/changelog-rollup.yml @@ -0,0 +1,155 @@ +name: Roll up CHANGELOG after release + +# Triggered automatically after `release.yml` succeeds. Performs the +# mechanical CHANGELOG roll-up (move `[Unreleased]` entries under a +# `[]` heading, update footer compare links, bump pyproject.toml + +# uv.lock to the next PATCH). Opens a `chore:` PR against develop for +# owner review + admin-squash-merge. +# +# The script logic + edge cases live in +# `.github/scripts/rollup_changelog.py` with full unit-test coverage in +# `tests/test_rollup_changelog.py`. + +on: + workflow_run: + workflows: [Release] + types: [completed] + workflow_dispatch: + inputs: + tag: + description: "Released tag (e.g. v0.3.0) — required only for manual replay" + required: true + type: string + +permissions: + contents: write + pull-requests: write + +jobs: + rollup: + name: Open rollup PR + runs-on: ubuntu-latest + # `workflow_run` fires regardless of conclusion; gate so we only act on + # successful release runs (skip when release.yml itself failed). + if: > + github.event_name == 'workflow_dispatch' || + github.event.workflow_run.conclusion == 'success' + steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + with: + ref: develop + # full history needed for `git describe --abbrev=0 --tags ^` + # to resolve the prior tag. + fetch-depth: 0 + # Prefer RELEASE_BOT_TOKEN (a non-GITHUB_TOKEN identity) so the + # resulting branch push fires `pull_request` workflows on the + # auto-PR. Falls back to GITHUB_TOKEN when the secret isn't set — + # the auto-PR still opens, but its CI doesn't run until a user + # pushes a commit on top. + token: ${{ secrets.RELEASE_BOT_TOKEN || secrets.GITHUB_TOKEN }} + + - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + with: + python-version: "3.14" + + - name: Resolve tags + date + id: resolve + run: | + set -euo pipefail + if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then + TAG="${{ inputs.tag }}" + else + # release.yml runs on tag pushes; the head_branch on the + # workflow_run payload carries the tag name in that case. + TAG="${{ github.event.workflow_run.head_branch }}" + fi + if [ -z "${TAG}" ]; then + echo "::error::could not resolve released tag from trigger payload" + exit 1 + fi + # Resolve the tag's prior tag. `git describe ... ^` errors when + # there is no prior — catch and treat as empty (first release). + if PRIOR=$(git describe --abbrev=0 --tags --match 'v*.*.*' "${TAG}^" 2>/dev/null); then + echo "prior tag: ${PRIOR}" + else + PRIOR="" + echo "no prior tag (first release)" + fi + # Resolve the release date from the released tag's CHANGELOG.md. + # The release PR is the authoritative source — using `date -u` at + # workflow run time can drift if the workflow fires in a different + # UTC day than the release PR was crafted. Falls back to date -u + # with a warning when the heading isn't found, so a degenerate + # CHANGELOG shape doesn't block the workflow. + VERSION="${TAG#v}" + if DATE=$(git show "${TAG}":CHANGELOG.md \ + | grep -oE "^## \[${VERSION}\] - [0-9]{4}-[0-9]{2}-[0-9]{2}" \ + | head -1 \ + | sed 's/.* - //'); then + if [ -z "${DATE}" ]; then + echo "::warning::could not extract date for ${VERSION} from CHANGELOG.md; falling back to date -u" + DATE=$(date -u +%Y-%m-%d) + else + echo "date from CHANGELOG: ${DATE}" + fi + else + echo "::warning::git show ${TAG}:CHANGELOG.md failed; falling back to date -u" + DATE=$(date -u +%Y-%m-%d) + fi + echo "tag=${TAG}" >> "$GITHUB_OUTPUT" + echo "prior=${PRIOR}" >> "$GITHUB_OUTPUT" + echo "date=${DATE}" >> "$GITHUB_OUTPUT" + echo "branch=chore/changelog-rollup-${TAG}" >> "$GITHUB_OUTPUT" + + - name: Run rollup script + run: | + python .github/scripts/rollup_changelog.py \ + --tag "${{ steps.resolve.outputs.tag }}" \ + --prior-tag "${{ steps.resolve.outputs.prior }}" \ + --date "${{ steps.resolve.outputs.date }}" + + - name: Open rollup PR + env: + GH_TOKEN: ${{ secrets.RELEASE_BOT_TOKEN || secrets.GITHUB_TOKEN }} + run: | + set -euo pipefail + BRANCH="${{ steps.resolve.outputs.branch }}" + TAG="${{ steps.resolve.outputs.tag }}" + DATE="${{ steps.resolve.outputs.date }}" + # Idempotent: if the branch already exists from a previous replay, + # bail rather than force-push (the existing PR is the source of truth). + if git ls-remote --exit-code --heads origin "${BRANCH}" >/dev/null 2>&1; then + echo "::warning::branch ${BRANCH} already exists; skipping push to avoid clobbering an in-flight rollup PR" + exit 0 + fi + git config user.name "github-actions[bot]" + git config user.email "41898282+github-actions[bot]@users.noreply.github.com" + git checkout -b "${BRANCH}" + git add CHANGELOG.md pyproject.toml uv.lock + git commit -m "chore: roll up CHANGELOG [Unreleased] under [${TAG#v}] - ${DATE}" + git push origin "${BRANCH}" + gh pr create \ + --base develop \ + --head "${BRANCH}" \ + --title "chore: roll up CHANGELOG [Unreleased] under [${TAG#v}] - ${DATE}" \ + --body "$(cat < --admin --squash\`. + EOF + )" diff --git a/CHANGELOG.md b/CHANGELOG.md index 0e359f1..0c22c50 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,20 +2,27 @@ All notable changes to this project will be documented in this file. -The format is loosely based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) and the project adheres to [Semantic Versioning](https://semver.org/). -Released versions are drafted automatically by [release-drafter](https://github.com/release-drafter/release-drafter); see `.github/release-drafter.yml` and `.github/workflows/release-drafter.yml`. Each entry on the GitHub Releases page corresponds to a tag of the form `vX.Y.Z`. +Future entries are drafted automatically by [release-drafter](.github/release-drafter.yml) on every merge to `main`. After a release tag is cut, the post-release rollup workflow (`.github/workflows/changelog-rollup.yml`) opens a PR that moves the `[Unreleased]` block under a versioned heading and bumps `pyproject.toml` + `uv.lock` to the next PATCH. Categories map to conventional-commit prefixes: -## Unreleased +- `feat:` → **Features** +- `fix:` → **Fixes** +- `chore:` / `docs:` / `refactor:` / `test:` / `release:` → **Housekeeping** + +## [Unreleased] ### Added - Initial harness scaffold (Python 3.14 + FastAPI + Pydantic v2 + OpenTelemetry; React 19.2 + Vite + TypeScript strict). -- 15 required CI status checks (lint, typecheck, tests, coverage ≥ 75 %, import-linter, pre-commit, frontend build/quality, security suite, two meta-gates, PR-title lint). -- Release pipeline: tag-triggered build, push to GHCR, CycloneDX SBOM, GitHub Release publish. +- 21 required CI status checks (lint, typecheck, tests, coverage ≥ 75 %, import-linter, pre-commit, frontend build/quality, file-length cap, version-bump, action-pinning audit, tests-required, src/ README audit, aspirational-ticket cite, security suite, two meta-gates, PR-title lint). +- Release pipeline: tag-triggered build, push to GHCR, CycloneDX SBOM, GitHub Release publish, post-release CHANGELOG rollup automation. - Eval harness scaffold (provider-agnostic runner + LLM-judge Protocol + 1 example golden case + workflow_dispatch nightly). - `.claude/` agent integration (3 hooks, 6 auto-activating skills, settings example). +- Container hardening: read-only root FS, tmpfs `/tmp`, `PYTHONDONTWRITEBYTECODE` / `PYTHONUNBUFFERED` env in runtime stage. ### Notes - This template was extracted from a prior LLM-coded project and generalised. The harness is the product; the scaffold exists so every gate has something to operate on. + +[Unreleased]: https://github.com/constk/harness-python-react/compare/v0.0.0...HEAD diff --git a/pyproject.toml b/pyproject.toml index ab267f3..a143b0d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "harness-python-react" -version = "0.2.3" +version = "0.2.4" description = "Production-quality LLM-driven coding harness — Python (FastAPI) backend, Vite + React + TypeScript frontend." readme = "README.md" requires-python = ">=3.14" diff --git a/tests/test_rollup_changelog.py b/tests/test_rollup_changelog.py new file mode 100644 index 0000000..01dbb21 --- /dev/null +++ b/tests/test_rollup_changelog.py @@ -0,0 +1,204 @@ +"""Tests for `.github/scripts/rollup_changelog.py` (#150).""" + +from __future__ import annotations + +import importlib.util +from pathlib import Path +from typing import TYPE_CHECKING + +import pytest + +if TYPE_CHECKING: + from types import ModuleType + + +def _load_script() -> ModuleType: + script_path = ( + Path(__file__).parent.parent / ".github" / "scripts" / "rollup_changelog.py" + ) + spec = importlib.util.spec_from_file_location("rollup_changelog", script_path) + assert spec is not None + assert spec.loader is not None + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +rc = _load_script() + + +# --------------------------------------------------------------------------- +# _bump_patch / _strip_v — semver helpers. +# --------------------------------------------------------------------------- + + +class TestBumpPatch: + def test_basic_increment(self) -> None: + assert rc._bump_patch("1.9.10") == "1.9.11" + + def test_zero_patch(self) -> None: + assert rc._bump_patch("0.1.0") == "0.1.1" + + def test_strips_v_prefix(self) -> None: + assert rc._bump_patch(rc._strip_v("v2.5.7")) == "2.5.8" + + def test_rejects_non_semver(self) -> None: + with pytest.raises(ValueError, match="unsupported semver"): + rc._bump_patch("1.9") + + +# --------------------------------------------------------------------------- +# rollup_changelog_text — pure-string transform. +# --------------------------------------------------------------------------- + + +_BASE_CHANGELOG = """# Changelog + +## [Unreleased] + +### Tests + +- Some test entry. ([#999](https://github.com/x/y/issues/999)) + +## [1.9.4] - 2026-04-29 + +### Features + +- Old feature. + +[Unreleased]: https://github.com/constk/harness-python-react/compare/v1.9.4...HEAD +[1.9.4]: https://github.com/constk/harness-python-react/compare/v1.4.13...v1.9.4 +""" + + +def test_rollup_inserts_version_heading() -> None: + out = rc.rollup_changelog_text(_BASE_CHANGELOG, "v1.9.10", "v1.9.4", "2026-05-15") + assert "## [1.9.10] - 2026-05-15" in out + assert "## [Unreleased]\n\n## [1.9.10] - 2026-05-15" in out + + +def test_rollup_heading_followed_by_blank_line() -> None: + """#166 — the new heading must have a blank line before the next section. + + Pre-#166 the rollup produced `## [v1.9.10] - …\n### Tests`, missing the + blank line that every existing release section has. Cosmetic, but the + kind of inconsistency that accumulates as the file grows. + """ + out = rc.rollup_changelog_text(_BASE_CHANGELOG, "v1.9.10", "v1.9.4", "2026-05-15") + assert "## [1.9.10] - 2026-05-15\n\n### Tests" in out + assert "## [1.9.10] - 2026-05-15\n### Tests" not in out + + +def test_rollup_updates_unreleased_compare_link() -> None: + out = rc.rollup_changelog_text(_BASE_CHANGELOG, "v1.9.10", "v1.9.4", "2026-05-15") + assert ( + "[Unreleased]: https://github.com/constk/harness-python-react/" + "compare/v1.9.10...HEAD" in out + ) + assert "compare/v1.9.4...HEAD" not in out + + +def test_rollup_inserts_new_version_footer_link() -> None: + out = rc.rollup_changelog_text(_BASE_CHANGELOG, "v1.9.10", "v1.9.4", "2026-05-15") + assert ( + "[1.9.10]: https://github.com/constk/harness-python-react/" + "compare/v1.9.4...v1.9.10" in out + ) + + +def test_rollup_preserves_existing_version_links() -> None: + out = rc.rollup_changelog_text(_BASE_CHANGELOG, "v1.9.10", "v1.9.4", "2026-05-15") + assert "[1.9.4]: https://github.com/constk/harness-python-react/" in out + + +def test_rollup_idempotent_on_existing_heading() -> None: + """Re-running on already-rolled-up text doesn't duplicate the heading.""" + once = rc.rollup_changelog_text(_BASE_CHANGELOG, "v1.9.10", "v1.9.4", "2026-05-15") + twice = rc.rollup_changelog_text(once, "v1.9.10", "v1.9.4", "2026-05-15") + assert once.count("## [1.9.10] - 2026-05-15") == 1 + assert twice.count("## [1.9.10] - 2026-05-15") == 1 + + +def test_rollup_idempotent_on_existing_footer_link() -> None: + once = rc.rollup_changelog_text(_BASE_CHANGELOG, "v1.9.10", "v1.9.4", "2026-05-15") + assert once.count("[1.9.10]:") == 1 + twice = rc.rollup_changelog_text(once, "v1.9.10", "v1.9.4", "2026-05-15") + assert twice.count("[1.9.10]:") == 1 + + +def test_rollup_first_release_uses_releases_tag_url() -> None: + """No prior tag → footer link points at /releases/tag/ not /compare/.""" + initial = """# Changelog + +## [Unreleased] + +[Unreleased]: https://github.com/constk/harness-python-react/compare/HEAD...HEAD +""" + out = rc.rollup_changelog_text(initial, "v0.1.0", "", "2026-01-01") + assert ( + "[0.1.0]: https://github.com/constk/harness-python-react/" + "releases/tag/v0.1.0" in out + ) + # Compare-style link not used when prior_tag is empty. + assert "compare/...v0.1.0" not in out + + +def test_rollup_handles_two_digit_minor_and_patch() -> None: + """v1.10.42 has two-digit numbers — common after enough patch cycles.""" + out = rc.rollup_changelog_text(_BASE_CHANGELOG, "v1.10.42", "v1.9.4", "2026-12-31") + assert "## [1.10.42] - 2026-12-31" in out + assert "compare/v1.9.4...v1.10.42" in out + + +# --------------------------------------------------------------------------- +# bump_pyproject_text / bump_uv_lock_text — line-level edits. +# --------------------------------------------------------------------------- + + +def test_bump_pyproject_basic() -> None: + text = '[project]\nname = "harness-python-react"\nversion = "1.9.10"\n' + out = rc.bump_pyproject_text(text, "1.9.10", "1.9.11") + assert 'version = "1.9.11"' in out + assert 'version = "1.9.10"' not in out + + +def test_bump_pyproject_only_replaces_project_version_line() -> None: + """A `version = "..."` line elsewhere (e.g. a dependency) isn't touched.""" + text = ( + '[project]\nversion = "1.9.10"\n\n' + '[tool.dummy]\nversion = "0.0.1" # unrelated\n' + ) + out = rc.bump_pyproject_text(text, "1.9.10", "1.9.11") + assert out.count('version = "1.9.11"') == 1 + assert 'version = "0.0.1"' in out + + +def test_bump_pyproject_raises_when_current_missing() -> None: + with pytest.raises(ValueError, match="not found"): + rc.bump_pyproject_text('version = "9.9.9"\n', "1.9.10", "1.9.11") + + +def test_bump_uv_lock_basic() -> None: + text = ( + '[[package]]\nname = "harness-python-react"\nversion = "1.9.10"\n' + 'source = { editable = "." }\n' + ) + out = rc.bump_uv_lock_text(text, "1.9.10", "1.9.11") + assert 'version = "1.9.11"' in out + + +def test_bump_uv_lock_does_not_touch_other_packages() -> None: + """Only the self-version block's version line is replaced.""" + text = ( + '[[package]]\nname = "fastapi"\nversion = "1.9.10"\n\n' + '[[package]]\nname = "harness-python-react"\nversion = "1.9.10"\n' + ) + out = rc.bump_uv_lock_text(text, "1.9.10", "1.9.11") + # FastAPI's coincidentally-same version stays put. + assert out.count('name = "fastapi"\nversion = "1.9.10"') == 1 + assert out.count('name = "harness-python-react"\nversion = "1.9.11"') == 1 + + +# main() integration tests live in `tests/test_rollup_changelog_main.py` — +# split out so neither file approaches the 300-line cap (same pattern as +# `test_check_commit_types_subject.py`'s sibling-file split for #128). diff --git a/uv.lock b/uv.lock index 5edabc5..a444275 100644 --- a/uv.lock +++ b/uv.lock @@ -328,7 +328,7 @@ wheels = [ [[package]] name = "harness-python-react" -version = "0.2.3" +version = "0.2.4" source = { virtual = "." } dependencies = [ { name = "fastapi" }, From 4de2a0790cbb3e3cd83b62f25946e1e8b19bec01 Mon Sep 17 00:00:00 2001 From: Constantinos <41453723+constk@users.noreply.github.com> Date: Fri, 1 May 2026 23:58:41 +1000 Subject: [PATCH 8/9] test: route-versioning audit + OTel semconv backstop tests (#130, #132, #147) (#75) --- pyproject.toml | 2 +- tests/test_otel_semconv.py | 225 +++++++++++++++++++++++++++++++++ tests/test_route_versioning.py | 105 +++++++++++++++ uv.lock | 2 +- 4 files changed, 332 insertions(+), 2 deletions(-) create mode 100644 tests/test_otel_semconv.py create mode 100644 tests/test_route_versioning.py diff --git a/pyproject.toml b/pyproject.toml index a143b0d..3fafc5a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "harness-python-react" -version = "0.2.4" +version = "0.2.5" description = "Production-quality LLM-driven coding harness — Python (FastAPI) backend, Vite + React + TypeScript frontend." readme = "README.md" requires-python = ">=3.14" diff --git a/tests/test_otel_semconv.py b/tests/test_otel_semconv.py new file mode 100644 index 0000000..f318818 --- /dev/null +++ b/tests/test_otel_semconv.py @@ -0,0 +1,225 @@ +"""Assertions that emitted spans use OTel semantic-convention attribute names. + +Test-side enforcement of the rule "use the official OTel semconv attribute +names where one exists" (see `src/observability/spans.py` and +`docs/INVARIANTS.md`). + +The scaffold ships only semconv-defined keys (no custom ones); the +`ALLOWED_CUSTOM_KEYS` set below is intentionally empty. When a project +that extends this template needs a custom attribute (e.g. +`agent.tool_calls_count`), it MUST add a constant to +`src/observability/spans.py` AND register it in `ALLOWED_CUSTOM_KEYS` — +the registry-closure test fails otherwise. That's the forcing function +that keeps custom keys to their documented set rather than letting them +proliferate as one-off raw strings. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pytest +from opentelemetry import trace +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import SimpleSpanProcessor +from opentelemetry.sdk.trace.export.in_memory_span_exporter import ( + InMemorySpanExporter, +) + +from src.observability import spans as spans_module +from src.observability.spans import ( + DB_QUERY_TEXT, + DB_RESPONSE_RETURNED_ROWS, + GENAI_CONVERSATION_ID, + GENAI_OPERATION_NAME, + GENAI_PROVIDER_NAME, + GENAI_REQUEST_MODEL, + GENAI_TOOL_CALL_ARGUMENTS, + GENAI_TOOL_CALL_RESULT, + GENAI_TOOL_NAME, + GENAI_USAGE_INPUT_TOKENS, + GENAI_USAGE_OUTPUT_TOKENS, + agent_span, +) + +if TYPE_CHECKING: + from collections.abc import Sequence + + from opentelemetry.sdk.trace import ReadableSpan + + +SEMCONV_KEYS: frozenset[str] = frozenset( + { + GENAI_CONVERSATION_ID, + GENAI_REQUEST_MODEL, + GENAI_OPERATION_NAME, + GENAI_PROVIDER_NAME, + GENAI_USAGE_INPUT_TOKENS, + GENAI_USAGE_OUTPUT_TOKENS, + GENAI_TOOL_NAME, + GENAI_TOOL_CALL_ARGUMENTS, + GENAI_TOOL_CALL_RESULT, + DB_QUERY_TEXT, + DB_RESPONSE_RETURNED_ROWS, + } +) + +# Custom keys allowed because no semconv equivalent exists. Empty in the +# scaffold — projects that extend this template add entries here as they +# add custom span attributes (and constants in `src/observability/spans.py`). +ALLOWED_CUSTOM_KEYS: frozenset[str] = frozenset() + +ACCEPTED_KEYS: frozenset[str] = SEMCONV_KEYS | ALLOWED_CUSTOM_KEYS + + +@pytest.fixture +def memory_exporter(monkeypatch: pytest.MonkeyPatch) -> InMemorySpanExporter: + """Install a TracerProvider that records spans into an in-memory exporter. + + Uses `monkeypatch.setattr` to swap `trace.get_tracer_provider` for the + test's lifetime instead of `trace.set_tracer_provider` — the OTel SDK + refuses to override the global provider once any other test has called + `set_tracer_provider`, which would silently route spans to the wrong + exporter. + + Uses `SimpleSpanProcessor` so spans are visible to `get_finished_spans()` + immediately after the span context exits — no flush race. + """ + exporter = InMemorySpanExporter() + provider = TracerProvider() + provider.add_span_processor(SimpleSpanProcessor(exporter)) + monkeypatch.setattr(trace, "get_tracer_provider", lambda: provider) + return exporter + + +def _span_by_name(spans: Sequence[ReadableSpan], name: str) -> ReadableSpan: + """Return the unique span with the given name (assert exactly one).""" + matches = [s for s in spans if s.name == name] + assert len(matches) == 1, ( + f"expected exactly one span named {name!r}, got {len(matches)} " + f"(all names: {[s.name for s in spans]})" + ) + return matches[0] + + +def test_tool_call_span_carries_genai_tool_attributes( + memory_exporter: InMemorySpanExporter, +) -> None: + """A tool-call span carries the documented `gen_ai.tool.*` keys.""" + tool_attrs: dict[str, str | int] = { + GENAI_TOOL_NAME: "echo", + GENAI_TOOL_CALL_ARGUMENTS: '{"msg": "hi"}', + GENAI_TOOL_CALL_RESULT: '{"echoed": "hi"}', + } + with agent_span("tool.echo", tool_attrs): + pass + + spans = memory_exporter.get_finished_spans() + tool_span = _span_by_name(spans, "tool.echo") + assert tool_span.attributes is not None + keys = set(tool_span.attributes) + + assert GENAI_TOOL_NAME in keys + assert GENAI_TOOL_CALL_ARGUMENTS in keys + assert GENAI_TOOL_CALL_RESULT in keys + assert tool_span.attributes[GENAI_TOOL_NAME] == "echo" + + +def test_llm_call_span_carries_genai_attributes( + memory_exporter: InMemorySpanExporter, +) -> None: + """An LLM-style span carries the documented `gen_ai.*` keys + token usage.""" + llm_attrs: dict[str, str | int] = { + GENAI_CONVERSATION_ID: "session-test-1", + GENAI_REQUEST_MODEL: "gpt-4o-mini", + GENAI_OPERATION_NAME: "chat", + GENAI_PROVIDER_NAME: "openai", + GENAI_USAGE_INPUT_TOKENS: 42, + GENAI_USAGE_OUTPUT_TOKENS: 100, + } + with agent_span("agent.llm_call", llm_attrs): + pass + + spans = memory_exporter.get_finished_spans() + llm_span = _span_by_name(spans, "agent.llm_call") + assert llm_span.attributes is not None + + for key, value in llm_attrs.items(): + assert key in llm_span.attributes, f"missing {key}" + assert llm_span.attributes[key] == value + + +def test_db_query_span_carries_db_attributes( + memory_exporter: InMemorySpanExporter, +) -> None: + """A DB-query span carries `db.query.text` + `db.response.returned_rows`.""" + db_attrs: dict[str, str | int] = { + DB_QUERY_TEXT: "SELECT 1", + DB_RESPONSE_RETURNED_ROWS: 1, + } + with agent_span("db.example_query", db_attrs): + pass + + spans = memory_exporter.get_finished_spans() + db_span = _span_by_name(spans, "db.example_query") + assert db_span.attributes is not None + keys = set(db_span.attributes) + + assert DB_QUERY_TEXT in keys + assert DB_RESPONSE_RETURNED_ROWS in keys + + +def test_no_unregistered_attribute_keys_emitted( + memory_exporter: InMemorySpanExporter, +) -> None: + """Every emitted attribute key is in `SEMCONV_KEYS` or `ALLOWED_CUSTOM_KEYS`.""" + with agent_span( + "tool.echo", + { + GENAI_TOOL_NAME: "echo", + GENAI_TOOL_CALL_ARGUMENTS: "{}", + GENAI_TOOL_CALL_RESULT: "{}", + }, + ): + pass + + spans = memory_exporter.get_finished_spans() + + rogue: list[tuple[str, str]] = [] + for span in spans: + if span.attributes is None: + continue + for key in span.attributes: + if key not in ACCEPTED_KEYS: + rogue.append((span.name, key)) + + assert not rogue, ( + "Span attributes emitted with keys outside the documented registry: " + f"{rogue}. Either use a semconv name from `src/observability/spans.py` " + "or add the new key to both `spans.py` (with a comment explaining why " + "no semconv equivalent applies) and `ALLOWED_CUSTOM_KEYS` in this test." + ) + + +def test_spans_module_constants_are_registered() -> None: + """Every uppercase `str` constant in `spans.py` belongs to the test allowlist. + + The allowlist is `SEMCONV_KEYS | ALLOWED_CUSTOM_KEYS`. Catches the + "added a new constant in `spans.py` without updating the test" drift class. + If this fails, either: + + - the new constant is a semconv name → add it to `SEMCONV_KEYS`. + - the new constant is a custom name → add it to `ALLOWED_CUSTOM_KEYS` + AND ensure the comment in `spans.py` explains why no semconv applies. + """ + declared = { + getattr(spans_module, name) + for name in dir(spans_module) + if name.isupper() and isinstance(getattr(spans_module, name), str) + } + unregistered = declared - ACCEPTED_KEYS + assert not unregistered, ( + f"`spans.py` defines constants not in the test allowlist: " + f"{sorted(unregistered)!r}. Update `SEMCONV_KEYS` or " + "`ALLOWED_CUSTOM_KEYS` in this test." + ) diff --git a/tests/test_route_versioning.py b/tests/test_route_versioning.py new file mode 100644 index 0000000..450c736 --- /dev/null +++ b/tests/test_route_versioning.py @@ -0,0 +1,105 @@ +"""Audit every FastAPI route is `/api/v*/...` or in the documented allowlist. + +Backstops invariant 7 in `docs/INVARIANTS.md` (was *aspirational on test*). +A future contributor adding `@app.get("/foo")` instead of registering the +endpoint on the versioned `APIRouter(prefix="/api/v1")` would silently +introduce an un-versioned route — breaking the API-versioning contract from +`docs/DEVELOPMENT.md` without any gate firing. This test is the gate. +""" + +from __future__ import annotations + +import re + +from src.api.main import app + +VERSIONED_PREFIX = re.compile(r"^/api/v\d+/") + +# Routes deliberately NOT under `/api/v*/`. Add an entry here only with a +# comment naming the structural reason — never just "we needed an exception". +UNVERSIONED_ALLOWLIST: frozenset[str] = frozenset( + { + # FastAPI auto-generated documentation surface. Disabled by passing + # docs_url=None / redoc_url=None to FastAPI() if needed; the + # scaffold keeps them enabled at the unversioned root. + "/openapi.json", + "/docs", + "/docs/oauth2-redirect", + "/redoc", + } +) + + +def test_every_route_is_versioned_or_allowlisted() -> None: + """Every routable path is `/api/v*/` or in the explicit allowlist.""" + offenders: list[str] = [] + for route in app.routes: + path = getattr(route, "path", None) + if path is None: + # Not an APIRoute (e.g. Mount). Skip cleanly — `path` is the + # discriminator we care about. + continue + if path in UNVERSIONED_ALLOWLIST: + continue + if not VERSIONED_PREFIX.match(path): + offenders.append(path) + + assert not offenders, ( + f"un-versioned route(s) detected in app.routes: {offenders!r}. Per " + "docs/DEVELOPMENT.md API Versioning, every routable path must match " + "`/api/v*/<…>` or be added to UNVERSIONED_ALLOWLIST in this test " + "with a comment naming the structural reason. Common entry points " + 'for an un-versioned path: a router wired without `prefix="/api/vN"`, ' + "an `@app.` decorator on the FastAPI app instead of a versioned " + "APIRouter, a manual `app.add_api_route(...)` call, or a Mount that " + "exposes paths at the unversioned root. Fix at the wiring site or, " + "if the path is genuinely unversioned by design (cf. `/api/health`), " + "add it to the allowlist." + ) + + +def test_allowlist_has_no_dead_entries() -> None: + """Every UNVERSIONED_ALLOWLIST entry must correspond to a real route. + + Without this assertion, removing a docs surface (e.g. constructing + FastAPI with `docs_url=None`) leaves `/docs` in the allowlist as cruft; + over time the allowlist becomes a graveyard that masks what is + *actually* deliberately unversioned. Any drop-out fails fast and forces + a deletion in the same PR. + """ + actual_paths = {getattr(r, "path", None) for r in app.routes} + actual_paths.discard(None) + dead = sorted(UNVERSIONED_ALLOWLIST - actual_paths) + assert not dead, ( + f"UNVERSIONED_ALLOWLIST entries no longer present in app.routes: " + f"{dead}. Remove the dead entry — keeping it is allowlist cruft. " + "If the path was intentionally removed (e.g. FastAPI docs disabled " + "via `docs_url=None`), drop the corresponding allowlist line in " + "this same PR so the allowlist tracks reality." + ) + + +# ---------- Regex sanity checks ---------- +# +# Belt-and-braces against the allowlist swallowing a typo that the route +# walk wouldn't catch (e.g. someone mutating the regex to be too permissive). + + +def test_regex_accepts_v1_routes() -> None: + assert VERSIONED_PREFIX.match("/api/v1/chat") + assert VERSIONED_PREFIX.match("/api/v1/sessions") + assert VERSIONED_PREFIX.match("/api/v1/sessions/{session_id}") + + +def test_regex_accepts_future_major_versions() -> None: + """`/api/v2/...` and beyond are accepted; the rule isn't v1-only.""" + assert VERSIONED_PREFIX.match("/api/v2/chat") + assert VERSIONED_PREFIX.match("/api/v10/some-endpoint") + + +def test_regex_rejects_unversioned_paths() -> None: + assert not VERSIONED_PREFIX.match("/foo") + assert not VERSIONED_PREFIX.match("/api/foo") + assert not VERSIONED_PREFIX.match("/api/health") # in allowlist, not regex + assert not VERSIONED_PREFIX.match("/api/v/chat") # missing version digit + assert not VERSIONED_PREFIX.match("/api/version1/chat") # wrong prefix shape diff --git a/uv.lock b/uv.lock index a444275..bd15374 100644 --- a/uv.lock +++ b/uv.lock @@ -328,7 +328,7 @@ wheels = [ [[package]] name = "harness-python-react" -version = "0.2.4" +version = "0.2.5" source = { virtual = "." } dependencies = [ { name = "fastapi" }, From 578ef8a92ad652b06af776b940234ef76146f90e Mon Sep 17 00:00:00 2001 From: Constantinos <41453723+constk@users.noreply.github.com> Date: Sat, 2 May 2026 12:21:00 +1000 Subject: [PATCH 9/9] docs: HARNESS_PRIMER + INVARIANTS marker for route-versioning enforcement (#130, #132) (#76) --- docs/HARNESS.md | 4 + docs/HARNESS_PRIMER.md | 375 +++++++++++++++++++++++++++++++++++++++++ docs/INVARIANTS.md | 2 +- pyproject.toml | 2 +- uv.lock | 2 +- 5 files changed, 382 insertions(+), 3 deletions(-) create mode 100644 docs/HARNESS_PRIMER.md diff --git a/docs/HARNESS.md b/docs/HARNESS.md index 0a8ecc6..e24007c 100644 --- a/docs/HARNESS.md +++ b/docs/HARNESS.md @@ -32,6 +32,10 @@ The harness is independent of the project's domain. The included `src/api/echo` ## Reading order +For someone who hasn't worked in a harness like this before, start with [`docs/HARNESS_PRIMER.md`](HARNESS_PRIMER.md) — it's the same surface as this doc but written for non-engineers. + +For an engineer setting up the template: + 1. **`docs/INVARIANTS.md`** — the load-bearing rules. Every PR is checked against them. 2. **`docs/BOUNDARIES.md`** — module layering and the import-linter contracts. 3. **`docs/DEVELOPMENT.md`** — local setup, the `justfile`, the CI pipeline. diff --git a/docs/HARNESS_PRIMER.md b/docs/HARNESS_PRIMER.md new file mode 100644 index 0000000..7d9606d --- /dev/null +++ b/docs/HARNESS_PRIMER.md @@ -0,0 +1,375 @@ +# harness-python-react — Harness Primer + +> A plain-English companion to [HARNESS.md](HARNESS.md). If `HARNESS.md` is the +> map, this is the guided tour: written for someone who understands modern AI +> coding conceptually but is not the engineer who would set up a harness from +> scratch. You should be able to read this end-to-end without opening any other +> file — though every claim links back to a source-of-truth file you can verify. + +--- + +## Table of contents + +1. [What a harness is, and why this template has one](#1-what-a-harness-is-and-why-this-template-has-one) +2. [The three principles, with code](#2-the-three-principles-with-code) +3. [The numbered invariants — at a glance](#3-the-numbered-invariants--at-a-glance) +4. [Pre-commit — the local checkpoint](#4-pre-commit--the-local-checkpoint) +5. [CI gates — the cloud checkpoint](#5-ci-gates--the-cloud-checkpoint) +6. [Agent-level hooks — opt-in per developer](#6-agent-level-hooks--opt-in-per-developer) +7. [Skills — guidance, not enforcement](#7-skills--guidance-not-enforcement) +8. [The evaluation harness — a one-page summary](#8-the-evaluation-harness--a-one-page-summary) +9. [Process-as-code](#9-process-as-code) +10. [Defence in depth — two worked examples](#10-defence-in-depth--two-worked-examples) +11. [Operating within the harness — a survival guide](#11-operating-within-the-harness--a-survival-guide) +12. [Setup checklist for your harness maintainer](#12-setup-checklist-for-your-harness-maintainer) +13. [Glossary](#13-glossary) +14. [Where to go next](#14-where-to-go-next) + +--- + +## 1. What a harness is, and why this template has one + +A **harness**, in this codebase, is the set of automated rails that catch bad changes *before* they reach the `main` branch. Think of it like the safety systems in a modern car: the seatbelt warning that beeps at you, the interlock that stops the engine starting in gear, and the crash-rated chassis that tolerates the mistakes the first two missed. None of those systems are the driver — but together they make a single bad input survivable. + +The template's harness has the same shape, in three layers: + +| Layer | What it does | Car analogy | +|---|---|---| +| **Prompts describe** | `CLAUDE.md` and the `docs/` files tell a human or AI agent what the project values. Advisory only. | The seatbelt warning beep — informative, ignorable. | +| **Skills guide** | Topic-specific briefings the agent loads when relevant (`.claude/skills/`). Shape style and approach but reject nothing. | Lane-keep assist — nudges you back, doesn't lock the wheel. | +| **Hooks and CI enforce** | Mechanical checks: pre-commit, agent-level hooks, GitHub Actions. A violation here blocks the change. | The crash-rated chassis and the interlock — non-negotiable. | + +The principle: **anything that matters is enforced mechanically.** Prose can drift across model versions, sessions, and reviewers; a regex in a CI job cannot. Where this primer says "the system requires X," it means a script somewhere will refuse to let X be violated — and this doc tells you which script. + +A second harness is the **evaluation harness** ([§8](#8-the-evaluation-harness--a-one-page-summary)) — tests whether the *agent* answers questions correctly, not whether the *code* compiles. + +> **If `HARNESS.md` is the map, this is the guided tour.** Engineers reach for the map. Everyone else can start here. + +--- + +## 2. The three principles, with code + +### 2.1 Invariants — *what cannot be true* + +An **invariant** is a numbered rule the system maintains, with a pointer to the code that enforces it. "An invariant in a validator is a fact; an invariant in prose is a suggestion." The repo treats unenforced invariants as bugs — every aspirational entry must cite a tracking issue (`Aspirational ticket cite` CI job). + +→ Source of truth: [docs/INVARIANTS.md](INVARIANTS.md). + +### 2.2 Contracts — *the shape at every seam* + +Every piece of data that crosses a module boundary or process boundary is a Pydantic model that inherits from a strict base class. The base class refuses unknown fields at construction. A typo fails at the seam, not three calls deeper. + +```python +# src/models/_base.py +from pydantic import BaseModel, ConfigDict + + +class StrictModel(BaseModel): + """Base for every contract that crosses a module or process seam.""" + + model_config = ConfigDict(extra="forbid") +``` + +**What you'll see if you violate it.** A typo like `HealthResponse(status="ok", versoin="0.1.0")` raises: + +```text +pydantic_core._pydantic_core.ValidationError: 1 validation error for HealthResponse +versoin + Extra inputs are not permitted [type=extra_forbidden, input_value='0.1.0', input_type=str] +``` + +The rejection happens at the API boundary, in the FastAPI handler, before any business logic runs. + +### 2.3 Boundaries — *who depends on whom* + +`src/` is a one-way dependency graph: + +``` + src.api ─┐ ┌─ src.eval (HTTP / golden dataset, siblings) + ▼ ▼ + src.agent (the tool-calling loop) + ▼ + src.tools (typed tool registry) + ▼ + src.data (DB / external systems) + ▼ + src.observability (OTel spans, logging) + ▼ + src.models (Pydantic contracts; depends on nothing) +``` + +Enforced by `import-linter` ([pyproject.toml](../pyproject.toml) `[tool.importlinter]`). + +**What you'll see if you violate it.** Adding `from src.api.routes import router` to `src/models/foo.py` fails the `Architecture (import-linter)` CI job: + +```text +src.models is not allowed to import src.api: + +- src.models.foo -> src.api.routes (l.4) + +Contracts: 0 kept, 2 broken. +``` + +There is no override flag. + +--- + +## 3. The numbered invariants — at a glance + +| # | Rule | Where enforced | +|---|---|---| +| 1 | Every contract crossing a module seam forbids unknown keys | `StrictModel` in [src/models/_base.py](../src/models/_base.py) + [tests/test_models.py](../tests/test_models.py) | +| 2 | API endpoints live under `/api/v1/` and return typed responses | Router prefix in [src/api/routes.py](../src/api/routes.py) + route-walk test in [tests/test_route_versioning.py](../tests/test_route_versioning.py) | +| 3 | Layer flow is one-way | `import-linter` contracts in [pyproject.toml](../pyproject.toml) + the `Architecture` CI job | +| 4 | Coverage ≥ 75 % on `src/` | `[tool.coverage.report].fail_under = 75` + the `Coverage` CI job | +| 5 | No secret leaves the repo unscanned | Three-layer scan: `.claude/hooks/pretooluse_bash.py` → pre-commit gitleaks → `Secret scan (gitleaks)` CI job | + +→ Full detail: [docs/INVARIANTS.md](INVARIANTS.md). Slots 6+ are reserved for project-specific invariants the team adds as the domain stabilises; the `Aspirational ticket cite` gate enforces that any aspirational marker line cites a `#NNN` ticket. + +--- + +## 4. Pre-commit — the local checkpoint + +Pre-commit runs on your machine before each commit is created. Failures here cost zero CI minutes. + +→ Source of truth: [.pre-commit-config.yaml](../.pre-commit-config.yaml). + +| Hook | What it does | +|---|---| +| **ruff** (`--fix`) | Lints Python and auto-fixes what it can. | +| **ruff-format** | Applies the project's Python code style. | +| **check-yaml / check-toml / check-json** | Parses every YAML/TOML/JSON file you're committing. | +| **check-merge-conflict** | Refuses to commit a file with `<<<<<<<` markers. | +| **check-added-large-files** | Blocks any new file over 500 KB. | +| **end-of-file-fixer / trailing-whitespace / mixed-line-ending** | Hygiene. | +| **gitleaks** | Scans the diff for secrets — first of three independent scan layers. | +| **commitizen** | At `commit-msg` stage. Refuses commit messages that don't match `(): ` with one of the seven allowed types and a non-Title-Case subject. | +| **mypy** (`--strict`) | Type-checks all of `src/` and `tests/`. | + +Wire once with `uv run pre-commit install --hook-type pre-commit --hook-type commit-msg`. + +--- + +## 5. CI gates — the cloud checkpoint + +Every push and every PR triggers GitHub Actions. **21 required contexts** must pass before merge. + +→ Source of truth: [.github/workflows/](../.github/workflows/). + +### 5.1 Code quality + +| Job | What it does | +|---|---| +| `Lint & Format` | `ruff check .` + `ruff format --check .`. Zero tolerance. | +| `Type Check` | `mypy --strict src/ tests/`. | +| `Pre-commit` | Re-runs `pre-commit run --all-files` in CI. | +| `Lint PR title` | Conventional-commit prefix + lowercase-or-initialism subject. | +| `Commit-type sync` | Asserts commitizen schema and `pr-title.yml` agree on both the type allowlist and the subject-case constraint. | +| `Branch-protection contexts sync` | Asserts every workflow job is listed in `.github/branch-protection/{develop,main}.json` (or in the script's `EXEMPT_WORKFLOWS` map). | +| `Version bump check` | `pyproject.toml` `[project].version` differs from base; `uv.lock` self-version matches. `release:` PRs exempt. | +| `Tests required` | `feat:`/`fix:` PRs that touch `src/` must touch `tests/`. Other prefixes get a warn-only. | +| `File length` | `*.py` files in `src/`, `tests/`, `eval/`, `.github/scripts/` capped at 300 lines. Function caps via ruff `PLR0915`/`PLR0912` (50 stmts / 12 branches). | +| `src/ README audit` | Every `src/` package with code has a README ≥ 200 bytes containing `## Key interfaces` (or `## Public surface`). | +| `Aspirational ticket cite` | Lines starting with `*Aspirational` in `docs/INVARIANTS.md` must cite at least one `#NNN` ticket; closed cites warn (or fail under `ASPIRATIONAL_STRICT=1`). | + +### 5.2 Correctness + +| Job | What it does | +|---|---| +| `Unit tests` | `pytest tests/ -m "not integration"` — fast feedback. Includes doc-vs-code drift backstops (`test_route_versioning`, `test_otel_semconv`, etc.). | +| `Coverage` | `pytest tests/ --cov=src` with `fail_under = 75`. | + +### 5.3 Architecture + +| Job | What it does | +|---|---| +| `Architecture (import-linter)` | Runs `import-linter` against the two contracts in `pyproject.toml`. | +| `Frontend Build` | `npm run build` from `frontend/`. | +| `Frontend Quality` | eslint (`max-warnings 0`) + prettier `--check` + `tsc --noEmit` + vitest. | + +### 5.4 Security + +| Job | What it does | +|---|---| +| `Secret scan (gitleaks)` | Repo-wide secret scan on every push and PR. Third independent layer. | +| `Python deps (pip-audit)` | CVE scan against `uv.lock`. Per-CVE ignore list at `.github/security/pip-audit-ignore.txt`. | +| `Frontend deps (npm audit)` | `--audit-level=high`. | +| `Container image scan (trivy)` | OS- and library-level CVE scan on the built image. | +| `Action pinning audit` | Walks every `.github/workflows/*.yml`. First-party = major tag; `astral-sh/setup-uv` = patch tag; third-party = SHA + `# vN.M.P` comment. | + +### 5.5 Operations (workflows on a schedule / trigger) + +| Workflow | When | What it does | +|---|---|---| +| `branch-protection.yml` | Weekly + dispatch | Re-applies `.github/branch-protection/*.json` via `gh api`. Drift re-assertion. | +| `eval-nightly.yml` | `workflow_dispatch` only by default | Runs `pytest eval/` against the configured LLM. Documented opt-in for `schedule:`. | +| `artifact-cleanup.yml` | Weekly + dispatch | Prunes Actions artifacts older than 7 days. | +| `release-drafter.yml` | Push to main + PR label events | Updates a draft GitHub Release under `v$RESOLVED_VERSION`. | +| `release.yml` | On tag `v*.*.*` | Builds the image, pushes to GHCR, generates a CycloneDX SBOM, publishes the release. | +| `changelog-rollup.yml` | After release.yml succeeds + dispatch | Opens a `chore: roll up CHANGELOG …` PR against `develop` — moves `[Unreleased]` entries under the released version's heading + bumps `pyproject.toml` + `uv.lock` PATCH. | +| `codeql.yml` | `workflow_dispatch` only (placeholder) | Static analysis. Activate when the repo is public or has GHAS. | + +--- + +## 6. Agent-level hooks — opt-in per developer + +`.claude/hooks/` scripts run *around* the LLM agent's own actions — pre-commit and CI run when *code* moves, hooks run when the *agent* moves. + +→ Source of truth: [.claude/hooks/](../.claude/hooks/) (3 Python scripts) + [.claude/settings.local.json.example](../.claude/settings.local.json.example). + +| Script | Purpose | +|---|---| +| `pretooluse_bash.py` | (1) Refuses Bash commands containing bypass flags (`git --no-verify`, `--no-hooks`, `--no-gpg-sign`); (2) on `git commit`, scans the staged diff for AWS/sk-/ghp_/PEM/Slack patterns and blocks if any match; (3) appends every Bash invocation to `.claude/bash-log.txt` (gitignored) for forensics. | +| `posttooluse_writeedit.py` | After every Write/Edit, dispatches the right formatter — ruff for `.py`, prettier for `.ts/.tsx/.js/.jsx/.css/.json/.html/.md`. | +| `sessionstart.py` | At session start, injects current branch + `git status --short` as `additionalContext` so the agent is grounded in the actual repo state. | + +Wire once: `cp .claude/settings.local.json.example .claude/settings.local.json`. + +Until you wire it, *your local agent's actions are not policed by these scripts* — but pre-commit and CI still are, so nothing slips through to `main`. + +--- + +## 7. Skills — guidance, not enforcement + +A **skill** is a markdown briefing the agent loads when its topic matches the current task. Skills shape *style*; they reject nothing. + +→ Source of truth: [.claude/skills/](../.claude/skills/). + +| Skill | When it activates | +|---|---| +| `architect` | Architecture decisions, module boundaries, tech-stack choices, API contracts. | +| `code-reviewer` | After code is written or edited. 10-point review checklist. | +| `devops` | Docker, docker-compose, CI/CD, `pyproject.toml`, observability config. | +| `frontend` | React/TS work in `frontend/`. | +| `qa-engineer` | Tests, eval harness, golden-dataset changes. | +| `technical-writer` | Docs, READMEs, inline documentation. | + +A skill is a librarian who hands you a relevant book before you start; a hook is a gate guard who refuses to let you leave with the wrong book. + +--- + +## 8. The evaluation harness — a one-page summary + +Distinct from the **build harness** (everything above), the **evaluation harness** tests whether the agent answers questions correctly. + +→ Source of truth: [docs/EVAL_HARNESS.md](EVAL_HARNESS.md), with the dataset in [eval/golden_qa.json](../eval/golden_qa.json) and the runner in [eval/test_golden_qa.py](../eval/test_golden_qa.py). + +**Shape:** + +- A **golden dataset** of question / expected-answer pairs (one trivial echo case ships; replace with your domain dataset). +- **Three tolerance modes**: `exact_match`, `numeric_close` (within 1 %), `semantic_similar` (LLM judge ≥ 0.8). +- **Provider-agnostic** — wire your concrete LLM client via the `LLMClient` Protocol in `src/eval/judge.py`. +- **Disabled-by-default nightly** ([eval-nightly.yml](../.github/workflows/eval-nightly.yml)) — `workflow_dispatch` only, opt-in to `schedule:` after configuring secrets. + +--- + +## 9. Process-as-code + +| Concern | File / mechanism | +|---|---| +| PR template | [.github/pull_request_template.md](../.github/pull_request_template.md). | +| Issue templates | [.github/ISSUE_TEMPLATE/](../.github/ISSUE_TEMPLATE/): `bug.md`, `feature.md`, `eval-regression.md`. Blank issues disabled. | +| Code ownership | [.github/CODEOWNERS](../.github/CODEOWNERS). | +| Branch protection | [.github/branch-protection/{main,develop}.json](../.github/branch-protection/) declarative configs, re-applied weekly by [branch-protection.yml](../.github/workflows/branch-protection.yml). | +| Commit message shape | Commitizen, configured in `pyproject.toml`. | +| Branching model | `feat/*` → `develop` → `main`. No direct commits. | + +The 7 allowed conventional-commit prefixes: + +```text +feat | fix | docs | test | refactor | chore | release +``` + +`release:` is project-specific — `develop → main` release PRs only. + +--- + +## 10. Defence in depth — two worked examples + +### 10.1 A secret is accidentally committed + +| Layer | What happens | +|---|---| +| **1. Pre-commit `gitleaks`** | Local hook refuses the commit; never makes a commit object. | +| **2. Agent-level secret-scan hook** *(opt-in)* | If the agent runs `git commit`, `pretooluse_bash.py` scans the staged diff itself and blocks. | +| **3. CI `Secret scan (gitleaks)`** | If layers 1–2 were bypassed, the GitHub Action runs on push and fails the PR. | +| **4. Manual review via CODEOWNERS** | Reviewers see the diff. | + +For the secret to land on `main`, **all four** have to fail or be bypassed on the same change. + +### 10.2 A boundary violation is introduced + +A developer adds `from src.api.routes import router` inside `src/models/foo.py`. + +| Layer | What happens | +|---|---| +| **1. Local `lint-imports`** | If the developer runs `just architecture` (or has it in their editor), they see the error at their desk. | +| **2. CI `Architecture (import-linter)`** | Runs on every push. `import-linter` reports the forbidden chain and exits non-zero. PR cannot merge. | + +The error names the offending module, line, and contract — no guessing. + +--- + +## 11. Operating within the harness — a survival guide + +| Error you see | What it means | What to do | +|---|---|---| +| `pydantic_core ... extra_forbidden` | A contract got a field name it doesn't recognise. Often a typo. | Open the model under `src/models/`. Fix the field name. | +| CI `Lint & Format` failed | Ruff caught a style or correctness issue. | `uv run ruff check . --fix` locally. | +| CI `Type Check` failed | mypy strict caught an untyped function or unsafe coercion. | The error names file + line. Add the missing annotation. | +| CI `Architecture (import-linter)` failed | An import crossed a layer boundary. | Read the chain. Move the code or delete the import. | +| CI `Lint PR title` failed | Title doesn't start with one of the 7 prefixes, or starts with Title Case. | Edit the PR title. Lowercase verb after the colon, or all-caps initialism. | +| CI `Version bump check` failed | `pyproject.toml` `[project].version` is unchanged from the base branch. | Bump pyproject.toml + the matching `[[package]]` block in `uv.lock`. | +| CI `Tests required` failed | `feat:`/`fix:` PR touched `src/` without touching `tests/`. | Add a test. If genuinely test-exempt, use `chore:` / `refactor:` instead. | +| CI `File length` failed | A `.py` file > 300 lines or a function > 50 stmts / 12 branches. | Split. There is no `# noqa` exemption. | +| CI `src/ README audit` failed | A package missing README, < 200 bytes, or missing `## Key interfaces`. | Add or extend the README. | +| CI `Aspirational ticket cite` failed | A `*Aspirational` line in `docs/INVARIANTS.md` cites no ticket. | File the ticket and add the cite, or remove the marker line if the rule is now enforced. | +| CI `Action pinning audit` failed | A `uses:` line doesn't match the bucket policy. | First-party = major tag; setup-uv = patch tag; third-party = SHA + `# vN.M.P` comment. | +| Local commit rejected by commitizen | Commit message doesn't match `(): `. | Re-run with the correct shape. | +| `Coverage` failed | Test coverage on `src/` dropped below 75 %. | Add tests for the lines flagged in the report. | + +> **Rule of thumb.** If a check rejects a change, the rejection message names the file, the line, and the rule. Read it carefully before reaching for an override flag — there is almost always a real bug under it. + +--- + +## 12. Setup checklist for your harness maintainer + +- [ ] **Install dependencies.** `uv sync --extra dev`. +- [ ] **Wire pre-commit.** `uv run pre-commit install --hook-type pre-commit --hook-type commit-msg`. +- [ ] **(Opt-in) Wire agent-level hooks.** `cp .claude/settings.local.json.example .claude/settings.local.json`. +- [ ] **Verify CI is green on a no-op PR** before merging real work. +- [ ] **Apply branch protection** by setting a `BRANCH_PROTECTION_TOKEN` secret with `admin:repo` scope and triggering `branch-protection.yml`. The default `GITHUB_TOKEN` cannot edit branch protection on the repo it runs in. +- [ ] **(Opt-in) Set `LLM_*` secrets** for the eval harness when you flip `eval-nightly.yml` to `schedule:`. +- [ ] **(Opt-in) Provision `RELEASE_BOT_TOKEN`** so `changelog-rollup.yml`'s auto-PR triggers its own CI matrix on creation. Falls back to `GITHUB_TOKEN` (auto-PR opens, only the auto-CI is lost). + +--- + +## 13. Glossary + +| Term | What it is | +|---|---| +| **Pydantic** | Python library for runtime data validation. The basis of every contract in this repo. | +| **mypy** | Static type checker. *Strict mode* refuses missing/implicit annotations. | +| **ruff** | Fast Rust-based Python linter and formatter. Catches style + dead code + security smells. | +| **import-linter** | Checks the import graph against declared *contracts* (layered, forbidden). | +| **pre-commit** | Framework that registers itself as a git hook so configured checks run before every commit. | +| **GitHub Actions** | GitHub's built-in CI. Workflows under `.github/workflows/` run on triggers. | +| **Conventional commits** | `(): ` shape with a fixed type set (this repo allows 7). | +| **Semantic versioning** | `MAJOR.MINOR.PATCH` — each component conveys the kind of change. | +| **OpenTelemetry (OTel)** | Vendor-neutral standard for traces, metrics, logs. The repo follows `gen_ai.*` and `db.*` semantic conventions for attribute names. | +| **CycloneDX** | An SBOM format. Generated per release and attached to the GitHub Release. | +| **gitleaks** | Pattern-based secret scanner. | + +--- + +## 14. Where to go next + +| Doc | What it covers | +|---|---| +| [HARNESS.md](HARNESS.md) | The engineer-facing map — same surface, terser, more linkable. | +| [INVARIANTS.md](INVARIANTS.md) | The numbered rules in full. | +| [BOUNDARIES.md](BOUNDARIES.md) | The dependency graph + the layer rules. | +| [ARCHITECTURE.md](ARCHITECTURE.md) | The system design — components, request flow. | +| [SECURITY.md](SECURITY.md) | Threat model + defence-in-depth mapping. | +| [EVAL_HARNESS.md](EVAL_HARNESS.md) | The eval flywheel. | +| [DEVELOPMENT.md](DEVELOPMENT.md) | Local setup, branching, releases. | diff --git a/docs/INVARIANTS.md b/docs/INVARIANTS.md index 74c3010..8935b09 100644 --- a/docs/INVARIANTS.md +++ b/docs/INVARIANTS.md @@ -14,7 +14,7 @@ Pydantic with `extra="forbid"` raises on unknown keys at construction. That kill A versioned prefix means future breaking changes ship at `/api/v2/` without coordinated client deploys. Typed responses mean an OpenAPI schema is correct by construction. - **Where:** `src/api/routes.py` -- **Enforced by:** route review; FastAPI's response model inference. +- **Enforced by:** `tests/test_route_versioning.py` walks `app.routes` and asserts every path matches `^/api/v\d+/` or is in the explicit `UNVERSIONED_ALLOWLIST` (FastAPI's auto-doc routes only); FastAPI's response model inference checks the typed-response side at request time. ## 3. Layer flow is one-way diff --git a/pyproject.toml b/pyproject.toml index 3fafc5a..1d80f8d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "harness-python-react" -version = "0.2.5" +version = "0.2.6" description = "Production-quality LLM-driven coding harness — Python (FastAPI) backend, Vite + React + TypeScript frontend." readme = "README.md" requires-python = ">=3.14" diff --git a/uv.lock b/uv.lock index bd15374..c20d4d6 100644 --- a/uv.lock +++ b/uv.lock @@ -328,7 +328,7 @@ wheels = [ [[package]] name = "harness-python-react" -version = "0.2.5" +version = "0.2.6" source = { virtual = "." } dependencies = [ { name = "fastapi" },