From eca6454fa925b887f9aca1b36da60f7cd7d34b75 Mon Sep 17 00:00:00 2001 From: "const.koutsakis@aurecongroup.com" Date: Fri, 1 May 2026 00:23:02 +1000 Subject: [PATCH] =?UTF-8?q?chore:=20subject-case=20sync=20extension=20?= =?UTF-8?q?=E2=80=94=20commitizen=20+=20pr-title=20(#128,=20#154)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/scripts/check_commit_types.py | 202 ++++++++++++++---- pyproject.toml | 4 +- tests/test_check_commit_types_subject.py | 250 +++++++++++++++++++++++ uv.lock | 2 +- 4 files changed, 410 insertions(+), 48 deletions(-) create mode 100644 tests/test_check_commit_types_subject.py diff --git a/.github/scripts/check_commit_types.py b/.github/scripts/check_commit_types.py index c3d9ec9..b1fe7a7 100644 --- a/.github/scripts/check_commit_types.py +++ b/.github/scripts/check_commit_types.py @@ -1,22 +1,30 @@ #!/usr/bin/env python3 -"""Verify the commit-type allowlist stays in sync across two configs. +"""Verify the commit-type allowlist + subject-case rule stay in sync. -Seven prefixes are allowed on commits and PR titles: feat, fix, docs, -test, refactor, chore, release. Two places enforce that list today: +Two configs hand-encode the same conventional-commit policy: 1. ``[tool.commitizen].customize.schema_pattern`` in ``pyproject.toml`` — the commitizen regex (commit-msg hook, local). 2. ``.github/workflows/pr-title.yml`` ``types:`` input to the - ``amannn/action-semantic-pull-request`` step — the PR-title CI check. + ``amannn/action-semantic-pull-request`` step plus its ``subjectPattern`` + — the PR-title CI check. Both are hand-maintained. Add a type in one, forget the other, and the two layers drift: commits fail locally but PR titles pass (or vice versa). ``docs/DEVELOPMENT.md`` explicitly warns these must stay in sync, but prose warnings drift too. -This script mirrors the ``check_required_contexts.py`` pattern from #72 -for this second drift class. Fails CI when the two sets disagree in -either direction. +This script enforces sync on **two axes**: + +- **Type allowlist** — the seven prefixes (feat, fix, docs, test, + refactor, chore, release). Mirrors the ``check_required_contexts.py`` + pattern from #72. +- **Subject-case rule** — the negative-lookahead constraint that rejects + Title-Case subjects (``feat: Add thing`` → reject; ``feat: add thing`` + / ``feat: CI failure`` → accept). Added in #128 so commitizen rejects + Title Case at commit-msg time, not just at the CI gate. + +Fails CI when either axis disagrees in either direction. Usage (from repo root): @@ -48,6 +56,19 @@ # extraction when a future type contained digits or hyphens. _SCHEMA_ALTERNATION_RE = re.compile(r"\^\(([a-z0-9\-|]+)\)") +# Matches the subject-case constraint between `:\s` and the trailing `.+` +# in the commitizen schema_pattern. Tolerates three shapes seen across +# revisions: +# :\s — original #128 shape (single-space, susceptible to the +# `feat: Add thing` double-space bypass). +# :\s+ — naive widening (still backtracks on Title-Case input). +# :\s++ — possessive quantifier (#154); the schema we want long-term +# because it forbids the lookahead-bypass via backtracking. +# All three encode the same "after `:` then whitespace, then this lookahead" +# semantics; the regex captures the lookahead chunk regardless. +# Returns "" if no subject constraint is present (commitizen pre-#128 shape). +_SCHEMA_SUBJECT_RE = re.compile(r":\\s\+{0,2}(.*?)\.\+$") + def commitizen_types() -> set[str]: """Return the set of types allowed by the commitizen schema regex.""" @@ -83,28 +104,87 @@ def commitizen_types() -> set[str]: return types +def commitizen_subject_pattern() -> str: + """Extract the subject-case constraint from commitizen's schema_pattern. + + The schema_pattern shape (post-#128): + ^(feat|fix|...)(\\([\\w\\-]+\\))?!?:\\s(?![A-Z][a-z]).+ + + Returns the chunk between ``:\\s`` and the trailing ``.+`` — i.e. the + negative-lookahead constraint on the subject. Returns "" when no + subject constraint is present (commitizen pre-#128 shape). + """ + data = tomllib.loads(PYPROJECT.read_text(encoding="utf-8")) + schema: str = ( + data.get("tool", {}) + .get("commitizen", {}) + .get("customize", {}) + .get("schema_pattern", "") + ) + if not schema: + # Same error commitizen_types() raises — caller already enforces. + return "" + match = _SCHEMA_SUBJECT_RE.search(schema) + if not match: + return "" + return match.group(1) + + def pr_title_types() -> set[str]: """Return the set of types declared in the pr-title workflow.""" + return _pr_title_field("types", _parse_types) # type: ignore[return-value] + + +def pr_title_subject_pattern() -> str: + """Return the subject-case constraint declared in the pr-title workflow. + + Strips the leading ``^`` anchor and the trailing ``.+$`` from the + YAML ``subjectPattern`` field so the comparison with commitizen's + constraint is normalised. Returns "" when the field is absent. + """ + raw: str = _pr_title_field("subjectPattern", lambda v: v or "", required=False) # type: ignore[assignment] + if not raw: + return "" + pattern = re.sub(r"^\^", "", raw) + pattern = re.sub(r"\.\+\$$", "", pattern) + return pattern + + +def _parse_types(value: str) -> set[str]: + """Parse the YAML ``types`` field (newline-separated string) into a set.""" + types = {line.strip() for line in value.splitlines() if line.strip()} + if not types: + msg = ( + f"`types:` block in {PR_TITLE_YML} is empty or " + "whitespace-only. Expected at least one commit type per line." + ) + raise ValueError(msg) + return types + + +def _pr_title_field( + name: str, + parse: object, + *, + required: bool = True, +) -> object: + """Extract a single field from the action-semantic-pull-request step.""" data = yaml.safe_load(PR_TITLE_YML.read_text(encoding="utf-8")) for job in data.get("jobs", {}).values(): for step in job.get("steps", []): uses = step.get("uses", "") if "action-semantic-pull-request" in uses: - types_block: str = step.get("with", {}).get("types", "") - types = { - line.strip() for line in types_block.splitlines() if line.strip() - } - # An empty or whitespace-only `types:` block would return an - # empty set and trivially match an empty commitizen set — - # masking a real config error. Fail loudly instead (#92). - if not types: - msg = ( - f"`types:` block in {PR_TITLE_YML} is empty or " - "whitespace-only. Expected at least one commit type " - "per line." - ) - raise ValueError(msg) - return types + value = step.get("with", {}).get(name) + if value is None: + if required: + msg = ( + f"`with.{name}` not found in the " + "action-semantic-pull-request step. Update this " + "script if the action's input names changed." + ) + raise ValueError(msg) + return "" + return parse(value) # type: ignore[operator] msg = ( "Could not find an `amannn/action-semantic-pull-request` step in " f"{PR_TITLE_YML}. If the action was renamed or the file moved, " @@ -114,38 +194,70 @@ def pr_title_types() -> set[str]: def main() -> int: - cz = commitizen_types() - pr = pr_title_types() + cz_types = commitizen_types() + pr_types = pr_title_types() + cz_subject = commitizen_subject_pattern() + pr_subject = pr_title_subject_pattern() + + failed = False # Belt-and-braces safety net: both extractors raise on empty, but guard # against a future refactor that drops the raise (#92). - if not cz or not pr: + if not cz_types or not pr_types: print( "::error::One or both extractors returned empty; sync check cannot " - "proceed. commitizen_types() empty: " - f"{not cz}; pr_title_types() empty: {not pr}." + f"proceed. commitizen_types() empty: {not cz_types}; " + f"pr_title_types() empty: {not pr_types}." ) return 1 - if cz == pr: - print(f"Commit types in sync ({len(cz)} types): {sorted(cz)}") - return 0 + if cz_types == pr_types: + print(f"Commit types in sync ({len(cz_types)} types): {sorted(cz_types)}") + else: + failed = True + print( + "::error::[tool.commitizen].customize.schema_pattern and " + ".github/workflows/pr-title.yml types are out of sync" + ) + for name in sorted(cz_types - pr_types): + print(f"::error:: + in commitizen only: {name!r}") + for name in sorted(pr_types - cz_types): + print(f"::error:: - in pr-title.yml only: {name!r}") + print( + "\nFix: update both the schema_pattern in pyproject.toml AND " + "the `types` list in .github/workflows/pr-title.yml so they " + "contain the same type names. See docs/DEVELOPMENT.md#commit-messages." + ) + + if cz_subject == pr_subject: + if cz_subject: + print(f"Subject-case constraint in sync: {cz_subject!r}") + else: + # Both empty — older shape, before #128's subject-case landed in + # commitizen. Don't fail here; the `Lint PR title` workflow remains + # the single layer if commitizen drops back. Surface as a warning. + print( + "::warning::Both commitizen and pr-title.yml have empty " + "subject-case constraints. Per docs/DEVELOPMENT.md the rule " + "should be enforced at both layers — re-add `(?![A-Z][a-z])` " + "to commitizen's schema_pattern after `:\\s`." + ) + else: + failed = True + print( + "::error::commitizen schema_pattern subject-case constraint " + "and pr-title.yml `subjectPattern` are out of sync" + ) + print(f"::error:: commitizen extracted: {cz_subject!r}") + print(f"::error:: pr-title.yml extracted: {pr_subject!r}") + print( + "\nFix: keep both regexes equivalent after stripping anchors. " + "Commitizen's chunk lives between `:\\s` and `.+` in " + "schema_pattern; pr-title.yml's lives in the `subjectPattern` " + "field stripped of `^` and `.+$`." + ) - print( - "::error::[tool.commitizen].customize.schema_pattern and " - ".github/workflows/pr-title.yml types are out of sync" - ) - for name in sorted(cz - pr): - print(f"::error:: + in commitizen only: {name!r}") - for name in sorted(pr - cz): - print(f"::error:: - in pr-title.yml only: {name!r}") - print( - "\nFix: update both the schema_pattern in pyproject.toml AND " - "the `types` list in .github/workflows/pr-title.yml so they " - "contain the same type names. See docs/DEVELOPMENT.md#commit-messages " - "for the current allowed list." - ) - return 1 + return 1 if failed else 0 if __name__ == "__main__": diff --git a/pyproject.toml b/pyproject.toml index 1f84568..ab267f3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "harness-python-react" -version = "0.2.2" +version = "0.2.3" description = "Production-quality LLM-driven coding harness — Python (FastAPI) backend, Vite + React + TypeScript frontend." readme = "README.md" requires-python = ">=3.14" @@ -165,7 +165,7 @@ name = "cz_customize" [tool.commitizen.customize] schema = "(): " -schema_pattern = '^(feat|fix|docs|test|refactor|chore|release)(\([\w\-]+\))?!?:\s.+' +schema_pattern = '^(feat|fix|docs|test|refactor|chore|release)(\([\w\-]+\))?!?:\s++(?![A-Z][a-z]).+' bump_pattern = '^(feat|fix|refactor)' bump_map = {feat = "MINOR", fix = "PATCH", refactor = "PATCH", chore = "PATCH", docs = "PATCH", test = "PATCH"} example = "feat: add an example feature" diff --git a/tests/test_check_commit_types_subject.py b/tests/test_check_commit_types_subject.py new file mode 100644 index 0000000..e1a8f11 --- /dev/null +++ b/tests/test_check_commit_types_subject.py @@ -0,0 +1,250 @@ +"""Tests for the subject-case sync extension to ``check_commit_types.py``. + +#128 added a second axis to the existing commit-type sync gate: the +subject-case constraint must match between commitizen's ``schema_pattern`` +and pr-title.yml's ``subjectPattern``. Tests for the type-allowlist axis +(#91 / #92) live in ``test_check_commit_types.py``; this sibling file +isolates the subject-pattern tests so neither file approaches the +300-line cap. +""" + +from __future__ import annotations + +import importlib.util +import re +from pathlib import Path +from typing import TYPE_CHECKING +from unittest.mock import patch + +if TYPE_CHECKING: + from types import ModuleType + + import pytest + + +def _load_script() -> ModuleType: + script_path = ( + Path(__file__).parent.parent / ".github" / "scripts" / "check_commit_types.py" + ) + spec = importlib.util.spec_from_file_location("check_commit_types", script_path) + assert spec is not None + assert spec.loader is not None + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +cct = _load_script() + + +# --------------------------------------------------------------------------- +# commitizen_subject_pattern() — extracts the lookahead chunk from +# pyproject.toml's schema_pattern between `:\s` and the trailing `.+`. +# --------------------------------------------------------------------------- + + +class TestCommitizenSubjectExtraction: + """commitizen_subject_pattern() extracts the right chunk from pyproject.""" + + def test_extracts_negative_lookahead_when_present(self, tmp_path: Path) -> None: + pyproject = tmp_path / "pyproject.toml" + pyproject.write_text( + "[tool.commitizen.customize]\n" + "schema_pattern = " + r"'^(feat|fix)(\([\w\-]+\))?!?:\s(?![A-Z][a-z]).+'" + "\n", + encoding="utf-8", + ) + with patch.object(cct, "PYPROJECT", pyproject): + assert cct.commitizen_subject_pattern() == "(?![A-Z][a-z])" + + def test_returns_empty_when_no_subject_constraint(self, tmp_path: Path) -> None: + """Pre-#128 schema shape (just `.+` after `:\\s`) returns empty.""" + pyproject = tmp_path / "pyproject.toml" + pyproject.write_text( + "[tool.commitizen.customize]\n" + "schema_pattern = " + r"'^(feat|fix)(\([\w\-]+\))?!?:\s.+'" + "\n", + encoding="utf-8", + ) + with patch.object(cct, "PYPROJECT", pyproject): + assert cct.commitizen_subject_pattern() == "" + + def test_returns_empty_when_schema_absent(self, tmp_path: Path) -> None: + pyproject = tmp_path / "pyproject.toml" + pyproject.write_text("[tool.other]\nfoo = 1\n", encoding="utf-8") + with patch.object(cct, "PYPROJECT", pyproject): + assert cct.commitizen_subject_pattern() == "" + + +# --------------------------------------------------------------------------- +# pr_title_subject_pattern() — extracts the YAML subjectPattern field and +# strips `^` / `.+$` anchors so the comparison with commitizen normalises. +# --------------------------------------------------------------------------- + + +class TestPrTitleSubjectExtraction: + """pr_title_subject_pattern() parses YAML and strips anchors.""" + + def test_extracts_and_strips_anchors(self, tmp_path: Path) -> None: + yml = tmp_path / "pr-title.yml" + yml.write_text( + "jobs:\n" + " lint:\n" + " steps:\n" + " - uses: amannn/action-semantic-pull-request@abc\n" + " with:\n" + " types: |\n" + " feat\n" + " subjectPattern: '^(?![A-Z][a-z]).+$'\n", + encoding="utf-8", + ) + with patch.object(cct, "PR_TITLE_YML", yml): + assert cct.pr_title_subject_pattern() == "(?![A-Z][a-z])" + + def test_returns_empty_when_field_absent(self, tmp_path: Path) -> None: + yml = tmp_path / "pr-title.yml" + yml.write_text( + "jobs:\n" + " lint:\n" + " steps:\n" + " - uses: amannn/action-semantic-pull-request@abc\n" + " with:\n" + " types: |\n" + " feat\n", + encoding="utf-8", + ) + with patch.object(cct, "PR_TITLE_YML", yml): + assert cct.pr_title_subject_pattern() == "" + + +# --------------------------------------------------------------------------- +# main() with subject-pattern axis +# --------------------------------------------------------------------------- + + +def _setup_both( + tmp_path: Path, + cz_subject: str, + pr_subject: str, +) -> tuple[Path, Path]: + """Stub PYPROJECT + PR_TITLE_YML with the given subject-case patterns.""" + cz_extra = cz_subject if cz_subject else "" + cz_pattern = ( + r"^(feat|fix|docs|test|refactor|chore|release)(\([\w\-]+\))?!?:\s" + f"{cz_extra}.+" + ) + pyproject = tmp_path / "pyproject.toml" + pyproject.write_text( + f"[tool.commitizen.customize]\nschema_pattern = '{cz_pattern}'\n", + encoding="utf-8", + ) + pr_field = f" subjectPattern: '^{pr_subject}.+$'\n" if pr_subject else "" + yml = tmp_path / "pr-title.yml" + yml.write_text( + "jobs:\n" + " lint:\n" + " steps:\n" + " - uses: amannn/action-semantic-pull-request@abc\n" + " with:\n" + " types: |\n" + " feat\n fix\n docs\n test\n" + " refactor\n chore\n release\n" + f"{pr_field}", + encoding="utf-8", + ) + return pyproject, yml + + +def test_main_subjects_match_passes( + tmp_path: Path, capsys: pytest.CaptureFixture[str] +) -> None: + pyproject, yml = _setup_both(tmp_path, "(?![A-Z][a-z])", "(?![A-Z][a-z])") + with ( + patch.object(cct, "PYPROJECT", pyproject), + patch.object(cct, "PR_TITLE_YML", yml), + ): + assert cct.main() == 0 + out = capsys.readouterr().out + assert "Subject-case constraint in sync" in out + + +def test_main_subject_drift_fails( + tmp_path: Path, capsys: pytest.CaptureFixture[str] +) -> None: + """commitizen has the constraint, pr-title.yml doesn't → fail.""" + pyproject, yml = _setup_both(tmp_path, "(?![A-Z][a-z])", "") + with ( + patch.object(cct, "PYPROJECT", pyproject), + patch.object(cct, "PR_TITLE_YML", yml), + ): + assert cct.main() == 1 + err = capsys.readouterr().out + assert "subject-case constraint" in err + assert "out of sync" in err + + +def test_main_both_subjects_empty_warns_not_fails( + tmp_path: Path, capsys: pytest.CaptureFixture[str] +) -> None: + """Both empty (pre-#128 shape) → warn, don't block.""" + pyproject, yml = _setup_both(tmp_path, "", "") + with ( + patch.object(cct, "PYPROJECT", pyproject), + patch.object(cct, "PR_TITLE_YML", yml), + ): + assert cct.main() == 0 + out = capsys.readouterr().out + assert "::warning::" in out + assert "subject-case" in out + + +# --------------------------------------------------------------------------- +# #154 — schema rejects double-space-then-Title-Case (`feat: Add thing`) +# --------------------------------------------------------------------------- +# +# The schema_pattern landed in #128 used `:\s` (single whitespace), which the +# negative lookahead `(?![A-Z][a-z])` then anchors to the *first* non-space +# character. With `\s` consuming exactly one space, `feat: Add thing` (two +# spaces) bypassed the constraint: `\s` ate the first space, the lookahead +# saw the second space (not `[A-Z]`), succeeded, and `.+` swallowed the +# Title-Case subject. #154 widens to `\s+` so the lookahead anchors to the +# first non-whitespace character regardless of run length. +# +# These tests exercise the LIVE pyproject.toml schema_pattern (not a fixture) +# so a future regression that drops back to `\s` fails here. + + +def _live_schema_pattern() -> re.Pattern[str]: + import tomllib # local import — stdlib, only this test class needs it + + data = tomllib.loads(cct.PYPROJECT.read_text(encoding="utf-8")) + schema: str = data["tool"]["commitizen"]["customize"]["schema_pattern"] + return re.compile(schema) + + +class TestSchemaRejectsTitleCaseRegardlessOfSpaceCount: + """The negative lookahead must anchor to the first non-whitespace char.""" + + def test_single_space_lowercase_subject_passes(self) -> None: + assert _live_schema_pattern().fullmatch("feat: add thing") + + def test_single_space_title_case_subject_fails(self) -> None: + assert not _live_schema_pattern().fullmatch("feat: Add thing") + + def test_double_space_lowercase_subject_passes(self) -> None: + """Liberal on input shape — multi-space after `:` is unusual but not banned.""" + assert _live_schema_pattern().fullmatch("feat: add thing") + + def test_double_space_title_case_subject_fails(self) -> None: + """The #154 regression — pre-fix this passed silently.""" + assert not _live_schema_pattern().fullmatch("feat: Add thing") + + def test_triple_space_title_case_subject_fails(self) -> None: + """Liberal `\\s+` keeps anchoring regardless of run length.""" + assert not _live_schema_pattern().fullmatch("feat: Add thing") + + def test_subject_starting_with_uppercase_acronym_passes(self) -> None: + """`CI failure` is two-uppercase-letters — lookahead `[A-Z][a-z]` ignores it.""" + assert _live_schema_pattern().fullmatch("feat: CI failure") diff --git a/uv.lock b/uv.lock index e4cbcb4..5edabc5 100644 --- a/uv.lock +++ b/uv.lock @@ -328,7 +328,7 @@ wheels = [ [[package]] name = "harness-python-react" -version = "0.2.2" +version = "0.2.3" source = { virtual = "." } dependencies = [ { name = "fastapi" },