diff --git a/src/clayde/config.py b/src/clayde/config.py index 72e7be3..60a2b66 100644 --- a/src/clayde/config.py +++ b/src/clayde/config.py @@ -44,6 +44,7 @@ def effective_git_name(self) -> str: fs_reviewer: str = "max-tet" fs_parallelism: int = 1 fs_loop_interval_s: int = 120 + fs_max_branch_commits: int = 20 # Pebble webhook pebble_enabled: bool = False diff --git a/src/clayde/freeshard/loop.py b/src/clayde/freeshard/loop.py index 81ac2f8..e662453 100644 --- a/src/clayde/freeshard/loop.py +++ b/src/clayde/freeshard/loop.py @@ -15,11 +15,13 @@ from clayde.freeshard.repos import is_non_core from clayde.freeshard.steps import ( run_ci_fix, + run_escalate, run_handoff, run_implement, run_manual_verify, ) from clayde.github import ( + count_branch_commits, count_fix_commits, find_open_pr, get_assigned_issues, @@ -67,6 +69,8 @@ def _route( ref = f"{owner}/{repo}#{number}" if phase is Phase.IMPLEMENT: run_implement(g, owner, repo, number, default_branch) + elif phase is Phase.ESCALATE: + run_escalate(g, owner, repo, number, settings.fs_reviewer) elif phase is Phase.CI_FIX: ci_log = _ci_failure_summary(g, owner, repo, head) if head else "CI failed" run_ci_fix(g, owner, repo, number, default_branch, ci_log) @@ -117,6 +121,8 @@ def _process_issue(g, settings, issue) -> bool: is_rev = is_reviewer_assigned(g, owner, repo, pr_number, settings.fs_reviewer) fix_attempts = count_fix_commits(g, owner, repo, branch, default_branch) + attempts = count_branch_commits(g, owner, repo, branch, default_branch) + try: ci_required = has_ci_workflows(g, owner, repo) except Exception: @@ -131,6 +137,8 @@ def _process_issue(g, settings, issue) -> bool: max_is_reviewer=bool(is_rev), fix_attempts=fix_attempts, ci_required=ci_required, + attempts=attempts, + attempt_cap=settings.fs_max_branch_commits, ) log.info("%s — phase=%s", ref, phase) diff --git a/src/clayde/freeshard/phase.py b/src/clayde/freeshard/phase.py index 7ae2be1..c6c3a45 100644 --- a/src/clayde/freeshard/phase.py +++ b/src/clayde/freeshard/phase.py @@ -18,19 +18,21 @@ class Phase(StrEnum): MANUAL_VERIFY = "manual_verify" # PR open, CI failed, fix cap reached HANDOFF = "handoff" # PR open, CI green, Max not yet reviewer AWAITING_MERGE = "awaiting_merge" # PR open, CI green, Max is reviewer + ESCALATE = "escalate" # no open PR, branch-commit cap reached def derive_phase(*, pr_open: bool, ci_status: str | None, max_is_reviewer: bool, fix_attempts: int, fix_cap: int = 2, - ci_required: bool = True) -> Phase: + ci_required: bool = True, + attempts: int = 0, attempt_cap: int = 20) -> Phase: if not pr_open: - return Phase.IMPLEMENT + return Phase.ESCALATE if attempts >= attempt_cap else Phase.IMPLEMENT if not ci_required: return Phase.AWAITING_MERGE if max_is_reviewer else Phase.HANDOFF if ci_status in _CI_PENDING: return Phase.CI_WAIT if ci_status in _CI_FAILED: - return Phase.MANUAL_VERIFY if fix_attempts >= fix_cap else Phase.CI_FIX + return Phase.MANUAL_VERIFY if (fix_attempts >= fix_cap or attempts >= attempt_cap) else Phase.CI_FIX if ci_status in _CI_SUCCESS: return Phase.AWAITING_MERGE if max_is_reviewer else Phase.HANDOFF # Unrecognized status: never assume green. Treat as still pending — a diff --git a/src/clayde/freeshard/steps.py b/src/clayde/freeshard/steps.py index 73f7a39..1d7d5ae 100644 --- a/src/clayde/freeshard/steps.py +++ b/src/clayde/freeshard/steps.py @@ -71,6 +71,7 @@ def run_implement(g, owner: str, repo: str, number: int, default_branch: str) -> "Local verify failed for %s/%s#%d — leaving WIP on branch: %s", owner, repo, number, log_tail[-200:], ) + _push_branch(worktree, branch) return _push_branch(worktree, branch) @@ -121,6 +122,23 @@ def run_handoff(g, owner: str, repo: str, number: int, pr_number: int, reviewer: remove_worktree(owner, repo, number) +def run_escalate(g, owner: str, repo: str, number: int, reviewer: str) -> None: + """Add manual-verify-required label on issue, assign reviewer, post comment, remove worktree. + + Called when the branch-commit cap is reached with no open PR — auto-implement + gave up. Idempotent: if the label is already present, returns immediately. + """ + repo_obj = g.get_repo(f"{owner}/{repo}") + issue_obj = repo_obj.get_issue(number) + existing_labels = {lbl.name for lbl in issue_obj.get_labels()} + if "manual-verify-required" in existing_labels: + return + issue_obj.add_to_labels("manual-verify-required") + issue_obj.add_to_assignees(reviewer) + post_comment(g, owner, repo, number, "Auto-implement gave up after repeated failed attempts — needs your hands") + remove_worktree(owner, repo, number) + + def run_manual_verify(g, owner: str, repo: str, number: int, pr_number: int, reviewer: str) -> None: """Add manual-verify label, assign reviewer, post comment, remove worktree. diff --git a/src/clayde/github.py b/src/clayde/github.py index 69c89b6..83e8e2f 100644 --- a/src/clayde/github.py +++ b/src/clayde/github.py @@ -211,11 +211,18 @@ def get_ci_status(g: Github, owner: str, repo: str, ref: str) -> str | None: commit = _get_repo(g, owner, repo).get_commit(ref) states: list[str] = [] - combined = commit.get_combined_status().state # success|failure|pending|error - if combined and combined != "pending": - states.append("failure" if combined in ("failure", "error") else "success") - elif combined == "pending": - states.append("pending") + # Legacy commit-status API defaults state to "pending" when a repo has zero + # statuses (modern repos use check-runs only). Only trust it when statuses + # actually exist — otherwise it poisons a green check-runs result to pending. + combined_status = commit.get_combined_status() + if combined_status.total_count > 0: + combined = combined_status.state # success|failure|pending|error + if combined in ("failure", "error"): + states.append("failure") + elif combined == "pending": + states.append("pending") + else: + states.append("success") for run in commit.get_check_runs(): if run.status != "completed": @@ -251,6 +258,18 @@ def count_fix_commits(g: Github, owner: str, repo: str, branch: str, default_bra return sum(1 for c in commits if c.commit.message.startswith("fix(ci):")) +def count_branch_commits(g: Github, owner: str, repo: str, branch: str, default_branch: str) -> int: + """Return total number of commits unique to branch vs default_branch. + + Uses the compare API so only branch-unique commits are counted. Returns 0 + on GithubException (branch missing, branch equal to base, etc.). + """ + try: + return len(_get_repo(g, owner, repo).compare(default_branch, branch).commits) + except GithubException: + return 0 + + def has_ci_workflows(g: Github, owner: str, repo: str) -> bool: """Return True if the repo has any file under .github/workflows/. diff --git a/tests/freeshard/test_github_obs.py b/tests/freeshard/test_github_obs.py index 53ad77f..3ae9a13 100644 --- a/tests/freeshard/test_github_obs.py +++ b/tests/freeshard/test_github_obs.py @@ -7,6 +7,7 @@ def test_get_ci_status_maps_combined_state(): g = MagicMock() commit = g.get_repo.return_value.get_commit.return_value commit.get_combined_status.return_value.state = "success" + commit.get_combined_status.return_value.total_count = 1 commit.get_check_runs.return_value = [] assert get_ci_status(g, "o", "r", "abc") == "success" @@ -42,6 +43,7 @@ def test_get_ci_status_failure_dominates_success(): g = MagicMock() commit = g.get_repo.return_value.get_commit.return_value commit.get_combined_status.return_value.state = "success" + commit.get_combined_status.return_value.total_count = 1 run = MagicMock() run.status = "completed" run.conclusion = "failure" @@ -54,6 +56,7 @@ def test_get_ci_status_error_normalized_to_failure(): g = MagicMock() commit = g.get_repo.return_value.get_commit.return_value commit.get_combined_status.return_value.state = "error" + commit.get_combined_status.return_value.total_count = 1 commit.get_check_runs.return_value = [] assert get_ci_status(g, "o", "r", "abc") == "failure" @@ -63,6 +66,7 @@ def test_get_ci_status_pending_when_incomplete_check_run(): g = MagicMock() commit = g.get_repo.return_value.get_commit.return_value commit.get_combined_status.return_value.state = "success" + commit.get_combined_status.return_value.total_count = 1 run = MagicMock() run.status = "in_progress" run.conclusion = None @@ -94,3 +98,32 @@ def test_is_reviewer_assigned_false_neither_request_nor_review(): rev.user.login = "another" pr.get_reviews.return_value = [rev] assert not is_reviewer_assigned(g, "o", "r", 5, "maxtepkasper") + + +def test_get_ci_status_ignores_empty_combined_status(): + """A repo with zero legacy commit-statuses (total_count=0) reports combined + state 'pending' by default; green check-runs must NOT be poisoned to pending.""" + from unittest.mock import MagicMock + from clayde.github import get_ci_status + g = MagicMock() + commit = g.get_repo.return_value.get_commit.return_value + cs = commit.get_combined_status.return_value + cs.total_count = 0 + cs.state = "pending" + build = MagicMock(); build.status = "completed"; build.conclusion = "success" + deploy = MagicMock(); deploy.status = "completed"; deploy.conclusion = "skipped" + commit.get_check_runs.return_value = [build, deploy] + assert get_ci_status(g, "o", "r", "sha") == "success" + + +def test_get_ci_status_respects_real_combined_pending(): + """When commit-statuses DO exist and are pending, still report pending.""" + from unittest.mock import MagicMock + from clayde.github import get_ci_status + g = MagicMock() + commit = g.get_repo.return_value.get_commit.return_value + cs = commit.get_combined_status.return_value + cs.total_count = 2 + cs.state = "pending" + commit.get_check_runs.return_value = [] + assert get_ci_status(g, "o", "r", "sha") == "pending" diff --git a/tests/freeshard/test_loop.py b/tests/freeshard/test_loop.py index 842130d..45127e1 100644 --- a/tests/freeshard/test_loop.py +++ b/tests/freeshard/test_loop.py @@ -21,6 +21,7 @@ def _issue(url, number=7): def _settings(reviewer="max"): s = MagicMock() s.fs_reviewer = reviewer + s.fs_max_branch_commits = 20 return s @@ -76,8 +77,10 @@ def _patched_tick(phase, *, pr_url=None): "clayde.freeshard.loop.get_ci_status": MagicMock(return_value="failure"), "clayde.freeshard.loop.is_reviewer_assigned": MagicMock(return_value=False), "clayde.freeshard.loop.count_fix_commits": MagicMock(return_value=0), + "clayde.freeshard.loop.count_branch_commits": MagicMock(return_value=0), "clayde.freeshard.loop.derive_phase": MagicMock(return_value=phase), "clayde.freeshard.loop.run_implement": MagicMock(), + "clayde.freeshard.loop.run_escalate": MagicMock(), "clayde.freeshard.loop.run_ci_fix": MagicMock(), "clayde.freeshard.loop.run_handoff": MagicMock(), "clayde.freeshard.loop.run_manual_verify": MagicMock(), @@ -106,6 +109,24 @@ def test_phase_routes_to_correct_handler(phase, handler_key, pr_url, call_args): patches[handler_key].assert_called_once_with(g, *call_args) +@patch("clayde.freeshard.loop.run_escalate") +@patch("clayde.freeshard.loop.count_branch_commits", return_value=20) +@patch("clayde.freeshard.loop.derive_phase", return_value=Phase.ESCALATE) +@patch("clayde.freeshard.loop.is_blocked", return_value=False) +@patch("clayde.freeshard.loop.find_open_pr", return_value=None) +@patch("clayde.freeshard.loop.get_default_branch", return_value="main") +@patch("clayde.freeshard.loop.get_assigned_issues") +def test_tick_routes_escalate(mock_assigned, _gdb, _fop, _ib, _dp, _cbc, mock_escalate): + """No-PR card with attempts >= cap routes to run_escalate.""" + mock_assigned.return_value = [_issue(APP_ISSUE_URL)] + s = _settings() + s.fs_max_branch_commits = 20 + g = MagicMock() + n = loop.tick(g, s) + assert n == 1 + mock_escalate.assert_called_once_with(g, "FreeshardBase", "app-repository", 7, "max") + + @pytest.mark.parametrize("phase", [Phase.CI_WAIT, Phase.AWAITING_MERGE]) def test_noop_phases_do_not_call_handlers(phase): n, patches, _g = _patched_tick(phase, pr_url=APP_PR_URL) diff --git a/tests/freeshard/test_phase.py b/tests/freeshard/test_phase.py index 7715322..ab65a69 100644 --- a/tests/freeshard/test_phase.py +++ b/tests/freeshard/test_phase.py @@ -65,3 +65,58 @@ def test_no_ci_required_no_pr_still_implement(): pr_open=False, ci_status=None, max_is_reviewer=False, fix_attempts=0, ci_required=False, ) == Phase.IMPLEMENT + + +# --------------------------------------------------------------------------- +# Attempt cap: ESCALATE when branch-commit count hits the cap +# --------------------------------------------------------------------------- + +def test_no_pr_attempts_at_cap_means_escalate(): + assert derive_phase( + pr_open=False, ci_status=None, max_is_reviewer=False, fix_attempts=0, + attempts=20, attempt_cap=20, + ) == Phase.ESCALATE + + +def test_no_pr_attempts_below_cap_means_implement(): + assert derive_phase( + pr_open=False, ci_status=None, max_is_reviewer=False, fix_attempts=0, + attempts=19, attempt_cap=20, + ) == Phase.IMPLEMENT + + +def test_pr_ci_failed_attempts_at_cap_means_manual_verify(): + """Backstop: attempt cap overrides fix_cap for CI_FIX → MANUAL_VERIFY.""" + assert derive_phase( + pr_open=True, ci_status="failure", max_is_reviewer=False, + fix_attempts=0, fix_cap=2, + attempts=20, attempt_cap=20, + ) == Phase.MANUAL_VERIFY + + +def test_ci_success_at_cap_still_handoff(): + """Cap must not override terminal/waiting phases — CI green → HANDOFF regardless.""" + assert derive_phase( + pr_open=True, ci_status="success", max_is_reviewer=False, + fix_attempts=0, attempts=20, attempt_cap=20, + ) == Phase.HANDOFF + + +def test_ci_success_at_cap_with_reviewer_still_awaiting_merge(): + assert derive_phase( + pr_open=True, ci_status="success", max_is_reviewer=True, + fix_attempts=0, attempts=20, attempt_cap=20, + ) == Phase.AWAITING_MERGE + + +def test_ci_pending_at_cap_still_ci_wait(): + """attempts cap must NOT override a pending CI — still wait.""" + assert derive_phase(pr_open=True, ci_status="queued", max_is_reviewer=False, + fix_attempts=0, attempts=20, attempt_cap=20) == Phase.CI_WAIT + + +def test_ci_not_required_at_cap_still_handoff(): + """attempts cap must NOT override the no-CI early handoff.""" + assert derive_phase(pr_open=True, ci_status=None, max_is_reviewer=False, + fix_attempts=0, ci_required=False, + attempts=20, attempt_cap=20) == Phase.HANDOFF diff --git a/tests/freeshard/test_steps.py b/tests/freeshard/test_steps.py index 1dfe0d7..636f1fb 100644 --- a/tests/freeshard/test_steps.py +++ b/tests/freeshard/test_steps.py @@ -58,7 +58,7 @@ def test_implement_no_pr_when_verify_red( mock_wt.return_value = Path("/tmp/wt") steps.run_implement(MagicMock(), "o", "r", 7, "main") mock_pr.assert_not_called() - mock_push.assert_not_called() + mock_push.assert_called_once() @patch("clayde.freeshard.steps.local_verify", return_value=(True, "")) @@ -248,3 +248,44 @@ def test_manual_verify_is_idempotent(mock_apr, mock_pc, mock_rw): mock_pc.assert_not_called() mock_apr.assert_not_called() issue_obj.add_to_labels.assert_not_called() + + +# --------------------------------------------------------------------------- +# run_escalate +# --------------------------------------------------------------------------- + +@patch("clayde.freeshard.steps.remove_worktree") +@patch("clayde.freeshard.steps.post_comment") +def test_escalate_adds_label_assignee_comment_removes_worktree(mock_pc, mock_rw): + g = MagicMock() + issue_obj = MagicMock() + issue_obj.get_labels.return_value = [] + g.get_repo.return_value.get_issue.return_value = issue_obj + + steps.run_escalate(g, "o", "r", 7, "reviewer") + + issue_obj.add_to_labels.assert_called_once_with("manual-verify-required") + issue_obj.add_to_assignees.assert_called_once_with("reviewer") + mock_pc.assert_called_once() + comment_body = mock_pc.call_args[0][4] + assert "needs your hands" in comment_body + mock_rw.assert_called_once() + + +@patch("clayde.freeshard.steps.remove_worktree") +@patch("clayde.freeshard.steps.post_comment") +def test_escalate_is_idempotent(mock_pc, mock_rw): + """Second call when label already present must be a no-op.""" + g = MagicMock() + existing_label = MagicMock() + existing_label.name = "manual-verify-required" + issue_obj = MagicMock() + issue_obj.get_labels.return_value = [existing_label] + g.get_repo.return_value.get_issue.return_value = issue_obj + + steps.run_escalate(g, "o", "r", 7, "reviewer") + + issue_obj.add_to_labels.assert_not_called() + issue_obj.add_to_assignees.assert_not_called() + mock_pc.assert_not_called() + mock_rw.assert_not_called() diff --git a/tests/test_github.py b/tests/test_github.py index d08069a..53c7361 100644 --- a/tests/test_github.py +++ b/tests/test_github.py @@ -8,6 +8,7 @@ from clayde.github import ( _has_open_parent_issue, add_pr_reviewer, + count_branch_commits, create_pull_request, edit_comment, fetch_comment, @@ -293,6 +294,22 @@ def test_returns_pull_request_object(self): assert result is mock_pr +class TestCountBranchCommits: + def test_returns_commit_count(self): + g = MagicMock() + mock_commits = [MagicMock(), MagicMock(), MagicMock()] + g.get_repo.return_value.compare.return_value.commits = mock_commits + result = count_branch_commits(g, "alice", "repo", "clayde/issue-5", "main") + g.get_repo.return_value.compare.assert_called_once_with("main", "clayde/issue-5") + assert result == 3 + + def test_returns_zero_on_github_exception(self): + g = MagicMock() + g.get_repo.return_value.compare.side_effect = GithubException(404, "Not Found", None) + result = count_branch_commits(g, "alice", "repo", "clayde/issue-5", "main") + assert result == 0 + + class TestHasCiWorkflows: def test_returns_true_when_workflows_present(self): g = MagicMock()