diff --git a/.github/aw/actions-lock.json b/.github/aw/actions-lock.json index 29f0fd6c..f53dac8d 100644 --- a/.github/aw/actions-lock.json +++ b/.github/aw/actions-lock.json @@ -35,11 +35,6 @@ "version": "v7", "sha": "043fb46d1a93c77aae656e7c1c64a875d1fc6a0a" }, - "github/gh-aw-actions/setup-cli@v0.74.4": { - "repo": "github/gh-aw-actions/setup-cli", - "version": "v0.74.4", - "sha": "d3abfe96a194bce3a523ed2093ddedd5704cdf62" - }, "github/gh-aw-actions/setup@v0.74.4": { "repo": "github/gh-aw-actions/setup", "version": "v0.74.4", diff --git a/.github/workflows/crane.md b/.github/workflows/crane.md index cc65e9f5..795c11b7 100644 --- a/.github/workflows/crane.md +++ b/.github/workflows/crane.md @@ -247,7 +247,7 @@ The pre-step fetches open issues with the `crane-migration` label via the GitHub When a migration is issue-based, `/tmp/gh-aw/crane.json` includes: - **`selected_issue`**: The issue number (e.g., `42`) if the selected migration came from an issue, or `null` if it came from a file. - **`issue_migrations`**: A mapping of migration name -> issue number for all issue-based migrations found. -- **`stale_completed_state`**: Issue-based migrations whose completion state is not trustworthy. This includes active issues that still have `crane-migration` even though repo-memory says `Completed: true`, and completed-label issues whose current PR-head completion gate did not pass. +- **`stale_completed_state`**: Issue-based migrations whose completion state is not trustworthy. This includes active issues that still have `crane-migration` even though repo-memory says `Completed: true`, and completed-label issues whose current up-to-date PR-head completion gate did not pass. ### Reading Migrations @@ -261,7 +261,7 @@ The pre-step has already determined which migration to run. Read `/tmp/gh-aw/cra - **`selected_strategy`**: The `strategy` value from the migration's frontmatter -- one of `"in-place"`, `"greenfield"`, or `"auto"`. If `"auto"`, the agent must pick on the first iteration and write the chosen strategy back into the state file's Machine State table. - **`state_file_size_bytes`** / **`state_file_max_bytes`**: For rolling-compaction decisions (see [Update Rules](#update-rules)). - **`issue_migrations`**: A mapping of migration name -> issue number for all discovered issue-based migrations. -- **`stale_completed_state`**: A list of issue-based migrations where repo-memory still says `Completed: true`, but the issue label state or PR-head completion gate says the migration is not safely complete; treat this as stale memory, not as permission to finish. +- **`stale_completed_state`**: A list of issue-based migrations where repo-memory still says `Completed: true`, but the issue label state or up-to-date PR-head completion gate says the migration is not safely complete; treat this as stale memory, not as permission to finish. - **`deferred`**: Other migrations that were due but will be handled in future runs. - **`unconfigured`**: Migrations that still have the sentinel or placeholder content. - **`skipped`**: Migrations not due yet based on their per-migration schedule, or completed/paused. @@ -277,7 +277,7 @@ If `selected` is not null: 4. Read the state file `{selected}.md` from the repo-memory folder. This contains the Machine State table, the Migration Plan, lessons, blockers, and iteration history. 5. If `selected_issue` is not null, also read the issue comments for any human steering input. 6. If `selected` appears in `stale_completed_state`, ignore any pre-existing `Completed: true`, `Completed Reason`, target-satisfying `best_metric`, or `crane-completed` label as a completion signal. Restore the issue to active migration state by ensuring `crane-migration` is present and `crane-completed` is absent unless the deterministic completion gate passes later in this run. First run the current verification contract and only enter the completion-candidate path after a fresh accepted iteration satisfies the target metric. -7. Before making code changes, inspect the state file's `Completion Candidate` field. If it is `true`, run the deterministic completion gate in [Halting Condition](#halting-condition). Only finalize completion if the current PR head checks pass. If the gate is failing, fix the failing PR checks instead of starting unrelated migration work. If the gate is pending or unavailable, leave the issue active and update `Completion Gate Status`. +7. Before making code changes, inspect the state file's `Completion Candidate` field. If it is `true`, run the deterministic completion gate in [Halting Condition](#halting-condition). Only finalize completion if the current PR head contains the current base branch SHA and the current PR head checks pass. If the gate is failing, stale, behind, or diverged, fix the PR branch/checks instead of starting unrelated migration work. If the gate is pending or unavailable, leave the issue active and update `Completion Gate Status`. ## Multiple Migrations @@ -617,7 +617,7 @@ If `status == "failure"`, **fix and retry -- do not revert, do not accept**: - Update **[docs] Lessons Learned** if this iteration revealed something new (e.g. a bridging trick, a parity surprise, a perf trap). - Update **[scope] Future Work** if this iteration opened new threads. 5. **Update the migration issue**: edit the status comment and post a per-iteration comment using the same shared accepted iteration summary. -6. **Check halting condition** (see [Halting Condition](#halting-condition)): if `target-metric` is set, compare the new `best_metric` from this accepted iteration against it. For `higher` direction, the target is reached when `best_metric >= target-metric`. Reaching the target metric does **not** complete the migration in this run. It creates a completion candidate: set `Completion Candidate: true`, set `Completion Gate: pr-head-checks`, set `Completion Gate Status: pending`, keep `Completed: false`, keep `Completed Reason: --`, and leave the `crane-migration` label on the issue. Completion is finalized only by a later run after the pushed PR head's deterministic checks are observed green. +6. **Check halting condition** (see [Halting Condition](#halting-condition)): if `target-metric` is set, compare the new `best_metric` from this accepted iteration against it. For `higher` direction, the target is reached when `best_metric >= target-metric`. Reaching the target metric does **not** complete the migration in this run. It creates a completion candidate: set `Completion Candidate: true`, set `Completion Gate: up-to-date-pr-head-checks`, set `Completion Gate Status: pending`, keep `Completed: false`, keep `Completed Reason: --`, and leave the `crane-migration` label on the issue. Completion is finalized only by a later run after the pushed PR head contains the current base SHA and its deterministic checks are observed green. **If the score did not improve**: 1. Discard the code changes (do not commit them to the long-running branch). @@ -727,7 +727,7 @@ After **every iteration** (accepted, rejected, or error), post a **new comment** ## Halting Condition -Migrations are usually **goal-oriented** -- you want to finish. Set `target-metric: 1.0` in the frontmatter to nominate a migration for completion once the health score reaches 1.0 (which, with the recommended `correctness x progress` convention, means "fully migrated and verified"). The metric is necessary but not sufficient: final completion always requires a deterministic PR-head completion gate. +Migrations are usually **goal-oriented** -- you want to finish. Set `target-metric: 1.0` in the frontmatter to nominate a migration for completion once the health score reaches 1.0 (which, with the recommended `correctness x progress` convention, means "fully migrated and verified"). The metric is necessary but not sufficient: final completion always requires a deterministic up-to-date PR-head completion gate. ### How It Works @@ -737,21 +737,22 @@ Migrations are usually **goal-oriented** -- you want to finish. Set `target-metr 4. For `lower` direction: the target is reached when `best_metric <= target-metric`. 5. When the target is reached, create a completion candidate instead of completing immediately: - Set `Completion Candidate: true`. - - Set `Completion Gate: pr-head-checks`. + - Set `Completion Gate: up-to-date-pr-head-checks`. - Set `Completion Gate Status: pending`. - Keep `Completed: false` and `Completed Reason: --`. - For issue-based migrations, keep the `crane-migration` label and do not add `crane-completed`. - Update the status comment to Active with a note that the migration is waiting on deterministic PR-head checks. 6. On the next run while `Completion Candidate: true`, run the deterministic completion gate before making code changes: - Resolve the migration PR from `existing_pr` or the state file's `PR` field. - - Query the PR's current head SHA via the GitHub API. + - Query the PR's current head SHA and current base SHA via the GitHub API. + - Query `GET /repos/{owner}/{repo}/compare/{base_sha}...{head_sha}`. The gate is stale unless the compare status is `ahead` or `identical`. If it is `behind`, `diverged`, or unavailable, merge/rebase the current base into the PR branch, push the PR branch, and wait for fresh checks. Do not mark the migration complete from checks produced before the current base SHA was included. - Query check-runs/check-suites for that exact PR head SHA, or use `gh pr checks "$PR" --json name,conclusion,state,startedAt,completedAt`. - - The gate passes only if every check for the current PR head is terminal success. Treat missing checks, pending checks, queued checks, failing checks, cancelled checks, timed-out checks, stale checks, and action-required checks as not passing. + - The gate passes only if every check for the current up-to-date PR head is terminal success. Treat missing checks, pending checks, queued checks, failing checks, cancelled checks, timed-out checks, stale checks, report-mode migration checks, and action-required checks as not passing. - If the gate is pending or missing, leave `Completion Candidate: true`, keep `Completed: false`, ensure `crane-migration` is present, ensure `crane-completed` is absent, set `Completion Gate Status: pending:`, and end without completing. - If the gate fails, leave `Completion Candidate: true`, keep `Completed: false`, ensure `crane-migration` is present, ensure `crane-completed` is absent, set `Completion Gate Status: failing:`, and fix the failing PR checks. 7. Only when the deterministic completion gate passes: - Set `Completed: true` in the Machine State table. - - Set `Completed Reason` to a human-readable message that includes both the target metric and PR-head check evidence (e.g., `target metric 1.0 reached; PR #123 head abc1234 checks passed`). + - Set `Completed Reason` to a human-readable message that includes the target metric, PR head SHA, current base SHA, and check evidence (e.g., `target metric 1.0 reached; PR #123 head abc1234 contains base def5678 and strict checks passed`). - Set `Completion Candidate: false`. - Set `Completion Gate Status: passed:`. - **For issue-based migrations**: remove the `crane-migration` label, add the `crane-completed` label. @@ -759,7 +760,7 @@ Migrations are usually **goal-oriented** -- you want to finish. Set `target-metr - Post a celebratory per-iteration comment: `[+] **Migration complete!** {source} -> {target} finished after {N} iterations.` - The migration will not be selected for future runs. -Do not enter final completion from repo-memory alone. A stored `Completed: true`, old `Completed Reason`, historical `best_metric`, or same-run sandbox score is only evidence about a previous or local run. Final completion requires deterministic evidence from the current PR head after safe outputs have pushed the branch and GitHub Actions has reported the head checks. +Do not enter final completion from repo-memory alone. A stored `Completed: true`, old `Completed Reason`, historical `best_metric`, or same-run sandbox score is only evidence about a previous or local run. Final completion requires deterministic evidence from the current PR head after safe outputs have pushed a branch that contains the current base SHA and GitHub Actions has reported strict checks for that up-to-date head. ### Open-Ended Migrations @@ -810,7 +811,7 @@ When creating or updating a migration's state file, use this structure: | Completed | false | | Completed Reason | -- | | Completion Candidate | false | -| Completion Gate | pr-head-checks | +| Completion Gate | up-to-date-pr-head-checks | | Completion Gate Status | -- | | Consecutive Errors | 0 | | Recent Statuses | -- | @@ -911,7 +912,7 @@ All iterations in reverse chronological order (newest first). | Completed | `true` or `false` | Whether the deterministic completion gate has passed and the migration is final | | Completed Reason | text or `--` | e.g., `target metric 1.0 reached; PR #123 head abc1234 checks passed` | | Completion Candidate | `true` or `false` | Whether the target metric has been reached and the migration is waiting for deterministic PR-head checks | -| Completion Gate | text | Deterministic gate required for final completion. Default: `pr-head-checks` | +| Completion Gate | text | Deterministic gate required for final completion. Default: `up-to-date-pr-head-checks` | | Completion Gate Status | text or `--` | Latest gate result, such as `pending:`, `failing:`, or `passed:` | | Consecutive Errors | integer | Count of consecutive verification failures | | Recent Statuses | comma-separated | Last 10 outcomes: `accepted`, `rejected`, `error`, or `ci-fix-exhausted` | diff --git a/.github/workflows/scripts/crane_scheduler.py b/.github/workflows/scripts/crane_scheduler.py index 2b30fe89..f181cb8d 100644 --- a/.github/workflows/scripts/crane_scheduler.py +++ b/.github/workflows/scripts/crane_scheduler.py @@ -597,7 +597,8 @@ def get_pr_head_check_gate(repo, pr_number, github_token, http_get_json=_http_ge """Return ``(passed, reason)`` for the deterministic PR-head check gate. ``passed`` is: - True - the current PR head has at least one check run and all are success + True - the current PR head contains the current base and all check runs + are success False - the PR exists, but checks are missing, pending, or failing None - the API could not provide enough data to decide """ @@ -615,6 +616,26 @@ def get_pr_head_check_gate(repo, pr_number, github_token, http_get_json=_http_ge head_sha = head.get("sha") if isinstance(head, dict) else None if not head_sha: return None, "pr-head-sha-unavailable" + base = pr_body.get("base") or {} + base_sha = base.get("sha") if isinstance(base, dict) else None + if not base_sha: + return None, "pr-base-sha-unavailable:{}".format(head_sha[:12]) + + compare_url = "https://api.github.com/repos/{}/compare/{}...{}".format( + repo, + base_sha, + head_sha, + ) + compare_body, _ = http_get_json(compare_url, headers) + if not isinstance(compare_body, dict): + return None, "base-compare-unavailable:{}:{}".format(base_sha[:12], head_sha[:12]) + compare_status = compare_body.get("status") + if compare_status not in {"ahead", "identical"}: + return False, "stale-base:{}:{}:base:{}".format( + head_sha[:12], + compare_status or "unknown", + base_sha[:12], + ) check_runs = [] next_url = "https://api.github.com/repos/{}/commits/{}/check-runs?per_page=100".format( diff --git a/tests/unit/test_crane_scheduler.py b/tests/unit/test_crane_scheduler.py index bda7ebee..4db57696 100644 --- a/tests/unit/test_crane_scheduler.py +++ b/tests/unit/test_crane_scheduler.py @@ -213,7 +213,12 @@ def test_pr_head_gate_fails_when_any_check_is_not_success() -> None: def fake_http_get_json(url, _headers, timeout=30): del timeout if url.endswith("/pulls/102"): - return {"head": {"sha": "abcdef1234567890"}}, None + return { + "base": {"sha": "1111111111111111"}, + "head": {"sha": "abcdef1234567890"}, + }, None + if "/compare/1111111111111111...abcdef1234567890" in url: + return {"status": "ahead"}, None if "/check-runs" in url: return { "check_runs": [ @@ -239,7 +244,12 @@ def test_pr_head_gate_passes_only_when_all_checks_succeed() -> None: def fake_http_get_json(url, _headers, timeout=30): del timeout if url.endswith("/pulls/102"): - return {"head": {"sha": "abcdef1234567890"}}, None + return { + "base": {"sha": "1111111111111111"}, + "head": {"sha": "abcdef1234567890"}, + }, None + if "/compare/1111111111111111...abcdef1234567890" in url: + return {"status": "ahead"}, None if "/check-runs" in url: return { "check_runs": [ @@ -258,3 +268,29 @@ def fake_http_get_json(url, _headers, timeout=30): assert passed is True assert reason == "passed:abcdef123456" + + +def test_pr_head_gate_fails_when_pr_head_does_not_contain_current_base() -> None: + def fake_http_get_json(url, _headers, timeout=30): + del timeout + if url.endswith("/pulls/117"): + return { + "base": {"sha": "e96b795d2540d4f991ee0c04cb4cfc67d8d74960"}, + "head": {"sha": "f02847267a544d9700e885f6edebe15f4c4aa406"}, + }, None + if ( + "/compare/e96b795d2540d4f991ee0c04cb4cfc67d8d74960...f02847267a544d9700e885f6edebe15f4c4aa406" + in url + ): + return {"status": "diverged"}, None + raise AssertionError(f"unexpected URL: {url}") + + passed, reason = crane_scheduler.get_pr_head_check_gate( + "githubnext/apm", + 117, + "token", + http_get_json=fake_http_get_json, + ) + + assert passed is False + assert reason == "stale-base:f02847267a54:diverged:base:e96b795d2540" diff --git a/tests/unit/test_crane_workflow_prompt.py b/tests/unit/test_crane_workflow_prompt.py index f7f7c48e..2f48a143 100644 --- a/tests/unit/test_crane_workflow_prompt.py +++ b/tests/unit/test_crane_workflow_prompt.py @@ -48,10 +48,10 @@ def test_crane_completion_is_two_phase_and_pr_head_gated() -> None: assert "Reaching the target metric does **not** complete the migration in this run" in text assert "Completion Candidate: true" in text - assert "Completion Gate: pr-head-checks" in text + assert "Completion Gate: up-to-date-pr-head-checks" in text assert "leave the `crane-migration` label on the issue" in text - assert "current PR head checks pass" in text - assert "every check for the current PR head is terminal success" in text + assert "current PR head contains the current base branch SHA" in text + assert "every check for the current up-to-date PR head is terminal success" in text assert "Completion Gate Status: passed:" in text @@ -59,6 +59,6 @@ def test_crane_state_template_tracks_completion_candidate_gate() -> None: text = _workflow_text() assert "| Completion Candidate | false |" in text - assert "| Completion Gate | pr-head-checks |" in text + assert "| Completion Gate | up-to-date-pr-head-checks |" in text assert "| Completion Gate Status | -- |" in text assert "Whether the target metric has been reached and the migration is waiting" in text