diff --git a/.agents/skills/agents-shipgate/SKILL.md b/.agents/skills/agents-shipgate/SKILL.md index ece7eac2..372a152b 100644 --- a/.agents/skills/agents-shipgate/SKILL.md +++ b/.agents/skills/agents-shipgate/SKILL.md @@ -5,7 +5,7 @@ description: Use when the user wants to add or run Agents Shipgate — the deter # Agents Shipgate -Agents Shipgate is the deterministic merge gate for AI-generated agent capability changes — a local-first, static Tool-Use Readiness review. It reads `shipgate.yaml` plus local tool sources and writes deterministic reports as Markdown, JSON, SARIF, and Release Evidence Packets. +Agents Shipgate is the deterministic merge gate for AI-generated agent capability changes — a local-first, static Tool-Use Readiness review. It reads `shipgate.yaml` plus local tool sources and writes deterministic reports as Markdown, JSON, SARIF, and supporting Release Evidence Packets. Use this skill when a task touches agent tools, MCP exports, OpenAPI specs, prompts that constrain tool use, permissions/scopes, approval or confirmation policies, `shipgate.yaml`, Shipgate CI, or `agents-shipgate-reports/report.json`. diff --git a/.agents/skills/agents-shipgate/references/recipes.md b/.agents/skills/agents-shipgate/references/recipes.md index 3f8ff09c..a663b4f5 100644 --- a/.agents/skills/agents-shipgate/references/recipes.md +++ b/.agents/skills/agents-shipgate/references/recipes.md @@ -117,9 +117,10 @@ Read `agents-shipgate-reports/agent-handoff.json` first. Lead with `next_action`, `controller`, and `fix_task.safe_to_attempt`. Then read `agents-shipgate-reports/verifier.json` for detailed controller context and `agents-shipgate-reports/report.json`; `release_decision.decision` remains the -gate. Use `verifier_summary` only as a composition summary: its `verdict` -mirrors `release_decision.decision` and it adds counts for protected-surface -touches, policy weakening, human acknowledgement, and top reason codes. +gate. 
`capability_review.top_changes[]` and `verifier_summary` are +supporting/provisional composition summaries: their verdict-like values mirror +`release_decision.decision`, and they add counts for protected-surface touches, +policy weakening, human acknowledgement, and top reason codes. Do not bypass the verifier. Do not suppress findings, lower severity, expand baselines or waivers, remove Shipgate CI, or weaken agent instructions to make diff --git a/.agents/skills/agents-shipgate/references/report-reading.md b/.agents/skills/agents-shipgate/references/report-reading.md index 4b85b9f7..454258fa 100644 --- a/.agents/skills/agents-shipgate/references/report-reading.md +++ b/.agents/skills/agents-shipgate/references/report-reading.md @@ -10,8 +10,9 @@ and `agents-shipgate-reports/report.json` for findings. Do not scrape Markdown. 2. `agent-handoff.json.capability_review.top_changes[]`: the highest-signal tool/action or trust-root changes. 3. `agent-handoff.json.next_action` / `controller` / `fix_task`: who acts next and whether a coding agent may safely attempt the fix. 4. `report.json.release_decision.decision`: `blocked`, `review_required`, `insufficient_evidence`, or `passed`; this is the release gate. -5. `release_decision.blockers[]` and `release_decision.review_items[]`. -6. `findings[]`: detailed evidence, source, severity, and remediation. +5. `verifier.json.capability_review.top_changes[]`: supporting/provisional highest-signal tool/action or trust-root changes. +6. `release_decision.blockers[]` and `release_decision.review_items[]`. +7. `findings[]`: detailed evidence, source, severity, and remediation. ## Verifier Summary @@ -27,8 +28,9 @@ When `report_schema_version` is `0.22` or newer, read state; a coding agent must not synthesize acknowledgement. - `top_reason_codes[]`: ranked reason-code counts for concise summaries. -This block is a deterministic projection. 
It cannot introduce a blocker that -is not already present in `findings[]` and `release_decision`. +This block is a supporting/provisional deterministic projection. It cannot +introduce a blocker that is not already present in `findings[]` and +`release_decision`. ## Per-Finding Action diff --git a/.cursor/rules/agents-shipgate.mdc b/.cursor/rules/agents-shipgate.mdc index 642b49bb..0fb85ec3 100644 --- a/.cursor/rules/agents-shipgate.mdc +++ b/.cursor/rules/agents-shipgate.mdc @@ -71,6 +71,8 @@ fetches. Read `agents-shipgate-reports/agent-handoff.json` first for `agents-shipgate-reports/verify-run.json` for reproducibility metadata, and `agents-shipgate-reports/report.json.release_decision.decision` for the release gate. +Legacy `agent-result.json` surfaces, where present, are supporting/provisional +projections and not the CI gate. Apply only high-confidence safe patches. Do not invent approval, confirmation, or idempotency evidence. diff --git a/.github/workflows/agents-shipgate-self.yml b/.github/workflows/agents-shipgate-self.yml new file mode 100644 index 00000000..d5de0522 --- /dev/null +++ b/.github/workflows/agents-shipgate-self.yml @@ -0,0 +1,26 @@ +name: Agents Shipgate Self Dogfood + +on: + pull_request: + +permissions: + contents: read + +jobs: + verify-self: + runs-on: ubuntu-latest + timeout-minutes: 10 + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd + with: + fetch-depth: 0 + + - uses: ./ + with: + config: shipgate-self.yaml + verify_mode: verify + ci_mode: advisory + diff_base: target + fail_on_merge_verdicts: blocked + upload_artifact: "true" + pr_comment: "false" diff --git a/.well-known/agents-shipgate.json b/.well-known/agents-shipgate.json index 9e6f8c58..b980a6dc 100644 --- a/.well-known/agents-shipgate.json +++ b/.well-known/agents-shipgate.json @@ -153,6 +153,7 @@ "capability_standard_version": "0.1", "governance_benchmark_catalog_schema_version": "0.2", "governance_benchmark_result_schema_version": "0.2", + 
"supporting_provisional_surfaces": ["agent_result", "agent_decision", "release_evidence_packet", "reviewer_summary", "verifier_summary", "capability_review", "runtime_trace_evidence", "capability_diff_projections", "skill_review"], "external_integration_surfaces": ["agent_handoff", "preflight", "capability_lock", "capability_lock_diff", "capability_standard", "attestation", "registry", "org_governance", "org_evidence_bundle", "host_grants_inventory", "governance_benchmark_catalog", "governance_benchmark_result"], "agent_interface_operations": ["verify_pr", "verify_local", "verify_preview"], "exit_code_policy": { @@ -165,6 +166,7 @@ }, "mcp_tools": ["shipgate.check", "shipgate.preflight", "shipgate.explain", "shipgate.capabilities", "shipgate.handoff"], "gating_signal": "release_decision.decision", + "agent_controller_signals": ["merge_verdict", "applicability", "agent_controller"], "merge_verdicts": ["mergeable", "human_review_required", "insufficient_evidence", "blocked", "unknown"], "check_run_policies": ["advisory", "blocked-fails", "require-mergeable"], "github_action_pr_workflow": { @@ -187,6 +189,7 @@ }, "verifier_read_order": [ "merge_verdict", + "applicability", "can_merge_without_human", "first_next_action", "fix_task", diff --git a/CHANGELOG.md b/CHANGELOG.md index 6e8eea8c..19be0f71 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,21 @@ ## Unreleased +- **Non-preview `verify` now fails closed on a missing `--config`.** + `agents-shipgate verify --workspace . --config missing.yaml --json` exits + `2` with `merge_verdict: "unknown"`, `applicability: "unknown"`, and + `can_merge_without_human: false`; it writes lightweight verifier/controller + artifacts but no `report.json` and runs no head scan. This replaces the old + lenient path where a missing config could trigger-skip and exit `0`. + `verify --preview --config missing.yaml --json` is unchanged and remains the + setup/relevance path with exit `0`. 
+- **Shipgate now has a separate self-dogfood PR workflow.** The root + `shipgate.yaml` remains the public Codex-plugin marketplace self-scan, while + `shipgate-self.yaml` and `.github/workflows/agents-shipgate-self.yml` run an + advisory static-only local-action gate on pull requests with + `fail_on_merge_verdicts: blocked`, artifact upload enabled, and PR comments + disabled. This does not scan Shipgate's Python scanner implementation; tests, + coverage, audit, SBOM, and release signing remain that assurance path. - **A named high concern now routes to review, not `insufficient_evidence`.** When a scan turns up an *active* (not baseline-accepted) high/critical review finding, the release decision is now `review_required` even if low-confidence diff --git a/README.md b/README.md index 0e944275..d2aa7ed6 100644 --- a/README.md +++ b/README.md @@ -163,8 +163,10 @@ The release gate is `agents-shipgate-reports/report.json` → The PR/controller surface is `agents-shipgate-reports/verifier.json` → `merge_verdict` (`mergeable | human_review_required | insufficient_evidence | blocked | unknown`), a deterministic projection of the release decision. Read -`verifier.json` first for `merge_verdict`, `can_merge_without_human`, -`first_next_action`, `fix_task`, and `capability_review.top_changes`. +`verifier.json` first for `merge_verdict`, `applicability`, +`agent_controller`, `can_merge_without_human`, `first_next_action`, and +`fix_task`. `capability_review.top_changes` is supporting/provisional reviewer +context. Zero-setup demos of both verdicts are in [60 seconds](#60-seconds-watch-it-block-two-prs) above; `uvx` runs them with no @@ -255,8 +257,9 @@ For local control, parse the `shipgate check` stdout JSON omit `--base`/`--head`. For committed PR/CI refs, make the base ref available first because `verify` never fetches. 
Read `agents-shipgate-reports/verifier.json` first and lead with `merge_verdict`, -`can_merge_without_human`, `first_next_action`, `fix_task`, and -`capability_review.top_changes`, then read +`applicability`, `agent_controller`, `can_merge_without_human`, +`first_next_action`, and `fix_task`, then read supporting/provisional +`capability_review.top_changes` and `agents-shipgate-reports/report.json` for `release_decision.decision`. Do not claim completion when `merge_verdict` is `blocked`, `insufficient_evidence`, or `human_review_required` unless the user explicitly accepts human review. Do not auto-assert approval. Do not auto-assert confirmation, idempotency, @@ -338,9 +341,12 @@ make the base ref available first because `verify` never fetches. Verify writes base scan can be materialized, verify also writes `base.capabilities.lock.json` plus `capability-lock-diff.{json,md}`, and the PR comment includes a compact semantic capability diff summary. Lead with -`merge_verdict`, `can_merge_without_human`, `first_next_action`, and the capability diff or -`capability_review.top_changes`; use `release_decision.decision` as the release -gate. +`merge_verdict`, `applicability`, `agent_controller`, +`can_merge_without_human`, `first_next_action`, and `fix_task`; use +`release_decision.decision` as the release gate. Capability diff summaries and +`capability_review.top_changes` are supporting/provisional review context. +Legacy `agent_result_v1` / `agent-result.json` compatibility surfaces are +supporting/provisional projections, not the CI gate or verifier read path. Install alternatives (your agent project does **not** need Python 3.12 — install the CLI separately): @@ -511,7 +517,7 @@ Agents Shipgate is designed to be agent-friendly. 
If you're a coding agent (Clau - **`agents-shipgate install-hooks --target claude-code --write`** — deterministic Claude Code hooks: a PreToolUse trust-root guard, a cheap trigger check after `Edit|Write|MultiEdit`, and a full `verify` at `Stop`, so the gate runs even when instruction files lose attention on long sessions. See [`docs/agents/use-with-claude-code.md`](docs/agents/use-with-claude-code.md#hooks-the-deterministic-path-recommended). - **`agents-shipgate mcp-serve`** (`[mcp]` extra) — read-only stdio MCP server exposing `shipgate.check`, `shipgate.preflight`, `shipgate.explain`, `shipgate.capabilities`, and `shipgate.handoff` for agents without comfortable shell access. It is static-only and not a general MCP permission broker. See [`docs/mcp-server.md`](docs/mcp-server.md). - **[`docs/ai-search-summary.md`](docs/ai-search-summary.md)** — human-readable summary for AI search, answer engines, and coding agents -- **[`docs/manifest-v0.1.json`](docs/manifest-v0.1.json)** + **[`docs/report-schema.v0.27.json`](docs/report-schema.v0.27.json)** + **[`docs/agent-handoff-schema.v1.json`](docs/agent-handoff-schema.v1.json)** + **[`docs/preflight-schema.v0.2.json`](docs/preflight-schema.v0.2.json)** — JSON Schemas for live editor validation and agent routing (current; emitted reports carry `report_schema_version: "0.27"`, handoff emits `schema_version: "shipgate.agent_handoff/v1"`, preflight emits `preflight_schema_version: "0.2"`). v0.27 adds policy-pack distribution metadata (`loaded_policy_packs[].{source,sha256,sha256_status,owner}`) over v0.26's structured evidence gaps; gate behavior is unchanged. Read `release_decision.decision` for release gating, `agent-handoff.json.gate` / `controller` for the compact agent step, and `reviewer_summary.first_recommended_surface` for the human-review entry point. The per-version additive history lives in [`docs/agent-contract-current.md`](docs/agent-contract-current.md) and [`STABILITY.md`](STABILITY.md). 
+- **[`docs/manifest-v0.1.json`](docs/manifest-v0.1.json)** + **[`docs/report-schema.v0.27.json`](docs/report-schema.v0.27.json)** + **[`docs/agent-handoff-schema.v1.json`](docs/agent-handoff-schema.v1.json)** + **[`docs/preflight-schema.v0.2.json`](docs/preflight-schema.v0.2.json)** — JSON Schemas for live editor validation and agent routing (current; emitted reports carry `report_schema_version: "0.27"`, handoff emits `schema_version: "shipgate.agent_handoff/v1"`, preflight emits `preflight_schema_version: "0.2"`). v0.27 adds policy-pack distribution metadata (`loaded_policy_packs[].{source,sha256,sha256_status,owner}`) over v0.26's structured evidence gaps; gate behavior is unchanged. Read `release_decision.decision` for release gating, `agent-handoff.json.gate` / `controller` for the compact agent step, and `reviewer_summary.first_recommended_surface` for the human-review entry point. `reviewer_summary`, `verifier_summary`, runtime trace/evidence fields, Release Evidence Packet outputs, legacy `agent_result_v1` surfaces, and capability diff projections are supporting/provisional review or compatibility context, not additional gates. The per-version additive history lives in [`docs/agent-contract-current.md`](docs/agent-contract-current.md) and [`STABILITY.md`](STABILITY.md). - **[`docs/capability-lock-schema.v0.2.json`](docs/capability-lock-schema.v0.2.json)** + **[`docs/capability-lock-diff-schema.v0.3.json`](docs/capability-lock-diff-schema.v0.3.json)** — stable schemas for the static capability envelope and semantic diff emitted by `agents-shipgate capability` and, in PR workflows, by `agents-shipgate verify`; non-gating and separate from `report.json`. 
- **[`docs/attestation-schema.v0.4.json`](docs/attestation-schema.v0.4.json)** + **[`docs/org-governance-schema.v0.1.json`](docs/org-governance-schema.v0.1.json)** + **[`docs/org-evidence-bundle-schema.v1.json`](docs/org-evidence-bundle-schema.v1.json)** + **[`docs/registry-schema.v0.3.json`](docs/registry-schema.v0.3.json)** + **[`docs/host-grants-inventory-schema.v0.1.json`](docs/host-grants-inventory-schema.v0.1.json)** — deterministic local attestation, organization governance, org evidence bundle, append-only registry, and host-grant inventory schemas for multi-repo governance. - **[`docs/governance-benchmark-catalog-schema.v0.2.json`](docs/governance-benchmark-catalog-schema.v0.2.json)** + **[`docs/governance-benchmark-result-schema.v0.2.json`](docs/governance-benchmark-result-schema.v0.2.json)** — stable schemas for the research benchmark catalog and deterministic result artifact. diff --git a/STABILITY.md b/STABILITY.md index bfd227f5..172d6f86 100644 --- a/STABILITY.md +++ b/STABILITY.md @@ -22,6 +22,13 @@ Breaking changes from the `0.x` line: `report.json.release_decision.decision`. - `agents-shipgate verify --format agent` was removed. Use `--format json` to print the full `VerifierArtifact`. +- Non-preview `agents-shipgate verify --config <path>` now fails closed when + `<path>` is missing. The old lenient path could trigger-skip and exit `0`; + the new behavior exits `2`, emits `merge_verdict: "unknown"` and + `applicability: "unknown"`, writes only lightweight verifier/controller + artifacts, and does not write `report.json` or run a head scan. + `agents-shipgate verify --preview` is unchanged and still treats a missing + config as an onboarding/relevance condition with exit `0`. - `shipgate check --format agent-json` was removed. Use `shipgate check --format codex-boundary-json`; the output `schema_version` is now `shipgate.codex_boundary_result/v1`. @@ -134,7 +141,7 @@ not agent-repairable authority gaps. Full PR verification uses `agents-shipgate verify`. 
The single agent-controller artifact is `agents-shipgate-reports/verifier.json`; it leads with -`merge_verdict`, `can_merge_without_human`, `agent_controller`, +`merge_verdict`, `applicability`, `can_merge_without_human`, `agent_controller`, `first_next_action`, and `fix_task`. `verify-run.json` records stable run identity and input hashes for reproducibility. `report.json` remains the release-gate artifact. @@ -312,6 +319,14 @@ In `agents-shipgate-reports/report.json`, the following are guaranteed: - `effective_policy.{ci_mode, fail_on, suppressed_check_ids, waiver_scopes, severity_overrides, baseline_integrity_mode, baseline_fingerprints, ci_gate_present}` (v0.22+) — normalized (not text-diff) snapshot of the release-policy surface for base-vs-head weakening comparison. Required + always present. Every list/dict is emitted sorted (`fail_on` by severity tier rank) for byte-stable output; derived from the manifest plus accepted-debt fingerprints. - `human_ack.{required, satisfied, acks, outstanding}` (v0.22+) — declared human-acknowledgement state. Required + always present (default `required=false`, `satisfied=true`, empty lists). Within the static boundary, acknowledgement is declared evidence only — never inferred. A trust-root weakening (`SHIP-VERIFY-POLICY-WEAKENED`, `-CI-GATE-REMOVED`, `-BASELINE-OR-WAIVER-EXPANDED`) makes a surface `required`; `satisfied` only when a matching `human_ack` entry exists in `shipgate.yaml`. `acks[]` are `{owner, reason, affected_surface, expires, source}`; `outstanding[]` lists required-but-unacknowledged surfaces. The ack section lives in `shipgate.yaml` (a trust root) so adding one trips `SHIP-VERIFY-TRUST-ROOT-TOUCHED`. +During `0.x`, secondary projections are supporting/provisional even when their +field shapes are documented for additive compatibility. CI gates on +`report.json.release_decision.decision`; PR controllers use +`verifier.json.merge_verdict`, `applicability`, and `agent_controller`. 
+`reviewer_summary`, `verifier_summary`, `capability_review`, runtime +trace/evidence fields, Release Evidence Packets, and non-gating capability diff +projections are explanatory surfaces, not independent policy engines. + ### Privacy and redaction Reports, packets, SARIF, Markdown, GitHub step summaries, `explain-finding` @@ -791,6 +806,16 @@ CI. If the requested base ref or PR diff context is unavailable, verify records base manifest or base scan is unavailable, verify records `base_status`, disables diff enrichment, and leaves the head release decision and exit code unchanged. +Before any trigger-skip can return success, non-preview `verify` also requires +the resolved `--config` path to exist. A missing config is a configuration +failure, not a docs-only or no-trigger success: verify writes `verifier.json`, +`verify-run.json`, `agent-handoff.json`, and `pr-comment.md` with +`head_status: "failed"`, `head_exit_code: 2`, `merge_verdict: "unknown"`, +`applicability: "unknown"`, and `can_merge_without_human: false`; it writes no +`report.json` and runs no head scan. The first next action directs agents to +fix the config path or run `agents-shipgate verify --preview --json` / +`agents-shipgate detect --workspace . --json` before initializing. + The head scan writes `report.md`, `report.json`, `report.sarif`, `packet.json`, `verifier.json`, `verify-run.json`, `agent-handoff.json`, and `pr-comment.md`. `verify` intentionally requests packet @@ -845,7 +870,8 @@ consumer may read: capability_changes_modified, top_changes[]}`. `top_changes[]` carries the highest-signal capability deltas with `{id, title, impact, rationale, related_finding_ids}`. `impact` mirrors the gate; this block never introduces a - finding-independent blocker. + finding-independent blocker. Treat it as supporting/provisional reviewer + context, not as the controller's primary verdict. 
- `agent_controller` — imperative restatement of the verdict for autonomous control (`null` for `--preview`): `{completion_allowed, must_stop, stop_reason, allowed_next_commands[], forbidden_file_edits[], forbidden_actions[], @@ -923,7 +949,14 @@ Diff remains explanatory only. ### Release Evidence Packet (v0.7) -`agents-shipgate-reports/packet.json` is governed by [`docs/packet-schema.v0.7.json`](docs/packet-schema.v0.7.json). v0.7 adds capability-linked local trace evidence summary and trace refs under `human_in_the_loop`. v0.6 stays as the frozen reference at [`docs/packet-schema.v0.6.json`](docs/packet-schema.v0.6.json); pre-v0.7 packets validate against it. v0.6 added the top-level `evidence_matrix` section and the optional `ReleaseDecisionItem.source` and `ReleaseDecisionItem.policy_evidence_source` pointers for reviewer-grade dual-source provenance on top of v0.5. Within `0.x`: +`agents-shipgate-reports/packet.json` is a supporting/provisional reviewer +artifact governed by [`docs/packet-schema.v0.7.json`](docs/packet-schema.v0.7.json). +v0.7 adds capability-linked local trace evidence summary and trace refs under +`human_in_the_loop`. v0.6 stays as the frozen reference at +[`docs/packet-schema.v0.6.json`](docs/packet-schema.v0.6.json); pre-v0.7 packets +validate against it. v0.6 added the top-level `evidence_matrix` section and the +optional `ReleaseDecisionItem.source` and `ReleaseDecisionItem.policy_evidence_source` +pointers for reviewer-grade dual-source provenance on top of v0.5. Within `0.x`: - `packet_schema_version` is a real field on every emitted packet; minor bumps are additive. - The reviewer sections (release_decision, evidence_matrix, capability_intent, high_risk_surface, tool_surface_diff, action_surface_diff, approval_coverage, idempotency_risk, scope_coverage, memory_isolation, human_in_the_loop, dynamic_scenarios, not_proven) are always present. 
@@ -1121,6 +1154,12 @@ The following paths are part of the public agent surface and will not move withi The body content of these files may change to reflect new prompts; the entry-point paths will not. +`agents-shipgate skill lint`, `agents-shipgate skill security`, and +`agents-shipgate skill review` are supporting/provisional review helpers in +`0.x`. They may inform skill and instruction review, but they are not the CI +release gate and should not be treated as a substitute for +`report.json.release_decision.decision`. + --- ## What MAY change additively in any minor release diff --git a/action.yml b/action.yml index 5eff0fa4..d901d252 100644 --- a/action.yml +++ b/action.yml @@ -283,13 +283,13 @@ outputs: description: Whether verifier_summary reports a policy weakening (`true`/`false`). Review signal; not a second gate. value: ${{ steps.report_outputs.outputs.policy_weakened }} capability_changes_added: - description: Number of added members in the canonical capability delta. + description: Supporting/provisional count of added members in the capability delta; not a second gate. value: ${{ steps.report_outputs.outputs.capability_changes_added }} capability_changes_modified: - description: Number of broadened plus narrowed members in the canonical capability delta. + description: Supporting/provisional count of broadened plus narrowed capability delta members; not a second gate. value: ${{ steps.report_outputs.outputs.capability_changes_modified }} capability_changes_removed: - description: Number of removed members in the canonical capability delta. + description: Supporting/provisional count of removed members in the capability delta; not a second gate. 
value: ${{ steps.report_outputs.outputs.capability_changes_removed }} runs: diff --git a/adoption-kits/claude-code-skill/.agents-shipgate-kit-metadata.json b/adoption-kits/claude-code-skill/.agents-shipgate-kit-metadata.json index a009c08d..01cbba7b 100644 --- a/adoption-kits/claude-code-skill/.agents-shipgate-kit-metadata.json +++ b/adoption-kits/claude-code-skill/.agents-shipgate-kit-metadata.json @@ -16,12 +16,14 @@ "ec2c3378a8a73e7b259664170fdcd33fa3242f9adb3c9b83b0ae616609fa6c4a", "3b643d8fe5f88817702fae3d7bd51964d878a413e13fa1c7e1bc0007ee733744", "1d80cb6eeadf064fbb613ae3bd5b5e46ed4fd729c21b314d30537cb884e4a19e", + "398e88622bf73b524f91405ffc5dbccde651c6a9cb7c2df035ab01d39a964e4f", "9860b9246057289450b425daa212f248be8082327101f2a5e6a355a266f779c1" ], "prompts/add-shipgate-to-repo.md": [ "ea3c37cfbbd42c40d164abfe21d468a3a5550d5384125f94a53c947dea6b4b2a", "61cafab6b684e3da5f658c18f85fd127773ca60f0006548508f165e48727666a", "2c946f83247106a3cda96eaf92466df3d5af564faea040e13d586eefedf4f4a9", + "2a6c5dea9919f031f64a5b8ee0c657d3cc6913c05da3a4ebaa8eb9e2f0728dc0", "347118060d1d39f709de5c024b617a2fdb8bd9659ac5a56d768e9a1aa39c0142", "4db5c3f0a2f1c68fb726c5e5ec7439c985e373a29dfa6867f18b6e22c604be18" ], @@ -31,7 +33,8 @@ "12810569a6aa655b4d8a6ed384142a430eef367bf6fab51b1a9e614aeff1c1a8" ], "prompts/verify-agent-diff.md": [ - "0c939414da7900b8f03f2a743e0f6b8f4d96f409c1d5cde038e27a98318bf486" + "0c939414da7900b8f03f2a743e0f6b8f4d96f409c1d5cde038e27a98318bf486", + "1d59c30ea72b1e7ba12ae0f650cf75462f62f1a8b532ab44f88c78e2242a8d17" ], "ci-recipes/advisory-pr-comment.yml": [ "a8aa3f577af73534cdb529fd4f5d34c08522181225a2eddee70099c5a8ef4191", diff --git a/adoption-kits/claude-code-skill/SKILL.md b/adoption-kits/claude-code-skill/SKILL.md index 6b6d2509..e7780a8f 100644 --- a/adoption-kits/claude-code-skill/SKILL.md +++ b/adoption-kits/claude-code-skill/SKILL.md @@ -76,9 +76,9 @@ For non-GitHub CI (GitLab, CircleCI, Jenkins, Azure Pipelines, Buildkite, Bitbuc - **Installed CLI 
contract**: when available, run `agents-shipgate contract --json` to verify local schema versions, capability/research surfaces, `release_decision.decision`, and manual-review signal fields. Older installs should use [`docs/agent-contract-current.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/agent-contract-current.md) or upgrade before automating against the local contract command. - **Verifier JSON**: `verifier_schema_version: "0.1"`. Read `merge_verdict`, `can_merge_without_human`, `first_next_action`, `fix_task`, `capability_review.top_changes`, `trust_root_touched`, and `policy_weakened` before summarizing an AI-generated PR. `merge_verdict` is a deterministic projection; the gate remains `report.json.release_decision.decision`. - **Verify run JSON**: `verify-run.json` uses `schema_version: "shipgate.verify_run/v1"` and records stable run identity, subject refs, input hashes, outcome, and artifact hashes. It is the reproducibility artifact for `verify`; do not treat it as a second gate. -- **Report JSON**: `report_schema_version: "0.27"`. Read `release_decision.decision` first for release gating; use `agent_summary` / `findings[].agent_action` for agent routing and `reviewer_summary` for the human-review entry point. v0.27 adds policy-pack distribution metadata (`loaded_policy_packs[].{source,sha256,sha256_status,owner}`) for organization audit while preserving gate behavior. v0.26 added structured evidence gaps (`release_decision.evidence_coverage.evidence_gaps[]` — one actionable remediation row per low-confidence tool or source warning) plus the advisory `suggested-inventory.json` skeleton written next to `report.json`; when the decision is `insufficient_evidence`, follow each gap's `next_action` instead of guessing. 
v0.25 added opt-in capability-linked local trace/provenance evidence (`capability_runtime_evidence`, `findings[].capability_trace_refs`, and mirrored `ReleaseDecisionItem.capability_trace_refs`) while preserving fingerprints, baselines, and gate behavior. v0.24 added capability-native policy evidence (`findings[].capability_refs`, optional `findings[].capability_policy_evidence`, and mirrored `ReleaseDecisionItem.capability_refs`). v0.23 added semantic metadata to `capability_change` members while preserving existing buckets. v0.22 added the verifier-cycle blocks `capability_change`, `protected_surface_changes`, `effective_policy`, `human_ack`, and `verifier_summary` — all reviewer-facing projections that never gate independently (`release_decision.decision` stays the only gate). To remove heuristic findings from the active gate, rerun scan with `--no-heuristics`; filtered findings remain in `findings[]` with `suppressed=true`, and `heuristics_filter` records `enabled`, `excluded_provenance_kinds`, `filtered_finding_count`, and `filtered_by_kind`; `runtime_trace` findings are not filtered as heuristics. To inspect provenance without changing gate behavior, use `agents-shipgate findings --from agents-shipgate-reports/report.json --provenance-kind keyword_heuristic,regex_heuristic,runtime_trace --json`. Do not gate on `summary.status`; it is legacy and baseline-blind. The full field list lives in [`docs/agent-contract-current.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/agent-contract-current.md#read-these-first-for-release-gating), and reports validate against [`docs/report-schema.v0.27.json`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/report-schema.v0.27.json). -- **Release Evidence Packet**: `agents-shipgate-reports/packet.{md,json,html}` (and `packet.pdf` with the `[pdf]` extras) is emitted alongside the report by default. 
The packet has fixed reviewer sections governed by [`docs/packet-schema.v0.7.json`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/packet-schema.v0.7.json) (latest; v0.7 adds capability-linked local trace evidence summary and trace refs under `human_in_the_loop`). See [STABILITY.md §Release Evidence Packet](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/STABILITY.md#release-evidence-packet-v07). Use the packet for reviewer-shaped output; use the report for finding details. -- **Capability standard**: `agents-shipgate capability export` emits a stable static capability lock (`capability_lock_schema_version: "0.2"`) and `agents-shipgate capability diff` emits a stable semantic diff (`capability_lock_diff_schema_version: "0.3"`). These artifacts are non-gating, exclude runtime trace evidence, and are documented in [`docs/capability-standard.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/capability-standard.md). +- **Report JSON**: `report_schema_version: "0.27"`. Read `release_decision.decision` first for release gating; use `agent_summary` / `findings[].agent_action` for agent routing and `reviewer_summary` for the human-review entry point. v0.27 adds policy-pack distribution metadata (`loaded_policy_packs[].{source,sha256,sha256_status,owner}`) for organization audit while preserving gate behavior. v0.26 added structured evidence gaps (`release_decision.evidence_coverage.evidence_gaps[]` — one actionable remediation row per low-confidence tool or source warning) plus the advisory `suggested-inventory.json` skeleton written next to `report.json`; when the decision is `insufficient_evidence`, follow each gap's `next_action` instead of guessing. v0.25 added opt-in capability-linked local trace/provenance evidence (`capability_runtime_evidence`, `findings[].capability_trace_refs`, and mirrored `ReleaseDecisionItem.capability_trace_refs`) while preserving fingerprints, baselines, and gate behavior. 
v0.24 added capability-native policy evidence (`findings[].capability_refs`, optional `findings[].capability_policy_evidence`, and mirrored `ReleaseDecisionItem.capability_refs`). v0.23 added semantic metadata to `capability_change` members while preserving existing buckets. v0.22 added the verifier-cycle blocks `capability_change`, `protected_surface_changes`, `effective_policy`, `human_ack`, and `verifier_summary` — all reviewer-facing projections that never gate independently (`release_decision.decision` stays the only gate). `reviewer_summary`, `verifier_summary`, runtime trace/evidence fields, and non-gating capability diff projections are supporting/provisional explanatory context. To remove heuristic findings from the active gate, rerun scan with `--no-heuristics`; filtered findings remain in `findings[]` with `suppressed=true`, and `heuristics_filter` records `enabled`, `excluded_provenance_kinds`, `filtered_finding_count`, and `filtered_by_kind`; `runtime_trace` findings are not filtered as heuristics. To inspect provenance without changing gate behavior, use `agents-shipgate findings --from agents-shipgate-reports/report.json --provenance-kind keyword_heuristic,regex_heuristic,runtime_trace --json`. Do not gate on `summary.status`; it is legacy and baseline-blind. The full field list lives in [`docs/agent-contract-current.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/agent-contract-current.md#read-these-first-for-release-gating), and reports validate against [`docs/report-schema.v0.27.json`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/report-schema.v0.27.json). +- **Release Evidence Packet**: `agents-shipgate-reports/packet.{md,json,html}` (and `packet.pdf` with the `[pdf]` extras) is emitted alongside the report by default as a supporting/provisional reviewer artifact. 
The packet has fixed reviewer sections governed by [`docs/packet-schema.v0.7.json`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/packet-schema.v0.7.json) (latest; v0.7 adds capability-linked local trace evidence summary and trace refs under `human_in_the_loop`). See [STABILITY.md §Release Evidence Packet](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/STABILITY.md#release-evidence-packet-v07). Use the packet for reviewer-shaped output; use the report for finding details. +- **Capability standard**: `agents-shipgate capability export` emits a stable static capability lock (`capability_lock_schema_version: "0.2"`) and `agents-shipgate capability diff` emits a stable semantic diff (`capability_lock_diff_schema_version: "0.3"`). These artifacts are supporting/provisional, non-gating, exclude runtime trace evidence, and are documented in [`docs/capability-standard.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/capability-standard.md). - **Governance benchmark**: `benchmark/agent-pr-governance/cases.yaml` and `scripts/run_governance_benchmark.py` are the stable research benchmark substrate (`governance_benchmark_result_schema_version: "0.2"`), not a release gate. See [`docs/governance-benchmark.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/governance-benchmark.md). - **Single source of truth for the contract**: [`docs/agent-contract-current.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/agent-contract-current.md). When the schema bumps, that file updates first. - **Exit codes**: `0` pass, `2` config error, `3` parse error, `4` other error, `20` strict-mode gate failure. 
diff --git a/adoption-kits/claude-code-skill/prompts/add-shipgate-to-repo.md b/adoption-kits/claude-code-skill/prompts/add-shipgate-to-repo.md index 083a4e5c..01b03928 100644 --- a/adoption-kits/claude-code-skill/prompts/add-shipgate-to-repo.md +++ b/adoption-kits/claude-code-skill/prompts/add-shipgate-to-repo.md @@ -70,7 +70,7 @@ agent-related PRs should use `agents-shipgate verify` after this adoption step. ```bash $SG scan -c shipgate.yaml --suggest-patches --format json --ci-mode advisory ``` - The report lands at `agents-shipgate-reports/report.json`. The Release Evidence Packet lands at `agents-shipgate-reports/packet.{md,json,html}`. Parse `report.json`; Codex plugin facts, when present, live under `codex_plugin_surface`. + The report lands at `agents-shipgate-reports/report.json`. The supporting Release Evidence Packet lands at `agents-shipgate-reports/packet.{md,json,html}`. Parse `report.json`; Codex plugin facts, when present, live under `codex_plugin_surface`. **Read these first for release gating (v0.8+):** - `release_decision.decision` ∈ `{"blocked", "review_required", "insufficient_evidence", "passed"}` — baseline-aware. This is the gating signal. `insufficient_evidence` (v0.14+) fires when evidence coverage is degraded past threshold; treat unknown future values as `review_required`. @@ -111,7 +111,7 @@ agent-related PRs should use `agents-shipgate verify` after this adoption step. 9. 
**Report back to the user**: - `release_decision.decision` and `release_decision.reason` (the gating signal — baseline-aware, v0.8+) - Blocker / review-item counts (`len(release_decision.blockers)` / `len(release_decision.review_items)`) - - The path to the Release Evidence Packet (`agents-shipgate-reports/packet.md`) for reviewer-shaped output + - The path to the supporting Release Evidence Packet (`agents-shipgate-reports/packet.md`) for reviewer-shaped output - The top 3 active critical/high findings (use `report.json`, not stdout) - Which patches were applied (count from `apply-patches --json` output's `files`) - Any check IDs the user should investigate first — link to `docs_url` from the finding for full rationale, or use `$SG explain --json` for the same content via CLI diff --git a/adoption-kits/claude-code-skill/prompts/verify-agent-diff.md b/adoption-kits/claude-code-skill/prompts/verify-agent-diff.md index 776c988d..ec706e11 100644 --- a/adoption-kits/claude-code-skill/prompts/verify-agent-diff.md +++ b/adoption-kits/claude-code-skill/prompts/verify-agent-diff.md @@ -68,13 +68,13 @@ work is complete. 5. **Read JSON, not Markdown.** - `agents-shipgate-reports/verifier.json` is the PR/controller artifact. - - Lead with `merge_verdict`, then inspect `capability_review.top_changes[]`, - `first_next_action.actor`, and `fix_task.safe_to_attempt`. + - Lead with `merge_verdict`, `applicability`, and `agent_controller`, then + inspect `first_next_action.actor` and `fix_task.safe_to_attempt`. - `agents-shipgate-reports/report.json` is the release-gate artifact. - `release_decision.decision` is the only gate signal. - - `verifier_summary` is a one-fetch composition for controller output; its - `verdict` mirrors `release_decision.decision` and never gates - independently. 
+ - `capability_review.top_changes[]` and `verifier_summary` are + supporting/provisional composition summaries; verdict-like values mirror + `release_decision.decision` and never gate independently. 6. **Do not bypass the verifier.** Do not suppress findings, lower severity, expand baselines or waivers, remove Shipgate CI, or soften agent @@ -111,7 +111,9 @@ work is complete. - `agents-shipgate-reports/report.json` exists and parses. - `agents-shipgate-reports/verifier.json` exists and parses. - `verifier.json.merge_verdict` is surfaced to the user. -- `capability_review.top_changes[]` is considered before generic findings. +- `applicability` and `agent_controller` are considered before generic findings. +- `capability_review.top_changes[]` is treated as supporting/provisional review + context. - `report.json.release_decision.decision` is surfaced to the user. - If `verifier_summary.protected_surface_touched` or `policy_weakened` is true, the response names the human-review requirement. 
diff --git a/adoption-kits/codex-skill/.agents-shipgate-kit-metadata.json b/adoption-kits/codex-skill/.agents-shipgate-kit-metadata.json index 87a7d478..329a5280 100644 --- a/adoption-kits/codex-skill/.agents-shipgate-kit-metadata.json +++ b/adoption-kits/codex-skill/.agents-shipgate-kit-metadata.json @@ -10,6 +10,7 @@ "3031abbfd241c9af4eeac92de6caf2454ad124ce897109ca93f8d9933f4a04cb", "ba127bfa1f47e00b78b7b8d463857496dbd2594615b7f223bd4d7c0c572aa7b6", "f545a945da52ba4cb67810464b0b60b1734b46a791d9c6023b5c8652d67286a8", + "388e4997b09196791ff4c71aab3e83c7462a5b61e66a5cee13e76aa3cad0d89f", "b27913ffc17f1af4e0b9370ac6c353c3cfdc91c69deb5b0cd1de1b83f7581e3b", "76c060a91d342cd02bf2c2273bcbb919a120ab063ebb32aba9cde67d6f0c2d1b" ], @@ -21,6 +22,7 @@ "584546244c7e6aa559606aa4dbb1c050b3539aa6e371c38239382796733a39b1", "9d21e609b83af11af52166d3f00af10bb596777a84cf16453a89e7e85218314b", "a554d3b989a65f9a64af6f2c14304e4ad94daa9e11515c21a2997dc1dd892c99", + "6bf8b3a409df3cd6f94e070555d62eedf8ba3690b4cfdceae2d7a7482b90e91b", "8576dc41812871e97a5d5c213a2c9c44f9766f746d99e9bc9909ee69a4002575", "d7779f4f2365c84727d02da770696a40490428a396e10d7a031b316f15ef849d" ], diff --git a/adoption-kits/codex-skill/SKILL.md b/adoption-kits/codex-skill/SKILL.md index ece7eac2..372a152b 100644 --- a/adoption-kits/codex-skill/SKILL.md +++ b/adoption-kits/codex-skill/SKILL.md @@ -5,7 +5,7 @@ description: Use when the user wants to add or run Agents Shipgate — the deter # Agents Shipgate -Agents Shipgate is the deterministic merge gate for AI-generated agent capability changes — a local-first, static Tool-Use Readiness review. It reads `shipgate.yaml` plus local tool sources and writes deterministic reports as Markdown, JSON, SARIF, and Release Evidence Packets. +Agents Shipgate is the deterministic merge gate for AI-generated agent capability changes — a local-first, static Tool-Use Readiness review. 
It reads `shipgate.yaml` plus local tool sources and writes deterministic reports as Markdown, JSON, SARIF, and supporting Release Evidence Packets. Use this skill when a task touches agent tools, MCP exports, OpenAPI specs, prompts that constrain tool use, permissions/scopes, approval or confirmation policies, `shipgate.yaml`, Shipgate CI, or `agents-shipgate-reports/report.json`. diff --git a/adoption-kits/codex-skill/references/recipes.md b/adoption-kits/codex-skill/references/recipes.md index 3f8ff09c..a663b4f5 100644 --- a/adoption-kits/codex-skill/references/recipes.md +++ b/adoption-kits/codex-skill/references/recipes.md @@ -117,9 +117,10 @@ Read `agents-shipgate-reports/agent-handoff.json` first. Lead with `next_action`, `controller`, and `fix_task.safe_to_attempt`. Then read `agents-shipgate-reports/verifier.json` for detailed controller context and `agents-shipgate-reports/report.json`; `release_decision.decision` remains the -gate. Use `verifier_summary` only as a composition summary: its `verdict` -mirrors `release_decision.decision` and it adds counts for protected-surface -touches, policy weakening, human acknowledgement, and top reason codes. +gate. `capability_review.top_changes[]` and `verifier_summary` are +supporting/provisional composition summaries: their verdict-like values mirror +`release_decision.decision`, and they add counts for protected-surface touches, +policy weakening, human acknowledgement, and top reason codes. Do not bypass the verifier. Do not suppress findings, lower severity, expand baselines or waivers, remove Shipgate CI, or weaken agent instructions to make diff --git a/adoption-kits/codex-skill/references/report-reading.md b/adoption-kits/codex-skill/references/report-reading.md index 4b85b9f7..454258fa 100644 --- a/adoption-kits/codex-skill/references/report-reading.md +++ b/adoption-kits/codex-skill/references/report-reading.md @@ -10,8 +10,9 @@ and `agents-shipgate-reports/report.json` for findings. 
Do not scrape Markdown. 2. `agent-handoff.json.capability_review.top_changes[]`: the highest-signal tool/action or trust-root changes. 3. `agent-handoff.json.next_action` / `controller` / `fix_task`: who acts next and whether a coding agent may safely attempt the fix. 4. `report.json.release_decision.decision`: `blocked`, `review_required`, `insufficient_evidence`, or `passed`; this is the release gate. -5. `release_decision.blockers[]` and `release_decision.review_items[]`. -6. `findings[]`: detailed evidence, source, severity, and remediation. +5. `verifier.json.capability_review.top_changes[]`: supporting/provisional highest-signal tool/action or trust-root changes. +6. `release_decision.blockers[]` and `release_decision.review_items[]`. +7. `findings[]`: detailed evidence, source, severity, and remediation. ## Verifier Summary @@ -27,8 +28,9 @@ When `report_schema_version` is `0.22` or newer, read state; a coding agent must not synthesize acknowledgement. - `top_reason_codes[]`: ranked reason-code counts for concise summaries. -This block is a deterministic projection. It cannot introduce a blocker that -is not already present in `findings[]` and `release_decision`. +This block is a supporting/provisional deterministic projection. It cannot +introduce a blocker that is not already present in `findings[]` and +`release_decision`. ## Per-Finding Action diff --git a/docs/agent-contract-current.md b/docs/agent-contract-current.md index 1acc61ee..336f6e8f 100644 --- a/docs/agent-contract-current.md +++ b/docs/agent-contract-current.md @@ -71,6 +71,23 @@ one decision engine. `merge_verdict` is a deterministic projection of `release_decision.decision`, so the two can never disagree. +## Primary vs supporting surfaces + +Primary gates are intentionally narrow. CI gates on +`report.json.release_decision.decision`. 
Coding agents handling committed PRs +read `agent-handoff.json.gate.merge_verdict` and `controller` first, with +`verifier.json.merge_verdict`, `applicability`, and `agent_controller` as the +authoritative detailed substrate. Everything else in the +verifier/report/packet family is supporting review evidence or a convenience +projection. + +Treat legacy `agent_result_v1` / `agent-result.json` compatibility surfaces, +runtime trace/evidence fields, the Release Evidence Packet, `reviewer_summary`, +`verifier_summary`, `capability_review`, non-gating capability diff +projections, and `agents-shipgate skill ...` review output as +supporting/provisional surfaces. They may be useful for routing and review, but +they do not replace the gate above and must not introduce a second verdict. + `agents-shipgate preflight --workspace . --plan - --json` is a proactive routing surface for coding agents before edits. It accepts a single `PreflightPlanV1` object with `changed_files[]`, optional `diff_text`, @@ -430,7 +447,13 @@ agents-shipgate findings --from agents-shipgate-reports/report.json \ The command reads active findings by default; add `--include-suppressed` when a reviewer needs suppressed entries in the same provenance summary. -For reviewer-shaped output, also read the **Release Evidence Packet** at `agents-shipgate-reports/packet.{md,json,html}` (and `packet.pdf` when the `[pdf]` extras are installed). Packet outputs are redacted by the same default privacy layer as the report. The packet has fixed reviewer sections governed by [`docs/packet-schema.v0.7.json`](packet-schema.v0.7.json) — see [STABILITY.md §Release Evidence Packet](../STABILITY.md#release-evidence-packet-v07). +For reviewer-shaped output, also read the **Release Evidence Packet** at +`agents-shipgate-reports/packet.{md,json,html}` (and `packet.pdf` when the +`[pdf]` extras are installed). The packet is a supporting/provisional reviewer +projection, not a second gate. 
Packet outputs are redacted by the same default +privacy layer as the report. The packet has fixed reviewer sections governed by +[`docs/packet-schema.v0.7.json`](packet-schema.v0.7.json) — see +[STABILITY.md §Release Evidence Packet](../STABILITY.md#release-evidence-packet-v07). Packet schema `0.7` adds capability-linked trace summary and trace refs under `human_in_the_loop`. Packet schema `0.6` preserved the v0.5 `action_surface_diff` section and added two independent additive extensions: diff --git a/docs/agents/claude-code.md b/docs/agents/claude-code.md index 3d6e3be0..87c45aa9 100644 --- a/docs/agents/claude-code.md +++ b/docs/agents/claude-code.md @@ -29,7 +29,8 @@ agents-shipgate verify --workspace . --config shipgate.yaml --base origin/main - Read `agents-shipgate-reports/agent-handoff.json` first, then `verifier.json`, then `verify-run.json`, then `report.json` for reviewer -evidence. +evidence. Legacy `agent-result.json` surfaces are supporting/provisional +compatibility projections for older automation consumers. See [protocol.md](protocol.md) for the state machine, repair loop, policy discovery convention, and MCP read-only boundary. diff --git a/docs/agents/codex.md b/docs/agents/codex.md index 94ac545c..15ecee72 100644 --- a/docs/agents/codex.md +++ b/docs/agents/codex.md @@ -29,7 +29,8 @@ agents-shipgate verify --workspace . --config shipgate.yaml --base origin/main - Read `agents-shipgate-reports/agent-handoff.json` first, then `verifier.json`, then `verify-run.json`, then `report.json` for reviewer -evidence. +evidence. Legacy `agent-result.json` surfaces are supporting/provisional +compatibility projections for older automation consumers. See [protocol.md](protocol.md) for the state machine, repair loop, policy discovery convention, and MCP read-only boundary. 
diff --git a/docs/agents/cursor.md b/docs/agents/cursor.md index a30f2c03..12264776 100644 --- a/docs/agents/cursor.md +++ b/docs/agents/cursor.md @@ -29,7 +29,8 @@ agents-shipgate verify --workspace . --config shipgate.yaml --base origin/main - Read `agents-shipgate-reports/agent-handoff.json` first, then `verifier.json`, then `verify-run.json`, then `report.json` for reviewer -evidence. +evidence. Legacy `agent-result.json` surfaces are supporting/provisional +compatibility projections for older automation consumers. See [protocol.md](protocol.md) for the state machine, repair loop, policy discovery convention, and MCP read-only boundary. diff --git a/docs/agents/use-with-claude-code.md b/docs/agents/use-with-claude-code.md index 976c589b..46b8d63c 100644 --- a/docs/agents/use-with-claude-code.md +++ b/docs/agents/use-with-claude-code.md @@ -142,7 +142,8 @@ Then read `agents-shipgate-reports/agent-handoff.json` and **lead with `agents-shipgate-reports/report.json`. Read `capability_review.top_changes[]` next for the highest-signal tool/action access changes, and check `controller`, `next_action`, and `fix_task`. Use `verifier.json` only for -detailed controller context. +detailed controller context. Legacy `agent-result.json` surfaces are +supporting/provisional compatibility projections and not the verifier read path. Do **not** claim completion when `merge_verdict` is `blocked`, `insufficient_evidence`, or `human_review_required` unless the user has diff --git a/docs/agents/use-with-codex.md b/docs/agents/use-with-codex.md index b8910359..b3328fb0 100644 --- a/docs/agents/use-with-codex.md +++ b/docs/agents/use-with-codex.md @@ -220,6 +220,8 @@ projection of `release_decision.decision`, which remains the gate in `capability_review.top_changes[]` next to see the highest-signal tool/action access changes, and check `controller`, `next_action`, and `fix_task`. Use `verifier.json` only for detailed controller context. 
+Legacy `agent-result.json` surfaces are supporting/provisional compatibility +projections and not the verifier read path. Codex must not claim completion when `merge_verdict` is `blocked`, `insufficient_evidence`, or `human_review_required` unless the user has diff --git a/docs/agents/use-with-cursor.md b/docs/agents/use-with-cursor.md index d811251f..2d842f2c 100644 --- a/docs/agents/use-with-cursor.md +++ b/docs/agents/use-with-cursor.md @@ -97,6 +97,8 @@ Read `agents-shipgate-reports/verifier.json` and **lead with `merge_verdict`** which stays the gate in `agents-shipgate-reports/report.json`. Read `capability_review.top_changes[]` next for the highest-signal tool/action access changes, and check `trust_root_touched`, `policy_weakened`, and `fix_task`. +`agent-result.json` is a supporting/provisional compact projection; Cursor should +not read it ahead of `verifier.json`. Cursor must not claim the change is complete when `merge_verdict` is `blocked`, `insufficient_evidence`, or `human_review_required` unless the user has diff --git a/docs/integrations.md b/docs/integrations.md index fdfd74f8..5851ead8 100644 --- a/docs/integrations.md +++ b/docs/integrations.md @@ -98,10 +98,11 @@ Action outputs: The action runs `agents-shipgate verify`, which writes Markdown, JSON, SARIF, packet JSON, verifier JSON, verify-run JSON, and PR-comment Markdown artifacts. It intentionally emits `packet.json` only for the packet; -`pr-comment.md` is the human PR surface. Read `verifier.json` first for `merge_verdict`, -`can_merge_without_human`, `agent_controller`, `first_next_action`, and -`capability_review.top_changes`; read `verify-run.json` for reproducibility -metadata; read `report.json.release_decision.decision` for the gate. +`pr-comment.md` is the human PR surface. 
Read `agent-handoff.json` first for +the compact agent handoff, `verifier.json` for detailed controller context, +`verify-run.json` for reproducibility metadata, and +`report.json.release_decision.decision` for the gate. Capability diffs and +`capability_review.top_changes` are supporting/provisional review context. Verify never fetches; use `fetch-depth: 0` on checkout or fetch the base ref before the action when `diff_base: target` is set. An explicit `head_ref` is scanned from an isolated archive; without it, the checked-out diff --git a/docs/target-repo-agent-snippets.md b/docs/target-repo-agent-snippets.md index 9ed46553..0d68e851 100644 --- a/docs/target-repo-agent-snippets.md +++ b/docs/target-repo-agent-snippets.md @@ -85,6 +85,8 @@ fetches. Read `agents-shipgate-reports/agent-handoff.json` first for `agents-shipgate-reports/verify-run.json` for reproducibility metadata, and `agents-shipgate-reports/report.json.release_decision.decision` for the release gate. +Legacy `agent-result.json` surfaces, where present, are supporting/provisional +projections and not the CI gate. Auto-apply only high-confidence safe patches. Do not auto-assert approval, confirmation, idempotency, broad-scope, or prohibited-action policy decisions; @@ -191,6 +193,8 @@ fetches. Read `agents-shipgate-reports/agent-handoff.json` first for `agents-shipgate-reports/verify-run.json` for reproducibility metadata, and `agents-shipgate-reports/report.json.release_decision.decision` for the release gate. +Legacy `agent-result.json` surfaces, where present, are supporting/provisional +projections and not the CI gate. Use `apply-patches --confidence high --apply` only for high-confidence safe patches. Approval, confirmation, idempotency, broad-scope, and prohibited-action @@ -277,6 +281,8 @@ fetches. 
Read `agents-shipgate-reports/agent-handoff.json` first for `agents-shipgate-reports/verify-run.json` for reproducibility metadata, and `agents-shipgate-reports/report.json.release_decision.decision` for the release gate. +Legacy `agent-result.json` surfaces, where present, are supporting/provisional +projections and not the CI gate. Apply only high-confidence safe patches. Do not invent approval, confirmation, or idempotency evidence. diff --git a/llms-full.txt b/llms-full.txt index 9bc8b876..ff0c1921 100644 --- a/llms-full.txt +++ b/llms-full.txt @@ -1043,6 +1043,23 @@ one decision engine. `merge_verdict` is a deterministic projection of `release_decision.decision`, so the two can never disagree. +## Primary vs supporting surfaces + +Primary gates are intentionally narrow. CI gates on +`report.json.release_decision.decision`. Coding agents handling committed PRs +read `agent-handoff.json.gate.merge_verdict` and `controller` first, with +`verifier.json.merge_verdict`, `applicability`, and `agent_controller` as the +authoritative detailed substrate. Everything else in the +verifier/report/packet family is supporting review evidence or a convenience +projection. + +Treat legacy `agent_result_v1` / `agent-result.json` compatibility surfaces, +runtime trace/evidence fields, the Release Evidence Packet, `reviewer_summary`, +`verifier_summary`, `capability_review`, non-gating capability diff +projections, and `agents-shipgate skill ...` review output as +supporting/provisional surfaces. They may be useful for routing and review, but +they do not replace the gate above and must not introduce a second verdict. + `agents-shipgate preflight --workspace . --plan - --json` is a proactive routing surface for coding agents before edits. 
It accepts a single `PreflightPlanV1` object with `changed_files[]`, optional `diff_text`, @@ -1402,7 +1419,13 @@ agents-shipgate findings --from agents-shipgate-reports/report.json \ The command reads active findings by default; add `--include-suppressed` when a reviewer needs suppressed entries in the same provenance summary. -For reviewer-shaped output, also read the **Release Evidence Packet** at `agents-shipgate-reports/packet.{md,json,html}` (and `packet.pdf` when the `[pdf]` extras are installed). Packet outputs are redacted by the same default privacy layer as the report. The packet has fixed reviewer sections governed by [`docs/packet-schema.v0.7.json`](packet-schema.v0.7.json) — see [STABILITY.md §Release Evidence Packet](../STABILITY.md#release-evidence-packet-v07). +For reviewer-shaped output, also read the **Release Evidence Packet** at +`agents-shipgate-reports/packet.{md,json,html}` (and `packet.pdf` when the +`[pdf]` extras are installed). The packet is a supporting/provisional reviewer +projection, not a second gate. Packet outputs are redacted by the same default +privacy layer as the report. The packet has fixed reviewer sections governed by +[`docs/packet-schema.v0.7.json`](packet-schema.v0.7.json) — see +[STABILITY.md §Release Evidence Packet](../STABILITY.md#release-evidence-packet-v07). Packet schema `0.7` adds capability-linked trace summary and trace refs under `human_in_the_loop`. 
Packet schema `0.6` preserved the v0.5 `action_surface_diff` section and added two independent additive extensions: diff --git a/plugins/agents-shipgate/skills/agents-shipgate/SKILL.md b/plugins/agents-shipgate/skills/agents-shipgate/SKILL.md index ece7eac2..372a152b 100644 --- a/plugins/agents-shipgate/skills/agents-shipgate/SKILL.md +++ b/plugins/agents-shipgate/skills/agents-shipgate/SKILL.md @@ -5,7 +5,7 @@ description: Use when the user wants to add or run Agents Shipgate — the deter # Agents Shipgate -Agents Shipgate is the deterministic merge gate for AI-generated agent capability changes — a local-first, static Tool-Use Readiness review. It reads `shipgate.yaml` plus local tool sources and writes deterministic reports as Markdown, JSON, SARIF, and Release Evidence Packets. +Agents Shipgate is the deterministic merge gate for AI-generated agent capability changes — a local-first, static Tool-Use Readiness review. It reads `shipgate.yaml` plus local tool sources and writes deterministic reports as Markdown, JSON, SARIF, and supporting Release Evidence Packets. Use this skill when a task touches agent tools, MCP exports, OpenAPI specs, prompts that constrain tool use, permissions/scopes, approval or confirmation policies, `shipgate.yaml`, Shipgate CI, or `agents-shipgate-reports/report.json`. diff --git a/plugins/agents-shipgate/skills/agents-shipgate/references/recipes.md b/plugins/agents-shipgate/skills/agents-shipgate/references/recipes.md index 3f8ff09c..a663b4f5 100644 --- a/plugins/agents-shipgate/skills/agents-shipgate/references/recipes.md +++ b/plugins/agents-shipgate/skills/agents-shipgate/references/recipes.md @@ -117,9 +117,10 @@ Read `agents-shipgate-reports/agent-handoff.json` first. Lead with `next_action`, `controller`, and `fix_task.safe_to_attempt`. Then read `agents-shipgate-reports/verifier.json` for detailed controller context and `agents-shipgate-reports/report.json`; `release_decision.decision` remains the -gate. 
Use `verifier_summary` only as a composition summary: its `verdict` -mirrors `release_decision.decision` and it adds counts for protected-surface -touches, policy weakening, human acknowledgement, and top reason codes. +gate. `capability_review.top_changes[]` and `verifier_summary` are +supporting/provisional composition summaries: their verdict-like values mirror +`release_decision.decision`, and they add counts for protected-surface touches, +policy weakening, human acknowledgement, and top reason codes. Do not bypass the verifier. Do not suppress findings, lower severity, expand baselines or waivers, remove Shipgate CI, or weaken agent instructions to make diff --git a/plugins/agents-shipgate/skills/agents-shipgate/references/report-reading.md b/plugins/agents-shipgate/skills/agents-shipgate/references/report-reading.md index 4b85b9f7..454258fa 100644 --- a/plugins/agents-shipgate/skills/agents-shipgate/references/report-reading.md +++ b/plugins/agents-shipgate/skills/agents-shipgate/references/report-reading.md @@ -10,8 +10,9 @@ and `agents-shipgate-reports/report.json` for findings. Do not scrape Markdown. 2. `agent-handoff.json.capability_review.top_changes[]`: the highest-signal tool/action or trust-root changes. 3. `agent-handoff.json.next_action` / `controller` / `fix_task`: who acts next and whether a coding agent may safely attempt the fix. 4. `report.json.release_decision.decision`: `blocked`, `review_required`, `insufficient_evidence`, or `passed`; this is the release gate. -5. `release_decision.blockers[]` and `release_decision.review_items[]`. -6. `findings[]`: detailed evidence, source, severity, and remediation. +5. `verifier.json.capability_review.top_changes[]`: supporting/provisional highest-signal tool/action or trust-root changes. +6. `release_decision.blockers[]` and `release_decision.review_items[]`. +7. `findings[]`: detailed evidence, source, severity, and remediation. 
## Verifier Summary @@ -27,8 +28,9 @@ When `report_schema_version` is `0.22` or newer, read state; a coding agent must not synthesize acknowledgement. - `top_reason_codes[]`: ranked reason-code counts for concise summaries. -This block is a deterministic projection. It cannot introduce a blocker that -is not already present in `findings[]` and `release_decision`. +This block is a supporting/provisional deterministic projection. It cannot +introduce a blocker that is not already present in `findings[]` and +`release_decision`. ## Per-Finding Action diff --git a/prompts/add-shipgate-to-repo.md b/prompts/add-shipgate-to-repo.md index 083a4e5c..01b03928 100644 --- a/prompts/add-shipgate-to-repo.md +++ b/prompts/add-shipgate-to-repo.md @@ -70,7 +70,7 @@ agent-related PRs should use `agents-shipgate verify` after this adoption step. ```bash $SG scan -c shipgate.yaml --suggest-patches --format json --ci-mode advisory ``` - The report lands at `agents-shipgate-reports/report.json`. The Release Evidence Packet lands at `agents-shipgate-reports/packet.{md,json,html}`. Parse `report.json`; Codex plugin facts, when present, live under `codex_plugin_surface`. + The report lands at `agents-shipgate-reports/report.json`. The supporting Release Evidence Packet lands at `agents-shipgate-reports/packet.{md,json,html}`. Parse `report.json`; Codex plugin facts, when present, live under `codex_plugin_surface`. **Read these first for release gating (v0.8+):** - `release_decision.decision` ∈ `{"blocked", "review_required", "insufficient_evidence", "passed"}` — baseline-aware. This is the gating signal. `insufficient_evidence` (v0.14+) fires when evidence coverage is degraded past threshold; treat unknown future values as `review_required`. @@ -111,7 +111,7 @@ agent-related PRs should use `agents-shipgate verify` after this adoption step. 9. 
**Report back to the user**: - `release_decision.decision` and `release_decision.reason` (the gating signal — baseline-aware, v0.8+) - Blocker / review-item counts (`len(release_decision.blockers)` / `len(release_decision.review_items)`) - - The path to the Release Evidence Packet (`agents-shipgate-reports/packet.md`) for reviewer-shaped output + - The path to the supporting Release Evidence Packet (`agents-shipgate-reports/packet.md`) for reviewer-shaped output - The top 3 active critical/high findings (use `report.json`, not stdout) - Which patches were applied (count from `apply-patches --json` output's `files`) - Any check IDs the user should investigate first — link to `docs_url` from the finding for full rationale, or use `$SG explain --json` for the same content via CLI diff --git a/prompts/verify-agent-diff.md b/prompts/verify-agent-diff.md index 776c988d..ec706e11 100644 --- a/prompts/verify-agent-diff.md +++ b/prompts/verify-agent-diff.md @@ -68,13 +68,13 @@ work is complete. 5. **Read JSON, not Markdown.** - `agents-shipgate-reports/verifier.json` is the PR/controller artifact. - - Lead with `merge_verdict`, then inspect `capability_review.top_changes[]`, - `first_next_action.actor`, and `fix_task.safe_to_attempt`. + - Lead with `merge_verdict`, `applicability`, and `agent_controller`, then + inspect `first_next_action.actor` and `fix_task.safe_to_attempt`. - `agents-shipgate-reports/report.json` is the release-gate artifact. - `release_decision.decision` is the only gate signal. - - `verifier_summary` is a one-fetch composition for controller output; its - `verdict` mirrors `release_decision.decision` and never gates - independently. + - `capability_review.top_changes[]` and `verifier_summary` are + supporting/provisional composition summaries; verdict-like values mirror + `release_decision.decision` and never gate independently. 6. 
**Do not bypass the verifier.** Do not suppress findings, lower severity, expand baselines or waivers, remove Shipgate CI, or soften agent @@ -111,7 +111,9 @@ work is complete. - `agents-shipgate-reports/report.json` exists and parses. - `agents-shipgate-reports/verifier.json` exists and parses. - `verifier.json.merge_verdict` is surfaced to the user. -- `capability_review.top_changes[]` is considered before generic findings. +- `applicability` and `agent_controller` are considered before generic findings. +- `capability_review.top_changes[]` is treated as supporting/provisional review + context. - `report.json.release_decision.decision` is surfaced to the user. - If `verifier_summary.protected_surface_touched` or `policy_weakened` is true, the response names the human-review requirement. diff --git a/shipgate-self.yaml b/shipgate-self.yaml new file mode 100644 index 00000000..3d185038 --- /dev/null +++ b/shipgate-self.yaml @@ -0,0 +1,48 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/ThreeMoonsLab/agents-shipgate/main/docs/manifest-v0.1.json +version: "0.1" + +project: + name: agents-shipgate + owner: Three Moons Lab + repo: ThreeMoonsLab/agents-shipgate + +agent: + name: Agents Shipgate self-governance surfaces + declared_purpose: + - verify the packaged Agents Shipgate Codex plugin surface + - review bundled skill metadata and static plugin declarations + - keep self-dogfood coverage separate from scanner-source CI tests + prohibited_actions: + - execute agent, tool, MCP server, hook, or LLM calls during static scanning + - treat this manifest as coverage for Shipgate Python scanner internals + - weaken Shipgate release policy or CI to make a verifier run pass + instructions_preview: | + This self-dogfood manifest scans the local Codex plugin package that ships + Agents Shipgate workflow instructions. It is intentionally separate from + the root shipgate.yaml marketplace scan. 
Python scanner implementation + assurance remains covered by the normal CI test, coverage, audit, SBOM, and + release-signing gates. + +environment: + target: production_like + +tool_sources: + - id: agents_shipgate_codex_plugin_package + type: codex_plugin + mode: package + path: plugins/agents-shipgate + +ci: + mode: advisory + fail_on: + - critical + - high + pr_comment: false + upload_artifact: true + +output: + directory: agents-shipgate-reports/self + formats: + - markdown + - json + - sarif diff --git a/skills/agents-shipgate/SKILL.md b/skills/agents-shipgate/SKILL.md index 6b6d2509..e7780a8f 100644 --- a/skills/agents-shipgate/SKILL.md +++ b/skills/agents-shipgate/SKILL.md @@ -76,9 +76,9 @@ For non-GitHub CI (GitLab, CircleCI, Jenkins, Azure Pipelines, Buildkite, Bitbuc - **Installed CLI contract**: when available, run `agents-shipgate contract --json` to verify local schema versions, capability/research surfaces, `release_decision.decision`, and manual-review signal fields. Older installs should use [`docs/agent-contract-current.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/agent-contract-current.md) or upgrade before automating against the local contract command. - **Verifier JSON**: `verifier_schema_version: "0.1"`. Read `merge_verdict`, `can_merge_without_human`, `first_next_action`, `fix_task`, `capability_review.top_changes`, `trust_root_touched`, and `policy_weakened` before summarizing an AI-generated PR. `merge_verdict` is a deterministic projection; the gate remains `report.json.release_decision.decision`. - **Verify run JSON**: `verify-run.json` uses `schema_version: "shipgate.verify_run/v1"` and records stable run identity, subject refs, input hashes, outcome, and artifact hashes. It is the reproducibility artifact for `verify`; do not treat it as a second gate. -- **Report JSON**: `report_schema_version: "0.27"`. 
Read `release_decision.decision` first for release gating; use `agent_summary` / `findings[].agent_action` for agent routing and `reviewer_summary` for the human-review entry point. v0.27 adds policy-pack distribution metadata (`loaded_policy_packs[].{source,sha256,sha256_status,owner}`) for organization audit while preserving gate behavior. v0.26 added structured evidence gaps (`release_decision.evidence_coverage.evidence_gaps[]` — one actionable remediation row per low-confidence tool or source warning) plus the advisory `suggested-inventory.json` skeleton written next to `report.json`; when the decision is `insufficient_evidence`, follow each gap's `next_action` instead of guessing. v0.25 added opt-in capability-linked local trace/provenance evidence (`capability_runtime_evidence`, `findings[].capability_trace_refs`, and mirrored `ReleaseDecisionItem.capability_trace_refs`) while preserving fingerprints, baselines, and gate behavior. v0.24 added capability-native policy evidence (`findings[].capability_refs`, optional `findings[].capability_policy_evidence`, and mirrored `ReleaseDecisionItem.capability_refs`). v0.23 added semantic metadata to `capability_change` members while preserving existing buckets. v0.22 added the verifier-cycle blocks `capability_change`, `protected_surface_changes`, `effective_policy`, `human_ack`, and `verifier_summary` — all reviewer-facing projections that never gate independently (`release_decision.decision` stays the only gate). To remove heuristic findings from the active gate, rerun scan with `--no-heuristics`; filtered findings remain in `findings[]` with `suppressed=true`, and `heuristics_filter` records `enabled`, `excluded_provenance_kinds`, `filtered_finding_count`, and `filtered_by_kind`; `runtime_trace` findings are not filtered as heuristics. 
To inspect provenance without changing gate behavior, use `agents-shipgate findings --from agents-shipgate-reports/report.json --provenance-kind keyword_heuristic,regex_heuristic,runtime_trace --json`. Do not gate on `summary.status`; it is legacy and baseline-blind. The full field list lives in [`docs/agent-contract-current.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/agent-contract-current.md#read-these-first-for-release-gating), and reports validate against [`docs/report-schema.v0.27.json`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/report-schema.v0.27.json). -- **Release Evidence Packet**: `agents-shipgate-reports/packet.{md,json,html}` (and `packet.pdf` with the `[pdf]` extras) is emitted alongside the report by default. The packet has fixed reviewer sections governed by [`docs/packet-schema.v0.7.json`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/packet-schema.v0.7.json) (latest; v0.7 adds capability-linked local trace evidence summary and trace refs under `human_in_the_loop`). See [STABILITY.md §Release Evidence Packet](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/STABILITY.md#release-evidence-packet-v07). Use the packet for reviewer-shaped output; use the report for finding details. -- **Capability standard**: `agents-shipgate capability export` emits a stable static capability lock (`capability_lock_schema_version: "0.2"`) and `agents-shipgate capability diff` emits a stable semantic diff (`capability_lock_diff_schema_version: "0.3"`). These artifacts are non-gating, exclude runtime trace evidence, and are documented in [`docs/capability-standard.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/capability-standard.md). +- **Report JSON**: `report_schema_version: "0.27"`. Read `release_decision.decision` first for release gating; use `agent_summary` / `findings[].agent_action` for agent routing and `reviewer_summary` for the human-review entry point. 
v0.27 adds policy-pack distribution metadata (`loaded_policy_packs[].{source,sha256,sha256_status,owner}`) for organization audit while preserving gate behavior. v0.26 added structured evidence gaps (`release_decision.evidence_coverage.evidence_gaps[]` — one actionable remediation row per low-confidence tool or source warning) plus the advisory `suggested-inventory.json` skeleton written next to `report.json`; when the decision is `insufficient_evidence`, follow each gap's `next_action` instead of guessing. v0.25 added opt-in capability-linked local trace/provenance evidence (`capability_runtime_evidence`, `findings[].capability_trace_refs`, and mirrored `ReleaseDecisionItem.capability_trace_refs`) while preserving fingerprints, baselines, and gate behavior. v0.24 added capability-native policy evidence (`findings[].capability_refs`, optional `findings[].capability_policy_evidence`, and mirrored `ReleaseDecisionItem.capability_refs`). v0.23 added semantic metadata to `capability_change` members while preserving existing buckets. v0.22 added the verifier-cycle blocks `capability_change`, `protected_surface_changes`, `effective_policy`, `human_ack`, and `verifier_summary` — all reviewer-facing projections that never gate independently (`release_decision.decision` stays the only gate). `reviewer_summary`, `verifier_summary`, runtime trace/evidence fields, and non-gating capability diff projections are supporting/provisional explanatory context. To remove heuristic findings from the active gate, rerun scan with `--no-heuristics`; filtered findings remain in `findings[]` with `suppressed=true`, and `heuristics_filter` records `enabled`, `excluded_provenance_kinds`, `filtered_finding_count`, and `filtered_by_kind`; `runtime_trace` findings are not filtered as heuristics. To inspect provenance without changing gate behavior, use `agents-shipgate findings --from agents-shipgate-reports/report.json --provenance-kind keyword_heuristic,regex_heuristic,runtime_trace --json`. 
Do not gate on `summary.status`; it is legacy and baseline-blind. The full field list lives in [`docs/agent-contract-current.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/agent-contract-current.md#read-these-first-for-release-gating), and reports validate against [`docs/report-schema.v0.27.json`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/report-schema.v0.27.json). +- **Release Evidence Packet**: `agents-shipgate-reports/packet.{md,json,html}` (and `packet.pdf` with the `[pdf]` extras) is emitted alongside the report by default as a supporting/provisional reviewer artifact. The packet has fixed reviewer sections governed by [`docs/packet-schema.v0.7.json`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/packet-schema.v0.7.json) (latest; v0.7 adds capability-linked local trace evidence summary and trace refs under `human_in_the_loop`). See [STABILITY.md §Release Evidence Packet](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/STABILITY.md#release-evidence-packet-v07). Use the packet for reviewer-shaped output; use the report for finding details. +- **Capability standard**: `agents-shipgate capability export` emits a stable static capability lock (`capability_lock_schema_version: "0.2"`) and `agents-shipgate capability diff` emits a stable semantic diff (`capability_lock_diff_schema_version: "0.3"`). These artifacts are supporting/provisional, non-gating, exclude runtime trace evidence, and are documented in [`docs/capability-standard.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/capability-standard.md). - **Governance benchmark**: `benchmark/agent-pr-governance/cases.yaml` and `scripts/run_governance_benchmark.py` are the stable research benchmark substrate (`governance_benchmark_result_schema_version: "0.2"`), not a release gate. See [`docs/governance-benchmark.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/governance-benchmark.md). 
- **Single source of truth for the contract**: [`docs/agent-contract-current.md`](https://github.com/ThreeMoonsLab/agents-shipgate/blob/main/docs/agent-contract-current.md). When the schema bumps, that file updates first. - **Exit codes**: `0` pass, `2` config error, `3` parse error, `4` other error, `20` strict-mode gate failure. diff --git a/skills/agents-shipgate/prompts/add-shipgate-to-repo.md b/skills/agents-shipgate/prompts/add-shipgate-to-repo.md index 083a4e5c..01b03928 100644 --- a/skills/agents-shipgate/prompts/add-shipgate-to-repo.md +++ b/skills/agents-shipgate/prompts/add-shipgate-to-repo.md @@ -70,7 +70,7 @@ agent-related PRs should use `agents-shipgate verify` after this adoption step. ```bash $SG scan -c shipgate.yaml --suggest-patches --format json --ci-mode advisory ``` - The report lands at `agents-shipgate-reports/report.json`. The Release Evidence Packet lands at `agents-shipgate-reports/packet.{md,json,html}`. Parse `report.json`; Codex plugin facts, when present, live under `codex_plugin_surface`. + The report lands at `agents-shipgate-reports/report.json`. The supporting Release Evidence Packet lands at `agents-shipgate-reports/packet.{md,json,html}`. Parse `report.json`; Codex plugin facts, when present, live under `codex_plugin_surface`. **Read these first for release gating (v0.8+):** - `release_decision.decision` ∈ `{"blocked", "review_required", "insufficient_evidence", "passed"}` — baseline-aware. This is the gating signal. `insufficient_evidence` (v0.14+) fires when evidence coverage is degraded past threshold; treat unknown future values as `review_required`. @@ -111,7 +111,7 @@ agent-related PRs should use `agents-shipgate verify` after this adoption step. 9. 
**Report back to the user**: - `release_decision.decision` and `release_decision.reason` (the gating signal — baseline-aware, v0.8+) - Blocker / review-item counts (`len(release_decision.blockers)` / `len(release_decision.review_items)`) - - The path to the Release Evidence Packet (`agents-shipgate-reports/packet.md`) for reviewer-shaped output + - The path to the supporting Release Evidence Packet (`agents-shipgate-reports/packet.md`) for reviewer-shaped output - The top 3 active critical/high findings (use `report.json`, not stdout) - Which patches were applied (count from `apply-patches --json` output's `files`) - Any check IDs the user should investigate first — link to `docs_url` from the finding for full rationale, or use `$SG explain --json` for the same content via CLI diff --git a/skills/agents-shipgate/prompts/verify-agent-diff.md b/skills/agents-shipgate/prompts/verify-agent-diff.md index 776c988d..ec706e11 100644 --- a/skills/agents-shipgate/prompts/verify-agent-diff.md +++ b/skills/agents-shipgate/prompts/verify-agent-diff.md @@ -68,13 +68,13 @@ work is complete. 5. **Read JSON, not Markdown.** - `agents-shipgate-reports/verifier.json` is the PR/controller artifact. - - Lead with `merge_verdict`, then inspect `capability_review.top_changes[]`, - `first_next_action.actor`, and `fix_task.safe_to_attempt`. + - Lead with `merge_verdict`, `applicability`, and `agent_controller`, then + inspect `first_next_action.actor` and `fix_task.safe_to_attempt`. - `agents-shipgate-reports/report.json` is the release-gate artifact. - `release_decision.decision` is the only gate signal. - - `verifier_summary` is a one-fetch composition for controller output; its - `verdict` mirrors `release_decision.decision` and never gates - independently. + - `capability_review.top_changes[]` and `verifier_summary` are + supporting/provisional composition summaries; verdict-like values mirror + `release_decision.decision` and never gate independently. 6. 
**Do not bypass the verifier.** Do not suppress findings, lower severity, expand baselines or waivers, remove Shipgate CI, or soften agent @@ -111,7 +111,9 @@ work is complete. - `agents-shipgate-reports/report.json` exists and parses. - `agents-shipgate-reports/verifier.json` exists and parses. - `verifier.json.merge_verdict` is surfaced to the user. -- `capability_review.top_changes[]` is considered before generic findings. +- `applicability` and `agent_controller` are considered before generic findings. +- `capability_review.top_changes[]` is treated as supporting/provisional review + context. - `report.json.release_decision.decision` is surfaced to the user. - If `verifier_summary.protected_surface_touched` or `policy_weakened` is true, the response names the human-review requirement. diff --git a/src/agents_shipgate/cli/discovery/agent_instructions/renderers/agents_md.py b/src/agents_shipgate/cli/discovery/agent_instructions/renderers/agents_md.py index 786992f5..a995cd5c 100644 --- a/src/agents_shipgate/cli/discovery/agent_instructions/renderers/agents_md.py +++ b/src/agents_shipgate/cli/discovery/agent_instructions/renderers/agents_md.py @@ -66,6 +66,8 @@ def render_block() -> str: `agents-shipgate-reports/verify-run.json` for reproducibility metadata, and `agents-shipgate-reports/report.json.release_decision.decision` for the release gate. +Legacy `agent-result.json` surfaces, where present, are supporting/provisional +projections and not the CI gate. Auto-apply only high-confidence safe patches. 
Do not auto-assert approval, confirmation, idempotency, broad-scope, or prohibited-action policy decisions; diff --git a/src/agents_shipgate/cli/discovery/agent_instructions/renderers/claude_md.py b/src/agents_shipgate/cli/discovery/agent_instructions/renderers/claude_md.py index c01b6de1..7627f71c 100644 --- a/src/agents_shipgate/cli/discovery/agent_instructions/renderers/claude_md.py +++ b/src/agents_shipgate/cli/discovery/agent_instructions/renderers/claude_md.py @@ -57,6 +57,8 @@ def render_block() -> str: `agents-shipgate-reports/verify-run.json` for reproducibility metadata, and `agents-shipgate-reports/report.json.release_decision.decision` for the release gate. +Legacy `agent-result.json` surfaces, where present, are supporting/provisional +projections and not the CI gate. Use `apply-patches --confidence high --apply` only for high-confidence safe patches. Approval, confirmation, idempotency, broad-scope, and prohibited-action diff --git a/src/agents_shipgate/cli/discovery/agent_instructions/renderers/cursor.py b/src/agents_shipgate/cli/discovery/agent_instructions/renderers/cursor.py index 7c2fd6e8..74efee6c 100644 --- a/src/agents_shipgate/cli/discovery/agent_instructions/renderers/cursor.py +++ b/src/agents_shipgate/cli/discovery/agent_instructions/renderers/cursor.py @@ -87,6 +87,8 @@ def render_file() -> str: `agents-shipgate-reports/verify-run.json` for reproducibility metadata, and `agents-shipgate-reports/report.json.release_decision.decision` for the release gate. +Legacy `agent-result.json` surfaces, where present, are supporting/provisional +projections and not the CI gate. Apply only high-confidence safe patches. Do not invent approval, confirmation, or idempotency evidence. 
diff --git a/src/agents_shipgate/cli/skill.py b/src/agents_shipgate/cli/skill.py index 752648f5..562c7042 100644 --- a/src/agents_shipgate/cli/skill.py +++ b/src/agents_shipgate/cli/skill.py @@ -12,7 +12,10 @@ from agents_shipgate.skill.runner import run_skill_review skill_app = typer.Typer( - help="Lint and security-review agent skill and instruction artifacts.", + help=( + "Supporting/provisional lint and security review for agent skill " + "and instruction artifacts." + ), no_args_is_help=True, ) diff --git a/src/agents_shipgate/cli/verify/orchestrator.py b/src/agents_shipgate/cli/verify/orchestrator.py index dfc899dd..64eb8f01 100644 --- a/src/agents_shipgate/cli/verify/orchestrator.py +++ b/src/agents_shipgate/cli/verify/orchestrator.py @@ -111,6 +111,66 @@ def run_verify( verify_run_path = out_dir / "verify-run.json" pr_comment_path = out_dir / "pr-comment.md" + if not config_path.is_file(): + trigger = evaluate( + paths=[], + diff_text="", + manifest_present=False, + user_requested=True, + ) + message = ( + f"Shipgate config not found at {_display_path(config_path, git_root)}. " + "Correct --config, or run `agents-shipgate verify --preview --json` " + "and `agents-shipgate detect --workspace . --json` before initializing." + ) + verifier = _build_verifier( + git_root=git_root, + config_path=config_path, + base=base, + head=head, + changed_files=[], + diff_text="", + trigger=trigger, + base_status="not_requested", + base_tree=None, + base_report=None, + base_notes=[], + report=None, + head_status="failed", + head_exit_code=2, + out_dir=out_dir, + ci_mode=ci_mode, + headline_override=message, + human_review_override=VerifierHumanReview(required=True, why=message), + first_next_action_override=VerifierNextAction( + actor="coding_agent", + kind="command", + command="agents-shipgate verify --preview --json", + why=( + "Shipgate could not find the configured manifest; preview or " + "detect the workspace, then correct --config or initialize " + "shipgate.yaml." 
+ ), + ), + ) + _remove_scan_artifacts(out_dir) + _write_artifacts( + verifier, + verifier_path, + verify_run_path, + pr_comment_path, + report=None, + git_root=git_root, + config_path=config_path, + baseline_path=baseline_path, + policy_pack_paths=policy_pack_paths or [], + plugins_enabled=plugins_enabled, + no_heuristics=no_heuristics, + fail_on=fail_on, + pr_comment_style=pr_comment_style, + ) + return verifier, None, 2 + changed_files: list[str] = [] diff_text = "" base_status: VerifierBaseStatus = "not_requested" @@ -883,6 +943,9 @@ def _build_verifier( out_dir: Path, ci_mode: str | None = None, preview: bool = False, + headline_override: str | None = None, + human_review_override: VerifierHumanReview | None = None, + first_next_action_override: VerifierNextAction | None = None, ) -> VerifierArtifact: release_decision_model = report.release_decision if report is not None else None release_decision = ( @@ -900,7 +963,7 @@ def _build_verifier( applicability = applicability_for(decision=decision, head_status=head_status) agent_summary_model = report.agent_summary if report is not None else None capability_review = build_capability_review(report) if report is not None else None - human_review = _human_review( + human_review = human_review_override or _human_review( merge_verdict=merge_verdict, release_decision=release_decision_model, capability_review=capability_review, @@ -917,7 +980,7 @@ def _build_verifier( release_decision=release_decision_model, capability_review=capability_review, ) - headline = _verifier_headline( + headline = headline_override or _verifier_headline( report=report, merge_verdict=merge_verdict, head_status=head_status, @@ -978,7 +1041,8 @@ def _build_verifier( can_merge_without_human=can_merge, headline=headline, human_review=human_review, - first_next_action=_first_next_action( + first_next_action=first_next_action_override + or _first_next_action( merge_verdict=merge_verdict, fix_task=fix_task, agent_summary=agent_summary_model, @@ -1023,6 
+1087,26 @@ def _artifact_paths( } +def _remove_scan_artifacts(out_dir: Path) -> None: + for name in ( + "report.md", + "report.json", + "report.sarif", + "packet.md", + "packet.json", + "packet.html", + "packet.pdf", + "capabilities.lock.json", + "base.capabilities.lock.json", + "capability-lock-diff.json", + "capability-lock-diff.md", + ): + path = out_dir / name + if path.is_file() or path.is_symlink(): + with contextlib.suppress(OSError): + path.unlink() + + def _write_artifacts( verifier: VerifierArtifact, verifier_path: Path, diff --git a/src/agents_shipgate/schemas/contract.py b/src/agents_shipgate/schemas/contract.py index ac5aa9a5..ca883036 100644 --- a/src/agents_shipgate/schemas/contract.py +++ b/src/agents_shipgate/schemas/contract.py @@ -200,6 +200,7 @@ ) VERIFIER_READ_ORDER: tuple[str, ...] = ( "merge_verdict", + "applicability", "can_merge_without_human", "first_next_action", "fix_task", diff --git a/tests/test_agent_instructions_apply.py b/tests/test_agent_instructions_apply.py index 9fc12a27..609c0c9a 100644 --- a/tests/test_agent_instructions_apply.py +++ b/tests/test_agent_instructions_apply.py @@ -201,8 +201,9 @@ def test_local_contract_renderer_has_required_fields() -> None: assert payload["host_grants_inventory_schema_version"] == "0.1" assert payload["gating_signal"] == "release_decision.decision" assert payload["default_paths"]["local_contract"] == ".shipgate/agent-contract.json" - assert payload["verifier_read_order"][:5] == [ + assert payload["verifier_read_order"][:6] == [ "merge_verdict", + "applicability", "can_merge_without_human", "first_next_action", "fix_task", diff --git a/tests/test_agent_instructions_renderers.py b/tests/test_agent_instructions_renderers.py index 0b245058..3954e853 100644 --- a/tests/test_agent_instructions_renderers.py +++ b/tests/test_agent_instructions_renderers.py @@ -45,13 +45,13 @@ REPO_ROOT = Path(__file__).resolve().parent.parent EXPECTED_CLAUDE_CODE_SKILL_RENDER_SHA256 = { 
".claude/skills/agents-shipgate/SKILL.md": ( - "1e97a37f354f4ffd20078179a0f2d2357f3bf483f98b763d19f44a9ea208526b" + "c2b0882af212c091d1b94c6c838ab312e25455057cf57e994a49c93d84646273" ), ".claude/skills/agents-shipgate/ci-recipes/advisory-pr-comment.yml": ( "99b2acfbd9dfc6653a6bbee268b83f1e2d4297829636eba662d9f4ad6fa35423" ), ".claude/skills/agents-shipgate/prompts/add-shipgate-to-repo.md": ( - "51f63536f13e251f922b325ba0d9b536ee698c0d12266b94e5961ad297de3ff5" + "b8403d6e873fbc343eb3677fca1e117faef1ec3743befae1a1fe0bf1e5ea003d" ), ".claude/skills/agents-shipgate/prompts/decide-shipgate-relevance.md": ( "03df378c4dae05b0d7da558b3a7e868de4d1bcba5f55744615b1c2290a13879e" @@ -75,12 +75,12 @@ "992122338eba26ae5d8056b9658117d718a6b477b9928c2a438dd449b5effb68" ), ".claude/skills/agents-shipgate/prompts/verify-agent-diff.md": ( - "577100cabad0d0182dd8908209d985d2a041a1c1d42be55705085c796a0068d5" + "919059f86649c7098a75922123c988b819da3094d5bb42ac1737af25e81604de" ), } EXPECTED_CODEX_SKILL_RENDER_SHA256 = { ".agents/skills/agents-shipgate/SKILL.md": ( - "d4c306638e8c269073e8bd8b1dcfc2dc642c7561a905ebd3be14c8a32a12699d" + "bf711ad6209b4a7ea5030bb97b3c0d1ce848dc4255f868bea2329bd06f8a9999" ), ".agents/skills/agents-shipgate/agents/openai.yaml": ( "aa511e933ff663dcd1e0d2af3da2a7101206ce2bb1bb98c4dae801bb3f4e42ef" @@ -89,10 +89,10 @@ "16894ce679eb55c69213070775cb265f0775ad7ff1cd08091a5c57627950871b" ), ".agents/skills/agents-shipgate/references/recipes.md": ( - "3e55caf7cde63334278d3843dd5b988e40519768d6d09019d1c74be650336085" + "d1676a96e803a9526d715a58f458174bcb661d5c54156ecb823b0bd77bb35775" ), ".agents/skills/agents-shipgate/references/report-reading.md": ( - "ac1e4760ea72d4ff8e961484a1777c8a1392ac973e1812faaeee8ebc63470880" + "6d2848f3436f6e246bf553e6cf061c990888d6ff39eb82fec9a41f291b2e94fe" ), } diff --git a/tests/test_agent_mode.py b/tests/test_agent_mode.py index 99220014..262a6fdd 100644 --- a/tests/test_agent_mode.py +++ b/tests/test_agent_mode.py @@ -178,6 +178,25 @@ 
def _set_origin_main(repo: Path) -> None: def _docs_only_repo(tmp_path: Path) -> Path: repo = _init_repo(tmp_path) + (repo / "shipgate.yaml").write_text( + """ +version: "0.1" +project: + name: test +agent: + name: test-agent + declared_purpose: + - test +environment: + target: local +tool_sources: + - id: tools + type: mcp + path: tools.json +""".lstrip(), + encoding="utf-8", + ) + (repo / "tools.json").write_text('{"tools":[]}\n', encoding="utf-8") (repo / "README.md").write_text("base\n", encoding="utf-8") _commit_all(repo, "base") _set_origin_main(repo) @@ -209,8 +228,8 @@ def test_verify_json_shortcut_prints_verifier_artifact(tmp_path: Path) -> None: assert result.exit_code == 0, result.output payload = json.loads(result.output) assert payload["verifier_schema_version"] == "0.1" - assert payload["merge_verdict"] == "mergeable" - assert payload["can_merge_without_human"] is True + assert payload["merge_verdict"] == "human_review_required" + assert payload["can_merge_without_human"] is False # Full artifacts still land on disk for the documented file contract. 
assert (repo / "agents-shipgate-reports" / "verifier.json").is_file() assert (repo / "agents-shipgate-reports" / "verify-run.json").is_file() @@ -258,8 +277,9 @@ def test_verify_format_json_still_prints_full_verifier_artifact( assert result.exit_code == 0, result.output payload = json.loads(result.output) assert payload["verifier_schema_version"] == "0.1" - assert payload["head_status"] == "skipped" - assert payload["trigger"]["run_shipgate"] is False + assert payload["head_status"] == "succeeded" + assert payload["trigger"]["run_shipgate"] is True + assert payload["trigger"]["force_run"] is True def test_verify_agent_environment_defaults_to_verifier_json( @@ -273,7 +293,7 @@ def test_verify_agent_environment_defaults_to_verifier_json( assert result.exit_code == 0, result.output payload = json.loads(result.output) assert payload["verifier_schema_version"] == "0.1" - assert payload["merge_verdict"] == "mergeable" + assert payload["merge_verdict"] == "human_review_required" def test_verify_without_agent_environment_defaults_to_text( diff --git a/tests/test_github_action_outputs.py b/tests/test_github_action_outputs.py index 478ed079..2364a555 100644 --- a/tests/test_github_action_outputs.py +++ b/tests/test_github_action_outputs.py @@ -206,6 +206,30 @@ def test_action_outputs_include_verify_run_and_agent_controller_fields(tmp_path: assert outputs["agent_controller_stop_reason"] == "human_review_required" assert outputs["agent_controller_completion_allowed"] == "false" +def test_action_outputs_do_not_allow_failed_missing_config_verify( + tmp_path: Path, +) -> None: + output_dir = tmp_path / "agents-shipgate-reports" + output_dir.mkdir() + _write_json( + output_dir / "verifier.json", + { + "head_status": "failed", + "head_exit_code": 2, + "merge_verdict": "unknown", + "can_merge_without_human": False, + "headline": "Shipgate config not found at missing.yaml.", + "trigger": {"run_shipgate": False}, + }, + ) + + outputs = extract_outputs(output_dir) + + assert 
outputs["decision"] == "" + assert outputs["verifier_verdict"] == "failed" + assert outputs["merge_verdict"] == "unknown" + assert outputs["can_merge_without_human"] == "false" + def test_step_summary_leads_with_verifier_merge_state( tmp_path: Path, diff --git a/tests/test_public_surface_contract.py b/tests/test_public_surface_contract.py index 8063e3bd..d1e7c104 100644 --- a/tests/test_public_surface_contract.py +++ b/tests/test_public_surface_contract.py @@ -1460,12 +1460,25 @@ def test_well_known_seo_geo_positioning_fields_are_pinned(): assert data.get("static_scan_fixture_run") == ( "agents-shipgate fixture run support_refund_agent" ) - assert data.get("verifier_read_order", [])[:5] == [ + assert data.get("verifier_read_order", [])[:7] == [ "merge_verdict", + "applicability", "can_merge_without_human", "first_next_action", "fix_task", "capability_review.top_changes", + "agent_controller", + ] + assert data.get("supporting_provisional_surfaces", []) == [ + "agent_result", + "agent_decision", + "release_evidence_packet", + "reviewer_summary", + "verifier_summary", + "capability_review", + "runtime_trace_evidence", + "capability_diff_projections", + "skill_review", ] recommended_topics = data.get("recommended_github_topics", []) @@ -1805,6 +1818,31 @@ def test_pre_commit_hook_regex_skips_docs_only_paths(): ) +def test_self_dogfood_manifest_scans_codex_plugin_package() -> None: + """The internal self-dogfood gate must stay on supported static surfaces. + + It intentionally scans the shipped Codex plugin package, not the Python + scanner implementation. Scanner-source assurance lives in normal CI. 
+ """ + import yaml + + manifest = yaml.safe_load(_read("shipgate-self.yaml")) + assert manifest["tool_sources"] == [ + { + "id": "agents_shipgate_codex_plugin_package", + "type": "codex_plugin", + "mode": "package", + "path": "plugins/agents-shipgate", + } + ] + assert manifest["output"]["directory"] == "agents-shipgate-reports/self" + + workflow = _read(".github/workflows/agents-shipgate-self.yml") + assert "config: shipgate-self.yaml" in workflow + assert "verify_mode: verify" in workflow + assert "fail_on_merge_verdicts: blocked" in workflow + + def test_pre_commit_local_docs_show_same_path_trigger_clauses(): """The copy-paste `repo: local` snippet must not lag the root hook. diff --git a/tests/test_verifier_scenarios.py b/tests/test_verifier_scenarios.py index cd9991dc..24f39605 100644 --- a/tests/test_verifier_scenarios.py +++ b/tests/test_verifier_scenarios.py @@ -257,7 +257,7 @@ def test_scenario_agent_weakens_shipgate_policy_touches_trust_root( assert payload["capability_review"]["trust_root_touched"] is True -def test_scenario_docs_only_no_shipgate_skips(tmp_path: Path) -> None: +def test_scenario_docs_only_no_shipgate_fails_closed(tmp_path: Path) -> None: repo = _init_repo(tmp_path) (repo / "README.md").write_text("hello\n", encoding="utf-8") _commit(repo, "base docs") @@ -265,12 +265,31 @@ def test_scenario_docs_only_no_shipgate_skips(tmp_path: Path) -> None: (repo / "README.md").write_text("hello world\n", encoding="utf-8") _commit(repo, "docs only") - payload = _verify(repo) + result = runner.invoke( + app, + [ + "verify", + "--workspace", + str(repo), + "--config", + "shipgate.yaml", + "--base", + "origin/main", + "--head", + "HEAD", + "--format", + "json", + ], + ) + assert result.exit_code == 2, result.output + payload = json.loads(result.output) assert payload["trigger"]["should_run"] is False - assert payload["head_status"] == "skipped" - assert payload["merge_verdict"] == "mergeable" - assert payload["can_merge_without_human"] is True + assert 
payload["head_status"] == "failed" + assert payload["merge_verdict"] == "unknown" + assert payload["applicability"] == "unknown" + assert payload["can_merge_without_human"] is False + assert not (repo / "agents-shipgate-reports" / "report.json").exists() def test_scenario_docs_only_with_shipgate_yaml_force_runs(tmp_path: Path) -> None: diff --git a/tests/test_verify.py b/tests/test_verify.py index f2299d66..a7fb57c7 100644 --- a/tests/test_verify.py +++ b/tests/test_verify.py @@ -57,13 +57,14 @@ runner = CliRunner() -def test_verify_trigger_skip_writes_lightweight_artifacts(tmp_path: Path) -> None: - repo = _init_repo(tmp_path) - (repo / "README.md").write_text("base\n", encoding="utf-8") - _commit_all(repo, "base") +def test_verify_manifest_present_force_runs_even_docs_only_diff(tmp_path: Path) -> None: + repo = _repo_with_manifest(tmp_path) _set_origin_main(repo) (repo / "README.md").write_text("docs only\n", encoding="utf-8") _commit_all(repo, "docs") + out_dir = repo / "agents-shipgate-reports" + out_dir.mkdir() + (out_dir / "report.json").write_text('{"stale": true}\n', encoding="utf-8") result = runner.invoke( app, @@ -84,21 +85,139 @@ def test_verify_trigger_skip_writes_lightweight_artifacts(tmp_path: Path) -> Non assert result.exit_code == 0, result.output payload = json.loads(result.output) - assert payload["head_status"] == "skipped" - assert payload["trigger"]["run_shipgate"] is False + assert payload["head_status"] == "succeeded" + assert payload["trigger"]["run_shipgate"] is True + assert payload["trigger"]["force_run"] is True verifier_json = repo / "agents-shipgate-reports" / "verifier.json" pr_comment = repo / "agents-shipgate-reports" / "pr-comment.md" assert verifier_json.is_file() assert pr_comment.is_file() - assert not (repo / "agents-shipgate-reports" / "report.json").exists() - assert "report_json" not in payload["artifacts"] - assert payload["base_status"] == "skipped" + assert (repo / "agents-shipgate-reports" / "report.json").is_file() + 
assert "report_json" in payload["artifacts"] + assert payload["base_status"] == "succeeded" -def test_verify_warns_when_reports_directory_is_staged(tmp_path: Path) -> None: +def test_verify_missing_config_docs_only_diff_fails_closed(tmp_path: Path) -> None: + repo = _init_repo(tmp_path) + (repo / "README.md").write_text("base\n", encoding="utf-8") + _commit_all(repo, "base") + _set_origin_main(repo) + (repo / "README.md").write_text("docs only\n", encoding="utf-8") + _commit_all(repo, "docs") + out_dir = repo / "agents-shipgate-reports" + out_dir.mkdir() + (out_dir / "report.json").write_text('{"stale": true}\n', encoding="utf-8") + + result = runner.invoke( + app, + [ + "verify", + "--workspace", + str(repo), + "--config", + "missing.yaml", + "--base", + "origin/main", + "--head", + "HEAD", + "--format", + "json", + ], + ) + + assert result.exit_code == 2, result.output + payload = json.loads(result.output) + assert payload["head_status"] == "failed" + assert payload["head_exit_code"] == 2 + assert payload["merge_verdict"] == "unknown" + assert payload["applicability"] == "unknown" + assert payload["can_merge_without_human"] is False + assert payload["release_decision"] is None + assert "correct --config" in payload["headline"].lower() + assert payload["human_review"]["required"] is True + assert "verify --preview --json" in payload["human_review"]["why"] + assert payload["first_next_action"]["command"] == ( + "agents-shipgate verify --preview --json" + ) + assert (out_dir / "verifier.json").is_file() + assert (out_dir / "verify-run.json").is_file() + assert (out_dir / "agent-handoff.json").is_file() + assert (out_dir / "pr-comment.md").is_file() + assert not (out_dir / "agent-result.json").exists() + assert not (out_dir / "report.json").exists() + + +def test_verify_json_missing_config_emits_verifier_unknown(tmp_path: Path) -> None: repo = _init_repo(tmp_path) (repo / "README.md").write_text("base\n", encoding="utf-8") _commit_all(repo, "base") + + result = 
runner.invoke( + app, + [ + "verify", + "--workspace", + str(repo), + "--config", + "missing.yaml", + "--json", + ], + ) + + assert result.exit_code == 2, result.output + payload = json.loads(result.output) + assert payload["merge_verdict"] == "unknown" + assert payload["applicability"] == "unknown" + assert payload["head_status"] == "failed" + assert payload["head_exit_code"] == 2 + assert payload["can_merge_without_human"] is False + assert "schema_version" not in payload + assert not (repo / "agents-shipgate-reports" / "report.json").exists() + + +def test_verify_missing_config_relevant_diff_fails_before_head_scan( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo = _init_repo(tmp_path) + (repo / "tools.json").write_text('{"tools":[]}\n', encoding="utf-8") + _commit_all(repo, "base") + _set_origin_main(repo) + (repo / "tools.json").write_text( + '{"tools":[{"name":"delete_files","description":"Delete files."}]}\n', + encoding="utf-8", + ) + _commit_all(repo, "head") + calls: list[dict[str, Any]] = [] + _patch_run_scan(monkeypatch, calls, head_exit=0) + + result = runner.invoke( + app, + [ + "verify", + "--workspace", + str(repo), + "--config", + "missing.yaml", + "--base", + "origin/main", + "--head", + "HEAD", + "--format", + "json", + ], + ) + + assert result.exit_code == 2, result.output + payload = json.loads(result.output) + assert payload["head_status"] == "failed" + assert payload["merge_verdict"] == "unknown" + assert payload["can_merge_without_human"] is False + assert calls == [] + + +def test_verify_warns_when_reports_directory_is_staged(tmp_path: Path) -> None: + repo = _repo_with_manifest(tmp_path) _set_origin_main(repo) (repo / "README.md").write_text("docs only\n", encoding="utf-8") _commit_all(repo, "docs") @@ -128,7 +247,7 @@ def test_verify_warns_when_reports_directory_is_staged(tmp_path: Path) -> None: ) # Advisory only: the staged-reports nudge never changes the verdict or - # the exit code (this is a trigger-skip, so 
exit 0 is unchanged). + # the exit code. assert result.exit_code == 0, result.output assert "warning:" in result.output assert "agents-shipgate-reports/" in result.output @@ -138,9 +257,7 @@ def test_verify_warns_when_reports_directory_is_staged(tmp_path: Path) -> None: def test_verify_warns_on_staged_reports_from_subdirectory_workspace( tmp_path: Path, ) -> None: - repo = _init_repo(tmp_path) - (repo / "README.md").write_text("base\n", encoding="utf-8") - _commit_all(repo, "base") + repo = _repo_with_manifest(tmp_path) _set_origin_main(repo) (repo / "README.md").write_text("docs only\n", encoding="utf-8") _commit_all(repo, "docs") @@ -181,9 +298,7 @@ def test_verify_warns_on_staged_reports_from_subdirectory_workspace( def test_verify_no_staged_reports_warning_and_stdout_json_is_clean( tmp_path: Path, ) -> None: - repo = _init_repo(tmp_path) - (repo / "README.md").write_text("base\n", encoding="utf-8") - _commit_all(repo, "base") + repo = _repo_with_manifest(tmp_path) _set_origin_main(repo) (repo / "README.md").write_text("docs only\n", encoding="utf-8") _commit_all(repo, "docs") @@ -209,10 +324,10 @@ def test_verify_no_staged_reports_warning_and_stdout_json_is_clean( # pure JSON for agent consumers. 
assert "report file(s) staged" not in result.output payload = json.loads(result.output) - assert payload["head_status"] == "skipped" + assert payload["head_status"] == "succeeded" -def test_verify_missing_base_without_manifest_is_unknown_not_mergeable( +def test_verify_missing_config_takes_precedence_over_missing_base( tmp_path: Path, ) -> None: repo = _init_repo(tmp_path) @@ -243,9 +358,10 @@ def test_verify_missing_base_without_manifest_is_unknown_not_mergeable( assert result.exit_code == 2, result.output payload = json.loads(result.output) - assert payload["base_status"] == "ref_missing" + assert payload["base_status"] == "not_requested" assert payload["head_status"] == "failed" assert payload["merge_verdict"] == "unknown" + assert payload["applicability"] == "unknown" assert payload["can_merge_without_human"] is False assert not (repo / "agents-shipgate-reports" / "report.json").exists() diff --git a/tests/test_verify_auto_base.py b/tests/test_verify_auto_base.py index 32130e32..dc9bbee4 100644 --- a/tests/test_verify_auto_base.py +++ b/tests/test_verify_auto_base.py @@ -41,6 +41,25 @@ def _commit_all(repo: Path, message: str) -> None: def _docs_only_repo_with_origin_main(tmp_path: Path) -> Path: repo = _init_repo(tmp_path) + (repo / "shipgate.yaml").write_text( + """ +version: "0.1" +project: + name: test +agent: + name: test-agent + declared_purpose: + - test +environment: + target: local +tool_sources: + - id: tools + type: mcp + path: tools.json +""".lstrip(), + encoding="utf-8", + ) + (repo / "tools.json").write_text('{"tools":[]}\n', encoding="utf-8") (repo / "README.md").write_text("base\n", encoding="utf-8") _commit_all(repo, "base") _git(repo, "update-ref", "refs/remotes/origin/main", "HEAD")