diff --git a/.github/workflows/build_registry.py b/.github/workflows/build_registry.py
index 8469b1df..1b596f43 100644
--- a/.github/workflows/build_registry.py
+++ b/.github/workflows/build_registry.py
@@ -7,17 +7,14 @@
 import os
 import re
 import sys
-import urllib.error
-import urllib.request
 import xml.etree.ElementTree as ET
 from pathlib import Path

 from registry_utils import (
-    extract_npm_package_name,
     extract_npm_package_version,
-    extract_pypi_package_name,
     normalize_version,
     should_skip_dir,
+    validate_distribution_urls,
 )

 try:
@@ -48,40 +45,6 @@
 PREFERRED_ICON_SIZE = 16
 ALLOWED_FILL_STROKE_VALUES = {"currentcolor", "none", "inherit"}

-# URL validation
-SKIP_URL_VALIDATION = os.environ.get("SKIP_URL_VALIDATION", "").lower() in (
-    "1",
-    "true",
-    "yes",
-)
-
-
-def url_exists(url: str, method: str = "HEAD", retries: int = 3) -> bool:
-    """Check if a URL exists using HEAD or GET request with retries."""
-    import time
-
-    for attempt in range(retries):
-        try:
-            req = urllib.request.Request(url, method=method)
-            req.add_header("User-Agent", "ACP-Registry-Validator/1.0")
-            with urllib.request.urlopen(req, timeout=15) as response:
-                return response.status in (200, 301, 302)
-        except urllib.error.HTTPError as e:
-            # Some servers don't support HEAD, try GET
-            if method == "HEAD" and e.code in (403, 405):
-                return url_exists(url, method="GET", retries=retries - attempt)
-            if attempt < retries - 1 and e.code in (429, 500, 502, 503, 504):
-                time.sleep(2**attempt)
-                continue
-            return False
-        except (urllib.error.URLError, TimeoutError, OSError):
-            if attempt < retries - 1:
-                time.sleep(2**attempt)
-                continue
-            return False
-    return False
-
-
 def extract_version_from_url(url: str) -> str | None:
     """Extract version from binary archive URL."""
     # GitHub releases: /download/v1.0.0/ or /releases/v1.0.0/
@@ -147,44 +110,6 @@ def validate_distribution_versions(agent_version: str, distribution: dict) -> li
     return errors


-def validate_distribution_urls(distribution: dict) -> list[str]:
-    """Validate that distribution URLs exist."""
-    if SKIP_URL_VALIDATION:
-        return []
-
-    errors = []
-
-    # Check binary archive URLs
-    if "binary" in distribution:
-        for platform, target in distribution["binary"].items():
-            if "archive" in target:
-                url = target["archive"]
-                if not url_exists(url):
-                    errors.append(f"Binary archive URL not accessible for {platform}: {url}")
-
-    # Check npm package URLs (registry.npmjs.org)
-    seen_npm = set()
-    for dist_type in ("npx",):
-        if dist_type in distribution:
-            package = distribution[dist_type].get("package", "")
-            pkg_name = extract_npm_package_name(package)
-            if pkg_name and pkg_name not in seen_npm:
-                seen_npm.add(pkg_name)
-                npm_url = f"https://registry.npmjs.org/{pkg_name}"
-                if not url_exists(npm_url):
-                    errors.append(f"npm package not found: {pkg_name}")
-
-    # Check PyPI package URLs
-    if "uvx" in distribution:
-        package = distribution["uvx"].get("package", "")
-        pkg_name = extract_pypi_package_name(package)
-        pypi_url = f"https://pypi.org/pypi/{pkg_name}/json"
-        if not url_exists(pypi_url):
-            errors.append(f"PyPI package not found: {pkg_name}")
-
-    return errors
-
-
 def validate_icon_monochrome(root: ET.Element) -> list[str]:
     """Validate that icon uses currentColor and no hardcoded colors.
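Note that the helper deleted here is moved verbatim into registry_utils.py (next diff) rather than dropped. A minimal sketch of calling it directly, assuming the working directory is .github/workflows so the module imports, with example.org standing in for a real archive URL:

# Sketch only: url_exists lives in registry_utils.py after this refactor.
# example.org is a placeholder; a real call performs live HTTP requests.
from registry_utils import url_exists

# Tries HEAD first; on 403/405 it retries the same URL with GET, and on
# 429/5xx it backs off (1 s, then 2 s) before giving up after three attempts.
print(url_exists("https://example.org/"))  # True for 200/301/302, else False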
diff --git a/.github/workflows/registry_utils.py b/.github/workflows/registry_utils.py
index 5b26904e..7ac7a67f 100644
--- a/.github/workflows/registry_utils.py
+++ b/.github/workflows/registry_utils.py
@@ -1,8 +1,12 @@
 """Shared utilities for ACP registry scripts."""

 import json
+import os
 import re
 import sys
+import time
+import urllib.error
+import urllib.request
 from pathlib import Path

 SKIP_DIRS = {
@@ -57,6 +61,80 @@ def normalize_version(version: str) -> str:
     return ".".join(parts[:3])


+# URL validation: env var lets CI / local runs disable network calls.
+SKIP_URL_VALIDATION = os.environ.get("SKIP_URL_VALIDATION", "").lower() in (
+    "1",
+    "true",
+    "yes",
+)
+
+
+def url_exists(url: str, method: str = "HEAD", retries: int = 3) -> bool:
+    """Check if a URL exists using HEAD or GET request with retries."""
+    for attempt in range(retries):
+        try:
+            req = urllib.request.Request(url, method=method)
+            req.add_header("User-Agent", "ACP-Registry-Validator/1.0")
+            with urllib.request.urlopen(req, timeout=15) as response:
+                return response.status in (200, 301, 302)
+        except urllib.error.HTTPError as e:
+            # Some servers don't support HEAD, try GET
+            if method == "HEAD" and e.code in (403, 405):
+                return url_exists(url, method="GET", retries=retries - attempt)
+            if attempt < retries - 1 and e.code in (429, 500, 502, 503, 504):
+                time.sleep(2**attempt)
+                continue
+            return False
+        except (urllib.error.URLError, TimeoutError, OSError):
+            if attempt < retries - 1:
+                time.sleep(2**attempt)
+                continue
+            return False
+    return False
+
+
+def validate_distribution_urls(distribution: dict) -> list[str]:
+    """Validate that distribution URLs exist (binary archives, npm, PyPI).
+
+    Returns a list of human-readable error strings (empty on success).
+    Honors SKIP_URL_VALIDATION env var.
+    """
+    if SKIP_URL_VALIDATION:
+        return []
+
+    errors = []
+
+    # Check binary archive URLs
+    if "binary" in distribution:
+        for platform, target in distribution["binary"].items():
+            if "archive" in target:
+                url = target["archive"]
+                if not url_exists(url):
+                    errors.append(f"Binary archive URL not accessible for {platform}: {url}")
+
+    # Check npm package URLs (registry.npmjs.org)
+    seen_npm = set()
+    for dist_type in ("npx",):
+        if dist_type in distribution:
+            package = distribution[dist_type].get("package", "")
+            pkg_name = extract_npm_package_name(package)
+            if pkg_name and pkg_name not in seen_npm:
+                seen_npm.add(pkg_name)
+                npm_url = f"https://registry.npmjs.org/{pkg_name}"
+                if not url_exists(npm_url):
+                    errors.append(f"npm package not found: {pkg_name}")
+
+    # Check PyPI package URLs
+    if "uvx" in distribution:
+        package = distribution["uvx"].get("package", "")
+        pkg_name = extract_pypi_package_name(package)
+        pypi_url = f"https://pypi.org/pypi/{pkg_name}/json"
+        if not url_exists(pypi_url):
+            errors.append(f"PyPI package not found: {pkg_name}")
+
+    return errors
+
+
 def load_quarantine(registry_dir: Path) -> dict[str, str]:
     """Load quarantine list from registry directory.
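For local spot-checking of the relocated validator, a sketch of a direct call. The distribution dict is entirely made up (it only mirrors the shape used by the tests below), and the run assumes .github/workflows is on sys.path and SKIP_URL_VALIDATION is unset:

# Sketch only: drives the relocated validate_distribution_urls directly.
# The agent distribution below is invented for illustration; real entries
# come from agent.json files in the registry.
from registry_utils import validate_distribution_urls

distribution = {
    "binary": {
        "darwin-aarch64": {
            "archive": "https://example.com/agent-1.2.3-darwin.tar.gz",
            "cmd": "./agent",
        }
    },
    "uvx": {"package": "example-agent"},
}

# Returns [] when every URL resolves; otherwise one message per failure,
# e.g. "Binary archive URL not accessible for darwin-aarch64: ...".
for error in validate_distribution_urls(distribution):
    print(error)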
diff --git a/.github/workflows/tests/test_registry_utils.py b/.github/workflows/tests/test_registry_utils.py
index 988b249a..22a5699f 100644
--- a/.github/workflows/tests/test_registry_utils.py
+++ b/.github/workflows/tests/test_registry_utils.py
@@ -3,6 +3,7 @@
 import json
 import tempfile
 from pathlib import Path
+from unittest.mock import patch

 from registry_utils import (
     extract_npm_package_name,
@@ -11,6 +12,7 @@
     load_quarantine,
     normalize_version,
     should_skip_dir,
+    validate_distribution_urls,
 )

@@ -98,6 +100,72 @@ def test_invalid_json(self):
            assert load_quarantine(Path(d)) == {}


+class TestValidateDistributionUrls:
+    """Distribution URL validation (moved from build_registry.py)."""
+
+    def test_returns_no_errors_when_all_urls_reachable(self):
+        distribution = {
+            "binary": {
+                "darwin-aarch64": {
+                    "archive": "https://example.com/agent-darwin.tar.gz",
+                    "cmd": "./agent",
+                },
+                "linux-x86_64": {
+                    "archive": "https://example.com/agent-linux.tar.gz",
+                    "cmd": "./agent",
+                },
+            }
+        }
+        with patch("registry_utils.url_exists", return_value=True):
+            assert validate_distribution_urls(distribution) == []
+
+    def test_reports_unreachable_binary_archive(self):
+        distribution = {
+            "binary": {
+                "darwin-aarch64": {
+                    "archive": "https://example.com/ok.tar.gz",
+                    "cmd": "./agent",
+                },
+                "windows-x86_64": {
+                    "archive": "https://example.com/missing-windows.zip",
+                    "cmd": "agent.exe",
+                },
+            }
+        }
+
+        def fake_url_exists(url, *_args, **_kwargs):
+            return "missing" not in url
+
+        with patch("registry_utils.url_exists", side_effect=fake_url_exists):
+            errors = validate_distribution_urls(distribution)
+
+        assert len(errors) == 1
+        assert "windows-x86_64" in errors[0]
+        assert "missing-windows.zip" in errors[0]
+
+    def test_skip_url_validation_env_returns_empty(self, monkeypatch):
+        monkeypatch.setenv("SKIP_URL_VALIDATION", "1")
+        # Re-import to pick up the patched env var.
+        import importlib
+
+        import registry_utils
+
+        importlib.reload(registry_utils)
+        try:
+            distribution = {
+                "binary": {
+                    "darwin-aarch64": {
+                        "archive": "https://example.com/anything.tar.gz",
+                        "cmd": "./agent",
+                    }
+                }
+            }
+            assert registry_utils.validate_distribution_urls(distribution) == []
+        finally:
+            monkeypatch.delenv("SKIP_URL_VALIDATION", raising=False)
+            importlib.reload(registry_utils)
+
+
 class TestShouldSkipDir:
     def test_skips_hidden_runtime_dirs(self):
         assert should_skip_dir(".sandbox")
diff --git a/.github/workflows/tests/test_update_versions.py b/.github/workflows/tests/test_update_versions.py
index a6b17181..4346a7a7 100644
--- a/.github/workflows/tests/test_update_versions.py
+++ b/.github/workflows/tests/test_update_versions.py
@@ -1,12 +1,20 @@
 """Tests for update_versions.py."""

+import json
+import tempfile
 import urllib.error
 from pathlib import Path
 from unittest.mock import patch

 import pytest

-from update_versions import check_agent_version, is_prerelease, make_request
+from update_versions import (
+    VersionUpdate,
+    apply_update,
+    check_agent_version,
+    is_prerelease,
+    make_request,
+)


 class TestMakeRequestServerErrors:
@@ -266,3 +274,95 @@ def test_returns_error_when_sources_have_no_common_version(
         assert update is None
         assert error is not None
         assert error.error == "Version mismatch across distributions: binary=7.2.1, npx=7.2.4"
+
+
+class TestApplyUpdateUrlValidation:
+    """apply_update reverts and reports skipped when new URLs aren't reachable.
+
+    Regression test for the failure mode where an upstream stops publishing one
+    platform's binary at a new version (e.g. vtcode 0.105.5 dropped Windows zip),
+    which previously caused the entire hourly auto-update workflow to fail.
+    """
+
+    def _write_agent(self, tmpdir: Path, agent_data: dict) -> Path:
+        agent_dir = tmpdir / agent_data["id"]
+        agent_dir.mkdir()
+        path = agent_dir / "agent.json"
+        path.write_text(json.dumps(agent_data, indent=2) + "\n")
+        return path
+
+    def _build_update(self, path: Path, agent_data: dict, new_version: str) -> VersionUpdate:
+        return VersionUpdate(
+            agent_id=agent_data["id"],
+            agent_path=path,
+            current_version=agent_data["version"],
+            latest_version=new_version,
+            distribution_type="binary",
+            source_url="https://github.com/owner/repo",
+        )
+
+    def test_reverts_when_new_binary_url_missing(self):
+        agent_data = {
+            "id": "vtlike",
+            "version": "0.96.14",
+            "distribution": {
+                "binary": {
+                    "darwin-aarch64": {
+                        "archive": "https://github.com/owner/repo/releases/download/0.96.14/agent-0.96.14-aarch64-apple-darwin.tar.gz",
+                        "cmd": "./agent",
+                    },
+                    "windows-x86_64": {
+                        "archive": "https://github.com/owner/repo/releases/download/0.96.14/agent-0.96.14-x86_64-pc-windows-msvc.zip",
+                        "cmd": "agent.exe",
+                    },
+                }
+            },
+        }
+        with tempfile.TemporaryDirectory() as d:
+            tmpdir = Path(d)
+            path = self._write_agent(tmpdir, agent_data)
+            original = path.read_text()
+            update = self._build_update(path, agent_data, "0.105.5")
+
+            # New version's Windows zip 404s; macOS exists.
+            def fake_url_exists(url, *_args, **_kwargs):
+                return "windows" not in url
+
+            with patch("registry_utils.url_exists", side_effect=fake_url_exists):
+                ok, reason = apply_update(update)
+
+            assert ok is False
+            assert reason is not None
+            assert "windows-x86_64" in reason
+            # File must be byte-identical to its pre-update state.
+            assert path.read_text() == original
+
+    def test_succeeds_when_all_new_urls_reachable(self):
+        agent_data = {
+            "id": "okagent",
+            "version": "1.0.0",
+            "distribution": {
+                "binary": {
+                    "darwin-aarch64": {
+                        "archive": "https://github.com/owner/repo/releases/download/1.0.0/agent-1.0.0-aarch64-apple-darwin.tar.gz",
+                        "cmd": "./agent",
+                    },
+                }
+            },
+        }
+        with tempfile.TemporaryDirectory() as d:
+            tmpdir = Path(d)
+            path = self._write_agent(tmpdir, agent_data)
+            update = self._build_update(path, agent_data, "1.1.0")
+
+            with patch("registry_utils.url_exists", return_value=True):
+                ok, reason = apply_update(update)
+
+            assert ok is True
+            assert reason is None
+            written = json.loads(path.read_text())
+            assert written["version"] == "1.1.0"
+            assert (
+                written["distribution"]["binary"]["darwin-aarch64"]["archive"]
+                == "https://github.com/owner/repo/releases/download/1.1.0/agent-1.1.0-aarch64-apple-darwin.tar.gz"
+            )
diff --git a/.github/workflows/update_versions.py b/.github/workflows/update_versions.py
index 141d1b85..88cb3678 100755
--- a/.github/workflows/update_versions.py
+++ b/.github/workflows/update_versions.py
@@ -31,6 +31,7 @@
     extract_pypi_package_name,
     load_quarantine,
     should_skip_dir,
+    validate_distribution_urls,
 )


@@ -52,6 +53,20 @@ class UpdateError(NamedTuple):
     error: str


+class SkippedUpdate(NamedTuple):
+    """A planned update that was reverted because its new URLs were not reachable.
+
+    These are not fatal: the workflow keeps the agent at its current version and
+    continues applying the rest of the updates so a single broken upstream doesn't
+    block the whole hourly auto-update cycle.
+ """ + + agent_id: str + current_version: str + latest_version: str + reason: str + + # Directories to scan for agents AGENT_DIRS = [ ".", # Root directory (active agents) @@ -368,14 +383,23 @@ def check_agent_version( ), None -def apply_update(update: VersionUpdate) -> bool: - """Apply a version update to an agent, updating all distribution types.""" +def apply_update(update: VersionUpdate) -> tuple[bool, str | None]: + """Apply a version update to an agent, updating all distribution types. + + Returns: + (True, None): update was applied and validated. + (False, reason): update was applied then reverted because the new + distribution URLs were unreachable. The agent.json is restored to its + pre-update content. ``reason`` describes which URLs failed. + (False, None): hard I/O / parse failure (caller should treat as fatal). + """ try: with open(update.agent_path) as f: - agent_data = json.load(f) + original_content = f.read() + agent_data = json.loads(original_content) except (json.JSONDecodeError, OSError) as e: print(f"Error reading {update.agent_path}: {e}", file=sys.stderr) - return False + return False, None old_version = agent_data["version"] new_version = update.latest_version @@ -428,10 +452,28 @@ def apply_update(update: VersionUpdate) -> bool: with open(update.agent_path, "w") as f: json.dump(agent_data, f, indent=2) f.write("\n") - return True except OSError as e: print(f"Error writing {update.agent_path}: {e}", file=sys.stderr) - return False + return False, None + + # Validate that the new distribution URLs are reachable. If any URL fails + # (e.g. upstream stopped publishing a Windows binary at this version), revert + # to the original content and report the agent as skipped — the caller will + # carry on with the remaining updates. + url_errors = validate_distribution_urls(distribution) + if url_errors: + try: + with open(update.agent_path, "w") as f: + f.write(original_content) + except OSError as e: + print( + f"Error reverting {update.agent_path} after URL check failed: {e}", + file=sys.stderr, + ) + return False, None + return False, "; ".join(url_errors) + + return True, None def main(): @@ -495,6 +537,51 @@ def main(): if not args.json: print(f"OK ({agent_data.get('version', 'unknown')})") + skipped: list[SkippedUpdate] = [] + + # Apply updates if requested. We do this before assembling the JSON output so + # the JSON can report agents that were reverted because their new URLs 404'd. + if args.apply and updates: + print() + print("Applying updates...") + applied = 0 + failed = 0 + kept_updates: list[VersionUpdate] = [] + for update in updates: + if not args.json: + print(f" Updating {update.agent_id}...", end=" ", flush=True) + ok, skip_reason = apply_update(update) + if ok: + applied += 1 + kept_updates.append(update) + if not args.json: + print("OK") + elif skip_reason: + skipped.append( + SkippedUpdate( + agent_id=update.agent_id, + current_version=update.current_version, + latest_version=update.latest_version, + reason=skip_reason, + ) + ) + if not args.json: + print(f"SKIPPED ({skip_reason})") + else: + failed += 1 + if not args.json: + print("FAILED") + + # Drop reverted agents from the canonical update list so downstream + # consumers (commit message, verify-agents) only see what actually changed. 
+        updates = kept_updates
+
+        print()
+        summary = f"Applied {applied} updates, {failed} failed"
+        if skipped:
+            summary += f", {len(skipped)} skipped (broken upstream URLs)"
+        print(summary)
+
     # Output results
     if args.json:
         result = {
@@ -510,15 +597,28 @@
                 for u in updates
             ],
             "errors": [{"agent_id": e.agent_id, "error": e.error} for e in errors],
+            "skipped": [
+                {
+                    "agent_id": s.agent_id,
+                    "current_version": s.current_version,
+                    "latest_version": s.latest_version,
+                    "reason": s.reason,
+                }
+                for s in skipped
+            ],
             "up_to_date": up_to_date,
         }
         print(json.dumps(result, indent=2))
     else:
         print()
         print("=" * 60)
-        print(
-            f"Summary: {len(updates)} updates, {len(errors)} errors, {len(up_to_date)} up-to-date"
+        summary_line = (
+            f"Summary: {len(updates)} updates, {len(errors)} errors, "
+            f"{len(up_to_date)} up-to-date"
         )
+        if skipped:
+            summary_line += f", {len(skipped)} skipped"
+        print(summary_line)

         if updates:
             print()
@@ -529,36 +629,24 @@
                     f"{u.latest_version} ({u.distribution_type})"
                 )

+        if skipped:
+            print()
+            print("Skipped (broken upstream URLs, kept at current version):")
+            for s in skipped:
+                print(
+                    f"  - {s.agent_id}: {s.current_version} -> "
+                    f"{s.latest_version}: {s.reason}"
+                )
+
         if errors:
             print()
             print("Errors:")
             for e in errors:
                 print(f"  - {e.agent_id}: {e.error}")

-    # Apply updates if requested
-    if args.apply and updates:
-        print()
-        print("Applying updates...")
-        applied = 0
-        failed = 0
-        for update in updates:
-            if not args.json:
-                print(f"  Updating {update.agent_id}...", end=" ", flush=True)
-            if apply_update(update):
-                applied += 1
-                if not args.json:
-                    print("OK")
-            else:
-                failed += 1
-                if not args.json:
-                    print("FAILED")
-
-        print()
-        print(f"Applied {applied} updates, {failed} failed")
-
-        # Exit with error if any updates failed
-        if failed > 0:
-            sys.exit(1)
+    # Exit with error if any apply attempts had hard I/O failures.
+    if args.apply and "failed" in locals() and failed > 0:
+        sys.exit(1)

     # Exit with special code if updates are available (for CI)
     if updates and not args.apply:
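One plausible consumer of the new "skipped" array in the --json report, sketched as a follow-up CI step. "update_report.json" is a hypothetical file that an earlier step would have saved from the script's JSON output, and the ::warning:: formatting is just one way to surface the information; only the key names and fields come from the diff above:

# Sketch: turn reverted/skipped agents from the JSON report into CI warnings.
# "update_report.json" is a hypothetical capture of the script's --json output.
import json
from pathlib import Path

report = json.loads(Path("update_report.json").read_text())

for item in report.get("skipped", []):
    print(
        f"::warning::{item['agent_id']} kept at {item['current_version']}; "
        f"upstream URLs for {item['latest_version']} failed: {item['reason']}"
    )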