From a7bddae89c0b5db6d0a7028e2f631c2a045b85a9 Mon Sep 17 00:00:00 2001 From: Davide Gallitelli Date: Wed, 8 Apr 2026 06:51:28 +0000 Subject: [PATCH 1/4] feat(skills): support loading skills from GitHub/Git URLs Add support for remote Git repository URLs as skill sources in AgentSkills, enabling teams to share and reference skills directly from GitHub without manual cloning. Closes #2090 Co-Authored-By: Claude Opus 4.6 (1M context) --- .../vended_plugins/skills/_url_loader.py | 174 +++++++++++ .../vended_plugins/skills/agent_skills.py | 20 +- src/strands/vended_plugins/skills/skill.py | 49 +++ .../skills/test_agent_skills.py | 106 +++++++ .../vended_plugins/skills/test_skill.py | 139 ++++++++- .../vended_plugins/skills/test_url_loader.py | 287 ++++++++++++++++++ 6 files changed, 773 insertions(+), 2 deletions(-) create mode 100644 src/strands/vended_plugins/skills/_url_loader.py create mode 100644 tests/strands/vended_plugins/skills/test_url_loader.py diff --git a/src/strands/vended_plugins/skills/_url_loader.py b/src/strands/vended_plugins/skills/_url_loader.py new file mode 100644 index 000000000..2f44dfcf1 --- /dev/null +++ b/src/strands/vended_plugins/skills/_url_loader.py @@ -0,0 +1,174 @@ +"""Utilities for loading skills from remote Git repository URLs. + +This module provides functions to detect URL-type skill sources, parse +optional version references, clone repositories with shallow depth, and +manage a local cache of cloned skill repositories. +""" + +from __future__ import annotations + +import hashlib +import logging +import re +import shutil +import subprocess +from pathlib import Path + +logger = logging.getLogger(__name__) + +_DEFAULT_CACHE_DIR = Path.home() / ".cache" / "strands" / "skills" + +# Patterns that indicate a string is a URL rather than a local path +_URL_PREFIXES = ("https://", "http://", "git@", "ssh://") + +# Regex to strip .git suffix from URLs before ref parsing +_GIT_SUFFIX = re.compile(r"\.git$") + + +def is_url(source: str) -> bool: + """Check whether a skill source string looks like a remote URL. + + Args: + source: The skill source string to check. + + Returns: + True if the source appears to be a URL. + """ + return any(source.startswith(prefix) for prefix in _URL_PREFIXES) + + +def parse_url_ref(url: str) -> tuple[str, str | None]: + """Parse a skill URL into a clone URL and an optional Git ref. + + Supports an ``@ref`` suffix for specifying a branch, tag, or commit:: + + https://github.com/org/skill-repo@v1.0.0 -> (https://github.com/org/skill-repo, v1.0.0) + https://github.com/org/skill-repo -> (https://github.com/org/skill-repo, None) + https://github.com/org/skill-repo.git@main -> (https://github.com/org/skill-repo.git, main) + git@github.com:org/skill-repo.git@v2 -> (git@github.com:org/skill-repo.git, v2) + + Args: + url: The skill URL, optionally with an ``@ref`` suffix. + + Returns: + Tuple of (clone_url, ref_or_none). + """ + if url.startswith(("https://", "http://", "ssh://")): + # Find the path portion after the host + scheme_end = url.index("//") + 2 + host_end = url.find("/", scheme_end) + if host_end == -1: + return url, None + + path_part = url[host_end:] + + # Strip .git suffix before looking for @ref so that + # "repo.git@v1" is handled correctly + clean_path = _GIT_SUFFIX.sub("", path_part) + had_git_suffix = clean_path != path_part + + if "@" in clean_path: + at_idx = clean_path.rfind("@") + ref = clean_path[at_idx + 1 :] + base_path = clean_path[:at_idx] + if had_git_suffix: + base_path += ".git" + return url[:host_end] + base_path, ref + + return url, None + + if url.startswith("git@"): + # SSH format: git@host:owner/repo.git@ref + # The first @ is part of the SSH URL format. + first_at = url.index("@") + rest = url[first_at + 1 :] + + clean_rest = _GIT_SUFFIX.sub("", rest) + had_git_suffix = clean_rest != rest + + if "@" in clean_rest: + at_idx = clean_rest.rfind("@") + ref = clean_rest[at_idx + 1 :] + base_rest = clean_rest[:at_idx] + if had_git_suffix: + base_rest += ".git" + return url[: first_at + 1] + base_rest, ref + + return url, None + + return url, None + + +def cache_key(url: str, ref: str | None) -> str: + """Generate a deterministic cache directory name from a URL and ref. + + Args: + url: The clone URL. + ref: The optional Git ref. + + Returns: + A short hex digest suitable for use as a directory name. + """ + key_input = f"{url}@{ref}" if ref else url + return hashlib.sha256(key_input.encode()).hexdigest()[:16] + + +def clone_skill_repo( + url: str, + *, + ref: str | None = None, + cache_dir: Path | None = None, +) -> Path: + """Clone a skill repository to a local cache directory. + + Uses ``git clone --depth 1`` for efficiency. If a ``ref`` is provided it + is passed as ``--branch`` (works for branches and tags). Repositories are + cached by a hash of (url, ref) so repeated loads are instant. + + Args: + url: The Git clone URL. + ref: Optional branch or tag to check out. + cache_dir: Override the default cache directory + (``~/.cache/strands/skills/``). + + Returns: + Path to the cloned repository root. + + Raises: + RuntimeError: If the clone fails or ``git`` is not installed. + """ + cache_dir = cache_dir or _DEFAULT_CACHE_DIR + cache_dir.mkdir(parents=True, exist_ok=True) + + key = cache_key(url, ref) + target = cache_dir / key + + if target.exists(): + logger.debug("url=<%s>, ref=<%s> | using cached skill at %s", url, ref, target) + return target + + logger.info("url=<%s>, ref=<%s> | cloning skill repository", url, ref) + + cmd: list[str] = ["git", "clone", "--depth", "1"] + if ref: + cmd.extend(["--branch", ref]) + cmd.extend([url, str(target)]) + + try: + subprocess.run( # noqa: S603 + cmd, + check=True, + capture_output=True, + text=True, + timeout=120, + ) + except subprocess.CalledProcessError as e: + # Clean up any partial clone + if target.exists(): + shutil.rmtree(target) + raise RuntimeError(f"url=<{url}>, ref=<{ref}> | failed to clone skill repository: {e.stderr.strip()}") from e + except FileNotFoundError as e: + raise RuntimeError("git is required to load skills from URLs but was not found on PATH") from e + + logger.debug("url=<%s>, ref=<%s> | cloned to %s", url, ref, target) + return target diff --git a/src/strands/vended_plugins/skills/agent_skills.py b/src/strands/vended_plugins/skills/agent_skills.py index 5e42b9230..2459be120 100644 --- a/src/strands/vended_plugins/skills/agent_skills.py +++ b/src/strands/vended_plugins/skills/agent_skills.py @@ -77,6 +77,7 @@ def __init__( state_key: str = _DEFAULT_STATE_KEY, max_resource_files: int = _DEFAULT_MAX_RESOURCE_FILES, strict: bool = False, + cache_dir: Path | None = None, ) -> None: """Initialize the AgentSkills plugin. @@ -86,11 +87,16 @@ def __init__( - A ``str`` or ``Path`` to a skill directory (containing SKILL.md) - A ``str`` or ``Path`` to a parent directory (containing skill subdirectories) - A ``Skill`` dataclass instance + - A remote Git URL (``https://``, ``git@``, or ``ssh://``) + with optional ``@ref`` suffix for branch/tag pinning state_key: Key used to store plugin state in ``agent.state``. max_resource_files: Maximum number of resource files to list in skill responses. strict: If True, raise on skill validation issues. If False (default), warn and load anyway. + cache_dir: Directory for caching cloned skill repositories. + Defaults to ``~/.cache/strands/skills/``. """ self._strict = strict + self._cache_dir = cache_dir self._skills: dict[str, Skill] = self._resolve_skills(_normalize_sources(skills)) self._state_key = state_key self._max_resource_files = max_resource_files @@ -284,7 +290,8 @@ def _resolve_skills(self, sources: list[SkillSource]) -> dict[str, Skill]: """Resolve a list of skill sources into Skill instances. Each source can be a Skill instance, a path to a skill directory, - or a path to a parent directory containing multiple skills. + a path to a parent directory containing multiple skills, or a remote + Git URL. Args: sources: List of skill sources to resolve. @@ -292,6 +299,8 @@ def _resolve_skills(self, sources: list[SkillSource]) -> dict[str, Skill]: Returns: Dict mapping skill names to Skill instances. """ + from ._url_loader import is_url + resolved: dict[str, Skill] = {} for source in sources: @@ -299,6 +308,15 @@ def _resolve_skills(self, sources: list[SkillSource]) -> dict[str, Skill]: if source.name in resolved: logger.warning("name=<%s> | duplicate skill name, overwriting previous skill", source.name) resolved[source.name] = source + elif isinstance(source, str) and is_url(source): + try: + url_skills = Skill.from_url(source, cache_dir=self._cache_dir, strict=self._strict) + for skill in url_skills: + if skill.name in resolved: + logger.warning("name=<%s> | duplicate skill name, overwriting previous skill", skill.name) + resolved[skill.name] = skill + except (RuntimeError, ValueError) as e: + logger.warning("url=<%s> | failed to load skill from URL: %s", source, e) else: path = Path(source).resolve() if not path.exists(): diff --git a/src/strands/vended_plugins/skills/skill.py b/src/strands/vended_plugins/skills/skill.py index 3e1b6bba5..40724270f 100644 --- a/src/strands/vended_plugins/skills/skill.py +++ b/src/strands/vended_plugins/skills/skill.py @@ -333,6 +333,55 @@ def from_content(cls, content: str, *, strict: bool = False) -> Skill: return _build_skill_from_frontmatter(frontmatter, body) + @classmethod + def from_url( + cls, + url: str, + *, + cache_dir: Path | None = None, + strict: bool = False, + ) -> list[Skill]: + """Load skill(s) from a remote Git repository URL. + + Clones the repository (or uses a cached copy) and then loads skills + using the standard filesystem methods. If the repository root contains + a ``SKILL.md`` file it is treated as a single skill; otherwise it is + scanned for skill subdirectories. + + Supports an optional ``@ref`` suffix for branch or tag pinning:: + + skills = Skill.from_url("https://github.com/org/my-skill@v1.0.0") + + Args: + url: A Git-cloneable URL, optionally with an ``@ref`` suffix. + cache_dir: Override the default cache directory + (``~/.cache/strands/skills/``). + strict: If True, raise on any validation issue. If False (default), + warn and load anyway. + + Returns: + List of Skill instances loaded from the repository. + + Raises: + RuntimeError: If the repository cannot be cloned or ``git`` is not + available. + """ + from ._url_loader import clone_skill_repo, is_url, parse_url_ref + + if not is_url(url): + raise ValueError(f"url=<{url}> | not a valid remote URL") + + clean_url, ref = parse_url_ref(url) + repo_path = clone_skill_repo(clean_url, ref=ref, cache_dir=cache_dir) + + # If the repo root is itself a skill, load it directly + has_skill_md = (repo_path / "SKILL.md").is_file() or (repo_path / "skill.md").is_file() + + if has_skill_md: + return [cls.from_file(repo_path, strict=strict)] + else: + return cls.from_directory(repo_path, strict=strict) + @classmethod def from_directory(cls, skills_dir: str | Path, *, strict: bool = False) -> list[Skill]: """Load all skills from a parent directory containing skill subdirectories. diff --git a/tests/strands/vended_plugins/skills/test_agent_skills.py b/tests/strands/vended_plugins/skills/test_agent_skills.py index 52802a6c1..bbe62cdea 100644 --- a/tests/strands/vended_plugins/skills/test_agent_skills.py +++ b/tests/strands/vended_plugins/skills/test_agent_skills.py @@ -661,6 +661,112 @@ def test_resolve_nonexistent_path(self, tmp_path): assert len(plugin._skills) == 0 +class TestResolveUrlSkills: + """Tests for _resolve_skills with URL sources.""" + + _URL_LOADER = "strands.vended_plugins.skills._url_loader" + + def _mock_clone(self, tmp_path, skill_name="url-skill", description="A URL skill"): + """Create a mock clone function that creates a skill directory.""" + skill_dir = tmp_path / "cloned" + + def fake_clone(url, *, ref=None, cache_dir=None): + skill_dir.mkdir(parents=True, exist_ok=True) + content = f"---\nname: {skill_name}\ndescription: {description}\n---\n# Instructions\n" + (skill_dir / "SKILL.md").write_text(content) + return skill_dir + + return fake_clone + + def test_resolve_url_source(self, tmp_path): + """Test resolving a URL string as a skill source.""" + from unittest.mock import patch + + fake_clone = self._mock_clone(tmp_path) + + with ( + patch(f"{self._URL_LOADER}.clone_skill_repo", side_effect=fake_clone), + patch( + f"{self._URL_LOADER}.parse_url_ref", + return_value=("https://github.com/org/url-skill", None), + ), + ): + plugin = AgentSkills(skills=["https://github.com/org/url-skill"]) + + assert len(plugin.get_available_skills()) == 1 + assert plugin.get_available_skills()[0].name == "url-skill" + + def test_resolve_mixed_url_and_local(self, tmp_path): + """Test resolving a mix of URL and local filesystem sources.""" + from unittest.mock import patch + + _make_skill_dir(tmp_path, "local-skill") + fake_clone = self._mock_clone(tmp_path) + + with ( + patch(f"{self._URL_LOADER}.clone_skill_repo", side_effect=fake_clone), + patch( + f"{self._URL_LOADER}.parse_url_ref", + return_value=("https://github.com/org/url-skill", None), + ), + ): + plugin = AgentSkills( + skills=[ + "https://github.com/org/url-skill", + str(tmp_path / "local-skill"), + ] + ) + + assert len(plugin.get_available_skills()) == 2 + names = {s.name for s in plugin.get_available_skills()} + assert names == {"url-skill", "local-skill"} + + def test_resolve_url_failure_skips_gracefully(self, caplog): + """Test that a failed URL clone is skipped with a warning.""" + import logging + from unittest.mock import patch + + with ( + patch( + f"{self._URL_LOADER}.parse_url_ref", + return_value=("https://github.com/org/broken", None), + ), + patch( + f"{self._URL_LOADER}.clone_skill_repo", + side_effect=RuntimeError("clone failed"), + ), + caplog.at_level(logging.WARNING), + ): + plugin = AgentSkills(skills=["https://github.com/org/broken"]) + + assert len(plugin.get_available_skills()) == 0 + assert "failed to load skill from URL" in caplog.text + + def test_cache_dir_forwarded(self, tmp_path): + """Test that custom cache_dir is forwarded through to clone.""" + from unittest.mock import patch + + fake_clone = self._mock_clone(tmp_path) + captured_cache = [] + + def tracking_clone(url, *, ref=None, cache_dir=None): + captured_cache.append(cache_dir) + return fake_clone(url, ref=ref, cache_dir=cache_dir) + + custom_cache = tmp_path / "my-cache" + + with ( + patch(f"{self._URL_LOADER}.clone_skill_repo", side_effect=tracking_clone), + patch( + f"{self._URL_LOADER}.parse_url_ref", + return_value=("https://github.com/org/url-skill", None), + ), + ): + AgentSkills(skills=["https://github.com/org/url-skill"], cache_dir=custom_cache) + + assert captured_cache == [custom_cache] + + class TestImports: """Tests for module imports.""" diff --git a/tests/strands/vended_plugins/skills/test_skill.py b/tests/strands/vended_plugins/skills/test_skill.py index 53d6f3507..8857382fb 100644 --- a/tests/strands/vended_plugins/skills/test_skill.py +++ b/tests/strands/vended_plugins/skills/test_skill.py @@ -551,11 +551,148 @@ def test_strict_mode(self): Skill.from_content(content, strict=True) +class TestSkillFromUrl: + """Tests for Skill.from_url.""" + + _URL_LOADER = "strands.vended_plugins.skills._url_loader" + + def _mock_clone(self, tmp_path, skill_name="my-skill", description="A remote skill", body="Remote instructions."): + """Create a mock clone function that creates a skill directory.""" + skill_dir = tmp_path / "cloned" + + def fake_clone(url, *, ref=None, cache_dir=None): + skill_dir.mkdir(parents=True, exist_ok=True) + content = f"---\nname: {skill_name}\ndescription: {description}\n---\n{body}\n" + (skill_dir / "SKILL.md").write_text(content) + return skill_dir + + return fake_clone + + def _mock_clone_multi(self, tmp_path): + """Create a mock clone function that creates a parent dir with multiple skills.""" + parent_dir = tmp_path / "cloned" + + def fake_clone(url, *, ref=None, cache_dir=None): + parent_dir.mkdir(parents=True, exist_ok=True) + for name in ("skill-a", "skill-b"): + child = parent_dir / name + child.mkdir(exist_ok=True) + (child / "SKILL.md").write_text(f"---\nname: {name}\ndescription: Skill {name}\n---\nBody.\n") + return parent_dir + + return fake_clone + + def test_from_url_single_skill(self, tmp_path): + """Test loading a single skill from a URL.""" + from unittest.mock import patch + + fake_clone = self._mock_clone(tmp_path) + + with ( + patch(f"{self._URL_LOADER}.clone_skill_repo", side_effect=fake_clone), + patch(f"{self._URL_LOADER}.parse_url_ref", return_value=("https://github.com/org/my-skill", None)), + ): + skills = Skill.from_url("https://github.com/org/my-skill") + + assert len(skills) == 1 + assert skills[0].name == "my-skill" + assert skills[0].description == "A remote skill" + assert "Remote instructions." in skills[0].instructions + + def test_from_url_multiple_skills(self, tmp_path): + """Test loading multiple skills from a URL pointing to a parent directory.""" + from unittest.mock import patch + + fake_clone = self._mock_clone_multi(tmp_path) + + with ( + patch(f"{self._URL_LOADER}.clone_skill_repo", side_effect=fake_clone), + patch( + f"{self._URL_LOADER}.parse_url_ref", + return_value=("https://github.com/org/skills-collection", None), + ), + ): + skills = Skill.from_url("https://github.com/org/skills-collection") + + assert len(skills) == 2 + names = {s.name for s in skills} + assert names == {"skill-a", "skill-b"} + + def test_from_url_with_ref(self, tmp_path): + """Test that @ref is correctly parsed and forwarded.""" + from unittest.mock import patch + + fake_clone = self._mock_clone(tmp_path) + captured_ref = [] + + def tracking_clone(url, *, ref=None, cache_dir=None): + captured_ref.append(ref) + return fake_clone(url, ref=ref, cache_dir=cache_dir) + + with ( + patch(f"{self._URL_LOADER}.clone_skill_repo", side_effect=tracking_clone), + patch( + f"{self._URL_LOADER}.parse_url_ref", + return_value=("https://github.com/org/my-skill", "v1.0.0"), + ), + ): + Skill.from_url("https://github.com/org/my-skill@v1.0.0") + + assert captured_ref == ["v1.0.0"] + + def test_from_url_invalid_url_raises(self): + """Test that a non-URL string raises ValueError.""" + with pytest.raises(ValueError, match="not a valid remote URL"): + Skill.from_url("./local-path") + + def test_from_url_clone_failure_raises(self): + """Test that a clone failure propagates as RuntimeError.""" + from unittest.mock import patch + + with ( + patch( + f"{self._URL_LOADER}.parse_url_ref", + return_value=("https://github.com/org/broken", None), + ), + patch( + f"{self._URL_LOADER}.clone_skill_repo", + side_effect=RuntimeError("clone failed"), + ), + ): + with pytest.raises(RuntimeError, match="clone failed"): + Skill.from_url("https://github.com/org/broken") + + def test_from_url_passes_cache_dir(self, tmp_path): + """Test that cache_dir is forwarded to clone_skill_repo.""" + from unittest.mock import patch + + fake_clone = self._mock_clone(tmp_path) + captured_cache = [] + + def tracking_clone(url, *, ref=None, cache_dir=None): + captured_cache.append(cache_dir) + return fake_clone(url, ref=ref, cache_dir=cache_dir) + + custom_cache = tmp_path / "custom-cache" + + with ( + patch(f"{self._URL_LOADER}.clone_skill_repo", side_effect=tracking_clone), + patch( + f"{self._URL_LOADER}.parse_url_ref", + return_value=("https://github.com/org/my-skill", None), + ), + ): + Skill.from_url("https://github.com/org/my-skill", cache_dir=custom_cache) + + assert captured_cache == [custom_cache] + + class TestSkillClassmethods: """Tests for Skill classmethod existence.""" def test_skill_classmethods_exist(self): - """Test that Skill has from_file, from_content, and from_directory classmethods.""" + """Test that Skill has from_file, from_content, from_directory, and from_url classmethods.""" assert callable(getattr(Skill, "from_file", None)) assert callable(getattr(Skill, "from_content", None)) assert callable(getattr(Skill, "from_directory", None)) + assert callable(getattr(Skill, "from_url", None)) diff --git a/tests/strands/vended_plugins/skills/test_url_loader.py b/tests/strands/vended_plugins/skills/test_url_loader.py new file mode 100644 index 000000000..994672c32 --- /dev/null +++ b/tests/strands/vended_plugins/skills/test_url_loader.py @@ -0,0 +1,287 @@ +"""Tests for the _url_loader module.""" + +from __future__ import annotations + +import subprocess +from pathlib import Path +from unittest.mock import patch + +import pytest + +from strands.vended_plugins.skills._url_loader import ( + cache_key, + clone_skill_repo, + is_url, + parse_url_ref, +) + + +class TestIsUrl: + """Tests for is_url.""" + + def test_https_github_url(self): + assert is_url("https://github.com/org/skill-repo") is True + + def test_http_url(self): + assert is_url("http://github.com/org/skill-repo") is True + + def test_ssh_url(self): + assert is_url("ssh://git@github.com/org/skill-repo") is True + + def test_git_at_url(self): + assert is_url("git@github.com:org/skill-repo.git") is True + + def test_local_relative_path(self): + assert is_url("./skills/my-skill") is False + + def test_local_absolute_path(self): + assert is_url("/home/user/skills/my-skill") is False + + def test_plain_directory_name(self): + assert is_url("my-skill") is False + + def test_empty_string(self): + assert is_url("") is False + + def test_https_with_ref(self): + assert is_url("https://github.com/org/skill@v1.0.0") is True + + +class TestParseUrlRef: + """Tests for parse_url_ref.""" + + def test_https_no_ref(self): + url, ref = parse_url_ref("https://github.com/org/skill-repo") + assert url == "https://github.com/org/skill-repo" + assert ref is None + + def test_https_with_tag_ref(self): + url, ref = parse_url_ref("https://github.com/org/skill-repo@v1.0.0") + assert url == "https://github.com/org/skill-repo" + assert ref == "v1.0.0" + + def test_https_with_branch_ref(self): + url, ref = parse_url_ref("https://github.com/org/skill-repo@main") + assert url == "https://github.com/org/skill-repo" + assert ref == "main" + + def test_https_git_suffix_no_ref(self): + url, ref = parse_url_ref("https://github.com/org/skill-repo.git") + assert url == "https://github.com/org/skill-repo.git" + assert ref is None + + def test_https_git_suffix_with_ref(self): + url, ref = parse_url_ref("https://github.com/org/skill-repo.git@v2.0") + assert url == "https://github.com/org/skill-repo.git" + assert ref == "v2.0" + + def test_ssh_no_ref(self): + url, ref = parse_url_ref("git@github.com:org/skill-repo.git") + assert url == "git@github.com:org/skill-repo.git" + assert ref is None + + def test_ssh_with_ref(self): + url, ref = parse_url_ref("git@github.com:org/skill-repo.git@v1.0") + assert url == "git@github.com:org/skill-repo.git" + assert ref == "v1.0" + + def test_ssh_no_git_suffix_with_ref(self): + url, ref = parse_url_ref("git@github.com:org/skill-repo@develop") + assert url == "git@github.com:org/skill-repo" + assert ref == "develop" + + def test_ssh_protocol_no_ref(self): + url, ref = parse_url_ref("ssh://git@github.com/org/skill-repo") + assert url == "ssh://git@github.com/org/skill-repo" + assert ref is None + + def test_ssh_protocol_with_ref(self): + url, ref = parse_url_ref("ssh://git@github.com/org/skill-repo@v3") + assert url == "ssh://git@github.com/org/skill-repo" + assert ref == "v3" + + def test_http_with_ref(self): + url, ref = parse_url_ref("http://gitlab.com/org/skill@feature-branch") + assert url == "http://gitlab.com/org/skill" + assert ref == "feature-branch" + + def test_url_host_only(self): + url, ref = parse_url_ref("https://example.com") + assert url == "https://example.com" + assert ref is None + + def test_non_url_passthrough(self): + url, ref = parse_url_ref("/local/path") + assert url == "/local/path" + assert ref is None + + +class TestCacheKey: + """Tests for cache_key.""" + + def test_deterministic(self): + key1 = cache_key("https://github.com/org/repo", None) + key2 = cache_key("https://github.com/org/repo", None) + assert key1 == key2 + + def test_different_url_different_key(self): + key1 = cache_key("https://github.com/org/repo-a", None) + key2 = cache_key("https://github.com/org/repo-b", None) + assert key1 != key2 + + def test_different_ref_different_key(self): + key1 = cache_key("https://github.com/org/repo", None) + key2 = cache_key("https://github.com/org/repo", "v1.0") + assert key1 != key2 + + def test_length(self): + key = cache_key("https://github.com/org/repo", "main") + assert len(key) == 16 + + def test_hex_characters_only(self): + key = cache_key("https://github.com/org/repo", "main") + assert all(c in "0123456789abcdef" for c in key) + + +class TestCloneSkillRepo: + """Tests for clone_skill_repo.""" + + def test_clone_success(self, tmp_path): + """Test successful clone by mocking subprocess.run.""" + cache = tmp_path / "cache" + + def fake_clone(cmd, **kwargs): + # Simulate a successful clone by creating the target directory + target_dir = Path(cmd[-1]) + target_dir.mkdir(parents=True, exist_ok=True) + (target_dir / "SKILL.md").write_text("---\nname: test\ndescription: test\n---\n") + return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") + + with patch("strands.vended_plugins.skills._url_loader.subprocess.run", side_effect=fake_clone): + result = clone_skill_repo("https://github.com/org/skill", cache_dir=cache) + + assert result.exists() + assert (result / "SKILL.md").exists() + + def test_clone_with_ref(self, tmp_path): + """Test that ref is passed as --branch to git clone.""" + cache = tmp_path / "cache" + captured_cmd = [] + + def fake_clone(cmd, **kwargs): + captured_cmd.extend(cmd) + target_dir = Path(cmd[-1]) + target_dir.mkdir(parents=True, exist_ok=True) + return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") + + with patch("strands.vended_plugins.skills._url_loader.subprocess.run", side_effect=fake_clone): + clone_skill_repo("https://github.com/org/skill", ref="v1.0", cache_dir=cache) + + assert "--branch" in captured_cmd + assert "v1.0" in captured_cmd + + def test_clone_without_ref(self, tmp_path): + """Test that --branch is not passed when ref is None.""" + cache = tmp_path / "cache" + captured_cmd = [] + + def fake_clone(cmd, **kwargs): + captured_cmd.extend(cmd) + target_dir = Path(cmd[-1]) + target_dir.mkdir(parents=True, exist_ok=True) + return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") + + with patch("strands.vended_plugins.skills._url_loader.subprocess.run", side_effect=fake_clone): + clone_skill_repo("https://github.com/org/skill", cache_dir=cache) + + assert "--branch" not in captured_cmd + + def test_uses_cache_on_second_call(self, tmp_path): + """Test that the cache is used on the second call.""" + cache = tmp_path / "cache" + call_count = 0 + + def fake_clone(cmd, **kwargs): + nonlocal call_count + call_count += 1 + target_dir = Path(cmd[-1]) + target_dir.mkdir(parents=True, exist_ok=True) + return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") + + with patch("strands.vended_plugins.skills._url_loader.subprocess.run", side_effect=fake_clone): + result1 = clone_skill_repo("https://github.com/org/skill", cache_dir=cache) + result2 = clone_skill_repo("https://github.com/org/skill", cache_dir=cache) + + assert call_count == 1 + assert result1 == result2 + + def test_clone_failure_raises_runtime_error(self, tmp_path): + """Test that a failed clone raises RuntimeError.""" + cache = tmp_path / "cache" + + with patch( + "strands.vended_plugins.skills._url_loader.subprocess.run", + side_effect=subprocess.CalledProcessError(128, "git", stderr="fatal: repo not found"), + ): + with pytest.raises(RuntimeError, match="failed to clone"): + clone_skill_repo("https://github.com/org/nonexistent", cache_dir=cache) + + def test_clone_failure_cleans_up_partial(self, tmp_path): + """Test that a failed clone removes any partial directory.""" + cache = tmp_path / "cache" + + def failing_clone(cmd, **kwargs): + # Create partial clone then fail + target_dir = Path(cmd[-1]) + target_dir.mkdir(parents=True, exist_ok=True) + raise subprocess.CalledProcessError(128, "git", stderr="fatal: error") + + with patch("strands.vended_plugins.skills._url_loader.subprocess.run", side_effect=failing_clone): + with pytest.raises(RuntimeError): + clone_skill_repo("https://github.com/org/broken", cache_dir=cache) + + # Verify no leftover directory + assert len(list(cache.iterdir())) == 0 + + def test_git_not_found_raises_runtime_error(self, tmp_path): + """Test that missing git binary raises RuntimeError.""" + cache = tmp_path / "cache" + + with patch( + "strands.vended_plugins.skills._url_loader.subprocess.run", + side_effect=FileNotFoundError(), + ): + with pytest.raises(RuntimeError, match="git is required"): + clone_skill_repo("https://github.com/org/skill", cache_dir=cache) + + def test_creates_cache_dir_if_missing(self, tmp_path): + """Test that the cache directory is created if it doesn't exist.""" + cache = tmp_path / "deep" / "nested" / "cache" + + def fake_clone(cmd, **kwargs): + target_dir = Path(cmd[-1]) + target_dir.mkdir(parents=True, exist_ok=True) + return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") + + with patch("strands.vended_plugins.skills._url_loader.subprocess.run", side_effect=fake_clone): + clone_skill_repo("https://github.com/org/skill", cache_dir=cache) + + assert cache.exists() + + def test_shallow_clone_depth_one(self, tmp_path): + """Test that --depth 1 is always passed.""" + cache = tmp_path / "cache" + captured_cmd = [] + + def fake_clone(cmd, **kwargs): + captured_cmd.extend(cmd) + target_dir = Path(cmd[-1]) + target_dir.mkdir(parents=True, exist_ok=True) + return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") + + with patch("strands.vended_plugins.skills._url_loader.subprocess.run", side_effect=fake_clone): + clone_skill_repo("https://github.com/org/skill", cache_dir=cache) + + assert "--depth" in captured_cmd + depth_idx = captured_cmd.index("--depth") + assert captured_cmd[depth_idx + 1] == "1" From 83966f6a73c2c7e20de3a9bd4ccec1303b409577 Mon Sep 17 00:00:00 2001 From: Davide Gallitelli Date: Wed, 8 Apr 2026 08:00:19 +0000 Subject: [PATCH 2/4] feat(skills): support GitHub /tree/ URLs for nested skills Parse GitHub web URLs like /tree//path to extract the clone URL, branch, and subdirectory path. This enables loading skills from subdirectories within mono-repos. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../vended_plugins/skills/_url_loader.py | 114 +++++++++++------- src/strands/vended_plugins/skills/skill.py | 11 +- .../skills/test_agent_skills.py | 12 +- .../vended_plugins/skills/test_skill.py | 18 +-- .../vended_plugins/skills/test_url_loader.py | 112 +++++++++++++++-- 5 files changed, 193 insertions(+), 74 deletions(-) diff --git a/src/strands/vended_plugins/skills/_url_loader.py b/src/strands/vended_plugins/skills/_url_loader.py index 2f44dfcf1..9e4531917 100644 --- a/src/strands/vended_plugins/skills/_url_loader.py +++ b/src/strands/vended_plugins/skills/_url_loader.py @@ -24,6 +24,10 @@ # Regex to strip .git suffix from URLs before ref parsing _GIT_SUFFIX = re.compile(r"\.git$") +# Matches GitHub /tree/ or /tree// (also /blob/) +# e.g. /owner/repo/tree/main/skills/my-skill -> groups: (/owner/repo, main, skills/my-skill) +_GITHUB_TREE_PATTERN = re.compile(r"^(/[^/]+/[^/]+)/(?:tree|blob)/([^/]+)(?:/(.+?))?/?$") + def is_url(source: str) -> bool: """Check whether a skill source string looks like a remote URL. @@ -37,31 +41,43 @@ def is_url(source: str) -> bool: return any(source.startswith(prefix) for prefix in _URL_PREFIXES) -def parse_url_ref(url: str) -> tuple[str, str | None]: - """Parse a skill URL into a clone URL and an optional Git ref. +def parse_url_ref(url: str) -> tuple[str, str | None, str | None]: + """Parse a skill URL into a clone URL, optional Git ref, and optional subpath. Supports an ``@ref`` suffix for specifying a branch, tag, or commit:: - https://github.com/org/skill-repo@v1.0.0 -> (https://github.com/org/skill-repo, v1.0.0) - https://github.com/org/skill-repo -> (https://github.com/org/skill-repo, None) - https://github.com/org/skill-repo.git@main -> (https://github.com/org/skill-repo.git, main) - git@github.com:org/skill-repo.git@v2 -> (git@github.com:org/skill-repo.git, v2) + https://github.com/org/skill-repo@v1.0.0 -> (https://github.com/org/skill-repo, v1.0.0, None) + https://github.com/org/skill-repo -> (https://github.com/org/skill-repo, None, None) + + Also supports GitHub web URLs with ``/tree//path`` :: + + https://github.com/org/repo/tree/main/skills/my-skill + -> (https://github.com/org/repo, main, skills/my-skill) Args: - url: The skill URL, optionally with an ``@ref`` suffix. + url: The skill URL, optionally with an ``@ref`` suffix or ``/tree/`` path. Returns: - Tuple of (clone_url, ref_or_none). + Tuple of (clone_url, ref_or_none, subpath_or_none). """ if url.startswith(("https://", "http://", "ssh://")): # Find the path portion after the host scheme_end = url.index("//") + 2 host_end = url.find("/", scheme_end) if host_end == -1: - return url, None + return url, None, None path_part = url[host_end:] + # Handle GitHub /tree//path and /blob//path URLs + tree_match = _GITHUB_TREE_PATTERN.match(path_part) + if tree_match: + owner_repo = tree_match.group(1) + ref = tree_match.group(2) + subpath = tree_match.group(3) or None + clone_url = url[:host_end] + owner_repo + return clone_url, ref, subpath + # Strip .git suffix before looking for @ref so that # "repo.git@v1" is handled correctly clean_path = _GIT_SUFFIX.sub("", path_part) @@ -73,9 +89,9 @@ def parse_url_ref(url: str) -> tuple[str, str | None]: base_path = clean_path[:at_idx] if had_git_suffix: base_path += ".git" - return url[:host_end] + base_path, ref + return url[:host_end] + base_path, ref, None - return url, None + return url, None, None if url.startswith("git@"): # SSH format: git@host:owner/repo.git@ref @@ -92,11 +108,11 @@ def parse_url_ref(url: str) -> tuple[str, str | None]: base_rest = clean_rest[:at_idx] if had_git_suffix: base_rest += ".git" - return url[: first_at + 1] + base_rest, ref + return url[: first_at + 1] + base_rest, ref, None - return url, None + return url, None, None - return url, None + return url, None, None def cache_key(url: str, ref: str | None) -> str: @@ -117,6 +133,7 @@ def clone_skill_repo( url: str, *, ref: str | None = None, + subpath: str | None = None, cache_dir: Path | None = None, ) -> Path: """Clone a skill repository to a local cache directory. @@ -125,14 +142,19 @@ def clone_skill_repo( is passed as ``--branch`` (works for branches and tags). Repositories are cached by a hash of (url, ref) so repeated loads are instant. + If ``subpath`` is provided, the returned path points to that subdirectory + within the cloned repository (useful for mono-repos containing skills in + nested directories). + Args: url: The Git clone URL. ref: Optional branch or tag to check out. + subpath: Optional path within the repo to return (e.g. ``skills/my-skill``). cache_dir: Override the default cache directory (``~/.cache/strands/skills/``). Returns: - Path to the cloned repository root. + Path to the cloned repository root, or to ``subpath`` within it. Raises: RuntimeError: If the clone fails or ``git`` is not installed. @@ -143,32 +165,38 @@ def clone_skill_repo( key = cache_key(url, ref) target = cache_dir / key - if target.exists(): + if not target.exists(): + logger.info("url=<%s>, ref=<%s> | cloning skill repository", url, ref) + + cmd: list[str] = ["git", "clone", "--depth", "1"] + if ref: + cmd.extend(["--branch", ref]) + cmd.extend([url, str(target)]) + + try: + subprocess.run( # noqa: S603 + cmd, + check=True, + capture_output=True, + text=True, + timeout=120, + ) + except subprocess.CalledProcessError as e: + # Clean up any partial clone + if target.exists(): + shutil.rmtree(target) + raise RuntimeError( + f"url=<{url}>, ref=<{ref}> | failed to clone skill repository: {e.stderr.strip()}" + ) from e + except FileNotFoundError as e: + raise RuntimeError("git is required to load skills from URLs but was not found on PATH") from e + else: logger.debug("url=<%s>, ref=<%s> | using cached skill at %s", url, ref, target) - return target - - logger.info("url=<%s>, ref=<%s> | cloning skill repository", url, ref) - - cmd: list[str] = ["git", "clone", "--depth", "1"] - if ref: - cmd.extend(["--branch", ref]) - cmd.extend([url, str(target)]) - - try: - subprocess.run( # noqa: S603 - cmd, - check=True, - capture_output=True, - text=True, - timeout=120, - ) - except subprocess.CalledProcessError as e: - # Clean up any partial clone - if target.exists(): - shutil.rmtree(target) - raise RuntimeError(f"url=<{url}>, ref=<{ref}> | failed to clone skill repository: {e.stderr.strip()}") from e - except FileNotFoundError as e: - raise RuntimeError("git is required to load skills from URLs but was not found on PATH") from e - - logger.debug("url=<%s>, ref=<%s> | cloned to %s", url, ref, target) - return target + + result = target / subpath if subpath else target + + if subpath and not result.is_dir(): + raise RuntimeError(f"url=<{url}>, subpath=<{subpath}> | subdirectory does not exist in cloned repository") + + logger.debug("url=<%s>, ref=<%s> | resolved to %s", url, ref, result) + return result diff --git a/src/strands/vended_plugins/skills/skill.py b/src/strands/vended_plugins/skills/skill.py index 40724270f..8ce9194a9 100644 --- a/src/strands/vended_plugins/skills/skill.py +++ b/src/strands/vended_plugins/skills/skill.py @@ -352,8 +352,13 @@ def from_url( skills = Skill.from_url("https://github.com/org/my-skill@v1.0.0") + Also supports GitHub web URLs pointing to subdirectories:: + + skills = Skill.from_url("https://github.com/org/repo/tree/main/skills/my-skill") + Args: - url: A Git-cloneable URL, optionally with an ``@ref`` suffix. + url: A Git-cloneable URL, optionally with an ``@ref`` suffix or + a GitHub ``/tree//path`` URL. cache_dir: Override the default cache directory (``~/.cache/strands/skills/``). strict: If True, raise on any validation issue. If False (default), @@ -371,8 +376,8 @@ def from_url( if not is_url(url): raise ValueError(f"url=<{url}> | not a valid remote URL") - clean_url, ref = parse_url_ref(url) - repo_path = clone_skill_repo(clean_url, ref=ref, cache_dir=cache_dir) + clean_url, ref, subpath = parse_url_ref(url) + repo_path = clone_skill_repo(clean_url, ref=ref, subpath=subpath, cache_dir=cache_dir) # If the repo root is itself a skill, load it directly has_skill_md = (repo_path / "SKILL.md").is_file() or (repo_path / "skill.md").is_file() diff --git a/tests/strands/vended_plugins/skills/test_agent_skills.py b/tests/strands/vended_plugins/skills/test_agent_skills.py index bbe62cdea..6f150a792 100644 --- a/tests/strands/vended_plugins/skills/test_agent_skills.py +++ b/tests/strands/vended_plugins/skills/test_agent_skills.py @@ -670,7 +670,7 @@ def _mock_clone(self, tmp_path, skill_name="url-skill", description="A URL skill """Create a mock clone function that creates a skill directory.""" skill_dir = tmp_path / "cloned" - def fake_clone(url, *, ref=None, cache_dir=None): + def fake_clone(url, *, ref=None, subpath=None, cache_dir=None): skill_dir.mkdir(parents=True, exist_ok=True) content = f"---\nname: {skill_name}\ndescription: {description}\n---\n# Instructions\n" (skill_dir / "SKILL.md").write_text(content) @@ -688,7 +688,7 @@ def test_resolve_url_source(self, tmp_path): patch(f"{self._URL_LOADER}.clone_skill_repo", side_effect=fake_clone), patch( f"{self._URL_LOADER}.parse_url_ref", - return_value=("https://github.com/org/url-skill", None), + return_value=("https://github.com/org/url-skill", None, None), ), ): plugin = AgentSkills(skills=["https://github.com/org/url-skill"]) @@ -707,7 +707,7 @@ def test_resolve_mixed_url_and_local(self, tmp_path): patch(f"{self._URL_LOADER}.clone_skill_repo", side_effect=fake_clone), patch( f"{self._URL_LOADER}.parse_url_ref", - return_value=("https://github.com/org/url-skill", None), + return_value=("https://github.com/org/url-skill", None, None), ), ): plugin = AgentSkills( @@ -729,7 +729,7 @@ def test_resolve_url_failure_skips_gracefully(self, caplog): with ( patch( f"{self._URL_LOADER}.parse_url_ref", - return_value=("https://github.com/org/broken", None), + return_value=("https://github.com/org/broken", None, None), ), patch( f"{self._URL_LOADER}.clone_skill_repo", @@ -749,7 +749,7 @@ def test_cache_dir_forwarded(self, tmp_path): fake_clone = self._mock_clone(tmp_path) captured_cache = [] - def tracking_clone(url, *, ref=None, cache_dir=None): + def tracking_clone(url, *, ref=None, subpath=None, cache_dir=None): captured_cache.append(cache_dir) return fake_clone(url, ref=ref, cache_dir=cache_dir) @@ -759,7 +759,7 @@ def tracking_clone(url, *, ref=None, cache_dir=None): patch(f"{self._URL_LOADER}.clone_skill_repo", side_effect=tracking_clone), patch( f"{self._URL_LOADER}.parse_url_ref", - return_value=("https://github.com/org/url-skill", None), + return_value=("https://github.com/org/url-skill", None, None), ), ): AgentSkills(skills=["https://github.com/org/url-skill"], cache_dir=custom_cache) diff --git a/tests/strands/vended_plugins/skills/test_skill.py b/tests/strands/vended_plugins/skills/test_skill.py index 8857382fb..5b6753bd2 100644 --- a/tests/strands/vended_plugins/skills/test_skill.py +++ b/tests/strands/vended_plugins/skills/test_skill.py @@ -560,7 +560,7 @@ def _mock_clone(self, tmp_path, skill_name="my-skill", description="A remote ski """Create a mock clone function that creates a skill directory.""" skill_dir = tmp_path / "cloned" - def fake_clone(url, *, ref=None, cache_dir=None): + def fake_clone(url, *, ref=None, subpath=None, cache_dir=None): skill_dir.mkdir(parents=True, exist_ok=True) content = f"---\nname: {skill_name}\ndescription: {description}\n---\n{body}\n" (skill_dir / "SKILL.md").write_text(content) @@ -572,7 +572,7 @@ def _mock_clone_multi(self, tmp_path): """Create a mock clone function that creates a parent dir with multiple skills.""" parent_dir = tmp_path / "cloned" - def fake_clone(url, *, ref=None, cache_dir=None): + def fake_clone(url, *, ref=None, subpath=None, cache_dir=None): parent_dir.mkdir(parents=True, exist_ok=True) for name in ("skill-a", "skill-b"): child = parent_dir / name @@ -590,7 +590,7 @@ def test_from_url_single_skill(self, tmp_path): with ( patch(f"{self._URL_LOADER}.clone_skill_repo", side_effect=fake_clone), - patch(f"{self._URL_LOADER}.parse_url_ref", return_value=("https://github.com/org/my-skill", None)), + patch(f"{self._URL_LOADER}.parse_url_ref", return_value=("https://github.com/org/my-skill", None, None)), ): skills = Skill.from_url("https://github.com/org/my-skill") @@ -609,7 +609,7 @@ def test_from_url_multiple_skills(self, tmp_path): patch(f"{self._URL_LOADER}.clone_skill_repo", side_effect=fake_clone), patch( f"{self._URL_LOADER}.parse_url_ref", - return_value=("https://github.com/org/skills-collection", None), + return_value=("https://github.com/org/skills-collection", None, None), ), ): skills = Skill.from_url("https://github.com/org/skills-collection") @@ -625,7 +625,7 @@ def test_from_url_with_ref(self, tmp_path): fake_clone = self._mock_clone(tmp_path) captured_ref = [] - def tracking_clone(url, *, ref=None, cache_dir=None): + def tracking_clone(url, *, ref=None, subpath=None, cache_dir=None): captured_ref.append(ref) return fake_clone(url, ref=ref, cache_dir=cache_dir) @@ -633,7 +633,7 @@ def tracking_clone(url, *, ref=None, cache_dir=None): patch(f"{self._URL_LOADER}.clone_skill_repo", side_effect=tracking_clone), patch( f"{self._URL_LOADER}.parse_url_ref", - return_value=("https://github.com/org/my-skill", "v1.0.0"), + return_value=("https://github.com/org/my-skill", "v1.0.0", None), ), ): Skill.from_url("https://github.com/org/my-skill@v1.0.0") @@ -652,7 +652,7 @@ def test_from_url_clone_failure_raises(self): with ( patch( f"{self._URL_LOADER}.parse_url_ref", - return_value=("https://github.com/org/broken", None), + return_value=("https://github.com/org/broken", None, None), ), patch( f"{self._URL_LOADER}.clone_skill_repo", @@ -669,7 +669,7 @@ def test_from_url_passes_cache_dir(self, tmp_path): fake_clone = self._mock_clone(tmp_path) captured_cache = [] - def tracking_clone(url, *, ref=None, cache_dir=None): + def tracking_clone(url, *, ref=None, subpath=None, cache_dir=None): captured_cache.append(cache_dir) return fake_clone(url, ref=ref, cache_dir=cache_dir) @@ -679,7 +679,7 @@ def tracking_clone(url, *, ref=None, cache_dir=None): patch(f"{self._URL_LOADER}.clone_skill_repo", side_effect=tracking_clone), patch( f"{self._URL_LOADER}.parse_url_ref", - return_value=("https://github.com/org/my-skill", None), + return_value=("https://github.com/org/my-skill", None, None), ), ): Skill.from_url("https://github.com/org/my-skill", cache_dir=custom_cache) diff --git a/tests/strands/vended_plugins/skills/test_url_loader.py b/tests/strands/vended_plugins/skills/test_url_loader.py index 994672c32..0caf2b6d0 100644 --- a/tests/strands/vended_plugins/skills/test_url_loader.py +++ b/tests/strands/vended_plugins/skills/test_url_loader.py @@ -51,69 +51,123 @@ class TestParseUrlRef: """Tests for parse_url_ref.""" def test_https_no_ref(self): - url, ref = parse_url_ref("https://github.com/org/skill-repo") + url, ref, subpath = parse_url_ref("https://github.com/org/skill-repo") assert url == "https://github.com/org/skill-repo" assert ref is None + assert subpath is None def test_https_with_tag_ref(self): - url, ref = parse_url_ref("https://github.com/org/skill-repo@v1.0.0") + url, ref, subpath = parse_url_ref("https://github.com/org/skill-repo@v1.0.0") assert url == "https://github.com/org/skill-repo" assert ref == "v1.0.0" + assert subpath is None def test_https_with_branch_ref(self): - url, ref = parse_url_ref("https://github.com/org/skill-repo@main") + url, ref, subpath = parse_url_ref("https://github.com/org/skill-repo@main") assert url == "https://github.com/org/skill-repo" assert ref == "main" + assert subpath is None def test_https_git_suffix_no_ref(self): - url, ref = parse_url_ref("https://github.com/org/skill-repo.git") + url, ref, subpath = parse_url_ref("https://github.com/org/skill-repo.git") assert url == "https://github.com/org/skill-repo.git" assert ref is None + assert subpath is None def test_https_git_suffix_with_ref(self): - url, ref = parse_url_ref("https://github.com/org/skill-repo.git@v2.0") + url, ref, subpath = parse_url_ref("https://github.com/org/skill-repo.git@v2.0") assert url == "https://github.com/org/skill-repo.git" assert ref == "v2.0" + assert subpath is None def test_ssh_no_ref(self): - url, ref = parse_url_ref("git@github.com:org/skill-repo.git") + url, ref, subpath = parse_url_ref("git@github.com:org/skill-repo.git") assert url == "git@github.com:org/skill-repo.git" assert ref is None + assert subpath is None def test_ssh_with_ref(self): - url, ref = parse_url_ref("git@github.com:org/skill-repo.git@v1.0") + url, ref, subpath = parse_url_ref("git@github.com:org/skill-repo.git@v1.0") assert url == "git@github.com:org/skill-repo.git" assert ref == "v1.0" + assert subpath is None def test_ssh_no_git_suffix_with_ref(self): - url, ref = parse_url_ref("git@github.com:org/skill-repo@develop") + url, ref, subpath = parse_url_ref("git@github.com:org/skill-repo@develop") assert url == "git@github.com:org/skill-repo" assert ref == "develop" + assert subpath is None def test_ssh_protocol_no_ref(self): - url, ref = parse_url_ref("ssh://git@github.com/org/skill-repo") + url, ref, subpath = parse_url_ref("ssh://git@github.com/org/skill-repo") assert url == "ssh://git@github.com/org/skill-repo" assert ref is None + assert subpath is None def test_ssh_protocol_with_ref(self): - url, ref = parse_url_ref("ssh://git@github.com/org/skill-repo@v3") + url, ref, subpath = parse_url_ref("ssh://git@github.com/org/skill-repo@v3") assert url == "ssh://git@github.com/org/skill-repo" assert ref == "v3" + assert subpath is None def test_http_with_ref(self): - url, ref = parse_url_ref("http://gitlab.com/org/skill@feature-branch") + url, ref, subpath = parse_url_ref("http://gitlab.com/org/skill@feature-branch") assert url == "http://gitlab.com/org/skill" assert ref == "feature-branch" + assert subpath is None def test_url_host_only(self): - url, ref = parse_url_ref("https://example.com") + url, ref, subpath = parse_url_ref("https://example.com") assert url == "https://example.com" assert ref is None + assert subpath is None def test_non_url_passthrough(self): - url, ref = parse_url_ref("/local/path") + url, ref, subpath = parse_url_ref("/local/path") assert url == "/local/path" assert ref is None + assert subpath is None + + +class TestParseUrlRefGitHubTree: + """Tests for parse_url_ref with GitHub /tree/ and /blob/ URLs.""" + + def test_tree_with_subpath(self): + url, ref, subpath = parse_url_ref("https://github.com/org/repo/tree/main/skills/my-skill") + assert url == "https://github.com/org/repo" + assert ref == "main" + assert subpath == "skills/my-skill" + + def test_tree_branch_only(self): + url, ref, subpath = parse_url_ref("https://github.com/org/repo/tree/main") + assert url == "https://github.com/org/repo" + assert ref == "main" + assert subpath is None + + def test_tree_with_tag(self): + url, ref, subpath = parse_url_ref("https://github.com/org/repo/tree/v1.0.0/skills/brainstorming") + assert url == "https://github.com/org/repo" + assert ref == "v1.0.0" + assert subpath == "skills/brainstorming" + + def test_tree_deep_subpath(self): + url, ref, subpath = parse_url_ref("https://github.com/org/repo/tree/develop/a/b/c/d") + assert url == "https://github.com/org/repo" + assert ref == "develop" + assert subpath == "a/b/c/d" + + def test_blob_url(self): + """Test that /blob/ URLs are handled like /tree/.""" + url, ref, subpath = parse_url_ref("https://github.com/org/repo/blob/main/skills/my-skill") + assert url == "https://github.com/org/repo" + assert ref == "main" + assert subpath == "skills/my-skill" + + def test_tree_trailing_slash(self): + url, ref, subpath = parse_url_ref("https://github.com/org/repo/tree/main/skills/my-skill/") + assert url == "https://github.com/org/repo" + assert ref == "main" + assert subpath == "skills/my-skill" class TestCacheKey: @@ -268,6 +322,38 @@ def fake_clone(cmd, **kwargs): assert cache.exists() + def test_subpath_returns_subdirectory(self, tmp_path): + """Test that subpath parameter returns the subdirectory within the clone.""" + cache = tmp_path / "cache" + + def fake_clone(cmd, **kwargs): + target_dir = Path(cmd[-1]) + target_dir.mkdir(parents=True, exist_ok=True) + # Create a nested skill directory + skill_dir = target_dir / "skills" / "my-skill" + skill_dir.mkdir(parents=True) + (skill_dir / "SKILL.md").write_text("---\nname: my-skill\ndescription: test\n---\n") + return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") + + with patch("strands.vended_plugins.skills._url_loader.subprocess.run", side_effect=fake_clone): + result = clone_skill_repo("https://github.com/org/repo", subpath="skills/my-skill", cache_dir=cache) + + assert result.name == "my-skill" + assert (result / "SKILL.md").exists() + + def test_subpath_nonexistent_raises(self, tmp_path): + """Test that a nonexistent subpath raises RuntimeError.""" + cache = tmp_path / "cache" + + def fake_clone(cmd, **kwargs): + target_dir = Path(cmd[-1]) + target_dir.mkdir(parents=True, exist_ok=True) + return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") + + with patch("strands.vended_plugins.skills._url_loader.subprocess.run", side_effect=fake_clone): + with pytest.raises(RuntimeError, match="subdirectory does not exist"): + clone_skill_repo("https://github.com/org/repo", subpath="nonexistent/path", cache_dir=cache) + def test_shallow_clone_depth_one(self, tmp_path): """Test that --depth 1 is always passed.""" cache = tmp_path / "cache" From 7f89fd82da00fdf3725aaebd63d180a68d231ee1 Mon Sep 17 00:00:00 2001 From: Davide Gallitelli Date: Fri, 10 Apr 2026 00:38:52 +0000 Subject: [PATCH 3/4] fix(skills): address review feedback on URL loading - Security: remove http:// support (MITM risk), only allow https/ssh/git@ - Reliability: atomic clone via tempdir + rename to prevent race conditions - Reliability: evaluate cache dir lazily (not at import time) for containers - Usability: respect XDG_CACHE_HOME for cache directory - Usability: add force_refresh parameter to re-clone stale unpinned refs - Docs: add ValueError to Skill.from_url() Raises section - Docs: add _url_loader.py to AGENTS.md directory tree - Tests: add coverage for XDG_CACHE_HOME, force_refresh, race condition, http:// rejection (184 tests total) Co-Authored-By: Claude Opus 4.6 (1M context) --- AGENTS.md | 1 + .../vended_plugins/skills/_url_loader.py | 61 +++++++-- src/strands/vended_plugins/skills/skill.py | 16 ++- .../skills/test_agent_skills.py | 4 +- .../vended_plugins/skills/test_skill.py | 8 +- .../vended_plugins/skills/test_url_loader.py | 124 ++++++++++++++---- 6 files changed, 171 insertions(+), 43 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index a9a2a5044..60d4178a1 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -141,6 +141,7 @@ strands-agents/ │ │ │ ├── core/ # Base classes, actions, context │ │ │ └── handlers/ # Handler implementations (e.g., LLM) │ │ └── skills/ # AgentSkills.io integration (Skill, AgentSkills) +│ │ └── _url_loader.py # Remote URL loading, git clone, caching │ │ │ ├── experimental/ # Experimental features (API may change) │ │ ├── agent_config.py # Experimental agent config diff --git a/src/strands/vended_plugins/skills/_url_loader.py b/src/strands/vended_plugins/skills/_url_loader.py index 9e4531917..7ba5245f6 100644 --- a/src/strands/vended_plugins/skills/_url_loader.py +++ b/src/strands/vended_plugins/skills/_url_loader.py @@ -9,17 +9,20 @@ import hashlib import logging +import os import re import shutil import subprocess +import tempfile from pathlib import Path logger = logging.getLogger(__name__) -_DEFAULT_CACHE_DIR = Path.home() / ".cache" / "strands" / "skills" - -# Patterns that indicate a string is a URL rather than a local path -_URL_PREFIXES = ("https://", "http://", "git@", "ssh://") +# Patterns that indicate a string is a URL rather than a local path. +# NOTE: http:// is intentionally excluded — plaintext HTTP exposes users to +# man-in-the-middle attacks where malicious code could be injected into a +# cloned skill. Only encrypted transports are supported. +_URL_PREFIXES = ("https://", "git@", "ssh://") # Regex to strip .git suffix from URLs before ref parsing _GIT_SUFFIX = re.compile(r"\.git$") @@ -29,6 +32,17 @@ _GITHUB_TREE_PATTERN = re.compile(r"^(/[^/]+/[^/]+)/(?:tree|blob)/([^/]+)(?:/(.+?))?/?$") +def _default_cache_dir() -> Path: + """Return the default cache directory, respecting ``XDG_CACHE_HOME``. + + Evaluated lazily (not at import time) so that ``Path.home()`` is not + called in environments where ``HOME`` may be unset (e.g. containers). + """ + xdg = os.environ.get("XDG_CACHE_HOME") + base = Path(xdg) if xdg else Path.home() / ".cache" + return base / "strands" / "skills" + + def is_url(source: str) -> bool: """Check whether a skill source string looks like a remote URL. @@ -135,6 +149,7 @@ def clone_skill_repo( ref: str | None = None, subpath: str | None = None, cache_dir: Path | None = None, + force_refresh: bool = False, ) -> Path: """Clone a skill repository to a local cache directory. @@ -146,12 +161,19 @@ def clone_skill_repo( within the cloned repository (useful for mono-repos containing skills in nested directories). + **Cache behaviour**: Cloned repos are cached at + ``$XDG_CACHE_HOME/strands/skills/`` (or ``~/.cache/strands/skills/``). + When no ``ref`` is pinned the default branch is cached; pass + ``force_refresh=True`` to re-clone, or pin a specific tag/branch for + reproducibility. + Args: url: The Git clone URL. ref: Optional branch or tag to check out. subpath: Optional path within the repo to return (e.g. ``skills/my-skill``). - cache_dir: Override the default cache directory - (``~/.cache/strands/skills/``). + cache_dir: Override the default cache directory. + force_refresh: If True, delete any cached clone and re-fetch from the + remote. Useful for unpinned refs that may have been updated. Returns: Path to the cloned repository root, or to ``subpath`` within it. @@ -159,21 +181,32 @@ def clone_skill_repo( Raises: RuntimeError: If the clone fails or ``git`` is not installed. """ - cache_dir = cache_dir or _DEFAULT_CACHE_DIR + cache_dir = cache_dir or _default_cache_dir() cache_dir.mkdir(parents=True, exist_ok=True) key = cache_key(url, ref) target = cache_dir / key + if force_refresh and target.exists(): + logger.info("url=<%s>, ref=<%s> | force-refreshing cached skill", url, ref) + shutil.rmtree(target) + if not target.exists(): logger.info("url=<%s>, ref=<%s> | cloning skill repository", url, ref) cmd: list[str] = ["git", "clone", "--depth", "1"] if ref: cmd.extend(["--branch", ref]) - cmd.extend([url, str(target)]) + + # Clone into a temporary directory first, then atomically rename to + # the target path. This prevents a race condition where two + # concurrent processes both pass the ``not target.exists()`` check + # and attempt to clone into the same directory. + tmp_dir = tempfile.mkdtemp(dir=cache_dir, prefix=".tmp-clone-") + tmp_target = Path(tmp_dir) / "repo" try: + cmd.extend([url, str(tmp_target)]) subprocess.run( # noqa: S603 cmd, check=True, @@ -181,15 +214,21 @@ def clone_skill_repo( text=True, timeout=120, ) + try: + tmp_target.rename(target) + except OSError: + # Another process completed the clone first — use theirs + logger.debug("url=<%s> | another process already cached this skill, using existing", url) except subprocess.CalledProcessError as e: - # Clean up any partial clone - if target.exists(): - shutil.rmtree(target) raise RuntimeError( f"url=<{url}>, ref=<{ref}> | failed to clone skill repository: {e.stderr.strip()}" ) from e except FileNotFoundError as e: raise RuntimeError("git is required to load skills from URLs but was not found on PATH") from e + finally: + # Always clean up the temp directory + if Path(tmp_dir).exists(): + shutil.rmtree(tmp_dir, ignore_errors=True) else: logger.debug("url=<%s>, ref=<%s> | using cached skill at %s", url, ref, target) diff --git a/src/strands/vended_plugins/skills/skill.py b/src/strands/vended_plugins/skills/skill.py index 8ce9194a9..8435442c3 100644 --- a/src/strands/vended_plugins/skills/skill.py +++ b/src/strands/vended_plugins/skills/skill.py @@ -339,6 +339,7 @@ def from_url( url: str, *, cache_dir: Path | None = None, + force_refresh: bool = False, strict: bool = False, ) -> list[Skill]: """Load skill(s) from a remote Git repository URL. @@ -358,9 +359,15 @@ def from_url( Args: url: A Git-cloneable URL, optionally with an ``@ref`` suffix or - a GitHub ``/tree//path`` URL. + a GitHub ``/tree//path`` URL. Only ``https://``, + ``ssh://``, and ``git@`` schemes are supported; plaintext + ``http://`` is rejected for security. cache_dir: Override the default cache directory - (``~/.cache/strands/skills/``). + (``$XDG_CACHE_HOME/strands/skills/`` or + ``~/.cache/strands/skills/``). + force_refresh: If True, discard any cached clone and re-fetch from + the remote. Useful when loading unpinned refs (e.g. ``main``) + that may have been updated upstream. strict: If True, raise on any validation issue. If False (default), warn and load anyway. @@ -368,6 +375,7 @@ def from_url( List of Skill instances loaded from the repository. Raises: + ValueError: If ``url`` is not a recognised remote URL scheme. RuntimeError: If the repository cannot be cloned or ``git`` is not available. """ @@ -377,7 +385,9 @@ def from_url( raise ValueError(f"url=<{url}> | not a valid remote URL") clean_url, ref, subpath = parse_url_ref(url) - repo_path = clone_skill_repo(clean_url, ref=ref, subpath=subpath, cache_dir=cache_dir) + repo_path = clone_skill_repo( + clean_url, ref=ref, subpath=subpath, cache_dir=cache_dir, force_refresh=force_refresh + ) # If the repo root is itself a skill, load it directly has_skill_md = (repo_path / "SKILL.md").is_file() or (repo_path / "skill.md").is_file() diff --git a/tests/strands/vended_plugins/skills/test_agent_skills.py b/tests/strands/vended_plugins/skills/test_agent_skills.py index 6f150a792..b9ed9692a 100644 --- a/tests/strands/vended_plugins/skills/test_agent_skills.py +++ b/tests/strands/vended_plugins/skills/test_agent_skills.py @@ -670,7 +670,7 @@ def _mock_clone(self, tmp_path, skill_name="url-skill", description="A URL skill """Create a mock clone function that creates a skill directory.""" skill_dir = tmp_path / "cloned" - def fake_clone(url, *, ref=None, subpath=None, cache_dir=None): + def fake_clone(url, *, ref=None, subpath=None, cache_dir=None, force_refresh=False): skill_dir.mkdir(parents=True, exist_ok=True) content = f"---\nname: {skill_name}\ndescription: {description}\n---\n# Instructions\n" (skill_dir / "SKILL.md").write_text(content) @@ -749,7 +749,7 @@ def test_cache_dir_forwarded(self, tmp_path): fake_clone = self._mock_clone(tmp_path) captured_cache = [] - def tracking_clone(url, *, ref=None, subpath=None, cache_dir=None): + def tracking_clone(url, *, ref=None, subpath=None, cache_dir=None, force_refresh=False): captured_cache.append(cache_dir) return fake_clone(url, ref=ref, cache_dir=cache_dir) diff --git a/tests/strands/vended_plugins/skills/test_skill.py b/tests/strands/vended_plugins/skills/test_skill.py index 5b6753bd2..c8a5232eb 100644 --- a/tests/strands/vended_plugins/skills/test_skill.py +++ b/tests/strands/vended_plugins/skills/test_skill.py @@ -560,7 +560,7 @@ def _mock_clone(self, tmp_path, skill_name="my-skill", description="A remote ski """Create a mock clone function that creates a skill directory.""" skill_dir = tmp_path / "cloned" - def fake_clone(url, *, ref=None, subpath=None, cache_dir=None): + def fake_clone(url, *, ref=None, subpath=None, cache_dir=None, force_refresh=False): skill_dir.mkdir(parents=True, exist_ok=True) content = f"---\nname: {skill_name}\ndescription: {description}\n---\n{body}\n" (skill_dir / "SKILL.md").write_text(content) @@ -572,7 +572,7 @@ def _mock_clone_multi(self, tmp_path): """Create a mock clone function that creates a parent dir with multiple skills.""" parent_dir = tmp_path / "cloned" - def fake_clone(url, *, ref=None, subpath=None, cache_dir=None): + def fake_clone(url, *, ref=None, subpath=None, cache_dir=None, force_refresh=False): parent_dir.mkdir(parents=True, exist_ok=True) for name in ("skill-a", "skill-b"): child = parent_dir / name @@ -625,7 +625,7 @@ def test_from_url_with_ref(self, tmp_path): fake_clone = self._mock_clone(tmp_path) captured_ref = [] - def tracking_clone(url, *, ref=None, subpath=None, cache_dir=None): + def tracking_clone(url, *, ref=None, subpath=None, cache_dir=None, force_refresh=False): captured_ref.append(ref) return fake_clone(url, ref=ref, cache_dir=cache_dir) @@ -669,7 +669,7 @@ def test_from_url_passes_cache_dir(self, tmp_path): fake_clone = self._mock_clone(tmp_path) captured_cache = [] - def tracking_clone(url, *, ref=None, subpath=None, cache_dir=None): + def tracking_clone(url, *, ref=None, subpath=None, cache_dir=None, force_refresh=False): captured_cache.append(cache_dir) return fake_clone(url, ref=ref, cache_dir=cache_dir) diff --git a/tests/strands/vended_plugins/skills/test_url_loader.py b/tests/strands/vended_plugins/skills/test_url_loader.py index 0caf2b6d0..f26cf8083 100644 --- a/tests/strands/vended_plugins/skills/test_url_loader.py +++ b/tests/strands/vended_plugins/skills/test_url_loader.py @@ -2,6 +2,7 @@ from __future__ import annotations +import os import subprocess from pathlib import Path from unittest.mock import patch @@ -9,6 +10,7 @@ import pytest from strands.vended_plugins.skills._url_loader import ( + _default_cache_dir, cache_key, clone_skill_repo, is_url, @@ -22,8 +24,9 @@ class TestIsUrl: def test_https_github_url(self): assert is_url("https://github.com/org/skill-repo") is True - def test_http_url(self): - assert is_url("http://github.com/org/skill-repo") is True + def test_http_url_rejected(self): + """Plaintext http:// is not supported for security reasons.""" + assert is_url("http://github.com/org/skill-repo") is False def test_ssh_url(self): assert is_url("ssh://git@github.com/org/skill-repo") is True @@ -111,6 +114,7 @@ def test_ssh_protocol_with_ref(self): assert subpath is None def test_http_with_ref(self): + """http:// URLs are still parsed (parse_url_ref doesn't enforce security).""" url, ref, subpath = parse_url_ref("http://gitlab.com/org/skill@feature-branch") assert url == "http://gitlab.com/org/skill" assert ref == "feature-branch" @@ -170,6 +174,26 @@ def test_tree_trailing_slash(self): assert subpath == "skills/my-skill" +class TestDefaultCacheDir: + """Tests for _default_cache_dir.""" + + def test_respects_xdg_cache_home(self, tmp_path): + """Test that XDG_CACHE_HOME is respected.""" + with patch.dict(os.environ, {"XDG_CACHE_HOME": str(tmp_path / "xdg")}): + result = _default_cache_dir() + assert result == tmp_path / "xdg" / "strands" / "skills" + + def test_falls_back_to_home_cache(self): + """Test that without XDG_CACHE_HOME, falls back to ~/.cache.""" + with patch.dict(os.environ, {}, clear=False): + # Remove XDG_CACHE_HOME if set + env = os.environ.copy() + env.pop("XDG_CACHE_HOME", None) + with patch.dict(os.environ, env, clear=True): + result = _default_cache_dir() + assert result == Path.home() / ".cache" / "strands" / "skills" + + class TestCacheKey: """Tests for cache_key.""" @@ -197,6 +221,18 @@ def test_hex_characters_only(self): assert all(c in "0123456789abcdef" for c in key) +def _fake_clone_factory(): + """Return a fake clone function that creates the target directory via atomic rename.""" + + def fake_clone(cmd, **kwargs): + target_dir = Path(cmd[-1]) + target_dir.mkdir(parents=True, exist_ok=True) + (target_dir / "SKILL.md").write_text("---\nname: test\ndescription: test\n---\n") + return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") + + return fake_clone + + class TestCloneSkillRepo: """Tests for clone_skill_repo.""" @@ -204,18 +240,10 @@ def test_clone_success(self, tmp_path): """Test successful clone by mocking subprocess.run.""" cache = tmp_path / "cache" - def fake_clone(cmd, **kwargs): - # Simulate a successful clone by creating the target directory - target_dir = Path(cmd[-1]) - target_dir.mkdir(parents=True, exist_ok=True) - (target_dir / "SKILL.md").write_text("---\nname: test\ndescription: test\n---\n") - return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") - - with patch("strands.vended_plugins.skills._url_loader.subprocess.run", side_effect=fake_clone): + with patch("strands.vended_plugins.skills._url_loader.subprocess.run", side_effect=_fake_clone_factory()): result = clone_skill_repo("https://github.com/org/skill", cache_dir=cache) assert result.exists() - assert (result / "SKILL.md").exists() def test_clone_with_ref(self, tmp_path): """Test that ref is passed as --branch to git clone.""" @@ -280,22 +308,21 @@ def test_clone_failure_raises_runtime_error(self, tmp_path): with pytest.raises(RuntimeError, match="failed to clone"): clone_skill_repo("https://github.com/org/nonexistent", cache_dir=cache) - def test_clone_failure_cleans_up_partial(self, tmp_path): - """Test that a failed clone removes any partial directory.""" + def test_clone_failure_cleans_up_temp_dir(self, tmp_path): + """Test that a failed clone removes the temp directory.""" cache = tmp_path / "cache" + cache.mkdir() - def failing_clone(cmd, **kwargs): - # Create partial clone then fail - target_dir = Path(cmd[-1]) - target_dir.mkdir(parents=True, exist_ok=True) - raise subprocess.CalledProcessError(128, "git", stderr="fatal: error") - - with patch("strands.vended_plugins.skills._url_loader.subprocess.run", side_effect=failing_clone): + with patch( + "strands.vended_plugins.skills._url_loader.subprocess.run", + side_effect=subprocess.CalledProcessError(128, "git", stderr="fatal: error"), + ): with pytest.raises(RuntimeError): clone_skill_repo("https://github.com/org/broken", cache_dir=cache) - # Verify no leftover directory - assert len(list(cache.iterdir())) == 0 + # Only the cache dir itself should remain, no temp or target dirs + remaining = [p for p in cache.iterdir() if not p.name.startswith(".")] + assert len(remaining) == 0 def test_git_not_found_raises_runtime_error(self, tmp_path): """Test that missing git binary raises RuntimeError.""" @@ -329,7 +356,6 @@ def test_subpath_returns_subdirectory(self, tmp_path): def fake_clone(cmd, **kwargs): target_dir = Path(cmd[-1]) target_dir.mkdir(parents=True, exist_ok=True) - # Create a nested skill directory skill_dir = target_dir / "skills" / "my-skill" skill_dir.mkdir(parents=True) (skill_dir / "SKILL.md").write_text("---\nname: my-skill\ndescription: test\n---\n") @@ -371,3 +397,55 @@ def fake_clone(cmd, **kwargs): assert "--depth" in captured_cmd depth_idx = captured_cmd.index("--depth") assert captured_cmd[depth_idx + 1] == "1" + + def test_force_refresh_reclones(self, tmp_path): + """Test that force_refresh=True deletes cache and re-clones.""" + cache = tmp_path / "cache" + call_count = 0 + + def fake_clone(cmd, **kwargs): + nonlocal call_count + call_count += 1 + target_dir = Path(cmd[-1]) + target_dir.mkdir(parents=True, exist_ok=True) + (target_dir / "marker.txt").write_text(f"clone-{call_count}") + return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") + + with patch("strands.vended_plugins.skills._url_loader.subprocess.run", side_effect=fake_clone): + result1 = clone_skill_repo("https://github.com/org/skill", cache_dir=cache) + assert (result1 / "marker.txt").read_text() == "clone-1" + + result2 = clone_skill_repo("https://github.com/org/skill", cache_dir=cache, force_refresh=True) + assert (result2 / "marker.txt").read_text() == "clone-2" + + assert call_count == 2 + assert result1 == result2 + + def test_force_refresh_noop_when_no_cache(self, tmp_path): + """Test that force_refresh=True works even when there's no existing cache.""" + cache = tmp_path / "cache" + + with patch("strands.vended_plugins.skills._url_loader.subprocess.run", side_effect=_fake_clone_factory()): + result = clone_skill_repo("https://github.com/org/skill", cache_dir=cache, force_refresh=True) + + assert result.exists() + + def test_race_condition_other_process_wins(self, tmp_path): + """Test that when another process clones first (rename fails), we use their clone.""" + cache = tmp_path / "cache" + key_hash = cache_key("https://github.com/org/skill", None) + target = cache / key_hash + + def fake_clone(cmd, **kwargs): + target_dir = Path(cmd[-1]) + target_dir.mkdir(parents=True, exist_ok=True) + # Simulate another process completing the clone first + target.mkdir(parents=True, exist_ok=True) + (target / "SKILL.md").write_text("---\nname: winner\ndescription: test\n---\n") + return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") + + with patch("strands.vended_plugins.skills._url_loader.subprocess.run", side_effect=fake_clone): + result = clone_skill_repo("https://github.com/org/skill", cache_dir=cache) + + assert result == target + assert result.exists() From 393dcd760f03ef729704a50d8c29eb1b85b68470 Mon Sep 17 00:00:00 2001 From: Davide Gallitelli Date: Sat, 11 Apr 2026 03:26:07 +0000 Subject: [PATCH 4/4] refactor(skills): replace git clone with HTTPS-only fetch Per maintainer feedback, remove all git/SSH/cache logic and replace with simple HTTPS fetch of SKILL.md content via urllib (stdlib). - Drop git clone, subprocess, caching, tempdir, race condition handling - Only support https:// URLs (no git@, ssh://, http://) - Auto-resolve GitHub web URLs to raw.githubusercontent.com - Skill.from_url() now returns a single Skill (not list) - Remove cache_dir, force_refresh params from public API - Net -532 lines, zero new dependencies Co-Authored-By: Claude Opus 4.6 (1M context) --- .../vended_plugins/skills/_url_loader.py | 303 ++++------- .../vended_plugins/skills/agent_skills.py | 17 +- src/strands/vended_plugins/skills/skill.py | 70 +-- .../skills/test_agent_skills.py | 68 +-- .../vended_plugins/skills/test_skill.py | 148 ++--- .../vended_plugins/skills/test_url_loader.py | 512 ++++-------------- 6 files changed, 293 insertions(+), 825 deletions(-) diff --git a/src/strands/vended_plugins/skills/_url_loader.py b/src/strands/vended_plugins/skills/_url_loader.py index 7ba5245f6..464ec4579 100644 --- a/src/strands/vended_plugins/skills/_url_loader.py +++ b/src/strands/vended_plugins/skills/_url_loader.py @@ -1,241 +1,140 @@ -"""Utilities for loading skills from remote Git repository URLs. +"""Utilities for loading skills from HTTPS URLs. -This module provides functions to detect URL-type skill sources, parse -optional version references, clone repositories with shallow depth, and -manage a local cache of cloned skill repositories. +This module provides functions to detect URL-type skill sources, resolve +GitHub web URLs to raw content URLs, and fetch SKILL.md content over HTTPS. +No git dependency or local caching is required. """ from __future__ import annotations -import hashlib import logging -import os import re -import shutil -import subprocess -import tempfile -from pathlib import Path +import urllib.error +import urllib.request logger = logging.getLogger(__name__) -# Patterns that indicate a string is a URL rather than a local path. -# NOTE: http:// is intentionally excluded — plaintext HTTP exposes users to -# man-in-the-middle attacks where malicious code could be injected into a -# cloned skill. Only encrypted transports are supported. -_URL_PREFIXES = ("https://", "git@", "ssh://") - -# Regex to strip .git suffix from URLs before ref parsing -_GIT_SUFFIX = re.compile(r"\.git$") - -# Matches GitHub /tree/ or /tree// (also /blob/) -# e.g. /owner/repo/tree/main/skills/my-skill -> groups: (/owner/repo, main, skills/my-skill) -_GITHUB_TREE_PATTERN = re.compile(r"^(/[^/]+/[^/]+)/(?:tree|blob)/([^/]+)(?:/(.+?))?/?$") - - -def _default_cache_dir() -> Path: - """Return the default cache directory, respecting ``XDG_CACHE_HOME``. - - Evaluated lazily (not at import time) so that ``Path.home()`` is not - called in environments where ``HOME`` may be unset (e.g. containers). - """ - xdg = os.environ.get("XDG_CACHE_HOME") - base = Path(xdg) if xdg else Path.home() / ".cache" - return base / "strands" / "skills" +# Matches GitHub /tree//path and /blob//path URLs. +# e.g. /owner/repo/tree/main/skills/my-skill -> groups: (owner/repo, main, skills/my-skill) +_GITHUB_TREE_PATTERN = re.compile(r"^/([^/]+/[^/]+)/(?:tree|blob)/([^/]+)(?:/(.+?))?/?$") def is_url(source: str) -> bool: - """Check whether a skill source string looks like a remote URL. + """Check whether a skill source string looks like an HTTPS URL. + + Only ``https://`` URLs are supported; plaintext ``http://`` is rejected + for security (MITM risk). Args: source: The skill source string to check. Returns: - True if the source appears to be a URL. + True if the source is an ``https://`` URL. """ - return any(source.startswith(prefix) for prefix in _URL_PREFIXES) + return source.startswith("https://") -def parse_url_ref(url: str) -> tuple[str, str | None, str | None]: - """Parse a skill URL into a clone URL, optional Git ref, and optional subpath. +def resolve_to_raw_url(url: str) -> str: + """Resolve a GitHub web URL to a raw content URL for SKILL.md. - Supports an ``@ref`` suffix for specifying a branch, tag, or commit:: + Supports several GitHub URL patterns and converts them to + ``raw.githubusercontent.com`` URLs:: - https://github.com/org/skill-repo@v1.0.0 -> (https://github.com/org/skill-repo, v1.0.0, None) - https://github.com/org/skill-repo -> (https://github.com/org/skill-repo, None, None) + # Repository root (assumes HEAD and SKILL.md at root) + https://github.com/owner/repo + -> https://raw.githubusercontent.com/owner/repo/HEAD/SKILL.md - Also supports GitHub web URLs with ``/tree//path`` :: + # Repository root with @ref + https://github.com/owner/repo@v1.0 + -> https://raw.githubusercontent.com/owner/repo/v1.0/SKILL.md - https://github.com/org/repo/tree/main/skills/my-skill - -> (https://github.com/org/repo, main, skills/my-skill) + # Tree URL pointing to a directory + https://github.com/owner/repo/tree/main/skills/my-skill + -> https://raw.githubusercontent.com/owner/repo/main/skills/my-skill/SKILL.md - Args: - url: The skill URL, optionally with an ``@ref`` suffix or ``/tree/`` path. + # Blob URL pointing to SKILL.md directly + https://github.com/owner/repo/blob/main/skills/my-skill/SKILL.md + -> https://raw.githubusercontent.com/owner/repo/main/skills/my-skill/SKILL.md - Returns: - Tuple of (clone_url, ref_or_none, subpath_or_none). - """ - if url.startswith(("https://", "http://", "ssh://")): - # Find the path portion after the host - scheme_end = url.index("//") + 2 - host_end = url.find("/", scheme_end) - if host_end == -1: - return url, None, None - - path_part = url[host_end:] - - # Handle GitHub /tree//path and /blob//path URLs - tree_match = _GITHUB_TREE_PATTERN.match(path_part) - if tree_match: - owner_repo = tree_match.group(1) - ref = tree_match.group(2) - subpath = tree_match.group(3) or None - clone_url = url[:host_end] + owner_repo - return clone_url, ref, subpath - - # Strip .git suffix before looking for @ref so that - # "repo.git@v1" is handled correctly - clean_path = _GIT_SUFFIX.sub("", path_part) - had_git_suffix = clean_path != path_part - - if "@" in clean_path: - at_idx = clean_path.rfind("@") - ref = clean_path[at_idx + 1 :] - base_path = clean_path[:at_idx] - if had_git_suffix: - base_path += ".git" - return url[:host_end] + base_path, ref, None - - return url, None, None - - if url.startswith("git@"): - # SSH format: git@host:owner/repo.git@ref - # The first @ is part of the SSH URL format. - first_at = url.index("@") - rest = url[first_at + 1 :] - - clean_rest = _GIT_SUFFIX.sub("", rest) - had_git_suffix = clean_rest != rest - - if "@" in clean_rest: - at_idx = clean_rest.rfind("@") - ref = clean_rest[at_idx + 1 :] - base_rest = clean_rest[:at_idx] - if had_git_suffix: - base_rest += ".git" - return url[: first_at + 1] + base_rest, ref, None - - return url, None, None - - return url, None, None - - -def cache_key(url: str, ref: str | None) -> str: - """Generate a deterministic cache directory name from a URL and ref. + Non-GitHub URLs and ``raw.githubusercontent.com`` URLs are returned as-is. Args: - url: The clone URL. - ref: The optional Git ref. + url: An HTTPS URL, possibly a GitHub web URL. Returns: - A short hex digest suitable for use as a directory name. + A URL that can be fetched directly to obtain SKILL.md content. """ - key_input = f"{url}@{ref}" if ref else url - return hashlib.sha256(key_input.encode()).hexdigest()[:16] - - -def clone_skill_repo( - url: str, - *, - ref: str | None = None, - subpath: str | None = None, - cache_dir: Path | None = None, - force_refresh: bool = False, -) -> Path: - """Clone a skill repository to a local cache directory. - - Uses ``git clone --depth 1`` for efficiency. If a ``ref`` is provided it - is passed as ``--branch`` (works for branches and tags). Repositories are - cached by a hash of (url, ref) so repeated loads are instant. - - If ``subpath`` is provided, the returned path points to that subdirectory - within the cloned repository (useful for mono-repos containing skills in - nested directories). - - **Cache behaviour**: Cloned repos are cached at - ``$XDG_CACHE_HOME/strands/skills/`` (or ``~/.cache/strands/skills/``). - When no ``ref`` is pinned the default branch is cached; pass - ``force_refresh=True`` to re-clone, or pin a specific tag/branch for - reproducibility. + # Already a raw URL — return as-is + if "raw.githubusercontent.com" in url: + return url + + # Not a github.com URL — return as-is (user provides a direct link) + if not url.startswith("https://github.com"): + return url + + # Parse the path portion + path_start = len("https://github.com") + path = url[path_start:] + + # Handle /tree//path and /blob//path + tree_match = _GITHUB_TREE_PATTERN.match(path) + if tree_match: + owner_repo = tree_match.group(1) + ref = tree_match.group(2) + subpath = tree_match.group(3) or "" + + # If the URL points to a SKILL.md file directly, use it as-is + if subpath.lower().endswith("skill.md"): + return f"https://raw.githubusercontent.com/{owner_repo}/{ref}/{subpath}" + + # Otherwise, assume it's a directory and append SKILL.md + if subpath: + return f"https://raw.githubusercontent.com/{owner_repo}/{ref}/{subpath}/SKILL.md" + return f"https://raw.githubusercontent.com/{owner_repo}/{ref}/SKILL.md" + + # Handle plain repo URL: /owner/repo or /owner/repo@ref + # Strip leading slash and any trailing slash + clean_path = path.strip("/") + + # Check for @ref suffix + if "@" in clean_path: + at_idx = clean_path.rfind("@") + owner_repo = clean_path[:at_idx] + ref = clean_path[at_idx + 1 :] + else: + owner_repo = clean_path + ref = "HEAD" + + # Only match owner/repo (exactly two path segments) + if owner_repo.count("/") == 1: + return f"https://raw.githubusercontent.com/{owner_repo}/{ref}/SKILL.md" + + # Unrecognised GitHub URL pattern — return as-is + return url + + +def fetch_skill_content(url: str) -> str: + """Fetch SKILL.md content from an HTTPS URL. + + Uses ``urllib.request`` (stdlib) so no additional dependencies are needed. Args: - url: The Git clone URL. - ref: Optional branch or tag to check out. - subpath: Optional path within the repo to return (e.g. ``skills/my-skill``). - cache_dir: Override the default cache directory. - force_refresh: If True, delete any cached clone and re-fetch from the - remote. Useful for unpinned refs that may have been updated. + url: The HTTPS URL to fetch. Returns: - Path to the cloned repository root, or to ``subpath`` within it. + The response body as a string. Raises: - RuntimeError: If the clone fails or ``git`` is not installed. + RuntimeError: If the fetch fails (network error, 404, etc.). """ - cache_dir = cache_dir or _default_cache_dir() - cache_dir.mkdir(parents=True, exist_ok=True) - - key = cache_key(url, ref) - target = cache_dir / key - - if force_refresh and target.exists(): - logger.info("url=<%s>, ref=<%s> | force-refreshing cached skill", url, ref) - shutil.rmtree(target) - - if not target.exists(): - logger.info("url=<%s>, ref=<%s> | cloning skill repository", url, ref) - - cmd: list[str] = ["git", "clone", "--depth", "1"] - if ref: - cmd.extend(["--branch", ref]) - - # Clone into a temporary directory first, then atomically rename to - # the target path. This prevents a race condition where two - # concurrent processes both pass the ``not target.exists()`` check - # and attempt to clone into the same directory. - tmp_dir = tempfile.mkdtemp(dir=cache_dir, prefix=".tmp-clone-") - tmp_target = Path(tmp_dir) / "repo" - - try: - cmd.extend([url, str(tmp_target)]) - subprocess.run( # noqa: S603 - cmd, - check=True, - capture_output=True, - text=True, - timeout=120, - ) - try: - tmp_target.rename(target) - except OSError: - # Another process completed the clone first — use theirs - logger.debug("url=<%s> | another process already cached this skill, using existing", url) - except subprocess.CalledProcessError as e: - raise RuntimeError( - f"url=<{url}>, ref=<{ref}> | failed to clone skill repository: {e.stderr.strip()}" - ) from e - except FileNotFoundError as e: - raise RuntimeError("git is required to load skills from URLs but was not found on PATH") from e - finally: - # Always clean up the temp directory - if Path(tmp_dir).exists(): - shutil.rmtree(tmp_dir, ignore_errors=True) - else: - logger.debug("url=<%s>, ref=<%s> | using cached skill at %s", url, ref, target) - - result = target / subpath if subpath else target - - if subpath and not result.is_dir(): - raise RuntimeError(f"url=<{url}>, subpath=<{subpath}> | subdirectory does not exist in cloned repository") - - logger.debug("url=<%s>, ref=<%s> | resolved to %s", url, ref, result) - return result + resolved = resolve_to_raw_url(url) + logger.info("url=<%s> | fetching skill content from %s", url, resolved) + + try: + req = urllib.request.Request(resolved, headers={"User-Agent": "strands-agents-sdk"}) # noqa: S310 + with urllib.request.urlopen(req, timeout=30) as response: # noqa: S310 + return response.read().decode("utf-8") + except urllib.error.HTTPError as e: + raise RuntimeError(f"url=<{resolved}> | HTTP {e.code}: {e.reason}") from e + except urllib.error.URLError as e: + raise RuntimeError(f"url=<{resolved}> | failed to fetch skill: {e.reason}") from e diff --git a/src/strands/vended_plugins/skills/agent_skills.py b/src/strands/vended_plugins/skills/agent_skills.py index 2459be120..ad604a20c 100644 --- a/src/strands/vended_plugins/skills/agent_skills.py +++ b/src/strands/vended_plugins/skills/agent_skills.py @@ -77,7 +77,6 @@ def __init__( state_key: str = _DEFAULT_STATE_KEY, max_resource_files: int = _DEFAULT_MAX_RESOURCE_FILES, strict: bool = False, - cache_dir: Path | None = None, ) -> None: """Initialize the AgentSkills plugin. @@ -87,16 +86,13 @@ def __init__( - A ``str`` or ``Path`` to a skill directory (containing SKILL.md) - A ``str`` or ``Path`` to a parent directory (containing skill subdirectories) - A ``Skill`` dataclass instance - - A remote Git URL (``https://``, ``git@``, or ``ssh://``) - with optional ``@ref`` suffix for branch/tag pinning + - An ``https://`` URL pointing to a SKILL.md file or a GitHub + repository/directory URL (auto-resolved to raw content) state_key: Key used to store plugin state in ``agent.state``. max_resource_files: Maximum number of resource files to list in skill responses. strict: If True, raise on skill validation issues. If False (default), warn and load anyway. - cache_dir: Directory for caching cloned skill repositories. - Defaults to ``~/.cache/strands/skills/``. """ self._strict = strict - self._cache_dir = cache_dir self._skills: dict[str, Skill] = self._resolve_skills(_normalize_sources(skills)) self._state_key = state_key self._max_resource_files = max_resource_files @@ -310,11 +306,10 @@ def _resolve_skills(self, sources: list[SkillSource]) -> dict[str, Skill]: resolved[source.name] = source elif isinstance(source, str) and is_url(source): try: - url_skills = Skill.from_url(source, cache_dir=self._cache_dir, strict=self._strict) - for skill in url_skills: - if skill.name in resolved: - logger.warning("name=<%s> | duplicate skill name, overwriting previous skill", skill.name) - resolved[skill.name] = skill + skill = Skill.from_url(source, strict=self._strict) + if skill.name in resolved: + logger.warning("name=<%s> | duplicate skill name, overwriting previous skill", skill.name) + resolved[skill.name] = skill except (RuntimeError, ValueError) as e: logger.warning("url=<%s> | failed to load skill from URL: %s", source, e) else: diff --git a/src/strands/vended_plugins/skills/skill.py b/src/strands/vended_plugins/skills/skill.py index 8435442c3..70181f3be 100644 --- a/src/strands/vended_plugins/skills/skill.py +++ b/src/strands/vended_plugins/skills/skill.py @@ -334,68 +334,46 @@ def from_content(cls, content: str, *, strict: bool = False) -> Skill: return _build_skill_from_frontmatter(frontmatter, body) @classmethod - def from_url( - cls, - url: str, - *, - cache_dir: Path | None = None, - force_refresh: bool = False, - strict: bool = False, - ) -> list[Skill]: - """Load skill(s) from a remote Git repository URL. + def from_url(cls, url: str, *, strict: bool = False) -> Skill: + """Load a skill by fetching its SKILL.md content from an HTTPS URL. - Clones the repository (or uses a cached copy) and then loads skills - using the standard filesystem methods. If the repository root contains - a ``SKILL.md`` file it is treated as a single skill; otherwise it is - scanned for skill subdirectories. + Fetches the SKILL.md content over HTTPS and parses it using + :meth:`from_content`. GitHub web URLs are automatically resolved + to ``raw.githubusercontent.com``:: - Supports an optional ``@ref`` suffix for branch or tag pinning:: - - skills = Skill.from_url("https://github.com/org/my-skill@v1.0.0") + # Direct raw URL + skill = Skill.from_url( + "https://raw.githubusercontent.com/org/repo/main/SKILL.md" + ) - Also supports GitHub web URLs pointing to subdirectories:: + # GitHub web URL (auto-resolved) + skill = Skill.from_url( + "https://github.com/org/repo/tree/main/skills/my-skill" + ) - skills = Skill.from_url("https://github.com/org/repo/tree/main/skills/my-skill") + # Repository root with @ref + skill = Skill.from_url("https://github.com/org/my-skill@v1.0.0") Args: - url: A Git-cloneable URL, optionally with an ``@ref`` suffix or - a GitHub ``/tree//path`` URL. Only ``https://``, - ``ssh://``, and ``git@`` schemes are supported; plaintext - ``http://`` is rejected for security. - cache_dir: Override the default cache directory - (``$XDG_CACHE_HOME/strands/skills/`` or - ``~/.cache/strands/skills/``). - force_refresh: If True, discard any cached clone and re-fetch from - the remote. Useful when loading unpinned refs (e.g. ``main``) - that may have been updated upstream. + url: An ``https://`` URL pointing to a SKILL.md file, a GitHub + repository, or a GitHub ``/tree//path`` URL. strict: If True, raise on any validation issue. If False (default), warn and load anyway. Returns: - List of Skill instances loaded from the repository. + A Skill instance populated from the fetched SKILL.md content. Raises: - ValueError: If ``url`` is not a recognised remote URL scheme. - RuntimeError: If the repository cannot be cloned or ``git`` is not - available. + ValueError: If ``url`` is not an ``https://`` URL. + RuntimeError: If the SKILL.md content cannot be fetched. """ - from ._url_loader import clone_skill_repo, is_url, parse_url_ref + from ._url_loader import fetch_skill_content, is_url if not is_url(url): - raise ValueError(f"url=<{url}> | not a valid remote URL") + raise ValueError(f"url=<{url}> | not a valid HTTPS URL") - clean_url, ref, subpath = parse_url_ref(url) - repo_path = clone_skill_repo( - clean_url, ref=ref, subpath=subpath, cache_dir=cache_dir, force_refresh=force_refresh - ) - - # If the repo root is itself a skill, load it directly - has_skill_md = (repo_path / "SKILL.md").is_file() or (repo_path / "skill.md").is_file() - - if has_skill_md: - return [cls.from_file(repo_path, strict=strict)] - else: - return cls.from_directory(repo_path, strict=strict) + content = fetch_skill_content(url) + return cls.from_content(content, strict=strict) @classmethod def from_directory(cls, skills_dir: str | Path, *, strict: bool = False) -> list[Skill]: diff --git a/tests/strands/vended_plugins/skills/test_agent_skills.py b/tests/strands/vended_plugins/skills/test_agent_skills.py index b9ed9692a..4e69a7f68 100644 --- a/tests/strands/vended_plugins/skills/test_agent_skills.py +++ b/tests/strands/vended_plugins/skills/test_agent_skills.py @@ -665,32 +665,13 @@ class TestResolveUrlSkills: """Tests for _resolve_skills with URL sources.""" _URL_LOADER = "strands.vended_plugins.skills._url_loader" + _SAMPLE_CONTENT = "---\nname: url-skill\ndescription: A URL skill\n---\n# Instructions\n" - def _mock_clone(self, tmp_path, skill_name="url-skill", description="A URL skill"): - """Create a mock clone function that creates a skill directory.""" - skill_dir = tmp_path / "cloned" - - def fake_clone(url, *, ref=None, subpath=None, cache_dir=None, force_refresh=False): - skill_dir.mkdir(parents=True, exist_ok=True) - content = f"---\nname: {skill_name}\ndescription: {description}\n---\n# Instructions\n" - (skill_dir / "SKILL.md").write_text(content) - return skill_dir - - return fake_clone - - def test_resolve_url_source(self, tmp_path): + def test_resolve_url_source(self): """Test resolving a URL string as a skill source.""" from unittest.mock import patch - fake_clone = self._mock_clone(tmp_path) - - with ( - patch(f"{self._URL_LOADER}.clone_skill_repo", side_effect=fake_clone), - patch( - f"{self._URL_LOADER}.parse_url_ref", - return_value=("https://github.com/org/url-skill", None, None), - ), - ): + with patch(f"{self._URL_LOADER}.fetch_skill_content", return_value=self._SAMPLE_CONTENT): plugin = AgentSkills(skills=["https://github.com/org/url-skill"]) assert len(plugin.get_available_skills()) == 1 @@ -701,15 +682,8 @@ def test_resolve_mixed_url_and_local(self, tmp_path): from unittest.mock import patch _make_skill_dir(tmp_path, "local-skill") - fake_clone = self._mock_clone(tmp_path) - with ( - patch(f"{self._URL_LOADER}.clone_skill_repo", side_effect=fake_clone), - patch( - f"{self._URL_LOADER}.parse_url_ref", - return_value=("https://github.com/org/url-skill", None, None), - ), - ): + with patch(f"{self._URL_LOADER}.fetch_skill_content", return_value=self._SAMPLE_CONTENT): plugin = AgentSkills( skills=[ "https://github.com/org/url-skill", @@ -722,18 +696,14 @@ def test_resolve_mixed_url_and_local(self, tmp_path): assert names == {"url-skill", "local-skill"} def test_resolve_url_failure_skips_gracefully(self, caplog): - """Test that a failed URL clone is skipped with a warning.""" + """Test that a failed URL fetch is skipped with a warning.""" import logging from unittest.mock import patch with ( patch( - f"{self._URL_LOADER}.parse_url_ref", - return_value=("https://github.com/org/broken", None, None), - ), - patch( - f"{self._URL_LOADER}.clone_skill_repo", - side_effect=RuntimeError("clone failed"), + f"{self._URL_LOADER}.fetch_skill_content", + side_effect=RuntimeError("HTTP 404: Not Found"), ), caplog.at_level(logging.WARNING), ): @@ -742,30 +712,6 @@ def test_resolve_url_failure_skips_gracefully(self, caplog): assert len(plugin.get_available_skills()) == 0 assert "failed to load skill from URL" in caplog.text - def test_cache_dir_forwarded(self, tmp_path): - """Test that custom cache_dir is forwarded through to clone.""" - from unittest.mock import patch - - fake_clone = self._mock_clone(tmp_path) - captured_cache = [] - - def tracking_clone(url, *, ref=None, subpath=None, cache_dir=None, force_refresh=False): - captured_cache.append(cache_dir) - return fake_clone(url, ref=ref, cache_dir=cache_dir) - - custom_cache = tmp_path / "my-cache" - - with ( - patch(f"{self._URL_LOADER}.clone_skill_repo", side_effect=tracking_clone), - patch( - f"{self._URL_LOADER}.parse_url_ref", - return_value=("https://github.com/org/url-skill", None, None), - ), - ): - AgentSkills(skills=["https://github.com/org/url-skill"], cache_dir=custom_cache) - - assert captured_cache == [custom_cache] - class TestImports: """Tests for module imports.""" diff --git a/tests/strands/vended_plugins/skills/test_skill.py b/tests/strands/vended_plugins/skills/test_skill.py index c8a5232eb..7b814a00e 100644 --- a/tests/strands/vended_plugins/skills/test_skill.py +++ b/tests/strands/vended_plugins/skills/test_skill.py @@ -556,135 +556,69 @@ class TestSkillFromUrl: _URL_LOADER = "strands.vended_plugins.skills._url_loader" - def _mock_clone(self, tmp_path, skill_name="my-skill", description="A remote skill", body="Remote instructions."): - """Create a mock clone function that creates a skill directory.""" - skill_dir = tmp_path / "cloned" - - def fake_clone(url, *, ref=None, subpath=None, cache_dir=None, force_refresh=False): - skill_dir.mkdir(parents=True, exist_ok=True) - content = f"---\nname: {skill_name}\ndescription: {description}\n---\n{body}\n" - (skill_dir / "SKILL.md").write_text(content) - return skill_dir - - return fake_clone - - def _mock_clone_multi(self, tmp_path): - """Create a mock clone function that creates a parent dir with multiple skills.""" - parent_dir = tmp_path / "cloned" - - def fake_clone(url, *, ref=None, subpath=None, cache_dir=None, force_refresh=False): - parent_dir.mkdir(parents=True, exist_ok=True) - for name in ("skill-a", "skill-b"): - child = parent_dir / name - child.mkdir(exist_ok=True) - (child / "SKILL.md").write_text(f"---\nname: {name}\ndescription: Skill {name}\n---\nBody.\n") - return parent_dir - - return fake_clone - - def test_from_url_single_skill(self, tmp_path): - """Test loading a single skill from a URL.""" - from unittest.mock import patch + _SAMPLE_CONTENT = "---\nname: my-skill\ndescription: A remote skill\n---\nRemote instructions.\n" - fake_clone = self._mock_clone(tmp_path) + def test_from_url_returns_skill(self): + """Test loading a skill from a URL returns a single Skill.""" + from unittest.mock import patch - with ( - patch(f"{self._URL_LOADER}.clone_skill_repo", side_effect=fake_clone), - patch(f"{self._URL_LOADER}.parse_url_ref", return_value=("https://github.com/org/my-skill", None, None)), - ): - skills = Skill.from_url("https://github.com/org/my-skill") + with patch(f"{self._URL_LOADER}.fetch_skill_content", return_value=self._SAMPLE_CONTENT): + skill = Skill.from_url("https://raw.githubusercontent.com/org/repo/main/SKILL.md") - assert len(skills) == 1 - assert skills[0].name == "my-skill" - assert skills[0].description == "A remote skill" - assert "Remote instructions." in skills[0].instructions + assert isinstance(skill, Skill) + assert skill.name == "my-skill" + assert skill.description == "A remote skill" + assert "Remote instructions." in skill.instructions + assert skill.path is None - def test_from_url_multiple_skills(self, tmp_path): - """Test loading multiple skills from a URL pointing to a parent directory.""" + def test_from_url_github_web_url(self): + """Test that GitHub web URLs are handled (resolved in fetch_skill_content).""" from unittest.mock import patch - fake_clone = self._mock_clone_multi(tmp_path) + with patch(f"{self._URL_LOADER}.fetch_skill_content", return_value=self._SAMPLE_CONTENT): + skill = Skill.from_url("https://github.com/org/repo/tree/main/skills/my-skill") - with ( - patch(f"{self._URL_LOADER}.clone_skill_repo", side_effect=fake_clone), - patch( - f"{self._URL_LOADER}.parse_url_ref", - return_value=("https://github.com/org/skills-collection", None, None), - ), - ): - skills = Skill.from_url("https://github.com/org/skills-collection") - - assert len(skills) == 2 - names = {s.name for s in skills} - assert names == {"skill-a", "skill-b"} + assert skill.name == "my-skill" - def test_from_url_with_ref(self, tmp_path): - """Test that @ref is correctly parsed and forwarded.""" + def test_from_url_with_ref(self): + """Test URL with @ref suffix.""" from unittest.mock import patch - fake_clone = self._mock_clone(tmp_path) - captured_ref = [] - - def tracking_clone(url, *, ref=None, subpath=None, cache_dir=None, force_refresh=False): - captured_ref.append(ref) - return fake_clone(url, ref=ref, cache_dir=cache_dir) + with patch(f"{self._URL_LOADER}.fetch_skill_content", return_value=self._SAMPLE_CONTENT): + skill = Skill.from_url("https://github.com/org/my-skill@v1.0.0") - with ( - patch(f"{self._URL_LOADER}.clone_skill_repo", side_effect=tracking_clone), - patch( - f"{self._URL_LOADER}.parse_url_ref", - return_value=("https://github.com/org/my-skill", "v1.0.0", None), - ), - ): - Skill.from_url("https://github.com/org/my-skill@v1.0.0") - - assert captured_ref == ["v1.0.0"] + assert skill.name == "my-skill" def test_from_url_invalid_url_raises(self): - """Test that a non-URL string raises ValueError.""" - with pytest.raises(ValueError, match="not a valid remote URL"): + """Test that a non-HTTPS URL raises ValueError.""" + with pytest.raises(ValueError, match="not a valid HTTPS URL"): Skill.from_url("./local-path") - def test_from_url_clone_failure_raises(self): - """Test that a clone failure propagates as RuntimeError.""" + def test_from_url_http_rejected(self): + """Test that http:// URLs are rejected.""" + with pytest.raises(ValueError, match="not a valid HTTPS URL"): + Skill.from_url("http://example.com/SKILL.md") + + def test_from_url_fetch_failure_raises(self): + """Test that a fetch failure propagates as RuntimeError.""" from unittest.mock import patch - with ( - patch( - f"{self._URL_LOADER}.parse_url_ref", - return_value=("https://github.com/org/broken", None, None), - ), - patch( - f"{self._URL_LOADER}.clone_skill_repo", - side_effect=RuntimeError("clone failed"), - ), + with patch( + f"{self._URL_LOADER}.fetch_skill_content", + side_effect=RuntimeError("HTTP 404: Not Found"), ): - with pytest.raises(RuntimeError, match="clone failed"): - Skill.from_url("https://github.com/org/broken") + with pytest.raises(RuntimeError, match="HTTP 404"): + Skill.from_url("https://example.com/nonexistent/SKILL.md") - def test_from_url_passes_cache_dir(self, tmp_path): - """Test that cache_dir is forwarded to clone_skill_repo.""" + def test_from_url_strict_mode(self): + """Test that strict mode is forwarded to from_content.""" from unittest.mock import patch - fake_clone = self._mock_clone(tmp_path) - captured_cache = [] - - def tracking_clone(url, *, ref=None, subpath=None, cache_dir=None, force_refresh=False): - captured_cache.append(cache_dir) - return fake_clone(url, ref=ref, cache_dir=cache_dir) - - custom_cache = tmp_path / "custom-cache" - - with ( - patch(f"{self._URL_LOADER}.clone_skill_repo", side_effect=tracking_clone), - patch( - f"{self._URL_LOADER}.parse_url_ref", - return_value=("https://github.com/org/my-skill", None, None), - ), - ): - Skill.from_url("https://github.com/org/my-skill", cache_dir=custom_cache) + bad_content = "---\nname: BAD_NAME\ndescription: Bad\n---\nBody." - assert captured_cache == [custom_cache] + with patch(f"{self._URL_LOADER}.fetch_skill_content", return_value=bad_content): + with pytest.raises(ValueError): + Skill.from_url("https://example.com/SKILL.md", strict=True) class TestSkillClassmethods: diff --git a/tests/strands/vended_plugins/skills/test_url_loader.py b/tests/strands/vended_plugins/skills/test_url_loader.py index f26cf8083..a33c0f6b7 100644 --- a/tests/strands/vended_plugins/skills/test_url_loader.py +++ b/tests/strands/vended_plugins/skills/test_url_loader.py @@ -2,37 +2,38 @@ from __future__ import annotations -import os -import subprocess -from pathlib import Path -from unittest.mock import patch +import urllib.error +from unittest.mock import MagicMock, patch import pytest from strands.vended_plugins.skills._url_loader import ( - _default_cache_dir, - cache_key, - clone_skill_repo, + fetch_skill_content, is_url, - parse_url_ref, + resolve_to_raw_url, ) class TestIsUrl: """Tests for is_url.""" - def test_https_github_url(self): + def test_https_url(self): assert is_url("https://github.com/org/skill-repo") is True - def test_http_url_rejected(self): - """Plaintext http:// is not supported for security reasons.""" + def test_https_raw_url(self): + assert is_url("https://raw.githubusercontent.com/org/repo/main/SKILL.md") is True + + def test_http_rejected(self): + """Plaintext http:// is rejected for security.""" assert is_url("http://github.com/org/skill-repo") is False - def test_ssh_url(self): - assert is_url("ssh://git@github.com/org/skill-repo") is True + def test_ssh_rejected(self): + """SSH URLs are not supported in HTTPS-only mode.""" + assert is_url("ssh://git@github.com/org/skill-repo") is False - def test_git_at_url(self): - assert is_url("git@github.com:org/skill-repo.git") is True + def test_git_at_rejected(self): + """git@ URLs are not supported in HTTPS-only mode.""" + assert is_url("git@github.com:org/skill-repo.git") is False def test_local_relative_path(self): assert is_url("./skills/my-skill") is False @@ -46,406 +47,121 @@ def test_plain_directory_name(self): def test_empty_string(self): assert is_url("") is False - def test_https_with_ref(self): - assert is_url("https://github.com/org/skill@v1.0.0") is True - - -class TestParseUrlRef: - """Tests for parse_url_ref.""" - - def test_https_no_ref(self): - url, ref, subpath = parse_url_ref("https://github.com/org/skill-repo") - assert url == "https://github.com/org/skill-repo" - assert ref is None - assert subpath is None - - def test_https_with_tag_ref(self): - url, ref, subpath = parse_url_ref("https://github.com/org/skill-repo@v1.0.0") - assert url == "https://github.com/org/skill-repo" - assert ref == "v1.0.0" - assert subpath is None - - def test_https_with_branch_ref(self): - url, ref, subpath = parse_url_ref("https://github.com/org/skill-repo@main") - assert url == "https://github.com/org/skill-repo" - assert ref == "main" - assert subpath is None - - def test_https_git_suffix_no_ref(self): - url, ref, subpath = parse_url_ref("https://github.com/org/skill-repo.git") - assert url == "https://github.com/org/skill-repo.git" - assert ref is None - assert subpath is None - - def test_https_git_suffix_with_ref(self): - url, ref, subpath = parse_url_ref("https://github.com/org/skill-repo.git@v2.0") - assert url == "https://github.com/org/skill-repo.git" - assert ref == "v2.0" - assert subpath is None - - def test_ssh_no_ref(self): - url, ref, subpath = parse_url_ref("git@github.com:org/skill-repo.git") - assert url == "git@github.com:org/skill-repo.git" - assert ref is None - assert subpath is None - - def test_ssh_with_ref(self): - url, ref, subpath = parse_url_ref("git@github.com:org/skill-repo.git@v1.0") - assert url == "git@github.com:org/skill-repo.git" - assert ref == "v1.0" - assert subpath is None - - def test_ssh_no_git_suffix_with_ref(self): - url, ref, subpath = parse_url_ref("git@github.com:org/skill-repo@develop") - assert url == "git@github.com:org/skill-repo" - assert ref == "develop" - assert subpath is None - - def test_ssh_protocol_no_ref(self): - url, ref, subpath = parse_url_ref("ssh://git@github.com/org/skill-repo") - assert url == "ssh://git@github.com/org/skill-repo" - assert ref is None - assert subpath is None - - def test_ssh_protocol_with_ref(self): - url, ref, subpath = parse_url_ref("ssh://git@github.com/org/skill-repo@v3") - assert url == "ssh://git@github.com/org/skill-repo" - assert ref == "v3" - assert subpath is None - - def test_http_with_ref(self): - """http:// URLs are still parsed (parse_url_ref doesn't enforce security).""" - url, ref, subpath = parse_url_ref("http://gitlab.com/org/skill@feature-branch") - assert url == "http://gitlab.com/org/skill" - assert ref == "feature-branch" - assert subpath is None - - def test_url_host_only(self): - url, ref, subpath = parse_url_ref("https://example.com") - assert url == "https://example.com" - assert ref is None - assert subpath is None - - def test_non_url_passthrough(self): - url, ref, subpath = parse_url_ref("/local/path") - assert url == "/local/path" - assert ref is None - assert subpath is None - - -class TestParseUrlRefGitHubTree: - """Tests for parse_url_ref with GitHub /tree/ and /blob/ URLs.""" - - def test_tree_with_subpath(self): - url, ref, subpath = parse_url_ref("https://github.com/org/repo/tree/main/skills/my-skill") - assert url == "https://github.com/org/repo" - assert ref == "main" - assert subpath == "skills/my-skill" - - def test_tree_branch_only(self): - url, ref, subpath = parse_url_ref("https://github.com/org/repo/tree/main") - assert url == "https://github.com/org/repo" - assert ref == "main" - assert subpath is None - - def test_tree_with_tag(self): - url, ref, subpath = parse_url_ref("https://github.com/org/repo/tree/v1.0.0/skills/brainstorming") - assert url == "https://github.com/org/repo" - assert ref == "v1.0.0" - assert subpath == "skills/brainstorming" - - def test_tree_deep_subpath(self): - url, ref, subpath = parse_url_ref("https://github.com/org/repo/tree/develop/a/b/c/d") - assert url == "https://github.com/org/repo" - assert ref == "develop" - assert subpath == "a/b/c/d" - - def test_blob_url(self): - """Test that /blob/ URLs are handled like /tree/.""" - url, ref, subpath = parse_url_ref("https://github.com/org/repo/blob/main/skills/my-skill") - assert url == "https://github.com/org/repo" - assert ref == "main" - assert subpath == "skills/my-skill" - - def test_tree_trailing_slash(self): - url, ref, subpath = parse_url_ref("https://github.com/org/repo/tree/main/skills/my-skill/") - assert url == "https://github.com/org/repo" - assert ref == "main" - assert subpath == "skills/my-skill" - - -class TestDefaultCacheDir: - """Tests for _default_cache_dir.""" - - def test_respects_xdg_cache_home(self, tmp_path): - """Test that XDG_CACHE_HOME is respected.""" - with patch.dict(os.environ, {"XDG_CACHE_HOME": str(tmp_path / "xdg")}): - result = _default_cache_dir() - assert result == tmp_path / "xdg" / "strands" / "skills" - - def test_falls_back_to_home_cache(self): - """Test that without XDG_CACHE_HOME, falls back to ~/.cache.""" - with patch.dict(os.environ, {}, clear=False): - # Remove XDG_CACHE_HOME if set - env = os.environ.copy() - env.pop("XDG_CACHE_HOME", None) - with patch.dict(os.environ, env, clear=True): - result = _default_cache_dir() - assert result == Path.home() / ".cache" / "strands" / "skills" - - -class TestCacheKey: - """Tests for cache_key.""" - - def test_deterministic(self): - key1 = cache_key("https://github.com/org/repo", None) - key2 = cache_key("https://github.com/org/repo", None) - assert key1 == key2 - - def test_different_url_different_key(self): - key1 = cache_key("https://github.com/org/repo-a", None) - key2 = cache_key("https://github.com/org/repo-b", None) - assert key1 != key2 - - def test_different_ref_different_key(self): - key1 = cache_key("https://github.com/org/repo", None) - key2 = cache_key("https://github.com/org/repo", "v1.0") - assert key1 != key2 - - def test_length(self): - key = cache_key("https://github.com/org/repo", "main") - assert len(key) == 16 - - def test_hex_characters_only(self): - key = cache_key("https://github.com/org/repo", "main") - assert all(c in "0123456789abcdef" for c in key) - - -def _fake_clone_factory(): - """Return a fake clone function that creates the target directory via atomic rename.""" - - def fake_clone(cmd, **kwargs): - target_dir = Path(cmd[-1]) - target_dir.mkdir(parents=True, exist_ok=True) - (target_dir / "SKILL.md").write_text("---\nname: test\ndescription: test\n---\n") - return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") - - return fake_clone +class TestResolveToRawUrl: + """Tests for resolve_to_raw_url.""" -class TestCloneSkillRepo: - """Tests for clone_skill_repo.""" - - def test_clone_success(self, tmp_path): - """Test successful clone by mocking subprocess.run.""" - cache = tmp_path / "cache" - - with patch("strands.vended_plugins.skills._url_loader.subprocess.run", side_effect=_fake_clone_factory()): - result = clone_skill_repo("https://github.com/org/skill", cache_dir=cache) - - assert result.exists() - - def test_clone_with_ref(self, tmp_path): - """Test that ref is passed as --branch to git clone.""" - cache = tmp_path / "cache" - captured_cmd = [] - - def fake_clone(cmd, **kwargs): - captured_cmd.extend(cmd) - target_dir = Path(cmd[-1]) - target_dir.mkdir(parents=True, exist_ok=True) - return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") - - with patch("strands.vended_plugins.skills._url_loader.subprocess.run", side_effect=fake_clone): - clone_skill_repo("https://github.com/org/skill", ref="v1.0", cache_dir=cache) + def test_raw_url_passthrough(self): + url = "https://raw.githubusercontent.com/org/repo/main/SKILL.md" + assert resolve_to_raw_url(url) == url - assert "--branch" in captured_cmd - assert "v1.0" in captured_cmd + def test_non_github_passthrough(self): + url = "https://example.com/skills/SKILL.md" + assert resolve_to_raw_url(url) == url - def test_clone_without_ref(self, tmp_path): - """Test that --branch is not passed when ref is None.""" - cache = tmp_path / "cache" - captured_cmd = [] + def test_repo_root(self): + assert resolve_to_raw_url("https://github.com/org/repo") == ( + "https://raw.githubusercontent.com/org/repo/HEAD/SKILL.md" + ) - def fake_clone(cmd, **kwargs): - captured_cmd.extend(cmd) - target_dir = Path(cmd[-1]) - target_dir.mkdir(parents=True, exist_ok=True) - return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") + def test_repo_root_trailing_slash(self): + assert resolve_to_raw_url("https://github.com/org/repo/") == ( + "https://raw.githubusercontent.com/org/repo/HEAD/SKILL.md" + ) - with patch("strands.vended_plugins.skills._url_loader.subprocess.run", side_effect=fake_clone): - clone_skill_repo("https://github.com/org/skill", cache_dir=cache) + def test_repo_root_with_ref(self): + assert resolve_to_raw_url("https://github.com/org/repo@v1.0.0") == ( + "https://raw.githubusercontent.com/org/repo/v1.0.0/SKILL.md" + ) - assert "--branch" not in captured_cmd + def test_repo_root_with_branch_ref(self): + assert resolve_to_raw_url("https://github.com/org/repo@main") == ( + "https://raw.githubusercontent.com/org/repo/main/SKILL.md" + ) - def test_uses_cache_on_second_call(self, tmp_path): - """Test that the cache is used on the second call.""" - cache = tmp_path / "cache" - call_count = 0 + def test_tree_url_directory(self): + assert resolve_to_raw_url("https://github.com/org/repo/tree/main/skills/my-skill") == ( + "https://raw.githubusercontent.com/org/repo/main/skills/my-skill/SKILL.md" + ) - def fake_clone(cmd, **kwargs): - nonlocal call_count - call_count += 1 - target_dir = Path(cmd[-1]) - target_dir.mkdir(parents=True, exist_ok=True) - return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") + def test_tree_url_branch_only(self): + assert resolve_to_raw_url("https://github.com/org/repo/tree/main") == ( + "https://raw.githubusercontent.com/org/repo/main/SKILL.md" + ) - with patch("strands.vended_plugins.skills._url_loader.subprocess.run", side_effect=fake_clone): - result1 = clone_skill_repo("https://github.com/org/skill", cache_dir=cache) - result2 = clone_skill_repo("https://github.com/org/skill", cache_dir=cache) + def test_tree_url_trailing_slash(self): + assert resolve_to_raw_url("https://github.com/org/repo/tree/main/skills/my-skill/") == ( + "https://raw.githubusercontent.com/org/repo/main/skills/my-skill/SKILL.md" + ) - assert call_count == 1 - assert result1 == result2 + def test_tree_url_with_tag(self): + assert resolve_to_raw_url("https://github.com/org/repo/tree/v2.0/skills/brainstorming") == ( + "https://raw.githubusercontent.com/org/repo/v2.0/skills/brainstorming/SKILL.md" + ) - def test_clone_failure_raises_runtime_error(self, tmp_path): - """Test that a failed clone raises RuntimeError.""" - cache = tmp_path / "cache" + def test_blob_url_to_skill_md(self): + assert resolve_to_raw_url("https://github.com/org/repo/blob/main/skills/my-skill/SKILL.md") == ( + "https://raw.githubusercontent.com/org/repo/main/skills/my-skill/SKILL.md" + ) - with patch( - "strands.vended_plugins.skills._url_loader.subprocess.run", - side_effect=subprocess.CalledProcessError(128, "git", stderr="fatal: repo not found"), - ): - with pytest.raises(RuntimeError, match="failed to clone"): - clone_skill_repo("https://github.com/org/nonexistent", cache_dir=cache) + def test_blob_url_to_lowercase_skill_md(self): + assert resolve_to_raw_url("https://github.com/org/repo/blob/main/skills/my-skill/skill.md") == ( + "https://raw.githubusercontent.com/org/repo/main/skills/my-skill/skill.md" + ) - def test_clone_failure_cleans_up_temp_dir(self, tmp_path): - """Test that a failed clone removes the temp directory.""" - cache = tmp_path / "cache" - cache.mkdir() - with patch( - "strands.vended_plugins.skills._url_loader.subprocess.run", - side_effect=subprocess.CalledProcessError(128, "git", stderr="fatal: error"), - ): - with pytest.raises(RuntimeError): - clone_skill_repo("https://github.com/org/broken", cache_dir=cache) +class TestFetchSkillContent: + """Tests for fetch_skill_content.""" + + _LOADER = "strands.vended_plugins.skills._url_loader" + + def test_fetch_success(self): + """Test successful content fetch.""" + skill_content = "---\nname: test-skill\ndescription: A test\n---\n# Instructions\n" + + mock_response = MagicMock() + mock_response.read.return_value = skill_content.encode("utf-8") + mock_response.__enter__ = MagicMock(return_value=mock_response) + mock_response.__exit__ = MagicMock(return_value=False) - # Only the cache dir itself should remain, no temp or target dirs - remaining = [p for p in cache.iterdir() if not p.name.startswith(".")] - assert len(remaining) == 0 + with patch(f"{self._LOADER}.urllib.request.urlopen", return_value=mock_response): + result = fetch_skill_content("https://raw.githubusercontent.com/org/repo/main/SKILL.md") - def test_git_not_found_raises_runtime_error(self, tmp_path): - """Test that missing git binary raises RuntimeError.""" - cache = tmp_path / "cache" + assert result == skill_content + + def test_fetch_resolves_github_url(self): + """Test that GitHub web URLs are resolved before fetching.""" + skill_content = "---\nname: test\ndescription: test\n---\n" + + mock_response = MagicMock() + mock_response.read.return_value = skill_content.encode("utf-8") + mock_response.__enter__ = MagicMock(return_value=mock_response) + mock_response.__exit__ = MagicMock(return_value=False) + + with patch(f"{self._LOADER}.urllib.request.urlopen", return_value=mock_response) as mock_urlopen: + fetch_skill_content("https://github.com/org/repo/tree/main/skills/my-skill") + + # Verify the resolved raw URL was used + call_args = mock_urlopen.call_args + request_obj = call_args[0][0] + assert "raw.githubusercontent.com" in request_obj.full_url + + def test_fetch_http_error(self): + """Test that HTTP errors raise RuntimeError.""" + with patch( + f"{self._LOADER}.urllib.request.urlopen", + side_effect=urllib.error.HTTPError( + url="https://example.com", code=404, msg="Not Found", hdrs=None, fp=None + ), + ): + with pytest.raises(RuntimeError, match="HTTP 404"): + fetch_skill_content("https://example.com/SKILL.md") + def test_fetch_url_error(self): + """Test that network errors raise RuntimeError.""" with patch( - "strands.vended_plugins.skills._url_loader.subprocess.run", - side_effect=FileNotFoundError(), + f"{self._LOADER}.urllib.request.urlopen", + side_effect=urllib.error.URLError("Connection refused"), ): - with pytest.raises(RuntimeError, match="git is required"): - clone_skill_repo("https://github.com/org/skill", cache_dir=cache) - - def test_creates_cache_dir_if_missing(self, tmp_path): - """Test that the cache directory is created if it doesn't exist.""" - cache = tmp_path / "deep" / "nested" / "cache" - - def fake_clone(cmd, **kwargs): - target_dir = Path(cmd[-1]) - target_dir.mkdir(parents=True, exist_ok=True) - return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") - - with patch("strands.vended_plugins.skills._url_loader.subprocess.run", side_effect=fake_clone): - clone_skill_repo("https://github.com/org/skill", cache_dir=cache) - - assert cache.exists() - - def test_subpath_returns_subdirectory(self, tmp_path): - """Test that subpath parameter returns the subdirectory within the clone.""" - cache = tmp_path / "cache" - - def fake_clone(cmd, **kwargs): - target_dir = Path(cmd[-1]) - target_dir.mkdir(parents=True, exist_ok=True) - skill_dir = target_dir / "skills" / "my-skill" - skill_dir.mkdir(parents=True) - (skill_dir / "SKILL.md").write_text("---\nname: my-skill\ndescription: test\n---\n") - return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") - - with patch("strands.vended_plugins.skills._url_loader.subprocess.run", side_effect=fake_clone): - result = clone_skill_repo("https://github.com/org/repo", subpath="skills/my-skill", cache_dir=cache) - - assert result.name == "my-skill" - assert (result / "SKILL.md").exists() - - def test_subpath_nonexistent_raises(self, tmp_path): - """Test that a nonexistent subpath raises RuntimeError.""" - cache = tmp_path / "cache" - - def fake_clone(cmd, **kwargs): - target_dir = Path(cmd[-1]) - target_dir.mkdir(parents=True, exist_ok=True) - return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") - - with patch("strands.vended_plugins.skills._url_loader.subprocess.run", side_effect=fake_clone): - with pytest.raises(RuntimeError, match="subdirectory does not exist"): - clone_skill_repo("https://github.com/org/repo", subpath="nonexistent/path", cache_dir=cache) - - def test_shallow_clone_depth_one(self, tmp_path): - """Test that --depth 1 is always passed.""" - cache = tmp_path / "cache" - captured_cmd = [] - - def fake_clone(cmd, **kwargs): - captured_cmd.extend(cmd) - target_dir = Path(cmd[-1]) - target_dir.mkdir(parents=True, exist_ok=True) - return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") - - with patch("strands.vended_plugins.skills._url_loader.subprocess.run", side_effect=fake_clone): - clone_skill_repo("https://github.com/org/skill", cache_dir=cache) - - assert "--depth" in captured_cmd - depth_idx = captured_cmd.index("--depth") - assert captured_cmd[depth_idx + 1] == "1" - - def test_force_refresh_reclones(self, tmp_path): - """Test that force_refresh=True deletes cache and re-clones.""" - cache = tmp_path / "cache" - call_count = 0 - - def fake_clone(cmd, **kwargs): - nonlocal call_count - call_count += 1 - target_dir = Path(cmd[-1]) - target_dir.mkdir(parents=True, exist_ok=True) - (target_dir / "marker.txt").write_text(f"clone-{call_count}") - return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") - - with patch("strands.vended_plugins.skills._url_loader.subprocess.run", side_effect=fake_clone): - result1 = clone_skill_repo("https://github.com/org/skill", cache_dir=cache) - assert (result1 / "marker.txt").read_text() == "clone-1" - - result2 = clone_skill_repo("https://github.com/org/skill", cache_dir=cache, force_refresh=True) - assert (result2 / "marker.txt").read_text() == "clone-2" - - assert call_count == 2 - assert result1 == result2 - - def test_force_refresh_noop_when_no_cache(self, tmp_path): - """Test that force_refresh=True works even when there's no existing cache.""" - cache = tmp_path / "cache" - - with patch("strands.vended_plugins.skills._url_loader.subprocess.run", side_effect=_fake_clone_factory()): - result = clone_skill_repo("https://github.com/org/skill", cache_dir=cache, force_refresh=True) - - assert result.exists() - - def test_race_condition_other_process_wins(self, tmp_path): - """Test that when another process clones first (rename fails), we use their clone.""" - cache = tmp_path / "cache" - key_hash = cache_key("https://github.com/org/skill", None) - target = cache / key_hash - - def fake_clone(cmd, **kwargs): - target_dir = Path(cmd[-1]) - target_dir.mkdir(parents=True, exist_ok=True) - # Simulate another process completing the clone first - target.mkdir(parents=True, exist_ok=True) - (target / "SKILL.md").write_text("---\nname: winner\ndescription: test\n---\n") - return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") - - with patch("strands.vended_plugins.skills._url_loader.subprocess.run", side_effect=fake_clone): - result = clone_skill_repo("https://github.com/org/skill", cache_dir=cache) - - assert result == target - assert result.exists() + with pytest.raises(RuntimeError, match="failed to fetch"): + fetch_skill_content("https://example.com/SKILL.md")