diff --git a/AGENTS.md b/AGENTS.md index a9a2a5044..60d4178a1 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -141,6 +141,7 @@ strands-agents/ │ │ │ ├── core/ # Base classes, actions, context │ │ │ └── handlers/ # Handler implementations (e.g., LLM) │ │ └── skills/ # AgentSkills.io integration (Skill, AgentSkills) +│ │ └── _url_loader.py # Remote URL loading, git clone, caching │ │ │ ├── experimental/ # Experimental features (API may change) │ │ ├── agent_config.py # Experimental agent config diff --git a/src/strands/vended_plugins/skills/_url_loader.py b/src/strands/vended_plugins/skills/_url_loader.py new file mode 100644 index 000000000..464ec4579 --- /dev/null +++ b/src/strands/vended_plugins/skills/_url_loader.py @@ -0,0 +1,140 @@ +"""Utilities for loading skills from HTTPS URLs. + +This module provides functions to detect URL-type skill sources, resolve +GitHub web URLs to raw content URLs, and fetch SKILL.md content over HTTPS. +No git dependency or local caching is required. +""" + +from __future__ import annotations + +import logging +import re +import urllib.error +import urllib.request + +logger = logging.getLogger(__name__) + +# Matches GitHub /tree//path and /blob//path URLs. +# e.g. /owner/repo/tree/main/skills/my-skill -> groups: (owner/repo, main, skills/my-skill) +_GITHUB_TREE_PATTERN = re.compile(r"^/([^/]+/[^/]+)/(?:tree|blob)/([^/]+)(?:/(.+?))?/?$") + + +def is_url(source: str) -> bool: + """Check whether a skill source string looks like an HTTPS URL. + + Only ``https://`` URLs are supported; plaintext ``http://`` is rejected + for security (MITM risk). + + Args: + source: The skill source string to check. + + Returns: + True if the source is an ``https://`` URL. + """ + return source.startswith("https://") + + +def resolve_to_raw_url(url: str) -> str: + """Resolve a GitHub web URL to a raw content URL for SKILL.md. + + Supports several GitHub URL patterns and converts them to + ``raw.githubusercontent.com`` URLs:: + + # Repository root (assumes HEAD and SKILL.md at root) + https://github.com/owner/repo + -> https://raw.githubusercontent.com/owner/repo/HEAD/SKILL.md + + # Repository root with @ref + https://github.com/owner/repo@v1.0 + -> https://raw.githubusercontent.com/owner/repo/v1.0/SKILL.md + + # Tree URL pointing to a directory + https://github.com/owner/repo/tree/main/skills/my-skill + -> https://raw.githubusercontent.com/owner/repo/main/skills/my-skill/SKILL.md + + # Blob URL pointing to SKILL.md directly + https://github.com/owner/repo/blob/main/skills/my-skill/SKILL.md + -> https://raw.githubusercontent.com/owner/repo/main/skills/my-skill/SKILL.md + + Non-GitHub URLs and ``raw.githubusercontent.com`` URLs are returned as-is. + + Args: + url: An HTTPS URL, possibly a GitHub web URL. + + Returns: + A URL that can be fetched directly to obtain SKILL.md content. + """ + # Already a raw URL — return as-is + if "raw.githubusercontent.com" in url: + return url + + # Not a github.com URL — return as-is (user provides a direct link) + if not url.startswith("https://github.com"): + return url + + # Parse the path portion + path_start = len("https://github.com") + path = url[path_start:] + + # Handle /tree//path and /blob//path + tree_match = _GITHUB_TREE_PATTERN.match(path) + if tree_match: + owner_repo = tree_match.group(1) + ref = tree_match.group(2) + subpath = tree_match.group(3) or "" + + # If the URL points to a SKILL.md file directly, use it as-is + if subpath.lower().endswith("skill.md"): + return f"https://raw.githubusercontent.com/{owner_repo}/{ref}/{subpath}" + + # Otherwise, assume it's a directory and append SKILL.md + if subpath: + return f"https://raw.githubusercontent.com/{owner_repo}/{ref}/{subpath}/SKILL.md" + return f"https://raw.githubusercontent.com/{owner_repo}/{ref}/SKILL.md" + + # Handle plain repo URL: /owner/repo or /owner/repo@ref + # Strip leading slash and any trailing slash + clean_path = path.strip("/") + + # Check for @ref suffix + if "@" in clean_path: + at_idx = clean_path.rfind("@") + owner_repo = clean_path[:at_idx] + ref = clean_path[at_idx + 1 :] + else: + owner_repo = clean_path + ref = "HEAD" + + # Only match owner/repo (exactly two path segments) + if owner_repo.count("/") == 1: + return f"https://raw.githubusercontent.com/{owner_repo}/{ref}/SKILL.md" + + # Unrecognised GitHub URL pattern — return as-is + return url + + +def fetch_skill_content(url: str) -> str: + """Fetch SKILL.md content from an HTTPS URL. + + Uses ``urllib.request`` (stdlib) so no additional dependencies are needed. + + Args: + url: The HTTPS URL to fetch. + + Returns: + The response body as a string. + + Raises: + RuntimeError: If the fetch fails (network error, 404, etc.). + """ + resolved = resolve_to_raw_url(url) + logger.info("url=<%s> | fetching skill content from %s", url, resolved) + + try: + req = urllib.request.Request(resolved, headers={"User-Agent": "strands-agents-sdk"}) # noqa: S310 + with urllib.request.urlopen(req, timeout=30) as response: # noqa: S310 + return response.read().decode("utf-8") + except urllib.error.HTTPError as e: + raise RuntimeError(f"url=<{resolved}> | HTTP {e.code}: {e.reason}") from e + except urllib.error.URLError as e: + raise RuntimeError(f"url=<{resolved}> | failed to fetch skill: {e.reason}") from e diff --git a/src/strands/vended_plugins/skills/agent_skills.py b/src/strands/vended_plugins/skills/agent_skills.py index 5e42b9230..ad604a20c 100644 --- a/src/strands/vended_plugins/skills/agent_skills.py +++ b/src/strands/vended_plugins/skills/agent_skills.py @@ -86,6 +86,8 @@ def __init__( - A ``str`` or ``Path`` to a skill directory (containing SKILL.md) - A ``str`` or ``Path`` to a parent directory (containing skill subdirectories) - A ``Skill`` dataclass instance + - An ``https://`` URL pointing to a SKILL.md file or a GitHub + repository/directory URL (auto-resolved to raw content) state_key: Key used to store plugin state in ``agent.state``. max_resource_files: Maximum number of resource files to list in skill responses. strict: If True, raise on skill validation issues. If False (default), warn and load anyway. @@ -284,7 +286,8 @@ def _resolve_skills(self, sources: list[SkillSource]) -> dict[str, Skill]: """Resolve a list of skill sources into Skill instances. Each source can be a Skill instance, a path to a skill directory, - or a path to a parent directory containing multiple skills. + a path to a parent directory containing multiple skills, or a remote + Git URL. Args: sources: List of skill sources to resolve. @@ -292,6 +295,8 @@ def _resolve_skills(self, sources: list[SkillSource]) -> dict[str, Skill]: Returns: Dict mapping skill names to Skill instances. """ + from ._url_loader import is_url + resolved: dict[str, Skill] = {} for source in sources: @@ -299,6 +304,14 @@ def _resolve_skills(self, sources: list[SkillSource]) -> dict[str, Skill]: if source.name in resolved: logger.warning("name=<%s> | duplicate skill name, overwriting previous skill", source.name) resolved[source.name] = source + elif isinstance(source, str) and is_url(source): + try: + skill = Skill.from_url(source, strict=self._strict) + if skill.name in resolved: + logger.warning("name=<%s> | duplicate skill name, overwriting previous skill", skill.name) + resolved[skill.name] = skill + except (RuntimeError, ValueError) as e: + logger.warning("url=<%s> | failed to load skill from URL: %s", source, e) else: path = Path(source).resolve() if not path.exists(): diff --git a/src/strands/vended_plugins/skills/skill.py b/src/strands/vended_plugins/skills/skill.py index 3e1b6bba5..70181f3be 100644 --- a/src/strands/vended_plugins/skills/skill.py +++ b/src/strands/vended_plugins/skills/skill.py @@ -333,6 +333,48 @@ def from_content(cls, content: str, *, strict: bool = False) -> Skill: return _build_skill_from_frontmatter(frontmatter, body) + @classmethod + def from_url(cls, url: str, *, strict: bool = False) -> Skill: + """Load a skill by fetching its SKILL.md content from an HTTPS URL. + + Fetches the SKILL.md content over HTTPS and parses it using + :meth:`from_content`. GitHub web URLs are automatically resolved + to ``raw.githubusercontent.com``:: + + # Direct raw URL + skill = Skill.from_url( + "https://raw.githubusercontent.com/org/repo/main/SKILL.md" + ) + + # GitHub web URL (auto-resolved) + skill = Skill.from_url( + "https://github.com/org/repo/tree/main/skills/my-skill" + ) + + # Repository root with @ref + skill = Skill.from_url("https://github.com/org/my-skill@v1.0.0") + + Args: + url: An ``https://`` URL pointing to a SKILL.md file, a GitHub + repository, or a GitHub ``/tree//path`` URL. + strict: If True, raise on any validation issue. If False (default), + warn and load anyway. + + Returns: + A Skill instance populated from the fetched SKILL.md content. + + Raises: + ValueError: If ``url`` is not an ``https://`` URL. + RuntimeError: If the SKILL.md content cannot be fetched. + """ + from ._url_loader import fetch_skill_content, is_url + + if not is_url(url): + raise ValueError(f"url=<{url}> | not a valid HTTPS URL") + + content = fetch_skill_content(url) + return cls.from_content(content, strict=strict) + @classmethod def from_directory(cls, skills_dir: str | Path, *, strict: bool = False) -> list[Skill]: """Load all skills from a parent directory containing skill subdirectories. diff --git a/tests/strands/vended_plugins/skills/test_agent_skills.py b/tests/strands/vended_plugins/skills/test_agent_skills.py index 52802a6c1..4e69a7f68 100644 --- a/tests/strands/vended_plugins/skills/test_agent_skills.py +++ b/tests/strands/vended_plugins/skills/test_agent_skills.py @@ -661,6 +661,58 @@ def test_resolve_nonexistent_path(self, tmp_path): assert len(plugin._skills) == 0 +class TestResolveUrlSkills: + """Tests for _resolve_skills with URL sources.""" + + _URL_LOADER = "strands.vended_plugins.skills._url_loader" + _SAMPLE_CONTENT = "---\nname: url-skill\ndescription: A URL skill\n---\n# Instructions\n" + + def test_resolve_url_source(self): + """Test resolving a URL string as a skill source.""" + from unittest.mock import patch + + with patch(f"{self._URL_LOADER}.fetch_skill_content", return_value=self._SAMPLE_CONTENT): + plugin = AgentSkills(skills=["https://github.com/org/url-skill"]) + + assert len(plugin.get_available_skills()) == 1 + assert plugin.get_available_skills()[0].name == "url-skill" + + def test_resolve_mixed_url_and_local(self, tmp_path): + """Test resolving a mix of URL and local filesystem sources.""" + from unittest.mock import patch + + _make_skill_dir(tmp_path, "local-skill") + + with patch(f"{self._URL_LOADER}.fetch_skill_content", return_value=self._SAMPLE_CONTENT): + plugin = AgentSkills( + skills=[ + "https://github.com/org/url-skill", + str(tmp_path / "local-skill"), + ] + ) + + assert len(plugin.get_available_skills()) == 2 + names = {s.name for s in plugin.get_available_skills()} + assert names == {"url-skill", "local-skill"} + + def test_resolve_url_failure_skips_gracefully(self, caplog): + """Test that a failed URL fetch is skipped with a warning.""" + import logging + from unittest.mock import patch + + with ( + patch( + f"{self._URL_LOADER}.fetch_skill_content", + side_effect=RuntimeError("HTTP 404: Not Found"), + ), + caplog.at_level(logging.WARNING), + ): + plugin = AgentSkills(skills=["https://github.com/org/broken"]) + + assert len(plugin.get_available_skills()) == 0 + assert "failed to load skill from URL" in caplog.text + + class TestImports: """Tests for module imports.""" diff --git a/tests/strands/vended_plugins/skills/test_skill.py b/tests/strands/vended_plugins/skills/test_skill.py index 53d6f3507..7b814a00e 100644 --- a/tests/strands/vended_plugins/skills/test_skill.py +++ b/tests/strands/vended_plugins/skills/test_skill.py @@ -551,11 +551,82 @@ def test_strict_mode(self): Skill.from_content(content, strict=True) +class TestSkillFromUrl: + """Tests for Skill.from_url.""" + + _URL_LOADER = "strands.vended_plugins.skills._url_loader" + + _SAMPLE_CONTENT = "---\nname: my-skill\ndescription: A remote skill\n---\nRemote instructions.\n" + + def test_from_url_returns_skill(self): + """Test loading a skill from a URL returns a single Skill.""" + from unittest.mock import patch + + with patch(f"{self._URL_LOADER}.fetch_skill_content", return_value=self._SAMPLE_CONTENT): + skill = Skill.from_url("https://raw.githubusercontent.com/org/repo/main/SKILL.md") + + assert isinstance(skill, Skill) + assert skill.name == "my-skill" + assert skill.description == "A remote skill" + assert "Remote instructions." in skill.instructions + assert skill.path is None + + def test_from_url_github_web_url(self): + """Test that GitHub web URLs are handled (resolved in fetch_skill_content).""" + from unittest.mock import patch + + with patch(f"{self._URL_LOADER}.fetch_skill_content", return_value=self._SAMPLE_CONTENT): + skill = Skill.from_url("https://github.com/org/repo/tree/main/skills/my-skill") + + assert skill.name == "my-skill" + + def test_from_url_with_ref(self): + """Test URL with @ref suffix.""" + from unittest.mock import patch + + with patch(f"{self._URL_LOADER}.fetch_skill_content", return_value=self._SAMPLE_CONTENT): + skill = Skill.from_url("https://github.com/org/my-skill@v1.0.0") + + assert skill.name == "my-skill" + + def test_from_url_invalid_url_raises(self): + """Test that a non-HTTPS URL raises ValueError.""" + with pytest.raises(ValueError, match="not a valid HTTPS URL"): + Skill.from_url("./local-path") + + def test_from_url_http_rejected(self): + """Test that http:// URLs are rejected.""" + with pytest.raises(ValueError, match="not a valid HTTPS URL"): + Skill.from_url("http://example.com/SKILL.md") + + def test_from_url_fetch_failure_raises(self): + """Test that a fetch failure propagates as RuntimeError.""" + from unittest.mock import patch + + with patch( + f"{self._URL_LOADER}.fetch_skill_content", + side_effect=RuntimeError("HTTP 404: Not Found"), + ): + with pytest.raises(RuntimeError, match="HTTP 404"): + Skill.from_url("https://example.com/nonexistent/SKILL.md") + + def test_from_url_strict_mode(self): + """Test that strict mode is forwarded to from_content.""" + from unittest.mock import patch + + bad_content = "---\nname: BAD_NAME\ndescription: Bad\n---\nBody." + + with patch(f"{self._URL_LOADER}.fetch_skill_content", return_value=bad_content): + with pytest.raises(ValueError): + Skill.from_url("https://example.com/SKILL.md", strict=True) + + class TestSkillClassmethods: """Tests for Skill classmethod existence.""" def test_skill_classmethods_exist(self): - """Test that Skill has from_file, from_content, and from_directory classmethods.""" + """Test that Skill has from_file, from_content, from_directory, and from_url classmethods.""" assert callable(getattr(Skill, "from_file", None)) assert callable(getattr(Skill, "from_content", None)) assert callable(getattr(Skill, "from_directory", None)) + assert callable(getattr(Skill, "from_url", None)) diff --git a/tests/strands/vended_plugins/skills/test_url_loader.py b/tests/strands/vended_plugins/skills/test_url_loader.py new file mode 100644 index 000000000..a33c0f6b7 --- /dev/null +++ b/tests/strands/vended_plugins/skills/test_url_loader.py @@ -0,0 +1,167 @@ +"""Tests for the _url_loader module.""" + +from __future__ import annotations + +import urllib.error +from unittest.mock import MagicMock, patch + +import pytest + +from strands.vended_plugins.skills._url_loader import ( + fetch_skill_content, + is_url, + resolve_to_raw_url, +) + + +class TestIsUrl: + """Tests for is_url.""" + + def test_https_url(self): + assert is_url("https://github.com/org/skill-repo") is True + + def test_https_raw_url(self): + assert is_url("https://raw.githubusercontent.com/org/repo/main/SKILL.md") is True + + def test_http_rejected(self): + """Plaintext http:// is rejected for security.""" + assert is_url("http://github.com/org/skill-repo") is False + + def test_ssh_rejected(self): + """SSH URLs are not supported in HTTPS-only mode.""" + assert is_url("ssh://git@github.com/org/skill-repo") is False + + def test_git_at_rejected(self): + """git@ URLs are not supported in HTTPS-only mode.""" + assert is_url("git@github.com:org/skill-repo.git") is False + + def test_local_relative_path(self): + assert is_url("./skills/my-skill") is False + + def test_local_absolute_path(self): + assert is_url("/home/user/skills/my-skill") is False + + def test_plain_directory_name(self): + assert is_url("my-skill") is False + + def test_empty_string(self): + assert is_url("") is False + + +class TestResolveToRawUrl: + """Tests for resolve_to_raw_url.""" + + def test_raw_url_passthrough(self): + url = "https://raw.githubusercontent.com/org/repo/main/SKILL.md" + assert resolve_to_raw_url(url) == url + + def test_non_github_passthrough(self): + url = "https://example.com/skills/SKILL.md" + assert resolve_to_raw_url(url) == url + + def test_repo_root(self): + assert resolve_to_raw_url("https://github.com/org/repo") == ( + "https://raw.githubusercontent.com/org/repo/HEAD/SKILL.md" + ) + + def test_repo_root_trailing_slash(self): + assert resolve_to_raw_url("https://github.com/org/repo/") == ( + "https://raw.githubusercontent.com/org/repo/HEAD/SKILL.md" + ) + + def test_repo_root_with_ref(self): + assert resolve_to_raw_url("https://github.com/org/repo@v1.0.0") == ( + "https://raw.githubusercontent.com/org/repo/v1.0.0/SKILL.md" + ) + + def test_repo_root_with_branch_ref(self): + assert resolve_to_raw_url("https://github.com/org/repo@main") == ( + "https://raw.githubusercontent.com/org/repo/main/SKILL.md" + ) + + def test_tree_url_directory(self): + assert resolve_to_raw_url("https://github.com/org/repo/tree/main/skills/my-skill") == ( + "https://raw.githubusercontent.com/org/repo/main/skills/my-skill/SKILL.md" + ) + + def test_tree_url_branch_only(self): + assert resolve_to_raw_url("https://github.com/org/repo/tree/main") == ( + "https://raw.githubusercontent.com/org/repo/main/SKILL.md" + ) + + def test_tree_url_trailing_slash(self): + assert resolve_to_raw_url("https://github.com/org/repo/tree/main/skills/my-skill/") == ( + "https://raw.githubusercontent.com/org/repo/main/skills/my-skill/SKILL.md" + ) + + def test_tree_url_with_tag(self): + assert resolve_to_raw_url("https://github.com/org/repo/tree/v2.0/skills/brainstorming") == ( + "https://raw.githubusercontent.com/org/repo/v2.0/skills/brainstorming/SKILL.md" + ) + + def test_blob_url_to_skill_md(self): + assert resolve_to_raw_url("https://github.com/org/repo/blob/main/skills/my-skill/SKILL.md") == ( + "https://raw.githubusercontent.com/org/repo/main/skills/my-skill/SKILL.md" + ) + + def test_blob_url_to_lowercase_skill_md(self): + assert resolve_to_raw_url("https://github.com/org/repo/blob/main/skills/my-skill/skill.md") == ( + "https://raw.githubusercontent.com/org/repo/main/skills/my-skill/skill.md" + ) + + +class TestFetchSkillContent: + """Tests for fetch_skill_content.""" + + _LOADER = "strands.vended_plugins.skills._url_loader" + + def test_fetch_success(self): + """Test successful content fetch.""" + skill_content = "---\nname: test-skill\ndescription: A test\n---\n# Instructions\n" + + mock_response = MagicMock() + mock_response.read.return_value = skill_content.encode("utf-8") + mock_response.__enter__ = MagicMock(return_value=mock_response) + mock_response.__exit__ = MagicMock(return_value=False) + + with patch(f"{self._LOADER}.urllib.request.urlopen", return_value=mock_response): + result = fetch_skill_content("https://raw.githubusercontent.com/org/repo/main/SKILL.md") + + assert result == skill_content + + def test_fetch_resolves_github_url(self): + """Test that GitHub web URLs are resolved before fetching.""" + skill_content = "---\nname: test\ndescription: test\n---\n" + + mock_response = MagicMock() + mock_response.read.return_value = skill_content.encode("utf-8") + mock_response.__enter__ = MagicMock(return_value=mock_response) + mock_response.__exit__ = MagicMock(return_value=False) + + with patch(f"{self._LOADER}.urllib.request.urlopen", return_value=mock_response) as mock_urlopen: + fetch_skill_content("https://github.com/org/repo/tree/main/skills/my-skill") + + # Verify the resolved raw URL was used + call_args = mock_urlopen.call_args + request_obj = call_args[0][0] + assert "raw.githubusercontent.com" in request_obj.full_url + + def test_fetch_http_error(self): + """Test that HTTP errors raise RuntimeError.""" + with patch( + f"{self._LOADER}.urllib.request.urlopen", + side_effect=urllib.error.HTTPError( + url="https://example.com", code=404, msg="Not Found", hdrs=None, fp=None + ), + ): + with pytest.raises(RuntimeError, match="HTTP 404"): + fetch_skill_content("https://example.com/SKILL.md") + + def test_fetch_url_error(self): + """Test that network errors raise RuntimeError.""" + with patch( + f"{self._LOADER}.urllib.request.urlopen", + side_effect=urllib.error.URLError("Connection refused"), + ): + with pytest.raises(RuntimeError, match="failed to fetch"): + fetch_skill_content("https://example.com/SKILL.md")