diff --git a/.github/bandit-baseline.json b/.github/bandit-baseline.json new file mode 100644 index 0000000000..14ee416174 --- /dev/null +++ b/.github/bandit-baseline.json @@ -0,0 +1,51 @@ +{ + "results": [ + { + "code": "168 return opener.open(req, timeout=timeout)\n169 return urllib.request.urlopen(req, timeout=timeout) # noqa: S310\n", + "col_offset": 11, + "end_col_offset": 55, + "filename": "src/specify_cli/authentication/http.py", + "issue_confidence": "HIGH", + "issue_cwe": { + "id": 22, + "link": "https://cwe.mitre.org/data/definitions/22.html" + }, + "issue_severity": "MEDIUM", + "issue_text": "Audit url open for permitted schemes. Allowing use of file:/ or custom schemes is often unexpected.", + "line_number": 169, + "line_range": [ + 169 + ], + "more_info": "https://bandit.readthedocs.io/en/1.9.4/blacklists/blacklist_calls.html#b310-urllib-urlopen", + "test_id": "B310", + "test_name": "blacklist" + }, + { + "code": "34 run_cmd,\n35 shell=True,\n36 capture_output=True,\n37 text=True,\n38 cwd=cwd,\n39 timeout=300,\n40 )\n41 output = {\n42 \"exit_code\": proc.returncode,\n43 \"stdout\": proc.stdout,\n", + "col_offset": 19, + "end_col_offset": 13, + "filename": "src/specify_cli/workflows/steps/shell/__init__.py", + "issue_confidence": "HIGH", + "issue_cwe": { + "id": 78, + "link": "https://cwe.mitre.org/data/definitions/78.html" + }, + "issue_severity": "HIGH", + "issue_text": "subprocess call with shell=True identified, security issue.", + "line_number": 35, + "line_range": [ + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40 + ], + "more_info": "https://bandit.readthedocs.io/en/1.9.4/plugins/b602_subprocess_popen_with_shell_equals_true.html", + "test_id": "B602", + "test_name": "subprocess_popen_with_shell_equals_true" + } + ] +} diff --git a/.github/scripts/check_bandit_baseline.py b/.github/scripts/check_bandit_baseline.py new file mode 100644 index 0000000000..badedc0307 --- /dev/null +++ b/.github/scripts/check_bandit_baseline.py @@ -0,0 +1,213 @@ +"""Fail if new entries appear in the Bandit baseline without acknowledgement. + +The bandit baseline whitelists known findings so they don't fail CI. If a +contributor adds a new entry, silent whitelisting becomes invisible in +review. This script compares the set of result *identities* in the +baseline at the PR head against the baseline at its base; if any new +identity appears, the PR must carry the label ``security-baseline-change`` +to confirm the addition is intentional. + +We compare identities (filename + line + test_id + issue_severity + +issue_confidence + hash-of-code-snippet) rather than raw counts so a PR +cannot remove one existing entry and add a different new one to keep the +count constant — which would silently whitelist a new finding. + +When the baseline file does not exist at the base ref, this is the PR +that introduces it; we treat all entries as the starting baseline and +do not require the label. + +For the head side we read the working tree directly (the CI runner is +checked out at the PR head, so the working-tree file IS the head state). +Reading via ``git show :`` would fail-open on unfetched refs +or detached checkouts — for a security gate we want fail-closed. + +Required environment variables: +- ``BANDIT_BASELINE_BASE``: git ref of the PR base +- ``BANDIT_BASELINE_LABELS``: comma-separated PR labels + +Outside of PR events, all inputs may be empty and the script no-ops. +""" + +from __future__ import annotations + +import hashlib +import json +import os +import re +import subprocess +import sys +from pathlib import Path + +REPO_ROOT = Path(__file__).resolve().parents[2] +BASELINE_PATH = ".github/bandit-baseline.json" +ACK_LABEL = "security-baseline-change" + + +def _git_ok(*args: str) -> bool: + """True if the git command exits 0 (output discarded).""" + return ( + subprocess.run( + ["git", *args], + cwd=REPO_ROOT, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ).returncode + == 0 + ) + + +def _read_baseline_at(ref: str) -> tuple[dict, bool]: + """Return (baseline_json, file_existed_at_ref). + + Used for the base side. The head side reads the working tree to avoid + silently fail-opening on an unfetched/invalid head ref. + + Only a missing *path* at a resolvable ref counts as "did not exist"; + an unresolvable ref or a failing ``git show`` aborts instead, so a + transient git failure cannot silently disable the gate. + """ + if not ref: + return {"results": []}, False + if not _git_ok("rev-parse", "--verify", "--quiet", f"{ref}^{{commit}}"): + raise SystemExit( + f"Base ref {ref!r} cannot be resolved (unfetched or invalid). " + f"Refusing to fail-open on a security gate." + ) + if not _git_ok("cat-file", "-e", f"{ref}:{BASELINE_PATH}"): + return {"results": []}, False + try: + blob = subprocess.run( + ["git", "show", f"{ref}:{BASELINE_PATH}"], + check=True, + cwd=REPO_ROOT, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ).stdout + except subprocess.CalledProcessError as exc: + raise SystemExit( + f"Could not read baseline at {ref!r}: {exc.stderr.strip()}. " + f"Refusing to fail-open on a security gate." + ) + try: + return json.loads(blob), True + except json.JSONDecodeError: + print("Could not parse baseline at base ref; treating as empty.", file=sys.stderr) + return {"results": []}, True + + +def _read_baseline_from_worktree() -> tuple[dict, bool]: + """Return (baseline_json, file_exists_on_disk). + + The CI runner is checked out at the PR head, so the working-tree + file IS the head state. Reading it directly sidesteps spurious + ``git show`` failures that would otherwise let an unreadable head + silently pass the gate. + + Asymmetric with the base reader: a corrupt JSON on disk is the + proposed PR state — we fail-closed there rather than treating + it as an empty baseline (which would silently drop the gate). + """ + path = REPO_ROOT / BASELINE_PATH + if not path.exists(): + return {"results": []}, False + try: + return json.loads(path.read_text(encoding="utf-8")), True + except json.JSONDecodeError as exc: + raise SystemExit( + f"Working-tree baseline at {BASELINE_PATH} is corrupt: {exc}. " + f"Refusing to fail-open on a security gate." + ) + + +_WHITESPACE_RE = re.compile(r"\s+") + + +def _identity(result: dict) -> str: + """Stable identity for a baseline entry. + + Combines location, test, severity, confidence, and a hash of the + pinned code snippet (whitespace-normalized) so reformatting changes + or upstream bandit-output tweaks don't register as new findings, + but a different finding at the same line does. + """ + code = result.get("code", "") or "" + normalized = _WHITESPACE_RE.sub(" ", code).strip() + code_hash = hashlib.sha256(normalized.encode("utf-8")).hexdigest()[:16] + return "|".join( + [ + str(result.get("filename", "")), + str(result.get("line_number", "")), + str(result.get("test_id", "")), + str(result.get("issue_severity", "")), + str(result.get("issue_confidence", "")), + code_hash, + ] + ) + + +def main() -> int: + base_ref = os.environ.get("BANDIT_BASELINE_BASE", "").strip() + + if not base_ref or set(base_ref) <= {"0"}: + print("No PR base ref; baseline diff check skipped.") + return 0 + + base_baseline, base_existed = _read_baseline_at(base_ref) + head_baseline, head_existed = _read_baseline_from_worktree() + + if not base_existed: + print( + "Baseline file not present at base ref; treating this PR as the " + "introduction of the baseline. No acknowledgement required." + ) + return 0 + + if not head_existed: + # Fail-closed: the file existed at base but is missing in the + # working tree. Either the PR deleted it (suspicious — the gate + # would no longer protect anything) or the workspace is incomplete. + print( + f"Baseline file {BASELINE_PATH} existed at the base ref but is " + f"missing in the working tree. Refusing to fail-open on a " + f"security gate.", + file=sys.stderr, + ) + return 1 + + base_ids = {_identity(r) for r in base_baseline.get("results", [])} + head_ids = {_identity(r) for r in head_baseline.get("results", [])} + + new_ids = head_ids - base_ids + if not new_ids: + print( + f"Bandit baseline entries: {len(base_ids)} -> {len(head_ids)} " + f"(no new identities)." + ) + return 0 + + labels = { + label.strip() + for label in os.environ.get("BANDIT_BASELINE_LABELS", "").split(",") + if label.strip() + } + if ACK_LABEL in labels: + print( + f"Bandit baseline gained {len(new_ids)} new identities; " + f"acknowledged via label '{ACK_LABEL}'." + ) + return 0 + + print( + f"Bandit baseline gained {len(new_ids)} new identities. " + f"Add label '{ACK_LABEL}' to the PR to acknowledge that the new " + f"whitelist entries are intentional.", + file=sys.stderr, + ) + for identity in sorted(new_ids): + print(f" + {identity}", file=sys.stderr) + return 1 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/.github/scripts/check_secrets_baseline.py b/.github/scripts/check_secrets_baseline.py new file mode 100644 index 0000000000..d16c0cfd47 --- /dev/null +++ b/.github/scripts/check_secrets_baseline.py @@ -0,0 +1,215 @@ +"""Fail if new entries appear in the detect-secrets baseline without ack. + +Mirrors ``check_bandit_baseline.py``: when ``.secrets.baseline`` grows on +a PR, the maintainer adding the new whitelist entry must label the PR +``secrets-baseline-change`` so reviewers see the expansion. + +Identity is ``filename + line + type + hashed_secret`` — detect-secrets +already hashes the candidate, so identities are stable across runs and a +swap (remove one, add another with the same count) is still caught. + +When the baseline file does not exist at the base ref, the PR is the one +that introduces it; no acknowledgement is required. + +For the head side we read the working tree directly (the CI runner is +checked out at the PR head); this avoids fail-opening when +``git show :`` happens to fail. + +Required environment variables: +- ``SECRETS_BASELINE_BASE``: git ref of the PR base +- ``SECRETS_BASELINE_LABELS``: comma-separated PR labels + +Outside of PR events, all inputs may be empty and the script no-ops. +""" + +from __future__ import annotations + +import json +import os +import subprocess +import sys +from dataclasses import dataclass, field +from pathlib import Path + +REPO_ROOT = Path(__file__).resolve().parents[2] +BASELINE_PATH = ".secrets.baseline" +ACK_LABEL = "secrets-baseline-change" + + +@dataclass(frozen=True, order=True) +class SecretIdentity: + """Comparison identity for one detect-secrets baseline entry.""" + + filename: str + line_number: str + secret_type: str + hashed_secret: str = field(repr=False) + + def log_safe(self) -> str: + return "|".join( + [ + self.filename, + self.line_number, + self.secret_type, + "hashed_secret=", + ] + ) + + +def _git_ok(*args: str) -> bool: + """True if the git command exits 0 (output discarded).""" + return ( + subprocess.run( + ["git", *args], + cwd=REPO_ROOT, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ).returncode + == 0 + ) + + +def _read_baseline_at(ref: str) -> tuple[dict, bool]: + """Return (baseline_json, file_existed_at_ref). Base side only. + + Only a missing *path* at a resolvable ref counts as "did not exist"; + an unresolvable ref or a failing ``git show`` aborts instead, so a + transient git failure cannot silently disable the gate. + """ + if not ref: + return {"results": {}}, False + if not _git_ok("rev-parse", "--verify", "--quiet", f"{ref}^{{commit}}"): + raise SystemExit( + f"Base ref {ref!r} cannot be resolved (unfetched or invalid). " + f"Refusing to fail-open on a security gate." + ) + if not _git_ok("cat-file", "-e", f"{ref}:{BASELINE_PATH}"): + return {"results": {}}, False + try: + blob = subprocess.run( + ["git", "show", f"{ref}:{BASELINE_PATH}"], + check=True, + cwd=REPO_ROOT, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ).stdout + except subprocess.CalledProcessError as exc: + raise SystemExit( + f"Could not read baseline at {ref!r}: {exc.stderr.strip()}. " + f"Refusing to fail-open on a security gate." + ) + try: + return json.loads(blob), True + except json.JSONDecodeError: + print("Could not parse baseline at base ref; treating as empty.", file=sys.stderr) + return {"results": {}}, True + + +def _read_baseline_from_worktree() -> tuple[dict, bool]: + """Return (baseline_json, file_exists_on_disk). Head side. + + Reading the working tree (rather than ``git show :``) makes the + head side fail-closed: a missing file blocks the gate, and a corrupt + file raises SystemExit rather than being treated as empty (which + would silently neutralize the gate). + """ + path = REPO_ROOT / BASELINE_PATH + if not path.exists(): + return {"results": {}}, False + try: + return json.loads(path.read_text(encoding="utf-8")), True + except json.JSONDecodeError as exc: + raise SystemExit( + f"Working-tree baseline at {BASELINE_PATH} is corrupt: {exc}. " + f"Refusing to fail-open on a security gate." + ) + + +def _identities(baseline: dict) -> set[SecretIdentity]: + """Flatten detect-secrets results to a set of stable identities.""" + ids: set[SecretIdentity] = set() + results = baseline.get("results", {}) + if not isinstance(results, dict): + return ids + for filename, entries in results.items(): + if not isinstance(entries, list): + continue + for entry in entries: + if not isinstance(entry, dict): + continue + ids.add( + SecretIdentity( + filename=str(filename), + line_number=str(entry.get("line_number", "")), + secret_type=str(entry.get("type", "")), + hashed_secret=str(entry.get("hashed_secret", "")), + ) + ) + return ids + + +def main() -> int: + base_ref = os.environ.get("SECRETS_BASELINE_BASE", "").strip() + + if not base_ref or set(base_ref) <= {"0"}: + print("No PR base ref; secrets baseline diff check skipped.") + return 0 + + base_baseline, base_existed = _read_baseline_at(base_ref) + head_baseline, head_existed = _read_baseline_from_worktree() + + if not base_existed: + print( + "Baseline file not present at base ref; treating this PR as the " + "introduction of the baseline. No acknowledgement required." + ) + return 0 + + if not head_existed: + print( + f"Baseline file {BASELINE_PATH} existed at the base ref but is " + f"missing in the working tree. Refusing to fail-open on a " + f"security gate.", + file=sys.stderr, + ) + return 1 + + base_ids = _identities(base_baseline) + head_ids = _identities(head_baseline) + + new_ids = head_ids - base_ids + if not new_ids: + print( + f"Secrets baseline entries: {len(base_ids)} -> {len(head_ids)} " + f"(no new identities)." + ) + return 0 + + labels = { + label.strip() + for label in os.environ.get("SECRETS_BASELINE_LABELS", "").split(",") + if label.strip() + } + if ACK_LABEL in labels: + print( + f"Secrets baseline gained {len(new_ids)} new identities; " + f"acknowledged via label '{ACK_LABEL}'." + ) + return 0 + + print( + f"Secrets baseline gained {len(new_ids)} new identities. " + f"Audit the new entries — if they are genuine false positives " + f"(SHA pins, docs examples, test fixtures), add label " + f"'{ACK_LABEL}' to the PR to acknowledge them. If any are real " + f"secrets, remove them from history before merging.", + file=sys.stderr, + ) + for identity in sorted(new_ids): + print(f" + {identity.log_safe()}", file=sys.stderr) + return 1 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/.github/scripts/check_security_requirements.py b/.github/scripts/check_security_requirements.py new file mode 100644 index 0000000000..38040d7bd9 --- /dev/null +++ b/.github/scripts/check_security_requirements.py @@ -0,0 +1,107 @@ +"""Check that committed security audit requirements are up to date.""" + +from __future__ import annotations + +import os +import subprocess +import sys +from pathlib import Path + + +REPO_ROOT = Path(__file__).resolve().parents[2] +COMMITTED_REQUIREMENTS = REPO_ROOT / ".github" / "security-audit-requirements.txt" +DEPENDENCY_INPUTS = ("pyproject.toml", ".github/security-audit-requirements.txt") + + +def _dependency_diff_refs() -> tuple[str, str]: + base_ref = os.environ.get("DEPENDENCY_DIFF_BASE", "").strip() + head_ref = os.environ.get("DEPENDENCY_DIFF_HEAD", "").strip() or "HEAD" + if base_ref and not set(base_ref) <= {"0"}: + return base_ref, head_ref + # Fallback when no usable base is supplied (push with an all-zero + # ``github.event.before``, manual dispatch, etc.). ``HEAD^`` fails on a + # shallow checkout or a single-commit repo; that ``git diff`` error is + # caught by the caller and deliberately treated as "inputs changed" so the + # audit runs anyway — failing safe (audit) rather than skipping silently. + return "HEAD^", "HEAD" + + +def _dependency_inputs_changed() -> bool: + base_ref, head_ref = _dependency_diff_refs() + try: + result = subprocess.run( + [ + "git", + "diff", + "--name-only", + base_ref, + head_ref, + "--", + *DEPENDENCY_INPUTS, + ], + check=True, + cwd=REPO_ROOT, + stderr=subprocess.PIPE, + stdout=subprocess.PIPE, + text=True, + ) + except subprocess.CalledProcessError as exc: + print( + "Could not determine changed dependency inputs; checking requirements.", + file=sys.stderr, + ) + if exc.stderr: + print(exc.stderr.strip(), file=sys.stderr) + return True + + changed_inputs = [line for line in result.stdout.splitlines() if line] + if not changed_inputs: + print("Dependency audit inputs unchanged; sync check skipped.") + return False + + print(f"Dependency audit inputs changed: {', '.join(changed_inputs)}") + return True + + +def main() -> int: + if not _dependency_inputs_changed(): + return 0 + + generated_requirements = Path(os.environ["GENERATED_REQUIREMENTS"]) + generated_requirements.parent.mkdir(parents=True, exist_ok=True) + + subprocess.run( + [ + "uv", + "pip", + "compile", + "pyproject.toml", + "--extra", + "test", + "--universal", + "--upgrade", + "--generate-hashes", + "--quiet", + "--no-header", + "--output-file", + str(generated_requirements), + ], + check=True, + cwd=REPO_ROOT, + ) + + committed = COMMITTED_REQUIREMENTS.read_text(encoding="utf-8") + generated = generated_requirements.read_text(encoding="utf-8") + if committed == generated: + return 0 + + print( + "Regenerate .github/security-audit-requirements.txt with the documented " + "uv pip compile command.", + file=sys.stderr, + ) + return 1 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/.github/security-audit-requirements.txt b/.github/security-audit-requirements.txt new file mode 100644 index 0000000000..f897c10d3a --- /dev/null +++ b/.github/security-audit-requirements.txt @@ -0,0 +1,268 @@ +annotated-doc==0.0.4 \ + --hash=sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320 \ + --hash=sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4 + # via typer +click==8.4.1 \ + --hash=sha256:482be17c6991b8c19c5429a1e995d9b0efdbb63172824c41f99965dc0ade8ec2 \ + --hash=sha256:918b5633eddf6b41c32d4f454bf0de810065c74e3f7dbf8ee5452f8be88d3e96 + # via specify-cli (pyproject.toml) +colorama==0.4.6 ; sys_platform == 'win32' \ + --hash=sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44 \ + --hash=sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6 + # via + # click + # pytest + # typer +coverage==7.14.1 \ + --hash=sha256:0177614a0370f227888b4e436a7c55686d6a9f90eb1ade2b624ba685a1686e86 \ + --hash=sha256:01b7733daad0237daa01ef80fe2dfceffc911e6a17fa7b55d14aa8214eaaaecd \ + --hash=sha256:03a6f93c1ec3b7f2e77b5dbcc5573a2c21f12529a5c6bbe0f16f72303cc2fa4d \ + --hash=sha256:042c46ded7c288aeb07cf14a28b6c1e10b78fcba40171c3fa1e939377eeef0b5 \ + --hash=sha256:06144cd511cf2624873a035c5069cf297144f6e77a73ee3d7a55b605ec5efb42 \ + --hash=sha256:07c6290b1697b862c0478eab545eec949a0d0e4d6d03497f446d706da3b4f2de \ + --hash=sha256:10274a1fbeb8ec5d72966e17bb198a3104257aca4ac09d98667c5f8aca8c8548 \ + --hash=sha256:1101a5ebb083aecb625ebb6209d4105b58f647b093cb2dc8122d7b33f743cfe1 \ + --hash=sha256:114c95ef29302423b87d159075805f4ab973254a2638a5d7d046c94887cc87d7 \ + --hash=sha256:1238cb94638e610e972c60dac68e813f868dc7d6e982535270558443058d9d59 \ + --hash=sha256:12c42ec1e14f553c4f817e989365982e646e27211f10a0f717855b94a79c8906 \ + --hash=sha256:145986fe66647eb489f18d9a997567a3fd358584c4b5a808769113abc07466af \ + --hash=sha256:17a5a241e5997621a956a7f402a7433ef4221e5152809b785bec79e2323799f1 \ + --hash=sha256:1896f5e19ff3f0431c7ce2172adc54890fd97f86b59ced8ca1649145d9ffe35d \ + --hash=sha256:196a13319ad88d6d8ef5ab489ec4f44ddde2143c0c7d5b27786f6c3ffd56a7e1 \ + --hash=sha256:221c70f316241a78e77e607c227cefc8808d4e08f28d99c04f35694690e940be \ + --hash=sha256:2222be86d0b54f5dd5a38f45f17f315f737245e857bf0bdedc70734f84a13c02 \ + --hash=sha256:2224f89ffd0c5605ccce1ed7a584da162bc7c55f601ab1c946bc9de31a486b42 \ + --hash=sha256:23bf7fa51ac02e07fc7c96849b82946da47ae862dc8f86d183b2a4864fc38129 \ + --hash=sha256:2d69af5dea2de76fc485a83032a630523f985198b7e25be901ec60181587b01e \ + --hash=sha256:30c08f7d90415aa98b3c990385dea2939b0da55f38515e5b369b83655f8523be \ + --hash=sha256:357d4e32935c36588aaba057d734fa32428c360c9fc2e4442afbf1b646beee6e \ + --hash=sha256:35ab22d91de736e8966b980dc355cbcdd2c6dbbcfe275f9a2991bc8a91b3df65 \ + --hash=sha256:370c5afae3fa0658e11694a32b24c2778f6bc2d17718121f94ee185e69f26b54 \ + --hash=sha256:3758dd0a7f1fa57365ef2e781df0f0731d38b6e3772259d13dae4bd8a958d4b1 \ + --hash=sha256:39b21e212c55af06fa375e3dbf90a8a8e38792f3a910c580066d23563830ddd5 \ + --hash=sha256:3a56abc20a472baf0304c455721bc601477440d28ecfde8a03dde79ede07e0df \ + --hash=sha256:3c18ebc343e15be53049b3a2dce38fe82d58f37e20ab9094b3a39c0aa4f6bb47 \ + --hash=sha256:3d452fd08b5c72c5167c93e6867b5c08500bd40f2a21e1e854a500550b6cc36f \ + --hash=sha256:3e3680291c4a1d0dadfa84a2c459576a4af5133abb617905714339a0c73138cf \ + --hash=sha256:442cc9c952b2df400cda54bb04ab87330cf2cd08a8692cbbea36773531eb6f37 \ + --hash=sha256:46f714d2fb8ae2f4f29f23ada7f1e79b759fff5a70f94a1dac23af204c3ec9e4 \ + --hash=sha256:478b5bcd63c2e1357c5c7e16c070690df7b07f676b1c114d7b93e533c664309f \ + --hash=sha256:48b283b1dd6372e8de2a7a9a4c4d5dc06f4d4fd209b876f3c88a7a205a0c8f84 \ + --hash=sha256:4a28fd227808366b196a75476dced2eb35b351d6766ba9c858dc93319e87f4f1 \ + --hash=sha256:4ea1c034f95c9b056e856b794630b17f9fa3d57e4800ff1e503d3be0f9c9078c \ + --hash=sha256:51bd64741cc6fa065abd300ede1afe5a5291ece9c31da8b24884deda48bcc3f8 \ + --hash=sha256:54acdb6674a4661768d7bf7db32dfb9f46ab1d764f8aba6df75ce1a6a088724e \ + --hash=sha256:59baf88468dbc8d63b1887afd92bda52e40bb1561696e5819670601403810cec \ + --hash=sha256:5a1c5215be81035e629d5bc756650634d0bf31991038db7a0eccb90f025ce16d \ + --hash=sha256:5b0c99ba93a07d56f6df340bb79be53202a082b2fdb81bfe6190b741a3470d54 \ + --hash=sha256:5ea0c297e27133853b4d8a3eb799bff5a2dbd9f2f41537a240d337ac9b4df890 \ + --hash=sha256:5f0cfc27c539f07cf5c0a4cfe211d0b6cae039f8f40526dbaa71944e64b50a7b \ + --hash=sha256:6223a72fd0e4c7156353ec0f08a5f93623e1d3034d0e2683b9bb8ea674131b1d \ + --hash=sha256:62a9f70b52e0b5a95cfef4a5c5641b06983cadc5e538a3feeb5c00211f523ac2 \ + --hash=sha256:62fd185ef9df3c33d1c8178c5af105f762afbad96038de9a4ae100aa6297ca33 \ + --hash=sha256:6a3cb83d1552c0cd1b4906655b6a33fd4a8473229633a901c6b73bf86914dee9 \ + --hash=sha256:6adc5a36984624a70bf11d7184e20fa0a49aa7c47ffab43804106a1a695ea22e \ + --hash=sha256:6b6b0853b895fe0e98cbfc580d1ec3393d9302b4b1e96a77b3f5c91fdab899e6 \ + --hash=sha256:6ff665fb023a77386fe11685190cee1f60a7d635994a30d9b0a061533d470fce \ + --hash=sha256:7279d2110a28cebc738b6459ecda2771735a4c18465fbbd36b3288fe5ed92247 \ + --hash=sha256:76a085d7005236a767e3426148b2c407e53ad61695c562f8a81da2d373324901 \ + --hash=sha256:7771b601718fdde84832c3a434ca9bbf4ae9adbc49d84198b4110700c3c77c36 \ + --hash=sha256:79058c47dae6788504b5effb319961bcd72d7240551464b91d474bc0ed186d69 \ + --hash=sha256:7af486dabe8954d03b087f0021540897afe084f04e16ff5579e08cc46f871416 \ + --hash=sha256:7f02d09f70776579b926d889a4c9c235070a1f47c40458aeaca563fae5acfdb5 \ + --hash=sha256:8011224a62280e50dab346960c03cf47aca1a1e09e608c0fb33fd6e0cc8e9500 \ + --hash=sha256:8270544c361ed405a27a060dbc9ed2c124b084d96dfdc2d9a2510482aef981ad \ + --hash=sha256:84ac9499e48700399a5dd0ea7085b5091961fec52c68d66b4ec0d3cf7f4441b1 \ + --hash=sha256:84b535f00655ecafe1d929d1fb00ed5d6fa3051ea643ab2c161a3887b86f294b \ + --hash=sha256:851b9e1e4e8a4608e77c79714b2e77c0970d2ed7202a05e92ae407817481887b \ + --hash=sha256:85e85586565842f6932abebd4c18bcb1074223dc0b3576e7d173ca710622813a \ + --hash=sha256:87ebdf787d4888e3f3f2d523eadc6e18c6d18c6d0eb173801a189641627fb37e \ + --hash=sha256:8a3ce026d73290f42f08dafecbd82c193a74df280461fbf97300fec51fd133ee \ + --hash=sha256:9132cd363a68a4c3daa7c8704a654b1e39d3360f6f5b8ddd470608a945236c07 \ + --hash=sha256:99cd41ff91afd94896fea3bc002706b6ae4ce95727d06e4a0f39c0a8d8bd8b1a \ + --hash=sha256:9eeb3fcbc13ba40dfbdb22d01d196a28e9cef9ed4c29b60061a1e0e823a9929d \ + --hash=sha256:a06c76364a9360e33d6d23769aefdf7f66f38e2ffb60ceb1baaa4989d83b695c \ + --hash=sha256:a07891c3f4805442b31b71e84ba3cf29ed1aa9a428284e06deeb4b23e5b46343 \ + --hash=sha256:a24a81f9715ee42ef59a316cc11611c98fe23920f7c81861315c9f3ff4a230f4 \ + --hash=sha256:a252f21c27e38347e60111a3266b03827422a7d5525951aceee313aa68bab1d2 \ + --hash=sha256:a311d8e1da24be5c1ccf85cbfb06315dbaa1703d5a1eab3f6432c72b837917c8 \ + --hash=sha256:a5274669f37f2343635a347b91a60777621341ab3378e9c6ac9335eee704bddf \ + --hash=sha256:aa5e304a873fabddc11e484e9b6b738bd38bd7bed17b09aa84eecf5332e8b8bb \ + --hash=sha256:ab4af6352741a604c431c6072fce5bee33bf0f20dc7a56618d6bf6bb89e9810c \ + --hash=sha256:b553d04b5e778a8e56d57eb134aff42a92718ecba45e79c4764ecfa40efd92ff \ + --hash=sha256:b84800013769a78ccb9ef4659402e26d06867e337b61ec365f77ad008adea80e \ + --hash=sha256:b84ffdf877644e7096aa936991efeed873f7f3df57b9cd001312b7668ab08550 \ + --hash=sha256:bcaa50684dcaadfa599ac48f81103c756d791cfd85c97203d2217c593d48b860 \ + --hash=sha256:be9f2c802dcfce3f71298303aa5dad0dce440a76c52f2f60dacd8656dab78793 \ + --hash=sha256:c643734307300234fafa36bf2a040a7235f8f177ea1fd6ec1423aea6fb7b929f \ + --hash=sha256:c79cead5b5bc584d9c71451cb984d0e3a84e0c0937379c8efcbf27c8d661b851 \ + --hash=sha256:c7e057326434e441306226fbeb5d1aaf14a2637efe97ba668306635835f32ad7 \ + --hash=sha256:c912c259304cfb5ee584481cfb7ce1ff932b4d61e6c9140b8f19cb7b5ed82332 \ + --hash=sha256:ce66d8e46da2bb5ee313a745cbd2e391d319176c1f7a9451bfcd3a2fb920859b \ + --hash=sha256:ced2f09ef276fd58611a1ef502164ad266d2b75174e5a40cabbdb4033f9f6cf2 \ + --hash=sha256:cfe5a5fec635799ef33428f1e5e61bafa45a92a96190ba731561ba558ccc214d \ + --hash=sha256:d13e6725992e2d2fd7d81d4f5241952d13740121dfd501da09201be39b2c003a \ + --hash=sha256:d34d75f892b3ab73ba11cab5442cce7b3e168fd64162b16f0e1e0d09c508edef \ + --hash=sha256:d5b89cdfb2ee051b71e8c3c70bd81a9eff81100f736a269136fe1a68efe00474 \ + --hash=sha256:d5ed429d0b8edaac649e889b4ffcedb6c80b06629a3f93050e3dddfb99235bee \ + --hash=sha256:da028256b04ec30e5e0114b6f76172938c313991f0a2d3d894271315cf5d5e43 \ + --hash=sha256:dcbf65f1f66a26cdd88c35cf68fb4729c5d1cd2e88added72420541dfb212034 \ + --hash=sha256:dd34767fa19848d35659ffc0a75314f58c7af3f1cd87ec521e8292a1238398a3 \ + --hash=sha256:ddf799247318f34dbcd2efa8c95a8d0642674e926bb1774cf9b63dfd2a389d1c \ + --hash=sha256:de286598cc65d2b489411174b1faec2f5a7775fb3201fd925db2a76b4030f37d \ + --hash=sha256:e471bc5769ff073b058cfadb0d736b56ce067c8560eabeb0da88462df98c23e7 \ + --hash=sha256:e854312c4103f2ad4c0dc023b69b77ebfd2c89db5f86c4c94dc2353f9a92167e \ + --hash=sha256:ea8cd6ca0ee9f616aaef3afc6882e32c2cbf18b00d96313ffd76af650574034d \ + --hash=sha256:f2302660e32562a532b442480121aef8aa61a5bdb20b30bf0adab29f10a5a4b4 \ + --hash=sha256:f497a1ea81d4cd7c10ddcaa685135b9aabd291af3d55775a9ddf3cb7a364cdd9 \ + --hash=sha256:f4ddbe407477f04c45115d1a4e5bc480f753553b534d338d4c3358b1cdd0ea52 \ + --hash=sha256:f747dc8edcfe740130f28f32f3995e955494285717e86ee25af51db2219df08a \ + --hash=sha256:fad54e871165f6ec2f536063ac74c3104508a12963e64072ba44bd822de52b0c \ + --hash=sha256:fc459e5d73be2d6332fcfe8dbf3d8994671fe33c700f4565988ecfa511547253 \ + --hash=sha256:fd86572566fb40189a8260446158235159bc7a82dfbc87a3b39cf4fb57fcec1c + # via pytest-cov +iniconfig==2.3.0 \ + --hash=sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730 \ + --hash=sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12 + # via pytest +json5==0.14.0 \ + --hash=sha256:56cf861bab076b1178eb8c92e1311d273a9b9acea2ccc82c276abf839ebaef3a \ + --hash=sha256:b3f492fad9f6cdbced8b7d40b28b9b1c9701c5f561bef0d33b81c2ff433fefcb + # via specify-cli (pyproject.toml) +markdown-it-py==4.2.0 \ + --hash=sha256:04a21681d6fbb623de53f6f364d352309d4094dd4194040a10fd51833e418d49 \ + --hash=sha256:9f7ebbcd14fe59494226453aed97c1070d83f8d24b6fc3a3bcf9a38092641c4a + # via rich +mdurl==0.1.2 \ + --hash=sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8 \ + --hash=sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba + # via markdown-it-py +packaging==26.2 \ + --hash=sha256:5fc45236b9446107ff2415ce77c807cee2862cb6fac22b8a73826d0693b0980e \ + --hash=sha256:ff452ff5a3e828ce110190feff1178bb1f2ea2281fa2075aadb987c2fb221661 + # via + # specify-cli (pyproject.toml) + # pytest +pathspec==1.1.1 \ + --hash=sha256:17db5ecd524104a120e173814c90367a96a98d07c45b2e10c2f3919fff91bf5a \ + --hash=sha256:a00ce642f577bf7f473932318056212bc4f8bfdf53128c78bbd5af0b9b20b189 + # via specify-cli (pyproject.toml) +platformdirs==4.10.0 \ + --hash=sha256:31e761a6a0ca04faf7353ea759bdba55652be214725111e5aac52dfa29d4bef7 \ + --hash=sha256:fb516cdb12eb0d857d0cd85a7c57cea4d060bee4578d6cf5a14dfdf8cbf8784a + # via specify-cli (pyproject.toml) +pluggy==1.6.0 \ + --hash=sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3 \ + --hash=sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746 + # via + # pytest + # pytest-cov +pygments==2.20.0 \ + --hash=sha256:6757cd03768053ff99f3039c1a36d6c0aa0b263438fcab17520b30a303a82b5f \ + --hash=sha256:81a9e26dd42fd28a23a2d169d86d7ac03b46e2f8b59ed4698fb4785f946d0176 + # via + # pytest + # rich +pytest==9.0.3 \ + --hash=sha256:2c5efc453d45394fdd706ade797c0a81091eccd1d6e4bccfcd476e2b8e0ab5d9 \ + --hash=sha256:b86ada508af81d19edeb213c681b1d48246c1a91d304c6c81a427674c17eb91c + # via + # specify-cli (pyproject.toml) + # pytest-cov +pytest-cov==7.1.0 \ + --hash=sha256:30674f2b5f6351aa09702a9c8c364f6a01c27aae0c1366ae8016160d1efc56b2 \ + --hash=sha256:a0461110b7865f9a271aa1b51e516c9a95de9d696734a2f71e3e78f46e1d4678 + # via specify-cli (pyproject.toml) +pyyaml==6.0.3 \ + --hash=sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c \ + --hash=sha256:0150219816b6a1fa26fb4699fb7daa9caf09eb1999f3b70fb6e786805e80375a \ + --hash=sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3 \ + --hash=sha256:02ea2dfa234451bbb8772601d7b8e426c2bfa197136796224e50e35a78777956 \ + --hash=sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6 \ + --hash=sha256:10892704fc220243f5305762e276552a0395f7beb4dbf9b14ec8fd43b57f126c \ + --hash=sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65 \ + --hash=sha256:1d37d57ad971609cf3c53ba6a7e365e40660e3be0e5175fa9f2365a379d6095a \ + --hash=sha256:1ebe39cb5fc479422b83de611d14e2c0d3bb2a18bbcb01f229ab3cfbd8fee7a0 \ + --hash=sha256:214ed4befebe12df36bcc8bc2b64b396ca31be9304b8f59e25c11cf94a4c033b \ + --hash=sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1 \ + --hash=sha256:22ba7cfcad58ef3ecddc7ed1db3409af68d023b7f940da23c6c2a1890976eda6 \ + --hash=sha256:27c0abcb4a5dac13684a37f76e701e054692a9b2d3064b70f5e4eb54810553d7 \ + --hash=sha256:28c8d926f98f432f88adc23edf2e6d4921ac26fb084b028c733d01868d19007e \ + --hash=sha256:2e71d11abed7344e42a8849600193d15b6def118602c4c176f748e4583246007 \ + --hash=sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310 \ + --hash=sha256:37503bfbfc9d2c40b344d06b2199cf0e96e97957ab1c1b546fd4f87e53e5d3e4 \ + --hash=sha256:3c5677e12444c15717b902a5798264fa7909e41153cdf9ef7ad571b704a63dd9 \ + --hash=sha256:3ff07ec89bae51176c0549bc4c63aa6202991da2d9a6129d7aef7f1407d3f295 \ + --hash=sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea \ + --hash=sha256:418cf3f2111bc80e0933b2cd8cd04f286338bb88bdc7bc8e6dd775ebde60b5e0 \ + --hash=sha256:44edc647873928551a01e7a563d7452ccdebee747728c1080d881d68af7b997e \ + --hash=sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac \ + --hash=sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9 \ + --hash=sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7 \ + --hash=sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35 \ + --hash=sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb \ + --hash=sha256:5cf4e27da7e3fbed4d6c3d8e797387aaad68102272f8f9752883bc32d61cb87b \ + --hash=sha256:5e0b74767e5f8c593e8c9b5912019159ed0533c70051e9cce3e8b6aa699fcd69 \ + --hash=sha256:5ed875a24292240029e4483f9d4a4b8a1ae08843b9c54f43fcc11e404532a8a5 \ + --hash=sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b \ + --hash=sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c \ + --hash=sha256:6344df0d5755a2c9a276d4473ae6b90647e216ab4757f8426893b5dd2ac3f369 \ + --hash=sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd \ + --hash=sha256:652cb6edd41e718550aad172851962662ff2681490a8a711af6a4d288dd96824 \ + --hash=sha256:66291b10affd76d76f54fad28e22e51719ef9ba22b29e1d7d03d6777a9174198 \ + --hash=sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065 \ + --hash=sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c \ + --hash=sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c \ + --hash=sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764 \ + --hash=sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196 \ + --hash=sha256:8098f252adfa6c80ab48096053f512f2321f0b998f98150cea9bd23d83e1467b \ + --hash=sha256:850774a7879607d3a6f50d36d04f00ee69e7fc816450e5f7e58d7f17f1ae5c00 \ + --hash=sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac \ + --hash=sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8 \ + --hash=sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e \ + --hash=sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28 \ + --hash=sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3 \ + --hash=sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5 \ + --hash=sha256:9c57bb8c96f6d1808c030b1687b9b5fb476abaa47f0db9c0101f5e9f394e97f4 \ + --hash=sha256:9c7708761fccb9397fe64bbc0395abcae8c4bf7b0eac081e12b809bf47700d0b \ + --hash=sha256:9f3bfb4965eb874431221a3ff3fdcddc7e74e3b07799e0e84ca4a0f867d449bf \ + --hash=sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5 \ + --hash=sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702 \ + --hash=sha256:b30236e45cf30d2b8e7b3e85881719e98507abed1011bf463a8fa23e9c3e98a8 \ + --hash=sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788 \ + --hash=sha256:b865addae83924361678b652338317d1bd7e79b1f4596f96b96c77a5a34b34da \ + --hash=sha256:b8bb0864c5a28024fac8a632c443c87c5aa6f215c0b126c449ae1a150412f31d \ + --hash=sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc \ + --hash=sha256:bdb2c67c6c1390b63c6ff89f210c8fd09d9a1217a465701eac7316313c915e4c \ + --hash=sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba \ + --hash=sha256:c2514fceb77bc5e7a2f7adfaa1feb2fb311607c9cb518dbc378688ec73d8292f \ + --hash=sha256:c3355370a2c156cffb25e876646f149d5d68f5e0a3ce86a5084dd0b64a994917 \ + --hash=sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5 \ + --hash=sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26 \ + --hash=sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f \ + --hash=sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b \ + --hash=sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be \ + --hash=sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c \ + --hash=sha256:efd7b85f94a6f21e4932043973a7ba2613b059c4a000551892ac9f1d11f5baf3 \ + --hash=sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6 \ + --hash=sha256:fa160448684b4e94d80416c0fa4aac48967a969efe22931448d853ada8baf926 \ + --hash=sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0 + # via specify-cli (pyproject.toml) +readchar==4.2.2 \ + --hash=sha256:92daf7e42c52b0787e6c75d01ecfb9a94f4ceff3764958b570c1dddedd47b200 \ + --hash=sha256:e3b270fe16fc90c50ac79107700330a133dd4c63d22939f5b03b4f24564d5dd8 + # via specify-cli (pyproject.toml) +rich==15.0.0 \ + --hash=sha256:33bd4ef74232fb73fe9279a257718407f169c09b78a87ad3d296f548e27de0bb \ + --hash=sha256:edd07a4824c6b40189fb7ac9bc4c52536e9780fbbfbddf6f1e2502c31b068c36 + # via + # specify-cli (pyproject.toml) + # typer +shellingham==1.5.4 \ + --hash=sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686 \ + --hash=sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de + # via typer +typer==0.26.7 \ + --hash=sha256:5c87cfbc5d34491c5346ebf49c23e18d56ccb863268d3a8d592b26087c2f5e58 \ + --hash=sha256:e314a34c617e419c091b2830dda3ea1f257134ff593061a8f5b9717ab8dddb3a + # via specify-cli (pyproject.toml) diff --git a/.github/workflows/catalog-assign.yml b/.github/workflows/catalog-assign.yml index 78b4f552f3..f828794864 100644 --- a/.github/workflows/catalog-assign.yml +++ b/.github/workflows/catalog-assign.yml @@ -19,7 +19,7 @@ jobs: permissions: issues: write steps: - - uses: actions/github-script@v9 + - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 with: script: | const issue = context.payload.issue; diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index a4b1bf7d5a..5ba2989cc0 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -42,3 +42,15 @@ jobs: globs: | '**/*.md' !extensions/**/*.md + + shellcheck: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + + # shellcheck is preinstalled on ubuntu-latest runners. + # Start at --severity=error to block real bugs without flagging style + # (notably SC2155). Tighten in a follow-up after cleanup. + - name: Run shellcheck on scripts/bash + run: shellcheck --severity=error scripts/bash/*.sh diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml new file mode 100644 index 0000000000..abdebf77ac --- /dev/null +++ b/.github/workflows/security.yml @@ -0,0 +1,191 @@ +name: Security Audit + +permissions: + contents: read + +on: + push: + branches: ["main"] + pull_request: + # labeled/unlabeled so the baseline-growth gates re-evaluate when the + # acknowledgement label is added or removed, without requiring a push. + types: [opened, synchronize, reopened, labeled, unlabeled] + schedule: + - cron: "17 4 * * 1" + workflow_dispatch: + +jobs: + dependency-audit: + name: Dependency audit (${{ matrix.os }}, Python ${{ matrix.python-version }}) + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest] + python-version: ["3.11", "3.12", "3.13"] + steps: + - name: Checkout + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + fetch-depth: 2 + + - name: Install uv + uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # v8.2.0 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6 + with: + python-version: ${{ matrix.python-version }} + + # The committed .github/security-audit-requirements.txt is generated with + # --universal (resolves across all interpreters/platforms) and is what + # push/PR runs audit. The scheduled job instead compiles per matrix + # entry with --python-version so it can surface advisories in wheels that + # only resolve on a specific interpreter (e.g. 3.11-only) — coverage the + # universal file may not exercise. This broadening is intentional; PR runs + # trade that depth for determinism against the committed snapshot. + - name: Compile scheduled audit requirements + if: ${{ github.event_name == 'schedule' }} + run: | + uv pip compile pyproject.toml --extra test --python-version "${{ matrix.python-version }}" --upgrade --generate-hashes --quiet --output-file "${{ runner.temp }}/spec-kit-audit-requirements.txt" + + - name: Run pip-audit (scheduled live resolution) + if: ${{ github.event_name == 'schedule' }} + run: uvx --from pip-audit==2.10.0 pip-audit --disable-pip --require-hashes -r "${{ runner.temp }}/spec-kit-audit-requirements.txt" --progress-spinner off + + - name: Check committed audit requirements are current + if: ${{ github.event_name != 'schedule' }} + env: + DEPENDENCY_DIFF_BASE: ${{ github.event.pull_request.base.sha || github.event.before || '' }} + DEPENDENCY_DIFF_HEAD: ${{ github.sha }} + GENERATED_REQUIREMENTS: ${{ runner.temp }}/security-audit-requirements.txt + run: python .github/scripts/check_security_requirements.py + + - name: Run pip-audit (committed requirements) + if: ${{ github.event_name != 'schedule' }} + run: uvx --from pip-audit==2.10.0 pip-audit --disable-pip --require-hashes -r .github/security-audit-requirements.txt --progress-spinner off + + static-analysis: + name: Static analysis + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + # Need the PR base to compare baseline growth. + fetch-depth: 0 + + - name: Install uv + uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # v8.2.0 + + - name: Set up Python + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6 + with: + python-version: "3.13" + + # Blocking: HIGH severity only, with baseline. Real regressions fail CI. + - name: Run Bandit + run: uvx --from bandit==1.9.4 bandit -r src -lll --baseline .github/bandit-baseline.json + + # Informative: MEDIUM severity, using the SAME baseline so accepted + # findings do not re-fire here. Surfaces new MEDIUM-or-above findings + # in the job summary without breaking CI. + - name: Run Bandit medium-severity informational pass + id: bandit-medium + continue-on-error: true + run: uvx --from bandit==1.9.4 bandit -r src -ll --baseline .github/bandit-baseline.json + + # Surface the medium-severity outcome in the job summary so reviewers + # see it without expanding the log; continue-on-error swallows the + # non-zero exit otherwise. We branch on three outcomes: + # - failure → new findings (⚠️) + # - success → clean (✅) + # - skipped → the blocking HIGH bandit step failed, so the medium + # pass never ran; don't claim "clean" in that case (⏭️). + - name: Surface medium-severity findings in job summary + if: always() + run: | + case "${{ steps.bandit-medium.outcome }}" in + failure) + { + echo "## ⚠️ Bandit medium-severity informational pass" + echo "" + echo "New MEDIUM-or-above findings detected (baseline-filtered). These" + echo "do not fail CI but should be audited. Resolution paths, in order" + echo "of preference:" + echo " 1. Fix the underlying issue." + echo " 2. If the finding is a documented intentional pattern, append" + echo " it to \`.github/bandit-baseline.json\` and add the" + echo " \`security-baseline-change\` label to acknowledge the growth." + echo " 3. For ruff S6xx false positives only, use \`# noqa: S6xx\`" + echo " with an inline justification." + echo "" + echo "Do NOT use \`# nosec\` — it is forbidden in \`src/\` by the" + echo "\`test_bandit_nosec_is_not_suppressed_in_source\` regression test." + echo "" + echo "See the **Run Bandit medium-severity informational pass** step" + echo "above for the file/line list." + } >> "$GITHUB_STEP_SUMMARY" + ;; + success) + echo "## ✅ Bandit medium-severity informational pass — clean" >> "$GITHUB_STEP_SUMMARY" + ;; + *) + echo "## ⏭️ Bandit medium-severity informational pass — skipped (the blocking HIGH pass failed; fix it first)" >> "$GITHUB_STEP_SUMMARY" + ;; + esac + + # Prevent silent whitelisting: if the baseline grew, the PR must carry + # the 'security-baseline-change' label to acknowledge it. + - name: Check Bandit baseline growth + if: ${{ github.event_name == 'pull_request' }} + env: + # Base side via `git show` (needs full fetch-depth above). + # Head side reads the working tree — fail-closed. + BANDIT_BASELINE_BASE: ${{ github.event.pull_request.base.sha }} + BANDIT_BASELINE_LABELS: ${{ join(github.event.pull_request.labels.*.name, ',') }} + run: python .github/scripts/check_bandit_baseline.py + + secret-scan: + name: Secret scan + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + # Needed by check_secrets_baseline.py to read the baseline at base ref. + fetch-depth: 0 + + - name: Install uv + uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # v8.2.0 + + - name: Set up Python + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6 + with: + python-version: "3.13" + + # detect-secrets is a Python tool (consistent with bandit / pip-audit + # install pattern) and detects entropy-based and provider-specific + # secrets. detect-secrets-hook compares tracked files against the + # baseline and exits non-zero when a new candidate appears, without + # rewriting the baseline file (so there's no spurious git diff). + - name: Run detect-secrets + run: | + set -euo pipefail + git ls-files -z \ + -- ':!:.secrets.baseline' \ + ':!:uv.lock' \ + ':!:.github/security-audit-requirements.txt' \ + | xargs -0 uvx --from detect-secrets==1.5.0 detect-secrets-hook \ + --baseline .secrets.baseline + + # Symmetric with the bandit baseline gate: if .secrets.baseline grew, + # the PR must carry the 'secrets-baseline-change' label so reviewers + # see the whitelist expansion explicitly. + - name: Check secrets baseline growth + if: ${{ github.event_name == 'pull_request' }} + env: + # Head side reads the working tree (see check_secrets_baseline.py). + SECRETS_BASELINE_BASE: ${{ github.event.pull_request.base.sha }} + SECRETS_BASELINE_LABELS: ${{ join(github.event.pull_request.labels.*.name, ',') }} + run: python .github/scripts/check_secrets_baseline.py diff --git a/.secrets.baseline b/.secrets.baseline new file mode 100644 index 0000000000..1e8f282645 --- /dev/null +++ b/.secrets.baseline @@ -0,0 +1,242 @@ +{ + "version": "1.5.0", + "plugins_used": [ + { + "name": "ArtifactoryDetector" + }, + { + "name": "AWSKeyDetector" + }, + { + "name": "AzureStorageKeyDetector" + }, + { + "name": "Base64HighEntropyString", + "limit": 4.5 + }, + { + "name": "BasicAuthDetector" + }, + { + "name": "CloudantDetector" + }, + { + "name": "DiscordBotTokenDetector" + }, + { + "name": "GitHubTokenDetector" + }, + { + "name": "GitLabTokenDetector" + }, + { + "name": "HexHighEntropyString", + "limit": 3.0 + }, + { + "name": "IbmCloudIamDetector" + }, + { + "name": "IbmCosHmacDetector" + }, + { + "name": "IPPublicDetector" + }, + { + "name": "JwtTokenDetector" + }, + { + "name": "KeywordDetector", + "keyword_exclude": "" + }, + { + "name": "MailchimpDetector" + }, + { + "name": "NpmDetector" + }, + { + "name": "OpenAIDetector" + }, + { + "name": "PrivateKeyDetector" + }, + { + "name": "PypiTokenDetector" + }, + { + "name": "SendGridDetector" + }, + { + "name": "SlackDetector" + }, + { + "name": "SoftlayerDetector" + }, + { + "name": "SquareOAuthDetector" + }, + { + "name": "StripeDetector" + }, + { + "name": "TelegramBotTokenDetector" + }, + { + "name": "TwilioKeyDetector" + } + ], + "filters_used": [ + { + "path": "detect_secrets.filters.allowlist.is_line_allowlisted" + }, + { + "path": "detect_secrets.filters.common.is_baseline_file", + "filename": ".secrets.baseline" + }, + { + "path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies", + "min_level": 2 + }, + { + "path": "detect_secrets.filters.heuristic.is_indirect_reference" + }, + { + "path": "detect_secrets.filters.heuristic.is_likely_id_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_lock_file" + }, + { + "path": "detect_secrets.filters.heuristic.is_not_alphanumeric_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_potential_uuid" + }, + { + "path": "detect_secrets.filters.heuristic.is_prefixed_with_dollar_sign" + }, + { + "path": "detect_secrets.filters.heuristic.is_sequential_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_swagger_file" + }, + { + "path": "detect_secrets.filters.heuristic.is_templated_secret" + }, + { + "path": "detect_secrets.filters.regex.should_exclude_file", + "pattern": [ + "(\\.secrets\\.baseline$|uv\\.lock$|\\.github/security-audit-requirements\\.txt$)" + ] + } + ], + "results": { + ".devcontainer/post-create.sh": [ + { + "type": "Hex High Entropy String", + "filename": ".devcontainer/post-create.sh", + "hashed_secret": "7a549d52003f28825cf4d8a7351585120349c1c5", + "is_verified": false, + "line_number": 65 + } + ], + ".github/aw/actions-lock.json": [ + { + "type": "Hex High Entropy String", + "filename": ".github/aw/actions-lock.json", + "hashed_secret": "d85a5096b95f13fd8eadff7813c465b4f0b6b074", + "is_verified": false, + "line_number": 6 + }, + { + "type": "Hex High Entropy String", + "filename": ".github/aw/actions-lock.json", + "hashed_secret": "ff853346af779bcfc8f28515063f78a852a97dbf", + "is_verified": false, + "line_number": 11 + } + ], + ".github/workflows/add-community-extension.lock.yml": [ + { + "type": "Secret Keyword", + "filename": ".github/workflows/add-community-extension.lock.yml", + "hashed_secret": "a4fbc895dedf49406d65138d4733c6d6b1b09d0d", + "is_verified": false, + "line_number": 903 + } + ], + ".github/workflows/add-community-preset.lock.yml": [ + { + "type": "Secret Keyword", + "filename": ".github/workflows/add-community-preset.lock.yml", + "hashed_secret": "a4fbc895dedf49406d65138d4733c6d6b1b09d0d", + "is_verified": false, + "line_number": 903 + } + ], + ".github/workflows/security.yml": [ + { + "type": "Secret Keyword", + "filename": ".github/workflows/security.yml", + "hashed_secret": "4202a5e0d1da60251e0163e869ae02016bb68767", + "is_verified": false, + "line_number": 163 + } + ], + "docs/reference/authentication.md": [ + { + "type": "Secret Keyword", + "filename": "docs/reference/authentication.md", + "hashed_secret": "d92490a1457d8b0712a85fe018b3e9fd781816a7", + "is_verified": false, + "line_number": 113 + } + ], + "extensions/template/EXAMPLE-README.md": [ + { + "type": "Secret Keyword", + "filename": "extensions/template/EXAMPLE-README.md", + "hashed_secret": "11fa7c37d697f30e6aee828b4426a10f83ab2380", + "is_verified": false, + "line_number": 52 + }, + { + "type": "Secret Keyword", + "filename": "extensions/template/EXAMPLE-README.md", + "hashed_secret": "71fdbe9f60b1157a53c18b7ec93d4041d828aaad", + "is_verified": false, + "line_number": 106 + } + ], + "tests/test_agent_config_consistency.py": [ + { + "type": "Hex High Entropy String", + "filename": "tests/test_agent_config_consistency.py", + "hashed_secret": "7a549d52003f28825cf4d8a7351585120349c1c5", + "is_verified": false, + "line_number": 56 + } + ], + "tests/test_authentication.py": [ + { + "type": "Secret Keyword", + "filename": "tests/test_authentication.py", + "hashed_secret": "3c3b274d119ff5a5ec6c1e215c1cb794d9973ac1", + "is_verified": false, + "line_number": 131 + } + ], + "tests/test_extensions.py": [ + { + "type": "Secret Keyword", + "filename": "tests/test_extensions.py", + "hashed_secret": "7a9b93cfa651fbc2c93d88edea4d4fcfe33c0a0b", + "is_verified": false, + "line_number": 3676 + } + ] + }, + "generated_at": "2026-05-28T20:24:09Z" +} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 12b095f5fc..2a88e034f7 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -95,6 +95,54 @@ uv run python -m pytest tests/test_agent_config_consistency.py -q Run this when you change agent metadata, context update scripts, or integration wiring. +#### Security checks + +```bash +uvx --from pip-audit==2.10.0 pip-audit --disable-pip --require-hashes -r .github/security-audit-requirements.txt --progress-spinner off +uvx --from bandit==1.9.4 bandit -r src -lll --baseline .github/bandit-baseline.json +``` + +Run these before changing dependency metadata, workflow execution code, subprocess usage, or security-sensitive paths. Pull request, push, and manual CI audits use the committed hashed requirements file so they stay deterministic. The scheduled CI audit also resolves the runtime and `test` extra dependency set across the supported Python and OS matrix to catch newly published advisories. If dependency metadata changes, refresh the committed audit input before running pip-audit: + +```bash +uv pip compile pyproject.toml --extra test --universal --upgrade --generate-hashes --quiet --no-header --output-file .github/security-audit-requirements.txt +``` + +Upstream package releases drift over time, so even an unrelated PR touching `pyproject.toml` can fail the `dependency-audit` check until the committed file is regenerated with the command above and re-committed. + +#### Secret scanning + +```bash +git ls-files -z -- ':!:.secrets.baseline' ':!:uv.lock' ':!:.github/security-audit-requirements.txt' \ + | xargs -0 uvx --from detect-secrets==1.5.0 detect-secrets-hook --baseline .secrets.baseline +``` + +The CI `secret-scan` job runs this against tracked files. It reports any high-entropy strings or provider tokens that aren't already whitelisted in `.secrets.baseline`. If you hit a known false positive (SHA pin, docs example, test fixture), regenerate the baseline: + +```bash +uvx --from detect-secrets==1.5.0 detect-secrets scan \ + --exclude-files '\.secrets\.baseline$' \ + --exclude-files 'uv\.lock$' \ + --exclude-files '\.github/security-audit-requirements\.txt$' \ + > .secrets.baseline +``` + +Audit the new entries before committing — a leaked credential must never be merged into the baseline. + +#### Bandit baseline + +The CI `static-analysis` job runs Bandit with `--baseline .github/bandit-baseline.json` (HIGH severity, blocking) plus a second informational pass at MEDIUM severity sharing the same baseline (`continue-on-error`, surfaced in the job summary). If a finding is intentional, audit it carefully, document the rationale next to the code (regular comment — **not** `# nosec`; see below), and append the entry to `.github/bandit-baseline.json`. Growing the baseline is gated: the `check_bandit_baseline.py` script fails the PR unless it carries the `security-baseline-change` label, so reviewers see the whitelist expansion. + +> **Do not use `# nosec` in `src/`.** The `test_bandit_nosec_is_not_suppressed_in_source` regression test fails any PR that adds one. The supported suppression paths are (a) the bandit baseline (covered above) for HIGH findings, and (b) `# noqa: S6xx` with an inline justification for ruff's subprocess-shell rules (`S602/S604/S605`). Both are visible in review; `# nosec` hides the finding without trace. + +#### Shell scripts + +```bash +shellcheck --severity=error scripts/bash/*.sh +``` + +The CI `lint.yml` `shellcheck` job blocks at `--severity=error` to catch real bugs while leaving stylistic warnings (SC2155 etc.) advisory. + ### Manual testing #### Testing setup diff --git a/pyproject.toml b/pyproject.toml index f8bdc23f91..0d5430aea4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,3 +73,13 @@ precision = 2 show_missing = true skip_covered = false +[tool.ruff.lint] +# Lock in subprocess security posture: any reintroduction of shell=True +# (or os.system / popen2) must be acknowledged with an explicit `# noqa` +# pointing at the rule, making the deviation visible in review. +extend-select = [ + "S602", # subprocess-popen-with-shell-equals-true + "S604", # call-with-shell-equals-true + "S605", # start-process-with-a-shell +] + diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py index b22cef6adb..7cc4c73801 100644 --- a/src/specify_cli/__init__.py +++ b/src/specify_cli/__init__.py @@ -40,6 +40,11 @@ from rich.panel import Panel from rich.align import Align from rich.table import Table +from ._download_security import ( + is_https_or_localhost_http, + read_response_limited, + read_zip_member_limited, +) from .shared_infra import ( install_shared_infra as _install_shared_infra_impl, refresh_shared_templates as _refresh_shared_templates_impl, @@ -684,11 +689,8 @@ def preset_add( elif from_url: # Validate URL scheme before downloading - from urllib.parse import urlparse as _urlparse - _parsed = _urlparse(from_url) - _is_localhost = _parsed.hostname in ("localhost", "127.0.0.1", "::1") - if _parsed.scheme != "https" and not (_parsed.scheme == "http" and _is_localhost): - console.print(f"[red]Error:[/red] URL must use HTTPS (got {_parsed.scheme}://). HTTP is only allowed for localhost.") + if not is_https_or_localhost_http(from_url): + console.print("[red]Error:[/red] URL must use HTTPS. HTTP is only allowed for localhost (127.0.0.1, ::1).") raise typer.Exit(1) console.print(f"Installing preset from [cyan]{from_url}[/cyan]...") @@ -698,18 +700,37 @@ def preset_add( with tempfile.TemporaryDirectory() as tmpdir: zip_path = Path(tmpdir) / "preset.zip" try: + from functools import partial + from specify_cli.authentication.http import open_url as _open_url from specify_cli._github_http import resolve_github_release_asset_api_url _preset_extra_headers = None - _resolved_from_url = resolve_github_release_asset_api_url(from_url, _open_url) + _resolved_from_url = resolve_github_release_asset_api_url( + from_url, partial(_open_url, strict_redirects=True) + ) if _resolved_from_url: from_url = _resolved_from_url _preset_extra_headers = {"Accept": "application/octet-stream"} - with _open_url(from_url, timeout=60, extra_headers=_preset_extra_headers) as response: - zip_path.write_bytes(response.read()) - except urllib.error.URLError as e: + with _open_url( + from_url, + timeout=60, + extra_headers=_preset_extra_headers, + strict_redirects=True, + ) as response: + zip_path.write_bytes( + read_response_limited( + response, + error_type=PresetError, + label=f"preset {from_url}", + ) + ) + # The URL scheme is validated above, so the only failures here + # are network errors and an oversized body (raised as PresetError + # via error_type). Catching those specifically lets unrelated + # ValueErrors surface instead of masquerading as download errors. + except (urllib.error.URLError, PresetError) as e: console.print(f"[red]Error:[/red] Failed to download: {e}") raise typer.Exit(1) @@ -1581,15 +1602,11 @@ def extension_add( # Guard with ``not dev`` so that --dev + --from does not show a # confusing confirmation for a URL that will be ignored. if from_url and not dev: - from urllib.parse import urlparse from rich.markup import escape as _escape_markup - parsed = urlparse(from_url) - is_localhost = parsed.hostname in ("localhost", "127.0.0.1", "::1") - - if parsed.scheme != "https" and not (parsed.scheme == "http" and is_localhost): + if not is_https_or_localhost_http(from_url): console.print("[red]Error:[/red] URL must use HTTPS for security.") - console.print("HTTP is only allowed for localhost URLs.") + console.print("HTTP is only allowed for localhost (127.0.0.1, ::1) URLs.") raise typer.Exit(1) safe_url = _escape_markup(from_url) @@ -1649,13 +1666,25 @@ def extension_add( try: from specify_cli.authentication.http import open_url as _open_url - with _open_url(from_url, timeout=60) as response: - zip_data = response.read() + with _open_url( + from_url, + timeout=60, + strict_redirects=True, + ) as response: + zip_data = read_response_limited( + response, + error_type=ExtensionError, + label=f"extension {from_url}", + ) zip_path.write_bytes(zip_data) # Install from downloaded ZIP manifest = manager.install_from_zip(zip_path, speckit_version, priority=priority, force=force) - except urllib.error.URLError as e: + # ExtensionError covers an oversized body (via error_type) and the + # ValidationError/ExtensionError raised by install_from_zip; URL + # scheme is validated above. Catching these instead of a blanket + # ValueError lets unrelated ValueErrors surface as real errors. + except (urllib.error.URLError, ExtensionError) as e: console.print(f"[red]Error:[/red] Failed to download from {safe_url}: {e}") raise typer.Exit(1) finally: @@ -2324,17 +2353,24 @@ def extension_update( manifest_data = None namelist = zf.namelist() - # First try root-level extension.yml + # Read the manifest under a hard size cap: this happens + # before install_from_zip()'s safe_extract_zip(), so a + # raw zf.open().read() here would bypass that bound and + # let a zip-bomb extension.yml exhaust memory. + manifest_member = None if "extension.yml" in namelist: - with zf.open("extension.yml") as f: - manifest_data = yaml.safe_load(f) or {} + manifest_member = "extension.yml" else: # Look for extension.yml in a single top-level subdirectory # (e.g., "repo-name-branch/extension.yml") manifest_paths = [n for n in namelist if n.endswith("/extension.yml") and n.count("/") == 1] if len(manifest_paths) == 1: - with zf.open(manifest_paths[0]) as f: - manifest_data = yaml.safe_load(f) or {} + manifest_member = manifest_paths[0] + + if manifest_member is not None: + manifest_data = yaml.safe_load( + read_zip_member_limited(zf, manifest_member) + ) or {} if manifest_data is None: raise ValueError("Downloaded extension archive is missing 'extension.yml'") @@ -3054,49 +3090,38 @@ def _validate_and_install_local(yaml_path: Path, source_label: str) -> None: # Try as URL (http/https) if source.startswith("http://") or source.startswith("https://"): - from ipaddress import ip_address - from urllib.parse import urlparse + from functools import partial + from specify_cli.authentication.http import open_url as _open_url - parsed_src = urlparse(source) - src_host = parsed_src.hostname or "" - src_loopback = src_host == "localhost" - if not src_loopback: - try: - src_loopback = ip_address(src_host).is_loopback - except ValueError: - # Host is not an IP literal (e.g., a DNS name); keep default non-loopback. - pass - if parsed_src.scheme != "https" and not (parsed_src.scheme == "http" and src_loopback): - console.print("[red]Error:[/red] Only HTTPS URLs are allowed, except HTTP for localhost.") + if not is_https_or_localhost_http(source): + console.print("[red]Error:[/red] Only HTTPS URLs are allowed, except HTTP for localhost (127.0.0.1, ::1).") raise typer.Exit(1) from specify_cli._github_http import resolve_github_release_asset_api_url as _resolve_gh_asset _wf_url_extra_headers = None - _resolved_wf_url = _resolve_gh_asset(source, _open_url, timeout=30) + _resolved_wf_url = _resolve_gh_asset( + source, partial(_open_url, strict_redirects=True), timeout=30 + ) if _resolved_wf_url: source = _resolved_wf_url _wf_url_extra_headers = {"Accept": "application/octet-stream"} import tempfile try: - with _open_url(source, timeout=30, extra_headers=_wf_url_extra_headers) as resp: + with _open_url( + source, + timeout=30, + extra_headers=_wf_url_extra_headers, + strict_redirects=True, + ) as resp: final_url = resp.geturl() - final_parsed = urlparse(final_url) - final_host = final_parsed.hostname or "" - final_lb = final_host == "localhost" - if not final_lb: - try: - final_lb = ip_address(final_host).is_loopback - except ValueError: - # Redirect host is not an IP literal; keep loopback as determined above. - pass - if final_parsed.scheme != "https" and not (final_parsed.scheme == "http" and final_lb): + if not is_https_or_localhost_http(final_url): console.print(f"[red]Error:[/red] URL redirected to non-HTTPS: {final_url}") raise typer.Exit(1) with tempfile.NamedTemporaryFile(suffix=".yml", delete=False) as tmp: - tmp.write(resp.read()) + tmp.write(read_response_limited(resp, label=f"workflow {source}")) tmp_path = Path(tmp.name) except typer.Exit: raise @@ -3146,24 +3171,10 @@ def _validate_and_install_local(yaml_path: Path, source_label: str) -> None: raise typer.Exit(1) # Validate URL scheme (HTTPS required, HTTP allowed for localhost only) - from ipaddress import ip_address - from urllib.parse import urlparse - - parsed_url = urlparse(workflow_url) - url_host = parsed_url.hostname or "" - is_loopback = False - if url_host == "localhost": - is_loopback = True - else: - try: - is_loopback = ip_address(url_host).is_loopback - except ValueError: - # Host is not an IP literal (e.g., a regular hostname); treat as non-loopback. - pass - if parsed_url.scheme != "https" and not (parsed_url.scheme == "http" and is_loopback): + if not is_https_or_localhost_http(workflow_url): console.print( f"[red]Error:[/red] Workflow '{source}' has an invalid install URL. " - "Only HTTPS URLs are allowed, except HTTP for localhost/loopback." + "Only HTTPS URLs are allowed, except HTTP for localhost (127.0.0.1, ::1)." ) raise typer.Exit(1) @@ -3177,29 +3188,29 @@ def _validate_and_install_local(yaml_path: Path, source_label: str) -> None: workflow_file = workflow_dir / "workflow.yml" try: + from functools import partial + from specify_cli.authentication.http import open_url as _open_url from specify_cli._github_http import resolve_github_release_asset_api_url as _resolve_gh_asset _wf_cat_extra_headers = None - _resolved_workflow_url = _resolve_gh_asset(workflow_url, _open_url, timeout=30) + _resolved_workflow_url = _resolve_gh_asset( + workflow_url, partial(_open_url, strict_redirects=True), timeout=30 + ) if _resolved_workflow_url: workflow_url = _resolved_workflow_url _wf_cat_extra_headers = {"Accept": "application/octet-stream"} workflow_dir.mkdir(parents=True, exist_ok=True) - with _open_url(workflow_url, timeout=30, extra_headers=_wf_cat_extra_headers) as response: + with _open_url( + workflow_url, + timeout=30, + extra_headers=_wf_cat_extra_headers, + strict_redirects=True, + ) as response: # Validate final URL after redirects final_url = response.geturl() - final_parsed = urlparse(final_url) - final_host = final_parsed.hostname or "" - final_loopback = final_host == "localhost" - if not final_loopback: - try: - final_loopback = ip_address(final_host).is_loopback - except ValueError: - # Host is not an IP literal (e.g., a regular hostname); treat as non-loopback. - pass - if final_parsed.scheme != "https" and not (final_parsed.scheme == "http" and final_loopback): + if not is_https_or_localhost_http(final_url): if workflow_dir.exists(): import shutil shutil.rmtree(workflow_dir, ignore_errors=True) @@ -3207,7 +3218,9 @@ def _validate_and_install_local(yaml_path: Path, source_label: str) -> None: f"[red]Error:[/red] Workflow '{source}' redirected to non-HTTPS URL: {final_url}" ) raise typer.Exit(1) - workflow_file.write_bytes(response.read()) + workflow_file.write_bytes( + read_response_limited(response, label=f"workflow {source}") + ) except Exception as exc: if workflow_dir.exists(): import shutil diff --git a/src/specify_cli/_download_security.py b/src/specify_cli/_download_security.py new file mode 100644 index 0000000000..75b8da91df --- /dev/null +++ b/src/specify_cli/_download_security.py @@ -0,0 +1,340 @@ +"""Helpers for bounded downloads and archive extraction.""" + +from __future__ import annotations + +import hashlib +import re +import stat +import zipfile +from pathlib import Path, PurePosixPath +from typing import NoReturn, TypeVar +from urllib.parse import urlparse + + +ErrorT = TypeVar("ErrorT", bound=Exception) + +MAX_DOWNLOAD_BYTES = 50 * 1024 * 1024 +MAX_ZIP_ENTRIES = 512 +MAX_ZIP_MEMBER_BYTES = 10 * 1024 * 1024 +MAX_ZIP_TOTAL_BYTES = 50 * 1024 * 1024 +READ_CHUNK_SIZE = 1024 * 1024 + +# Tighter ceilings for responses that are read fully into memory and parsed as +# JSON. The 50 MiB MAX_DOWNLOAD_BYTES default is sized for archive/payload +# downloads; JSON responses are far smaller, so capping them close to their real +# size shrinks the memory-DoS surface and keeps the "too large" error reachable +# (rather than only triggering on tens of MiB). Pass the matching constant +# explicitly at each JSON call site so the intended bound is pinned there. +# * METADATA — fixed-shape single-object responses (an OAuth token, one +# release's metadata): a few KiB in practice, 1 MiB is already generous. +# * CATALOG — listings that grow with the number of published items. The +# largest bundled catalog is ~130 KiB today, so 8 MiB leaves ~60x headroom +# for growth while staying well under the download ceiling. +MAX_JSON_METADATA_BYTES = 1 * 1024 * 1024 +MAX_JSON_CATALOG_BYTES = 8 * 1024 * 1024 +SHA256_RE = re.compile(r"^[0-9a-fA-F]{64}$") + + +def is_https_or_localhost_http(url: str) -> bool: + """Return True if *url* is HTTPS, or HTTP limited to loopback hosts. + + Shared scheme-safety predicate used by the auth HTTP redirect handler and + by the direct URL validations in the CLI download flows, so the rule (and + any future tightening of it) lives in one place. + + The loopback allowance is a deliberate *exact-string* match on + ``localhost`` / ``127.0.0.1`` / ``::1``, not an IP-range check: other + loopback addresses (e.g. ``127.0.0.2``) are intentionally not covered. + ``urlparse`` already lower-cases the hostname, so the comparison is + case-insensitive. + """ + parsed = urlparse(url) + is_localhost = parsed.hostname in ("localhost", "127.0.0.1", "::1") + return parsed.scheme == "https" or (parsed.scheme == "http" and is_localhost) + + +def _raise(error_type: type[ErrorT], message: str) -> NoReturn: + raise error_type(message) + + +def _raise_from(error_type: type[ErrorT], message: str, exc: Exception) -> NoReturn: + raise error_type(message) from exc + + +def read_response_limited( + response, + *, + max_bytes: int = MAX_DOWNLOAD_BYTES, + error_type: type[ErrorT] = ValueError, + label: str = "download", +) -> bytes: + """Read at most *max_bytes* from a response object. + + ``response.read(n)`` is only guaranteed to return *up to* ``n`` bytes and may + return fewer even when more data is pending (e.g. chunked transfer encoding), + so a single ``read(max_bytes + 1)`` cannot enforce the bound on its own. Read + in a loop until EOF or until one byte past the limit has been accumulated. + + *max_bytes* is keyword-only. It defaults to the module-wide + ``MAX_DOWNLOAD_BYTES`` (50 MiB) ceiling for archive/payload downloads; + callers with a tighter budget (e.g. small JSON responses) should pass an + explicit value so the intended bound is pinned at the call site rather than + tracking changes to the shared default. + """ + chunks: list[bytes] = [] + total = 0 + limit = max_bytes + 1 + while total < limit: + chunk = response.read(min(READ_CHUNK_SIZE, limit - total)) + if not chunk: + break + chunks.append(chunk) + total += len(chunk) + if total > max_bytes: + _raise(error_type, f"{label} exceeds maximum size of {max_bytes} bytes") + return b"".join(chunks) + + +def normalize_sha256(value: object, *, error_type: type[ErrorT] = ValueError) -> str | None: + """Normalize an optional sha256/sha256: checksum value.""" + if value is None: + return None + if not isinstance(value, str): + _raise(error_type, "sha256 checksum must be a string") + + checksum = value.strip() + if checksum.startswith("sha256:"): + checksum = checksum[len("sha256:") :] + if not SHA256_RE.fullmatch(checksum): + _raise(error_type, "sha256 checksum must be 64 hexadecimal characters") + return checksum.lower() + + +def verify_sha256( + data: bytes, + expected: object, + *, + error_type: type[ErrorT] = ValueError, + label: str = "download", +) -> None: + """Verify *data* against an optional sha256 checksum.""" + checksum = normalize_sha256(expected, error_type=error_type) + if checksum is None: + return + + actual = hashlib.sha256(data).hexdigest() + if actual != checksum: + _raise( + error_type, + f"{label} checksum mismatch: expected sha256:{checksum}, got sha256:{actual}", + ) + + +def read_zip_member_limited( + zf: zipfile.ZipFile, + name: str, + *, + max_bytes: int = MAX_ZIP_MEMBER_BYTES, + error_type: type[ErrorT] = ValueError, + label: str | None = None, +) -> bytes: + """Read a single ZIP member into memory under a hard size cap. + + Reading a member with ``zf.open(name).read()`` is unbounded: a crafted + archive can declare a tiny ``file_size`` yet decompress to many gigabytes (a + "zip bomb"), exhausting memory before the caller ever inspects the data. + This rejects members whose *declared* size already exceeds *max_bytes* and, + to defend against headers that lie, also reads in bounded chunks and stops + one byte past the limit. + + Use this for any inline manifest/metadata read that happens *before* + :func:`safe_extract_zip` (which already enforces the same per-member bound + during extraction); a raw ``zf.open(...).read()`` bypasses that protection. + """ + member_label = label or name + try: + info = zf.getinfo(name) + except KeyError as exc: + _raise_from(error_type, f"ZIP member not found: {name}", exc) + if info.file_size > max_bytes: + _raise( + error_type, + f"ZIP member {member_label} exceeds maximum size of {max_bytes} bytes", + ) + + chunks: list[bytes] = [] + total = 0 + limit = max_bytes + 1 + try: + with zf.open(name, "r") as source: + while total < limit: + chunk = source.read(min(READ_CHUNK_SIZE, limit - total)) + if not chunk: + break + chunks.append(chunk) + total += len(chunk) + except (OSError, zipfile.BadZipFile, RuntimeError) as exc: + _raise_from(error_type, f"Failed to read ZIP member {member_label}: {exc}", exc) + if total > max_bytes: + _raise( + error_type, + f"ZIP member {member_label} exceeds maximum size of {max_bytes} bytes", + ) + return b"".join(chunks) + + +def _safe_zip_name(name: str, *, error_type: type[ErrorT]) -> str: + """Return a normalized ZIP member name or raise on traversal.""" + if "\x00" in name: + _raise(error_type, f"Unsafe path in ZIP archive: {name!r}") + + normalized = name.replace("\\", "/") + path = PurePosixPath(normalized) + raw_parts = normalized.split("/") + # Strip a single trailing empty segment, i.e. the one-slash directory + # marker that legitimate ZIPs use ("mydir/", "mydir/subdir/"). Anything + # else that produces an empty segment — consecutive slashes ("a//b") or a + # second trailing slash — is left in place and rejected below as malformed. + if raw_parts and raw_parts[-1] == "": + raw_parts = raw_parts[:-1] + has_windows_drive = re.match(r"^[A-Za-z]:", normalized) is not None + if ( + not raw_parts + or path.is_absolute() + or has_windows_drive + or any(part in {"", ".", ".."} for part in raw_parts) + ): + _raise( + error_type, + f"Unsafe path in ZIP archive: {name} (potential path traversal)", + ) + return normalized + + +def safe_extract_zip( + zip_path: Path, + target_dir: Path, + *, + error_type: type[ErrorT] = ValueError, + max_entries: int = MAX_ZIP_ENTRIES, + max_member_bytes: int = MAX_ZIP_MEMBER_BYTES, + max_total_bytes: int = MAX_ZIP_TOTAL_BYTES, +) -> None: + """Extract a ZIP archive after path, symlink, and size validation.""" + try: + target_root = target_dir.resolve() + except OSError as exc: + _raise_from(error_type, f"Invalid ZIP extraction target: {target_dir}", exc) + + try: + zf = zipfile.ZipFile(zip_path, "r") + except (OSError, zipfile.BadZipFile) as exc: + _raise_from(error_type, f"Invalid ZIP archive: {zip_path}", exc) + + with zf: + try: + members = zf.infolist() + except zipfile.BadZipFile as exc: + _raise_from(error_type, f"Invalid ZIP archive: {zip_path}", exc) + if len(members) > max_entries: + _raise( + error_type, + f"ZIP archive contains too many entries ({len(members)} > {max_entries})", + ) + + normalized_members: list[tuple[zipfile.ZipInfo, str]] = [] + total_size = 0 + for member in members: + normalized_name = _safe_zip_name(member.filename, error_type=error_type) + + mode = member.external_attr >> 16 + if stat.S_ISLNK(mode): + _raise(error_type, f"Unsafe symlink in ZIP archive: {member.filename}") + + member_path = (target_dir / normalized_name).resolve() + try: + member_path.relative_to(target_root) + except ValueError: + _raise( + error_type, + f"Unsafe path in ZIP archive: {member.filename} " + "(potential path traversal)", + ) + + if not member.is_dir(): + if member.file_size > max_member_bytes: + _raise( + error_type, + f"ZIP member {member.filename} exceeds maximum size " + f"of {max_member_bytes} bytes", + ) + total_size += member.file_size + if total_size > max_total_bytes: + _raise( + error_type, + f"ZIP archive exceeds maximum uncompressed size " + f"of {max_total_bytes} bytes", + ) + + normalized_members.append((member, normalized_name)) + + # The loop above bounds the *declared* total via member.file_size, but a + # crafted archive can understate those headers. Mirror the per-member + # guard below with a cumulative count of the bytes actually written so + # the total-size bound holds even when the headers lie. + total_written = 0 + for member, normalized_name in normalized_members: + member_path = target_dir / normalized_name + if member.is_dir(): + try: + member_path.mkdir(parents=True, exist_ok=True) + except OSError as exc: + _raise_from( + error_type, + f"Failed to create ZIP directory {member.filename}: {exc}", + exc, + ) + continue + + try: + member_path.parent.mkdir(parents=True, exist_ok=True) + except OSError as exc: + _raise_from( + error_type, + f"Failed to create parent directory for ZIP member {member.filename}: {exc}", + exc, + ) + written = 0 + # Raised outside the try below: if error_type subclasses OSError or + # RuntimeError, raising inside would re-wrap the limit error as + # "Failed to extract" and lose the size-bound message. + limit_error: str | None = None + try: + with zf.open(member, "r") as source, member_path.open("wb") as dest: + while True: + chunk = source.read(READ_CHUNK_SIZE) + if not chunk: + break + written += len(chunk) + if written > max_member_bytes: + limit_error = ( + f"ZIP member {member.filename} exceeds maximum size " + f"of {max_member_bytes} bytes" + ) + break + total_written += len(chunk) + if total_written > max_total_bytes: + limit_error = ( + f"ZIP archive exceeds maximum uncompressed size " + f"of {max_total_bytes} bytes" + ) + break + dest.write(chunk) + except (OSError, zipfile.BadZipFile, RuntimeError) as exc: + _raise_from( + error_type, + f"Failed to extract ZIP member {member.filename}: {exc}", + exc, + ) + if limit_error is not None: + _raise(error_type, limit_error) diff --git a/src/specify_cli/_github_http.py b/src/specify_cli/_github_http.py index d2030b57a8..e9a5f7a4b1 100644 --- a/src/specify_cli/_github_http.py +++ b/src/specify_cli/_github_http.py @@ -91,6 +91,11 @@ def resolve_github_release_asset_api_url( import json import urllib.error + from specify_cli._download_security import ( + MAX_JSON_METADATA_BYTES, + read_response_limited, + ) + parsed = urlparse(download_url) parts = [unquote(part) for part in parsed.path.strip("/").split("/")] @@ -118,8 +123,17 @@ def resolve_github_release_asset_api_url( try: with open_url_fn(release_url, timeout=timeout) as response: - release_data = json.loads(response.read()) - except (urllib.error.URLError, json.JSONDecodeError): + release_data = json.loads( + read_response_limited( + response, + max_bytes=MAX_JSON_METADATA_BYTES, + label=f"GitHub release metadata {release_url}", + ) + ) + # ValueError covers both an oversized body (raised by read_response_limited) + # and json.JSONDecodeError (a ValueError subclass); on any of these, fall + # back to the original URL by returning None. + except (urllib.error.URLError, ValueError): return None for asset in release_data.get("assets", []): diff --git a/src/specify_cli/_utils.py b/src/specify_cli/_utils.py index 6daab08316..dfd9bb8446 100644 --- a/src/specify_cli/_utils.py +++ b/src/specify_cli/_utils.py @@ -16,14 +16,36 @@ CLAUDE_NPM_LOCAL_PATH = Path.home() / ".claude" / "local" / "node_modules" / ".bin" / "claude" -def run_command(cmd: list[str], check_return: bool = True, capture: bool = False, shell: bool = False) -> str | None: - """Run a shell command and optionally capture output.""" +def run_command( + cmd: list[str], + check_return: bool = True, + capture: bool = False, + shell: bool = False, +) -> str | None: + """Run a command without invoking a shell and optionally capture output. + + The ``shell`` parameter is kept in the signature so existing keyword + callers (and the re-export from ``specify_cli``) don't raise ``TypeError``, + but only the default ``shell=False`` is honoured. ``shell=True`` is + rejected with ``ValueError`` rather than silently ignored, so the + unsupported mode fails loudly instead of running with a different meaning. + """ + if shell: + raise ValueError( + "run_command() does not support shell=True; pass argv as a list" + ) + try: if capture: - result = subprocess.run(cmd, check=check_return, capture_output=True, text=True, shell=shell) + result = subprocess.run( + cmd, + check=check_return, + capture_output=True, + text=True, + ) return result.stdout.strip() else: - subprocess.run(cmd, check=check_return, shell=shell) + subprocess.run(cmd, check=check_return) return None except subprocess.CalledProcessError as e: if check_return: diff --git a/src/specify_cli/_version.py b/src/specify_cli/_version.py index e634a4f286..2a4f5e2fa7 100644 --- a/src/specify_cli/_version.py +++ b/src/specify_cli/_version.py @@ -29,6 +29,7 @@ from packaging.version import InvalidVersion, Version from ._console import console +from ._download_security import MAX_JSON_METADATA_BYTES, read_response_limited GITHUB_API_LATEST = "https://api.github.com/repos/github/spec-kit/releases/latest" _RESOLUTION_FAILURE_OFFLINE = "offline or timeout" @@ -118,8 +119,15 @@ def _fetch_latest_release_tag() -> tuple[str | None, str | None]: GITHUB_API_LATEST, timeout=5, extra_headers={"Accept": "application/vnd.github+json"}, + strict_redirects=True, ) as resp: - payload = json.loads(resp.read().decode("utf-8")) + payload = json.loads( + read_response_limited( + resp, + max_bytes=MAX_JSON_METADATA_BYTES, + label="GitHub latest release", + ).decode("utf-8") + ) tag = payload.get("tag_name") if not isinstance(tag, str) or not tag: raise ValueError("GitHub API response missing valid tag_name") diff --git a/src/specify_cli/agents.py b/src/specify_cli/agents.py index 3c06418014..2d115cd87a 100644 --- a/src/specify_cli/agents.py +++ b/src/specify_cli/agents.py @@ -545,8 +545,20 @@ def register_commands( cmd_name = cmd_info["name"] aliases = cmd_info.get("aliases", []) cmd_file = cmd_info["file"] + if not isinstance(cmd_file, str) or not cmd_file.strip(): + raise ValueError( + f"Command source file for {cmd_name!r} must be a non-empty string" + ) - source_file = source_dir / cmd_file + try: + source_root = source_dir.resolve() + source_file = (source_root / cmd_file).resolve() + source_file.relative_to(source_root) + except (OSError, ValueError): + raise ValueError( + f"Command source file {cmd_file!r} escapes directory " + f"{source_dir!r}" + ) from None if not source_file.exists(): continue diff --git a/src/specify_cli/authentication/azure_devops.py b/src/specify_cli/authentication/azure_devops.py index 5d71a1957b..2e041004db 100644 --- a/src/specify_cli/authentication/azure_devops.py +++ b/src/specify_cli/authentication/azure_devops.py @@ -17,6 +17,10 @@ _ADO_RESOURCE_ID = "499b84ac-1321-427f-aa17-267ca6975798" +class _TokenResponseTooLarge(Exception): + """Raised when an Azure AD token response exceeds the bounded read limit.""" + + class AzureDevOpsAuth(AuthProvider): """Azure DevOps authentication provider. @@ -109,9 +113,35 @@ def _acquire_via_client_credentials(entry: AuthConfigEntry) -> str | None: headers={"Content-Type": "application/x-www-form-urlencoded"}, ) try: - with urllib.request.urlopen(req, timeout=30) as resp: # noqa: S310 - payload = _json.loads(resp.read().decode("utf-8")) + from specify_cli._download_security import ( + MAX_JSON_METADATA_BYTES, + read_response_limited, + ) + from specify_cli.authentication.http import _StripAuthOnRedirect + + # A 307/308 redirect preserves the POST body, which carries the + # client_secret. Reuse the package HTTPS-downgrade guard (empty host + # list ⇒ no auth header to strip, just the scheme check) so the + # secret can never be forwarded to a non-HTTPS, non-loopback host. + opener = urllib.request.build_opener(_StripAuthOnRedirect(())) + with opener.open(req, timeout=30) as resp: # noqa: S310 + payload = _json.loads( + read_response_limited( + resp, + max_bytes=MAX_JSON_METADATA_BYTES, + error_type=_TokenResponseTooLarge, + label="Azure DevOps token response", + ).decode("utf-8") + ) token = payload.get("access_token", "").strip() return token or None - except (urllib.error.URLError, OSError, _json.JSONDecodeError, KeyError): + except ( + urllib.error.URLError, + OSError, + _json.JSONDecodeError, + _TokenResponseTooLarge, + ): + # Network failure, malformed JSON, or an oversized response — fall + # through to the next strategy. Unrelated programming errors (other + # ValueErrors, KeyErrors) intentionally propagate so they surface. return None diff --git a/src/specify_cli/authentication/http.py b/src/specify_cli/authentication/http.py index d6402e5c3e..c8b3540fab 100644 --- a/src/specify_cli/authentication/http.py +++ b/src/specify_cli/authentication/http.py @@ -16,6 +16,7 @@ from fnmatch import fnmatch from urllib.parse import urlparse +from .._download_security import is_https_or_localhost_http from . import get_provider from .config import AuthConfigEntry, _default_config_path, find_entries_for_url, load_auth_config @@ -57,13 +58,23 @@ def _hostname_in_hosts(hostname: str, hosts: tuple[str, ...]) -> bool: class _StripAuthOnRedirect(urllib.request.HTTPRedirectHandler): - """Drop ``Authorization`` when a redirect leaves the entry's declared hosts.""" + """Redirect handler that guards every redirect it is installed for. + + 1. Reject redirects to non-HTTPS URLs (loopback HTTP excepted, per + ``is_https_or_localhost_http``) — enforced unconditionally. + 2. Drop ``Authorization`` when a redirect leaves the entry's declared hosts. + """ def __init__(self, hosts: tuple[str, ...]) -> None: super().__init__() self._hosts = hosts def redirect_request(self, req, fp, code, msg, headers, newurl): + if not is_https_or_localhost_http(newurl): + raise urllib.error.URLError( + f"Refusing unsafe redirect to non-HTTPS URL: {newurl}" + ) + original_auth = ( req.get_header("Authorization") or req.unredirected_hdrs.get("Authorization") @@ -103,7 +114,13 @@ def build_request(url: str, extra_headers: dict[str, str] | None = None) -> urll return urllib.request.Request(url, headers=headers) -def open_url(url: str, timeout: int = 10, extra_headers: dict[str, str] | None = None): +def open_url( + url: str, + timeout: int = 10, + extra_headers: dict[str, str] | None = None, + *, + strict_redirects: bool = False, +): """Open *url* with config-driven auth, redirect stripping, and fallthrough. 1. Find ``auth.json`` entries whose hosts match the URL. @@ -113,6 +130,13 @@ def open_url(url: str, timeout: int = 10, extra_headers: dict[str, str] | None = 5. Non-auth errors (404, 500, network) raise immediately. *extra_headers* (e.g. ``Accept``) are merged into every attempt. + + Redirect scheme safety: every authenticated attempt goes through + ``_StripAuthOnRedirect``, which always rejects redirects to non-HTTPS + URLs (except HTTP to localhost / 127.0.0.1 / ::1, the hosts allowed by + ``is_https_or_localhost_http``). *strict_redirects* extends that same + scheme guard to the unauthenticated fallback; without it, the fallback + follows redirects without the scheme check. """ entries = find_entries_for_url(url, _load_config()) @@ -146,4 +170,10 @@ def _make_req(auth_headers: dict[str, str]) -> urllib.request.Request: # No entry worked (or none matched) — unauthenticated fallback req = _make_req({}) + if strict_redirects: + # No auth is attached on this path, so the handler's host list is empty: + # here it acts purely as the HTTPS-downgrade guard (its redirect_request + # rejects non-HTTPS, non-loopback targets), not as an auth-stripper. + opener = urllib.request.build_opener(_StripAuthOnRedirect(())) + return opener.open(req, timeout=timeout) return urllib.request.urlopen(req, timeout=timeout) # noqa: S310 diff --git a/src/specify_cli/extensions.py b/src/specify_cli/extensions.py index a5458ce51e..0d57216184 100644 --- a/src/specify_cli/extensions.py +++ b/src/specify_cli/extensions.py @@ -10,11 +10,10 @@ import hashlib import os import tempfile -import zipfile import shutil import copy from dataclasses import dataclass -from pathlib import Path +from pathlib import Path, PurePosixPath from typing import Optional, Dict, List, Any, Callable, Set from datetime import datetime, timezone import re @@ -25,6 +24,12 @@ from packaging import version as pkg_version from packaging.specifiers import SpecifierSet, InvalidSpecifier +from ._download_security import ( + MAX_JSON_CATALOG_BYTES, + read_response_limited, + safe_extract_zip, + verify_sha256, +) from .catalogs import CatalogEntry as BaseCatalogEntry, CatalogStackBase from ._init_options import is_ai_skills_enabled @@ -285,6 +290,24 @@ def _validate(self): ) if "name" not in cmd or "file" not in cmd: raise ValidationError("Command missing 'name' or 'file'") + if not isinstance(cmd["file"], str) or not cmd["file"].strip(): + raise ValidationError( + f"Command '{cmd['name']}' file must be a non-empty string" + ) + + normalized_file = cmd["file"].replace("\\", "/") + file_path = PurePosixPath(normalized_file) + has_windows_drive = re.match(r"^[A-Za-z]:", normalized_file) is not None + if ( + file_path.is_absolute() + or has_windows_drive + or any(part == ".." for part in file_path.parts) + ): + raise ValidationError( + f"Invalid command file path '{cmd['file']}': " + "must be a relative path within the extension directory" + ) + cmd["file"] = normalized_file # Validate command name format if not EXTENSION_COMMAND_NAME_PATTERN.match(cmd["name"]): @@ -1429,21 +1452,7 @@ def install_from_zip( with tempfile.TemporaryDirectory() as tmpdir: temp_path = Path(tmpdir) - # Extract ZIP safely (prevent Zip Slip attack) - with zipfile.ZipFile(zip_path, 'r') as zf: - # Validate all paths first before extracting anything - temp_path_resolved = temp_path.resolve() - for member in zf.namelist(): - member_path = (temp_path / member).resolve() - # Use is_relative_to for safe path containment check - try: - member_path.relative_to(temp_path_resolved) - except ValueError: - raise ValidationError( - f"Unsafe path in ZIP archive: {member} (potential path traversal)" - ) - # Only extract after all paths are validated - zf.extractall(temp_path) + safe_extract_zip(zip_path, temp_path, error_type=ValidationError) # Find extension directory (may be nested) extension_dir = temp_path @@ -1915,7 +1924,12 @@ def _open_url( Delegates to :func:`specify_cli.authentication.http.open_url`. """ from specify_cli.authentication.http import open_url - return open_url(url, timeout, extra_headers=extra_headers) + return open_url( + url, + timeout, + extra_headers=extra_headers, + strict_redirects=True, + ) def _resolve_github_release_asset_api_url( self, @@ -2135,7 +2149,14 @@ def _fetch_single_catalog(self, entry: CatalogEntry, force_refresh: bool = False # Fetch from network try: with self._open_url(entry.url, timeout=10) as response: - catalog_data = json.loads(response.read()) + catalog_data = json.loads( + read_response_limited( + response, + max_bytes=MAX_JSON_CATALOG_BYTES, + error_type=ExtensionError, + label=f"extension catalog {entry.url}", + ) + ) self._validate_catalog_payload(catalog_data, entry.url) @@ -2309,7 +2330,14 @@ def fetch_catalog(self, force_refresh: bool = False) -> Dict[str, Any]: import urllib.error with self._open_url(catalog_url, timeout=10) as response: - catalog_data = json.loads(response.read()) + catalog_data = json.loads( + read_response_limited( + response, + max_bytes=MAX_JSON_CATALOG_BYTES, + error_type=ExtensionError, + label=f"extension catalog {catalog_url}", + ) + ) # Validate catalog structure. Reuses the same helper as # ``_fetch_single_catalog`` so all three branches (root type, @@ -2479,9 +2507,23 @@ def download_extension(self, extension_id: str, target_dir: Optional[Path] = Non # Download the ZIP file try: - with self._open_url(download_url, timeout=60, extra_headers=extra_headers) as response: - zip_data = response.read() + with self._open_url( + download_url, + timeout=60, + extra_headers=extra_headers, + ) as response: + zip_data = read_response_limited( + response, + error_type=ExtensionError, + label=f"extension '{extension_id}' download", + ) + verify_sha256( + zip_data, + ext_info.get("sha256"), + error_type=ExtensionError, + label=f"extension '{extension_id}' download", + ) zip_path.write_bytes(zip_data) return zip_path diff --git a/src/specify_cli/integrations/catalog.py b/src/specify_cli/integrations/catalog.py index aba5877d8f..6af83762d7 100644 --- a/src/specify_cli/integrations/catalog.py +++ b/src/specify_cli/integrations/catalog.py @@ -21,6 +21,7 @@ import yaml from packaging import version as pkg_version +from .._download_security import MAX_JSON_CATALOG_BYTES, read_response_limited from ..catalogs import CatalogEntry, CatalogStackBase @@ -165,12 +166,19 @@ def _fetch_single_catalog( try: from specify_cli.authentication.http import open_url - with open_url(entry.url, timeout=10) as resp: + with open_url(entry.url, timeout=10, strict_redirects=True) as resp: # Validate final URL after redirects final_url = resp.geturl() if final_url != entry.url: self._validate_catalog_url(final_url) - catalog_data = json.loads(resp.read()) + catalog_data = json.loads( + read_response_limited( + resp, + max_bytes=MAX_JSON_CATALOG_BYTES, + error_type=IntegrationCatalogError, + label=f"integration catalog {entry.url}", + ) + ) if not isinstance(catalog_data, dict): raise IntegrationCatalogError( diff --git a/src/specify_cli/presets.py b/src/specify_cli/presets.py index a6b2ded49f..8a62aa48f3 100644 --- a/src/specify_cli/presets.py +++ b/src/specify_cli/presets.py @@ -12,10 +12,9 @@ import hashlib import os import tempfile -import zipfile import shutil from dataclasses import dataclass -from pathlib import Path +from pathlib import Path, PurePosixPath from typing import TYPE_CHECKING, Optional, Dict, List, Any if TYPE_CHECKING: @@ -27,6 +26,12 @@ from packaging import version as pkg_version from packaging.specifiers import SpecifierSet, InvalidSpecifier +from ._download_security import ( + MAX_JSON_CATALOG_BYTES, + read_response_limited, + safe_extract_zip, + verify_sha256, +) from .extensions import REINSTALL_COMMAND, ExtensionRegistry, normalize_priority from .integrations.base import IntegrationBase from ._init_options import is_ai_skills_enabled @@ -218,12 +223,21 @@ def _validate(self): # Validate file path safety: must be relative, no parent traversal file_path = tmpl["file"] - normalized = os.path.normpath(file_path) - if os.path.isabs(normalized) or normalized.startswith(".."): + if not isinstance(file_path, str) or not file_path.strip(): + raise PresetValidationError( + "Invalid template file path: must be a non-empty string" + ) + normalized = file_path.replace("\\", "/") + normalized_path = PurePosixPath(normalized) + has_windows_drive = re.match(r"^[A-Za-z]:", normalized) is not None + if normalized_path.is_absolute() or any( + part == ".." for part in normalized_path.parts + ) or has_windows_drive: raise PresetValidationError( f"Invalid template file path '{file_path}': " "must be a relative path within the preset directory" ) + tmpl["file"] = normalized # Validate strategy field (optional, defaults to "replace") strategy = tmpl.get("strategy", "replace") @@ -1641,18 +1655,7 @@ def install_from_zip( with tempfile.TemporaryDirectory() as tmpdir: temp_path = Path(tmpdir) - with zipfile.ZipFile(zip_path, 'r') as zf: - temp_path_resolved = temp_path.resolve() - for member in zf.namelist(): - member_path = (temp_path / member).resolve() - try: - member_path.relative_to(temp_path_resolved) - except ValueError: - raise PresetValidationError( - f"Unsafe path in ZIP archive: {member} " - "(potential path traversal)" - ) - zf.extractall(temp_path) + safe_extract_zip(zip_path, temp_path, error_type=PresetValidationError) pack_dir = temp_path manifest_path = pack_dir / "preset.yml" @@ -1879,7 +1882,7 @@ def _open_url( Delegates to :func:`specify_cli.authentication.http.open_url`. """ from specify_cli.authentication.http import open_url - return open_url(url, timeout, extra_headers=extra_headers) + return open_url(url, timeout, extra_headers=extra_headers, strict_redirects=True) def _resolve_github_release_asset_api_url( self, @@ -2158,7 +2161,14 @@ def _fetch_single_catalog(self, entry: PresetCatalogEntry, force_refresh: bool = try: with self._open_url(entry.url, timeout=10) as response: - catalog_data = json.loads(response.read()) + catalog_data = json.loads( + read_response_limited( + response, + max_bytes=MAX_JSON_CATALOG_BYTES, + error_type=PresetError, + label=f"preset catalog {entry.url}", + ) + ) self._validate_catalog_payload(catalog_data, entry.url) @@ -2309,7 +2319,14 @@ def fetch_catalog(self, force_refresh: bool = False) -> Dict[str, Any]: try: with self._open_url(catalog_url, timeout=10) as response: - catalog_data = json.loads(response.read()) + catalog_data = json.loads( + read_response_limited( + response, + max_bytes=MAX_JSON_CATALOG_BYTES, + error_type=PresetError, + label=f"preset catalog {catalog_url}", + ) + ) # Validate catalog structure. Reuses the same helper as # ``_fetch_single_catalog`` so all three branches (root type, @@ -2498,8 +2515,18 @@ def download_pack( try: with self._open_url(download_url, timeout=60, extra_headers=extra_headers) as response: - zip_data = response.read() + zip_data = read_response_limited( + response, + error_type=PresetError, + label=f"preset '{pack_id}' download", + ) + verify_sha256( + zip_data, + pack_info.get("sha256"), + error_type=PresetError, + label=f"preset '{pack_id}' download", + ) zip_path.write_bytes(zip_data) return zip_path diff --git a/src/specify_cli/workflows/catalog.py b/src/specify_cli/workflows/catalog.py index 213b443e3d..7537555d5c 100644 --- a/src/specify_cli/workflows/catalog.py +++ b/src/specify_cli/workflows/catalog.py @@ -19,6 +19,8 @@ import yaml +from specify_cli._download_security import MAX_JSON_CATALOG_BYTES, read_response_limited + # --------------------------------------------------------------------------- # Errors @@ -337,9 +339,20 @@ def _validate_catalog_url(url: str) -> None: _validate_catalog_url(entry.url) try: - with _open_url(entry.url, timeout=30) as resp: + with _open_url( + entry.url, + timeout=30, + strict_redirects=True, + ) as resp: _validate_catalog_url(resp.geturl()) - data = json.loads(resp.read().decode("utf-8")) + data = json.loads( + read_response_limited( + resp, + max_bytes=MAX_JSON_CATALOG_BYTES, + error_type=WorkflowCatalogError, + label="workflow catalog", + ).decode("utf-8") + ) except Exception as exc: # Fall back to cache if available if cache_file.exists(): diff --git a/src/specify_cli/workflows/steps/shell/__init__.py b/src/specify_cli/workflows/steps/shell/__init__.py index 73ac99530a..89ab718cab 100644 --- a/src/specify_cli/workflows/steps/shell/__init__.py +++ b/src/specify_cli/workflows/steps/shell/__init__.py @@ -30,7 +30,7 @@ def execute(self, config: dict[str, Any], context: StepContext) -> StepResult: # control commands; catalog-installed workflows should be reviewed # before use (see PUBLISHING.md for security guidance). try: - proc = subprocess.run( + proc = subprocess.run( # noqa: S602 -- intentional shell=True (see NOTE above) run_cmd, shell=True, capture_output=True, diff --git a/tests/http_helpers.py b/tests/http_helpers.py index 46e26806b4..5effd27e42 100644 --- a/tests/http_helpers.py +++ b/tests/http_helpers.py @@ -1,15 +1,41 @@ """HTTP test helpers shared by version-related CLI tests.""" +import io import json +import urllib.request from unittest.mock import MagicMock +import pytest + def mock_urlopen_response(payload: dict) -> MagicMock: """Build a urlopen context-manager mock whose read returns JSON.""" body = json.dumps(payload).encode("utf-8") resp = MagicMock() - resp.read.return_value = body + resp.read.side_effect = io.BytesIO(body).read cm = MagicMock() cm.__enter__.return_value = resp cm.__exit__.return_value = False return cm + + +@pytest.fixture(autouse=True) +def route_opener_open_through_urlopen(monkeypatch): + """Route OpenerDirector.open through urllib.request.urlopen. + + ``open_url(..., strict_redirects=True)`` fetches via + ``build_opener(...).open()``, which bypasses ``urllib.request.urlopen`` + — and with it the urlopen patches these test modules are built on. + Delegating ``open()`` to urlopen at call time keeps those patches + effective; the redirect handler's own behavior is covered by + ``TestRedirectStripping`` in test_authentication.py. + + Import this fixture into a test module to activate it there. + """ + monkeypatch.setattr( + urllib.request.OpenerDirector, + "open", + lambda self, req, data=None, timeout=None: urllib.request.urlopen( + req, timeout=timeout + ), + ) diff --git a/tests/integrations/test_integration_catalog.py b/tests/integrations/test_integration_catalog.py index fae9e32d23..a2c0fc1d7c 100644 --- a/tests/integrations/test_integration_catalog.py +++ b/tests/integrations/test_integration_catalog.py @@ -1,5 +1,6 @@ """Tests for the integration catalog system (catalog.py).""" +import io import json import os @@ -166,15 +167,27 @@ class TestCatalogFetch: """Tests that use a local HTTP server stub via monkeypatch.""" def _patch_urlopen(self, monkeypatch, catalog_data): - """Patch authentication.http.urllib.request.urlopen to return *catalog_data*.""" + """Patch authentication.http urlopen + OpenerDirector to return *catalog_data*. + + Covers both code paths in ``open_url``: + - default: ``urllib.request.urlopen`` (unauthenticated, no strict redirects) + - hardened: ``OpenerDirector.open`` (strict_redirects=True path). + """ class FakeResponse: def __init__(self, data, url=""): self._data = json.dumps(data).encode() self._url = url if isinstance(url, str) else url.full_url + self._pos = 0 - def read(self): - return self._data + def read(self, size=-1): + # Advance a cursor and return b"" at EOF like a real stream, so + # read_response_limited's bounded loop terminates. + if size is None or size < 0: + size = len(self._data) - self._pos + out = self._data[self._pos : self._pos + size] + self._pos += len(out) + return out def geturl(self): return self._url @@ -189,8 +202,14 @@ def fake_urlopen(req, timeout=10): url = req if isinstance(req, str) else req.full_url return FakeResponse(catalog_data, url) + def fake_opener_open(_self, req, data=None, timeout=10): + return fake_urlopen(req, timeout) + import specify_cli.authentication.http as _auth_http monkeypatch.setattr(_auth_http.urllib.request, "urlopen", fake_urlopen) + monkeypatch.setattr( + _auth_http.urllib.request.OpenerDirector, "open", fake_opener_open + ) def test_fetch_and_search_all(self, tmp_path, monkeypatch): monkeypatch.setenv("HOME", str(tmp_path)) @@ -295,6 +314,58 @@ def test_invalid_catalog_format(self, tmp_path, monkeypatch): with pytest.raises(IntegrationCatalogError, match="Failed to fetch any integration catalog"): cat.search() + def test_fetch_single_catalog_uses_bounded_read(self, tmp_path, monkeypatch): + cat = IntegrationCatalog(tmp_path) + entry = IntegrationCatalogEntry( + url="https://example.com/catalog.json", + name="test", + priority=1, + install_allowed=True, + ) + + class FakeResponse: + def __init__(self): + self._stream = io.BytesIO(b"{}") + + def read(self, size=-1): + return self._stream.read(size) + + def geturl(self): + return entry.url + + def __enter__(self): + return self + + def __exit__(self, *_args): + pass + + def fake_urlopen(req, timeout=10): + assert (req if isinstance(req, str) else req.full_url) == entry.url + assert timeout == 10 + return FakeResponse() + + def fake_read_response_limited(response, **kwargs): + assert isinstance(response, FakeResponse) + assert kwargs["error_type"] is IntegrationCatalogError + assert kwargs["label"] == "integration catalog https://example.com/catalog.json" + raise IntegrationCatalogError("catalog too large") + + import urllib.request + + monkeypatch.setattr(urllib.request, "urlopen", fake_urlopen) + monkeypatch.setattr( + urllib.request.OpenerDirector, + "open", + lambda _self, req, data=None, timeout=10: fake_urlopen(req, timeout), + ) + monkeypatch.setattr( + "specify_cli.integrations.catalog.read_response_limited", + fake_read_response_limited, + ) + + with pytest.raises(IntegrationCatalogError, match="catalog too large"): + cat._fetch_single_catalog(entry, force_refresh=True) + def test_clear_cache(self, tmp_path): (tmp_path / ".specify").mkdir() cat = IntegrationCatalog(tmp_path) @@ -492,17 +563,35 @@ class FakeResponse: def __init__(self, data, url=""): self._data = json.dumps(data).encode() self._url = url if isinstance(url, str) else url.full_url - def read(self): - return self._data + self._pos = 0 + + def read(self, size=-1): + # Advance a cursor and return b"" at EOF like a real stream, so + # read_response_limited's bounded loop terminates. + if size is None or size < 0: + size = len(self._data) - self._pos + out = self._data[self._pos : self._pos + size] + self._pos += len(out) + return out + def geturl(self): return self._url + def __enter__(self): return self + def __exit__(self, *a): pass - monkeypatch.setattr(_auth_http.urllib.request, "urlopen", - lambda req, timeout=10: FakeResponse(catalog, req if isinstance(req, str) else req.full_url)) + def _fake_urlopen(req, timeout=10): + return FakeResponse(catalog, req if isinstance(req, str) else req.full_url) + + monkeypatch.setattr(_auth_http.urllib.request, "urlopen", _fake_urlopen) + monkeypatch.setattr( + _auth_http.urllib.request.OpenerDirector, + "open", + lambda _self, req, data=None, timeout=10: _fake_urlopen(req, timeout), + ) old = os.getcwd() try: diff --git a/tests/self_upgrade_helpers.py b/tests/self_upgrade_helpers.py index c363f57b13..fc0f339f92 100644 --- a/tests/self_upgrade_helpers.py +++ b/tests/self_upgrade_helpers.py @@ -18,7 +18,7 @@ _verify_upgrade, ) from tests.conftest import strip_ansi -from tests.http_helpers import mock_urlopen_response +from tests.http_helpers import mock_urlopen_response, route_opener_open_through_urlopen __all__ = ( "SENTINEL_GH_TOKEN", @@ -31,6 +31,7 @@ "_verify_upgrade", "mock_urlopen_response", "requires_posix", + "route_opener_open_through_urlopen", "runner", "strip_ansi", ) diff --git a/tests/test_authentication.py b/tests/test_authentication.py index 5d75355a09..15c29633e9 100644 --- a/tests/test_authentication.py +++ b/tests/test_authentication.py @@ -14,6 +14,7 @@ from __future__ import annotations import base64 +import io import json import os @@ -497,10 +498,15 @@ def test_resolve_token_azure_ad_success(self, monkeypatch): tenant_id="tid", client_id="cid", client_secret_env="MY_SECRET", ) mock_resp = MagicMock() - mock_resp.read.return_value = b'{"access_token": "ad-acquired-token"}' + mock_resp.read.side_effect = io.BytesIO(b'{"access_token": "ad-acquired-token"}').read mock_resp.__enter__ = lambda s: s mock_resp.__exit__ = MagicMock(return_value=False) - with patch("urllib.request.urlopen", return_value=mock_resp): + # The token request goes through a strict-redirect opener (so a 307/308 + # cannot forward the client_secret body to a non-HTTPS host), not bare + # urlopen — patch the opener it builds. + mock_opener = MagicMock() + mock_opener.open.return_value = mock_resp + with patch("urllib.request.build_opener", return_value=mock_opener): assert AzureDevOpsAuth().resolve_token(entry) == "ad-acquired-token" def test_resolve_token_azure_ad_missing_secret_returns_none(self, monkeypatch): @@ -816,6 +822,18 @@ def test_multi_hop_redirect_within_hosts_preserves_auth(self): auth3 = req3.get_header("Authorization") or req3.unredirected_hdrs.get("Authorization") assert auth3 == "Bearer tok" + def test_redirect_rejects_https_downgrade(self): + """HTTPS downloads must not follow redirects to non-local HTTP URLs.""" + from specify_cli.authentication.http import _StripAuthOnRedirect + from urllib.request import Request + import io + import urllib.error + handler = _StripAuthOnRedirect(("example.com",)) + req = Request("https://example.com/archive.zip") + with pytest.raises(urllib.error.URLError, match="unsafe redirect"): + handler.redirect_request(req, io.BytesIO(b""), 302, "Found", {}, + "http://evil.example.com/archive.zip") + # --------------------------------------------------------------------------- # _fetch_latest_release_tag delegation @@ -835,7 +853,7 @@ def side_effect(req, timeout=None): captured["request"] = req body = _json.dumps({"tag_name": "v9.9.9"}).encode() resp = MagicMock() - resp.read.return_value = body + resp.read.side_effect = io.BytesIO(body).read cm = MagicMock() cm.__enter__.return_value = resp cm.__exit__.return_value = False @@ -855,19 +873,25 @@ def test_gh_token_forwarded_when_configured(self, monkeypatch): assert captured["request"].get_header("Authorization") == "Bearer forwarded-sentinel" def test_no_config_means_no_auth(self, monkeypatch): - from unittest.mock import patch + from unittest.mock import MagicMock, patch from specify_cli._version import _fetch_latest_release_tag self._set_config(monkeypatch, []) captured, side_effect = self._capture_request() - with patch("specify_cli.authentication.http.urllib.request.urlopen", side_effect=side_effect): + # The release fetch uses strict_redirects=True, so the unauthenticated + # path goes through build_opener().open(), not urlopen. + mock_opener = MagicMock() + mock_opener.open.side_effect = side_effect + with patch("specify_cli.authentication.http.urllib.request.build_opener", return_value=mock_opener): _fetch_latest_release_tag() assert captured["request"].get_header("Authorization") is None def test_accept_header_present(self, monkeypatch): - from unittest.mock import patch + from unittest.mock import MagicMock, patch from specify_cli._version import _fetch_latest_release_tag self._set_config(monkeypatch, []) captured, side_effect = self._capture_request() - with patch("specify_cli.authentication.http.urllib.request.urlopen", side_effect=side_effect): + mock_opener = MagicMock() + mock_opener.open.side_effect = side_effect + with patch("specify_cli.authentication.http.urllib.request.build_opener", return_value=mock_opener): _fetch_latest_release_tag() assert captured["request"].get_header("Accept") == "application/vnd.github+json" diff --git a/tests/test_baseline_gates.py b/tests/test_baseline_gates.py new file mode 100644 index 0000000000..71f844aa16 --- /dev/null +++ b/tests/test_baseline_gates.py @@ -0,0 +1,433 @@ +"""Tests for the bandit and detect-secrets baseline growth gate scripts. + +Both scripts share the same shape: read the baseline at a base ref and a +head ref, compare *identities* (not counts) so a swap doesn't slip +through, and require an acknowledgement label when the head set is a +strict superset. + +Shared cases (introduction, identical, growth±label, swap, corrupt-JSON +fallback) are parametrized across both scripts via the ``gate`` fixture. +Bandit-only quirks (no-base-ref, whitespace normalization) live in +``TestBanditSpecific``. + +We drive the scripts as subprocesses against a throwaway git repo so the +``git show :`` calls inside them resolve real refs. +""" + +from __future__ import annotations + +import json +import os +import subprocess +import sys +from dataclasses import dataclass +from pathlib import Path +from typing import Callable + +import pytest + + +REPO_ROOT = Path(__file__).resolve().parent.parent +BANDIT_SCRIPT = REPO_ROOT / ".github" / "scripts" / "check_bandit_baseline.py" +SECRETS_SCRIPT = REPO_ROOT / ".github" / "scripts" / "check_secrets_baseline.py" + + +def _git(repo: Path, *args: str) -> str: + return subprocess.run( + ["git", *args], + cwd=repo, + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ).stdout.strip() + + +def _init_repo(tmp_path: Path) -> Path: + repo = tmp_path / "repo" + repo.mkdir() + _git(repo, "init", "-q", "-b", "main") + _git(repo, "config", "user.email", "test@example.com") + _git(repo, "config", "user.name", "Test") + (repo / ".github").mkdir() + (repo / ".github" / "scripts").mkdir() + return repo + + +def _install_script(repo: Path, source: Path) -> Path: + target = repo / ".github" / "scripts" / source.name + target.write_text(source.read_text(encoding="utf-8"), encoding="utf-8") + return target + + +def _commit_file(repo: Path, rel_path: str, content: str, message: str) -> str: + target = repo / rel_path + target.parent.mkdir(parents=True, exist_ok=True) + target.write_text(content, encoding="utf-8") + _git(repo, "add", rel_path) + _git(repo, "commit", "-q", "-m", message) + return _git(repo, "rev-parse", "HEAD") + + +def _commit_baseline(repo: Path, baseline_path: str, payload: dict, message: str) -> str: + return _commit_file(repo, baseline_path, json.dumps(payload, indent=2), message) + + +def _run_script(repo: Path, script: Path, env_overrides: dict[str, str]): + env = os.environ.copy() + env.update( + { + "HOME": str(repo), + **env_overrides, + } + ) + return subprocess.run( + [sys.executable, str(script)], + cwd=repo, + env=env, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + + +# --------------------------------------------------------------------------- +# Parametrization machinery +# --------------------------------------------------------------------------- + + +def _bandit_baseline(entries: list[tuple[str, int]]) -> dict: + """Build a bandit-style baseline from (filename, line) tuples.""" + return { + "results": [ + { + "filename": filename, + "line_number": line, + "test_id": "B602", + "issue_severity": "HIGH", + "issue_confidence": "HIGH", + "code": f"shell=True at {filename}:{line}", + } + for filename, line in entries + ] + } + + +def _secrets_baseline(entries: list[tuple[str, int]]) -> dict: + """Build a detect-secrets-style baseline from (filename, line) tuples.""" + results: dict[str, list[dict]] = {} + for filename, line in entries: + results.setdefault(filename, []).append( + { + "type": "Secret Keyword", + "filename": filename, + # The hash is part of the identity, so make it unique per (file, line). + "hashed_secret": f"h_{filename}_{line}", + "is_verified": False, + "line_number": line, + } + ) + return {"version": "1.5.0", "results": results} + + +@dataclass +class GateConfig: + name: str + script: Path + env_prefix: str + baseline_path: str + label: str + make_baseline: Callable[[list[tuple[str, int]]], dict] + + +BANDIT_GATE = GateConfig( + name="bandit", + script=BANDIT_SCRIPT, + env_prefix="BANDIT_BASELINE", + baseline_path=".github/bandit-baseline.json", + label="security-baseline-change", + make_baseline=_bandit_baseline, +) + + +SECRETS_GATE = GateConfig( + name="secrets", + script=SECRETS_SCRIPT, + env_prefix="SECRETS_BASELINE", + baseline_path=".secrets.baseline", + label="secrets-baseline-change", + make_baseline=_secrets_baseline, +) + + +@dataclass +class GateHandle: + """Live test harness: a repo with the script installed and helpers.""" + + config: GateConfig + repo: Path + + def commit(self, entries: list[tuple[str, int]], message: str) -> str: + return _commit_baseline( + self.repo, + self.config.baseline_path, + self.config.make_baseline(entries), + message, + ) + + def commit_raw(self, raw_content: str, message: str) -> str: + return _commit_file(self.repo, self.config.baseline_path, raw_content, message) + + def delete_baseline(self, message: str) -> str: + """Remove the baseline file from the working tree and commit.""" + (self.repo / self.config.baseline_path).unlink() + _git(self.repo, "add", "-A") + _git(self.repo, "commit", "-q", "-m", message) + return _git(self.repo, "rev-parse", "HEAD") + + def overwrite_worktree(self, raw_content: str) -> None: + """Replace the working-tree baseline without committing. + + Used to simulate a corrupt head state read from disk. + """ + (self.repo / self.config.baseline_path).write_text(raw_content, encoding="utf-8") + + def run(self, *, base: str, labels: str = ""): + # Head side reads the working tree directly — no env var needed. + return _run_script( + self.repo, + self.repo / ".github" / "scripts" / self.config.script.name, + { + f"{self.config.env_prefix}_BASE": base, + f"{self.config.env_prefix}_LABELS": labels, + }, + ) + + +@pytest.fixture(params=[BANDIT_GATE, SECRETS_GATE], ids=lambda c: c.name) +def gate(request, tmp_path) -> GateHandle: + config: GateConfig = request.param + repo = _init_repo(tmp_path) + _install_script(repo, config.script) + return GateHandle(config=config, repo=repo) + + +# --------------------------------------------------------------------------- +# Shared scenarios (parametrized across both scripts) +# --------------------------------------------------------------------------- + + +class TestSharedBaselineGate: + """Scenarios that must hold for both the bandit and secrets gates.""" + + def test_introduction_pr_skips_check(self, gate: GateHandle): + # Baseline file did not exist at base ref → no acknowledgement needed. + _git(gate.repo, "commit", "--allow-empty", "-q", "-m", "before baseline") + base_sha = _git(gate.repo, "rev-parse", "HEAD") + gate.commit([("a.py", 10)], "introduce baseline") + + result = gate.run(base=base_sha) + + assert result.returncode == 0, result.stderr + assert "introduction of the baseline" in result.stdout + + def test_identical_baselines_pass(self, gate: GateHandle): + base_sha = gate.commit([("a.py", 10)], "base") + result = gate.run(base=base_sha) + assert result.returncode == 0 + assert "no new identities" in result.stdout + + def test_growth_without_label_fails(self, gate: GateHandle): + base_sha = gate.commit([("a.py", 10)], "base") + gate.commit([("a.py", 10), ("b.py", 20)], "grow") + + result = gate.run(base=base_sha) + + assert result.returncode == 1 + assert f"'{gate.config.label}'" in result.stderr + + def test_growth_with_label_passes(self, gate: GateHandle): + base_sha = gate.commit([("a.py", 10)], "base") + gate.commit([("a.py", 10), ("b.py", 20)], "grow") + + result = gate.run(base=base_sha, labels=gate.config.label) + + assert result.returncode == 0, result.stderr + assert "acknowledged via label" in result.stdout + + def test_swap_attack_detected(self, gate: GateHandle): + """Remove one entry and add a different one → constant count, but + a *new* identity appears. Gate must still fire.""" + base_sha = gate.commit([("a.py", 10)], "base") + gate.commit([("b.py", 20)], "swap") # same count, different ID + + result = gate.run(base=base_sha) + + assert result.returncode == 1, "identity diff must catch swaps" + assert "1 new identities" in result.stderr + + def test_corrupt_json_at_base_falls_back_to_empty(self, gate: GateHandle): + """If the baseline at the base ref is unparseable JSON, treat its + contents as empty so the script still completes (the head set + becomes 'all new' and the label gate fires).""" + base_sha = gate.commit_raw("{ invalid json", "corrupt base") + gate.commit([("a.py", 10)], "valid head") + + result = gate.run(base=base_sha) + + assert result.returncode == 1, "corrupt base should not crash the script" + assert f"'{gate.config.label}'" in result.stderr + assert "Could not parse baseline" in result.stderr + assert base_sha not in result.stderr + + def test_head_missing_fails_closed(self, gate: GateHandle): + """If the baseline existed at base but is missing in the working + tree (head), the gate must fail-closed — silently passing would + let a PR delete the whole baseline file and neutralize the gate.""" + base_sha = gate.commit([("a.py", 10)], "base") + gate.delete_baseline("remove baseline at head") + + result = gate.run(base=base_sha) + + assert result.returncode == 1 + assert "Refusing to fail-open" in result.stderr + + def test_head_corrupt_in_worktree_fails_closed(self, gate: GateHandle): + """A corrupt JSON in the working tree must raise (not be silently + treated as empty, which would also drop the gate). Simulates a + flaky tool writing junk to the file just before the script runs.""" + base_sha = gate.commit([("a.py", 10)], "base") + gate.overwrite_worktree("{ not json") + + result = gate.run(base=base_sha) + + assert result.returncode == 1 + assert "is corrupt" in result.stderr + assert "fail-open" in result.stderr + + +# --------------------------------------------------------------------------- +# Bandit-only scenarios +# --------------------------------------------------------------------------- + + +class TestBanditSpecific: + """Cases that only exist for the bandit gate.""" + + @pytest.fixture + def gate(self, tmp_path) -> GateHandle: + repo = _init_repo(tmp_path) + _install_script(repo, BANDIT_SCRIPT) + return GateHandle(config=BANDIT_GATE, repo=repo) + + def test_unresolvable_base_ref_fails_closed(self, gate: GateHandle): + # A base ref that cannot be resolved (unfetched, typo) must block + # the gate, not be treated as "baseline did not exist yet". + gate.commit([("a.py", 10)], "base") + + result = gate.run(base="0123456789abcdef0123456789abcdef01234567") + + assert result.returncode == 1 + assert "Refusing to fail-open" in result.stderr + + def test_no_base_ref_is_skipped(self, gate: GateHandle): + gate.commit([], "init") # need at least one commit so HEAD resolves + result = gate.run(base="") + assert result.returncode == 0 + assert "baseline diff check skipped" in result.stdout + + def test_whitespace_only_change_does_not_trip(self, gate: GateHandle): + """A bandit version bump that reformats the code snippet (different + whitespace) should not flag the same finding as new — that's the + purpose of the whitespace-normalized identity hash.""" + base_sha = _commit_baseline( + gate.repo, + gate.config.baseline_path, + { + "results": [ + { + "filename": "a.py", + "line_number": 10, + "test_id": "B602", + "issue_severity": "HIGH", + "issue_confidence": "HIGH", + "code": "shell=True\n capture_output=True", + } + ] + }, + "base", + ) + _commit_baseline( + gate.repo, + gate.config.baseline_path, + { + "results": [ + { + "filename": "a.py", + "line_number": 10, + "test_id": "B602", + "issue_severity": "HIGH", + "issue_confidence": "HIGH", + "code": "shell=True\ncapture_output=True", # one less space + } + ] + }, + "reformatted snippet", + ) + + result = gate.run(base=base_sha) + + assert result.returncode == 0, result.stderr + + +class TestSecretsSpecific: + """Cases that only exist for the detect-secrets gate.""" + + @pytest.fixture + def gate(self, tmp_path) -> GateHandle: + repo = _init_repo(tmp_path) + _install_script(repo, SECRETS_SCRIPT) + return GateHandle(config=SECRETS_GATE, repo=repo) + + @staticmethod + def _baseline_with_hash(hashed_secret: str) -> dict: + return { + "version": "1.5.0", + "results": { + "app.py": [ + { + "type": "Secret Keyword", + "filename": "app.py", + "hashed_secret": hashed_secret, + "is_verified": False, + "line_number": 42, + } + ] + }, + } + + def test_same_location_secret_swap_fails_without_leaking_hash( + self, gate: GateHandle + ): + """The hash remains part of the gate identity, but not CI logs.""" + old_hash = "old-sensitive-hash" + new_hash = "new-sensitive-hash" + base_sha = _commit_baseline( + gate.repo, + gate.config.baseline_path, + self._baseline_with_hash(old_hash), + "base", + ) + _commit_baseline( + gate.repo, + gate.config.baseline_path, + self._baseline_with_hash(new_hash), + "secret swap", + ) + + result = gate.run(base=base_sha) + + assert result.returncode == 1, "hashed secret diff must catch swaps" + assert "app.py|42|Secret Keyword|hashed_secret=" in result.stderr + assert old_hash not in result.stderr + assert new_hash not in result.stderr diff --git a/tests/test_download_security.py b/tests/test_download_security.py new file mode 100644 index 0000000000..9c2eb2b18d --- /dev/null +++ b/tests/test_download_security.py @@ -0,0 +1,357 @@ +"""Tests for bounded download and ZIP extraction helpers.""" + +from __future__ import annotations + +import ast +import stat +import zipfile +from pathlib import Path + +import pytest + +from specify_cli._download_security import ( + read_response_limited, + read_zip_member_limited, + safe_extract_zip, + verify_sha256, +) + + +REPO_ROOT = Path(__file__).resolve().parent.parent +LOCAL_FILE_HASH_READ_ALLOWLIST = { + ("src/specify_cli/extensions.py", "get_hash"), + ("src/specify_cli/integrations/catalog.py", "get_hash"), + ("src/specify_cli/presets.py", "get_hash"), +} + + +class _Response: + """Faithful stream stand-in: read() advances a cursor and returns b"" at EOF.""" + + def __init__(self, data: bytes, *, chunk: int | None = None): + self.data = data + self.pos = 0 + # When set, never return more than *chunk* bytes per call even if more is + # requested — simulates short reads (e.g. chunked transfer encoding). + self.chunk = chunk + + def read(self, size: int = -1) -> bytes: + if size < 0: + size = len(self.data) - self.pos + if self.chunk is not None: + size = min(size, self.chunk) + out = self.data[self.pos : self.pos + size] + self.pos += len(out) + return out + + +class _CustomZipError(ValueError): + pass + + +def _constant_int(node: ast.AST) -> int | None: + if isinstance(node, ast.Constant) and isinstance(node.value, int): + return node.value + if ( + isinstance(node, ast.UnaryOp) + and isinstance(node.op, ast.USub) + and isinstance(node.operand, ast.Constant) + and isinstance(node.operand.value, int) + ): + return -node.operand.value + return None + + +def _is_unbounded_read(call: ast.Call) -> bool: + if call.args: + size = _constant_int(call.args[0]) + return size is not None and size < 0 + + for keyword in call.keywords: + if keyword.arg == "size": + size = _constant_int(keyword.value) + return size is not None and size < 0 + + return True + + +class _UnboundedReadVisitor(ast.NodeVisitor): + def __init__(self) -> None: + self._function_stack: list[str] = [] + self.offenders: list[tuple[int, str]] = [] + + def visit_FunctionDef(self, node: ast.FunctionDef) -> None: + self._function_stack.append(node.name) + self.generic_visit(node) + self._function_stack.pop() + + def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None: + self._function_stack.append(node.name) + self.generic_visit(node) + self._function_stack.pop() + + def visit_Call(self, node: ast.Call) -> None: + if ( + isinstance(node.func, ast.Attribute) + and node.func.attr == "read" + and _is_unbounded_read(node) + ): + function_name = self._function_stack[-1] if self._function_stack else "" + self.offenders.append((node.lineno, function_name)) + self.generic_visit(node) + + +def test_read_response_limited_rejects_oversized_download(): + with pytest.raises(ValueError, match="exceeds maximum size"): + read_response_limited(_Response(b"abcde"), max_bytes=4) + + +def test_read_response_limited_returns_full_body_within_limit(): + assert read_response_limited(_Response(b"abcde"), max_bytes=10) == b"abcde" + + +def test_read_response_limited_enforces_bound_under_short_reads(): + # A server that streams more than max_bytes total while every read() returns + # fewer bytes than requested (chunked encoding) must still be rejected — a + # single read(max_bytes + 1) could be fooled, the accumulating loop cannot. + response = _Response(b"x" * 100, chunk=8) + with pytest.raises(ValueError, match="exceeds maximum size"): + read_response_limited(response, max_bytes=16) + + +def test_remote_downloads_do_not_use_unbounded_response_reads(): + offenders = [] + for path in (REPO_ROOT / "src" / "specify_cli").rglob("*.py"): + rel_path = path.relative_to(REPO_ROOT).as_posix() + visitor = _UnboundedReadVisitor() + visitor.visit(ast.parse(path.read_text(encoding="utf-8"))) + for line_number, function_name in visitor.offenders: + if (rel_path, function_name) not in LOCAL_FILE_HASH_READ_ALLOWLIST: + offenders.append(f"{rel_path}:{line_number}") + + assert offenders == [] + + +def test_verify_sha256_rejects_mismatch(): + with pytest.raises(ValueError, match="checksum mismatch"): + verify_sha256(b"payload", "sha256:" + "0" * 64) + + +@pytest.mark.parametrize( + "member_name", + [ + "../evil.txt", + "nested/../../evil.txt", + "nested\\..\\evil.txt", + "C:\\Windows\\evil.txt", + "C:drive-relative.txt", + ], +) +def test_safe_extract_zip_rejects_traversal(tmp_path, member_name): + zip_path = tmp_path / "bad.zip" + with zipfile.ZipFile(zip_path, "w") as zf: + zf.writestr(member_name, "nope") + + with pytest.raises(ValueError, match="Unsafe path"): + safe_extract_zip(zip_path, tmp_path / "out") + + +# An empty member name is rejected by _safe_zip_name too, but zipfile cannot +# even write such an entry, so it is not testable through this API. +@pytest.mark.parametrize("member_name", [".", "./file.txt", "nested/./file.txt", "nested//file.txt"]) +def test_safe_extract_zip_rejects_dot_path_segments(tmp_path, member_name): + zip_path = tmp_path / "bad.zip" + with zipfile.ZipFile(zip_path, "w") as zf: + zf.writestr(member_name, "nope") + + with pytest.raises(_CustomZipError, match="Unsafe path"): + safe_extract_zip(zip_path, tmp_path / "out", error_type=_CustomZipError) + + +def test_safe_extract_zip_rejects_symlinks(tmp_path): + zip_path = tmp_path / "bad.zip" + info = zipfile.ZipInfo("link") + info.external_attr = (stat.S_IFLNK | 0o777) << 16 + + with zipfile.ZipFile(zip_path, "w") as zf: + zf.writestr(info, "target") + + with pytest.raises(ValueError, match="Unsafe symlink"): + safe_extract_zip(zip_path, tmp_path / "out") + + +def test_safe_extract_zip_rejects_symlink_without_partial_extraction(tmp_path): + # A symlink sitting next to benign members must be rejected before ANY + # file is written: validation runs over the whole member list first, so an + # unsafe member cannot leak a partially-extracted tree to disk. + zip_path = tmp_path / "mixed.zip" + link = zipfile.ZipInfo("evil-link") + link.external_attr = (stat.S_IFLNK | 0o777) << 16 + with zipfile.ZipFile(zip_path, "w") as zf: + zf.writestr("safe/first.txt", "hello") + zf.writestr(link, "target") + zf.writestr("safe/second.txt", "world") + + out_dir = tmp_path / "out" + with pytest.raises(ValueError, match="Unsafe symlink"): + safe_extract_zip(zip_path, out_dir) + + # Nothing should have been written — not even the benign member that + # precedes the symlink in the archive. + assert not out_dir.exists() or not any(out_dir.rglob("*")) + + +def test_safe_extract_zip_rejects_oversized_member(tmp_path): + zip_path = tmp_path / "bad.zip" + with zipfile.ZipFile(zip_path, "w") as zf: + zf.writestr("big.txt", "abcde") + + with pytest.raises(ValueError, match="exceeds maximum size"): + safe_extract_zip(zip_path, tmp_path / "out", max_member_bytes=4) + + +def test_safe_extract_zip_rejects_too_many_entries(tmp_path): + zip_path = tmp_path / "bad.zip" + with zipfile.ZipFile(zip_path, "w") as zf: + zf.writestr("one.txt", "1") + zf.writestr("two.txt", "2") + + with pytest.raises(ValueError, match="too many entries"): + safe_extract_zip(zip_path, tmp_path / "out", max_entries=1) + + +def test_safe_extract_zip_rejects_total_uncompressed_size(tmp_path): + zip_path = tmp_path / "bad.zip" + with zipfile.ZipFile(zip_path, "w") as zf: + zf.writestr("one.txt", "123") + zf.writestr("two.txt", "456") + + with pytest.raises(ValueError, match="maximum uncompressed size"): + safe_extract_zip(zip_path, tmp_path / "out", max_total_bytes=5) + + +def test_safe_extract_zip_bounds_actual_written_bytes_when_headers_understate_size( + tmp_path, monkeypatch +): + # Defense in depth: the pre-extraction check sums the *declared* + # member.file_size values, which a crafted archive can understate so that + # check passes. If the ZIP reader then yields more bytes than the header + # promised, the extraction loop must still abort once the cumulative bytes + # actually written exceed max_total_bytes. CPython's own zipfile happens to + # bound member reads to file_size and CRC-check them, so we substitute a + # reader that does not — exercising our guard rather than the stdlib's. + zip_path = tmp_path / "liar.zip" + with zipfile.ZipFile(zip_path, "w") as zf: + zf.writestr("a.txt", "") # declared file_size 0 ⇒ declared total stays 0 + zf.writestr("b.txt", "") + + class _OverreadingStream: + """A member reader that yields more bytes than any header declared.""" + + def __init__(self, payload: bytes): + self._remaining = payload + + def read(self, size: int = -1) -> bytes: + if size is None or size < 0: + size = len(self._remaining) + out, self._remaining = self._remaining[:size], self._remaining[size:] + return out + + def __enter__(self): + return self + + def __exit__(self, *exc): + return False + + # Each member streams 8 bytes despite declaring 0; the per-member cap (10 MiB + # default) is untouched, so only the cumulative guard can stop this. + monkeypatch.setattr( + zipfile.ZipFile, "open", lambda self, *a, **k: _OverreadingStream(b"x" * 8) + ) + + # 8 bytes for "a.txt" (total 8 ≤ 12), then "b.txt" busts the 12-byte ceiling. + with pytest.raises(ValueError, match="maximum uncompressed size"): + safe_extract_zip(zip_path, tmp_path / "out", max_total_bytes=12) + + +def test_safe_extract_zip_wraps_bad_zip_file(tmp_path): + zip_path = tmp_path / "bad.zip" + zip_path.write_bytes(b"not a zip archive") + + with pytest.raises(_CustomZipError, match="Invalid ZIP archive"): + safe_extract_zip(zip_path, tmp_path / "out", error_type=_CustomZipError) + + +def test_safe_extract_zip_wraps_filesystem_errors(tmp_path): + zip_path = tmp_path / "ok.zip" + blocked_parent = tmp_path / "blocked" + blocked_parent.write_text("not a directory", encoding="utf-8") + with zipfile.ZipFile(zip_path, "w") as zf: + zf.writestr("file.txt", "hello") + + with pytest.raises(_CustomZipError, match="Failed to create parent directory"): + safe_extract_zip( + zip_path, + blocked_parent / "out", + error_type=_CustomZipError, + ) + + +def test_safe_extract_zip_wraps_directory_filesystem_errors(tmp_path): + zip_path = tmp_path / "ok.zip" + blocked_parent = tmp_path / "blocked" + blocked_parent.write_text("not a directory", encoding="utf-8") + with zipfile.ZipFile(zip_path, "w") as zf: + zf.mkdir("dir") + + with pytest.raises(_CustomZipError, match="Failed to create ZIP directory"): + safe_extract_zip( + zip_path, + blocked_parent / "out", + error_type=_CustomZipError, + ) + + +def test_read_zip_member_limited_returns_member_within_limit(tmp_path): + zip_path = tmp_path / "ok.zip" + with zipfile.ZipFile(zip_path, "w") as zf: + zf.writestr("extension.yml", "extension:\n id: demo\n") + + with zipfile.ZipFile(zip_path, "r") as zf: + data = read_zip_member_limited(zf, "extension.yml") + + assert data == b"extension:\n id: demo\n" + + +def test_read_zip_member_limited_rejects_oversized_member(tmp_path): + # A manifest whose declared size already blows the cap (the zip-bomb shape: + # a few KB compressed that decompresses to gigabytes) is rejected before any + # of it is read into memory. + zip_path = tmp_path / "bomb.zip" + with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf: + zf.writestr("extension.yml", "a" * 5000) + + with zipfile.ZipFile(zip_path, "r") as zf: + with pytest.raises(ValueError, match="exceeds maximum size"): + read_zip_member_limited(zf, "extension.yml", max_bytes=16) + + +def test_read_zip_member_limited_wraps_missing_member(tmp_path): + zip_path = tmp_path / "ok.zip" + with zipfile.ZipFile(zip_path, "w") as zf: + zf.writestr("other.txt", "x") + + with zipfile.ZipFile(zip_path, "r") as zf: + with pytest.raises(_CustomZipError, match="ZIP member not found"): + read_zip_member_limited(zf, "extension.yml", error_type=_CustomZipError) + + +def test_safe_extract_zip_extracts_safe_archive(tmp_path): + zip_path = tmp_path / "ok.zip" + out_dir = tmp_path / "out" + with zipfile.ZipFile(zip_path, "w") as zf: + zf.writestr("nested/file.txt", "hello") + + safe_extract_zip(zip_path, out_dir) + + assert (out_dir / "nested" / "file.txt").read_text(encoding="utf-8") == "hello" diff --git a/tests/test_extensions.py b/tests/test_extensions.py index 1d05e1c2c4..b3cba75d95 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -10,7 +10,9 @@ """ import pytest +import io import json +import hashlib import os import platform import tempfile @@ -377,6 +379,43 @@ def test_invalid_command_name(self, temp_dir, valid_manifest_data): with pytest.raises(ValidationError, match="Invalid command name"): ExtensionManifest(manifest_path) + @pytest.mark.parametrize( + "bad_file", + [ + "../outside.md", + "/tmp/outside.md", + "commands/../../outside.md", + "C:\\Windows\\outside.md", + "C:outside.md", + ], + ) + def test_invalid_command_file_path(self, temp_dir, valid_manifest_data, bad_file): + """Command files must stay inside the extension package.""" + import yaml + + valid_manifest_data["provides"]["commands"][0]["file"] = bad_file + + manifest_path = temp_dir / "extension.yml" + with open(manifest_path, "w") as f: + yaml.dump(valid_manifest_data, f) + + with pytest.raises(ValidationError, match="Invalid command file path"): + ExtensionManifest(manifest_path) + + def test_windows_command_file_path_is_normalized(self, temp_dir, valid_manifest_data): + """Windows-authored manifests keep compatibility without traversal.""" + import yaml + + valid_manifest_data["provides"]["commands"][0]["file"] = "commands\\hello.md" + + manifest_path = temp_dir / "extension.yml" + with open(manifest_path, "w") as f: + yaml.dump(valid_manifest_data, f) + + manifest = ExtensionManifest(manifest_path) + + assert manifest.commands[0]["file"] == "commands/hello.md" + def test_command_name_autocorrect_speckit_prefix(self, temp_dir, valid_manifest_data): """Test that 'speckit.command' is auto-corrected to 'speckit.{ext_id}.command'.""" import yaml @@ -2420,7 +2459,8 @@ def test_unregister_skill_removes_parent_directory(self, project_dir, temp_dir): registrar = CommandRegistrar() from specify_cli.extensions import ExtensionManifest manifest = ExtensionManifest(ext_dir / "extension.yml") - registrar.register_commands_for_agent("codex", manifest, ext_dir, project_dir) + registered = registrar.register_commands_for_agent("codex", manifest, ext_dir, project_dir) + assert registered == ["speckit.cleanup-ext.run"] skill_subdir = skills_dir / "speckit-cleanup-ext-run" assert skill_subdir.exists(), "Skill subdirectory should exist after registration" @@ -3125,7 +3165,7 @@ def test_fetch_single_catalog_sends_auth_header(self, temp_dir, monkeypatch): catalog_data = {"schema_version": "1.0", "extensions": {}} mock_response = MagicMock() - mock_response.read.return_value = json.dumps(catalog_data).encode() + mock_response.read.side_effect = io.BytesIO(json.dumps(catalog_data).encode()).read mock_response.__enter__ = lambda s: s mock_response.__exit__ = MagicMock(return_value=False) mock_response.geturl.return_value = "https://raw.githubusercontent.com/org/repo/main/catalog.json" @@ -3181,7 +3221,7 @@ def test_fetch_single_catalog_rejects_malformed_payload(self, temp_dir, payload) catalog = self._make_catalog(temp_dir) mock_response = MagicMock() - mock_response.read.return_value = json.dumps(payload).encode() + mock_response.read.side_effect = io.BytesIO(json.dumps(payload).encode()).read mock_response.__enter__ = lambda s: s mock_response.__exit__ = MagicMock(return_value=False) @@ -3249,7 +3289,7 @@ def test_fetch_single_catalog_rejects_malformed_cached_payload( "extensions": {"foo": {"name": "Foo", "version": "1.0.0"}}, } mock_response = MagicMock() - mock_response.read.return_value = json.dumps(valid).encode() + mock_response.read.side_effect = io.BytesIO(json.dumps(valid).encode()).read mock_response.__enter__ = lambda s: s mock_response.__exit__ = MagicMock(return_value=False) @@ -3296,7 +3336,7 @@ def test_fetch_catalog_rejects_malformed_payload(self, temp_dir, payload): catalog = self._make_catalog(temp_dir) mock_response = MagicMock() - mock_response.read.return_value = json.dumps(payload).encode() + mock_response.read.side_effect = io.BytesIO(json.dumps(payload).encode()).read mock_response.__enter__ = lambda s: s mock_response.__exit__ = MagicMock(return_value=False) @@ -3336,7 +3376,7 @@ def test_fetch_catalog_recovers_from_unreadable_cache(self, temp_dir): "extensions": {"foo": {"name": "Foo", "version": "1.0.0"}}, } mock_response = MagicMock() - mock_response.read.return_value = json.dumps(valid).encode() + mock_response.read.side_effect = io.BytesIO(json.dumps(valid).encode()).read mock_response.__enter__ = lambda s: s mock_response.__exit__ = MagicMock(return_value=False) @@ -3374,7 +3414,7 @@ def test_fetch_catalog_recovers_from_unreadable_metadata(self, temp_dir): "extensions": {"foo": {"name": "Foo", "version": "1.0.0"}}, } mock_response = MagicMock() - mock_response.read.return_value = json.dumps(valid).encode() + mock_response.read.side_effect = io.BytesIO(json.dumps(valid).encode()).read mock_response.__enter__ = lambda s: s mock_response.__exit__ = MagicMock(return_value=False) @@ -3448,7 +3488,7 @@ def test_fetch_catalog_writes_cache_as_utf8(self, temp_dir, monkeypatch): "extensions": {"foo": {"name": "Foo", "version": "1.0.0"}}, } mock_response = MagicMock() - mock_response.read.return_value = json.dumps(payload).encode("utf-8") + mock_response.read.side_effect = io.BytesIO(json.dumps(payload).encode("utf-8")).read mock_response.__enter__ = lambda s: s mock_response.__exit__ = MagicMock(return_value=False) @@ -3498,10 +3538,15 @@ def test_fetch_catalog_survives_unwritable_cache(self, temp_dir, monkeypatch): "schema_version": "1.0", "extensions": {"foo": {"name": "Foo", "version": "1.0.0"}}, } - mock_response = MagicMock() - mock_response.read.return_value = json.dumps(valid).encode() - mock_response.__enter__ = lambda s: s - mock_response.__exit__ = MagicMock(return_value=False) + # Two fetches below: build a fresh response (and stream) per + # _open_url call so the bounded read doesn't see an exhausted + # BytesIO on the second one. + def make_response(*args, **kwargs): + mock_response = MagicMock() + mock_response.read.side_effect = io.BytesIO(json.dumps(valid).encode()).read + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + return mock_response # Simulate an unwritable cache dir: every write_text under the # cache directory raises PermissionError (an OSError subclass). @@ -3514,7 +3559,7 @@ def failing_write_text(self, data, *args, **kwargs): monkeypatch.setattr(_PathCls, "write_text", failing_write_text) - with patch.object(catalog, "_open_url", return_value=mock_response): + with patch.object(catalog, "_open_url", side_effect=make_response): # Legacy single-catalog path. assert catalog.fetch_catalog(force_refresh=True) == valid @@ -3550,7 +3595,7 @@ def test_get_merged_extensions_skips_non_mapping_entries(self, temp_dir): }, } mock_response = MagicMock() - mock_response.read.return_value = json.dumps(payload).encode() + mock_response.read.side_effect = io.BytesIO(json.dumps(payload).encode()).read mock_response.__enter__ = lambda s: s mock_response.__exit__ = MagicMock(return_value=False) @@ -3569,11 +3614,51 @@ def test_get_merged_extensions_skips_non_mapping_entries(self, temp_dir): # silently dropped rather than raising or crashing. assert [ext["id"] for ext in merged] == ["good"] + def test_fetch_single_catalog_uses_bounded_read(self, temp_dir): + """Catalog JSON responses must use the shared bounded-read helper.""" + from unittest.mock import patch, MagicMock + + catalog = self._make_catalog(temp_dir) + mock_response = MagicMock() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + entry = CatalogEntry( + url="https://example.com/catalog.json", + name="custom", + priority=1, + install_allowed=True, + ) + + with patch.object(catalog, "_open_url", return_value=mock_response), \ + patch( + "specify_cli.extensions.read_response_limited", + side_effect=ExtensionError("catalog too large"), + ): + with pytest.raises(ExtensionError, match="catalog too large"): + catalog._fetch_single_catalog(entry, force_refresh=True) + + def test_fetch_catalog_uses_bounded_read(self, temp_dir): + """The legacy single-catalog path must also bound catalog JSON reads.""" + from unittest.mock import patch, MagicMock + + catalog = self._make_catalog(temp_dir) + mock_response = MagicMock() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + + with patch.object(catalog, "get_catalog_url", return_value="https://example.com/catalog.json"), \ + patch.object(catalog, "_open_url", return_value=mock_response), \ + patch( + "specify_cli.extensions.read_response_limited", + side_effect=ExtensionError("catalog too large"), + ): + with pytest.raises(ExtensionError, match="catalog too large"): + catalog.fetch_catalog(force_refresh=True) + def test_download_extension_sends_auth_header(self, temp_dir, monkeypatch): """download_extension passes Authorization header when a provider is configured.""" from unittest.mock import patch, MagicMock import zipfile - import io monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken") self._inject_github_config(monkeypatch, token_env="GITHUB_TOKEN") @@ -3586,7 +3671,7 @@ def test_download_extension_sends_auth_header(self, temp_dir, monkeypatch): zip_bytes = zip_buf.getvalue() release_response = MagicMock() - release_response.read.return_value = json.dumps( + release_bytes = json.dumps( { "assets": [ { @@ -3596,11 +3681,12 @@ def test_download_extension_sends_auth_header(self, temp_dir, monkeypatch): ] } ).encode() + release_response.read.side_effect = io.BytesIO(release_bytes).read release_response.__enter__ = lambda s: s release_response.__exit__ = MagicMock(return_value=False) asset_response = MagicMock() - asset_response.read.return_value = zip_bytes + asset_response.read.side_effect = io.BytesIO(zip_bytes).read asset_response.__enter__ = lambda s: s asset_response.__exit__ = MagicMock(return_value=False) @@ -3636,7 +3722,6 @@ def test_download_extension_accepts_direct_github_rest_asset_url(self, temp_dir, """download_extension can use a GitHub REST release asset URL directly.""" from unittest.mock import patch, MagicMock import zipfile - import io monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken") self._inject_github_config(monkeypatch, token_env="GITHUB_TOKEN") @@ -3648,7 +3733,7 @@ def test_download_extension_accepts_direct_github_rest_asset_url(self, temp_dir, zip_bytes = zip_buf.getvalue() asset_response = MagicMock() - asset_response.read.return_value = zip_bytes + asset_response.read.side_effect = io.BytesIO(zip_bytes).read asset_response.__enter__ = lambda s: s asset_response.__exit__ = MagicMock(return_value=False) @@ -3677,6 +3762,52 @@ def fake_open(req, timeout=None): assert captured[0].get_header("Authorization") == "Bearer ghp_testtoken" assert captured[0].get_header("Accept") == "application/octet-stream" + def test_download_extension_verifies_sha256(self, temp_dir): + """Catalog-provided checksums are enforced when present.""" + from unittest.mock import patch, MagicMock + + catalog = self._make_catalog(temp_dir) + zip_bytes = b"fake zip data" + mock_response = MagicMock() + mock_response.read.side_effect = io.BytesIO(zip_bytes).read + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + ext_info = { + "id": "test-ext", + "name": "Test Extension", + "version": "1.0.0", + "download_url": "https://example.com/test-ext.zip", + "sha256": hashlib.sha256(zip_bytes).hexdigest(), + } + + with patch.object(catalog, "get_extension_info", return_value=ext_info), \ + patch.object(catalog, "_open_url", return_value=mock_response): + result = catalog.download_extension("test-ext", target_dir=temp_dir) + + assert result.read_bytes() == zip_bytes + + def test_download_extension_rejects_sha256_mismatch(self, temp_dir): + """A mismatched catalog checksum stops the downloaded ZIP being used.""" + from unittest.mock import patch, MagicMock + + catalog = self._make_catalog(temp_dir) + mock_response = MagicMock() + mock_response.read.side_effect = io.BytesIO(b"fake zip data").read + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + ext_info = { + "id": "test-ext", + "name": "Test Extension", + "version": "1.0.0", + "download_url": "https://example.com/test-ext.zip", + "sha256": "0" * 64, + } + + with patch.object(catalog, "get_extension_info", return_value=ext_info), \ + patch.object(catalog, "_open_url", return_value=mock_response): + with pytest.raises(ExtensionError, match="checksum mismatch"): + catalog.download_extension("test-ext", target_dir=temp_dir) + # ===== CatalogEntry Tests ===== @@ -4850,7 +4981,6 @@ def test_download_extension_raises_for_bundled(self, temp_dir): def test_download_extension_allows_bundled_with_url(self, temp_dir): """download_extension should allow bundled extensions that have a download_url (newer version).""" from unittest.mock import patch, MagicMock - import urllib.request project_dir = temp_dir / "project" project_dir.mkdir() @@ -4868,12 +4998,12 @@ def test_download_extension_allows_bundled_with_url(self, temp_dir): } mock_response = MagicMock() - mock_response.read.return_value = b"fake zip data" + mock_response.read.side_effect = io.BytesIO(b"fake zip data").read mock_response.__enter__ = lambda s: s mock_response.__exit__ = MagicMock(return_value=False) with patch.object(catalog, "get_extension_info", return_value=bundled_with_url), \ - patch.object(urllib.request, "urlopen", return_value=mock_response): + patch.object(catalog, "_open_url", return_value=mock_response): result = catalog.download_extension("git") assert result.name == "git-2.0.0.zip" diff --git a/tests/test_github_http.py b/tests/test_github_http.py index f513680796..de94f1ee77 100644 --- a/tests/test_github_http.py +++ b/tests/test_github_http.py @@ -1,5 +1,6 @@ """Tests for GitHub-authenticated HTTP request helpers.""" +import io import json import os from contextlib import contextmanager @@ -90,7 +91,10 @@ def _make_open_url_fn(self, release_json): @contextmanager def fake_open(url, timeout=None, extra_headers=None): resp = MagicMock() - resp.read.return_value = json.dumps(release_json).encode() + # side_effect (not return_value) so read() terminates at EOF — + # resolve_github_release_asset_api_url reads via the bounded + # read_response_limited helper, which loops until b"". + resp.read.side_effect = io.BytesIO(json.dumps(release_json).encode()).read yield resp return fake_open @@ -160,7 +164,7 @@ def test_tag_with_special_characters_is_url_encoded(self): def capturing_open(url, timeout=None, extra_headers=None): captured_urls.append(url) resp = MagicMock() - resp.read.return_value = json.dumps({"assets": []}).encode() + resp.read.side_effect = io.BytesIO(json.dumps({"assets": []}).encode()).read yield resp resolve_github_release_asset_api_url( @@ -179,7 +183,7 @@ def test_tag_with_hash_is_url_encoded(self): def capturing_open(url, timeout=None, extra_headers=None): captured_urls.append(url) resp = MagicMock() - resp.read.return_value = json.dumps({"assets": []}).encode() + resp.read.side_effect = io.BytesIO(json.dumps({"assets": []}).encode()).read yield resp resolve_github_release_asset_api_url( @@ -187,4 +191,4 @@ def capturing_open(url, timeout=None, extra_headers=None): capturing_open, ) assert len(captured_urls) == 1 - assert "releases/tags/v1%23beta" in captured_urls[0] \ No newline at end of file + assert "releases/tags/v1%23beta" in captured_urls[0] diff --git a/tests/test_github_workflows.py b/tests/test_github_workflows.py new file mode 100644 index 0000000000..7ad0b714ec --- /dev/null +++ b/tests/test_github_workflows.py @@ -0,0 +1,34 @@ +"""Static checks for repository GitHub Actions workflows.""" + +from __future__ import annotations + +import re +from pathlib import Path + + +REPO_ROOT = Path(__file__).resolve().parent.parent +WORKFLOWS_DIR = REPO_ROOT / ".github" / "workflows" +# Match both the dedicated-step form (` uses: x@sha`) and the +# inline shorthand (` - uses: x@sha`) used in catalog-assign.yml. +USES_RE = re.compile(r"^\s*(?:-\s*)?uses:\s*(?P\S+)", re.MULTILINE) + + +def test_github_actions_are_pinned_to_full_commit_shas(): + unpinned_refs = [] + + workflows = sorted( + list(WORKFLOWS_DIR.glob("*.yml")) + list(WORKFLOWS_DIR.glob("*.yaml")) + ) + assert workflows + + for workflow in workflows: + workflow_text = workflow.read_text(encoding="utf-8") + for match in USES_RE.finditer(workflow_text): + uses_ref = match.group("ref") + if uses_ref.startswith(("./", "../")): + continue + if re.search(r"@[0-9a-f]{40}$", uses_ref): + continue + unpinned_refs.append(f"{workflow.relative_to(REPO_ROOT)}: {uses_ref}") + + assert unpinned_refs == [] diff --git a/tests/test_presets.py b/tests/test_presets.py index 92add1103e..b648455bcb 100644 --- a/tests/test_presets.py +++ b/tests/test_presets.py @@ -11,7 +11,9 @@ """ import pytest +import io import json +import hashlib import tempfile import shutil import warnings @@ -289,6 +291,39 @@ def test_invalid_template_name_format(self, temp_dir, valid_pack_data): with pytest.raises(PresetValidationError, match="Invalid template name"): PresetManifest(manifest_path) + @pytest.mark.parametrize( + "bad_file", + [ + "../outside.md", + "/tmp/outside.md", + "templates/../../outside.md", + "C:\\Windows\\outside.md", + "C:outside.md", + ], + ) + def test_invalid_template_file_path(self, temp_dir, valid_pack_data, bad_file): + """Template files must stay inside the preset package.""" + valid_pack_data["provides"]["templates"][0]["file"] = bad_file + manifest_path = temp_dir / "preset.yml" + with open(manifest_path, "w") as f: + yaml.dump(valid_pack_data, f) + + with pytest.raises(PresetValidationError, match="Invalid template file path"): + PresetManifest(manifest_path) + + def test_windows_template_file_path_is_normalized(self, temp_dir, valid_pack_data): + """Windows-authored manifests keep compatibility without traversal.""" + valid_pack_data["provides"]["templates"][0]["file"] = ( + "templates\\spec-template.md" + ) + manifest_path = temp_dir / "preset.yml" + with open(manifest_path, "w") as f: + yaml.dump(valid_pack_data, f) + + manifest = PresetManifest(manifest_path) + + assert manifest.templates[0]["file"] == "templates/spec-template.md" + def test_get_hash(self, pack_dir): """Test manifest hash calculation.""" manifest = PresetManifest(pack_dir / "preset.yml") @@ -1488,7 +1523,7 @@ def test_fetch_single_catalog_sends_auth_header(self, project_dir, monkeypatch): catalog_data = {"schema_version": "1.0", "presets": {}} mock_response = MagicMock() - mock_response.read.return_value = json.dumps(catalog_data).encode() + mock_response.read.side_effect = io.BytesIO(json.dumps(catalog_data).encode()).read mock_response.__enter__ = lambda s: s mock_response.__exit__ = MagicMock(return_value=False) mock_response.geturl.return_value = "https://raw.githubusercontent.com/org/repo/main/presets/catalog.json" @@ -1544,7 +1579,7 @@ def test_fetch_single_catalog_rejects_malformed_payload(self, project_dir, paylo catalog = PresetCatalog(project_dir) mock_response = MagicMock() - mock_response.read.return_value = json.dumps(payload).encode() + mock_response.read.side_effect = io.BytesIO(json.dumps(payload).encode()).read mock_response.__enter__ = lambda s: s mock_response.__exit__ = MagicMock(return_value=False) @@ -1613,7 +1648,7 @@ def test_fetch_single_catalog_rejects_malformed_cached_payload( "presets": {"foo": {"name": "Foo", "version": "1.0.0"}}, } mock_response = MagicMock() - mock_response.read.return_value = json.dumps(valid).encode() + mock_response.read.side_effect = io.BytesIO(json.dumps(valid).encode()).read mock_response.__enter__ = lambda s: s mock_response.__exit__ = MagicMock(return_value=False) @@ -1660,7 +1695,7 @@ def test_fetch_catalog_rejects_malformed_payload(self, project_dir, payload): catalog = PresetCatalog(project_dir) mock_response = MagicMock() - mock_response.read.return_value = json.dumps(payload).encode() + mock_response.read.side_effect = io.BytesIO(json.dumps(payload).encode()).read mock_response.__enter__ = lambda s: s mock_response.__exit__ = MagicMock(return_value=False) @@ -1701,7 +1736,7 @@ def test_fetch_catalog_recovers_from_unreadable_cache(self, project_dir): "presets": {"foo": {"name": "Foo", "version": "1.0.0"}}, } mock_response = MagicMock() - mock_response.read.return_value = json.dumps(valid).encode() + mock_response.read.side_effect = io.BytesIO(json.dumps(valid).encode()).read mock_response.__enter__ = lambda s: s mock_response.__exit__ = MagicMock(return_value=False) @@ -1739,7 +1774,7 @@ def test_fetch_catalog_recovers_from_unreadable_metadata(self, project_dir): "presets": {"foo": {"name": "Foo", "version": "1.0.0"}}, } mock_response = MagicMock() - mock_response.read.return_value = json.dumps(valid).encode() + mock_response.read.side_effect = io.BytesIO(json.dumps(valid).encode()).read mock_response.__enter__ = lambda s: s mock_response.__exit__ = MagicMock(return_value=False) @@ -1809,7 +1844,7 @@ def test_fetch_catalog_writes_cache_as_utf8(self, project_dir, monkeypatch): "presets": {"foo": {"name": "Foo", "version": "1.0.0"}}, } mock_response = MagicMock() - mock_response.read.return_value = json.dumps(payload).encode("utf-8") + mock_response.read.side_effect = io.BytesIO(json.dumps(payload).encode("utf-8")).read mock_response.__enter__ = lambda s: s mock_response.__exit__ = MagicMock(return_value=False) @@ -1857,10 +1892,15 @@ def test_fetch_catalog_survives_unwritable_cache(self, project_dir, monkeypatch) "schema_version": "1.0", "presets": {"foo": {"name": "Foo", "version": "1.0.0"}}, } - mock_response = MagicMock() - mock_response.read.return_value = json.dumps(valid).encode() - mock_response.__enter__ = lambda s: s - mock_response.__exit__ = MagicMock(return_value=False) + # Two fetches below: build a fresh response (and stream) per + # _open_url call so the bounded read doesn't see an exhausted + # BytesIO on the second one. + def make_response(*args, **kwargs): + mock_response = MagicMock() + mock_response.read.side_effect = io.BytesIO(json.dumps(valid).encode()).read + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + return mock_response # Simulate an unwritable cache dir: every write_text under the # cache directory raises PermissionError (an OSError subclass). @@ -1873,7 +1913,7 @@ def failing_write_text(self, data, *args, **kwargs): monkeypatch.setattr(_PathCls, "write_text", failing_write_text) - with patch.object(catalog, "_open_url", return_value=mock_response): + with patch.object(catalog, "_open_url", side_effect=make_response): # Legacy single-catalog path. assert catalog.fetch_catalog(force_refresh=True) == valid @@ -1910,7 +1950,7 @@ def test_get_merged_packs_skips_non_mapping_entries(self, project_dir): }, } mock_response = MagicMock() - mock_response.read.return_value = json.dumps(payload).encode() + mock_response.read.side_effect = io.BytesIO(json.dumps(payload).encode()).read mock_response.__enter__ = lambda s: s mock_response.__exit__ = MagicMock(return_value=False) @@ -1944,21 +1984,23 @@ def test_download_pack_sends_auth_header(self, project_dir, monkeypatch): zip_bytes = zip_buf.getvalue() release_response = MagicMock() - release_response.read.return_value = json.dumps( - { - "assets": [ - { - "name": "test-pack.zip", - "url": "https://api.github.com/repos/org/repo/releases/assets/1", - } - ] - } - ).encode() + release_response.read.side_effect = io.BytesIO( + json.dumps( + { + "assets": [ + { + "name": "test-pack.zip", + "url": "https://api.github.com/repos/org/repo/releases/assets/1", + } + ] + } + ).encode() + ).read release_response.__enter__ = lambda s: s release_response.__exit__ = MagicMock(return_value=False) asset_response = MagicMock() - asset_response.read.return_value = zip_bytes + asset_response.read.side_effect = io.BytesIO(zip_bytes).read asset_response.__enter__ = lambda s: s asset_response.__exit__ = MagicMock(return_value=False) @@ -2006,7 +2048,7 @@ def test_download_pack_accepts_direct_github_rest_asset_url(self, project_dir, m zip_bytes = zip_buf.getvalue() asset_response = MagicMock() - asset_response.read.return_value = zip_bytes + asset_response.read.side_effect = io.BytesIO(zip_bytes).read asset_response.__enter__ = lambda s: s asset_response.__exit__ = MagicMock(return_value=False) @@ -2036,6 +2078,95 @@ def fake_open(req, timeout=None): assert captured[0].get_header("Authorization") == "Bearer ghp_testtoken" assert captured[0].get_header("Accept") == "application/octet-stream" + def test_fetch_single_catalog_uses_bounded_read(self, project_dir): + """Catalog JSON responses must use the shared bounded-read helper.""" + from unittest.mock import patch, MagicMock + + catalog = PresetCatalog(project_dir) + mock_response = MagicMock() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + entry = PresetCatalogEntry( + url="https://example.com/catalog.json", + name="custom", + priority=1, + install_allowed=True, + ) + + with patch.object(catalog, "_open_url", return_value=mock_response), \ + patch( + "specify_cli.presets.read_response_limited", + side_effect=PresetError("catalog too large"), + ): + with pytest.raises(PresetError, match="catalog too large"): + catalog._fetch_single_catalog(entry, force_refresh=True) + + def test_fetch_catalog_uses_bounded_read(self, project_dir): + """The legacy single-catalog path must also bound catalog JSON reads.""" + from unittest.mock import patch, MagicMock + + catalog = PresetCatalog(project_dir) + mock_response = MagicMock() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + + with patch.object(catalog, "get_catalog_url", return_value="https://example.com/catalog.json"), \ + patch.object(catalog, "_open_url", return_value=mock_response), \ + patch( + "specify_cli.presets.read_response_limited", + side_effect=PresetError("catalog too large"), + ): + with pytest.raises(PresetError, match="catalog too large"): + catalog.fetch_catalog(force_refresh=True) + + def test_download_pack_verifies_sha256(self, project_dir): + """Catalog-provided checksums are enforced when present.""" + from unittest.mock import patch, MagicMock + + catalog = PresetCatalog(project_dir) + zip_bytes = b"fake zip data" + mock_response = MagicMock() + mock_response.read.side_effect = io.BytesIO(zip_bytes).read + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + pack_info = { + "id": "test-pack", + "name": "Test Pack", + "version": "1.0.0", + "download_url": "https://example.com/test-pack.zip", + "sha256": hashlib.sha256(zip_bytes).hexdigest(), + "_install_allowed": True, + } + + with patch.object(catalog, "get_pack_info", return_value=pack_info), \ + patch.object(catalog, "_open_url", return_value=mock_response): + result = catalog.download_pack("test-pack", target_dir=project_dir) + + assert result.read_bytes() == zip_bytes + + def test_download_pack_rejects_sha256_mismatch(self, project_dir): + """A mismatched catalog checksum stops the downloaded ZIP being used.""" + from unittest.mock import patch, MagicMock + + catalog = PresetCatalog(project_dir) + mock_response = MagicMock() + mock_response.read.side_effect = io.BytesIO(b"fake zip data").read + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + pack_info = { + "id": "test-pack", + "name": "Test Pack", + "version": "1.0.0", + "download_url": "https://example.com/test-pack.zip", + "sha256": "0" * 64, + "_install_allowed": True, + } + + with patch.object(catalog, "get_pack_info", return_value=pack_info), \ + patch.object(catalog, "_open_url", return_value=mock_response): + with pytest.raises(PresetError, match="checksum mismatch"): + catalog.download_pack("test-pack", target_dir=project_dir) + # ===== Integration Tests ===== @@ -4335,10 +4466,12 @@ def test_preset_add_from_github_release_url_resolves_and_downloads(self, project class FakeResponse: def __init__(self, data): - self._data = data + # BytesIO so read(size) terminates at EOF — downloads go through + # the bounded read_response_limited helper, which loops to b"". + self._stream = io.BytesIO(data) - def read(self): - return self._data + def read(self, *args): + return self._stream.read(*args) def __enter__(self): return self @@ -4346,7 +4479,7 @@ def __enter__(self): def __exit__(self, *a): return False - def fake_open_url(url, timeout=None, extra_headers=None): + def fake_open_url(url, timeout=None, extra_headers=None, **kwargs): captured_urls.append((url, extra_headers)) if "releases/tags/" in url: return FakeResponse(json.dumps({ @@ -4393,10 +4526,12 @@ def test_preset_add_from_direct_api_asset_url_passes_through(self, project_dir): class FakeResponse: def __init__(self, data): - self._data = data + # BytesIO so read(size) terminates at EOF — downloads go through + # the bounded read_response_limited helper, which loops to b"". + self._stream = io.BytesIO(data) - def read(self): - return self._data + def read(self, *args): + return self._stream.read(*args) def __enter__(self): return self @@ -4404,7 +4539,7 @@ def __enter__(self): def __exit__(self, *a): return False - def fake_open_url(url, timeout=None, extra_headers=None): + def fake_open_url(url, timeout=None, extra_headers=None, **kwargs): captured_urls.append((url, extra_headers)) return FakeResponse(zip_bytes) diff --git a/tests/test_registrar_path_traversal.py b/tests/test_registrar_path_traversal.py index fc423b4056..006daa89e8 100644 --- a/tests/test_registrar_path_traversal.py +++ b/tests/test_registrar_path_traversal.py @@ -121,6 +121,31 @@ def test_copilot_rejects_traversal_in_alias(self, tmp_path, bad_alias): _assert_no_stray_files(tmp_path, Path(bad_alias).name.replace("/", "")) +class TestSourceFileTraversal: + """Command source files must stay inside the declared source directory.""" + + @pytest.mark.parametrize("bad_file", TRAVERSAL_PAYLOADS) + def test_rejects_traversal_in_command_source_file(self, tmp_path, bad_file): + project, ext_dir = _project_and_source(tmp_path) + (project / ".gemini" / "commands").mkdir(parents=True) + + registrar = CommandRegistrar() + with pytest.raises(ValueError, match="escapes directory"): + registrar.register_commands( + "gemini", + [ + { + "name": "speckit.myext.ok", + "file": bad_file, + "aliases": [], + } + ], + "myext", + ext_dir, + project, + ) + + class TestCopilotPromptTraversal: """`write_copilot_prompt` is a public static method — guard it directly.""" diff --git a/tests/test_security_workflow.py b/tests/test_security_workflow.py new file mode 100644 index 0000000000..b4e052da33 --- /dev/null +++ b/tests/test_security_workflow.py @@ -0,0 +1,465 @@ +"""Static checks for the GitHub Actions security workflow.""" + +from __future__ import annotations + +import inspect +import importlib.util +import json +import re +import subprocess +from pathlib import Path + +import pytest +import yaml + + +REPO_ROOT = Path(__file__).resolve().parent.parent +SECURITY_WORKFLOW = REPO_ROOT / ".github" / "workflows" / "security.yml" +CONTRIBUTING = REPO_ROOT / "CONTRIBUTING.md" +BANDIT_BASELINE = REPO_ROOT / ".github" / "bandit-baseline.json" +SECURITY_REQUIREMENTS = REPO_ROOT / ".github" / "security-audit-requirements.txt" +SECURITY_REQUIREMENTS_SYNC_SCRIPT = ( + REPO_ROOT / ".github" / "scripts" / "check_security_requirements.py" +) + +WORKFLOW_LIVE_AUDIT_REQUIREMENTS = '"${{ runner.temp }}/spec-kit-audit-requirements.txt"' +COMMITTED_AUDIT_REQUIREMENTS = ".github/security-audit-requirements.txt" +WORKFLOW_COMPILE_SCHEDULED_TEST_EXTRA_DEPS = ( + "uv pip compile pyproject.toml --extra test " + '--python-version "${{ matrix.python-version }}" --upgrade --generate-hashes --quiet ' + f"--output-file {WORKFLOW_LIVE_AUDIT_REQUIREMENTS}" +) +LOCAL_REFRESH_TEST_EXTRA_DEPS = ( + "uv pip compile pyproject.toml --extra test --universal --upgrade --generate-hashes " + f"--quiet --no-header --output-file {COMMITTED_AUDIT_REQUIREMENTS}" +) +WORKFLOW_SYNC_COMPILE_TEST_EXTRA_DEPS = ( + "uv pip compile pyproject.toml --extra test --universal --upgrade --generate-hashes " + "--quiet --no-header --output-file" +) +WORKFLOW_SYNC_SCRIPT = "python .github/scripts/check_security_requirements.py" +WORKFLOW_LIVE_PIP_AUDIT = ( + "uvx --from pip-audit==2.10.0 pip-audit --disable-pip --require-hashes " + f"-r {WORKFLOW_LIVE_AUDIT_REQUIREMENTS} --progress-spinner off" +) +LOCAL_PIP_AUDIT = ( + "uvx --from pip-audit==2.10.0 pip-audit --disable-pip --require-hashes " + f"-r {COMMITTED_AUDIT_REQUIREMENTS} --progress-spinner off" +) +BANDIT = ( + "uvx --from bandit==1.9.4 bandit -r src -lll " + "--baseline .github/bandit-baseline.json" +) + + +def _load_security_workflow() -> dict: + return yaml.safe_load(SECURITY_WORKFLOW.read_text(encoding="utf-8")) + + +def _workflow_triggers() -> dict: + workflow = _load_security_workflow() + return workflow.get("on") or workflow[True] + + +def _step(job_name: str, step_name: str) -> dict: + workflow = _load_security_workflow() + for step in workflow["jobs"][job_name]["steps"]: + if step.get("name") == step_name: + return step + raise AssertionError(f"Step {step_name!r} not found in job {job_name!r}.") + + +def _find_step_by_run_signature(job_name: str, marker: str) -> dict: + """Locate a step in *job_name* whose ``run`` command contains *marker*. + + Step naming is incidental to behavior; tests that assert on what a + step *does* should look it up by what it runs, not by its label, so + renames don't silently make the assertion skip. + """ + workflow = _load_security_workflow() + matches = [ + step + for step in workflow["jobs"][job_name]["steps"] + if marker in (step.get("run") or "") + ] + if not matches: + raise AssertionError( + f"No step in job {job_name!r} runs a command containing {marker!r}." + ) + if len(matches) > 1: + raise AssertionError( + f"Marker {marker!r} matched {len(matches)} steps in job " + f"{job_name!r}; expected exactly one." + ) + return matches[0] + + +def _load_sync_script(): + spec = importlib.util.spec_from_file_location( + "check_security_requirements", + SECURITY_REQUIREMENTS_SYNC_SCRIPT, + ) + assert spec is not None + assert spec.loader is not None + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +class TestSecurityWorkflow: + """Guard the security workflow against review-feedback regressions.""" + + def test_dependency_audit_uses_committed_requirements_for_prs_and_pushes(self): + scheduled_compile = _step( + "dependency-audit", + "Compile scheduled audit requirements", + ) + scheduled_audit = _step( + "dependency-audit", + "Run pip-audit (scheduled live resolution)", + ) + committed_audit = _step( + "dependency-audit", + "Run pip-audit (committed requirements)", + ) + sync_check = _step( + "dependency-audit", + "Check committed audit requirements are current", + ) + + assert scheduled_compile["if"] == "${{ github.event_name == 'schedule' }}" + assert WORKFLOW_COMPILE_SCHEDULED_TEST_EXTRA_DEPS in scheduled_compile["run"] + assert scheduled_audit["if"] == "${{ github.event_name == 'schedule' }}" + assert scheduled_audit["run"] == WORKFLOW_LIVE_PIP_AUDIT + assert sync_check["if"] == "${{ github.event_name != 'schedule' }}" + assert sync_check["env"]["DEPENDENCY_DIFF_BASE"] == ( + "${{ github.event.pull_request.base.sha || github.event.before || '' }}" + ) + assert sync_check["env"]["DEPENDENCY_DIFF_HEAD"] == "${{ github.sha }}" + assert sync_check["run"] == WORKFLOW_SYNC_SCRIPT + assert committed_audit["if"] == "${{ github.event_name != 'schedule' }}" + assert committed_audit["run"] == LOCAL_PIP_AUDIT + + dependency_job_text = "\n".join( + step.get("run", "") + for step in _load_security_workflow()["jobs"]["dependency-audit"]["steps"] + ) + dependency_protection_text = ( + dependency_job_text + + "\n" + + SECURITY_REQUIREMENTS_SYNC_SCRIPT.read_text(encoding="utf-8") + ) + assert "--generate-hashes" in dependency_protection_text + assert "--no-header" in dependency_protection_text + assert "--require-hashes" in dependency_protection_text + assert "--disable-pip" in dependency_protection_text + assert WORKFLOW_LIVE_AUDIT_REQUIREMENTS in dependency_job_text + assert COMMITTED_AUDIT_REQUIREMENTS in dependency_protection_text + assert "uv export" not in dependency_protection_text + assert "--frozen" not in dependency_protection_text + assert "--locked" not in dependency_protection_text + assert "uv.lock" not in dependency_protection_text + assert "/tmp/" not in dependency_protection_text + assert "uvx pip-audit ." not in dependency_protection_text + + def test_dependency_audit_checkout_fetches_previous_commit(self): + checkout = _step("dependency-audit", "Checkout") + + assert checkout["with"]["fetch-depth"] == 2 + + def test_security_workflow_triggers_are_preserved(self): + triggers = _workflow_triggers() + + assert triggers["push"]["branches"] == ["main"] + # labeled/unlabeled so the baseline-growth gates re-evaluate when the + # acknowledgement label is toggled, without requiring a new push. + assert triggers["pull_request"] == { + "types": ["opened", "synchronize", "reopened", "labeled", "unlabeled"] + } + assert triggers["workflow_dispatch"] is None + assert triggers["schedule"] == [{"cron": "17 4 * * 1"}] + + def test_dependency_audit_runs_supported_python_os_matrix(self): + workflow = _load_security_workflow() + matrix = workflow["jobs"]["dependency-audit"]["strategy"]["matrix"] + + assert matrix["os"] == ["ubuntu-latest", "windows-latest"] + assert matrix["python-version"] == ["3.11", "3.12", "3.13"] + assert workflow["jobs"]["dependency-audit"]["runs-on"] == "${{ matrix.os }}" + + def test_security_tools_are_pinned(self): + workflow_text = SECURITY_WORKFLOW.read_text(encoding="utf-8") + + assert WORKFLOW_LIVE_PIP_AUDIT in workflow_text + assert LOCAL_PIP_AUDIT in workflow_text + assert BANDIT in workflow_text + assert re.search(r"\buvx\s+pip-audit\b", workflow_text) is None + assert re.search(r"\buvx\s+bandit\b", workflow_text) is None + + def test_actions_are_pinned_to_full_commit_shas(self): + workflow = _load_security_workflow() + uses_refs = [ + step["uses"] + for job in workflow["jobs"].values() + for step in job["steps"] + if "uses" in step + ] + + assert uses_refs + for uses_ref in uses_refs: + assert re.search(r"@[0-9a-f]{40}$", uses_ref), uses_ref + assert re.search(r"@v\d+", uses_ref) is None + + def test_bandit_does_not_globally_skip_b602(self): + # Identify the blocking bandit step by its severity-level arg (-lll + # → HIGH only; the informational MEDIUM pass uses -ll). Doing this + # by behavior signature rather than step name keeps the test robust + # to renames while remaining unambiguous now that both passes share + # the baseline argument. + bandit_step = _find_step_by_run_signature("static-analysis", "-r src -lll") + run = bandit_step["run"] + workflow_text = SECURITY_WORKFLOW.read_text(encoding="utf-8") + + assert run == BANDIT + assert "--skip" not in run + assert "--skip B602" not in workflow_text + + def test_bandit_baseline_tracks_only_accepted_findings(self): + baseline = json.loads(BANDIT_BASELINE.read_text(encoding="utf-8")) + results = baseline["results"] + + # Identify entries by (filename, test_id), not line number: unrelated + # edits shift lines and force a baseline regen, and the growth gate + # (check_bandit_baseline.py) already guards full identities. + assert { + (result["filename"], result["test_id"]) for result in results + } == { + ("src/specify_cli/authentication/http.py", "B310"), + ("src/specify_cli/workflows/steps/shell/__init__.py", "B602"), + } + assert {result["issue_severity"] for result in results} == {"MEDIUM", "HIGH"} + + def test_bandit_nosec_is_not_suppressed_in_source(self): + nosec_lines = [] + for path in (REPO_ROOT / "src").rglob("*.py"): + for line_number, line in enumerate( + path.read_text(encoding="utf-8").splitlines(), + start=1, + ): + if re.search(r"#\s*nosec\b", line, flags=re.IGNORECASE): + nosec_lines.append(f"{path.relative_to(REPO_ROOT)}:{line_number}") + + assert nosec_lines == [] + + def test_run_command_rejects_shell_execution_compatibly(self): + from specify_cli import run_command + + assert inspect.signature(run_command).parameters["shell"].default is False + with pytest.raises(ValueError, match="does not support shell=True"): + run_command(["echo", "blocked"], shell=True) # noqa: S604 + + def test_committed_audit_requirements_are_hashed(self): + requirements = SECURITY_REQUIREMENTS.read_text(encoding="utf-8") + + assert "--hash=sha256:" in requirements + assert not requirements.startswith("#") + assert "pytest==" in requirements + assert "pytest-cov==" in requirements + + def test_sync_script_skips_when_dependency_inputs_are_unchanged( + self, + monkeypatch, + capsys, + ): + sync_script = _load_sync_script() + + def fake_run(command, **kwargs): + assert command == [ + "git", + "diff", + "--name-only", + "HEAD^", + "HEAD", + "--", + "pyproject.toml", + ".github/security-audit-requirements.txt", + ] + assert kwargs["check"] is True + return subprocess.CompletedProcess(command, 0, stdout="", stderr="") + + monkeypatch.setattr(sync_script.subprocess, "run", fake_run) + + assert sync_script.main() == 0 + assert "sync check skipped" in capsys.readouterr().out + + def test_sync_script_uses_github_diff_refs_when_available( + self, + monkeypatch, + ): + sync_script = _load_sync_script() + monkeypatch.setenv("DEPENDENCY_DIFF_BASE", "abc123") + monkeypatch.setenv("DEPENDENCY_DIFF_HEAD", "def456") + + def fake_run(command, **_kwargs): + assert command == [ + "git", + "diff", + "--name-only", + "abc123", + "def456", + "--", + "pyproject.toml", + ".github/security-audit-requirements.txt", + ] + return subprocess.CompletedProcess(command, 0, stdout="", stderr="") + + monkeypatch.setattr(sync_script.subprocess, "run", fake_run) + + assert sync_script._dependency_inputs_changed() is False + + def test_sync_script_compiles_and_compares_when_dependency_inputs_changed( + self, + monkeypatch, + tmp_path, + ): + sync_script = _load_sync_script() + committed_requirements = tmp_path / ".github" / "security-audit-requirements.txt" + generated_requirements = tmp_path / "generated-requirements.txt" + committed_requirements.parent.mkdir() + committed_requirements.write_text("pytest==1\n", encoding="utf-8") + compile_commands = [] + + monkeypatch.setattr(sync_script, "REPO_ROOT", tmp_path) + monkeypatch.setattr(sync_script, "COMMITTED_REQUIREMENTS", committed_requirements) + monkeypatch.setenv("GENERATED_REQUIREMENTS", str(generated_requirements)) + + def fake_run(command, **kwargs): + if command[0] == "git": + return subprocess.CompletedProcess( + command, + 0, + stdout="pyproject.toml\n", + stderr="", + ) + + compile_commands.append(command) + assert kwargs["check"] is True + generated_requirements.write_text("pytest==1\n", encoding="utf-8") + return subprocess.CompletedProcess(command, 0) + + monkeypatch.setattr(sync_script.subprocess, "run", fake_run) + + assert sync_script.main() == 0 + assert len(compile_commands) == 1 + compile_command = " ".join(compile_commands[0]) + assert WORKFLOW_SYNC_COMPILE_TEST_EXTRA_DEPS in compile_command + assert "--output-file" in compile_commands[0] + assert str(generated_requirements) in compile_commands[0] + + def test_sync_script_fails_when_generated_requirements_differ( + self, + monkeypatch, + tmp_path, + capsys, + ): + sync_script = _load_sync_script() + committed_requirements = tmp_path / ".github" / "security-audit-requirements.txt" + generated_requirements = tmp_path / "generated-requirements.txt" + committed_requirements.parent.mkdir() + committed_requirements.write_text("pytest==1\n", encoding="utf-8") + + monkeypatch.setattr(sync_script, "REPO_ROOT", tmp_path) + monkeypatch.setattr(sync_script, "COMMITTED_REQUIREMENTS", committed_requirements) + monkeypatch.setenv("GENERATED_REQUIREMENTS", str(generated_requirements)) + + def fake_run(command, **_kwargs): + if command[0] == "git": + return subprocess.CompletedProcess( + command, + 0, + stdout="pyproject.toml\n", + stderr="", + ) + + generated_requirements.write_text("pytest==2\n", encoding="utf-8") + return subprocess.CompletedProcess(command, 0) + + monkeypatch.setattr(sync_script.subprocess, "run", fake_run) + + assert sync_script.main() == 1 + assert ( + "Regenerate .github/security-audit-requirements.txt" + in capsys.readouterr().err + ) + + def test_contributing_documents_security_commands(self): + contributing_text = CONTRIBUTING.read_text(encoding="utf-8") + + assert LOCAL_REFRESH_TEST_EXTRA_DEPS in contributing_text + assert LOCAL_PIP_AUDIT in contributing_text + assert BANDIT in contributing_text + assert "/tmp/" not in contributing_text + assert "uv export" not in contributing_text + assert "--frozen" not in contributing_text + assert "--locked" not in contributing_text + assert ( + re.search( + r"--output-file\s+spec-kit-audit-requirements\.txt\b", + contributing_text, + ) + is None + ) + assert ( + re.search(r"-r\s+spec-kit-audit-requirements\.txt\b", contributing_text) + is None + ) + + # ----------------------------------------------------------------- + # secret-scan job (parity coverage with dependency-audit / bandit) + # ----------------------------------------------------------------- + + def test_secret_scan_job_uses_detect_secrets_hook(self): + workflow = _load_security_workflow() + scan_step = _find_step_by_run_signature("secret-scan", "detect-secrets-hook") + run = scan_step["run"] + + # The hook is the right tool: it compares against the baseline + # and exits non-zero on new findings, without rewriting the file. + assert "uvx --from detect-secrets==1.5.0 detect-secrets-hook" in run + assert "--baseline .secrets.baseline" in run + # Auto-generated content must be excluded so it doesn't dominate the scan. + assert "':!:.secrets.baseline'" in run + assert "':!:uv.lock'" in run + assert "':!:.github/security-audit-requirements.txt'" in run + # Iteration over tracked files is via git ls-files (-z to handle weird names). + assert "git ls-files -z" in run + # secret-scan job is in fact wired into the workflow. + assert "secret-scan" in workflow["jobs"] + + def test_secret_scan_job_has_baseline_growth_gate(self): + gate_step = _find_step_by_run_signature( + "secret-scan", "check_secrets_baseline.py" + ) + # The gate runs only on pull_request events (label is meaningless otherwise). + assert gate_step["if"] == "${{ github.event_name == 'pull_request' }}" + env = gate_step["env"] + assert env["SECRETS_BASELINE_BASE"] == ( + "${{ github.event.pull_request.base.sha }}" + ) + assert env["SECRETS_BASELINE_LABELS"] == ( + "${{ join(github.event.pull_request.labels.*.name, ',') }}" + ) + # Head is read from the working tree (fail-closed); env var must NOT + # be passed (else a future caller might think the script honors it). + assert "SECRETS_BASELINE_HEAD" not in env + + def test_secret_scan_checkout_has_full_history(self): + # The growth gate uses `git show :` so it needs full history. + workflow = _load_security_workflow() + checkout_steps = [ + step + for step in workflow["jobs"]["secret-scan"]["steps"] + if "actions/checkout" in (step.get("uses") or "") + ] + assert len(checkout_steps) == 1 + assert checkout_steps[0]["with"]["fetch-depth"] == 0 diff --git a/tests/test_self_upgrade_detection.py b/tests/test_self_upgrade_detection.py index ab575e7435..73b55ebb79 100644 --- a/tests/test_self_upgrade_detection.py +++ b/tests/test_self_upgrade_detection.py @@ -13,6 +13,7 @@ from specify_cli import app from tests.self_upgrade_helpers import ( + route_opener_open_through_urlopen, # noqa: F401 (autouse fixture) _InstallMethod, _assemble_installer_argv, _completed_process, diff --git a/tests/test_self_upgrade_execution.py b/tests/test_self_upgrade_execution.py index 6696b4fc79..5c761014be 100644 --- a/tests/test_self_upgrade_execution.py +++ b/tests/test_self_upgrade_execution.py @@ -7,6 +7,7 @@ from specify_cli import app from tests.self_upgrade_helpers import ( + route_opener_open_through_urlopen, # noqa: F401 (autouse fixture) _completed_process, mock_urlopen_response, requires_posix, diff --git a/tests/test_self_upgrade_verification.py b/tests/test_self_upgrade_verification.py index f1a018f06c..c4e7eecf1b 100644 --- a/tests/test_self_upgrade_verification.py +++ b/tests/test_self_upgrade_verification.py @@ -8,6 +8,7 @@ from specify_cli import app from tests.self_upgrade_helpers import ( + route_opener_open_through_urlopen, # noqa: F401 (autouse fixture) SENTINEL_GH_TOKEN, SENTINEL_GITHUB_TOKEN, _InstallMethod, diff --git a/tests/test_upgrade.py b/tests/test_upgrade.py index 3ad8c84f62..b6f3134286 100644 --- a/tests/test_upgrade.py +++ b/tests/test_upgrade.py @@ -17,6 +17,7 @@ from typer.testing import CliRunner from specify_cli import app +from specify_cli._download_security import read_response_limited as _real_read_response_limited from specify_cli._version import ( _fetch_latest_release_tag, _get_installed_version, @@ -24,7 +25,10 @@ _normalize_tag, ) from tests.conftest import strip_ansi -from tests.http_helpers import mock_urlopen_response +from tests.http_helpers import ( + mock_urlopen_response, + route_opener_open_through_urlopen, # noqa: F401 (autouse fixture) +) runner = CliRunner() @@ -235,6 +239,46 @@ def test_generic_exception_propagates(self): _fetch_latest_release_tag() +class TestBoundedRead: + """Regression test for the read_response_limited hardening. + + A future refactor could silently revert `_fetch_latest_release_tag` to + `resp.read()` (the unbounded form) — this test pins the contract that + the response body is read through ``read_response_limited`` with a + bounded ``max_bytes``. + """ + + def test_response_body_is_bounded(self): + recorded: dict[str, int | str] = {} + + def _spy(response, *, max_bytes: int, label: str, **kwargs): + # max_bytes and label are keyword-only with no defaults: if the + # caller forgets to pass either, the call raises TypeError here + # (instead of recording a misleading None). + recorded["max_bytes"] = max_bytes + recorded["label"] = label + # Forward to the real implementation so the function under test + # still gets a parseable body. + return _real_read_response_limited( + response, max_bytes=max_bytes, label=label, **kwargs + ) + + with patch( + "specify_cli.authentication.http.urllib.request.urlopen", + return_value=mock_urlopen_response({"tag_name": "v9.9.9"}), + ), patch("specify_cli._version.read_response_limited", side_effect=_spy): + tag, reason = _fetch_latest_release_tag() + + assert tag == "v9.9.9" + assert reason is None + # The cap (1 MiB) is a deliberate ceiling for the GitHub release + # JSON — keep it explicit so a future refactor that drops the + # `max_bytes=` argument fails this test instead of regressing + # silently to the default. + assert recorded["max_bytes"] == 1024 * 1024 + assert recorded["label"] == "GitHub latest release" + + _FAILURE_CASES = [ ("offline or timeout", urllib.error.URLError("down")), (_RATE_LIMITED_REASON, _http_error(403)), diff --git a/tests/test_workflows.py b/tests/test_workflows.py index 51da5cc86b..59acb32f66 100644 --- a/tests/test_workflows.py +++ b/tests/test_workflows.py @@ -12,6 +12,7 @@ from __future__ import annotations +import io import json import os import shutil @@ -3401,6 +3402,88 @@ def test_get_catalog_configs(self, project_dir): assert configs[0]["name"] == "default" assert isinstance(configs[0]["install_allowed"], bool) + def test_fetch_single_catalog_uses_bounded_read(self, project_dir, monkeypatch): + """Regression test for the read_response_limited hardening on + workflow catalog downloads. Mirrors TestBoundedRead for + _fetch_latest_release_tag and the equivalent test in + tests/integrations/test_integration_catalog.py for the + integration catalog. A future refactor that drops the bounded + read here would let a malicious server stream an unbounded + catalog into memory.""" + from specify_cli.workflows.catalog import ( + WorkflowCatalog, + WorkflowCatalogEntry, + WorkflowCatalogError, + ) + from specify_cli import _download_security as _download_security_module + import specify_cli.authentication.http as _auth_http + + entry = WorkflowCatalogEntry( + url="https://example.com/workflow-catalog.json", + name="test", + priority=0, + install_allowed=False, + ) + + recorded: dict[str, object] = {} + real_read = _download_security_module.read_response_limited + + def _spy(response, **kwargs): + # Capture exactly the kwargs the caller chose to pass, so the + # assertion below can distinguish "explicit" from "default". + recorded["kwargs"] = dict(kwargs) + return real_read(response, **kwargs) + + class _FakeResponse: + def __init__(self): + self._data = json.dumps({"workflows": []}).encode() + self._pos = 0 + + def read(self, size=-1): + # Advance a cursor and return b"" at EOF like a real stream, so + # read_response_limited's bounded loop terminates. + if size is None or size < 0: + size = len(self._data) - self._pos + out = self._data[self._pos : self._pos + size] + self._pos += len(out) + return out + + def geturl(self): + return entry.url + + def __enter__(self): + return self + + def __exit__(self, *_a): + pass + + def _fake_urlopen(req, timeout=30): + return _FakeResponse() + + monkeypatch.setattr(_auth_http.urllib.request, "urlopen", _fake_urlopen) + monkeypatch.setattr( + _auth_http.urllib.request.OpenerDirector, + "open", + lambda _self, req, data=None, timeout=30: _fake_urlopen(req, timeout), + ) + monkeypatch.setattr( + "specify_cli.workflows.catalog.read_response_limited", _spy + ) + + cat = WorkflowCatalog(project_dir) + cat._fetch_single_catalog(entry, force_refresh=True) + + # Bounded read was invoked (not raw resp.read()). error_type must + # be the WorkflowCatalogError so an oversized response surfaces + # as a workflow-catalog domain error, not a generic ValueError + # that callers might miss. + from specify_cli._download_security import MAX_JSON_CATALOG_BYTES + + assert "kwargs" in recorded, "read_response_limited was not called" + assert recorded["kwargs"]["error_type"] is WorkflowCatalogError + assert recorded["kwargs"]["label"] == "workflow catalog" + assert recorded["kwargs"]["max_bytes"] == MAX_JSON_CATALOG_BYTES + # ===== Integration Test ===== @@ -3793,8 +3876,12 @@ def __init__(self, data, url=None): self._data = data self._url = url or "https://api.github.com/repos/org/repo/releases/assets/42" - def read(self): - return self._data + def read(self, *args): + # BytesIO so read(size) terminates at EOF — downloads go through + # the bounded read_response_limited helper, which loops to b"". + if not hasattr(self, "_stream"): + self._stream = io.BytesIO(self._data) + return self._stream.read(*args) def geturl(self): return self._url @@ -3805,7 +3892,7 @@ def __enter__(self): def __exit__(self, *a): return False - def fake_open_url(url, timeout=None, extra_headers=None): + def fake_open_url(url, timeout=None, extra_headers=None, **kwargs): captured_urls.append((url, extra_headers, timeout)) if "releases/tags/" in url: return FakeResponse(json.dumps({ @@ -3845,8 +3932,12 @@ def __init__(self, data, url=None): self._data = data self._url = url or "https://api.github.com/repos/org/repo/releases/assets/42" - def read(self): - return self._data + def read(self, *args): + # BytesIO so read(size) terminates at EOF — downloads go through + # the bounded read_response_limited helper, which loops to b"". + if not hasattr(self, "_stream"): + self._stream = io.BytesIO(self._data) + return self._stream.read(*args) def geturl(self): return self._url @@ -3857,7 +3948,7 @@ def __enter__(self): def __exit__(self, *a): return False - def fake_open_url(url, timeout=None, extra_headers=None): + def fake_open_url(url, timeout=None, extra_headers=None, **kwargs): captured_urls.append((url, extra_headers)) return FakeResponse(self.VALID_WORKFLOW_YAML.encode()) @@ -3888,8 +3979,12 @@ def __init__(self, data, url=None): self._data = data self._url = url or "https://api.github.com/repos/org/repo/releases/assets/55" - def read(self): - return self._data + def read(self, *args): + # BytesIO so read(size) terminates at EOF — downloads go through + # the bounded read_response_limited helper, which loops to b"". + if not hasattr(self, "_stream"): + self._stream = io.BytesIO(self._data) + return self._stream.read(*args) def geturl(self): return self._url @@ -3900,7 +3995,7 @@ def __enter__(self): def __exit__(self, *a): return False - def fake_open_url(url, timeout=None, extra_headers=None): + def fake_open_url(url, timeout=None, extra_headers=None, **kwargs): captured_urls.append((url, extra_headers)) if "releases/tags/" in url: return FakeResponse(json.dumps({ diff --git a/workflows/PUBLISHING.md b/workflows/PUBLISHING.md index ce0d251826..0370ed09f9 100644 --- a/workflows/PUBLISHING.md +++ b/workflows/PUBLISHING.md @@ -272,6 +272,17 @@ When releasing a new version: - **Quote variables** — use proper quoting in shell commands to handle spaces - **Check exit codes** — shell step failures stop the workflow; make sure commands are robust +#### Security: shell steps execute arbitrary code + +Workflow `shell` steps execute their `run` field through `/bin/sh` (POSIX) or the platform shell. There is no sandbox between the step and the user's machine: a malicious or buggy `run` block can read environment variables, modify files outside the project, exfiltrate data, or escalate privileges. + +Catalog-listed workflows are reviewed at submission time (see [Verification Process](#verification-process)), but you should still treat every install as code-execution from an untrusted source until you have read the `workflow.yml`: + +- **Before installing a workflow**, fetch the raw YAML and audit every `shell` step's `run` field directly. `specify workflow info ` only shows metadata (name, version, inputs, step IDs/types) — not the shell content that would actually execute. +- **Prefer explicit commands over interpolation** in `run` blocks: `{{ inputs.something }}` substitutions should be quoted and constrained via `enum` so a malicious input can't inject shell syntax. +- **Limit privilege**: shell steps inherit the user's environment. Workflows that need elevated access (sudo, secrets, GitHub tokens) should call them out explicitly in the README so reviewers can spot the requirement. +- **Authors**: if your workflow has shell steps that look risky out of context (deletions, network calls, credential reads), document the rationale in your README. Maintainers will reject submissions whose shell steps can't be justified at review time. + ### Integration Flexibility - **Set `integration` at workflow level** — use the `workflow.integration` field as the default