From e4b38e755361e730c057ee3da7d2f1e4362155cc Mon Sep 17 00:00:00 2001 From: Pascal Date: Sat, 2 May 2026 08:39:20 +0200 Subject: [PATCH 01/30] Add automated security audit workflow --- .github/workflows/security.yml | 51 ++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 .github/workflows/security.yml diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml new file mode 100644 index 0000000000..5a8cfa8bf9 --- /dev/null +++ b/.github/workflows/security.yml @@ -0,0 +1,51 @@ +name: Security Audit + +permissions: + contents: read + +on: + push: + branches: ["main"] + pull_request: + schedule: + - cron: "17 4 * * 1" + workflow_dispatch: + +jobs: + dependency-audit: + name: Dependency audit + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + + - name: Install uv + uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0 + + - name: Set up Python + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6 + with: + python-version: "3.13" + + - name: Run pip-audit + run: uvx pip-audit . --progress-spinner off + + static-analysis: + name: Static analysis + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + + - name: Install uv + uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0 + + - name: Set up Python + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6 + with: + python-version: "3.13" + + - name: Run Bandit + # B602 is tracked in #2440; keep the baseline green until shell steps + # require explicit opt-in. 
+ run: uvx bandit -r src -lll --skip B602 From 65b0dabedec1d08c1c5599c33ecc1e3972971a3b Mon Sep 17 00:00:00 2001 From: Pascal Date: Tue, 5 May 2026 18:46:03 +0200 Subject: [PATCH 02/30] Address security audit review feedback --- .github/workflows/security.yml | 8 ++++---- CONTRIBUTING.md | 10 ++++++++++ src/specify_cli/__init__.py | 12 ++++++++++-- src/specify_cli/workflows/steps/shell/__init__.py | 2 +- 4 files changed, 25 insertions(+), 7 deletions(-) diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml index 5a8cfa8bf9..52086fca20 100644 --- a/.github/workflows/security.yml +++ b/.github/workflows/security.yml @@ -28,7 +28,9 @@ jobs: python-version: "3.13" - name: Run pip-audit - run: uvx pip-audit . --progress-spinner off + run: | + uv export --quiet --extra test --frozen --format requirements.txt --no-emit-project --output-file /tmp/spec-kit-audit-requirements.txt + uvx --from pip-audit==2.10.0 pip-audit -r /tmp/spec-kit-audit-requirements.txt --progress-spinner off static-analysis: name: Static analysis @@ -46,6 +48,4 @@ jobs: python-version: "3.13" - name: Run Bandit - # B602 is tracked in #2440; keep the baseline green until shell steps - # require explicit opt-in. - run: uvx bandit -r src -lll --skip B602 + run: uvx --from bandit==1.9.4 bandit -r src -lll diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 5188d70a71..f1557402e0 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -81,6 +81,16 @@ uv run python -m pytest tests/test_agent_config_consistency.py -q Run this when you change agent metadata, context update scripts, or integration wiring. 
+#### Security checks + +```bash +uv export --quiet --extra test --frozen --format requirements.txt --no-emit-project --output-file /tmp/spec-kit-audit-requirements.txt +uvx --from pip-audit==2.10.0 pip-audit -r /tmp/spec-kit-audit-requirements.txt --progress-spinner off +uvx --from bandit==1.9.4 bandit -r src -lll +``` + +Run these before changing dependency metadata, workflow execution code, subprocess usage, or security-sensitive paths. The dependency audit uses the locked runtime and `test` extra dependency set used by CI and contributors. + ### Manual testing #### Testing setup diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py index ccd670d20e..0364efb021 100644 --- a/src/specify_cli/__init__.py +++ b/src/specify_cli/__init__.py @@ -400,10 +400,18 @@ def run_command(cmd: list[str], check_return: bool = True, capture: bool = False """Run a shell command and optionally capture output.""" try: if capture: - result = subprocess.run(cmd, check=check_return, capture_output=True, text=True, shell=shell) + # shell=True is only available to callers that opt in explicitly. + result = subprocess.run( # nosec B602 + cmd, + check=check_return, + capture_output=True, + text=True, + shell=shell, + ) return result.stdout.strip() else: - subprocess.run(cmd, check=check_return, shell=shell) + # shell=True is only available to callers that opt in explicitly. 
+ subprocess.run(cmd, check=check_return, shell=shell) # nosec B602 return None except subprocess.CalledProcessError as e: if check_return: diff --git a/src/specify_cli/workflows/steps/shell/__init__.py b/src/specify_cli/workflows/steps/shell/__init__.py index 73ac99530a..2eb381ad20 100644 --- a/src/specify_cli/workflows/steps/shell/__init__.py +++ b/src/specify_cli/workflows/steps/shell/__init__.py @@ -32,7 +32,7 @@ def execute(self, config: dict[str, Any], context: StepContext) -> StepResult: try: proc = subprocess.run( run_cmd, - shell=True, + shell=True, # nosec B602 capture_output=True, text=True, cwd=cwd, From 397e0d7efc248b6f67d78a14fac24a5364bd46f6 Mon Sep 17 00:00:00 2001 From: Pascal Date: Tue, 5 May 2026 18:49:27 +0200 Subject: [PATCH 03/30] Add security workflow regression tests --- tests/test_security_workflow.py | 70 +++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 tests/test_security_workflow.py diff --git a/tests/test_security_workflow.py b/tests/test_security_workflow.py new file mode 100644 index 0000000000..92685340ea --- /dev/null +++ b/tests/test_security_workflow.py @@ -0,0 +1,70 @@ +"""Static checks for the GitHub Actions security workflow.""" + +from __future__ import annotations + +import re +from pathlib import Path + +import yaml + + +REPO_ROOT = Path(__file__).resolve().parent.parent +SECURITY_WORKFLOW = REPO_ROOT / ".github" / "workflows" / "security.yml" +CONTRIBUTING = REPO_ROOT / "CONTRIBUTING.md" + +AUDIT_REQUIREMENTS = "/tmp/spec-kit-audit-requirements.txt" +EXPORT_TEST_DEPS = ( + "uv export --quiet --extra test --frozen --format requirements.txt " + f"--no-emit-project --output-file {AUDIT_REQUIREMENTS}" +) +PIP_AUDIT = ( + "uvx --from pip-audit==2.10.0 pip-audit " + f"-r {AUDIT_REQUIREMENTS} --progress-spinner off" +) +BANDIT = "uvx --from bandit==1.9.4 bandit -r src -lll" + + +def _load_security_workflow() -> dict: + return yaml.safe_load(SECURITY_WORKFLOW.read_text(encoding="utf-8")) + + 
+def _step_run(job_name: str, step_name: str) -> str: + workflow = _load_security_workflow() + for step in workflow["jobs"][job_name]["steps"]: + if step.get("name") == step_name: + return step["run"] + raise AssertionError(f"Step {step_name!r} not found in job {job_name!r}.") + + +class TestSecurityWorkflow: + """Guard the security workflow against review-feedback regressions.""" + + def test_dependency_audit_uses_locked_test_extra_export(self): + run = _step_run("dependency-audit", "Run pip-audit") + + assert EXPORT_TEST_DEPS in run + assert PIP_AUDIT in run + assert "uvx pip-audit ." not in run + + def test_security_tools_are_pinned(self): + workflow_text = SECURITY_WORKFLOW.read_text(encoding="utf-8") + + assert PIP_AUDIT in workflow_text + assert BANDIT in workflow_text + assert re.search(r"\buvx\s+pip-audit\b", workflow_text) is None + assert re.search(r"\buvx\s+bandit\b", workflow_text) is None + + def test_bandit_does_not_globally_skip_b602(self): + run = _step_run("static-analysis", "Run Bandit") + workflow_text = SECURITY_WORKFLOW.read_text(encoding="utf-8") + + assert run == BANDIT + assert "--skip" not in run + assert "--skip B602" not in workflow_text + + def test_contributing_documents_security_commands(self): + contributing_text = CONTRIBUTING.read_text(encoding="utf-8") + + assert EXPORT_TEST_DEPS in contributing_text + assert PIP_AUDIT in contributing_text + assert BANDIT in contributing_text From e1e805196009d2b90beabe7753d4850c489f4876 Mon Sep 17 00:00:00 2001 From: Pascal Date: Tue, 5 May 2026 22:17:57 +0200 Subject: [PATCH 04/30] Address follow-up security workflow review --- .github/bandit-baseline.json | 31 ++++++++++++ .github/workflows/security.yml | 4 +- CONTRIBUTING.md | 6 +-- src/specify_cli/__init__.py | 15 +++--- .../workflows/steps/shell/__init__.py | 2 +- tests/test_security_workflow.py | 48 ++++++++++++++++--- 6 files changed, 88 insertions(+), 18 deletions(-) create mode 100644 .github/bandit-baseline.json diff --git 
a/.github/bandit-baseline.json b/.github/bandit-baseline.json new file mode 100644 index 0000000000..2c6a477879 --- /dev/null +++ b/.github/bandit-baseline.json @@ -0,0 +1,31 @@ +{ + "results": [ + { + "code": "34 run_cmd,\n35 shell=True,\n36 capture_output=True,\n37 text=True,\n38 cwd=cwd,\n39 timeout=300,\n40 )\n41 output = {\n42 \"exit_code\": proc.returncode,\n43 \"stdout\": proc.stdout,\n", + "col_offset": 19, + "end_col_offset": 13, + "filename": "src/specify_cli/workflows/steps/shell/__init__.py", + "issue_confidence": "HIGH", + "issue_cwe": { + "id": 78, + "link": "https://cwe.mitre.org/data/definitions/78.html" + }, + "issue_severity": "HIGH", + "issue_text": "subprocess call with shell=True identified, security issue.", + "line_number": 35, + "line_range": [ + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40 + ], + "more_info": "https://bandit.readthedocs.io/en/1.9.4/plugins/b602_subprocess_popen_with_shell_equals_true.html", + "test_id": "B602", + "test_name": "subprocess_popen_with_shell_equals_true" + } + ] +} diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml index 52086fca20..49a82df187 100644 --- a/.github/workflows/security.yml +++ b/.github/workflows/security.yml @@ -29,7 +29,7 @@ jobs: - name: Run pip-audit run: | - uv export --quiet --extra test --frozen --format requirements.txt --no-emit-project --output-file /tmp/spec-kit-audit-requirements.txt + uv export --quiet --extra test --format requirements.txt --no-emit-project --output-file /tmp/spec-kit-audit-requirements.txt uvx --from pip-audit==2.10.0 pip-audit -r /tmp/spec-kit-audit-requirements.txt --progress-spinner off static-analysis: @@ -48,4 +48,4 @@ jobs: python-version: "3.13" - name: Run Bandit - run: uvx --from bandit==1.9.4 bandit -r src -lll + run: uvx --from bandit==1.9.4 bandit -r src -lll --baseline .github/bandit-baseline.json diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f1557402e0..2894f691fd 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ 
-84,12 +84,12 @@ Run this when you change agent metadata, context update scripts, or integration #### Security checks ```bash -uv export --quiet --extra test --frozen --format requirements.txt --no-emit-project --output-file /tmp/spec-kit-audit-requirements.txt +uv export --quiet --extra test --format requirements.txt --no-emit-project --output-file /tmp/spec-kit-audit-requirements.txt uvx --from pip-audit==2.10.0 pip-audit -r /tmp/spec-kit-audit-requirements.txt --progress-spinner off -uvx --from bandit==1.9.4 bandit -r src -lll +uvx --from bandit==1.9.4 bandit -r src -lll --baseline .github/bandit-baseline.json ``` -Run these before changing dependency metadata, workflow execution code, subprocess usage, or security-sensitive paths. The dependency audit uses the locked runtime and `test` extra dependency set used by CI and contributors. +Run these before changing dependency metadata, workflow execution code, subprocess usage, or security-sensitive paths. The dependency audit resolves the runtime and `test` extra dependency set used by CI and contributors. ### Manual testing diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py index 0364efb021..b386501b50 100644 --- a/src/specify_cli/__init__.py +++ b/src/specify_cli/__init__.py @@ -397,21 +397,24 @@ def callback( console.print() def run_command(cmd: list[str], check_return: bool = True, capture: bool = False, shell: bool = False) -> Optional[str]: - """Run a shell command and optionally capture output.""" + """Run a command without invoking a shell and optionally capture output.""" + if shell: + raise ValueError( + "run_command does not support shell=True; use a reviewed " + "subprocess.run call for shell-specific behavior." + ) + try: if capture: - # shell=True is only available to callers that opt in explicitly. 
- result = subprocess.run( # nosec B602 + result = subprocess.run( cmd, check=check_return, capture_output=True, text=True, - shell=shell, ) return result.stdout.strip() else: - # shell=True is only available to callers that opt in explicitly. - subprocess.run(cmd, check=check_return, shell=shell) # nosec B602 + subprocess.run(cmd, check=check_return) return None except subprocess.CalledProcessError as e: if check_return: diff --git a/src/specify_cli/workflows/steps/shell/__init__.py b/src/specify_cli/workflows/steps/shell/__init__.py index 2eb381ad20..73ac99530a 100644 --- a/src/specify_cli/workflows/steps/shell/__init__.py +++ b/src/specify_cli/workflows/steps/shell/__init__.py @@ -32,7 +32,7 @@ def execute(self, config: dict[str, Any], context: StepContext) -> StepResult: try: proc = subprocess.run( run_cmd, - shell=True, # nosec B602 + shell=True, capture_output=True, text=True, cwd=cwd, diff --git a/tests/test_security_workflow.py b/tests/test_security_workflow.py index 92685340ea..2836a5d15d 100644 --- a/tests/test_security_workflow.py +++ b/tests/test_security_workflow.py @@ -2,26 +2,32 @@ from __future__ import annotations +import json import re from pathlib import Path +import pytest import yaml REPO_ROOT = Path(__file__).resolve().parent.parent SECURITY_WORKFLOW = REPO_ROOT / ".github" / "workflows" / "security.yml" CONTRIBUTING = REPO_ROOT / "CONTRIBUTING.md" +BANDIT_BASELINE = REPO_ROOT / ".github" / "bandit-baseline.json" AUDIT_REQUIREMENTS = "/tmp/spec-kit-audit-requirements.txt" -EXPORT_TEST_DEPS = ( - "uv export --quiet --extra test --frozen --format requirements.txt " +EXPORT_TEST_EXTRA_DEPS = ( + "uv export --quiet --extra test --format requirements.txt " f"--no-emit-project --output-file {AUDIT_REQUIREMENTS}" ) PIP_AUDIT = ( "uvx --from pip-audit==2.10.0 pip-audit " f"-r {AUDIT_REQUIREMENTS} --progress-spinner off" ) -BANDIT = "uvx --from bandit==1.9.4 bandit -r src -lll" +BANDIT = ( + "uvx --from bandit==1.9.4 bandit -r src -lll " + "--baseline 
.github/bandit-baseline.json" +) def _load_security_workflow() -> dict: @@ -39,11 +45,13 @@ def _step_run(job_name: str, step_name: str) -> str: class TestSecurityWorkflow: """Guard the security workflow against review-feedback regressions.""" - def test_dependency_audit_uses_locked_test_extra_export(self): + def test_dependency_audit_uses_test_extra_export_without_lockfile_flags(self): run = _step_run("dependency-audit", "Run pip-audit") - assert EXPORT_TEST_DEPS in run + assert EXPORT_TEST_EXTRA_DEPS in run assert PIP_AUDIT in run + assert "--frozen" not in run + assert "--locked" not in run assert "uvx pip-audit ." not in run def test_security_tools_are_pinned(self): @@ -61,10 +69,38 @@ def test_bandit_does_not_globally_skip_b602(self): assert run == BANDIT assert "--skip" not in run assert "--skip B602" not in workflow_text + assert "--baseline .github/bandit-baseline.json" in run + + def test_bandit_baseline_only_ignores_shell_step_b602(self): + baseline = json.loads(BANDIT_BASELINE.read_text(encoding="utf-8")) + results = baseline["results"] + + assert len(results) == 1 + assert results[0]["test_id"] == "B602" + assert ( + results[0]["filename"] + == "src/specify_cli/workflows/steps/shell/__init__.py" + ) + + def test_b602_is_not_suppressed_in_source(self): + source_text = "\n".join( + path.read_text(encoding="utf-8") + for path in (REPO_ROOT / "src").rglob("*.py") + ) + + assert "# nosec B602" not in source_text + + def test_run_command_rejects_shell_true(self): + from specify_cli import run_command + + with pytest.raises(ValueError, match="shell=True"): + run_command(["echo", "hello"], shell=True) def test_contributing_documents_security_commands(self): contributing_text = CONTRIBUTING.read_text(encoding="utf-8") - assert EXPORT_TEST_DEPS in contributing_text + assert EXPORT_TEST_EXTRA_DEPS in contributing_text assert PIP_AUDIT in contributing_text assert BANDIT in contributing_text + assert "--frozen" not in contributing_text + assert "--locked" not in 
contributing_text From 6f1da27be4d5d047d36e149c24dff518b8879067 Mon Sep 17 00:00:00 2001 From: Pascal Date: Tue, 5 May 2026 22:26:51 +0200 Subject: [PATCH 05/30] Use compile for security audit requirements --- .github/workflows/security.yml | 2 +- CONTRIBUTING.md | 2 +- tests/test_security_workflow.py | 15 +++++++++------ 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml index 49a82df187..b4b266b4f9 100644 --- a/.github/workflows/security.yml +++ b/.github/workflows/security.yml @@ -29,7 +29,7 @@ jobs: - name: Run pip-audit run: | - uv export --quiet --extra test --format requirements.txt --no-emit-project --output-file /tmp/spec-kit-audit-requirements.txt + uv pip compile pyproject.toml --extra test --quiet --output-file /tmp/spec-kit-audit-requirements.txt uvx --from pip-audit==2.10.0 pip-audit -r /tmp/spec-kit-audit-requirements.txt --progress-spinner off static-analysis: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 2894f691fd..c1af13185d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -84,7 +84,7 @@ Run this when you change agent metadata, context update scripts, or integration #### Security checks ```bash -uv export --quiet --extra test --format requirements.txt --no-emit-project --output-file /tmp/spec-kit-audit-requirements.txt +uv pip compile pyproject.toml --extra test --quiet --output-file /tmp/spec-kit-audit-requirements.txt uvx --from pip-audit==2.10.0 pip-audit -r /tmp/spec-kit-audit-requirements.txt --progress-spinner off uvx --from bandit==1.9.4 bandit -r src -lll --baseline .github/bandit-baseline.json ``` diff --git a/tests/test_security_workflow.py b/tests/test_security_workflow.py index 2836a5d15d..9d2532607d 100644 --- a/tests/test_security_workflow.py +++ b/tests/test_security_workflow.py @@ -16,9 +16,9 @@ BANDIT_BASELINE = REPO_ROOT / ".github" / "bandit-baseline.json" AUDIT_REQUIREMENTS = "/tmp/spec-kit-audit-requirements.txt" -EXPORT_TEST_EXTRA_DEPS = 
( - "uv export --quiet --extra test --format requirements.txt " - f"--no-emit-project --output-file {AUDIT_REQUIREMENTS}" +COMPILE_TEST_EXTRA_DEPS = ( + "uv pip compile pyproject.toml --extra test --quiet " + f"--output-file {AUDIT_REQUIREMENTS}" ) PIP_AUDIT = ( "uvx --from pip-audit==2.10.0 pip-audit " @@ -45,13 +45,15 @@ def _step_run(job_name: str, step_name: str) -> str: class TestSecurityWorkflow: """Guard the security workflow against review-feedback regressions.""" - def test_dependency_audit_uses_test_extra_export_without_lockfile_flags(self): + def test_dependency_audit_compiles_test_extra_requirements_without_lockfile(self): run = _step_run("dependency-audit", "Run pip-audit") - assert EXPORT_TEST_EXTRA_DEPS in run + assert COMPILE_TEST_EXTRA_DEPS in run assert PIP_AUDIT in run + assert "uv export" not in run assert "--frozen" not in run assert "--locked" not in run + assert "uv.lock" not in run assert "uvx pip-audit ." not in run def test_security_tools_are_pinned(self): @@ -99,8 +101,9 @@ def test_run_command_rejects_shell_true(self): def test_contributing_documents_security_commands(self): contributing_text = CONTRIBUTING.read_text(encoding="utf-8") - assert EXPORT_TEST_EXTRA_DEPS in contributing_text + assert COMPILE_TEST_EXTRA_DEPS in contributing_text assert PIP_AUDIT in contributing_text assert BANDIT in contributing_text + assert "uv export" not in contributing_text assert "--frozen" not in contributing_text assert "--locked" not in contributing_text From 129d19e44ddadd8d76af2aa8a3b1effea084da69 Mon Sep 17 00:00:00 2001 From: Pascal Date: Tue, 5 May 2026 22:45:33 +0200 Subject: [PATCH 06/30] Address latest security workflow review --- .github/workflows/security.yml | 17 ++++--- CONTRIBUTING.md | 6 +-- tests/test_security_workflow.py | 80 +++++++++++++++++++++++++-------- 3 files changed, 75 insertions(+), 28 deletions(-) diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml index b4b266b4f9..fced205987 100644 --- 
a/.github/workflows/security.yml +++ b/.github/workflows/security.yml @@ -13,8 +13,13 @@ on: jobs: dependency-audit: - name: Dependency audit - runs-on: ubuntu-latest + name: Dependency audit (${{ matrix.os }}, Python ${{ matrix.python-version }}) + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest] + python-version: ["3.11", "3.12", "3.13"] steps: - name: Checkout uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 @@ -22,15 +27,15 @@ jobs: - name: Install uv uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0 - - name: Set up Python + - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6 with: - python-version: "3.13" + python-version: ${{ matrix.python-version }} - name: Run pip-audit run: | - uv pip compile pyproject.toml --extra test --quiet --output-file /tmp/spec-kit-audit-requirements.txt - uvx --from pip-audit==2.10.0 pip-audit -r /tmp/spec-kit-audit-requirements.txt --progress-spinner off + uv pip compile pyproject.toml --extra test --python-version "${{ matrix.python-version }}" --generate-hashes --quiet --output-file "${{ runner.temp }}/spec-kit-audit-requirements.txt" + uvx --from pip-audit==2.10.0 pip-audit --disable-pip --require-hashes -r "${{ runner.temp }}/spec-kit-audit-requirements.txt" --progress-spinner off static-analysis: name: Static analysis diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c1af13185d..f9f87b4d0e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -84,12 +84,12 @@ Run this when you change agent metadata, context update scripts, or integration #### Security checks ```bash -uv pip compile pyproject.toml --extra test --quiet --output-file /tmp/spec-kit-audit-requirements.txt -uvx --from pip-audit==2.10.0 pip-audit -r /tmp/spec-kit-audit-requirements.txt --progress-spinner off +uv pip compile pyproject.toml --extra test --generate-hashes --quiet --output-file 
spec-kit-audit-requirements.txt +uvx --from pip-audit==2.10.0 pip-audit --disable-pip --require-hashes -r spec-kit-audit-requirements.txt --progress-spinner off uvx --from bandit==1.9.4 bandit -r src -lll --baseline .github/bandit-baseline.json ``` -Run these before changing dependency metadata, workflow execution code, subprocess usage, or security-sensitive paths. The dependency audit resolves the runtime and `test` extra dependency set used by CI and contributors. +Run these before changing dependency metadata, workflow execution code, subprocess usage, or security-sensitive paths. The dependency audit resolves the runtime and `test` extra dependency set used by CI and contributors. CI runs the dependency audit across the supported Python and OS matrix; locally, run these commands from the environment you want to reproduce. ### Manual testing diff --git a/tests/test_security_workflow.py b/tests/test_security_workflow.py index 9d2532607d..0d4c90f807 100644 --- a/tests/test_security_workflow.py +++ b/tests/test_security_workflow.py @@ -15,14 +15,24 @@ CONTRIBUTING = REPO_ROOT / "CONTRIBUTING.md" BANDIT_BASELINE = REPO_ROOT / ".github" / "bandit-baseline.json" -AUDIT_REQUIREMENTS = "/tmp/spec-kit-audit-requirements.txt" -COMPILE_TEST_EXTRA_DEPS = ( - "uv pip compile pyproject.toml --extra test --quiet " - f"--output-file {AUDIT_REQUIREMENTS}" +WORKFLOW_AUDIT_REQUIREMENTS = '"${{ runner.temp }}/spec-kit-audit-requirements.txt"' +LOCAL_AUDIT_REQUIREMENTS = "spec-kit-audit-requirements.txt" +WORKFLOW_COMPILE_TEST_EXTRA_DEPS = ( + "uv pip compile pyproject.toml --extra test " + '--python-version "${{ matrix.python-version }}" --generate-hashes --quiet ' + f"--output-file {WORKFLOW_AUDIT_REQUIREMENTS}" ) -PIP_AUDIT = ( - "uvx --from pip-audit==2.10.0 pip-audit " - f"-r {AUDIT_REQUIREMENTS} --progress-spinner off" +LOCAL_COMPILE_TEST_EXTRA_DEPS = ( + "uv pip compile pyproject.toml --extra test --generate-hashes --quiet " + f"--output-file {LOCAL_AUDIT_REQUIREMENTS}" +) 
+WORKFLOW_PIP_AUDIT = ( + "uvx --from pip-audit==2.10.0 pip-audit --disable-pip --require-hashes " + f"-r {WORKFLOW_AUDIT_REQUIREMENTS} --progress-spinner off" +) +LOCAL_PIP_AUDIT = ( + "uvx --from pip-audit==2.10.0 pip-audit --disable-pip --require-hashes " + f"-r {LOCAL_AUDIT_REQUIREMENTS} --progress-spinner off" ) BANDIT = ( "uvx --from bandit==1.9.4 bandit -r src -lll " @@ -45,25 +55,52 @@ def _step_run(job_name: str, step_name: str) -> str: class TestSecurityWorkflow: """Guard the security workflow against review-feedback regressions.""" - def test_dependency_audit_compiles_test_extra_requirements_without_lockfile(self): + def test_dependency_audit_compiles_test_extra_requirements(self): run = _step_run("dependency-audit", "Run pip-audit") - assert COMPILE_TEST_EXTRA_DEPS in run - assert PIP_AUDIT in run + assert WORKFLOW_COMPILE_TEST_EXTRA_DEPS in run + assert WORKFLOW_PIP_AUDIT in run + assert "--generate-hashes" in run + assert "--require-hashes" in run + assert "--disable-pip" in run + assert "${{ runner.temp }}" in run assert "uv export" not in run assert "--frozen" not in run assert "--locked" not in run assert "uv.lock" not in run + assert "/tmp/" not in run assert "uvx pip-audit ." 
not in run + def test_dependency_audit_runs_supported_python_os_matrix(self): + workflow = _load_security_workflow() + matrix = workflow["jobs"]["dependency-audit"]["strategy"]["matrix"] + + assert matrix["os"] == ["ubuntu-latest", "windows-latest"] + assert matrix["python-version"] == ["3.11", "3.12", "3.13"] + assert workflow["jobs"]["dependency-audit"]["runs-on"] == "${{ matrix.os }}" + def test_security_tools_are_pinned(self): workflow_text = SECURITY_WORKFLOW.read_text(encoding="utf-8") - assert PIP_AUDIT in workflow_text + assert WORKFLOW_PIP_AUDIT in workflow_text assert BANDIT in workflow_text assert re.search(r"\buvx\s+pip-audit\b", workflow_text) is None assert re.search(r"\buvx\s+bandit\b", workflow_text) is None + def test_actions_are_pinned_to_full_commit_shas(self): + workflow = _load_security_workflow() + uses_refs = [ + step["uses"] + for job in workflow["jobs"].values() + for step in job["steps"] + if "uses" in step + ] + + assert uses_refs + for uses_ref in uses_refs: + assert re.search(r"@[0-9a-f]{40}$", uses_ref), uses_ref + assert re.search(r"@v\d+", uses_ref) is None + def test_bandit_does_not_globally_skip_b602(self): run = _step_run("static-analysis", "Run Bandit") workflow_text = SECURITY_WORKFLOW.read_text(encoding="utf-8") @@ -84,13 +121,17 @@ def test_bandit_baseline_only_ignores_shell_step_b602(self): == "src/specify_cli/workflows/steps/shell/__init__.py" ) - def test_b602_is_not_suppressed_in_source(self): - source_text = "\n".join( - path.read_text(encoding="utf-8") - for path in (REPO_ROOT / "src").rglob("*.py") - ) + def test_bandit_nosec_is_not_suppressed_in_source(self): + nosec_lines = [] + for path in (REPO_ROOT / "src").rglob("*.py"): + for line_number, line in enumerate( + path.read_text(encoding="utf-8").splitlines(), + start=1, + ): + if re.search(r"#\s*nosec\b", line, flags=re.IGNORECASE): + nosec_lines.append(f"{path.relative_to(REPO_ROOT)}:{line_number}") - assert "# nosec B602" not in source_text + assert nosec_lines == 
[] def test_run_command_rejects_shell_true(self): from specify_cli import run_command @@ -101,9 +142,10 @@ def test_run_command_rejects_shell_true(self): def test_contributing_documents_security_commands(self): contributing_text = CONTRIBUTING.read_text(encoding="utf-8") - assert COMPILE_TEST_EXTRA_DEPS in contributing_text - assert PIP_AUDIT in contributing_text + assert LOCAL_COMPILE_TEST_EXTRA_DEPS in contributing_text + assert LOCAL_PIP_AUDIT in contributing_text assert BANDIT in contributing_text + assert "/tmp/" not in contributing_text assert "uv export" not in contributing_text assert "--frozen" not in contributing_text assert "--locked" not in contributing_text From aa020624cea01fe0de4365f13bbf3276e0cf55b2 Mon Sep 17 00:00:00 2001 From: Pascal Date: Tue, 5 May 2026 23:33:16 +0200 Subject: [PATCH 07/30] Address latest security audit review --- .github/security-audit-requirements.txt | 320 ++++++++++++++++++++++++ .github/workflows/security.yml | 12 +- CONTRIBUTING.md | 9 +- src/specify_cli/__init__.py | 8 +- tests/test_security_workflow.py | 123 ++++++--- 5 files changed, 427 insertions(+), 45 deletions(-) create mode 100644 .github/security-audit-requirements.txt diff --git a/.github/security-audit-requirements.txt b/.github/security-audit-requirements.txt new file mode 100644 index 0000000000..f15ab00c67 --- /dev/null +++ b/.github/security-audit-requirements.txt @@ -0,0 +1,320 @@ +# This file was autogenerated by uv via the following command: +# uv pip compile pyproject.toml --extra test --universal --generate-hashes --output-file .github/security-audit-requirements.txt +annotated-doc==0.0.4 \ + --hash=sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320 \ + --hash=sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4 + # via typer +click==8.3.3 \ + --hash=sha256:398329ad4837b2ff7cbe1dd166a4c0f8900c3ca3a218de04466f38f6497f18a2 \ + --hash=sha256:a2bf429bb3033c89fa4936ffb35d5cb471e3719e1f3c8a7c3fff0b8314305613 + # via + 
# specify-cli (pyproject.toml) + # typer +colorama==0.4.6 ; sys_platform == 'win32' \ + --hash=sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44 \ + --hash=sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6 + # via + # click + # pytest +coverage==7.13.5 \ + --hash=sha256:012d5319e66e9d5a218834642d6c35d265515a62f01157a45bcc036ecf947256 \ + --hash=sha256:02ca0eed225b2ff301c474aeeeae27d26e2537942aa0f87491d3e147e784a82b \ + --hash=sha256:03ccc709a17a1de074fb1d11f217342fb0d2b1582ed544f554fc9fc3f07e95f5 \ + --hash=sha256:0428cbef5783ad91fe240f673cc1f76b25e74bbfe1a13115e4aa30d3f538162d \ + --hash=sha256:04690832cbea4e4663d9149e05dba142546ca05cb1848816760e7f58285c970a \ + --hash=sha256:0590e44dd2745c696a778f7bab6aa95256de2cbc8b8cff4f7db8ff09813d6969 \ + --hash=sha256:0672854dc733c342fa3e957e0605256d2bf5934feeac328da9e0b5449634a642 \ + --hash=sha256:084b84a8c63e8d6fc7e3931b316a9bcafca1458d753c539db82d31ed20091a87 \ + --hash=sha256:0b67af5492adb31940ee418a5a655c28e48165da5afab8c7fa6fd72a142f8740 \ + --hash=sha256:0cd9ed7a8b181775459296e402ca4fb27db1279740a24e93b3b41942ebe4b215 \ + --hash=sha256:0cef0cdec915d11254a7f549c1170afecce708d30610c6abdded1f74e581666d \ + --hash=sha256:0e223ce4b4ed47f065bfb123687686512e37629be25cc63728557ae7db261422 \ + --hash=sha256:0e3c426ffc4cd952f54ee9ffbdd10345709ecc78a3ecfd796a57236bfad0b9b8 \ + --hash=sha256:0ecf12ecb326fe2c339d93fc131816f3a7367d223db37817208905c89bded911 \ + --hash=sha256:10a0c37f0b646eaff7cce1874c31d1f1ccb297688d4c747291f4f4c70741cc8b \ + --hash=sha256:145ede53ccbafb297c1c9287f788d1bc3efd6c900da23bf6931b09eafc931587 \ + --hash=sha256:1b11eef33edeae9d142f9b4358edb76273b3bfd30bc3df9a4f95d0e49caf94e8 \ + --hash=sha256:1b88c69c8ef5d4b6fe7dea66d6636056a0f6a7527c440e890cf9259011f5e606 \ + --hash=sha256:258354455f4e86e3e9d0d17571d522e13b4e1e19bf0f8596bcf9476d61e7d8a9 \ + --hash=sha256:259b69bb83ad9894c4b25be2528139eecba9a82646ebdda2d9db1ba28424a6bf \ + 
--hash=sha256:2aa055ae1857258f9e0045be26a6d62bdb47a72448b62d7b55f4820f361a2633 \ + --hash=sha256:2d3807015f138ffea1ed9afeeb8624fd781703f2858b62a8dd8da5a0994c57b6 \ + --hash=sha256:301e3b7dfefecaca37c9f1aa6f0049b7d4ab8dd933742b607765d757aca77d43 \ + --hash=sha256:32ca0c0114c9834a43f045a87dcebd69d108d8ffb666957ea65aa132f50332e2 \ + --hash=sha256:34b02417cf070e173989b3db962f7ed56d2f644307b2cf9d5a0f258e13084a61 \ + --hash=sha256:356e76b46783a98c2a2fe81ec79df4883a1e62895ea952968fb253c114e7f930 \ + --hash=sha256:35a31f2b1578185fbe6aa2e74cea1b1d0bbf4c552774247d9160d29b80ed56cc \ + --hash=sha256:380e8e9084d8eb38db3a9176a1a4f3c0082c3806fa0dc882d1d87abc3c789247 \ + --hash=sha256:3ad050321264c49c2fa67bb599100456fc51d004b82534f379d16445da40fb75 \ + --hash=sha256:3e1bb5f6c78feeb1be3475789b14a0f0a5b47d505bfc7267126ccbd50289999e \ + --hash=sha256:3f4818d065964db3c1c66dc0fbdac5ac692ecbc875555e13374fdbe7eedb4376 \ + --hash=sha256:460cf0114c5016fa841214ff5564aa4864f11948da9440bc97e21ad1f4ba1e01 \ + --hash=sha256:48c39bc4a04d983a54a705a6389512883d4a3b9862991b3617d547940e9f52b1 \ + --hash=sha256:4b59148601efcd2bac8c4dbf1f0ad6391693ccf7a74b8205781751637076aee3 \ + --hash=sha256:4d2afbc5cc54d286bfb54541aa50b64cdb07a718227168c87b9e2fb8f25e1743 \ + --hash=sha256:505d7083c8b0c87a8fa8c07370c285847c1f77739b22e299ad75a6af6c32c5c9 \ + --hash=sha256:52f444e86475992506b32d4e5ca55c24fc88d73bcbda0e9745095b28ef4dc0cf \ + --hash=sha256:5b13955d31d1633cf9376908089b7cebe7d15ddad7aeaabcbe969a595a97e95e \ + --hash=sha256:5ec4af212df513e399cf11610cc27063f1586419e814755ab362e50a85ea69c1 \ + --hash=sha256:60365289c3741e4db327e7baff2a4aaacf22f788e80fa4683393891b70a89fbd \ + --hash=sha256:631efb83f01569670a5e866ceb80fe483e7c159fac6f167e6571522636104a0b \ + --hash=sha256:6697e29b93707167687543480a40f0db8f356e86d9f67ddf2e37e2dfd91a9dab \ + --hash=sha256:66a80c616f80181f4d643b0f9e709d97bcea413ecd9631e1dedc7401c8e6695d \ + --hash=sha256:67e9bc5449801fad0e5dff329499fb090ba4c5800b86805c80617b4e29809b2a \ + 
--hash=sha256:68a4953be99b17ac3c23b6efbc8a38330d99680c9458927491d18700ef23ded0 \ + --hash=sha256:6c36ddb64ed9d7e496028d1d00dfec3e428e0aabf4006583bb1839958d280510 \ + --hash=sha256:6e3370441f4513c6252bf042b9c36d22491142385049243253c7e48398a15a9f \ + --hash=sha256:7034b5c56a58ae5e85f23949d52c14aca2cfc6848a31764995b7de88f13a1ea0 \ + --hash=sha256:704de6328e3d612a8f6c07000a878ff38181ec3263d5a11da1db294fa6a9bdf8 \ + --hash=sha256:7132bed4bd7b836200c591410ae7d97bf7ae8be6fc87d160b2bd881df929e7bf \ + --hash=sha256:7300c8a6d13335b29bb76d7651c66af6bd8658517c43499f110ddc6717bfc209 \ + --hash=sha256:750db93a81e3e5a9831b534be7b1229df848b2e125a604fe6651e48aa070e5f9 \ + --hash=sha256:777c4d1eff1b67876139d24288aaf1817f6c03d6bae9c5cc8d27b83bcfe38fe3 \ + --hash=sha256:78e696e1cc714e57e8b25760b33a8b1026b7048d270140d25dafe1b0a1ee05a3 \ + --hash=sha256:79060214983769c7ba3f0cee10b54c97609dca4d478fa1aa32b914480fd5738d \ + --hash=sha256:7c8d4bc913dd70b93488d6c496c77f3aff5ea99a07e36a18f865bca55adef8bd \ + --hash=sha256:7f2c47b36fe7709a6e83bfadf4eefb90bd25fbe4014d715224c4316f808e59a2 \ + --hash=sha256:800bc829053c80d240a687ceeb927a94fd108bbdc68dfbe505d0d75ab578a882 \ + --hash=sha256:843ea8643cf967d1ac7e8ecd4bb00c99135adf4816c0c0593fdcc47b597fcf09 \ + --hash=sha256:8769751c10f339021e2638cd354e13adeac54004d1941119b2c96fe5276d45ea \ + --hash=sha256:8dd02af98971bdb956363e4827d34425cb3df19ee550ef92855b0acb9c7ce51c \ + --hash=sha256:8fdf453a942c3e4d99bd80088141c4c6960bb232c409d9c3558e2dbaa3998562 \ + --hash=sha256:941617e518602e2d64942c88ec8499f7fbd49d3f6c4327d3a71d43a1973032f3 \ + --hash=sha256:972a9cd27894afe4bc2b1480107054e062df08e671df7c2f18c205e805ccd806 \ + --hash=sha256:9adb6688e3b53adffefd4a52d72cbd8b02602bfb8f74dcd862337182fd4d1a4e \ + --hash=sha256:9b74db26dfea4f4e50d48a4602207cd1e78be33182bc9cbf22da94f332f99878 \ + --hash=sha256:9bb2a28101a443669a423b665939381084412b81c3f8c0fcfbac57f4e30b5b8e \ + --hash=sha256:9d44d7aa963820b1b971dbecd90bfe5fe8f81cff79787eb6cca15750bd2f79b9 \ + 
--hash=sha256:9dacc2ad679b292709e0f5fc1ac74a6d4d5562e424058962c7bb0c658ad25e45 \ + --hash=sha256:9ddb4f4a5479f2539644be484da179b653273bca1a323947d48ab107b3ed1f29 \ + --hash=sha256:a1a6d79a14e1ec1832cabc833898636ad5f3754a678ef8bb4908515208bf84f4 \ + --hash=sha256:a698e363641b98843c517817db75373c83254781426e94ada3197cabbc2c919c \ + --hash=sha256:ad14385487393e386e2ea988b09d62dd42c397662ac2dabc3832d71253eee479 \ + --hash=sha256:ad146744ca4fd09b50c482650e3c1b1f4dfa1d4792e0a04a369c7f23336f0400 \ + --hash=sha256:b5db73ba3c41c7008037fa731ad5459fc3944cb7452fc0aa9f822ad3533c583c \ + --hash=sha256:bd3a2fbc1c6cccb3c5106140d87cc6a8715110373ef42b63cf5aea29df8c217a \ + --hash=sha256:bdba0a6b8812e8c7df002d908a9a2ea3c36e92611b5708633c50869e6d922fdf \ + --hash=sha256:be3d4bbad9d4b037791794ddeedd7d64a56f5933a2c1373e18e9e568b9141686 \ + --hash=sha256:bf69236a9a81bdca3bff53796237aab096cdbf8d78a66ad61e992d9dac7eb2de \ + --hash=sha256:bff95879c33ec8da99fc9b6fe345ddb5be6414b41d6d1ad1c8f188d26f36e028 \ + --hash=sha256:c555b48be1853fe3997c11c4bd521cdd9a9612352de01fa4508f16ec341e6fe0 \ + --hash=sha256:c81f6515c4c40141f83f502b07bbfa5c240ba25bbe73da7b33f1e5b6120ff179 \ + --hash=sha256:c9136ff29c3a91e25b1d1552b5308e53a1e0653a23e53b6366d7c2dcbbaf8a16 \ + --hash=sha256:ce1998c0483007608c8382f4ff50164bfc5bd07a2246dd272aa4043b75e61e85 \ + --hash=sha256:cec2d83125531bd153175354055cdb7a09987af08a9430bd173c937c6d0fba2a \ + --hash=sha256:cff784eef7f0b8f6cb28804fbddcfa99f89efe4cc35fb5627e3ac58f91ed3ac0 \ + --hash=sha256:d2c87e0c473a10bffe991502eac389220533024c8082ec1ce849f4218dded810 \ + --hash=sha256:d7cfad2d6d81dd298ab6b89fe72c3b7b05ec7544bdda3b707ddaecff8d25c161 \ + --hash=sha256:d8a7a2049c14f413163e2bdabd37e41179b1d1ccb10ffc6ccc4b7a718429c607 \ + --hash=sha256:da305e9937617ee95c2e39d8ff9f040e0487cbf1ac174f777ed5eddd7a7c1f26 \ + --hash=sha256:da86cdcf10d2519e10cabb8ac2de03da1bcb6e4853790b7fbd48523332e3a819 \ + --hash=sha256:dc022073d063b25a402454e5712ef9e007113e3a676b96c5f29b2bda29352f40 \ + 
--hash=sha256:e0723d2c96324561b9aa76fb982406e11d93cdb388a7a7da2b16e04719cf7ca5 \ + --hash=sha256:e092b9499de38ae0fbfbc603a74660eb6ff3e869e507b50d85a13b6db9863e15 \ + --hash=sha256:e0b216a19534b2427cc201a26c25da4a48633f29a487c61258643e89d28200c0 \ + --hash=sha256:e1c85e0b6c05c592ea6d8768a66a254bfb3874b53774b12d4c89c481eb78cb90 \ + --hash=sha256:e301d30dd7e95ae068671d746ba8c34e945a82682e62918e41b2679acd2051a0 \ + --hash=sha256:e808af52a0513762df4d945ea164a24b37f2f518cbe97e03deaa0ee66139b4d6 \ + --hash=sha256:eb07647a5738b89baab047f14edd18ded523de60f3b30e75c2acc826f79c839a \ + --hash=sha256:eb7fdf1ef130660e7415e0253a01a7d5a88c9c4d158bcf75cbbd922fd65a5b58 \ + --hash=sha256:ec10e2a42b41c923c2209b846126c6582db5e43a33157e9870ba9fb70dc7854b \ + --hash=sha256:ee2aa19e03161671ec964004fb74b2257805d9710bf14a5c704558b9d8dbaf17 \ + --hash=sha256:f08fd75c50a760c7eb068ae823777268daaf16a80b918fa58eea888f8e3919f5 \ + --hash=sha256:f4cd16206ad171cbc2470dbea9103cf9a7607d5fe8c242fdf1edf36174020664 \ + --hash=sha256:f70c9ab2595c56f81a89620e22899eea8b212a4041bd728ac6f4a28bf5d3ddd0 \ + --hash=sha256:fbabfaceaeb587e16f7008f7795cd80d20ec548dc7f94fbb0d4ec2e038ce563f + # via pytest-cov +iniconfig==2.3.0 \ + --hash=sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730 \ + --hash=sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12 + # via pytest +json5==0.14.0 \ + --hash=sha256:56cf861bab076b1178eb8c92e1311d273a9b9acea2ccc82c276abf839ebaef3a \ + --hash=sha256:b3f492fad9f6cdbced8b7d40b28b9b1c9701c5f561bef0d33b81c2ff433fefcb + # via specify-cli (pyproject.toml) +markdown-it-py==4.0.0 \ + --hash=sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147 \ + --hash=sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3 + # via rich +mdurl==0.1.2 \ + --hash=sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8 \ + --hash=sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba + # via markdown-it-py 
+packaging==26.2 \ + --hash=sha256:5fc45236b9446107ff2415ce77c807cee2862cb6fac22b8a73826d0693b0980e \ + --hash=sha256:ff452ff5a3e828ce110190feff1178bb1f2ea2281fa2075aadb987c2fb221661 + # via + # specify-cli (pyproject.toml) + # pytest +pathspec==1.1.1 \ + --hash=sha256:17db5ecd524104a120e173814c90367a96a98d07c45b2e10c2f3919fff91bf5a \ + --hash=sha256:a00ce642f577bf7f473932318056212bc4f8bfdf53128c78bbd5af0b9b20b189 + # via specify-cli (pyproject.toml) +platformdirs==4.9.6 \ + --hash=sha256:3bfa75b0ad0db84096ae777218481852c0ebc6c727b3168c1b9e0118e458cf0a \ + --hash=sha256:e61adb1d5e5cb3441b4b7710bea7e4c12250ca49439228cc1021c00dcfac0917 + # via specify-cli (pyproject.toml) +pluggy==1.6.0 \ + --hash=sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3 \ + --hash=sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746 + # via + # pytest + # pytest-cov +pygments==2.20.0 \ + --hash=sha256:6757cd03768053ff99f3039c1a36d6c0aa0b263438fcab17520b30a303a82b5f \ + --hash=sha256:81a9e26dd42fd28a23a2d169d86d7ac03b46e2f8b59ed4698fb4785f946d0176 + # via + # pytest + # rich +pytest==9.0.3 \ + --hash=sha256:2c5efc453d45394fdd706ade797c0a81091eccd1d6e4bccfcd476e2b8e0ab5d9 \ + --hash=sha256:b86ada508af81d19edeb213c681b1d48246c1a91d304c6c81a427674c17eb91c + # via + # specify-cli (pyproject.toml) + # pytest-cov +pytest-cov==7.1.0 \ + --hash=sha256:30674f2b5f6351aa09702a9c8c364f6a01c27aae0c1366ae8016160d1efc56b2 \ + --hash=sha256:a0461110b7865f9a271aa1b51e516c9a95de9d696734a2f71e3e78f46e1d4678 + # via specify-cli (pyproject.toml) +pyyaml==6.0.3 \ + --hash=sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c \ + --hash=sha256:0150219816b6a1fa26fb4699fb7daa9caf09eb1999f3b70fb6e786805e80375a \ + --hash=sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3 \ + --hash=sha256:02ea2dfa234451bbb8772601d7b8e426c2bfa197136796224e50e35a78777956 \ + --hash=sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6 \ + 
--hash=sha256:10892704fc220243f5305762e276552a0395f7beb4dbf9b14ec8fd43b57f126c \ + --hash=sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65 \ + --hash=sha256:1d37d57ad971609cf3c53ba6a7e365e40660e3be0e5175fa9f2365a379d6095a \ + --hash=sha256:1ebe39cb5fc479422b83de611d14e2c0d3bb2a18bbcb01f229ab3cfbd8fee7a0 \ + --hash=sha256:214ed4befebe12df36bcc8bc2b64b396ca31be9304b8f59e25c11cf94a4c033b \ + --hash=sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1 \ + --hash=sha256:22ba7cfcad58ef3ecddc7ed1db3409af68d023b7f940da23c6c2a1890976eda6 \ + --hash=sha256:27c0abcb4a5dac13684a37f76e701e054692a9b2d3064b70f5e4eb54810553d7 \ + --hash=sha256:28c8d926f98f432f88adc23edf2e6d4921ac26fb084b028c733d01868d19007e \ + --hash=sha256:2e71d11abed7344e42a8849600193d15b6def118602c4c176f748e4583246007 \ + --hash=sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310 \ + --hash=sha256:37503bfbfc9d2c40b344d06b2199cf0e96e97957ab1c1b546fd4f87e53e5d3e4 \ + --hash=sha256:3c5677e12444c15717b902a5798264fa7909e41153cdf9ef7ad571b704a63dd9 \ + --hash=sha256:3ff07ec89bae51176c0549bc4c63aa6202991da2d9a6129d7aef7f1407d3f295 \ + --hash=sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea \ + --hash=sha256:418cf3f2111bc80e0933b2cd8cd04f286338bb88bdc7bc8e6dd775ebde60b5e0 \ + --hash=sha256:44edc647873928551a01e7a563d7452ccdebee747728c1080d881d68af7b997e \ + --hash=sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac \ + --hash=sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9 \ + --hash=sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7 \ + --hash=sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35 \ + --hash=sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb \ + --hash=sha256:5cf4e27da7e3fbed4d6c3d8e797387aaad68102272f8f9752883bc32d61cb87b \ + --hash=sha256:5e0b74767e5f8c593e8c9b5912019159ed0533c70051e9cce3e8b6aa699fcd69 \ + 
--hash=sha256:5ed875a24292240029e4483f9d4a4b8a1ae08843b9c54f43fcc11e404532a8a5 \ + --hash=sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b \ + --hash=sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c \ + --hash=sha256:6344df0d5755a2c9a276d4473ae6b90647e216ab4757f8426893b5dd2ac3f369 \ + --hash=sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd \ + --hash=sha256:652cb6edd41e718550aad172851962662ff2681490a8a711af6a4d288dd96824 \ + --hash=sha256:66291b10affd76d76f54fad28e22e51719ef9ba22b29e1d7d03d6777a9174198 \ + --hash=sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065 \ + --hash=sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c \ + --hash=sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c \ + --hash=sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764 \ + --hash=sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196 \ + --hash=sha256:8098f252adfa6c80ab48096053f512f2321f0b998f98150cea9bd23d83e1467b \ + --hash=sha256:850774a7879607d3a6f50d36d04f00ee69e7fc816450e5f7e58d7f17f1ae5c00 \ + --hash=sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac \ + --hash=sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8 \ + --hash=sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e \ + --hash=sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28 \ + --hash=sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3 \ + --hash=sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5 \ + --hash=sha256:9c57bb8c96f6d1808c030b1687b9b5fb476abaa47f0db9c0101f5e9f394e97f4 \ + --hash=sha256:9c7708761fccb9397fe64bbc0395abcae8c4bf7b0eac081e12b809bf47700d0b \ + --hash=sha256:9f3bfb4965eb874431221a3ff3fdcddc7e74e3b07799e0e84ca4a0f867d449bf \ + --hash=sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5 \ + 
--hash=sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702 \ + --hash=sha256:b30236e45cf30d2b8e7b3e85881719e98507abed1011bf463a8fa23e9c3e98a8 \ + --hash=sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788 \ + --hash=sha256:b865addae83924361678b652338317d1bd7e79b1f4596f96b96c77a5a34b34da \ + --hash=sha256:b8bb0864c5a28024fac8a632c443c87c5aa6f215c0b126c449ae1a150412f31d \ + --hash=sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc \ + --hash=sha256:bdb2c67c6c1390b63c6ff89f210c8fd09d9a1217a465701eac7316313c915e4c \ + --hash=sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba \ + --hash=sha256:c2514fceb77bc5e7a2f7adfaa1feb2fb311607c9cb518dbc378688ec73d8292f \ + --hash=sha256:c3355370a2c156cffb25e876646f149d5d68f5e0a3ce86a5084dd0b64a994917 \ + --hash=sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5 \ + --hash=sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26 \ + --hash=sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f \ + --hash=sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b \ + --hash=sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be \ + --hash=sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c \ + --hash=sha256:efd7b85f94a6f21e4932043973a7ba2613b059c4a000551892ac9f1d11f5baf3 \ + --hash=sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6 \ + --hash=sha256:fa160448684b4e94d80416c0fa4aac48967a969efe22931448d853ada8baf926 \ + --hash=sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0 + # via specify-cli (pyproject.toml) +readchar==4.2.2 \ + --hash=sha256:92daf7e42c52b0787e6c75d01ecfb9a94f4ceff3764958b570c1dddedd47b200 \ + --hash=sha256:e3b270fe16fc90c50ac79107700330a133dd4c63d22939f5b03b4f24564d5dd8 + # via specify-cli (pyproject.toml) +rich==15.0.0 \ + 
--hash=sha256:33bd4ef74232fb73fe9279a257718407f169c09b78a87ad3d296f548e27de0bb \ + --hash=sha256:edd07a4824c6b40189fb7ac9bc4c52536e9780fbbfbddf6f1e2502c31b068c36 + # via + # specify-cli (pyproject.toml) + # typer +shellingham==1.5.4 \ + --hash=sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686 \ + --hash=sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de + # via typer +tomli==2.4.1 ; python_full_version <= '3.11' \ + --hash=sha256:01f520d4f53ef97964a240a035ec2a869fe1a37dde002b57ebc4417a27ccd853 \ + --hash=sha256:0d85819802132122da43cb86656f8d1f8c6587d54ae7dcaf30e90533028b49fe \ + --hash=sha256:136443dbd7e1dee43c68ac2694fde36b2849865fa258d39bf822c10e8068eac5 \ + --hash=sha256:1d8591993e228b0c930c4bb0db464bdad97b3289fb981255d6c9a41aedc84b2d \ + --hash=sha256:2190f2e9dd7508d2a90ded5ed369255980a1bcdd58e52f7fe24b8162bf9fedbd \ + --hash=sha256:2c1c351919aca02858f740c6d33adea0c5deea37f9ecca1cc1ef9e884a619d26 \ + --hash=sha256:36d2bd2ad5fb9eaddba5226aa02c8ec3fa4f192631e347b3ed28186d43be6b54 \ + --hash=sha256:3d48a93ee1c9b79c04bb38772ee1b64dcf18ff43085896ea460ca8dec96f35f6 \ + --hash=sha256:47149d5bd38761ac8be13a84864bf0b7b70bc051806bc3669ab1cbc56216b23c \ + --hash=sha256:4ab97e64ccda8756376892c53a72bd1f964e519c77236368527f758fbc36a53a \ + --hash=sha256:4b605484e43cdc43f0954ddae319fb75f04cc10dd80d830540060ee7cd0243cd \ + --hash=sha256:504aa796fe0569bb43171066009ead363de03675276d2d121ac1a4572397870f \ + --hash=sha256:51529d40e3ca50046d7606fa99ce3956a617f9b36380da3b7f0dd3dd28e68cb5 \ + --hash=sha256:52c8ef851d9a240f11a88c003eacb03c31fc1c9c4ec64a99a0f922b93874fda9 \ + --hash=sha256:559db847dc486944896521f68d8190be1c9e719fced785720d2216fe7022b662 \ + --hash=sha256:5a881ab208c0baf688221f8cecc5401bd291d67e38a1ac884d6736cbcd8247e9 \ + --hash=sha256:5cb41aa38891e073ee49d55fbc7839cfdb2bc0e600add13874d048c94aadddd1 \ + --hash=sha256:5e262d41726bc187e69af7825504c933b6794dc3fbd5945e41a79bb14c31f585 \ + 
--hash=sha256:5ee18d9ebdb417e384b58fe414e8d6af9f4e7a0ae761519fb50f721de398dd4e \ + --hash=sha256:7008df2e7655c495dd12d2a4ad038ff878d4ca4b81fccaf82b714e07eae4402c \ + --hash=sha256:734e20b57ba95624ecf1841e72b53f6e186355e216e5412de414e3c51e5e3c41 \ + --hash=sha256:7c7e1a961a0b2f2472c1ac5b69affa0ae1132c39adcb67aba98568702b9cc23f \ + --hash=sha256:7f86fd587c4ed9dd76f318225e7d9b29cfc5a9d43de44e5754db8d1128487085 \ + --hash=sha256:7f94b27a62cfad8496c8d2513e1a222dd446f095fca8987fceef261225538a15 \ + --hash=sha256:88dceee75c2c63af144e456745e10101eb67361050196b0b6af5d717254dddf7 \ + --hash=sha256:8a650c2dbafa08d42e51ba0b62740dae4ecb9338eefa093aa5c78ceb546fcd5c \ + --hash=sha256:8d65a2fbf9d2f8352685bc1364177ee3923d6baf5e7f43ea4959d7d8bc326a36 \ + --hash=sha256:96481a5786729fd470164b47cdb3e0e58062a496f455ee41b4403be77cb5a076 \ + --hash=sha256:a120733b01c45e9a0c34aeef92bf0cf1d56cfe81ed9d47d562f9ed591a9828ac \ + --hash=sha256:b1d22e6e9387bf4739fbe23bfa80e93f6b0373a7f1b96c6227c32bef95a4d7a8 \ + --hash=sha256:b8c198f8c1805dc42708689ed6864951fd2494f924149d3e4bce7710f8eb5232 \ + --hash=sha256:c2541745709bad0264b7d4705ad453b76ccd191e64aa6f0fc66b69a293a45ece \ + --hash=sha256:c742f741d58a28940ce01d58f0ab2ea3ced8b12402f162f4d534dfe18ba1cd6a \ + --hash=sha256:c7f2c7f2b9ca6bdeef8f0fa897f8e05085923eb091721675170254cbc5b02897 \ + --hash=sha256:d312ef37c91508b0ab2cee7da26ec0b3ed2f03ce12bd87a588d771ae15dcf82d \ + --hash=sha256:d4d8fe59808a54658fcc0160ecfb1b30f9089906c50b23bcb4c69eddc19ec2b4 \ + --hash=sha256:da25dc3563bff5965356133435b757a795a17b17d01dbc0f42fb32447ddfd917 \ + --hash=sha256:eab21f45c7f66c13f2a9e0e1535309cee140182a9cdae1e041d02e47291e8396 \ + --hash=sha256:eb0dc4e38e6a1fd579e5d50369aa2e10acfc9cace504579b2faabb478e76941a \ + --hash=sha256:ec9bfaf3ad2df51ace80688143a6a4ebc09a248f6ff781a9945e51937008fcbc \ + --hash=sha256:ede3e6487c5ef5d28634ba3f31f989030ad6af71edfb0055cbbd14189ff240ba \ + --hash=sha256:f3c6818a1a86dd6dca7ddcaaf76947d5ba31aecc28cb1b67009a5877c9a64f3f \ + 
--hash=sha256:f758f1b9299d059cc3f6546ae2af89670cb1c4d48ea29c3cacc4fe7de3058257 \ + --hash=sha256:f8f0fc26ec2cc2b965b7a3b87cd19c5c6b8c5e5f436b984e85f486d652285c30 \ + --hash=sha256:fd0409a3653af6c147209d267a0e4243f0ae46b011aa978b1080359fddc9b6cf \ + --hash=sha256:ff18e6a727ee0ab0388507b89d1bc6a22b138d1e2fa56d1ad494586d61d2eae9 \ + --hash=sha256:ff2983983d34813c1aeb0fa89091e76c3a22889ee83ab27c5eeb45100560c049 + # via coverage +typer==0.25.1 \ + --hash=sha256:75caa44ed46a03fb2dab8808753ffacdbfea88495e74c85a28c5eefcf5f39c89 \ + --hash=sha256:9616eb8853a09ffeabab1698952f33c6f29ffdbceb4eaeecf571880e8d7664cc + # via specify-cli (pyproject.toml) diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml index fced205987..f09e5812a5 100644 --- a/.github/workflows/security.yml +++ b/.github/workflows/security.yml @@ -32,10 +32,18 @@ jobs: with: python-version: ${{ matrix.python-version }} - - name: Run pip-audit + - name: Compile scheduled audit requirements + if: ${{ github.event_name == 'schedule' }} run: | uv pip compile pyproject.toml --extra test --python-version "${{ matrix.python-version }}" --generate-hashes --quiet --output-file "${{ runner.temp }}/spec-kit-audit-requirements.txt" - uvx --from pip-audit==2.10.0 pip-audit --disable-pip --require-hashes -r "${{ runner.temp }}/spec-kit-audit-requirements.txt" --progress-spinner off + + - name: Run pip-audit (scheduled live resolution) + if: ${{ github.event_name == 'schedule' }} + run: uvx --from pip-audit==2.10.0 pip-audit --disable-pip --require-hashes -r "${{ runner.temp }}/spec-kit-audit-requirements.txt" --progress-spinner off + + - name: Run pip-audit (committed requirements) + if: ${{ github.event_name != 'schedule' }} + run: uvx --from pip-audit==2.10.0 pip-audit --disable-pip --require-hashes -r .github/security-audit-requirements.txt --progress-spinner off static-analysis: name: Static analysis diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f9f87b4d0e..35bfa35834 100644 --- 
a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -84,12 +84,15 @@ Run this when you change agent metadata, context update scripts, or integration #### Security checks ```bash -uv pip compile pyproject.toml --extra test --generate-hashes --quiet --output-file spec-kit-audit-requirements.txt -uvx --from pip-audit==2.10.0 pip-audit --disable-pip --require-hashes -r spec-kit-audit-requirements.txt --progress-spinner off +uvx --from pip-audit==2.10.0 pip-audit --disable-pip --require-hashes -r .github/security-audit-requirements.txt --progress-spinner off uvx --from bandit==1.9.4 bandit -r src -lll --baseline .github/bandit-baseline.json ``` -Run these before changing dependency metadata, workflow execution code, subprocess usage, or security-sensitive paths. The dependency audit resolves the runtime and `test` extra dependency set used by CI and contributors. CI runs the dependency audit across the supported Python and OS matrix; locally, run these commands from the environment you want to reproduce. +Run these before changing dependency metadata, workflow execution code, subprocess usage, or security-sensitive paths. Pull request, push, and manual CI audits use the committed hashed requirements file so they stay deterministic. The scheduled CI audit also resolves the runtime and `test` extra dependency set across the supported Python and OS matrix to catch newly published advisories. 
If dependency metadata changes, refresh the committed audit input before running pip-audit: + +```bash +uv pip compile pyproject.toml --extra test --universal --generate-hashes --quiet --output-file .github/security-audit-requirements.txt +``` ### Manual testing diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py index b386501b50..9f7e5ad092 100644 --- a/src/specify_cli/__init__.py +++ b/src/specify_cli/__init__.py @@ -396,14 +396,8 @@ def callback( console.print(Align.center("[dim]Run 'specify --help' for usage information[/dim]")) console.print() -def run_command(cmd: list[str], check_return: bool = True, capture: bool = False, shell: bool = False) -> Optional[str]: +def run_command(cmd: list[str], check_return: bool = True, capture: bool = False) -> Optional[str]: """Run a command without invoking a shell and optionally capture output.""" - if shell: - raise ValueError( - "run_command does not support shell=True; use a reviewed " - "subprocess.run call for shell-specific behavior." 
- ) - try: if capture: result = subprocess.run( diff --git a/tests/test_security_workflow.py b/tests/test_security_workflow.py index 0d4c90f807..60152a91db 100644 --- a/tests/test_security_workflow.py +++ b/tests/test_security_workflow.py @@ -2,11 +2,11 @@ from __future__ import annotations +import inspect import json import re from pathlib import Path -import pytest import yaml @@ -14,25 +14,26 @@ SECURITY_WORKFLOW = REPO_ROOT / ".github" / "workflows" / "security.yml" CONTRIBUTING = REPO_ROOT / "CONTRIBUTING.md" BANDIT_BASELINE = REPO_ROOT / ".github" / "bandit-baseline.json" +SECURITY_REQUIREMENTS = REPO_ROOT / ".github" / "security-audit-requirements.txt" -WORKFLOW_AUDIT_REQUIREMENTS = '"${{ runner.temp }}/spec-kit-audit-requirements.txt"' -LOCAL_AUDIT_REQUIREMENTS = "spec-kit-audit-requirements.txt" -WORKFLOW_COMPILE_TEST_EXTRA_DEPS = ( +WORKFLOW_LIVE_AUDIT_REQUIREMENTS = '"${{ runner.temp }}/spec-kit-audit-requirements.txt"' +COMMITTED_AUDIT_REQUIREMENTS = ".github/security-audit-requirements.txt" +WORKFLOW_COMPILE_SCHEDULED_TEST_EXTRA_DEPS = ( "uv pip compile pyproject.toml --extra test " '--python-version "${{ matrix.python-version }}" --generate-hashes --quiet ' - f"--output-file {WORKFLOW_AUDIT_REQUIREMENTS}" + f"--output-file {WORKFLOW_LIVE_AUDIT_REQUIREMENTS}" ) -LOCAL_COMPILE_TEST_EXTRA_DEPS = ( - "uv pip compile pyproject.toml --extra test --generate-hashes --quiet " - f"--output-file {LOCAL_AUDIT_REQUIREMENTS}" +LOCAL_REFRESH_TEST_EXTRA_DEPS = ( + "uv pip compile pyproject.toml --extra test --universal --generate-hashes " + f"--quiet --output-file {COMMITTED_AUDIT_REQUIREMENTS}" ) -WORKFLOW_PIP_AUDIT = ( +WORKFLOW_LIVE_PIP_AUDIT = ( "uvx --from pip-audit==2.10.0 pip-audit --disable-pip --require-hashes " - f"-r {WORKFLOW_AUDIT_REQUIREMENTS} --progress-spinner off" + f"-r {WORKFLOW_LIVE_AUDIT_REQUIREMENTS} --progress-spinner off" ) LOCAL_PIP_AUDIT = ( "uvx --from pip-audit==2.10.0 pip-audit --disable-pip --require-hashes " - f"-r 
{LOCAL_AUDIT_REQUIREMENTS} --progress-spinner off" + f"-r {COMMITTED_AUDIT_REQUIREMENTS} --progress-spinner off" ) BANDIT = ( "uvx --from bandit==1.9.4 bandit -r src -lll " @@ -44,32 +45,70 @@ def _load_security_workflow() -> dict: return yaml.safe_load(SECURITY_WORKFLOW.read_text(encoding="utf-8")) -def _step_run(job_name: str, step_name: str) -> str: +def _workflow_triggers() -> dict: + workflow = _load_security_workflow() + return workflow.get("on") or workflow[True] + + +def _step(job_name: str, step_name: str) -> dict: workflow = _load_security_workflow() for step in workflow["jobs"][job_name]["steps"]: if step.get("name") == step_name: - return step["run"] + return step raise AssertionError(f"Step {step_name!r} not found in job {job_name!r}.") +def _step_run(job_name: str, step_name: str) -> str: + return _step(job_name, step_name)["run"] + + class TestSecurityWorkflow: """Guard the security workflow against review-feedback regressions.""" - def test_dependency_audit_compiles_test_extra_requirements(self): - run = _step_run("dependency-audit", "Run pip-audit") - - assert WORKFLOW_COMPILE_TEST_EXTRA_DEPS in run - assert WORKFLOW_PIP_AUDIT in run - assert "--generate-hashes" in run - assert "--require-hashes" in run - assert "--disable-pip" in run - assert "${{ runner.temp }}" in run - assert "uv export" not in run - assert "--frozen" not in run - assert "--locked" not in run - assert "uv.lock" not in run - assert "/tmp/" not in run - assert "uvx pip-audit ." 
not in run + def test_dependency_audit_uses_committed_requirements_for_prs_and_pushes(self): + scheduled_compile = _step( + "dependency-audit", + "Compile scheduled audit requirements", + ) + scheduled_audit = _step( + "dependency-audit", + "Run pip-audit (scheduled live resolution)", + ) + committed_audit = _step( + "dependency-audit", + "Run pip-audit (committed requirements)", + ) + + assert scheduled_compile["if"] == "${{ github.event_name == 'schedule' }}" + assert WORKFLOW_COMPILE_SCHEDULED_TEST_EXTRA_DEPS in scheduled_compile["run"] + assert scheduled_audit["if"] == "${{ github.event_name == 'schedule' }}" + assert scheduled_audit["run"] == WORKFLOW_LIVE_PIP_AUDIT + assert committed_audit["if"] == "${{ github.event_name != 'schedule' }}" + assert committed_audit["run"] == LOCAL_PIP_AUDIT + + dependency_job_text = "\n".join( + step.get("run", "") + for step in _load_security_workflow()["jobs"]["dependency-audit"]["steps"] + ) + assert "--generate-hashes" in dependency_job_text + assert "--require-hashes" in dependency_job_text + assert "--disable-pip" in dependency_job_text + assert WORKFLOW_LIVE_AUDIT_REQUIREMENTS in dependency_job_text + assert COMMITTED_AUDIT_REQUIREMENTS in dependency_job_text + assert "uv export" not in dependency_job_text + assert "--frozen" not in dependency_job_text + assert "--locked" not in dependency_job_text + assert "uv.lock" not in dependency_job_text + assert "/tmp/" not in dependency_job_text + assert "uvx pip-audit ." 
not in dependency_job_text + + def test_security_workflow_triggers_are_preserved(self): + triggers = _workflow_triggers() + + assert triggers["push"]["branches"] == ["main"] + assert triggers["pull_request"] is None + assert triggers["workflow_dispatch"] is None + assert triggers["schedule"] == [{"cron": "17 4 * * 1"}] def test_dependency_audit_runs_supported_python_os_matrix(self): workflow = _load_security_workflow() @@ -82,7 +121,8 @@ def test_dependency_audit_runs_supported_python_os_matrix(self): def test_security_tools_are_pinned(self): workflow_text = SECURITY_WORKFLOW.read_text(encoding="utf-8") - assert WORKFLOW_PIP_AUDIT in workflow_text + assert WORKFLOW_LIVE_PIP_AUDIT in workflow_text + assert LOCAL_PIP_AUDIT in workflow_text assert BANDIT in workflow_text assert re.search(r"\buvx\s+pip-audit\b", workflow_text) is None assert re.search(r"\buvx\s+bandit\b", workflow_text) is None @@ -133,19 +173,36 @@ def test_bandit_nosec_is_not_suppressed_in_source(self): assert nosec_lines == [] - def test_run_command_rejects_shell_true(self): + def test_run_command_does_not_accept_shell_argument(self): from specify_cli import run_command - with pytest.raises(ValueError, match="shell=True"): - run_command(["echo", "hello"], shell=True) + assert "shell" not in inspect.signature(run_command).parameters + + def test_committed_audit_requirements_are_hashed(self): + requirements = SECURITY_REQUIREMENTS.read_text(encoding="utf-8") + + assert "--hash=sha256:" in requirements + assert "pytest==" in requirements + assert "pytest-cov==" in requirements def test_contributing_documents_security_commands(self): contributing_text = CONTRIBUTING.read_text(encoding="utf-8") - assert LOCAL_COMPILE_TEST_EXTRA_DEPS in contributing_text + assert LOCAL_REFRESH_TEST_EXTRA_DEPS in contributing_text assert LOCAL_PIP_AUDIT in contributing_text assert BANDIT in contributing_text assert "/tmp/" not in contributing_text assert "uv export" not in contributing_text assert "--frozen" not in 
contributing_text assert "--locked" not in contributing_text + assert ( + re.search( + r"--output-file\s+spec-kit-audit-requirements\.txt\b", + contributing_text, + ) + is None + ) + assert ( + re.search(r"-r\s+spec-kit-audit-requirements\.txt\b", contributing_text) + is None + ) From 1f85b929355bf831ef5d2014c52d9b8eb1b58426 Mon Sep 17 00:00:00 2001 From: Pascal Date: Wed, 6 May 2026 00:05:35 +0200 Subject: [PATCH 08/30] Harden security-sensitive repository surfaces --- .github/workflows/codeql.yml | 6 +- .github/workflows/docs.yml | 11 +- .github/workflows/lint.yml | 2 +- .github/workflows/release-trigger.yml | 2 +- .github/workflows/release.yml | 3 +- .github/workflows/stale.yml | 2 +- .github/workflows/test.yml | 8 +- src/specify_cli/_download_security.py | 173 +++++++++++++++++++++++++ src/specify_cli/_github_http.py | 18 ++- src/specify_cli/agents.py | 14 +- src/specify_cli/extensions.py | 57 +++++--- src/specify_cli/presets.py | 48 ++++--- src/specify_cli/workflows/catalog.py | 18 ++- tests/test_download_security.py | 82 ++++++++++++ tests/test_extensions.py | 111 +++++++++++++++- tests/test_github_workflows.py | 32 +++++ tests/test_presets.py | 81 ++++++++++++ tests/test_registrar_path_traversal.py | 25 ++++ 18 files changed, 629 insertions(+), 64 deletions(-) create mode 100644 src/specify_cli/_download_security.py create mode 100644 tests/test_download_security.py create mode 100644 tests/test_github_workflows.py diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 01e0df4a51..1af463c718 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -19,14 +19,14 @@ jobs: language: [ 'actions', 'python' ] steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - name: Initialize CodeQL - uses: github/codeql-action/init@v4 + uses: github/codeql-action/init@e46ed2cbd01164d986452f91f178727624ae40d7 # v4 with: languages: ${{ matrix.language 
}} - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v4 + uses: github/codeql-action/analyze@e46ed2cbd01164d986452f91f178727624ae40d7 # v4 with: category: "/language:${{ matrix.language }}" diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 6fe87ddce2..9cb48f8f38 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -30,12 +30,12 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v6 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 with: fetch-depth: 0 # Fetch all history for git info - name: Setup .NET - uses: actions/setup-dotnet@v4 + uses: actions/setup-dotnet@67a3573c9a986a3f9c594539f4ab511d57bb3ce9 # v4 with: dotnet-version: '8.x' @@ -48,10 +48,10 @@ jobs: docfx docfx.json - name: Setup Pages - uses: actions/configure-pages@v6 + uses: actions/configure-pages@45bfe0192ca1faeb007ade9deae92b16b8254a0d # v6 - name: Upload artifact - uses: actions/upload-pages-artifact@v5 + uses: actions/upload-pages-artifact@fc324d3547104276b827a68afc52ff2a11cc49c9 # v5 with: path: 'docs/_site' @@ -66,5 +66,4 @@ jobs: steps: - name: Deploy to GitHub Pages id: deployment - uses: actions/deploy-pages@v5 - + uses: actions/deploy-pages@cd2ce8fcbc39b97be8ca5fce6e763baed58fa128 # v5 diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 8b11ccdfff..3b2ad70bfb 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v6 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 - name: Run markdownlint-cli2 uses: DavidAnson/markdownlint-cli2-action@6b51ade7a9e4a75a7ad929842dd298a3804ebe8b # v23 diff --git a/.github/workflows/release-trigger.yml b/.github/workflows/release-trigger.yml index a451accfe6..c3728e2363 100644 --- a/.github/workflows/release-trigger.yml +++ b/.github/workflows/release-trigger.yml @@ -16,7 +16,7 @@ jobs: 
pull-requests: write steps: - name: Checkout repository - uses: actions/checkout@v6 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 with: fetch-depth: 0 token: ${{ secrets.RELEASE_PAT }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 7b903cf979..9437bd02e7 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -12,7 +12,7 @@ jobs: contents: write steps: - name: Checkout repository - uses: actions/checkout@v6 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 with: fetch-depth: 0 token: ${{ secrets.GITHUB_TOKEN }} @@ -86,4 +86,3 @@ jobs: --notes-file release_notes.md env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index 076d05336a..919add00f0 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -14,7 +14,7 @@ jobs: stale: runs-on: ubuntu-latest steps: - - uses: actions/stale@v10 + - uses: actions/stale@b5d41d4e1d5dceea10e7104786b73624c18a190f # v10 with: # Days of inactivity before an issue or PR becomes stale days-before-stale: 150 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 7354dd8e28..f7130aa8d1 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -13,13 +13,13 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - name: Install uv uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0 - name: Set up Python - uses: actions/setup-python@v6 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6 with: python-version: "3.13" @@ -34,13 +34,13 @@ jobs: python-version: ["3.11", "3.12", "3.13"] steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - name: Install uv uses: 
astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v6 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6 with: python-version: ${{ matrix.python-version }} diff --git a/src/specify_cli/_download_security.py b/src/specify_cli/_download_security.py new file mode 100644 index 0000000000..37f92749d4 --- /dev/null +++ b/src/specify_cli/_download_security.py @@ -0,0 +1,173 @@ +"""Helpers for bounded downloads and archive extraction.""" + +from __future__ import annotations + +import hashlib +import re +import stat +import zipfile +from pathlib import Path, PurePosixPath +from typing import TypeVar + + +ErrorT = TypeVar("ErrorT", bound=Exception) + +MAX_DOWNLOAD_BYTES = 50 * 1024 * 1024 +MAX_ZIP_ENTRIES = 512 +MAX_ZIP_MEMBER_BYTES = 10 * 1024 * 1024 +MAX_ZIP_TOTAL_BYTES = 50 * 1024 * 1024 +READ_CHUNK_SIZE = 1024 * 1024 +SHA256_RE = re.compile(r"^[0-9a-fA-F]{64}$") + + +def _raise(error_type: type[ErrorT], message: str) -> None: + raise error_type(message) + + +def read_response_limited( + response, + *, + max_bytes: int = MAX_DOWNLOAD_BYTES, + error_type: type[ErrorT] = ValueError, + label: str = "download", +) -> bytes: + """Read at most *max_bytes* from a response object.""" + data = response.read(max_bytes + 1) + if len(data) > max_bytes: + _raise(error_type, f"{label} exceeds maximum size of {max_bytes} bytes") + return data + + +def normalize_sha256(value: object, *, error_type: type[ErrorT] = ValueError) -> str | None: + """Normalize an optional sha256/sha256: checksum value.""" + if value is None: + return None + if not isinstance(value, str): + _raise(error_type, "sha256 checksum must be a string") + + checksum = value.strip() + if checksum.startswith("sha256:"): + checksum = checksum[len("sha256:") :] + if not SHA256_RE.fullmatch(checksum): + _raise(error_type, "sha256 checksum must be 64 hexadecimal characters") + return checksum.lower() + + 
+def verify_sha256( + data: bytes, + expected: object, + *, + error_type: type[ErrorT] = ValueError, + label: str = "download", +) -> None: + """Verify *data* against an optional sha256 checksum.""" + checksum = normalize_sha256(expected, error_type=error_type) + if checksum is None: + return + + actual = hashlib.sha256(data).hexdigest() + if actual != checksum: + _raise( + error_type, + f"{label} checksum mismatch: expected sha256:{checksum}, got sha256:{actual}", + ) + + +def _safe_zip_name(name: str, *, error_type: type[ErrorT]) -> str: + """Return a normalized ZIP member name or raise on traversal.""" + if "\x00" in name: + _raise(error_type, f"Unsafe path in ZIP archive: {name!r}") + + normalized = name.replace("\\", "/") + path = PurePosixPath(normalized) + has_windows_drive = re.match(r"^[A-Za-z]:/", normalized) is not None + if ( + not path.parts + or path.is_absolute() + or has_windows_drive + or any(part == ".." for part in path.parts) + ): + _raise( + error_type, + f"Unsafe path in ZIP archive: {name} (potential path traversal)", + ) + return normalized + + +def safe_extract_zip( + zip_path: Path, + target_dir: Path, + *, + error_type: type[ErrorT] = ValueError, + max_entries: int = MAX_ZIP_ENTRIES, + max_member_bytes: int = MAX_ZIP_MEMBER_BYTES, + max_total_bytes: int = MAX_ZIP_TOTAL_BYTES, +) -> None: + """Extract a ZIP archive after path, symlink, and size validation.""" + target_root = target_dir.resolve() + + with zipfile.ZipFile(zip_path, "r") as zf: + members = zf.infolist() + if len(members) > max_entries: + _raise( + error_type, + f"ZIP archive contains too many entries ({len(members)} > {max_entries})", + ) + + normalized_members: list[tuple[zipfile.ZipInfo, str]] = [] + total_size = 0 + for member in members: + normalized_name = _safe_zip_name(member.filename, error_type=error_type) + + mode = member.external_attr >> 16 + if stat.S_ISLNK(mode): + _raise(error_type, f"Unsafe symlink in ZIP archive: {member.filename}") + + member_path = 
(target_dir / normalized_name).resolve() + try: + member_path.relative_to(target_root) + except ValueError: + _raise( + error_type, + f"Unsafe path in ZIP archive: {member.filename} " + "(potential path traversal)", + ) + + if not member.is_dir(): + if member.file_size > max_member_bytes: + _raise( + error_type, + f"ZIP member {member.filename} exceeds maximum size " + f"of {max_member_bytes} bytes", + ) + total_size += member.file_size + if total_size > max_total_bytes: + _raise( + error_type, + f"ZIP archive exceeds maximum uncompressed size " + f"of {max_total_bytes} bytes", + ) + + normalized_members.append((member, normalized_name)) + + for member, normalized_name in normalized_members: + member_path = target_dir / normalized_name + if member.is_dir(): + member_path.mkdir(parents=True, exist_ok=True) + continue + + member_path.parent.mkdir(parents=True, exist_ok=True) + written = 0 + with zf.open(member, "r") as source, member_path.open("wb") as dest: + while True: + chunk = source.read(READ_CHUNK_SIZE) + if not chunk: + break + written += len(chunk) + if written > max_member_bytes: + _raise( + error_type, + f"ZIP member {member.filename} exceeds maximum size " + f"of {max_member_bytes} bytes", + ) + dest.write(chunk) diff --git a/src/specify_cli/_github_http.py b/src/specify_cli/_github_http.py index ee68a8325c..f86b48c657 100644 --- a/src/specify_cli/_github_http.py +++ b/src/specify_cli/_github_http.py @@ -7,6 +7,7 @@ """ import os +import urllib.error import urllib.request from urllib.parse import urlparse from typing import Dict @@ -41,6 +42,12 @@ def build_github_request(url: str) -> urllib.request.Request: return urllib.request.Request(url, headers=headers) +def _is_https_or_localhost_http(url: str) -> bool: + parsed = urlparse(url) + is_localhost = parsed.hostname in ("localhost", "127.0.0.1", "::1") + return parsed.scheme == "https" or (parsed.scheme == "http" and is_localhost) + + class _StripAuthOnRedirect(urllib.request.HTTPRedirectHandler): 
"""Redirect handler that drops the Authorization header when leaving GitHub. @@ -50,6 +57,11 @@ class _StripAuthOnRedirect(urllib.request.HTTPRedirectHandler): """ def redirect_request(self, req, fp, code, msg, headers, newurl): + if not _is_https_or_localhost_http(newurl): + raise urllib.error.URLError( + f"Refusing unsafe redirect to non-HTTPS URL: {newurl}" + ) + original_auth = req.get_header("Authorization") new_req = super().redirect_request(req, fp, code, msg, headers, newurl) if new_req is not None: @@ -63,17 +75,19 @@ def redirect_request(self, req, fp, code, msg, headers, newurl): return new_req -def open_github_url(url: str, timeout: int = 10): +def open_github_url(url: str, timeout: int = 10, *, strict_redirects: bool = False): """Open a URL with GitHub auth, stripping the header on cross-host redirects. When the request carries an Authorization header, a custom redirect handler drops that header if the redirect target is not a GitHub-owned domain, preventing token leakage to CDNs or other third-party hosts that GitHub may redirect to (e.g. S3 for release asset downloads). + When strict_redirects is true, the same redirect handler is used even + without auth so HTTPS downloads cannot silently downgrade to HTTP. 
""" req = build_github_request(url) - if not req.get_header("Authorization"): + if not req.get_header("Authorization") and not strict_redirects: return urllib.request.urlopen(req, timeout=timeout) opener = urllib.request.build_opener(_StripAuthOnRedirect) diff --git a/src/specify_cli/agents.py b/src/specify_cli/agents.py index 726b0fd2a6..da16e33691 100644 --- a/src/specify_cli/agents.py +++ b/src/specify_cli/agents.py @@ -461,8 +461,20 @@ def register_commands( for cmd_info in commands: cmd_name = cmd_info["name"] cmd_file = cmd_info["file"] + if not isinstance(cmd_file, str) or not cmd_file.strip(): + raise ValueError( + f"Command source file for {cmd_name!r} must be a non-empty string" + ) - source_file = source_dir / cmd_file + try: + source_root = source_dir.resolve() + source_file = (source_root / cmd_file).resolve() + source_file.relative_to(source_root) + except (OSError, ValueError): + raise ValueError( + f"Command source file {cmd_file!r} escapes directory " + f"{source_dir!r}" + ) from None if not source_file.exists(): continue diff --git a/src/specify_cli/extensions.py b/src/specify_cli/extensions.py index 81687b4186..3dae2646d3 100644 --- a/src/specify_cli/extensions.py +++ b/src/specify_cli/extensions.py @@ -10,11 +10,10 @@ import hashlib import os import tempfile -import zipfile import shutil import copy from dataclasses import dataclass -from pathlib import Path +from pathlib import Path, PurePosixPath from typing import Optional, Dict, List, Any, Callable, Set from datetime import datetime, timezone import re @@ -25,6 +24,12 @@ from packaging import version as pkg_version from packaging.specifiers import SpecifierSet, InvalidSpecifier +from ._download_security import ( + read_response_limited, + safe_extract_zip, + verify_sha256, +) + _FALLBACK_CORE_COMMAND_NAMES = frozenset({ "analyze", "checklist", @@ -238,6 +243,24 @@ def _validate(self): ) if "name" not in cmd or "file" not in cmd: raise ValidationError("Command missing 'name' or 'file'") + if 
not isinstance(cmd["file"], str) or not cmd["file"].strip(): + raise ValidationError( + f"Command '{cmd['name']}' file must be a non-empty string" + ) + + normalized_file = cmd["file"].replace("\\", "/") + file_path = PurePosixPath(normalized_file) + has_windows_drive = re.match(r"^[A-Za-z]:/", normalized_file) is not None + if ( + file_path.is_absolute() + or has_windows_drive + or any(part == ".." for part in file_path.parts) + ): + raise ValidationError( + f"Invalid command file path '{cmd['file']}': " + "must be a relative path within the extension directory" + ) + cmd["file"] = normalized_file # Validate command name format if not EXTENSION_COMMAND_NAME_PATTERN.match(cmd["name"]): @@ -1234,21 +1257,7 @@ def install_from_zip( with tempfile.TemporaryDirectory() as tmpdir: temp_path = Path(tmpdir) - # Extract ZIP safely (prevent Zip Slip attack) - with zipfile.ZipFile(zip_path, 'r') as zf: - # Validate all paths first before extracting anything - temp_path_resolved = temp_path.resolve() - for member in zf.namelist(): - member_path = (temp_path / member).resolve() - # Use is_relative_to for safe path containment check - try: - member_path.relative_to(temp_path_resolved) - except ValueError: - raise ValidationError( - f"Unsafe path in ZIP archive: {member} (potential path traversal)" - ) - # Only extract after all paths are validated - zf.extractall(temp_path) + safe_extract_zip(zip_path, temp_path, error_type=ValidationError) # Find extension directory (may be nested) extension_dir = temp_path @@ -1720,7 +1729,7 @@ def _open_url(self, url: str, timeout: int = 10): Delegates to :func:`specify_cli._github_http.open_github_url`. """ from specify_cli._github_http import open_github_url - return open_github_url(url, timeout) + return open_github_url(url, timeout, strict_redirects=True) def _load_catalog_config(self, config_path: Path) -> Optional[List[CatalogEntry]]: """Load catalog stack configuration from a YAML file. 
@@ -2178,8 +2187,18 @@ def download_extension(self, extension_id: str, target_dir: Optional[Path] = Non # Download the ZIP file try: with self._open_url(download_url, timeout=60) as response: - zip_data = response.read() + zip_data = read_response_limited( + response, + error_type=ExtensionError, + label=f"extension '{extension_id}' download", + ) + verify_sha256( + zip_data, + ext_info.get("sha256"), + error_type=ExtensionError, + label=f"extension '{extension_id}' download", + ) zip_path.write_bytes(zip_data) return zip_path diff --git a/src/specify_cli/presets.py b/src/specify_cli/presets.py index 690d1c51ff..7617a4e9a7 100644 --- a/src/specify_cli/presets.py +++ b/src/specify_cli/presets.py @@ -12,10 +12,9 @@ import hashlib import os import tempfile -import zipfile import shutil from dataclasses import dataclass -from pathlib import Path +from pathlib import Path, PurePosixPath from typing import TYPE_CHECKING, Optional, Dict, List, Any if TYPE_CHECKING: @@ -27,6 +26,11 @@ from packaging import version as pkg_version from packaging.specifiers import SpecifierSet, InvalidSpecifier +from ._download_security import ( + read_response_limited, + safe_extract_zip, + verify_sha256, +) from .extensions import REINSTALL_COMMAND, ExtensionRegistry, normalize_priority @@ -216,12 +220,21 @@ def _validate(self): # Validate file path safety: must be relative, no parent traversal file_path = tmpl["file"] - normalized = os.path.normpath(file_path) - if os.path.isabs(normalized) or normalized.startswith(".."): + if not isinstance(file_path, str) or not file_path.strip(): + raise PresetValidationError( + "Invalid template file path: must be a non-empty string" + ) + normalized = file_path.replace("\\", "/") + normalized_path = PurePosixPath(normalized) + has_windows_drive = re.match(r"^[A-Za-z]:/", normalized) is not None + if normalized_path.is_absolute() or any( + part == ".." 
for part in normalized_path.parts + ) or has_windows_drive: raise PresetValidationError( f"Invalid template file path '{file_path}': " "must be a relative path within the preset directory" ) + tmpl["file"] = normalized # Validate strategy field (optional, defaults to "replace") strategy = tmpl.get("strategy", "replace") @@ -1625,18 +1638,7 @@ def install_from_zip( with tempfile.TemporaryDirectory() as tmpdir: temp_path = Path(tmpdir) - with zipfile.ZipFile(zip_path, 'r') as zf: - temp_path_resolved = temp_path.resolve() - for member in zf.namelist(): - member_path = (temp_path / member).resolve() - try: - member_path.relative_to(temp_path_resolved) - except ValueError: - raise PresetValidationError( - f"Unsafe path in ZIP archive: {member} " - "(potential path traversal)" - ) - zf.extractall(temp_path) + safe_extract_zip(zip_path, temp_path, error_type=PresetValidationError) pack_dir = temp_path manifest_path = pack_dir / "preset.yml" @@ -1858,7 +1860,7 @@ def _open_url(self, url: str, timeout: int = 10): Delegates to :func:`specify_cli._github_http.open_github_url`. """ from specify_cli._github_http import open_github_url - return open_github_url(url, timeout) + return open_github_url(url, timeout, strict_redirects=True) def _load_catalog_config(self, config_path: Path) -> Optional[List[PresetCatalogEntry]]: """Load catalog stack configuration from a YAML file. 
@@ -2306,8 +2308,18 @@ def download_pack( try: with self._open_url(download_url, timeout=60) as response: - zip_data = response.read() + zip_data = read_response_limited( + response, + error_type=PresetError, + label=f"preset '{pack_id}' download", + ) + verify_sha256( + zip_data, + pack_info.get("sha256"), + error_type=PresetError, + label=f"preset '{pack_id}' download", + ) zip_path.write_bytes(zip_data) return zip_path diff --git a/src/specify_cli/workflows/catalog.py b/src/specify_cli/workflows/catalog.py index da5c60b5c8..21c121715c 100644 --- a/src/specify_cli/workflows/catalog.py +++ b/src/specify_cli/workflows/catalog.py @@ -19,6 +19,9 @@ import yaml +from specify_cli._download_security import read_response_limited +from specify_cli._github_http import open_github_url + # --------------------------------------------------------------------------- # Errors @@ -322,7 +325,6 @@ def _fetch_single_catalog( # Fetch from URL — validate scheme before opening and after redirects from urllib.parse import urlparse - from urllib.request import urlopen def _validate_catalog_url(url: str) -> None: parsed = urlparse(url) @@ -337,9 +339,19 @@ def _validate_catalog_url(url: str) -> None: _validate_catalog_url(entry.url) try: - with urlopen(entry.url, timeout=30) as resp: # noqa: S310 + with open_github_url( + entry.url, + timeout=30, + strict_redirects=True, + ) as resp: _validate_catalog_url(resp.geturl()) - data = json.loads(resp.read().decode("utf-8")) + data = json.loads( + read_response_limited( + resp, + error_type=WorkflowCatalogError, + label="workflow catalog", + ).decode("utf-8") + ) except Exception as exc: # Fall back to cache if available if cache_file.exists(): diff --git a/tests/test_download_security.py b/tests/test_download_security.py new file mode 100644 index 0000000000..ac46486cfc --- /dev/null +++ b/tests/test_download_security.py @@ -0,0 +1,82 @@ +"""Tests for bounded download and ZIP extraction helpers.""" + +from __future__ import annotations + 
+import stat +import zipfile + +import pytest + +from specify_cli._download_security import ( + read_response_limited, + safe_extract_zip, + verify_sha256, +) + + +class _Response: + def __init__(self, data: bytes): + self.data = data + + def read(self, size: int = -1) -> bytes: + return self.data if size < 0 else self.data[:size] + + +def test_read_response_limited_rejects_oversized_download(): + with pytest.raises(ValueError, match="exceeds maximum size"): + read_response_limited(_Response(b"abcde"), max_bytes=4) + + +def test_verify_sha256_rejects_mismatch(): + with pytest.raises(ValueError, match="checksum mismatch"): + verify_sha256(b"payload", "sha256:" + "0" * 64) + + +@pytest.mark.parametrize( + "member_name", + [ + "../evil.txt", + "nested/../../evil.txt", + "nested\\..\\evil.txt", + "C:\\Windows\\evil.txt", + ], +) +def test_safe_extract_zip_rejects_traversal(tmp_path, member_name): + zip_path = tmp_path / "bad.zip" + with zipfile.ZipFile(zip_path, "w") as zf: + zf.writestr(member_name, "nope") + + with pytest.raises(ValueError, match="Unsafe path"): + safe_extract_zip(zip_path, tmp_path / "out") + + +def test_safe_extract_zip_rejects_symlinks(tmp_path): + zip_path = tmp_path / "bad.zip" + info = zipfile.ZipInfo("link") + info.external_attr = (stat.S_IFLNK | 0o777) << 16 + + with zipfile.ZipFile(zip_path, "w") as zf: + zf.writestr(info, "target") + + with pytest.raises(ValueError, match="Unsafe symlink"): + safe_extract_zip(zip_path, tmp_path / "out") + + +def test_safe_extract_zip_rejects_oversized_member(tmp_path): + zip_path = tmp_path / "bad.zip" + with zipfile.ZipFile(zip_path, "w") as zf: + zf.writestr("big.txt", "abcde") + + with pytest.raises(ValueError, match="exceeds maximum size"): + safe_extract_zip(zip_path, tmp_path / "out", max_member_bytes=4) + + +def test_safe_extract_zip_extracts_safe_archive(tmp_path): + zip_path = tmp_path / "ok.zip" + out_dir = tmp_path / "out" + with zipfile.ZipFile(zip_path, "w") as zf: + 
zf.writestr("nested/file.txt", "hello") + + safe_extract_zip(zip_path, out_dir) + + assert (out_dir / "nested" / "file.txt").read_text(encoding="utf-8") == "hello" diff --git a/tests/test_extensions.py b/tests/test_extensions.py index c5be0ab4f3..803ff6b439 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -11,6 +11,7 @@ import pytest import json +import hashlib import platform import tempfile import shutil @@ -293,6 +294,42 @@ def test_invalid_command_name(self, temp_dir, valid_manifest_data): with pytest.raises(ValidationError, match="Invalid command name"): ExtensionManifest(manifest_path) + @pytest.mark.parametrize( + "bad_file", + [ + "../outside.md", + "/tmp/outside.md", + "commands/../../outside.md", + "C:\\Windows\\outside.md", + ], + ) + def test_invalid_command_file_path(self, temp_dir, valid_manifest_data, bad_file): + """Command files must stay inside the extension package.""" + import yaml + + valid_manifest_data["provides"]["commands"][0]["file"] = bad_file + + manifest_path = temp_dir / "extension.yml" + with open(manifest_path, "w") as f: + yaml.dump(valid_manifest_data, f) + + with pytest.raises(ValidationError, match="Invalid command file path"): + ExtensionManifest(manifest_path) + + def test_windows_command_file_path_is_normalized(self, temp_dir, valid_manifest_data): + """Windows-authored manifests keep compatibility without traversal.""" + import yaml + + valid_manifest_data["provides"]["commands"][0]["file"] = "commands\\hello.md" + + manifest_path = temp_dir / "extension.yml" + with open(manifest_path, "w") as f: + yaml.dump(valid_manifest_data, f) + + manifest = ExtensionManifest(manifest_path) + + assert manifest.commands[0]["file"] == "commands/hello.md" + def test_command_name_autocorrect_speckit_prefix(self, temp_dir, valid_manifest_data): """Test that 'speckit.command' is auto-corrected to 'speckit.{ext_id}.command'.""" import yaml @@ -1847,6 +1884,7 @@ def test_unregister_skill_removes_parent_directory(self, 
project_dir, temp_dir): from specify_cli.extensions import ExtensionManifest manifest = ExtensionManifest(ext_dir / "extension.yml") registered = registrar.register_commands_for_agent("codex", manifest, ext_dir, project_dir) + assert registered == ["speckit.cleanup-ext.run"] skill_subdir = skills_dir / "speckit-cleanup-ext-run" assert skill_subdir.exists(), "Skill subdirectory should exist after registration" @@ -2577,6 +2615,27 @@ def test_redirect_strips_auth_for_github_to_external(self): assert auth_header is None assert auth_unredirected is None + def test_redirect_rejects_https_downgrade(self): + """HTTPS downloads must not follow redirects to non-local HTTP URLs.""" + from specify_cli._github_http import _StripAuthOnRedirect + from urllib.request import Request + import io + import urllib.error + + handler = _StripAuthOnRedirect() + req = Request("https://example.com/archive.zip") + fp = io.BytesIO(b"") + + with pytest.raises(urllib.error.URLError, match="unsafe redirect"): + handler.redirect_request( + req, + fp, + 302, + "Found", + {}, + "http://evil.example.com/archive.zip", + ) + def test_fetch_single_catalog_sends_auth_header(self, temp_dir, monkeypatch): """_fetch_single_catalog passes Authorization header via opener for GitHub URLs.""" from unittest.mock import patch, MagicMock @@ -2614,7 +2673,8 @@ def fake_open(req, timeout=None): def test_download_extension_sends_auth_header(self, temp_dir, monkeypatch): """download_extension passes Authorization header via opener for GitHub URLs.""" from unittest.mock import patch, MagicMock - import zipfile, io + import io + import zipfile monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken") catalog = self._make_catalog(temp_dir) @@ -2653,6 +2713,52 @@ def fake_open(req, timeout=None): assert captured["req"].get_header("Authorization") == "Bearer ghp_testtoken" + def test_download_extension_verifies_sha256(self, temp_dir): + """Catalog-provided checksums are enforced when present.""" + from unittest.mock import 
patch, MagicMock + + catalog = self._make_catalog(temp_dir) + zip_bytes = b"fake zip data" + mock_response = MagicMock() + mock_response.read.return_value = zip_bytes + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + ext_info = { + "id": "test-ext", + "name": "Test Extension", + "version": "1.0.0", + "download_url": "https://example.com/test-ext.zip", + "sha256": hashlib.sha256(zip_bytes).hexdigest(), + } + + with patch.object(catalog, "get_extension_info", return_value=ext_info), \ + patch.object(catalog, "_open_url", return_value=mock_response): + result = catalog.download_extension("test-ext", target_dir=temp_dir) + + assert result.read_bytes() == zip_bytes + + def test_download_extension_rejects_sha256_mismatch(self, temp_dir): + """A mismatched catalog checksum stops the downloaded ZIP being used.""" + from unittest.mock import patch, MagicMock + + catalog = self._make_catalog(temp_dir) + mock_response = MagicMock() + mock_response.read.return_value = b"fake zip data" + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + ext_info = { + "id": "test-ext", + "name": "Test Extension", + "version": "1.0.0", + "download_url": "https://example.com/test-ext.zip", + "sha256": "0" * 64, + } + + with patch.object(catalog, "get_extension_info", return_value=ext_info), \ + patch.object(catalog, "_open_url", return_value=mock_response): + with pytest.raises(ExtensionError, match="checksum mismatch"): + catalog.download_extension("test-ext", target_dir=temp_dir) + # ===== CatalogEntry Tests ===== @@ -3510,7 +3616,6 @@ def test_download_extension_raises_for_bundled(self, temp_dir): def test_download_extension_allows_bundled_with_url(self, temp_dir): """download_extension should allow bundled extensions that have a download_url (newer version).""" from unittest.mock import patch, MagicMock - import urllib.request project_dir = temp_dir / "project" project_dir.mkdir() @@ -3533,7 
+3638,7 @@ def test_download_extension_allows_bundled_with_url(self, temp_dir): mock_response.__exit__ = MagicMock(return_value=False) with patch.object(catalog, "get_extension_info", return_value=bundled_with_url), \ - patch.object(urllib.request, "urlopen", return_value=mock_response): + patch.object(catalog, "_open_url", return_value=mock_response): result = catalog.download_extension("git") assert result.name == "git-2.0.0.zip" diff --git a/tests/test_github_workflows.py b/tests/test_github_workflows.py new file mode 100644 index 0000000000..2b21d3a40f --- /dev/null +++ b/tests/test_github_workflows.py @@ -0,0 +1,32 @@ +"""Static checks for repository GitHub Actions workflows.""" + +from __future__ import annotations + +import re +from pathlib import Path + + +REPO_ROOT = Path(__file__).resolve().parent.parent +WORKFLOWS_DIR = REPO_ROOT / ".github" / "workflows" +USES_RE = re.compile(r"^\s*uses:\s*(?P<ref>\S+)", re.MULTILINE) + + +def test_github_actions_are_pinned_to_full_commit_shas(): + unpinned_refs = [] + + workflows = sorted( + list(WORKFLOWS_DIR.glob("*.yml")) + list(WORKFLOWS_DIR.glob("*.yaml")) + ) + assert workflows + + for workflow in workflows: + workflow_text = workflow.read_text(encoding="utf-8") + for match in USES_RE.finditer(workflow_text): + uses_ref = match.group("ref") + if uses_ref.startswith(("./", "../")): + continue + if re.search(r"@[0-9a-f]{40}$", uses_ref): + continue + unpinned_refs.append(f"{workflow.relative_to(REPO_ROOT)}: {uses_ref}") + + assert unpinned_refs == [] diff --git a/tests/test_presets.py b/tests/test_presets.py index 848c072dd0..d4c0d1f692 100644 --- a/tests/test_presets.py +++ b/tests/test_presets.py @@ -12,6 +12,7 @@ import pytest import json +import hashlib import tempfile import shutil import warnings @@ -289,6 +290,38 @@ def test_invalid_template_name_format(self, temp_dir, valid_pack_data): with pytest.raises(PresetValidationError, match="Invalid template name"): PresetManifest(manifest_path) + 
@pytest.mark.parametrize( + "bad_file", + [ + "../outside.md", + "/tmp/outside.md", + "templates/../../outside.md", + "C:\\Windows\\outside.md", + ], + ) + def test_invalid_template_file_path(self, temp_dir, valid_pack_data, bad_file): + """Template files must stay inside the preset package.""" + valid_pack_data["provides"]["templates"][0]["file"] = bad_file + manifest_path = temp_dir / "preset.yml" + with open(manifest_path, "w") as f: + yaml.dump(valid_pack_data, f) + + with pytest.raises(PresetValidationError, match="Invalid template file path"): + PresetManifest(manifest_path) + + def test_windows_template_file_path_is_normalized(self, temp_dir, valid_pack_data): + """Windows-authored manifests keep compatibility without traversal.""" + valid_pack_data["provides"]["templates"][0]["file"] = ( + "templates\\spec-template.md" + ) + manifest_path = temp_dir / "preset.yml" + with open(manifest_path, "w") as f: + yaml.dump(valid_pack_data, f) + + manifest = PresetManifest(manifest_path) + + assert manifest.templates[0]["file"] == "templates/spec-template.md" + def test_get_hash(self, pack_dir): """Test manifest hash calculation.""" manifest = PresetManifest(pack_dir / "preset.yml") @@ -1556,6 +1589,54 @@ def fake_open(req, timeout=None): assert captured["req"].get_header("Authorization") == "Bearer ghp_testtoken" + def test_download_pack_verifies_sha256(self, project_dir): + """Catalog-provided checksums are enforced when present.""" + from unittest.mock import patch, MagicMock + + catalog = PresetCatalog(project_dir) + zip_bytes = b"fake zip data" + mock_response = MagicMock() + mock_response.read.return_value = zip_bytes + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + pack_info = { + "id": "test-pack", + "name": "Test Pack", + "version": "1.0.0", + "download_url": "https://example.com/test-pack.zip", + "sha256": hashlib.sha256(zip_bytes).hexdigest(), + "_install_allowed": True, + } + + with patch.object(catalog, 
"get_pack_info", return_value=pack_info), \ + patch.object(catalog, "_open_url", return_value=mock_response): + result = catalog.download_pack("test-pack", target_dir=project_dir) + + assert result.read_bytes() == zip_bytes + + def test_download_pack_rejects_sha256_mismatch(self, project_dir): + """A mismatched catalog checksum stops the downloaded ZIP being used.""" + from unittest.mock import patch, MagicMock + + catalog = PresetCatalog(project_dir) + mock_response = MagicMock() + mock_response.read.return_value = b"fake zip data" + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + pack_info = { + "id": "test-pack", + "name": "Test Pack", + "version": "1.0.0", + "download_url": "https://example.com/test-pack.zip", + "sha256": "0" * 64, + "_install_allowed": True, + } + + with patch.object(catalog, "get_pack_info", return_value=pack_info), \ + patch.object(catalog, "_open_url", return_value=mock_response): + with pytest.raises(PresetError, match="checksum mismatch"): + catalog.download_pack("test-pack", target_dir=project_dir) + # ===== Integration Tests ===== diff --git a/tests/test_registrar_path_traversal.py b/tests/test_registrar_path_traversal.py index fc423b4056..006daa89e8 100644 --- a/tests/test_registrar_path_traversal.py +++ b/tests/test_registrar_path_traversal.py @@ -121,6 +121,31 @@ def test_copilot_rejects_traversal_in_alias(self, tmp_path, bad_alias): _assert_no_stray_files(tmp_path, Path(bad_alias).name.replace("/", "")) +class TestSourceFileTraversal: + """Command source files must stay inside the declared source directory.""" + + @pytest.mark.parametrize("bad_file", TRAVERSAL_PAYLOADS) + def test_rejects_traversal_in_command_source_file(self, tmp_path, bad_file): + project, ext_dir = _project_and_source(tmp_path) + (project / ".gemini" / "commands").mkdir(parents=True) + + registrar = CommandRegistrar() + with pytest.raises(ValueError, match="escapes directory"): + registrar.register_commands( + 
"gemini", + [ + { + "name": "speckit.myext.ok", + "file": bad_file, + "aliases": [], + } + ], + "myext", + ext_dir, + project, + ) + + class TestCopilotPromptTraversal: """`write_copilot_prompt` is a public static method — guard it directly.""" From 4599155bd7f7a149e3514196cd1230399feed73a Mon Sep 17 00:00:00 2001 From: Pascal Date: Wed, 6 May 2026 06:46:05 +0200 Subject: [PATCH 09/30] Address remaining security review feedback --- .../scripts/check_security_requirements.py | 101 +++++++++ .github/security-audit-requirements.txt | 2 - .github/workflows/security.yml | 10 + CONTRIBUTING.md | 2 +- src/specify_cli/__init__.py | 28 ++- src/specify_cli/_download_security.py | 2 +- src/specify_cli/extensions.py | 18 +- src/specify_cli/integrations/catalog.py | 10 +- src/specify_cli/presets.py | 18 +- .../integrations/test_integration_catalog.py | 48 ++++- tests/test_download_security.py | 40 ++++ tests/test_extensions.py | 42 ++++ tests/test_presets.py | 42 ++++ tests/test_security_workflow.py | 192 +++++++++++++++++- 14 files changed, 524 insertions(+), 31 deletions(-) create mode 100644 .github/scripts/check_security_requirements.py diff --git a/.github/scripts/check_security_requirements.py b/.github/scripts/check_security_requirements.py new file mode 100644 index 0000000000..6834ee42bf --- /dev/null +++ b/.github/scripts/check_security_requirements.py @@ -0,0 +1,101 @@ +"""Check that committed security audit requirements are up to date.""" + +from __future__ import annotations + +import os +import subprocess +import sys +from pathlib import Path + + +REPO_ROOT = Path(__file__).resolve().parents[2] +COMMITTED_REQUIREMENTS = REPO_ROOT / ".github" / "security-audit-requirements.txt" +DEPENDENCY_INPUTS = ("pyproject.toml", ".github/security-audit-requirements.txt") + + +def _dependency_diff_refs() -> tuple[str, str]: + base_ref = os.environ.get("DEPENDENCY_DIFF_BASE", "").strip() + head_ref = os.environ.get("DEPENDENCY_DIFF_HEAD", "").strip() or "HEAD" + if base_ref and 
not set(base_ref) <= {"0"}: + return base_ref, head_ref + return "HEAD^", "HEAD" + + +def _dependency_inputs_changed() -> bool: + base_ref, head_ref = _dependency_diff_refs() + try: + result = subprocess.run( + [ + "git", + "diff", + "--name-only", + base_ref, + head_ref, + "--", + *DEPENDENCY_INPUTS, + ], + check=True, + cwd=REPO_ROOT, + stderr=subprocess.PIPE, + stdout=subprocess.PIPE, + text=True, + ) + except subprocess.CalledProcessError as exc: + print( + "Could not determine changed dependency inputs; checking requirements.", + file=sys.stderr, + ) + if exc.stderr: + print(exc.stderr.strip(), file=sys.stderr) + return True + + changed_inputs = [line for line in result.stdout.splitlines() if line] + if not changed_inputs: + print("Dependency audit inputs unchanged; sync check skipped.") + return False + + print(f"Dependency audit inputs changed: {', '.join(changed_inputs)}") + return True + + +def main() -> int: + if not _dependency_inputs_changed(): + return 0 + + generated_requirements = Path(os.environ["GENERATED_REQUIREMENTS"]) + generated_requirements.parent.mkdir(parents=True, exist_ok=True) + + subprocess.run( + [ + "uv", + "pip", + "compile", + "pyproject.toml", + "--extra", + "test", + "--universal", + "--generate-hashes", + "--quiet", + "--no-header", + "--output-file", + str(generated_requirements), + ], + check=True, + cwd=REPO_ROOT, + ) + + committed = COMMITTED_REQUIREMENTS.read_text(encoding="utf-8") + generated = generated_requirements.read_text(encoding="utf-8") + if committed == generated: + return 0 + + print( + "Regenerate .github/security-audit-requirements.txt with the documented " + "uv pip compile command.", + file=sys.stderr, + ) + return 1 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/.github/security-audit-requirements.txt b/.github/security-audit-requirements.txt index f15ab00c67..d97b84d2cd 100644 --- a/.github/security-audit-requirements.txt +++ b/.github/security-audit-requirements.txt @@ -1,5 +1,3 @@ 
-# This file was autogenerated by uv via the following command: -# uv pip compile pyproject.toml --extra test --universal --generate-hashes --output-file .github/security-audit-requirements.txt annotated-doc==0.0.4 \ --hash=sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320 \ --hash=sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4 diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml index f09e5812a5..8a0058c073 100644 --- a/.github/workflows/security.yml +++ b/.github/workflows/security.yml @@ -23,6 +23,8 @@ jobs: steps: - name: Checkout uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + fetch-depth: 2 - name: Install uv uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0 @@ -41,6 +43,14 @@ jobs: if: ${{ github.event_name == 'schedule' }} run: uvx --from pip-audit==2.10.0 pip-audit --disable-pip --require-hashes -r "${{ runner.temp }}/spec-kit-audit-requirements.txt" --progress-spinner off + - name: Check committed audit requirements are current + if: ${{ github.event_name != 'schedule' }} + env: + DEPENDENCY_DIFF_BASE: ${{ github.event.pull_request.base.sha || github.event.before || '' }} + DEPENDENCY_DIFF_HEAD: ${{ github.sha }} + GENERATED_REQUIREMENTS: ${{ runner.temp }}/security-audit-requirements.txt + run: python .github/scripts/check_security_requirements.py + - name: Run pip-audit (committed requirements) if: ${{ github.event_name != 'schedule' }} run: uvx --from pip-audit==2.10.0 pip-audit --disable-pip --require-hashes -r .github/security-audit-requirements.txt --progress-spinner off diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 35bfa35834..fd043e01a6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -91,7 +91,7 @@ uvx --from bandit==1.9.4 bandit -r src -lll --baseline .github/bandit-baseline.j Run these before changing dependency metadata, workflow execution code, subprocess usage, or security-sensitive paths. 
Pull request, push, and manual CI audits use the committed hashed requirements file so they stay deterministic. The scheduled CI audit also resolves the runtime and `test` extra dependency set across the supported Python and OS matrix to catch newly published advisories. If dependency metadata changes, refresh the committed audit input before running pip-audit: ```bash -uv pip compile pyproject.toml --extra test --universal --generate-hashes --quiet --output-file .github/security-audit-requirements.txt +uv pip compile pyproject.toml --extra test --universal --generate-hashes --quiet --no-header --output-file .github/security-audit-requirements.txt ``` ### Manual testing diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py index 9f7e5ad092..8aa5ead1f5 100644 --- a/src/specify_cli/__init__.py +++ b/src/specify_cli/__init__.py @@ -54,6 +54,7 @@ from rich.tree import Tree from typer.core import TyperGroup +from ._download_security import read_response_limited from .integration_runtime import ( invoke_separator_for_integration as _invoke_separator_for_integration, resolve_integration_options as _resolve_integration_options_impl, @@ -1772,7 +1773,13 @@ def _fetch_latest_release_tag() -> tuple[str | None, str | None]: req.add_header("Authorization", f"Bearer {token}") try: with urllib.request.urlopen(req, timeout=5) as resp: - payload = json.loads(resp.read().decode("utf-8")) + payload = json.loads( + read_response_limited( + resp, + max_bytes=1024 * 1024, + label="GitHub latest release", + ).decode("utf-8") + ) tag = payload.get("tag_name") if not isinstance(tag, str) or not tag: raise ValueError("GitHub API response missing valid tag_name") @@ -3376,8 +3383,10 @@ def preset_add( zip_path = Path(tmpdir) / "preset.zip" try: with urllib.request.urlopen(from_url, timeout=60) as response: - zip_path.write_bytes(response.read()) - except urllib.error.URLError as e: + zip_path.write_bytes( + read_response_limited(response, label=f"preset {from_url}") + ) + 
except (urllib.error.URLError, ValueError) as e: console.print(f"[red]Error:[/red] Failed to download: {e}") raise typer.Exit(1) @@ -4280,12 +4289,15 @@ def extension_add( try: with urllib.request.urlopen(from_url, timeout=60) as response: - zip_data = response.read() + zip_data = read_response_limited( + response, + label=f"extension {from_url}", + ) zip_path.write_bytes(zip_data) # Install from downloaded ZIP manifest = manager.install_from_zip(zip_path, speckit_version, priority=priority) - except urllib.error.URLError as e: + except (urllib.error.URLError, ValueError) as e: console.print(f"[red]Error:[/red] Failed to download from {from_url}: {e}") raise typer.Exit(1) finally: @@ -5526,7 +5538,7 @@ def _validate_and_install_local(yaml_path: Path, source_label: str) -> None: console.print(f"[red]Error:[/red] URL redirected to non-HTTPS: {final_url}") raise typer.Exit(1) with tempfile.NamedTemporaryFile(suffix=".yml", delete=False) as tmp: - tmp.write(resp.read()) + tmp.write(read_response_limited(resp, label=f"workflow {source}")) tmp_path = Path(tmp.name) except typer.Exit: raise @@ -5630,7 +5642,9 @@ def _validate_and_install_local(yaml_path: Path, source_label: str) -> None: f"[red]Error:[/red] Workflow '{source}' redirected to non-HTTPS URL: {final_url}" ) raise typer.Exit(1) - workflow_file.write_bytes(response.read()) + workflow_file.write_bytes( + read_response_limited(response, label=f"workflow {source}") + ) except Exception as exc: if workflow_dir.exists(): import shutil diff --git a/src/specify_cli/_download_security.py b/src/specify_cli/_download_security.py index 37f92749d4..6706ec5237 100644 --- a/src/specify_cli/_download_security.py +++ b/src/specify_cli/_download_security.py @@ -80,7 +80,7 @@ def _safe_zip_name(name: str, *, error_type: type[ErrorT]) -> str: normalized = name.replace("\\", "/") path = PurePosixPath(normalized) - has_windows_drive = re.match(r"^[A-Za-z]:/", normalized) is not None + has_windows_drive = re.match(r"^[A-Za-z]:", 
normalized) is not None if ( not path.parts or path.is_absolute() diff --git a/src/specify_cli/extensions.py b/src/specify_cli/extensions.py index 3dae2646d3..1bace0b8a7 100644 --- a/src/specify_cli/extensions.py +++ b/src/specify_cli/extensions.py @@ -250,7 +250,7 @@ def _validate(self): normalized_file = cmd["file"].replace("\\", "/") file_path = PurePosixPath(normalized_file) - has_windows_drive = re.match(r"^[A-Za-z]:/", normalized_file) is not None + has_windows_drive = re.match(r"^[A-Za-z]:", normalized_file) is not None if ( file_path.is_absolute() or has_windows_drive @@ -1921,7 +1921,13 @@ def _fetch_single_catalog(self, entry: CatalogEntry, force_refresh: bool = False # Fetch from network try: with self._open_url(entry.url, timeout=10) as response: - catalog_data = json.loads(response.read()) + catalog_data = json.loads( + read_response_limited( + response, + error_type=ExtensionError, + label=f"extension catalog {entry.url}", + ) + ) if "schema_version" not in catalog_data or "extensions" not in catalog_data: raise ExtensionError(f"Invalid catalog format from {entry.url}") @@ -2037,7 +2043,13 @@ def fetch_catalog(self, force_refresh: bool = False) -> Dict[str, Any]: import urllib.error with self._open_url(catalog_url, timeout=10) as response: - catalog_data = json.loads(response.read()) + catalog_data = json.loads( + read_response_limited( + response, + error_type=ExtensionError, + label=f"extension catalog {catalog_url}", + ) + ) # Validate catalog structure if "schema_version" not in catalog_data or "extensions" not in catalog_data: diff --git a/src/specify_cli/integrations/catalog.py b/src/specify_cli/integrations/catalog.py index 1b449af682..b784147f39 100644 --- a/src/specify_cli/integrations/catalog.py +++ b/src/specify_cli/integrations/catalog.py @@ -21,6 +21,8 @@ import yaml from packaging import version as pkg_version +from .._download_security import read_response_limited + # 
--------------------------------------------------------------------------- # Errors @@ -294,7 +296,13 @@ def _fetch_single_catalog( final_url = resp.geturl() if final_url != entry.url: self._validate_catalog_url(final_url) - catalog_data = json.loads(resp.read()) + catalog_data = json.loads( + read_response_limited( + resp, + error_type=IntegrationCatalogError, + label=f"integration catalog {entry.url}", + ) + ) if not isinstance(catalog_data, dict): raise IntegrationCatalogError( diff --git a/src/specify_cli/presets.py b/src/specify_cli/presets.py index 7617a4e9a7..f9e350083b 100644 --- a/src/specify_cli/presets.py +++ b/src/specify_cli/presets.py @@ -226,7 +226,7 @@ def _validate(self): ) normalized = file_path.replace("\\", "/") normalized_path = PurePosixPath(normalized) - has_windows_drive = re.match(r"^[A-Za-z]:/", normalized) is not None + has_windows_drive = re.match(r"^[A-Za-z]:", normalized) is not None if normalized_path.is_absolute() or any( part == ".." for part in normalized_path.parts ) or has_windows_drive: @@ -2045,7 +2045,13 @@ def _fetch_single_catalog(self, entry: PresetCatalogEntry, force_refresh: bool = try: with self._open_url(entry.url, timeout=10) as response: - catalog_data = json.loads(response.read()) + catalog_data = json.loads( + read_response_limited( + response, + error_type=PresetError, + label=f"preset catalog {entry.url}", + ) + ) if ( "schema_version" not in catalog_data @@ -2138,7 +2144,13 @@ def fetch_catalog(self, force_refresh: bool = False) -> Dict[str, Any]: try: with self._open_url(catalog_url, timeout=10) as response: - catalog_data = json.loads(response.read()) + catalog_data = json.loads( + read_response_limited( + response, + error_type=PresetError, + label=f"preset catalog {catalog_url}", + ) + ) if ( "schema_version" not in catalog_data diff --git a/tests/integrations/test_integration_catalog.py b/tests/integrations/test_integration_catalog.py index 8b21ddfb8b..88b1b6e415 100644 --- 
a/tests/integrations/test_integration_catalog.py +++ b/tests/integrations/test_integration_catalog.py @@ -173,7 +173,7 @@ def __init__(self, data, url=""): self._data = json.dumps(data).encode() self._url = url - def read(self): + def read(self, _size=-1): return self._data def geturl(self): @@ -294,6 +294,50 @@ def test_invalid_catalog_format(self, tmp_path, monkeypatch): with pytest.raises(IntegrationCatalogError, match="Failed to fetch any integration catalog"): cat.search() + def test_fetch_single_catalog_uses_bounded_read(self, tmp_path, monkeypatch): + cat = IntegrationCatalog(tmp_path) + entry = IntegrationCatalogEntry( + url="https://example.com/catalog.json", + name="test", + priority=1, + install_allowed=True, + ) + + class FakeResponse: + def read(self, _size=-1): + return b"{}" + + def geturl(self): + return entry.url + + def __enter__(self): + return self + + def __exit__(self, *_args): + pass + + def fake_urlopen(url, timeout=10): + assert url == entry.url + assert timeout == 10 + return FakeResponse() + + def fake_read_response_limited(response, **kwargs): + assert isinstance(response, FakeResponse) + assert kwargs["error_type"] is IntegrationCatalogError + assert kwargs["label"] == "integration catalog https://example.com/catalog.json" + raise IntegrationCatalogError("catalog too large") + + import urllib.request + + monkeypatch.setattr(urllib.request, "urlopen", fake_urlopen) + monkeypatch.setattr( + "specify_cli.integrations.catalog.read_response_limited", + fake_read_response_limited, + ) + + with pytest.raises(IntegrationCatalogError, match="catalog too large"): + cat._fetch_single_catalog(entry, force_refresh=True) + def test_clear_cache(self, tmp_path): (tmp_path / ".specify").mkdir() cat = IntegrationCatalog(tmp_path) @@ -492,7 +536,7 @@ class FakeResponse: def __init__(self, data, url=""): self._data = json.dumps(data).encode() self._url = url - def read(self): + def read(self, _size=-1): return self._data def geturl(self): return self._url 
diff --git a/tests/test_download_security.py b/tests/test_download_security.py index ac46486cfc..2ce8310ff7 100644 --- a/tests/test_download_security.py +++ b/tests/test_download_security.py @@ -4,6 +4,8 @@ import stat import zipfile +import re +from pathlib import Path import pytest @@ -14,6 +16,10 @@ ) +REPO_ROOT = Path(__file__).resolve().parent.parent +RAW_RESPONSE_READ_RE = re.compile(r"\b(?:resp|response)\.read\(\)") + + class _Response: def __init__(self, data: bytes): self.data = data @@ -27,6 +33,19 @@ def test_read_response_limited_rejects_oversized_download(): read_response_limited(_Response(b"abcde"), max_bytes=4) +def test_remote_downloads_do_not_use_unbounded_response_reads(): + offenders = [] + for path in (REPO_ROOT / "src" / "specify_cli").rglob("*.py"): + for line_number, line in enumerate( + path.read_text(encoding="utf-8").splitlines(), + start=1, + ): + if RAW_RESPONSE_READ_RE.search(line): + offenders.append(f"{path.relative_to(REPO_ROOT)}:{line_number}") + + assert offenders == [] + + def test_verify_sha256_rejects_mismatch(): with pytest.raises(ValueError, match="checksum mismatch"): verify_sha256(b"payload", "sha256:" + "0" * 64) @@ -39,6 +58,7 @@ def test_verify_sha256_rejects_mismatch(): "nested/../../evil.txt", "nested\\..\\evil.txt", "C:\\Windows\\evil.txt", + "C:drive-relative.txt", ], ) def test_safe_extract_zip_rejects_traversal(tmp_path, member_name): @@ -71,6 +91,26 @@ def test_safe_extract_zip_rejects_oversized_member(tmp_path): safe_extract_zip(zip_path, tmp_path / "out", max_member_bytes=4) +def test_safe_extract_zip_rejects_too_many_entries(tmp_path): + zip_path = tmp_path / "bad.zip" + with zipfile.ZipFile(zip_path, "w") as zf: + zf.writestr("one.txt", "1") + zf.writestr("two.txt", "2") + + with pytest.raises(ValueError, match="too many entries"): + safe_extract_zip(zip_path, tmp_path / "out", max_entries=1) + + +def test_safe_extract_zip_rejects_total_uncompressed_size(tmp_path): + zip_path = tmp_path / "bad.zip" + with 
zipfile.ZipFile(zip_path, "w") as zf: + zf.writestr("one.txt", "123") + zf.writestr("two.txt", "456") + + with pytest.raises(ValueError, match="maximum uncompressed size"): + safe_extract_zip(zip_path, tmp_path / "out", max_total_bytes=5) + + def test_safe_extract_zip_extracts_safe_archive(tmp_path): zip_path = tmp_path / "ok.zip" out_dir = tmp_path / "out" diff --git a/tests/test_extensions.py b/tests/test_extensions.py index 803ff6b439..23e0bfdd21 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -301,6 +301,7 @@ def test_invalid_command_name(self, temp_dir, valid_manifest_data): "/tmp/outside.md", "commands/../../outside.md", "C:\\Windows\\outside.md", + "C:outside.md", ], ) def test_invalid_command_file_path(self, temp_dir, valid_manifest_data, bad_file): @@ -2670,6 +2671,47 @@ def fake_open(req, timeout=None): assert captured["req"].get_header("Authorization") == "Bearer ghp_testtoken" + def test_fetch_single_catalog_uses_bounded_read(self, temp_dir): + """Catalog JSON responses must use the shared bounded-read helper.""" + from unittest.mock import patch, MagicMock + + catalog = self._make_catalog(temp_dir) + mock_response = MagicMock() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + entry = CatalogEntry( + url="https://example.com/catalog.json", + name="custom", + priority=1, + install_allowed=True, + ) + + with patch.object(catalog, "_open_url", return_value=mock_response), \ + patch( + "specify_cli.extensions.read_response_limited", + side_effect=ExtensionError("catalog too large"), + ): + with pytest.raises(ExtensionError, match="catalog too large"): + catalog._fetch_single_catalog(entry, force_refresh=True) + + def test_fetch_catalog_uses_bounded_read(self, temp_dir): + """The legacy single-catalog path must also bound catalog JSON reads.""" + from unittest.mock import patch, MagicMock + + catalog = self._make_catalog(temp_dir) + mock_response = MagicMock() + mock_response.__enter__ 
= lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + + with patch.object(catalog, "get_catalog_url", return_value="https://example.com/catalog.json"), \ + patch.object(catalog, "_open_url", return_value=mock_response), \ + patch( + "specify_cli.extensions.read_response_limited", + side_effect=ExtensionError("catalog too large"), + ): + with pytest.raises(ExtensionError, match="catalog too large"): + catalog.fetch_catalog(force_refresh=True) + def test_download_extension_sends_auth_header(self, temp_dir, monkeypatch): """download_extension passes Authorization header via opener for GitHub URLs.""" from unittest.mock import patch, MagicMock diff --git a/tests/test_presets.py b/tests/test_presets.py index d4c0d1f692..e0106c1f69 100644 --- a/tests/test_presets.py +++ b/tests/test_presets.py @@ -297,6 +297,7 @@ def test_invalid_template_name_format(self, temp_dir, valid_pack_data): "/tmp/outside.md", "templates/../../outside.md", "C:\\Windows\\outside.md", + "C:outside.md", ], ) def test_invalid_template_file_path(self, temp_dir, valid_pack_data, bad_file): @@ -1589,6 +1590,47 @@ def fake_open(req, timeout=None): assert captured["req"].get_header("Authorization") == "Bearer ghp_testtoken" + def test_fetch_single_catalog_uses_bounded_read(self, project_dir): + """Catalog JSON responses must use the shared bounded-read helper.""" + from unittest.mock import patch, MagicMock + + catalog = PresetCatalog(project_dir) + mock_response = MagicMock() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + entry = PresetCatalogEntry( + url="https://example.com/catalog.json", + name="custom", + priority=1, + install_allowed=True, + ) + + with patch.object(catalog, "_open_url", return_value=mock_response), \ + patch( + "specify_cli.presets.read_response_limited", + side_effect=PresetError("catalog too large"), + ): + with pytest.raises(PresetError, match="catalog too large"): + catalog._fetch_single_catalog(entry, 
force_refresh=True) + + def test_fetch_catalog_uses_bounded_read(self, project_dir): + """The legacy single-catalog path must also bound catalog JSON reads.""" + from unittest.mock import patch, MagicMock + + catalog = PresetCatalog(project_dir) + mock_response = MagicMock() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + + with patch.object(catalog, "get_catalog_url", return_value="https://example.com/catalog.json"), \ + patch.object(catalog, "_open_url", return_value=mock_response), \ + patch( + "specify_cli.presets.read_response_limited", + side_effect=PresetError("catalog too large"), + ): + with pytest.raises(PresetError, match="catalog too large"): + catalog.fetch_catalog(force_refresh=True) + def test_download_pack_verifies_sha256(self, project_dir): """Catalog-provided checksums are enforced when present.""" from unittest.mock import patch, MagicMock diff --git a/tests/test_security_workflow.py b/tests/test_security_workflow.py index 60152a91db..e02e01eff7 100644 --- a/tests/test_security_workflow.py +++ b/tests/test_security_workflow.py @@ -3,8 +3,10 @@ from __future__ import annotations import inspect +import importlib.util import json import re +import subprocess from pathlib import Path import yaml @@ -15,6 +17,9 @@ CONTRIBUTING = REPO_ROOT / "CONTRIBUTING.md" BANDIT_BASELINE = REPO_ROOT / ".github" / "bandit-baseline.json" SECURITY_REQUIREMENTS = REPO_ROOT / ".github" / "security-audit-requirements.txt" +SECURITY_REQUIREMENTS_SYNC_SCRIPT = ( + REPO_ROOT / ".github" / "scripts" / "check_security_requirements.py" +) WORKFLOW_LIVE_AUDIT_REQUIREMENTS = '"${{ runner.temp }}/spec-kit-audit-requirements.txt"' COMMITTED_AUDIT_REQUIREMENTS = ".github/security-audit-requirements.txt" @@ -25,8 +30,13 @@ ) LOCAL_REFRESH_TEST_EXTRA_DEPS = ( "uv pip compile pyproject.toml --extra test --universal --generate-hashes " - f"--quiet --output-file {COMMITTED_AUDIT_REQUIREMENTS}" + f"--quiet --no-header --output-file 
{COMMITTED_AUDIT_REQUIREMENTS}" +) +WORKFLOW_SYNC_COMPILE_TEST_EXTRA_DEPS = ( + "uv pip compile pyproject.toml --extra test --universal --generate-hashes " + "--quiet --no-header --output-file" ) +WORKFLOW_SYNC_SCRIPT = "python .github/scripts/check_security_requirements.py" WORKFLOW_LIVE_PIP_AUDIT = ( "uvx --from pip-audit==2.10.0 pip-audit --disable-pip --require-hashes " f"-r {WORKFLOW_LIVE_AUDIT_REQUIREMENTS} --progress-spinner off" @@ -62,6 +72,18 @@ def _step_run(job_name: str, step_name: str) -> str: return _step(job_name, step_name)["run"] +def _load_sync_script(): + spec = importlib.util.spec_from_file_location( + "check_security_requirements", + SECURITY_REQUIREMENTS_SYNC_SCRIPT, + ) + assert spec is not None + assert spec.loader is not None + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + class TestSecurityWorkflow: """Guard the security workflow against review-feedback regressions.""" @@ -78,11 +100,21 @@ def test_dependency_audit_uses_committed_requirements_for_prs_and_pushes(self): "dependency-audit", "Run pip-audit (committed requirements)", ) + sync_check = _step( + "dependency-audit", + "Check committed audit requirements are current", + ) assert scheduled_compile["if"] == "${{ github.event_name == 'schedule' }}" assert WORKFLOW_COMPILE_SCHEDULED_TEST_EXTRA_DEPS in scheduled_compile["run"] assert scheduled_audit["if"] == "${{ github.event_name == 'schedule' }}" assert scheduled_audit["run"] == WORKFLOW_LIVE_PIP_AUDIT + assert sync_check["if"] == "${{ github.event_name != 'schedule' }}" + assert sync_check["env"]["DEPENDENCY_DIFF_BASE"] == ( + "${{ github.event.pull_request.base.sha || github.event.before || '' }}" + ) + assert sync_check["env"]["DEPENDENCY_DIFF_HEAD"] == "${{ github.sha }}" + assert sync_check["run"] == WORKFLOW_SYNC_SCRIPT assert committed_audit["if"] == "${{ github.event_name != 'schedule' }}" assert committed_audit["run"] == LOCAL_PIP_AUDIT @@ -90,17 +122,28 @@ def 
test_dependency_audit_uses_committed_requirements_for_prs_and_pushes(self): step.get("run", "") for step in _load_security_workflow()["jobs"]["dependency-audit"]["steps"] ) - assert "--generate-hashes" in dependency_job_text - assert "--require-hashes" in dependency_job_text - assert "--disable-pip" in dependency_job_text + dependency_protection_text = ( + dependency_job_text + + "\n" + + SECURITY_REQUIREMENTS_SYNC_SCRIPT.read_text(encoding="utf-8") + ) + assert "--generate-hashes" in dependency_protection_text + assert "--no-header" in dependency_protection_text + assert "--require-hashes" in dependency_protection_text + assert "--disable-pip" in dependency_protection_text assert WORKFLOW_LIVE_AUDIT_REQUIREMENTS in dependency_job_text - assert COMMITTED_AUDIT_REQUIREMENTS in dependency_job_text - assert "uv export" not in dependency_job_text - assert "--frozen" not in dependency_job_text - assert "--locked" not in dependency_job_text - assert "uv.lock" not in dependency_job_text - assert "/tmp/" not in dependency_job_text - assert "uvx pip-audit ." not in dependency_job_text + assert COMMITTED_AUDIT_REQUIREMENTS in dependency_protection_text + assert "uv export" not in dependency_protection_text + assert "--frozen" not in dependency_protection_text + assert "--locked" not in dependency_protection_text + assert "uv.lock" not in dependency_protection_text + assert "/tmp/" not in dependency_protection_text + assert "uvx pip-audit ." 
not in dependency_protection_text + + def test_dependency_audit_checkout_fetches_previous_commit(self): + checkout = _step("dependency-audit", "Checkout") + + assert checkout["with"]["fetch-depth"] == 2 def test_security_workflow_triggers_are_preserved(self): triggers = _workflow_triggers() @@ -182,9 +225,136 @@ def test_committed_audit_requirements_are_hashed(self): requirements = SECURITY_REQUIREMENTS.read_text(encoding="utf-8") assert "--hash=sha256:" in requirements + assert not requirements.startswith("#") assert "pytest==" in requirements assert "pytest-cov==" in requirements + def test_sync_script_skips_when_dependency_inputs_are_unchanged( + self, + monkeypatch, + capsys, + ): + sync_script = _load_sync_script() + + def fake_run(command, **kwargs): + assert command == [ + "git", + "diff", + "--name-only", + "HEAD^", + "HEAD", + "--", + "pyproject.toml", + ".github/security-audit-requirements.txt", + ] + assert kwargs["check"] is True + return subprocess.CompletedProcess(command, 0, stdout="", stderr="") + + monkeypatch.setattr(sync_script.subprocess, "run", fake_run) + + assert sync_script.main() == 0 + assert "sync check skipped" in capsys.readouterr().out + + def test_sync_script_uses_github_diff_refs_when_available( + self, + monkeypatch, + ): + sync_script = _load_sync_script() + monkeypatch.setenv("DEPENDENCY_DIFF_BASE", "abc123") + monkeypatch.setenv("DEPENDENCY_DIFF_HEAD", "def456") + + def fake_run(command, **_kwargs): + assert command == [ + "git", + "diff", + "--name-only", + "abc123", + "def456", + "--", + "pyproject.toml", + ".github/security-audit-requirements.txt", + ] + return subprocess.CompletedProcess(command, 0, stdout="", stderr="") + + monkeypatch.setattr(sync_script.subprocess, "run", fake_run) + + assert sync_script._dependency_inputs_changed() is False + + def test_sync_script_compiles_and_compares_when_dependency_inputs_changed( + self, + monkeypatch, + tmp_path, + ): + sync_script = _load_sync_script() + committed_requirements = 
tmp_path / ".github" / "security-audit-requirements.txt" + generated_requirements = tmp_path / "generated-requirements.txt" + committed_requirements.parent.mkdir() + committed_requirements.write_text("pytest==1\n", encoding="utf-8") + compile_commands = [] + + monkeypatch.setattr(sync_script, "REPO_ROOT", tmp_path) + monkeypatch.setattr(sync_script, "COMMITTED_REQUIREMENTS", committed_requirements) + monkeypatch.setenv("GENERATED_REQUIREMENTS", str(generated_requirements)) + + def fake_run(command, **kwargs): + if command[0] == "git": + return subprocess.CompletedProcess( + command, + 0, + stdout="pyproject.toml\n", + stderr="", + ) + + compile_commands.append(command) + assert kwargs["check"] is True + generated_requirements.write_text("pytest==1\n", encoding="utf-8") + return subprocess.CompletedProcess(command, 0) + + monkeypatch.setattr(sync_script.subprocess, "run", fake_run) + + assert sync_script.main() == 0 + assert len(compile_commands) == 1 + compile_command = " ".join(compile_commands[0]) + assert WORKFLOW_SYNC_COMPILE_TEST_EXTRA_DEPS in compile_command + assert "--output-file" in compile_commands[0] + assert str(generated_requirements) in compile_commands[0] + + def test_sync_script_fails_when_generated_requirements_differ( + self, + monkeypatch, + tmp_path, + capsys, + ): + sync_script = _load_sync_script() + committed_requirements = tmp_path / ".github" / "security-audit-requirements.txt" + generated_requirements = tmp_path / "generated-requirements.txt" + committed_requirements.parent.mkdir() + committed_requirements.write_text("pytest==1\n", encoding="utf-8") + + monkeypatch.setattr(sync_script, "REPO_ROOT", tmp_path) + monkeypatch.setattr(sync_script, "COMMITTED_REQUIREMENTS", committed_requirements) + monkeypatch.setenv("GENERATED_REQUIREMENTS", str(generated_requirements)) + + def fake_run(command, **_kwargs): + if command[0] == "git": + return subprocess.CompletedProcess( + command, + 0, + stdout="pyproject.toml\n", + stderr="", + ) + + 
generated_requirements.write_text("pytest==2\n", encoding="utf-8") + return subprocess.CompletedProcess(command, 0) + + monkeypatch.setattr(sync_script.subprocess, "run", fake_run) + + assert sync_script.main() == 1 + assert ( + "Regenerate .github/security-audit-requirements.txt" + in capsys.readouterr().err + ) + def test_contributing_documents_security_commands(self): contributing_text = CONTRIBUTING.read_text(encoding="utf-8") From b089f8b5b537d1270489dd8da324f22abec9ddf4 Mon Sep 17 00:00:00 2001 From: Pascal Date: Thu, 14 May 2026 08:20:19 +0200 Subject: [PATCH 10/30] ci(security): tighten PR checks for security regressions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Six follow-on checks that lock in the hardening from this PR and add the surfaces it didn't cover: 1. ruff S602/S604/S605 in pyproject.toml — fail PRs that reintroduce subprocess shell=True. The intentional shell=True in the workflows shell step keeps its NOTE comment and gets an explicit `# noqa: S602` so the deviation is visible. 2. Bandit two-pass in security.yml — keep `-lll --baseline` blocking and add a non-blocking `-ll` informational pass so MEDIUM findings show in the job summary instead of accumulating silently. 3. Bandit baseline diff check — fail PRs that grow .github/bandit-baseline.json unless they carry the `security-baseline-change` label. New script in .github/scripts/check_bandit_baseline.py. 4. Secret scanning via detect-secrets — new `secret-scan` job in security.yml with a committed .secrets.baseline that whitelists the nine current findings (all SHA pins / docs examples / test fixtures; audited before commit). Drift fails the check. 5. shellcheck on scripts/bash/*.sh in lint.yml. Starts at --severity=error to catch real bugs; style (SC2155) can be tightened in a follow-up. 6. macos-latest added to the dependency-audit matrix in security.yml — aligns with test.yml's posture and catches platform-specific resolver surprises. 
--- .github/scripts/check_bandit_baseline.py | 92 ++++++++ .github/workflows/lint.yml | 12 + .github/workflows/security.yml | 61 ++++- .secrets.baseline | 213 ++++++++++++++++++ pyproject.toml | 10 + .../workflows/steps/shell/__init__.py | 2 +- 6 files changed, 387 insertions(+), 3 deletions(-) create mode 100644 .github/scripts/check_bandit_baseline.py create mode 100644 .secrets.baseline diff --git a/.github/scripts/check_bandit_baseline.py b/.github/scripts/check_bandit_baseline.py new file mode 100644 index 0000000000..95f05a42c1 --- /dev/null +++ b/.github/scripts/check_bandit_baseline.py @@ -0,0 +1,92 @@ +"""Fail if the Bandit baseline grew on this PR without explicit acknowledgement. + +The bandit baseline whitelists known findings so they don't fail CI. If a +contributor adds a new entry, silent whitelisting becomes invisible in +review. This script counts the entries in the baseline at the PR head vs. +its base; if the count increased, the PR must carry the label +``security-baseline-change`` to confirm the addition is intentional. + +Required environment variables: +- ``BANDIT_BASELINE_BASE``: git ref of the PR base (``github.event.pull_request.base.sha``) +- ``BANDIT_BASELINE_HEAD``: git ref of the PR head (``github.sha``) +- ``BANDIT_BASELINE_LABELS``: comma-separated PR labels (``join(github.event.pull_request.labels.*.name, ',')``) + +Outside of PR events, all inputs may be empty and the script no-ops. 
+""" + +from __future__ import annotations + +import json +import os +import subprocess +import sys +from pathlib import Path + +REPO_ROOT = Path(__file__).resolve().parents[2] +BASELINE_PATH = ".github/bandit-baseline.json" +ACK_LABEL = "security-baseline-change" + + +def _read_baseline_at(ref: str) -> dict: + if not ref: + return {"results": []} + try: + blob = subprocess.run( + ["git", "show", f"{ref}:{BASELINE_PATH}"], + check=True, + cwd=REPO_ROOT, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ).stdout + except subprocess.CalledProcessError: + # File didn't exist at that ref (e.g. PR introducing the baseline). + return {"results": []} + try: + return json.loads(blob) + except json.JSONDecodeError: + print(f"Could not parse baseline at {ref}; treating as empty.", file=sys.stderr) + return {"results": []} + + +def main() -> int: + base_ref = os.environ.get("BANDIT_BASELINE_BASE", "").strip() + head_ref = os.environ.get("BANDIT_BASELINE_HEAD", "").strip() or "HEAD" + + if not base_ref or set(base_ref) <= {"0"}: + # Not a PR event, or the base ref is the zero-SHA placeholder. + print("No PR base ref; baseline diff check skipped.") + return 0 + + base_count = len(_read_baseline_at(base_ref).get("results", [])) + head_count = len(_read_baseline_at(head_ref).get("results", [])) + + if head_count <= base_count: + print( + f"Bandit baseline entries: {base_count} -> {head_count} (no growth)." + ) + return 0 + + labels = { + label.strip() + for label in os.environ.get("BANDIT_BASELINE_LABELS", "").split(",") + if label.strip() + } + if ACK_LABEL in labels: + print( + f"Bandit baseline grew from {base_count} to {head_count} entries; " + f"acknowledged via label '{ACK_LABEL}'." + ) + return 0 + + print( + f"Bandit baseline grew from {base_count} to {head_count} entries. 
" + f"Add label '{ACK_LABEL}' to the PR to acknowledge that the new " + f"whitelist entries are intentional.", + file=sys.stderr, + ) + return 1 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index c2606d698c..a183478a29 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -20,3 +20,15 @@ jobs: globs: | '**/*.md' !extensions/**/*.md + + shellcheck: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + + # shellcheck is preinstalled on ubuntu-latest runners. + # Start at --severity=error to block real bugs without flagging style + # (notably SC2155). Tighten in a follow-up after cleanup. + - name: Run shellcheck on scripts/bash + run: shellcheck --severity=error scripts/bash/*.sh diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml index 8a0058c073..43e6d5dac4 100644 --- a/.github/workflows/security.yml +++ b/.github/workflows/security.yml @@ -18,7 +18,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest, windows-latest] + os: [ubuntu-latest, windows-latest, macos-latest] python-version: ["3.11", "3.12", "3.13"] steps: - name: Checkout @@ -61,6 +61,9 @@ jobs: steps: - name: Checkout uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + # Need the PR base to compare baseline growth. + fetch-depth: 0 - name: Install uv uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0 @@ -70,5 +73,59 @@ jobs: with: python-version: "3.13" - - name: Run Bandit + # Blocking: HIGH severity only, with baseline. Real regressions fail CI. + - name: Run Bandit (HIGH, baseline-gated) run: uvx --from bandit==1.9.4 bandit -r src -lll --baseline .github/bandit-baseline.json + + # Informative: MEDIUM severity, no baseline. 
Surfaces lower-severity + # findings in the job summary without breaking CI, so reviewers see + # them before they accumulate. + - name: Run Bandit (MEDIUM, informational) + continue-on-error: true + run: uvx --from bandit==1.9.4 bandit -r src -ll + + # Prevent silent whitelisting: if the baseline grew, the PR must carry + # the 'security-baseline-change' label to acknowledge it. + - name: Check Bandit baseline growth + if: ${{ github.event_name == 'pull_request' }} + env: + BANDIT_BASELINE_BASE: ${{ github.event.pull_request.base.sha }} + BANDIT_BASELINE_HEAD: ${{ github.event.pull_request.head.sha }} + BANDIT_BASELINE_LABELS: ${{ join(github.event.pull_request.labels.*.name, ',') }} + run: python .github/scripts/check_bandit_baseline.py + + secret-scan: + name: Secret scan + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + fetch-depth: 0 + + - name: Install uv + uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0 + + - name: Set up Python + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6 + with: + python-version: "3.13" + + # detect-secrets is a Python tool (consistent with bandit / pip-audit + # install pattern) and detects entropy-based and provider-specific + # secrets. Baseline at .secrets.baseline is honored as a whitelist; + # any drift fails the check. + - name: Run detect-secrets + run: | + uvx --from detect-secrets==1.5.0 detect-secrets scan \ + --baseline .secrets.baseline \ + --exclude-files '\.secrets\.baseline$' \ + --exclude-files 'uv\.lock$' \ + --exclude-files '\.github/security-audit-requirements\.txt$' + + - name: Verify baseline is in sync + run: | + if ! git diff --exit-code .secrets.baseline; then + echo "::error::detect-secrets found new candidates. 
Audit them, then update .secrets.baseline with: uvx --from detect-secrets==1.5.0 detect-secrets scan --baseline .secrets.baseline" >&2 + exit 1 + fi diff --git a/.secrets.baseline b/.secrets.baseline new file mode 100644 index 0000000000..f700e86348 --- /dev/null +++ b/.secrets.baseline @@ -0,0 +1,213 @@ +{ + "version": "1.5.0", + "plugins_used": [ + { + "name": "ArtifactoryDetector" + }, + { + "name": "AWSKeyDetector" + }, + { + "name": "AzureStorageKeyDetector" + }, + { + "name": "Base64HighEntropyString", + "limit": 4.5 + }, + { + "name": "BasicAuthDetector" + }, + { + "name": "CloudantDetector" + }, + { + "name": "DiscordBotTokenDetector" + }, + { + "name": "GitHubTokenDetector" + }, + { + "name": "GitLabTokenDetector" + }, + { + "name": "HexHighEntropyString", + "limit": 3.0 + }, + { + "name": "IbmCloudIamDetector" + }, + { + "name": "IbmCosHmacDetector" + }, + { + "name": "IPPublicDetector" + }, + { + "name": "JwtTokenDetector" + }, + { + "name": "KeywordDetector", + "keyword_exclude": "" + }, + { + "name": "MailchimpDetector" + }, + { + "name": "NpmDetector" + }, + { + "name": "OpenAIDetector" + }, + { + "name": "PrivateKeyDetector" + }, + { + "name": "PypiTokenDetector" + }, + { + "name": "SendGridDetector" + }, + { + "name": "SlackDetector" + }, + { + "name": "SoftlayerDetector" + }, + { + "name": "SquareOAuthDetector" + }, + { + "name": "StripeDetector" + }, + { + "name": "TelegramBotTokenDetector" + }, + { + "name": "TwilioKeyDetector" + } + ], + "filters_used": [ + { + "path": "detect_secrets.filters.allowlist.is_line_allowlisted" + }, + { + "path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies", + "min_level": 2 + }, + { + "path": "detect_secrets.filters.heuristic.is_indirect_reference" + }, + { + "path": "detect_secrets.filters.heuristic.is_likely_id_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_lock_file" + }, + { + "path": "detect_secrets.filters.heuristic.is_not_alphanumeric_string" + }, + { + "path": 
"detect_secrets.filters.heuristic.is_potential_uuid" + }, + { + "path": "detect_secrets.filters.heuristic.is_prefixed_with_dollar_sign" + }, + { + "path": "detect_secrets.filters.heuristic.is_sequential_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_swagger_file" + }, + { + "path": "detect_secrets.filters.heuristic.is_templated_secret" + }, + { + "path": "detect_secrets.filters.regex.should_exclude_file", + "pattern": [ + "\\.secrets\\.baseline$", + "uv\\.lock$", + "\\.github/security-audit-requirements\\.txt$" + ] + } + ], + "results": { + ".devcontainer/post-create.sh": [ + { + "type": "Hex High Entropy String", + "filename": ".devcontainer/post-create.sh", + "hashed_secret": "7a549d52003f28825cf4d8a7351585120349c1c5", + "is_verified": false, + "line_number": 65 + } + ], + ".github/workflows/security.yml": [ + { + "type": "Secret Keyword", + "filename": ".github/workflows/security.yml", + "hashed_secret": "ce8f18aefefe74020792776da9b575c21d44d6b1", + "is_verified": false, + "line_number": 119 + }, + { + "type": "Secret Keyword", + "filename": ".github/workflows/security.yml", + "hashed_secret": "bc0369f6bd7ef02ba819bff38ac2ce7deacac19d", + "is_verified": false, + "line_number": 127 + } + ], + "docs/reference/authentication.md": [ + { + "type": "Secret Keyword", + "filename": "docs/reference/authentication.md", + "hashed_secret": "d92490a1457d8b0712a85fe018b3e9fd781816a7", + "is_verified": false, + "line_number": 113 + } + ], + "extensions/template/EXAMPLE-README.md": [ + { + "type": "Secret Keyword", + "filename": "extensions/template/EXAMPLE-README.md", + "hashed_secret": "11fa7c37d697f30e6aee828b4426a10f83ab2380", + "is_verified": false, + "line_number": 52 + }, + { + "type": "Secret Keyword", + "filename": "extensions/template/EXAMPLE-README.md", + "hashed_secret": "71fdbe9f60b1157a53c18b7ec93d4041d828aaad", + "is_verified": false, + "line_number": 106 + } + ], + "tests/test_agent_config_consistency.py": [ + { + "type": "Hex High Entropy 
String", + "filename": "tests/test_agent_config_consistency.py", + "hashed_secret": "7a549d52003f28825cf4d8a7351585120349c1c5", + "is_verified": false, + "line_number": 56 + } + ], + "tests/test_authentication.py": [ + { + "type": "Secret Keyword", + "filename": "tests/test_authentication.py", + "hashed_secret": "3c3b274d119ff5a5ec6c1e215c1cb794d9973ac1", + "is_verified": false, + "line_number": 131 + } + ], + "tests/test_extensions.py": [ + { + "type": "Secret Keyword", + "filename": "tests/test_extensions.py", + "hashed_secret": "7a9b93cfa651fbc2c93d88edea4d4fcfe33c0a0b", + "is_verified": false, + "line_number": 3397 + } + ] + }, + "generated_at": "2026-05-14T05:42:31Z" +} diff --git a/pyproject.toml b/pyproject.toml index 92735f3e9e..2dd101a688 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,3 +71,13 @@ precision = 2 show_missing = true skip_covered = false +[tool.ruff.lint] +# Lock in subprocess security posture: any reintroduction of shell=True +# (or os.system / popen2) must be acknowledged with an explicit `# noqa` +# pointing at the rule, making the deviation visible in review. +extend-select = [ + "S602", # subprocess-popen-with-shell-equals-true + "S604", # call-with-shell-equals-true + "S605", # start-process-with-a-shell +] + diff --git a/src/specify_cli/workflows/steps/shell/__init__.py b/src/specify_cli/workflows/steps/shell/__init__.py index 73ac99530a..89ab718cab 100644 --- a/src/specify_cli/workflows/steps/shell/__init__.py +++ b/src/specify_cli/workflows/steps/shell/__init__.py @@ -30,7 +30,7 @@ def execute(self, config: dict[str, Any], context: StepContext) -> StepResult: # control commands; catalog-installed workflows should be reviewed # before use (see PUBLISHING.md for security guidance). 
try: - proc = subprocess.run( + proc = subprocess.run( # noqa: S602 -- intentional shell=True (see NOTE above) run_cmd, shell=True, capture_output=True, From 2faf55a3e7f2f41308364e99ef96e551e8500807 Mon Sep 17 00:00:00 2001 From: Pascal Date: Fri, 15 May 2026 08:24:17 +0200 Subject: [PATCH 11/30] ci(security): address review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Copilot review #4291726625 + mnriem CHANGES_REQUESTED #4292842064. Hardening (Copilot suggestions on existing code): - Pass strict_redirects=True to open_url() at the three catalog/workflow download call sites (__init__.py preset/workflow downloads, integrations/catalog.py). Closes an HTTPS->HTTP downgrade window where the bounded read could happen on a redirected http:// target before the post-redirect URL validation. Lock-in fixes for the new PR checks: - check_bandit_baseline.py: compare result identities (filename + line + test_id + severity + confidence + code-hash) instead of raw counts so a PR can't silently swap one whitelisted finding for another. Also treat "baseline file absent at base ref" as introduction (no label required) instead of growth-from-zero. - Switch secret-scan to detect-secrets-hook (instead of `scan --baseline` followed by `git diff --exit-code`). The scan command rewrites the baseline's generated_at timestamp on every run, so the diff guard always tripped. detect-secrets-hook only reports findings that aren't in the baseline, so the diff guard is unnecessary. Brittle-test fixes: - security.yml: revert macos-latest from the dependency-audit matrix (test_security_workflow.py:160 pins ubuntu+windows, matching test.yml). - security.yml: rename "Run Bandit (HIGH, baseline-gated)" back to "Run Bandit" (test_security_workflow.py:188 expects the canonical name); the medium-severity informational pass keeps a distinct name. 
- security-audit-requirements.txt: regenerate with uv pip compile — pyproject.toml changed on this branch (ruff config in the previous commit) and upstream package releases drifted the lock; check_security _requirements.py was rightly failing until both sides matched. Pre-existing pinning gap caught by tests/test_github_workflows.py: - Pin actions/github-script@v9 to its commit SHA in catalog-assign.yml. - Fix USES_RE in test_github_workflows.py so it matches the `- uses:` shorthand form (without it, catalog-assign.yml's `@v9` slipped past). Test mocks: - test_integration_catalog.py: extend the three url-mocking helpers to also stub OpenerDirector.open. open_url(strict_redirects=True) takes a different code path that bypasses the urlopen mock; patching the opener covers both paths. --- .github/scripts/check_bandit_baseline.py | 86 ++++++++++++++----- .github/security-audit-requirements.txt | 49 ----------- .github/workflows/catalog-assign.yml | 2 +- .github/workflows/security.yml | 29 +++---- .secrets.baseline | 13 +-- src/specify_cli/__init__.py | 4 +- src/specify_cli/integrations/catalog.py | 2 +- .../integrations/test_integration_catalog.py | 29 ++++++- tests/test_github_workflows.py | 4 +- 9 files changed, 114 insertions(+), 104 deletions(-) diff --git a/.github/scripts/check_bandit_baseline.py b/.github/scripts/check_bandit_baseline.py index 95f05a42c1..7d9601c19f 100644 --- a/.github/scripts/check_bandit_baseline.py +++ b/.github/scripts/check_bandit_baseline.py @@ -1,21 +1,32 @@ -"""Fail if the Bandit baseline grew on this PR without explicit acknowledgement. +"""Fail if new entries appear in the Bandit baseline without acknowledgement. The bandit baseline whitelists known findings so they don't fail CI. If a contributor adds a new entry, silent whitelisting becomes invisible in -review. This script counts the entries in the baseline at the PR head vs. 
-its base; if the count increased, the PR must carry the label -``security-baseline-change`` to confirm the addition is intentional. +review. This script compares the set of result *identities* in the +baseline at the PR head against the baseline at its base; if any new +identity appears, the PR must carry the label ``security-baseline-change`` +to confirm the addition is intentional. + +We compare identities (filename + line + test_id + issue_severity + +issue_confidence + hash-of-code-snippet) rather than raw counts so a PR +cannot remove one existing entry and add a different new one to keep the +count constant — which would silently whitelist a new finding. + +When the baseline file does not exist at the base ref, this is the PR +that introduces it; we treat all entries as the starting baseline and +do not require the label. Required environment variables: -- ``BANDIT_BASELINE_BASE``: git ref of the PR base (``github.event.pull_request.base.sha``) -- ``BANDIT_BASELINE_HEAD``: git ref of the PR head (``github.sha``) -- ``BANDIT_BASELINE_LABELS``: comma-separated PR labels (``join(github.event.pull_request.labels.*.name, ',')``) +- ``BANDIT_BASELINE_BASE``: git ref of the PR base +- ``BANDIT_BASELINE_HEAD``: git ref of the PR head +- ``BANDIT_BASELINE_LABELS``: comma-separated PR labels Outside of PR events, all inputs may be empty and the script no-ops. """ from __future__ import annotations +import hashlib import json import os import subprocess @@ -27,9 +38,10 @@ ACK_LABEL = "security-baseline-change" -def _read_baseline_at(ref: str) -> dict: +def _read_baseline_at(ref: str) -> tuple[dict, bool]: + """Return (baseline_json, file_existed_at_ref).""" if not ref: - return {"results": []} + return {"results": []}, False try: blob = subprocess.run( ["git", "show", f"{ref}:{BASELINE_PATH}"], @@ -40,13 +52,34 @@ def _read_baseline_at(ref: str) -> dict: text=True, ).stdout except subprocess.CalledProcessError: - # File didn't exist at that ref (e.g. 
PR introducing the baseline). - return {"results": []} + return {"results": []}, False try: - return json.loads(blob) + return json.loads(blob), True except json.JSONDecodeError: print(f"Could not parse baseline at {ref}; treating as empty.", file=sys.stderr) - return {"results": []} + return {"results": []}, True + + +def _identity(result: dict) -> str: + """Stable identity for a baseline entry. + + Combines location, test, severity, confidence, and a hash of the + pinned code snippet so reordering or formatting changes don't + register as new findings, but a different finding at the same line + does. + """ + code = result.get("code", "") or "" + code_hash = hashlib.sha256(code.encode("utf-8")).hexdigest()[:16] + return "|".join( + [ + str(result.get("filename", "")), + str(result.get("line_number", "")), + str(result.get("test_id", "")), + str(result.get("issue_severity", "")), + str(result.get("issue_confidence", "")), + code_hash, + ] + ) def main() -> int: @@ -54,16 +87,27 @@ def main() -> int: head_ref = os.environ.get("BANDIT_BASELINE_HEAD", "").strip() or "HEAD" if not base_ref or set(base_ref) <= {"0"}: - # Not a PR event, or the base ref is the zero-SHA placeholder. print("No PR base ref; baseline diff check skipped.") return 0 - base_count = len(_read_baseline_at(base_ref).get("results", [])) - head_count = len(_read_baseline_at(head_ref).get("results", [])) + base_baseline, base_existed = _read_baseline_at(base_ref) + head_baseline, _ = _read_baseline_at(head_ref) + + if not base_existed: + print( + "Baseline file not present at base ref; treating this PR as the " + "introduction of the baseline. No acknowledgement required." + ) + return 0 + + base_ids = {_identity(r) for r in base_baseline.get("results", [])} + head_ids = {_identity(r) for r in head_baseline.get("results", [])} - if head_count <= base_count: + new_ids = head_ids - base_ids + if not new_ids: print( - f"Bandit baseline entries: {base_count} -> {head_count} (no growth)." 
+ f"Bandit baseline entries: {len(base_ids)} -> {len(head_ids)} " + f"(no new identities)." ) return 0 @@ -74,17 +118,19 @@ def main() -> int: } if ACK_LABEL in labels: print( - f"Bandit baseline grew from {base_count} to {head_count} entries; " + f"Bandit baseline gained {len(new_ids)} new identities; " f"acknowledged via label '{ACK_LABEL}'." ) return 0 print( - f"Bandit baseline grew from {base_count} to {head_count} entries. " + f"Bandit baseline gained {len(new_ids)} new identities. " f"Add label '{ACK_LABEL}' to the PR to acknowledge that the new " f"whitelist entries are intentional.", file=sys.stderr, ) + for identity in sorted(new_ids): + print(f" + {identity}", file=sys.stderr) return 1 diff --git a/.github/security-audit-requirements.txt b/.github/security-audit-requirements.txt index d97b84d2cd..89feef3f1a 100644 --- a/.github/security-audit-requirements.txt +++ b/.github/security-audit-requirements.txt @@ -263,55 +263,6 @@ shellingham==1.5.4 \ --hash=sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686 \ --hash=sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de # via typer -tomli==2.4.1 ; python_full_version <= '3.11' \ - --hash=sha256:01f520d4f53ef97964a240a035ec2a869fe1a37dde002b57ebc4417a27ccd853 \ - --hash=sha256:0d85819802132122da43cb86656f8d1f8c6587d54ae7dcaf30e90533028b49fe \ - --hash=sha256:136443dbd7e1dee43c68ac2694fde36b2849865fa258d39bf822c10e8068eac5 \ - --hash=sha256:1d8591993e228b0c930c4bb0db464bdad97b3289fb981255d6c9a41aedc84b2d \ - --hash=sha256:2190f2e9dd7508d2a90ded5ed369255980a1bcdd58e52f7fe24b8162bf9fedbd \ - --hash=sha256:2c1c351919aca02858f740c6d33adea0c5deea37f9ecca1cc1ef9e884a619d26 \ - --hash=sha256:36d2bd2ad5fb9eaddba5226aa02c8ec3fa4f192631e347b3ed28186d43be6b54 \ - --hash=sha256:3d48a93ee1c9b79c04bb38772ee1b64dcf18ff43085896ea460ca8dec96f35f6 \ - --hash=sha256:47149d5bd38761ac8be13a84864bf0b7b70bc051806bc3669ab1cbc56216b23c \ - 
--hash=sha256:4ab97e64ccda8756376892c53a72bd1f964e519c77236368527f758fbc36a53a \ - --hash=sha256:4b605484e43cdc43f0954ddae319fb75f04cc10dd80d830540060ee7cd0243cd \ - --hash=sha256:504aa796fe0569bb43171066009ead363de03675276d2d121ac1a4572397870f \ - --hash=sha256:51529d40e3ca50046d7606fa99ce3956a617f9b36380da3b7f0dd3dd28e68cb5 \ - --hash=sha256:52c8ef851d9a240f11a88c003eacb03c31fc1c9c4ec64a99a0f922b93874fda9 \ - --hash=sha256:559db847dc486944896521f68d8190be1c9e719fced785720d2216fe7022b662 \ - --hash=sha256:5a881ab208c0baf688221f8cecc5401bd291d67e38a1ac884d6736cbcd8247e9 \ - --hash=sha256:5cb41aa38891e073ee49d55fbc7839cfdb2bc0e600add13874d048c94aadddd1 \ - --hash=sha256:5e262d41726bc187e69af7825504c933b6794dc3fbd5945e41a79bb14c31f585 \ - --hash=sha256:5ee18d9ebdb417e384b58fe414e8d6af9f4e7a0ae761519fb50f721de398dd4e \ - --hash=sha256:7008df2e7655c495dd12d2a4ad038ff878d4ca4b81fccaf82b714e07eae4402c \ - --hash=sha256:734e20b57ba95624ecf1841e72b53f6e186355e216e5412de414e3c51e5e3c41 \ - --hash=sha256:7c7e1a961a0b2f2472c1ac5b69affa0ae1132c39adcb67aba98568702b9cc23f \ - --hash=sha256:7f86fd587c4ed9dd76f318225e7d9b29cfc5a9d43de44e5754db8d1128487085 \ - --hash=sha256:7f94b27a62cfad8496c8d2513e1a222dd446f095fca8987fceef261225538a15 \ - --hash=sha256:88dceee75c2c63af144e456745e10101eb67361050196b0b6af5d717254dddf7 \ - --hash=sha256:8a650c2dbafa08d42e51ba0b62740dae4ecb9338eefa093aa5c78ceb546fcd5c \ - --hash=sha256:8d65a2fbf9d2f8352685bc1364177ee3923d6baf5e7f43ea4959d7d8bc326a36 \ - --hash=sha256:96481a5786729fd470164b47cdb3e0e58062a496f455ee41b4403be77cb5a076 \ - --hash=sha256:a120733b01c45e9a0c34aeef92bf0cf1d56cfe81ed9d47d562f9ed591a9828ac \ - --hash=sha256:b1d22e6e9387bf4739fbe23bfa80e93f6b0373a7f1b96c6227c32bef95a4d7a8 \ - --hash=sha256:b8c198f8c1805dc42708689ed6864951fd2494f924149d3e4bce7710f8eb5232 \ - --hash=sha256:c2541745709bad0264b7d4705ad453b76ccd191e64aa6f0fc66b69a293a45ece \ - --hash=sha256:c742f741d58a28940ce01d58f0ab2ea3ced8b12402f162f4d534dfe18ba1cd6a \ - 
--hash=sha256:c7f2c7f2b9ca6bdeef8f0fa897f8e05085923eb091721675170254cbc5b02897 \ - --hash=sha256:d312ef37c91508b0ab2cee7da26ec0b3ed2f03ce12bd87a588d771ae15dcf82d \ - --hash=sha256:d4d8fe59808a54658fcc0160ecfb1b30f9089906c50b23bcb4c69eddc19ec2b4 \ - --hash=sha256:da25dc3563bff5965356133435b757a795a17b17d01dbc0f42fb32447ddfd917 \ - --hash=sha256:eab21f45c7f66c13f2a9e0e1535309cee140182a9cdae1e041d02e47291e8396 \ - --hash=sha256:eb0dc4e38e6a1fd579e5d50369aa2e10acfc9cace504579b2faabb478e76941a \ - --hash=sha256:ec9bfaf3ad2df51ace80688143a6a4ebc09a248f6ff781a9945e51937008fcbc \ - --hash=sha256:ede3e6487c5ef5d28634ba3f31f989030ad6af71edfb0055cbbd14189ff240ba \ - --hash=sha256:f3c6818a1a86dd6dca7ddcaaf76947d5ba31aecc28cb1b67009a5877c9a64f3f \ - --hash=sha256:f758f1b9299d059cc3f6546ae2af89670cb1c4d48ea29c3cacc4fe7de3058257 \ - --hash=sha256:f8f0fc26ec2cc2b965b7a3b87cd19c5c6b8c5e5f436b984e85f486d652285c30 \ - --hash=sha256:fd0409a3653af6c147209d267a0e4243f0ae46b011aa978b1080359fddc9b6cf \ - --hash=sha256:ff18e6a727ee0ab0388507b89d1bc6a22b138d1e2fa56d1ad494586d61d2eae9 \ - --hash=sha256:ff2983983d34813c1aeb0fa89091e76c3a22889ee83ab27c5eeb45100560c049 - # via coverage typer==0.25.1 \ --hash=sha256:75caa44ed46a03fb2dab8808753ffacdbfea88495e74c85a28c5eefcf5f39c89 \ --hash=sha256:9616eb8853a09ffeabab1698952f33c6f29ffdbceb4eaeecf571880e8d7664cc diff --git a/.github/workflows/catalog-assign.yml b/.github/workflows/catalog-assign.yml index 78b4f552f3..f828794864 100644 --- a/.github/workflows/catalog-assign.yml +++ b/.github/workflows/catalog-assign.yml @@ -19,7 +19,7 @@ jobs: permissions: issues: write steps: - - uses: actions/github-script@v9 + - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 with: script: | const issue = context.payload.issue; diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml index 43e6d5dac4..0e19912f1e 100644 --- a/.github/workflows/security.yml +++ b/.github/workflows/security.yml @@ -18,7 +18,7 @@ jobs: 
strategy: fail-fast: false matrix: - os: [ubuntu-latest, windows-latest, macos-latest] + os: [ubuntu-latest, windows-latest] python-version: ["3.11", "3.12", "3.13"] steps: - name: Checkout @@ -74,13 +74,13 @@ jobs: python-version: "3.13" # Blocking: HIGH severity only, with baseline. Real regressions fail CI. - - name: Run Bandit (HIGH, baseline-gated) + - name: Run Bandit run: uvx --from bandit==1.9.4 bandit -r src -lll --baseline .github/bandit-baseline.json # Informative: MEDIUM severity, no baseline. Surfaces lower-severity # findings in the job summary without breaking CI, so reviewers see # them before they accumulate. - - name: Run Bandit (MEDIUM, informational) + - name: Run Bandit medium-severity informational pass continue-on-error: true run: uvx --from bandit==1.9.4 bandit -r src -ll @@ -113,19 +113,14 @@ jobs: # detect-secrets is a Python tool (consistent with bandit / pip-audit # install pattern) and detects entropy-based and provider-specific - # secrets. Baseline at .secrets.baseline is honored as a whitelist; - # any drift fails the check. + # secrets. detect-secrets-hook compares tracked files against the + # baseline and exits non-zero when a new candidate appears, without + # rewriting the baseline file (so there's no spurious git diff). - name: Run detect-secrets run: | - uvx --from detect-secrets==1.5.0 detect-secrets scan \ - --baseline .secrets.baseline \ - --exclude-files '\.secrets\.baseline$' \ - --exclude-files 'uv\.lock$' \ - --exclude-files '\.github/security-audit-requirements\.txt$' - - - name: Verify baseline is in sync - run: | - if ! git diff --exit-code .secrets.baseline; then - echo "::error::detect-secrets found new candidates. 
Audit them, then update .secrets.baseline with: uvx --from detect-secrets==1.5.0 detect-secrets scan --baseline .secrets.baseline" >&2 - exit 1 - fi + git ls-files -z \ + -- ':!:.secrets.baseline' \ + ':!:uv.lock' \ + ':!:.github/security-audit-requirements.txt' \ + | xargs -0 uvx --from detect-secrets==1.5.0 detect-secrets-hook \ + --baseline .secrets.baseline diff --git a/.secrets.baseline b/.secrets.baseline index f700e86348..42f94920b0 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -144,16 +144,9 @@ { "type": "Secret Keyword", "filename": ".github/workflows/security.yml", - "hashed_secret": "ce8f18aefefe74020792776da9b575c21d44d6b1", + "hashed_secret": "4202a5e0d1da60251e0163e869ae02016bb68767", "is_verified": false, - "line_number": 119 - }, - { - "type": "Secret Keyword", - "filename": ".github/workflows/security.yml", - "hashed_secret": "bc0369f6bd7ef02ba819bff38ac2ce7deacac19d", - "is_verified": false, - "line_number": 127 + "line_number": 120 } ], "docs/reference/authentication.md": [ @@ -209,5 +202,5 @@ } ] }, - "generated_at": "2026-05-14T05:42:31Z" + "generated_at": "2026-05-15T06:22:08Z" } diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py index 5f71115c35..877ffbdf39 100644 --- a/src/specify_cli/__init__.py +++ b/src/specify_cli/__init__.py @@ -5021,7 +5021,7 @@ def _validate_and_install_local(yaml_path: Path, source_label: str) -> None: import tempfile try: - with _open_url(source, timeout=30) as resp: + with _open_url(source, timeout=30, strict_redirects=True) as resp: final_url = resp.geturl() final_parsed = urlparse(final_url) final_host = final_parsed.hostname or "" @@ -5120,7 +5120,7 @@ def _validate_and_install_local(yaml_path: Path, source_label: str) -> None: from specify_cli.authentication.http import open_url as _open_url workflow_dir.mkdir(parents=True, exist_ok=True) - with _open_url(workflow_url, timeout=30) as response: + with _open_url(workflow_url, timeout=30, strict_redirects=True) as response: # 
Validate final URL after redirects final_url = response.geturl() final_parsed = urlparse(final_url) diff --git a/src/specify_cli/integrations/catalog.py b/src/specify_cli/integrations/catalog.py index d0ef121996..83ad446642 100644 --- a/src/specify_cli/integrations/catalog.py +++ b/src/specify_cli/integrations/catalog.py @@ -166,7 +166,7 @@ def _fetch_single_catalog( try: from specify_cli.authentication.http import open_url - with open_url(entry.url, timeout=10) as resp: + with open_url(entry.url, timeout=10, strict_redirects=True) as resp: # Validate final URL after redirects final_url = resp.geturl() if final_url != entry.url: diff --git a/tests/integrations/test_integration_catalog.py b/tests/integrations/test_integration_catalog.py index 6a87b51147..ca2c1875cc 100644 --- a/tests/integrations/test_integration_catalog.py +++ b/tests/integrations/test_integration_catalog.py @@ -166,7 +166,12 @@ class TestCatalogFetch: """Tests that use a local HTTP server stub via monkeypatch.""" def _patch_urlopen(self, monkeypatch, catalog_data): - """Patch authentication.http.urllib.request.urlopen to return *catalog_data*.""" + """Patch authentication.http urlopen + OpenerDirector to return *catalog_data*. + + Covers both code paths in ``open_url``: + - default: ``urllib.request.urlopen`` (unauthenticated, no strict redirects) + - hardened: ``OpenerDirector.open`` (strict_redirects=True path). 
+ """ class FakeResponse: def __init__(self, data, url=""): @@ -189,8 +194,14 @@ def fake_urlopen(req, timeout=10): url = req if isinstance(req, str) else req.full_url return FakeResponse(catalog_data, url) + def fake_opener_open(_self, req, data=None, timeout=10): + return fake_urlopen(req, timeout) + import specify_cli.authentication.http as _auth_http monkeypatch.setattr(_auth_http.urllib.request, "urlopen", fake_urlopen) + monkeypatch.setattr( + _auth_http.urllib.request.OpenerDirector, "open", fake_opener_open + ) def test_fetch_and_search_all(self, tmp_path, monkeypatch): monkeypatch.setenv("HOME", str(tmp_path)) @@ -331,6 +342,11 @@ def fake_read_response_limited(response, **kwargs): import urllib.request monkeypatch.setattr(urllib.request, "urlopen", fake_urlopen) + monkeypatch.setattr( + urllib.request.OpenerDirector, + "open", + lambda _self, req, data=None, timeout=10: fake_urlopen(req, timeout), + ) monkeypatch.setattr( "specify_cli.integrations.catalog.read_response_limited", fake_read_response_limited, @@ -550,8 +566,15 @@ def __enter__(self): def __exit__(self, *a): pass - monkeypatch.setattr(_auth_http.urllib.request, "urlopen", - lambda req, timeout=10: FakeResponse(catalog, req if isinstance(req, str) else req.full_url)) + def _fake_urlopen(req, timeout=10): + return FakeResponse(catalog, req if isinstance(req, str) else req.full_url) + + monkeypatch.setattr(_auth_http.urllib.request, "urlopen", _fake_urlopen) + monkeypatch.setattr( + _auth_http.urllib.request.OpenerDirector, + "open", + lambda _self, req, data=None, timeout=10: _fake_urlopen(req, timeout), + ) old = os.getcwd() try: diff --git a/tests/test_github_workflows.py b/tests/test_github_workflows.py index 2b21d3a40f..7ad0b714ec 100644 --- a/tests/test_github_workflows.py +++ b/tests/test_github_workflows.py @@ -8,7 +8,9 @@ REPO_ROOT = Path(__file__).resolve().parent.parent WORKFLOWS_DIR = REPO_ROOT / ".github" / "workflows" -USES_RE = re.compile(r"^\s*uses:\s*(?P\S+)", re.MULTILINE) +# 
Match both the dedicated-step form (` uses: x@sha`) and the +# inline shorthand (` - uses: x@sha`) used in catalog-assign.yml. +USES_RE = re.compile(r"^\s*(?:-\s*)?uses:\s*(?P\S+)", re.MULTILINE) def test_github_actions_are_pinned_to_full_commit_shas(): From 2d7d39cf66d9804311ec9f6f36056ae03a187044 Mon Sep 17 00:00:00 2001 From: Pascal Date: Fri, 15 May 2026 08:43:24 +0200 Subject: [PATCH 12/30] ci(security): tidy follow-up details MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Polishes from the previous review pass that I noticed after pushing. - security.yml: drop the unneeded fetch-depth: 0 from the secret-scan checkout. detect-secrets-hook reads the working tree only — fetching full history slows the job without adding signal. - security.yml: add a follow-on step that surfaces the Bandit medium- severity informational pass in $GITHUB_STEP_SUMMARY. With continue-on-error: true the previous step never marks the job yellow/red, so findings were buried in the log; the summary now flags them with a ⚠️ heading (or ✅ when clean) at the top of the run page. - CONTRIBUTING.md: document the new tooling and gates so contributors don't bounce off CI: - detect-secrets-hook command + how to regenerate .secrets.baseline - the bandit baseline label gate (security-baseline-change) - shellcheck --severity=error invocation - explicit note that committed security-audit-requirements.txt can drift purely from upstream package releases and needs periodic regeneration even on unrelated PRs. 
--- .github/workflows/security.yml | 25 +++++++++++++++++++++++-- CONTRIBUTING.md | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 2 deletions(-) diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml index 0e19912f1e..b592605148 100644 --- a/.github/workflows/security.yml +++ b/.github/workflows/security.yml @@ -81,9 +81,32 @@ jobs: # findings in the job summary without breaking CI, so reviewers see # them before they accumulate. - name: Run Bandit medium-severity informational pass + id: bandit-medium continue-on-error: true run: uvx --from bandit==1.9.4 bandit -r src -ll + # Surface the medium-severity outcome in the job summary so reviewers + # see it without expanding the log; continue-on-error swallows the + # non-zero exit otherwise. + - name: Surface medium-severity findings in job summary + if: always() + run: | + if [ "${{ steps.bandit-medium.outcome }}" = "failure" ]; then + { + echo "## ⚠️ Bandit medium-severity informational pass" + echo "" + echo "Findings surfaced at MEDIUM severity (no baseline). These do not" + echo "fail CI but should be audited — either fix the issue, suppress" + echo "with an explicit \`# nosec\` carrying a justification, or escalate" + echo "the severity threshold once they are triaged." + echo "" + echo "See the **Run Bandit medium-severity informational pass** step" + echo "above for the file/line list." + } >> "$GITHUB_STEP_SUMMARY" + else + echo "## ✅ Bandit medium-severity informational pass — clean" >> "$GITHUB_STEP_SUMMARY" + fi + # Prevent silent whitelisting: if the baseline grew, the PR must carry # the 'security-baseline-change' label to acknowledge it. 
- name: Check Bandit baseline growth @@ -100,8 +123,6 @@ jobs: steps: - name: Checkout uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 - with: - fetch-depth: 0 - name: Install uv uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index fd043e01a6..926796dae9 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -94,6 +94,39 @@ Run these before changing dependency metadata, workflow execution code, subproce uv pip compile pyproject.toml --extra test --universal --generate-hashes --quiet --no-header --output-file .github/security-audit-requirements.txt ``` +Upstream package releases drift over time, so even an unrelated PR touching `pyproject.toml` can fail the `dependency-audit` check until the committed file is regenerated with the command above and re-committed. + +#### Secret scanning + +```bash +git ls-files -z -- ':!:.secrets.baseline' ':!:uv.lock' ':!:.github/security-audit-requirements.txt' \ + | xargs -0 uvx --from detect-secrets==1.5.0 detect-secrets-hook --baseline .secrets.baseline +``` + +The CI `secret-scan` job runs this against tracked files. It reports any high-entropy strings or provider tokens that aren't already whitelisted in `.secrets.baseline`. If you hit a known false positive (SHA pin, docs example, test fixture), regenerate the baseline: + +```bash +uvx --from detect-secrets==1.5.0 detect-secrets scan \ + --exclude-files '\.secrets\.baseline$' \ + --exclude-files 'uv\.lock$' \ + --exclude-files '\.github/security-audit-requirements\.txt$' \ + > .secrets.baseline +``` + +Audit the new entries before committing — a leaked credential must never be merged into the baseline. + +#### Bandit baseline + +The CI `static-analysis` job runs Bandit with `--baseline .github/bandit-baseline.json` (HIGH severity, blocking) plus a second informational pass at MEDIUM severity (`continue-on-error`, surfaced in the job summary). 
If a HIGH finding is intentional, audit it carefully, add an explicit `# nosec` with justification, and only then add it to the baseline. Growing the baseline is gated: the `check_bandit_baseline.py` script fails the PR unless it carries the `security-baseline-change` label, so reviewers see the whitelist expansion. + +#### Shell scripts + +```bash +shellcheck --severity=error scripts/bash/*.sh +``` + +The CI `lint.yml` `shellcheck` job blocks at `--severity=error` to catch real bugs while leaving stylistic warnings (SC2155 etc.) advisory. + ### Manual testing #### Testing setup From 7e547942ee0cc7e722f1d914349ac2b8fd094bdf Mon Sep 17 00:00:00 2001 From: Pascal Date: Fri, 15 May 2026 10:37:14 +0200 Subject: [PATCH 13/30] ci(security): apply self-review follow-ups MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Four hardening / robustness items raised during self-review of the PR. - check_bandit_baseline.py: normalize whitespace in the code-snippet hash that's part of each entry's identity. A bandit version bump that reformats the snippet (different number of context lines, different indentation) would otherwise make every baseline entry look "new", forcing the security-baseline-change label on every unrelated PR. - security.yml + check_secrets_baseline.py: symmetric growth gate on .secrets.baseline. detect-secrets-hook already blocks unknown secrets, but extending the baseline (whitelisting a new finding) was silent. Mirror the bandit gate — PR must carry secrets-baseline-change to acknowledge any new identity (filename + line + type + hashed_secret). - test_security_workflow.py: drop the brittle exact-name lookup for the blocking bandit step. The test now finds it by the baseline-arg signature, so future renames of the step don't silently bypass the --skip B602 check. Added _find_step_by_run_signature helper that insists on exactly one match. 
Strict assertions on OS matrix and tool version pins are kept — those are intentional security choices. - workflows/PUBLISHING.md: the shell-step NOTE in src/specify_cli/workflows/steps/shell/__init__.py points authors here for "security guidance", but the section didn't exist. Added an explicit "Security: shell steps execute arbitrary code" subsection covering the no-sandbox model, the inspect-before-install obligation, input-interpolation hygiene, and reviewer expectations. --- .github/scripts/check_bandit_baseline.py | 13 +- .github/scripts/check_secrets_baseline.py | 137 ++++++++++++++++++++++ .github/workflows/security.yml | 14 +++ tests/test_security_workflow.py | 33 +++++- workflows/PUBLISHING.md | 11 ++ 5 files changed, 202 insertions(+), 6 deletions(-) create mode 100644 .github/scripts/check_secrets_baseline.py diff --git a/.github/scripts/check_bandit_baseline.py b/.github/scripts/check_bandit_baseline.py index 7d9601c19f..0823700152 100644 --- a/.github/scripts/check_bandit_baseline.py +++ b/.github/scripts/check_bandit_baseline.py @@ -29,6 +29,7 @@ import hashlib import json import os +import re import subprocess import sys from pathlib import Path @@ -60,16 +61,20 @@ def _read_baseline_at(ref: str) -> tuple[dict, bool]: return {"results": []}, True +_WHITESPACE_RE = re.compile(r"\s+") + + def _identity(result: dict) -> str: """Stable identity for a baseline entry. Combines location, test, severity, confidence, and a hash of the - pinned code snippet so reordering or formatting changes don't - register as new findings, but a different finding at the same line - does. + pinned code snippet (whitespace-normalized) so reformatting changes + or upstream bandit-output tweaks don't register as new findings, + but a different finding at the same line does. 
""" code = result.get("code", "") or "" - code_hash = hashlib.sha256(code.encode("utf-8")).hexdigest()[:16] + normalized = _WHITESPACE_RE.sub(" ", code).strip() + code_hash = hashlib.sha256(normalized.encode("utf-8")).hexdigest()[:16] return "|".join( [ str(result.get("filename", "")), diff --git a/.github/scripts/check_secrets_baseline.py b/.github/scripts/check_secrets_baseline.py new file mode 100644 index 0000000000..c172b045ff --- /dev/null +++ b/.github/scripts/check_secrets_baseline.py @@ -0,0 +1,137 @@ +"""Fail if new entries appear in the detect-secrets baseline without ack. + +Mirrors ``check_bandit_baseline.py``: when ``.secrets.baseline`` grows on +a PR, the maintainer adding the new whitelist entry must label the PR +``secrets-baseline-change`` so reviewers see the expansion. + +Identity is ``filename + line + type + hashed_secret`` — detect-secrets +already hashes the candidate, so identities are stable across runs and a +swap (remove one, add another with the same count) is still caught. + +When the baseline file does not exist at the base ref, the PR is the one +that introduces it; no acknowledgement is required. + +Required environment variables: +- ``SECRETS_BASELINE_BASE``: git ref of the PR base +- ``SECRETS_BASELINE_HEAD``: git ref of the PR head +- ``SECRETS_BASELINE_LABELS``: comma-separated PR labels + +Outside of PR events, all inputs may be empty and the script no-ops. 
+""" + +from __future__ import annotations + +import json +import os +import subprocess +import sys +from pathlib import Path + +REPO_ROOT = Path(__file__).resolve().parents[2] +BASELINE_PATH = ".secrets.baseline" +ACK_LABEL = "secrets-baseline-change" + + +def _read_baseline_at(ref: str) -> tuple[dict, bool]: + """Return (baseline_json, file_existed_at_ref).""" + if not ref: + return {"results": {}}, False + try: + blob = subprocess.run( + ["git", "show", f"{ref}:{BASELINE_PATH}"], + check=True, + cwd=REPO_ROOT, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ).stdout + except subprocess.CalledProcessError: + return {"results": {}}, False + try: + return json.loads(blob), True + except json.JSONDecodeError: + print(f"Could not parse baseline at {ref}; treating as empty.", file=sys.stderr) + return {"results": {}}, True + + +def _identities(baseline: dict) -> set[str]: + """Flatten detect-secrets results to a set of stable identities.""" + ids: set[str] = set() + results = baseline.get("results", {}) + if not isinstance(results, dict): + return ids + for filename, entries in results.items(): + if not isinstance(entries, list): + continue + for entry in entries: + if not isinstance(entry, dict): + continue + ids.add( + "|".join( + [ + str(filename), + str(entry.get("line_number", "")), + str(entry.get("type", "")), + str(entry.get("hashed_secret", "")), + ] + ) + ) + return ids + + +def main() -> int: + base_ref = os.environ.get("SECRETS_BASELINE_BASE", "").strip() + head_ref = os.environ.get("SECRETS_BASELINE_HEAD", "").strip() or "HEAD" + + if not base_ref or set(base_ref) <= {"0"}: + print("No PR base ref; secrets baseline diff check skipped.") + return 0 + + base_baseline, base_existed = _read_baseline_at(base_ref) + head_baseline, _ = _read_baseline_at(head_ref) + + if not base_existed: + print( + "Baseline file not present at base ref; treating this PR as the " + "introduction of the baseline. No acknowledgement required." 
+ ) + return 0 + + base_ids = _identities(base_baseline) + head_ids = _identities(head_baseline) + + new_ids = head_ids - base_ids + if not new_ids: + print( + f"Secrets baseline entries: {len(base_ids)} -> {len(head_ids)} " + f"(no new identities)." + ) + return 0 + + labels = { + label.strip() + for label in os.environ.get("SECRETS_BASELINE_LABELS", "").split(",") + if label.strip() + } + if ACK_LABEL in labels: + print( + f"Secrets baseline gained {len(new_ids)} new identities; " + f"acknowledged via label '{ACK_LABEL}'." + ) + return 0 + + print( + f"Secrets baseline gained {len(new_ids)} new identities. " + f"Audit the new entries — if they are genuine false positives " + f"(SHA pins, docs examples, test fixtures), add label " + f"'{ACK_LABEL}' to the PR to acknowledge them. If any are real " + f"secrets, remove them from history before merging.", + file=sys.stderr, + ) + for identity in sorted(new_ids): + print(f" + {identity}", file=sys.stderr) + return 1 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml index b592605148..92c7c32f8c 100644 --- a/.github/workflows/security.yml +++ b/.github/workflows/security.yml @@ -123,6 +123,9 @@ jobs: steps: - name: Checkout uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + # Needed by check_secrets_baseline.py to read the baseline at base ref. + fetch-depth: 0 - name: Install uv uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0 @@ -145,3 +148,14 @@ jobs: ':!:.github/security-audit-requirements.txt' \ | xargs -0 uvx --from detect-secrets==1.5.0 detect-secrets-hook \ --baseline .secrets.baseline + + # Symmetric with the bandit baseline gate: if .secrets.baseline grew, + # the PR must carry the 'secrets-baseline-change' label so reviewers + # see the whitelist expansion explicitly. 
+ - name: Check secrets baseline growth + if: ${{ github.event_name == 'pull_request' }} + env: + SECRETS_BASELINE_BASE: ${{ github.event.pull_request.base.sha }} + SECRETS_BASELINE_HEAD: ${{ github.event.pull_request.head.sha }} + SECRETS_BASELINE_LABELS: ${{ join(github.event.pull_request.labels.*.name, ',') }} + run: python .github/scripts/check_secrets_baseline.py diff --git a/tests/test_security_workflow.py b/tests/test_security_workflow.py index e02e01eff7..1b42920003 100644 --- a/tests/test_security_workflow.py +++ b/tests/test_security_workflow.py @@ -72,6 +72,31 @@ def _step_run(job_name: str, step_name: str) -> str: return _step(job_name, step_name)["run"] +def _find_step_by_run_signature(job_name: str, marker: str) -> dict: + """Locate a step in *job_name* whose ``run`` command contains *marker*. + + Step naming is incidental to behavior; tests that assert on what a + step *does* should look it up by what it runs, not by its label, so + renames don't silently make the assertion skip. + """ + workflow = _load_security_workflow() + matches = [ + step + for step in workflow["jobs"][job_name]["steps"] + if marker in (step.get("run") or "") + ] + if not matches: + raise AssertionError( + f"No step in job {job_name!r} runs a command containing {marker!r}." + ) + if len(matches) > 1: + raise AssertionError( + f"Marker {marker!r} matched {len(matches)} steps in job " + f"{job_name!r}; expected exactly one." + ) + return matches[0] + + def _load_sync_script(): spec = importlib.util.spec_from_file_location( "check_security_requirements", @@ -185,13 +210,17 @@ def test_actions_are_pinned_to_full_commit_shas(self): assert re.search(r"@v\d+", uses_ref) is None def test_bandit_does_not_globally_skip_b602(self): - run = _step_run("static-analysis", "Run Bandit") + # Identify the blocking bandit step by its baseline-arg rather than + # by exact step name — name is incidental, behavior is what matters. 
+ bandit_step = _find_step_by_run_signature( + "static-analysis", "--baseline .github/bandit-baseline.json" + ) + run = bandit_step["run"] workflow_text = SECURITY_WORKFLOW.read_text(encoding="utf-8") assert run == BANDIT assert "--skip" not in run assert "--skip B602" not in workflow_text - assert "--baseline .github/bandit-baseline.json" in run def test_bandit_baseline_only_ignores_shell_step_b602(self): baseline = json.loads(BANDIT_BASELINE.read_text(encoding="utf-8")) diff --git a/workflows/PUBLISHING.md b/workflows/PUBLISHING.md index ce0d251826..e10f814e6a 100644 --- a/workflows/PUBLISHING.md +++ b/workflows/PUBLISHING.md @@ -272,6 +272,17 @@ When releasing a new version: - **Quote variables** — use proper quoting in shell commands to handle spaces - **Check exit codes** — shell step failures stop the workflow; make sure commands are robust +#### Security: shell steps execute arbitrary code + +Workflow `shell` steps execute their `run` field through `/bin/sh` (POSIX) or the platform shell. There is no sandbox between the step and the user's machine: a malicious or buggy `run` block can read environment variables, modify files outside the project, exfiltrate data, or escalate privileges. + +Catalog-listed workflows are reviewed at submission time (see [Verification Process](#verification-process)), but you should still treat every install as code-execution from an untrusted source until you have read the `workflow.yml`: + +- **Before installing a workflow**, run `specify workflow inspect ` (or read the YAML directly) and audit every `shell` step's `run` field. +- **Prefer explicit commands over interpolation** in `run` blocks: `{{ inputs.something }}` substitutions should be quoted and constrained via `enum` so a malicious input can't inject shell syntax. +- **Limit privilege**: shell steps inherit the user's environment. 
Workflows that need elevated access (sudo, secrets, GitHub tokens) should call them out explicitly in the README so reviewers can spot the requirement. +- **Authors**: if your workflow has shell steps that look risky out of context (deletions, network calls, credential reads), document the rationale in your README. Maintainers will reject submissions whose shell steps can't be justified at review time. + ### Integration Flexibility - **Set `integration` at workflow level** — use the `workflow.integration` field as the default From 2fd80710db57b18ccf4722df480fffcb5ce14f6f Mon Sep 17 00:00:00 2001 From: Pascal Date: Sat, 16 May 2026 00:05:27 +0200 Subject: [PATCH 14/30] ci(security): apply review #2 follow-ups MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two items from the second self-review: - workflows/PUBLISHING.md: fix invented command name. The first draft recommended `specify workflow inspect ` which doesn't exist — the actual subcommand is `workflow info`, and even that only shows metadata (name/version/inputs/step IDs+types), never the shell `run` content. Replace with explicit guidance to read the raw workflow.yml directly when auditing shell steps. - tests/test_baseline_gates.py: new file. 12 unit tests covering both check_bandit_baseline.py and check_secrets_baseline.py — no PR base ref, introduction (baseline absent at base), identical baselines, growth without ack label, growth with ack label, swap attack (constant count, new identity), and (bandit-only) whitespace-only drift in the code snippet hash. The latter verifies the normalization added earlier protects against bandit reformatting its output. 
--- tests/test_baseline_gates.py | 339 +++++++++++++++++++++++++++++++++++ workflows/PUBLISHING.md | 2 +- 2 files changed, 340 insertions(+), 1 deletion(-) create mode 100644 tests/test_baseline_gates.py diff --git a/tests/test_baseline_gates.py b/tests/test_baseline_gates.py new file mode 100644 index 0000000000..49fd5ea1d4 --- /dev/null +++ b/tests/test_baseline_gates.py @@ -0,0 +1,339 @@ +"""Tests for the bandit and detect-secrets baseline growth gate scripts. + +Both scripts share the same shape: read the baseline at a base ref and a +head ref, compare *identities* (not counts) so a swap doesn't slip +through, and require an acknowledgement label when the head set has an +identity the base lacks. + +We drive the scripts as subprocesses against a throwaway git repo so the +``git show <ref>:<path>`` calls inside them resolve real refs. +""" + +from __future__ import annotations + +import json +import subprocess +import sys +from pathlib import Path + +import pytest + + +REPO_ROOT = Path(__file__).resolve().parent.parent +BANDIT_SCRIPT = REPO_ROOT / ".github" / "scripts" / "check_bandit_baseline.py" +SECRETS_SCRIPT = REPO_ROOT / ".github" / "scripts" / "check_secrets_baseline.py" + + +def _git(repo: Path, *args: str) -> str: + return subprocess.run( + ["git", *args], + cwd=repo, + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ).stdout.strip() + + +def _init_repo(tmp_path: Path) -> Path: + repo = tmp_path / "repo" + repo.mkdir() + _git(repo, "init", "-q", "-b", "main") + _git(repo, "config", "user.email", "test@example.com") + _git(repo, "config", "user.name", "Test") + # Mirror the layout the scripts expect: REPO_ROOT/.github/... + (repo / ".github").mkdir() + (repo / ".github" / "scripts").mkdir() + # _install_script() later copies the script under test in here so that + # REPO_ROOT inside it (resolve().parents[2]) points at our throwaway repo. 
+ return repo + + +def _install_script(repo: Path, source: Path) -> Path: + target = repo / ".github" / "scripts" / source.name + target.write_text(source.read_text(encoding="utf-8"), encoding="utf-8") + return target + + +def _commit_baseline(repo: Path, baseline_path: str, payload: dict, message: str) -> str: + target = repo / baseline_path + target.parent.mkdir(parents=True, exist_ok=True) + target.write_text(json.dumps(payload, indent=2), encoding="utf-8") + _git(repo, "add", baseline_path) + _git(repo, "commit", "-q", "-m", message) + return _git(repo, "rev-parse", "HEAD") + + +def _run_script(repo: Path, script: Path, env_overrides: dict[str, str]): + env = { + "PATH": "/usr/bin:/bin", + "HOME": str(repo), # avoid leaking host gitconfig + **env_overrides, + } + return subprocess.run( + [sys.executable, str(script)], + cwd=repo, + env=env, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + + +# --------------------------------------------------------------------------- +# Bandit baseline gate +# --------------------------------------------------------------------------- + + +def _bandit_entry(filename: str, line: int, test_id: str = "B602", code: str = "shell=True") -> dict: + return { + "filename": filename, + "line_number": line, + "test_id": test_id, + "issue_severity": "HIGH", + "issue_confidence": "HIGH", + "code": code, + } + + +class TestBanditBaselineGate: + @pytest.fixture + def repo(self, tmp_path): + repo = _init_repo(tmp_path) + _install_script(repo, BANDIT_SCRIPT) + return repo + + def _run(self, repo, base, head, labels=""): + return _run_script( + repo, + repo / ".github" / "scripts" / BANDIT_SCRIPT.name, + { + "BANDIT_BASELINE_BASE": base, + "BANDIT_BASELINE_HEAD": head, + "BANDIT_BASELINE_LABELS": labels, + }, + ) + + def test_no_base_ref_is_skipped(self, repo): + # Need at least one commit so HEAD resolves. 
+ _commit_baseline(repo, ".github/bandit-baseline.json", {"results": []}, "init") + result = self._run(repo, base="", head="HEAD") + assert result.returncode == 0 + assert "baseline diff check skipped" in result.stdout + + def test_introduction_pr_skips_check(self, repo): + _git(repo, "commit", "--allow-empty", "-q", "-m", "before baseline") + base_sha = _git(repo, "rev-parse", "HEAD") + head_sha = _commit_baseline( + repo, + ".github/bandit-baseline.json", + {"results": [_bandit_entry("a.py", 10)]}, + "introduce baseline", + ) + result = self._run(repo, base=base_sha, head=head_sha) + assert result.returncode == 0, result.stderr + assert "introduction of the baseline" in result.stdout + + def test_identical_baselines_pass(self, repo): + entries = [_bandit_entry("a.py", 10)] + base_sha = _commit_baseline(repo, ".github/bandit-baseline.json", {"results": entries}, "base") + # No changes; head == base. + result = self._run(repo, base=base_sha, head=base_sha) + assert result.returncode == 0 + assert "no new identities" in result.stdout + + def test_growth_without_label_fails(self, repo): + base_sha = _commit_baseline( + repo, + ".github/bandit-baseline.json", + {"results": [_bandit_entry("a.py", 10)]}, + "base", + ) + head_sha = _commit_baseline( + repo, + ".github/bandit-baseline.json", + {"results": [_bandit_entry("a.py", 10), _bandit_entry("b.py", 20)]}, + "grow", + ) + result = self._run(repo, base=base_sha, head=head_sha) + assert result.returncode == 1 + assert "'security-baseline-change'" in result.stderr + + def test_growth_with_label_passes(self, repo): + base_sha = _commit_baseline( + repo, + ".github/bandit-baseline.json", + {"results": [_bandit_entry("a.py", 10)]}, + "base", + ) + head_sha = _commit_baseline( + repo, + ".github/bandit-baseline.json", + {"results": [_bandit_entry("a.py", 10), _bandit_entry("b.py", 20)]}, + "grow", + ) + result = self._run(repo, base=base_sha, head=head_sha, labels="security-baseline-change") + assert result.returncode == 0 
+ assert "acknowledged via label" in result.stdout + + def test_swap_attack_detected(self, repo): + """Removing one entry and adding a different one keeps the count + constant but introduces a new identity; gate must still fire.""" + base_sha = _commit_baseline( + repo, + ".github/bandit-baseline.json", + {"results": [_bandit_entry("a.py", 10)]}, + "base", + ) + head_sha = _commit_baseline( + repo, + ".github/bandit-baseline.json", + {"results": [_bandit_entry("b.py", 20)]}, # swapped, same count + "swap", + ) + result = self._run(repo, base=base_sha, head=head_sha) + assert result.returncode == 1, "swap should be detected via identity diff" + assert "1 new identities" in result.stderr + + def test_whitespace_only_change_does_not_trip(self, repo): + """A bandit version bump that reformats the code snippet (different + whitespace) shouldn't make every entry look new.""" + base_sha = _commit_baseline( + repo, + ".github/bandit-baseline.json", + {"results": [_bandit_entry("a.py", 10, code="shell=True\n capture_output=True")]}, + "base", + ) + head_sha = _commit_baseline( + repo, + ".github/bandit-baseline.json", + { + "results": [ + _bandit_entry("a.py", 10, code="shell=True\ncapture_output=True") + ] + }, + "reformatted snippet", + ) + result = self._run(repo, base=base_sha, head=head_sha) + assert result.returncode == 0, result.stderr + + +# --------------------------------------------------------------------------- +# Secrets baseline gate +# --------------------------------------------------------------------------- + + +def _secrets_baseline(*entries: tuple[str, int, str, str]) -> dict: + """Build a detect-secrets-style baseline from (file, line, type, hash) tuples.""" + results: dict[str, list[dict]] = {} + for filename, line, secret_type, hashed in entries: + results.setdefault(filename, []).append( + { + "type": secret_type, + "filename": filename, + "hashed_secret": hashed, + "is_verified": False, + "line_number": line, + } + ) + return {"version": "1.5.0", 
"results": results} + + +class TestSecretsBaselineGate: + @pytest.fixture + def repo(self, tmp_path): + repo = _init_repo(tmp_path) + _install_script(repo, SECRETS_SCRIPT) + return repo + + def _run(self, repo, base, head, labels=""): + return _run_script( + repo, + repo / ".github" / "scripts" / SECRETS_SCRIPT.name, + { + "SECRETS_BASELINE_BASE": base, + "SECRETS_BASELINE_HEAD": head, + "SECRETS_BASELINE_LABELS": labels, + }, + ) + + def test_introduction_pr_skips_check(self, repo): + _git(repo, "commit", "--allow-empty", "-q", "-m", "before baseline") + base_sha = _git(repo, "rev-parse", "HEAD") + head_sha = _commit_baseline( + repo, + ".secrets.baseline", + _secrets_baseline(("a.py", 1, "Secret Keyword", "abc123")), + "introduce", + ) + result = self._run(repo, base=base_sha, head=head_sha) + assert result.returncode == 0, result.stderr + assert "introduction of the baseline" in result.stdout + + def test_growth_without_label_fails(self, repo): + base_sha = _commit_baseline( + repo, + ".secrets.baseline", + _secrets_baseline(("a.py", 1, "Secret Keyword", "abc")), + "base", + ) + head_sha = _commit_baseline( + repo, + ".secrets.baseline", + _secrets_baseline( + ("a.py", 1, "Secret Keyword", "abc"), + ("b.py", 2, "Secret Keyword", "def"), + ), + "grow", + ) + result = self._run(repo, base=base_sha, head=head_sha) + assert result.returncode == 1 + assert "'secrets-baseline-change'" in result.stderr + + def test_growth_with_label_passes(self, repo): + base_sha = _commit_baseline( + repo, + ".secrets.baseline", + _secrets_baseline(("a.py", 1, "Secret Keyword", "abc")), + "base", + ) + head_sha = _commit_baseline( + repo, + ".secrets.baseline", + _secrets_baseline( + ("a.py", 1, "Secret Keyword", "abc"), + ("b.py", 2, "Secret Keyword", "def"), + ), + "grow", + ) + result = self._run( + repo, base=base_sha, head=head_sha, labels="secrets-baseline-change" + ) + assert result.returncode == 0, result.stderr + assert "acknowledged via label" in result.stdout + + def 
test_swap_attack_detected(self, repo): + base_sha = _commit_baseline( + repo, + ".secrets.baseline", + _secrets_baseline(("a.py", 1, "Secret Keyword", "abc")), + "base", + ) + head_sha = _commit_baseline( + repo, + ".secrets.baseline", + _secrets_baseline(("b.py", 2, "Secret Keyword", "def")), + "swap", + ) + result = self._run(repo, base=base_sha, head=head_sha) + assert result.returncode == 1 + assert "1 new identities" in result.stderr + + def test_identical_baselines_pass(self, repo): + entries = (("a.py", 1, "Secret Keyword", "abc"),) + base_sha = _commit_baseline( + repo, ".secrets.baseline", _secrets_baseline(*entries), "base" + ) + result = self._run(repo, base=base_sha, head=base_sha) + assert result.returncode == 0 + assert "no new identities" in result.stdout diff --git a/workflows/PUBLISHING.md b/workflows/PUBLISHING.md index e10f814e6a..0370ed09f9 100644 --- a/workflows/PUBLISHING.md +++ b/workflows/PUBLISHING.md @@ -278,7 +278,7 @@ Workflow `shell` steps execute their `run` field through `/bin/sh` (POSIX) or th Catalog-listed workflows are reviewed at submission time (see [Verification Process](#verification-process)), but you should still treat every install as code-execution from an untrusted source until you have read the `workflow.yml`: -- **Before installing a workflow**, run `specify workflow inspect ` (or read the YAML directly) and audit every `shell` step's `run` field. +- **Before installing a workflow**, fetch the raw YAML and audit every `shell` step's `run` field directly. `specify workflow info ` only shows metadata (name, version, inputs, step IDs/types) — not the shell content that would actually execute. - **Prefer explicit commands over interpolation** in `run` blocks: `{{ inputs.something }}` substitutions should be quoted and constrained via `enum` so a malicious input can't inject shell syntax. - **Limit privilege**: shell steps inherit the user's environment. 
Workflows that need elevated access (sudo, secrets, GitHub tokens) should call them out explicitly in the README so reviewers can spot the requirement. - **Authors**: if your workflow has shell steps that look risky out of context (deletions, network calls, credential reads), document the rationale in your README. Maintainers will reject submissions whose shell steps can't be justified at review time. From 26bd14a1b2cad1e70616c746db1ad18dc67a68aa Mon Sep 17 00:00:00 2001 From: Pascal Date: Sat, 16 May 2026 00:17:19 +0200 Subject: [PATCH 15/30] ci(security): apply review #3 follow-ups MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three polish items from the third self-review pass. - tests/test_upgrade.py: new TestBoundedRead class. Pins the contract that _fetch_latest_release_tag wraps the response body through read_response_limited with max_bytes=1024*1024. Protects the hardening against a silent revert to `resp.read()` in a future refactor (the extraction to _version.py during the last merge would have lost it if we hadn't caught it manually). - tests/test_baseline_gates.py: replace the two near-identical test classes with a parametrized TestSharedBaselineGate (bandit + secrets via a GateConfig dataclass and a `gate` fixture). Bandit-only quirks (no-base-ref short-circuit, whitespace-normalized identity) stay in TestBanditSpecific. Removes ~80 lines of duplication; the two scripts now exercise the same scenarios by construction so a divergence is caught the moment one drifts. - tests/test_baseline_gates.py: new shared scenario test_corrupt_json_at_base_falls_back_to_empty. Covers the except JSONDecodeError branch of _read_baseline_at — corrupt base doesn't crash the script; instead the head set becomes "all new" and the normal label gate fires. Was previously dead code from a coverage standpoint. 3009 passed (up from 3006 — 14 baseline tests now parametrized as 12 + 2 bandit-specific, plus 1 new bounded-read test). 
--- tests/test_baseline_gates.py | 428 +++++++++++++++++------------------ tests/test_upgrade.py | 37 +++ 2 files changed, 245 insertions(+), 220 deletions(-) diff --git a/tests/test_baseline_gates.py b/tests/test_baseline_gates.py index 49fd5ea1d4..72adf2522e 100644 --- a/tests/test_baseline_gates.py +++ b/tests/test_baseline_gates.py @@ -5,6 +5,11 @@ through, and require an acknowledgement label when the head set is a strict superset. +Shared cases (introduction, identical, growth±label, swap, corrupt-JSON +fallback) are parametrized across both scripts via the ``gate`` fixture. +Bandit-only quirks (no-base-ref, whitespace normalization) live in +``TestBanditSpecific``. + We drive the scripts as subprocesses against a throwaway git repo so the ``git show :`` calls inside them resolve real refs. """ @@ -14,7 +19,9 @@ import json import subprocess import sys +from dataclasses import dataclass from pathlib import Path +from typing import Callable import pytest @@ -41,11 +48,8 @@ def _init_repo(tmp_path: Path) -> Path: _git(repo, "init", "-q", "-b", "main") _git(repo, "config", "user.email", "test@example.com") _git(repo, "config", "user.name", "Test") - # Mirror the layout the scripts expect: REPO_ROOT/.github/... (repo / ".github").mkdir() (repo / ".github" / "scripts").mkdir() - # Copy the script under test into the repo so REPO_ROOT inside the - # script (resolve().parents[2]) points at our throwaway repo. 
return repo @@ -55,19 +59,23 @@ def _install_script(repo: Path, source: Path) -> Path: return target -def _commit_baseline(repo: Path, baseline_path: str, payload: dict, message: str) -> str: - target = repo / baseline_path +def _commit_file(repo: Path, rel_path: str, content: str, message: str) -> str: + target = repo / rel_path target.parent.mkdir(parents=True, exist_ok=True) - target.write_text(json.dumps(payload, indent=2), encoding="utf-8") - _git(repo, "add", baseline_path) + target.write_text(content, encoding="utf-8") + _git(repo, "add", rel_path) _git(repo, "commit", "-q", "-m", message) return _git(repo, "rev-parse", "HEAD") +def _commit_baseline(repo: Path, baseline_path: str, payload: dict, message: str) -> str: + return _commit_file(repo, baseline_path, json.dumps(payload, indent=2), message) + + def _run_script(repo: Path, script: Path, env_overrides: dict[str, str]): env = { "PATH": "/usr/bin:/bin", - "HOME": str(repo), # avoid leaking host gitconfig + "HOME": str(repo), **env_overrides, } return subprocess.run( @@ -81,259 +89,239 @@ def _run_script(repo: Path, script: Path, env_overrides: dict[str, str]): # --------------------------------------------------------------------------- -# Bandit baseline gate +# Parametrization machinery # --------------------------------------------------------------------------- -def _bandit_entry(filename: str, line: int, test_id: str = "B602", code: str = "shell=True") -> dict: +def _bandit_baseline(entries: list[tuple[str, int]]) -> dict: + """Build a bandit-style baseline from (filename, line) tuples.""" return { - "filename": filename, - "line_number": line, - "test_id": test_id, - "issue_severity": "HIGH", - "issue_confidence": "HIGH", - "code": code, + "results": [ + { + "filename": filename, + "line_number": line, + "test_id": "B602", + "issue_severity": "HIGH", + "issue_confidence": "HIGH", + "code": f"shell=True at {filename}:{line}", + } + for filename, line in entries + ] } -class TestBanditBaselineGate: - 
@pytest.fixture - def repo(self, tmp_path): - repo = _init_repo(tmp_path) - _install_script(repo, BANDIT_SCRIPT) - return repo +def _secrets_baseline(entries: list[tuple[str, int]]) -> dict: + """Build a detect-secrets-style baseline from (filename, line) tuples.""" + results: dict[str, list[dict]] = {} + for filename, line in entries: + results.setdefault(filename, []).append( + { + "type": "Secret Keyword", + "filename": filename, + # The hash is part of the identity, so make it unique per (file, line). + "hashed_secret": f"h_{filename}_{line}", + "is_verified": False, + "line_number": line, + } + ) + return {"version": "1.5.0", "results": results} + + +@dataclass +class GateConfig: + name: str + script: Path + env_prefix: str + baseline_path: str + label: str + make_baseline: Callable[[list[tuple[str, int]]], dict] + + +BANDIT_GATE = GateConfig( + name="bandit", + script=BANDIT_SCRIPT, + env_prefix="BANDIT_BASELINE", + baseline_path=".github/bandit-baseline.json", + label="security-baseline-change", + make_baseline=_bandit_baseline, +) + + +SECRETS_GATE = GateConfig( + name="secrets", + script=SECRETS_SCRIPT, + env_prefix="SECRETS_BASELINE", + baseline_path=".secrets.baseline", + label="secrets-baseline-change", + make_baseline=_secrets_baseline, +) + + +@dataclass +class GateHandle: + """Live test harness: a repo with the script installed and helpers.""" + + config: GateConfig + repo: Path + + def commit(self, entries: list[tuple[str, int]], message: str) -> str: + return _commit_baseline( + self.repo, + self.config.baseline_path, + self.config.make_baseline(entries), + message, + ) + + def commit_raw(self, raw_content: str, message: str) -> str: + return _commit_file(self.repo, self.config.baseline_path, raw_content, message) - def _run(self, repo, base, head, labels=""): + def run(self, *, base: str, head: str, labels: str = ""): return _run_script( - repo, - repo / ".github" / "scripts" / BANDIT_SCRIPT.name, + self.repo, + self.repo / ".github" / "scripts" / 
self.config.script.name, { - "BANDIT_BASELINE_BASE": base, - "BANDIT_BASELINE_HEAD": head, - "BANDIT_BASELINE_LABELS": labels, + f"{self.config.env_prefix}_BASE": base, + f"{self.config.env_prefix}_HEAD": head, + f"{self.config.env_prefix}_LABELS": labels, }, ) - def test_no_base_ref_is_skipped(self, repo): - # Need at least one commit so HEAD resolves. - _commit_baseline(repo, ".github/bandit-baseline.json", {"results": []}, "init") - result = self._run(repo, base="", head="HEAD") - assert result.returncode == 0 - assert "baseline diff check skipped" in result.stdout - def test_introduction_pr_skips_check(self, repo): - _git(repo, "commit", "--allow-empty", "-q", "-m", "before baseline") - base_sha = _git(repo, "rev-parse", "HEAD") - head_sha = _commit_baseline( - repo, - ".github/bandit-baseline.json", - {"results": [_bandit_entry("a.py", 10)]}, - "introduce baseline", - ) - result = self._run(repo, base=base_sha, head=head_sha) +@pytest.fixture(params=[BANDIT_GATE, SECRETS_GATE], ids=lambda c: c.name) +def gate(request, tmp_path) -> GateHandle: + config: GateConfig = request.param + repo = _init_repo(tmp_path) + _install_script(repo, config.script) + return GateHandle(config=config, repo=repo) + + +# --------------------------------------------------------------------------- +# Shared scenarios (parametrized across both scripts) +# --------------------------------------------------------------------------- + + +class TestSharedBaselineGate: + """Scenarios that must hold for both the bandit and secrets gates.""" + + def test_introduction_pr_skips_check(self, gate: GateHandle): + # Baseline file did not exist at base ref → no acknowledgement needed. 
+ _git(gate.repo, "commit", "--allow-empty", "-q", "-m", "before baseline") + base_sha = _git(gate.repo, "rev-parse", "HEAD") + head_sha = gate.commit([("a.py", 10)], "introduce baseline") + + result = gate.run(base=base_sha, head=head_sha) + assert result.returncode == 0, result.stderr assert "introduction of the baseline" in result.stdout - def test_identical_baselines_pass(self, repo): - entries = [_bandit_entry("a.py", 10)] - base_sha = _commit_baseline(repo, ".github/bandit-baseline.json", {"results": entries}, "base") - # No changes; head == base. - result = self._run(repo, base=base_sha, head=base_sha) + def test_identical_baselines_pass(self, gate: GateHandle): + base_sha = gate.commit([("a.py", 10)], "base") + result = gate.run(base=base_sha, head=base_sha) assert result.returncode == 0 assert "no new identities" in result.stdout - def test_growth_without_label_fails(self, repo): - base_sha = _commit_baseline( - repo, - ".github/bandit-baseline.json", - {"results": [_bandit_entry("a.py", 10)]}, - "base", - ) - head_sha = _commit_baseline( - repo, - ".github/bandit-baseline.json", - {"results": [_bandit_entry("a.py", 10), _bandit_entry("b.py", 20)]}, - "grow", - ) - result = self._run(repo, base=base_sha, head=head_sha) + def test_growth_without_label_fails(self, gate: GateHandle): + base_sha = gate.commit([("a.py", 10)], "base") + head_sha = gate.commit([("a.py", 10), ("b.py", 20)], "grow") + + result = gate.run(base=base_sha, head=head_sha) + assert result.returncode == 1 - assert "'security-baseline-change'" in result.stderr + assert f"'{gate.config.label}'" in result.stderr - def test_growth_with_label_passes(self, repo): - base_sha = _commit_baseline( - repo, - ".github/bandit-baseline.json", - {"results": [_bandit_entry("a.py", 10)]}, - "base", - ) - head_sha = _commit_baseline( - repo, - ".github/bandit-baseline.json", - {"results": [_bandit_entry("a.py", 10), _bandit_entry("b.py", 20)]}, - "grow", - ) - result = self._run(repo, base=base_sha, 
head=head_sha, labels="security-baseline-change") - assert result.returncode == 0 + def test_growth_with_label_passes(self, gate: GateHandle): + base_sha = gate.commit([("a.py", 10)], "base") + head_sha = gate.commit([("a.py", 10), ("b.py", 20)], "grow") + + result = gate.run(base=base_sha, head=head_sha, labels=gate.config.label) + + assert result.returncode == 0, result.stderr assert "acknowledged via label" in result.stdout - def test_swap_attack_detected(self, repo): - """Removing one entry and adding a different one keeps the count - constant but introduces a new identity; gate must still fire.""" - base_sha = _commit_baseline( - repo, - ".github/bandit-baseline.json", - {"results": [_bandit_entry("a.py", 10)]}, - "base", - ) - head_sha = _commit_baseline( - repo, - ".github/bandit-baseline.json", - {"results": [_bandit_entry("b.py", 20)]}, # swapped, same count - "swap", - ) - result = self._run(repo, base=base_sha, head=head_sha) - assert result.returncode == 1, "swap should be detected via identity diff" + def test_swap_attack_detected(self, gate: GateHandle): + """Remove one entry and add a different one → constant count, but + a *new* identity appears. 
Gate must still fire.""" + base_sha = gate.commit([("a.py", 10)], "base") + head_sha = gate.commit([("b.py", 20)], "swap") # same count, different ID + + result = gate.run(base=base_sha, head=head_sha) + + assert result.returncode == 1, "identity diff must catch swaps" assert "1 new identities" in result.stderr - def test_whitespace_only_change_does_not_trip(self, repo): - """A bandit version bump that reformats the code snippet (different - whitespace) shouldn't make every entry look new.""" - base_sha = _commit_baseline( - repo, - ".github/bandit-baseline.json", - {"results": [_bandit_entry("a.py", 10, code="shell=True\n capture_output=True")]}, - "base", - ) - head_sha = _commit_baseline( - repo, - ".github/bandit-baseline.json", - { - "results": [ - _bandit_entry("a.py", 10, code="shell=True\ncapture_output=True") - ] - }, - "reformatted snippet", - ) - result = self._run(repo, base=base_sha, head=head_sha) - assert result.returncode == 0, result.stderr + def test_corrupt_json_at_base_falls_back_to_empty(self, gate: GateHandle): + """If the baseline at the base ref is unparseable JSON, treat its + contents as empty so the script still completes (the head set + becomes 'all new' and the label gate fires).""" + base_sha = gate.commit_raw("{ invalid json", "corrupt base") + head_sha = gate.commit([("a.py", 10)], "valid head") + + result = gate.run(base=base_sha, head=head_sha) + + assert result.returncode == 1, "corrupt base should not crash the script" + assert f"'{gate.config.label}'" in result.stderr + assert "Could not parse baseline" in result.stderr # --------------------------------------------------------------------------- -# Secrets baseline gate +# Bandit-only scenarios # --------------------------------------------------------------------------- -def _secrets_baseline(*entries: tuple[str, int, str, str]) -> dict: - """Build a detect-secrets-style baseline from (file, line, type, hash) tuples.""" - results: dict[str, list[dict]] = {} - for filename, 
line, secret_type, hashed in entries: - results.setdefault(filename, []).append( - { - "type": secret_type, - "filename": filename, - "hashed_secret": hashed, - "is_verified": False, - "line_number": line, - } - ) - return {"version": "1.5.0", "results": results} - +class TestBanditSpecific: + """Cases that only exist for the bandit gate.""" -class TestSecretsBaselineGate: @pytest.fixture - def repo(self, tmp_path): + def gate(self, tmp_path) -> GateHandle: repo = _init_repo(tmp_path) - _install_script(repo, SECRETS_SCRIPT) - return repo - - def _run(self, repo, base, head, labels=""): - return _run_script( - repo, - repo / ".github" / "scripts" / SECRETS_SCRIPT.name, - { - "SECRETS_BASELINE_BASE": base, - "SECRETS_BASELINE_HEAD": head, - "SECRETS_BASELINE_LABELS": labels, - }, - ) - - def test_introduction_pr_skips_check(self, repo): - _git(repo, "commit", "--allow-empty", "-q", "-m", "before baseline") - base_sha = _git(repo, "rev-parse", "HEAD") - head_sha = _commit_baseline( - repo, - ".secrets.baseline", - _secrets_baseline(("a.py", 1, "Secret Keyword", "abc123")), - "introduce", - ) - result = self._run(repo, base=base_sha, head=head_sha) - assert result.returncode == 0, result.stderr - assert "introduction of the baseline" in result.stdout + _install_script(repo, BANDIT_SCRIPT) + return GateHandle(config=BANDIT_GATE, repo=repo) - def test_growth_without_label_fails(self, repo): - base_sha = _commit_baseline( - repo, - ".secrets.baseline", - _secrets_baseline(("a.py", 1, "Secret Keyword", "abc")), - "base", - ) - head_sha = _commit_baseline( - repo, - ".secrets.baseline", - _secrets_baseline( - ("a.py", 1, "Secret Keyword", "abc"), - ("b.py", 2, "Secret Keyword", "def"), - ), - "grow", - ) - result = self._run(repo, base=base_sha, head=head_sha) - assert result.returncode == 1 - assert "'secrets-baseline-change'" in result.stderr + def test_no_base_ref_is_skipped(self, gate: GateHandle): + gate.commit([], "init") # need at least one commit so HEAD resolves + 
result = gate.run(base="", head="HEAD") + assert result.returncode == 0 + assert "baseline diff check skipped" in result.stdout - def test_growth_with_label_passes(self, repo): + def test_whitespace_only_change_does_not_trip(self, gate: GateHandle): + """A bandit version bump that reformats the code snippet (different + whitespace) should not flag the same finding as new — that's the + purpose of the whitespace-normalized identity hash.""" base_sha = _commit_baseline( - repo, - ".secrets.baseline", - _secrets_baseline(("a.py", 1, "Secret Keyword", "abc")), + gate.repo, + gate.config.baseline_path, + { + "results": [ + { + "filename": "a.py", + "line_number": 10, + "test_id": "B602", + "issue_severity": "HIGH", + "issue_confidence": "HIGH", + "code": "shell=True\n capture_output=True", + } + ] + }, "base", ) head_sha = _commit_baseline( - repo, - ".secrets.baseline", - _secrets_baseline( - ("a.py", 1, "Secret Keyword", "abc"), - ("b.py", 2, "Secret Keyword", "def"), - ), - "grow", - ) - result = self._run( - repo, base=base_sha, head=head_sha, labels="secrets-baseline-change" + gate.repo, + gate.config.baseline_path, + { + "results": [ + { + "filename": "a.py", + "line_number": 10, + "test_id": "B602", + "issue_severity": "HIGH", + "issue_confidence": "HIGH", + "code": "shell=True\ncapture_output=True", # one less space + } + ] + }, + "reformatted snippet", ) - assert result.returncode == 0, result.stderr - assert "acknowledged via label" in result.stdout - def test_swap_attack_detected(self, repo): - base_sha = _commit_baseline( - repo, - ".secrets.baseline", - _secrets_baseline(("a.py", 1, "Secret Keyword", "abc")), - "base", - ) - head_sha = _commit_baseline( - repo, - ".secrets.baseline", - _secrets_baseline(("b.py", 2, "Secret Keyword", "def")), - "swap", - ) - result = self._run(repo, base=base_sha, head=head_sha) - assert result.returncode == 1 - assert "1 new identities" in result.stderr + result = gate.run(base=base_sha, head=head_sha) - def 
test_identical_baselines_pass(self, repo): - entries = (("a.py", 1, "Secret Keyword", "abc"),) - base_sha = _commit_baseline( - repo, ".secrets.baseline", _secrets_baseline(*entries), "base" - ) - result = self._run(repo, base=base_sha, head=base_sha) - assert result.returncode == 0 - assert "no new identities" in result.stdout + assert result.returncode == 0, result.stderr diff --git a/tests/test_upgrade.py b/tests/test_upgrade.py index 4da392c2c9..ee18d919d7 100644 --- a/tests/test_upgrade.py +++ b/tests/test_upgrade.py @@ -257,6 +257,43 @@ def test_generic_exception_propagates(self): _fetch_latest_release_tag() +class TestBoundedRead: + """Regression test for the read_response_limited hardening. + + A future refactor could silently revert `_fetch_latest_release_tag` to + `resp.read()` (the unbounded form) — this test pins the contract that + the response body is read through ``read_response_limited`` with a + bounded ``max_bytes``. + """ + + def test_response_body_is_bounded(self): + recorded: dict = {} + real_read = __import__( + "specify_cli._download_security", fromlist=["read_response_limited"] + ).read_response_limited + + def _spy(response, *, max_bytes=None, label=None, **kwargs): + recorded["max_bytes"] = max_bytes + recorded["label"] = label + # Forward to the real implementation so the function under test + # still gets a parseable body. + return real_read(response, max_bytes=max_bytes, label=label, **kwargs) + + with patch( + "specify_cli.authentication.http.urllib.request.urlopen", + return_value=_mock_urlopen_response({"tag_name": "v9.9.9"}), + ), patch("specify_cli._version.read_response_limited", side_effect=_spy): + tag, reason = _fetch_latest_release_tag() + + assert tag == "v9.9.9" + assert reason is None + # max_bytes is set by the caller; the exact value is a deliberate + # cap (1 MiB) for the GitHub release JSON. Don't accept None or + # the default — the caller must pass an explicit upper bound. 
+ assert recorded["max_bytes"] == 1024 * 1024 + assert "github" in (recorded["label"] or "").lower() + + _FAILURE_CASES = [ ("offline or timeout", urllib.error.URLError("down")), (_RATE_LIMITED_REASON, _http_error(403)), From 328261349dff43da369b816269583daaf7e1e8c0 Mon Sep 17 00:00:00 2001 From: Pascal Date: Sat, 16 May 2026 07:52:20 +0200 Subject: [PATCH 16/30] test(upgrade): polish TestBoundedRead readability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three micro-cleanups raised during review #4 of my own work — no behavior change, just clarity. - Replace the __import__("specify_cli._download_security", fromlist=...) dance with a plain `import ... as _real_read_response_limited` at the top of the file. Easier to grep, no runtime difference. - Type the recorded dict explicitly and make max_bytes/label keyword- only without defaults on the spy. If a future refactor drops either argument the spy now raises TypeError immediately, instead of silently recording None and tripping the post-call assertion with a more confusing message. - Tighten the label check from fuzzy substring match ("github" in label.lower()) to exact equality ("GitHub latest release"). Both catch regressions; exact equality also catches typos. 
--- tests/test_upgrade.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/tests/test_upgrade.py b/tests/test_upgrade.py index ee18d919d7..c125b583fd 100644 --- a/tests/test_upgrade.py +++ b/tests/test_upgrade.py @@ -17,6 +17,7 @@ from typer.testing import CliRunner from specify_cli import app +from specify_cli._download_security import read_response_limited as _real_read_response_limited from specify_cli._version import ( _fetch_latest_release_tag, _get_installed_version, @@ -267,17 +268,19 @@ class TestBoundedRead: """ def test_response_body_is_bounded(self): - recorded: dict = {} - real_read = __import__( - "specify_cli._download_security", fromlist=["read_response_limited"] - ).read_response_limited + recorded: dict[str, int | str] = {} - def _spy(response, *, max_bytes=None, label=None, **kwargs): + def _spy(response, *, max_bytes: int, label: str, **kwargs): + # max_bytes and label are keyword-only with no defaults: if the + # caller forgets to pass either, the call raises TypeError here + # (instead of recording a misleading None). recorded["max_bytes"] = max_bytes recorded["label"] = label # Forward to the real implementation so the function under test # still gets a parseable body. - return real_read(response, max_bytes=max_bytes, label=label, **kwargs) + return _real_read_response_limited( + response, max_bytes=max_bytes, label=label, **kwargs + ) with patch( "specify_cli.authentication.http.urllib.request.urlopen", @@ -287,11 +290,12 @@ def _spy(response, *, max_bytes=None, label=None, **kwargs): assert tag == "v9.9.9" assert reason is None - # max_bytes is set by the caller; the exact value is a deliberate - # cap (1 MiB) for the GitHub release JSON. Don't accept None or - # the default — the caller must pass an explicit upper bound. 
+ # The cap (1 MiB) is a deliberate ceiling for the GitHub release + # JSON — keep it explicit so a future refactor that drops the + # `max_bytes=` argument fails this test instead of regressing + # silently to the default. assert recorded["max_bytes"] == 1024 * 1024 - assert "github" in (recorded["label"] or "").lower() + assert recorded["label"] == "GitHub latest release" _FAILURE_CASES = [ From a4e9efac783f3ff53233d6e6523ed3ee7414dbed Mon Sep 17 00:00:00 2001 From: Pascal Date: Sat, 16 May 2026 08:20:39 +0200 Subject: [PATCH 17/30] ci(security): address Copilot review #4300554119 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Six items from the new Copilot pass. Three were latent bugs in the guardrails added by earlier commits, two are documentation/wording, one is parity coverage. Bugs - security.yml: the MEDIUM Bandit informational pass ran without --baseline, so the whitelisted HIGH B602 finding re-fired there on every run, turning the job summary into a permanent warning. Apply the same baseline to both passes; medium-only NEW findings now surface, as intended. - security.yml: the summary step ran with if: always() but the MEDIUM pass has the default if: success() — when the blocking HIGH step fails, the MEDIUM pass is skipped (outcome=skipped, not failure) and the summary wrote "✅ clean" anyway. Switch to a case statement that handles failure/success/skipped distinctly (⚠️ / ✅ / ⏭️). - check_bandit_baseline.py and check_secrets_baseline.py used `git show :` on both sides, so an unreadable/unfetched head ref returned empty results and the diff computed 0 new identities → fail-open. Read the head side from the working tree instead (CI is checked out at the PR head), fail-closed when the file is missing, and SystemExit on corrupt JSON. The base side keeps the lenient JSONDecodeError fallback because that's historical state we can't change. 
Wording - security.yml + CONTRIBUTING.md: both mentioned `# nosec` as a suppression mechanism, but tests/test_security_workflow.py::test_bandit_nosec_is_not_suppressed_in_source explicitly forbids `# nosec` under src/. Replace with the actually-supported paths (bandit baseline for HIGH findings, `# noqa: S6xx` for ruff subprocess-shell rules) and flag the forbidden-comment policy. Parity coverage - tests/test_security_workflow.py: three new tests for the secret-scan job mirroring the dependency-audit / static-analysis coverage — detect-secrets-hook command, baseline path, excluded paths, growth gate env wiring (BASE only, no HEAD env), and fetch-depth: 0. - tests/test_workflows.py: regression test that WorkflowCatalog._fetch_single_catalog routes through read_response_limited with error_type=WorkflowCatalogError and label "workflow catalog". Mirrors TestBoundedRead for _fetch_latest_release_tag and the equivalent test in test_integration_catalog.py. - tests/test_baseline_gates.py: two new fail-closed cases (head missing in working tree, head corrupt in working tree); drop the now-unused head_sha returns and the head env var from GateHandle.run. Note: Copilot also flagged "no tests on baseline gate scripts" — those tests already shipped in tests/test_baseline_gates.py (commit 2fd8071, posted before the review). Updated here with the new fail-closed cases. Tests: 3017 passed (was 3009). 
--- .github/scripts/check_bandit_baseline.py | 51 ++++++++++++++-- .github/scripts/check_secrets_baseline.py | 39 ++++++++++-- .github/workflows/security.yml | 63 ++++++++++++------- CONTRIBUTING.md | 4 +- tests/test_baseline_gates.py | 71 +++++++++++++++++----- tests/test_security_workflow.py | 61 +++++++++++++++++-- tests/test_workflows.py | 73 +++++++++++++++++++++++ 7 files changed, 310 insertions(+), 52 deletions(-) diff --git a/.github/scripts/check_bandit_baseline.py b/.github/scripts/check_bandit_baseline.py index 0823700152..e81cb69f7f 100644 --- a/.github/scripts/check_bandit_baseline.py +++ b/.github/scripts/check_bandit_baseline.py @@ -16,9 +16,13 @@ that introduces it; we treat all entries as the starting baseline and do not require the label. +For the head side we read the working tree directly (the CI runner is +checked out at the PR head, so the working-tree file IS the head state). +Reading via ``git show :`` would fail-open on unfetched refs +or detached checkouts — for a security gate we want fail-closed. + Required environment variables: - ``BANDIT_BASELINE_BASE``: git ref of the PR base -- ``BANDIT_BASELINE_HEAD``: git ref of the PR head - ``BANDIT_BASELINE_LABELS``: comma-separated PR labels Outside of PR events, all inputs may be empty and the script no-ops. @@ -40,7 +44,11 @@ def _read_baseline_at(ref: str) -> tuple[dict, bool]: - """Return (baseline_json, file_existed_at_ref).""" + """Return (baseline_json, file_existed_at_ref). + + Used for the base side. The head side reads the working tree to avoid + silently fail-opening on an unfetched/invalid head ref. + """ if not ref: return {"results": []}, False try: @@ -61,6 +69,30 @@ def _read_baseline_at(ref: str) -> tuple[dict, bool]: return {"results": []}, True +def _read_baseline_from_worktree() -> tuple[dict, bool]: + """Return (baseline_json, file_exists_on_disk). + + The CI runner is checked out at the PR head, so the working-tree + file IS the head state. 
Reading it directly sidesteps spurious + ``git show`` failures that would otherwise let an unreadable head + silently pass the gate. + + Asymmetric with the base reader: a corrupt JSON on disk is the + proposed PR state — we fail-closed there rather than treating + it as an empty baseline (which would silently drop the gate). + """ + path = REPO_ROOT / BASELINE_PATH + if not path.exists(): + return {"results": []}, False + try: + return json.loads(path.read_text(encoding="utf-8")), True + except json.JSONDecodeError as exc: + raise SystemExit( + f"Working-tree baseline at {BASELINE_PATH} is corrupt: {exc}. " + f"Refusing to fail-open on a security gate." + ) + + _WHITESPACE_RE = re.compile(r"\s+") @@ -89,14 +121,13 @@ def _identity(result: dict) -> str: def main() -> int: base_ref = os.environ.get("BANDIT_BASELINE_BASE", "").strip() - head_ref = os.environ.get("BANDIT_BASELINE_HEAD", "").strip() or "HEAD" if not base_ref or set(base_ref) <= {"0"}: print("No PR base ref; baseline diff check skipped.") return 0 base_baseline, base_existed = _read_baseline_at(base_ref) - head_baseline, _ = _read_baseline_at(head_ref) + head_baseline, head_existed = _read_baseline_from_worktree() if not base_existed: print( @@ -105,6 +136,18 @@ def main() -> int: ) return 0 + if not head_existed: + # Fail-closed: the file existed at base but is missing in the + # working tree. Either the PR deleted it (suspicious — the gate + # would no longer protect anything) or the workspace is incomplete. + print( + f"Baseline file {BASELINE_PATH} existed at the base ref but is " + f"missing in the working tree. 
Refusing to fail-open on a " + f"security gate.", + file=sys.stderr, + ) + return 1 + base_ids = {_identity(r) for r in base_baseline.get("results", [])} head_ids = {_identity(r) for r in head_baseline.get("results", [])} diff --git a/.github/scripts/check_secrets_baseline.py b/.github/scripts/check_secrets_baseline.py index c172b045ff..8f1daf2bb5 100644 --- a/.github/scripts/check_secrets_baseline.py +++ b/.github/scripts/check_secrets_baseline.py @@ -11,9 +11,12 @@ When the baseline file does not exist at the base ref, the PR is the one that introduces it; no acknowledgement is required. +For the head side we read the working tree directly (the CI runner is +checked out at the PR head); this avoids fail-opening when +``git show :`` happens to fail. + Required environment variables: - ``SECRETS_BASELINE_BASE``: git ref of the PR base -- ``SECRETS_BASELINE_HEAD``: git ref of the PR head - ``SECRETS_BASELINE_LABELS``: comma-separated PR labels Outside of PR events, all inputs may be empty and the script no-ops. @@ -33,7 +36,7 @@ def _read_baseline_at(ref: str) -> tuple[dict, bool]: - """Return (baseline_json, file_existed_at_ref).""" + """Return (baseline_json, file_existed_at_ref). Base side only.""" if not ref: return {"results": {}}, False try: @@ -54,6 +57,26 @@ def _read_baseline_at(ref: str) -> tuple[dict, bool]: return {"results": {}}, True +def _read_baseline_from_worktree() -> tuple[dict, bool]: + """Return (baseline_json, file_exists_on_disk). Head side. + + Reading the working tree (rather than ``git show :``) makes the + head side fail-closed: a missing file blocks the gate, and a corrupt + file raises SystemExit rather than being treated as empty (which + would silently neutralize the gate). 
+ """ + path = REPO_ROOT / BASELINE_PATH + if not path.exists(): + return {"results": {}}, False + try: + return json.loads(path.read_text(encoding="utf-8")), True + except json.JSONDecodeError as exc: + raise SystemExit( + f"Working-tree baseline at {BASELINE_PATH} is corrupt: {exc}. " + f"Refusing to fail-open on a security gate." + ) + + def _identities(baseline: dict) -> set[str]: """Flatten detect-secrets results to a set of stable identities.""" ids: set[str] = set() @@ -81,14 +104,13 @@ def _identities(baseline: dict) -> set[str]: def main() -> int: base_ref = os.environ.get("SECRETS_BASELINE_BASE", "").strip() - head_ref = os.environ.get("SECRETS_BASELINE_HEAD", "").strip() or "HEAD" if not base_ref or set(base_ref) <= {"0"}: print("No PR base ref; secrets baseline diff check skipped.") return 0 base_baseline, base_existed = _read_baseline_at(base_ref) - head_baseline, _ = _read_baseline_at(head_ref) + head_baseline, head_existed = _read_baseline_from_worktree() if not base_existed: print( @@ -97,6 +119,15 @@ def main() -> int: ) return 0 + if not head_existed: + print( + f"Baseline file {BASELINE_PATH} existed at the base ref but is " + f"missing in the working tree. Refusing to fail-open on a " + f"security gate.", + file=sys.stderr, + ) + return 1 + base_ids = _identities(base_baseline) head_ids = _identities(head_baseline) diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml index 92c7c32f8c..1c1c626bab 100644 --- a/.github/workflows/security.yml +++ b/.github/workflows/security.yml @@ -77,43 +77,62 @@ jobs: - name: Run Bandit run: uvx --from bandit==1.9.4 bandit -r src -lll --baseline .github/bandit-baseline.json - # Informative: MEDIUM severity, no baseline. Surfaces lower-severity - # findings in the job summary without breaking CI, so reviewers see - # them before they accumulate. + # Informative: MEDIUM severity, using the SAME baseline so the + # accepted HIGH finding doesn't re-fire here. 
Surfaces new MEDIUM-or- + # above findings in the job summary without breaking CI. - name: Run Bandit medium-severity informational pass id: bandit-medium continue-on-error: true - run: uvx --from bandit==1.9.4 bandit -r src -ll + run: uvx --from bandit==1.9.4 bandit -r src -ll --baseline .github/bandit-baseline.json # Surface the medium-severity outcome in the job summary so reviewers # see it without expanding the log; continue-on-error swallows the - # non-zero exit otherwise. + # non-zero exit otherwise. We branch on three outcomes: + # - failure → new findings (⚠️) + # - success → clean (✅) + # - skipped → the blocking HIGH bandit step failed, so the medium + # pass never ran; don't claim "clean" in that case (⏭️). - name: Surface medium-severity findings in job summary if: always() run: | - if [ "${{ steps.bandit-medium.outcome }}" = "failure" ]; then - { - echo "## ⚠️ Bandit medium-severity informational pass" - echo "" - echo "Findings surfaced at MEDIUM severity (no baseline). These do not" - echo "fail CI but should be audited — either fix the issue, suppress" - echo "with an explicit \`# nosec\` carrying a justification, or escalate" - echo "the severity threshold once they are triaged." - echo "" - echo "See the **Run Bandit medium-severity informational pass** step" - echo "above for the file/line list." - } >> "$GITHUB_STEP_SUMMARY" - else - echo "## ✅ Bandit medium-severity informational pass — clean" >> "$GITHUB_STEP_SUMMARY" - fi + case "${{ steps.bandit-medium.outcome }}" in + failure) + { + echo "## ⚠️ Bandit medium-severity informational pass" + echo "" + echo "New MEDIUM-or-above findings detected (baseline-filtered). These" + echo "do not fail CI but should be audited. Resolution paths, in order" + echo "of preference:" + echo " 1. Fix the underlying issue." + echo " 2. 
If the finding is a documented intentional pattern, append" + echo " it to \`.github/bandit-baseline.json\` and add the" + echo " \`security-baseline-change\` label to acknowledge the growth." + echo " 3. For ruff S6xx false positives only, use \`# noqa: S6xx\`" + echo " with an inline justification." + echo "" + echo "Do NOT use \`# nosec\` — it is forbidden in \`src/\` by the" + echo "\`test_bandit_nosec_is_not_suppressed_in_source\` regression test." + echo "" + echo "See the **Run Bandit medium-severity informational pass** step" + echo "above for the file/line list." + } >> "$GITHUB_STEP_SUMMARY" + ;; + success) + echo "## ✅ Bandit medium-severity informational pass — clean" >> "$GITHUB_STEP_SUMMARY" + ;; + *) + echo "## ⏭️ Bandit medium-severity informational pass — skipped (the blocking HIGH pass failed; fix it first)" >> "$GITHUB_STEP_SUMMARY" + ;; + esac # Prevent silent whitelisting: if the baseline grew, the PR must carry # the 'security-baseline-change' label to acknowledge it. - name: Check Bandit baseline growth if: ${{ github.event_name == 'pull_request' }} env: + # Base side via `git show` (needs full fetch-depth above). + # Head side reads the working tree — fail-closed. BANDIT_BASELINE_BASE: ${{ github.event.pull_request.base.sha }} - BANDIT_BASELINE_HEAD: ${{ github.event.pull_request.head.sha }} BANDIT_BASELINE_LABELS: ${{ join(github.event.pull_request.labels.*.name, ',') }} run: python .github/scripts/check_bandit_baseline.py @@ -155,7 +174,7 @@ jobs: - name: Check secrets baseline growth if: ${{ github.event_name == 'pull_request' }} env: + # Head side reads the working tree (see check_secrets_baseline.py). 
SECRETS_BASELINE_BASE: ${{ github.event.pull_request.base.sha }} - SECRETS_BASELINE_HEAD: ${{ github.event.pull_request.head.sha }} SECRETS_BASELINE_LABELS: ${{ join(github.event.pull_request.labels.*.name, ',') }} run: python .github/scripts/check_secrets_baseline.py diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 926796dae9..910e7242e8 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -117,7 +117,9 @@ Audit the new entries before committing — a leaked credential must never be me #### Bandit baseline -The CI `static-analysis` job runs Bandit with `--baseline .github/bandit-baseline.json` (HIGH severity, blocking) plus a second informational pass at MEDIUM severity (`continue-on-error`, surfaced in the job summary). If a HIGH finding is intentional, audit it carefully, add an explicit `# nosec` with justification, and only then add it to the baseline. Growing the baseline is gated: the `check_bandit_baseline.py` script fails the PR unless it carries the `security-baseline-change` label, so reviewers see the whitelist expansion. +The CI `static-analysis` job runs Bandit with `--baseline .github/bandit-baseline.json` (HIGH severity, blocking) plus a second informational pass at MEDIUM severity sharing the same baseline (`continue-on-error`, surfaced in the job summary). If a HIGH finding is intentional, audit it carefully, document the rationale next to the code (regular comment — **not** `# nosec`; see below), and append the entry to `.github/bandit-baseline.json`. Growing the baseline is gated: the `check_bandit_baseline.py` script fails the PR unless it carries the `security-baseline-change` label, so reviewers see the whitelist expansion. + +> **Do not use `# nosec` in `src/`.** The `test_bandit_nosec_is_not_suppressed_in_source` regression test fails any PR that adds one. 
The supported suppression paths are (a) the bandit baseline (covered above) for HIGH findings, and (b) `# noqa: S6xx` with an inline justification for ruff's subprocess-shell rules (`S602/S604/S605`). Both are visible in review; `# nosec` hides the finding without trace. #### Shell scripts diff --git a/tests/test_baseline_gates.py b/tests/test_baseline_gates.py index 72adf2522e..1010634c94 100644 --- a/tests/test_baseline_gates.py +++ b/tests/test_baseline_gates.py @@ -175,13 +175,27 @@ def commit(self, entries: list[tuple[str, int]], message: str) -> str: def commit_raw(self, raw_content: str, message: str) -> str: return _commit_file(self.repo, self.config.baseline_path, raw_content, message) - def run(self, *, base: str, head: str, labels: str = ""): + def delete_baseline(self, message: str) -> str: + """Remove the baseline file from the working tree and commit.""" + (self.repo / self.config.baseline_path).unlink() + _git(self.repo, "add", "-A") + _git(self.repo, "commit", "-q", "-m", message) + return _git(self.repo, "rev-parse", "HEAD") + + def overwrite_worktree(self, raw_content: str) -> None: + """Replace the working-tree baseline without committing. + + Used to simulate a corrupt head state read from disk. + """ + (self.repo / self.config.baseline_path).write_text(raw_content, encoding="utf-8") + + def run(self, *, base: str, labels: str = ""): + # Head side reads the working tree directly — no env var needed. return _run_script( self.repo, self.repo / ".github" / "scripts" / self.config.script.name, { f"{self.config.env_prefix}_BASE": base, - f"{self.config.env_prefix}_HEAD": head, f"{self.config.env_prefix}_LABELS": labels, }, ) @@ -207,33 +221,33 @@ def test_introduction_pr_skips_check(self, gate: GateHandle): # Baseline file did not exist at base ref → no acknowledgement needed. 
_git(gate.repo, "commit", "--allow-empty", "-q", "-m", "before baseline") base_sha = _git(gate.repo, "rev-parse", "HEAD") - head_sha = gate.commit([("a.py", 10)], "introduce baseline") + gate.commit([("a.py", 10)], "introduce baseline") - result = gate.run(base=base_sha, head=head_sha) + result = gate.run(base=base_sha) assert result.returncode == 0, result.stderr assert "introduction of the baseline" in result.stdout def test_identical_baselines_pass(self, gate: GateHandle): base_sha = gate.commit([("a.py", 10)], "base") - result = gate.run(base=base_sha, head=base_sha) + result = gate.run(base=base_sha) assert result.returncode == 0 assert "no new identities" in result.stdout def test_growth_without_label_fails(self, gate: GateHandle): base_sha = gate.commit([("a.py", 10)], "base") - head_sha = gate.commit([("a.py", 10), ("b.py", 20)], "grow") + gate.commit([("a.py", 10), ("b.py", 20)], "grow") - result = gate.run(base=base_sha, head=head_sha) + result = gate.run(base=base_sha) assert result.returncode == 1 assert f"'{gate.config.label}'" in result.stderr def test_growth_with_label_passes(self, gate: GateHandle): base_sha = gate.commit([("a.py", 10)], "base") - head_sha = gate.commit([("a.py", 10), ("b.py", 20)], "grow") + gate.commit([("a.py", 10), ("b.py", 20)], "grow") - result = gate.run(base=base_sha, head=head_sha, labels=gate.config.label) + result = gate.run(base=base_sha, labels=gate.config.label) assert result.returncode == 0, result.stderr assert "acknowledged via label" in result.stdout @@ -242,9 +256,9 @@ def test_swap_attack_detected(self, gate: GateHandle): """Remove one entry and add a different one → constant count, but a *new* identity appears. 
Gate must still fire.""" base_sha = gate.commit([("a.py", 10)], "base") - head_sha = gate.commit([("b.py", 20)], "swap") # same count, different ID + gate.commit([("b.py", 20)], "swap") # same count, different ID - result = gate.run(base=base_sha, head=head_sha) + result = gate.run(base=base_sha) assert result.returncode == 1, "identity diff must catch swaps" assert "1 new identities" in result.stderr @@ -254,14 +268,39 @@ def test_corrupt_json_at_base_falls_back_to_empty(self, gate: GateHandle): contents as empty so the script still completes (the head set becomes 'all new' and the label gate fires).""" base_sha = gate.commit_raw("{ invalid json", "corrupt base") - head_sha = gate.commit([("a.py", 10)], "valid head") + gate.commit([("a.py", 10)], "valid head") - result = gate.run(base=base_sha, head=head_sha) + result = gate.run(base=base_sha) assert result.returncode == 1, "corrupt base should not crash the script" assert f"'{gate.config.label}'" in result.stderr assert "Could not parse baseline" in result.stderr + def test_head_missing_fails_closed(self, gate: GateHandle): + """If the baseline existed at base but is missing in the working + tree (head), the gate must fail-closed — silently passing would + let a PR delete the whole baseline file and neutralize the gate.""" + base_sha = gate.commit([("a.py", 10)], "base") + gate.delete_baseline("remove baseline at head") + + result = gate.run(base=base_sha) + + assert result.returncode == 1 + assert "Refusing to fail-open" in result.stderr + + def test_head_corrupt_in_worktree_fails_closed(self, gate: GateHandle): + """A corrupt JSON in the working tree must raise (not be silently + treated as empty, which would also drop the gate). 
Simulates a + flaky tool writing junk to the file just before the script runs.""" + base_sha = gate.commit([("a.py", 10)], "base") + gate.overwrite_worktree("{ not json") + + result = gate.run(base=base_sha) + + assert result.returncode == 1 + assert "is corrupt" in result.stderr + assert "fail-open" in result.stderr + # --------------------------------------------------------------------------- # Bandit-only scenarios @@ -279,7 +318,7 @@ def gate(self, tmp_path) -> GateHandle: def test_no_base_ref_is_skipped(self, gate: GateHandle): gate.commit([], "init") # need at least one commit so HEAD resolves - result = gate.run(base="", head="HEAD") + result = gate.run(base="") assert result.returncode == 0 assert "baseline diff check skipped" in result.stdout @@ -304,7 +343,7 @@ def test_whitespace_only_change_does_not_trip(self, gate: GateHandle): }, "base", ) - head_sha = _commit_baseline( + _commit_baseline( gate.repo, gate.config.baseline_path, { @@ -322,6 +361,6 @@ def test_whitespace_only_change_does_not_trip(self, gate: GateHandle): "reformatted snippet", ) - result = gate.run(base=base_sha, head=head_sha) + result = gate.run(base=base_sha) assert result.returncode == 0, result.stderr diff --git a/tests/test_security_workflow.py b/tests/test_security_workflow.py index 1b42920003..4897d59bf1 100644 --- a/tests/test_security_workflow.py +++ b/tests/test_security_workflow.py @@ -210,11 +210,12 @@ def test_actions_are_pinned_to_full_commit_shas(self): assert re.search(r"@v\d+", uses_ref) is None def test_bandit_does_not_globally_skip_b602(self): - # Identify the blocking bandit step by its baseline-arg rather than - # by exact step name — name is incidental, behavior is what matters. - bandit_step = _find_step_by_run_signature( - "static-analysis", "--baseline .github/bandit-baseline.json" - ) + # Identify the blocking bandit step by its severity-level arg (-lll + # → HIGH only; the informational MEDIUM pass uses -ll). 
Doing this + # by behavior signature rather than step name keeps the test robust + # to renames while remaining unambiguous now that both passes share + # the baseline argument. + bandit_step = _find_step_by_run_signature("static-analysis", "-r src -lll") run = bandit_step["run"] workflow_text = SECURITY_WORKFLOW.read_text(encoding="utf-8") @@ -405,3 +406,53 @@ def test_contributing_documents_security_commands(self): re.search(r"-r\s+spec-kit-audit-requirements\.txt\b", contributing_text) is None ) + + # ----------------------------------------------------------------- + # secret-scan job (parity coverage with dependency-audit / bandit) + # ----------------------------------------------------------------- + + def test_secret_scan_job_uses_detect_secrets_hook(self): + workflow = _load_security_workflow() + scan_step = _find_step_by_run_signature("secret-scan", "detect-secrets-hook") + run = scan_step["run"] + + # The hook is the right tool: it compares against the baseline + # and exits non-zero on new findings, without rewriting the file. + assert "uvx --from detect-secrets==1.5.0 detect-secrets-hook" in run + assert "--baseline .secrets.baseline" in run + # Auto-generated content must be excluded so it doesn't dominate the scan. + assert "':!:.secrets.baseline'" in run + assert "':!:uv.lock'" in run + assert "':!:.github/security-audit-requirements.txt'" in run + # Iteration over tracked files is via git ls-files (-z to handle weird names). + assert "git ls-files -z" in run + # secret-scan job is in fact wired into the workflow. + assert "secret-scan" in workflow["jobs"] + + def test_secret_scan_job_has_baseline_growth_gate(self): + gate_step = _find_step_by_run_signature( + "secret-scan", "check_secrets_baseline.py" + ) + # The gate runs only on pull_request events (label is meaningless otherwise). 
+ assert gate_step["if"] == "${{ github.event_name == 'pull_request' }}" + env = gate_step["env"] + assert env["SECRETS_BASELINE_BASE"] == ( + "${{ github.event.pull_request.base.sha }}" + ) + assert env["SECRETS_BASELINE_LABELS"] == ( + "${{ join(github.event.pull_request.labels.*.name, ',') }}" + ) + # Head is read from the working tree (fail-closed); env var must NOT + # be passed (else a future caller might think the script honors it). + assert "SECRETS_BASELINE_HEAD" not in env + + def test_secret_scan_checkout_has_full_history(self): + # The growth gate uses `git show :` so it needs full history. + workflow = _load_security_workflow() + checkout_steps = [ + step + for step in workflow["jobs"]["secret-scan"]["steps"] + if "actions/checkout" in (step.get("uses") or "") + ] + assert len(checkout_steps) == 1 + assert checkout_steps[0]["with"]["fetch-depth"] == 0 diff --git a/tests/test_workflows.py b/tests/test_workflows.py index 3b42bf9106..e4b39f5717 100644 --- a/tests/test_workflows.py +++ b/tests/test_workflows.py @@ -2137,6 +2137,79 @@ def test_get_catalog_configs(self, project_dir): assert configs[0]["name"] == "default" assert isinstance(configs[0]["install_allowed"], bool) + def test_fetch_single_catalog_uses_bounded_read(self, project_dir, monkeypatch): + """Regression test for the read_response_limited hardening on + workflow catalog downloads. Mirrors TestBoundedRead for + _fetch_latest_release_tag and the equivalent test in + tests/integrations/test_integration_catalog.py for the + integration catalog. 
A future refactor that drops the bounded + read here would let a malicious server stream an unbounded + catalog into memory.""" + from specify_cli.workflows.catalog import ( + WorkflowCatalog, + WorkflowCatalogEntry, + WorkflowCatalogError, + ) + from specify_cli import _download_security as _download_security_module + import specify_cli.authentication.http as _auth_http + + entry = WorkflowCatalogEntry( + url="https://example.com/workflow-catalog.json", + name="test", + priority=0, + install_allowed=False, + ) + + recorded: dict[str, object] = {} + real_read = _download_security_module.read_response_limited + + def _spy(response, **kwargs): + # Capture exactly the kwargs the caller chose to pass, so the + # assertion below can distinguish "explicit" from "default". + recorded["kwargs"] = dict(kwargs) + return real_read(response, **kwargs) + + class _FakeResponse: + def __init__(self): + self._data = json.dumps({"workflows": []}).encode() + + def read(self, _size=-1): + return self._data + + def geturl(self): + return entry.url + + def __enter__(self): + return self + + def __exit__(self, *_a): + pass + + def _fake_urlopen(req, timeout=30): + return _FakeResponse() + + monkeypatch.setattr(_auth_http.urllib.request, "urlopen", _fake_urlopen) + monkeypatch.setattr( + _auth_http.urllib.request.OpenerDirector, + "open", + lambda _self, req, data=None, timeout=30: _fake_urlopen(req, timeout), + ) + monkeypatch.setattr( + "specify_cli.workflows.catalog.read_response_limited", _spy + ) + + cat = WorkflowCatalog(project_dir) + cat._fetch_single_catalog(entry, force_refresh=True) + + # Bounded read was invoked (not raw resp.read()). error_type must + # be the WorkflowCatalogError so an oversized response surfaces + # as a workflow-catalog domain error, not a generic ValueError + # that callers might miss. The size cap itself relies on the + # module-level default in _download_security.MAX_DOWNLOAD_BYTES. 
+ assert "kwargs" in recorded, "read_response_limited was not called" + assert recorded["kwargs"]["error_type"] is WorkflowCatalogError + assert recorded["kwargs"]["label"] == "workflow catalog" + # ===== Integration Test ===== From 9070a878ff26dd57e466f2d023bf23cf40d66e67 Mon Sep 17 00:00:00 2001 From: Pascal Date: Sat, 16 May 2026 09:05:24 +0200 Subject: [PATCH 18/30] ci(security): refresh audit baselines --- .github/bandit-baseline.json | 60 +++++ .../scripts/check_security_requirements.py | 1 + .github/security-audit-requirements.txt | 220 +++++++++--------- .github/workflows/security.yml | 8 +- .secrets.baseline | 8 +- CONTRIBUTING.md | 4 +- tests/test_security_workflow.py | 24 +- 7 files changed, 197 insertions(+), 128 deletions(-) diff --git a/.github/bandit-baseline.json b/.github/bandit-baseline.json index 2c6a477879..345fea6d2d 100644 --- a/.github/bandit-baseline.json +++ b/.github/bandit-baseline.json @@ -1,5 +1,65 @@ { "results": [ + { + "code": "103 if not req.get_header(\"Authorization\") and not strict_redirects:\n104 return urllib.request.urlopen(req, timeout=timeout)\n105 \n", + "col_offset": 15, + "end_col_offset": 59, + "filename": "src/specify_cli/_github_http.py", + "issue_confidence": "HIGH", + "issue_cwe": { + "id": 22, + "link": "https://cwe.mitre.org/data/definitions/22.html" + }, + "issue_severity": "MEDIUM", + "issue_text": "Audit url open for permitted schemes. 
Allowing use of file:/ or custom schemes is often unexpected.", + "line_number": 104, + "line_range": [ + 104 + ], + "more_info": "https://bandit.readthedocs.io/en/1.9.4/blacklists/blacklist_calls.html#b310-urllib-urlopen", + "test_id": "B310", + "test_name": "blacklist" + }, + { + "code": "113 \n114 with urllib.request.urlopen(req, timeout=30) as resp: # noqa: S310\n115 payload = _json.loads(\n", + "col_offset": 17, + "end_col_offset": 56, + "filename": "src/specify_cli/authentication/azure_devops.py", + "issue_confidence": "HIGH", + "issue_cwe": { + "id": 22, + "link": "https://cwe.mitre.org/data/definitions/22.html" + }, + "issue_severity": "MEDIUM", + "issue_text": "Audit url open for permitted schemes. Allowing use of file:/ or custom schemes is often unexpected.", + "line_number": 114, + "line_range": [ + 114 + ], + "more_info": "https://bandit.readthedocs.io/en/1.9.4/blacklists/blacklist_calls.html#b310-urllib-urlopen", + "test_id": "B310", + "test_name": "blacklist" + }, + { + "code": "170 return opener.open(req, timeout=timeout)\n171 return urllib.request.urlopen(req, timeout=timeout) # noqa: S310\n", + "col_offset": 11, + "end_col_offset": 55, + "filename": "src/specify_cli/authentication/http.py", + "issue_confidence": "HIGH", + "issue_cwe": { + "id": 22, + "link": "https://cwe.mitre.org/data/definitions/22.html" + }, + "issue_severity": "MEDIUM", + "issue_text": "Audit url open for permitted schemes. 
Allowing use of file:/ or custom schemes is often unexpected.", + "line_number": 171, + "line_range": [ + 171 + ], + "more_info": "https://bandit.readthedocs.io/en/1.9.4/blacklists/blacklist_calls.html#b310-urllib-urlopen", + "test_id": "B310", + "test_name": "blacklist" + }, { "code": "34 run_cmd,\n35 shell=True,\n36 capture_output=True,\n37 text=True,\n38 cwd=cwd,\n39 timeout=300,\n40 )\n41 output = {\n42 \"exit_code\": proc.returncode,\n43 \"stdout\": proc.stdout,\n", "col_offset": 19, diff --git a/.github/scripts/check_security_requirements.py b/.github/scripts/check_security_requirements.py index 6834ee42bf..876fbf1a47 100644 --- a/.github/scripts/check_security_requirements.py +++ b/.github/scripts/check_security_requirements.py @@ -74,6 +74,7 @@ def main() -> int: "--extra", "test", "--universal", + "--upgrade", "--generate-hashes", "--quiet", "--no-header", diff --git a/.github/security-audit-requirements.txt b/.github/security-audit-requirements.txt index 89feef3f1a..646284db2b 100644 --- a/.github/security-audit-requirements.txt +++ b/.github/security-audit-requirements.txt @@ -14,113 +14,113 @@ colorama==0.4.6 ; sys_platform == 'win32' \ # via # click # pytest -coverage==7.13.5 \ - --hash=sha256:012d5319e66e9d5a218834642d6c35d265515a62f01157a45bcc036ecf947256 \ - --hash=sha256:02ca0eed225b2ff301c474aeeeae27d26e2537942aa0f87491d3e147e784a82b \ - --hash=sha256:03ccc709a17a1de074fb1d11f217342fb0d2b1582ed544f554fc9fc3f07e95f5 \ - --hash=sha256:0428cbef5783ad91fe240f673cc1f76b25e74bbfe1a13115e4aa30d3f538162d \ - --hash=sha256:04690832cbea4e4663d9149e05dba142546ca05cb1848816760e7f58285c970a \ - --hash=sha256:0590e44dd2745c696a778f7bab6aa95256de2cbc8b8cff4f7db8ff09813d6969 \ - --hash=sha256:0672854dc733c342fa3e957e0605256d2bf5934feeac328da9e0b5449634a642 \ - --hash=sha256:084b84a8c63e8d6fc7e3931b316a9bcafca1458d753c539db82d31ed20091a87 \ - --hash=sha256:0b67af5492adb31940ee418a5a655c28e48165da5afab8c7fa6fd72a142f8740 \ - 
--hash=sha256:0cd9ed7a8b181775459296e402ca4fb27db1279740a24e93b3b41942ebe4b215 \ - --hash=sha256:0cef0cdec915d11254a7f549c1170afecce708d30610c6abdded1f74e581666d \ - --hash=sha256:0e223ce4b4ed47f065bfb123687686512e37629be25cc63728557ae7db261422 \ - --hash=sha256:0e3c426ffc4cd952f54ee9ffbdd10345709ecc78a3ecfd796a57236bfad0b9b8 \ - --hash=sha256:0ecf12ecb326fe2c339d93fc131816f3a7367d223db37817208905c89bded911 \ - --hash=sha256:10a0c37f0b646eaff7cce1874c31d1f1ccb297688d4c747291f4f4c70741cc8b \ - --hash=sha256:145ede53ccbafb297c1c9287f788d1bc3efd6c900da23bf6931b09eafc931587 \ - --hash=sha256:1b11eef33edeae9d142f9b4358edb76273b3bfd30bc3df9a4f95d0e49caf94e8 \ - --hash=sha256:1b88c69c8ef5d4b6fe7dea66d6636056a0f6a7527c440e890cf9259011f5e606 \ - --hash=sha256:258354455f4e86e3e9d0d17571d522e13b4e1e19bf0f8596bcf9476d61e7d8a9 \ - --hash=sha256:259b69bb83ad9894c4b25be2528139eecba9a82646ebdda2d9db1ba28424a6bf \ - --hash=sha256:2aa055ae1857258f9e0045be26a6d62bdb47a72448b62d7b55f4820f361a2633 \ - --hash=sha256:2d3807015f138ffea1ed9afeeb8624fd781703f2858b62a8dd8da5a0994c57b6 \ - --hash=sha256:301e3b7dfefecaca37c9f1aa6f0049b7d4ab8dd933742b607765d757aca77d43 \ - --hash=sha256:32ca0c0114c9834a43f045a87dcebd69d108d8ffb666957ea65aa132f50332e2 \ - --hash=sha256:34b02417cf070e173989b3db962f7ed56d2f644307b2cf9d5a0f258e13084a61 \ - --hash=sha256:356e76b46783a98c2a2fe81ec79df4883a1e62895ea952968fb253c114e7f930 \ - --hash=sha256:35a31f2b1578185fbe6aa2e74cea1b1d0bbf4c552774247d9160d29b80ed56cc \ - --hash=sha256:380e8e9084d8eb38db3a9176a1a4f3c0082c3806fa0dc882d1d87abc3c789247 \ - --hash=sha256:3ad050321264c49c2fa67bb599100456fc51d004b82534f379d16445da40fb75 \ - --hash=sha256:3e1bb5f6c78feeb1be3475789b14a0f0a5b47d505bfc7267126ccbd50289999e \ - --hash=sha256:3f4818d065964db3c1c66dc0fbdac5ac692ecbc875555e13374fdbe7eedb4376 \ - --hash=sha256:460cf0114c5016fa841214ff5564aa4864f11948da9440bc97e21ad1f4ba1e01 \ - --hash=sha256:48c39bc4a04d983a54a705a6389512883d4a3b9862991b3617d547940e9f52b1 \ - 
--hash=sha256:4b59148601efcd2bac8c4dbf1f0ad6391693ccf7a74b8205781751637076aee3 \ - --hash=sha256:4d2afbc5cc54d286bfb54541aa50b64cdb07a718227168c87b9e2fb8f25e1743 \ - --hash=sha256:505d7083c8b0c87a8fa8c07370c285847c1f77739b22e299ad75a6af6c32c5c9 \ - --hash=sha256:52f444e86475992506b32d4e5ca55c24fc88d73bcbda0e9745095b28ef4dc0cf \ - --hash=sha256:5b13955d31d1633cf9376908089b7cebe7d15ddad7aeaabcbe969a595a97e95e \ - --hash=sha256:5ec4af212df513e399cf11610cc27063f1586419e814755ab362e50a85ea69c1 \ - --hash=sha256:60365289c3741e4db327e7baff2a4aaacf22f788e80fa4683393891b70a89fbd \ - --hash=sha256:631efb83f01569670a5e866ceb80fe483e7c159fac6f167e6571522636104a0b \ - --hash=sha256:6697e29b93707167687543480a40f0db8f356e86d9f67ddf2e37e2dfd91a9dab \ - --hash=sha256:66a80c616f80181f4d643b0f9e709d97bcea413ecd9631e1dedc7401c8e6695d \ - --hash=sha256:67e9bc5449801fad0e5dff329499fb090ba4c5800b86805c80617b4e29809b2a \ - --hash=sha256:68a4953be99b17ac3c23b6efbc8a38330d99680c9458927491d18700ef23ded0 \ - --hash=sha256:6c36ddb64ed9d7e496028d1d00dfec3e428e0aabf4006583bb1839958d280510 \ - --hash=sha256:6e3370441f4513c6252bf042b9c36d22491142385049243253c7e48398a15a9f \ - --hash=sha256:7034b5c56a58ae5e85f23949d52c14aca2cfc6848a31764995b7de88f13a1ea0 \ - --hash=sha256:704de6328e3d612a8f6c07000a878ff38181ec3263d5a11da1db294fa6a9bdf8 \ - --hash=sha256:7132bed4bd7b836200c591410ae7d97bf7ae8be6fc87d160b2bd881df929e7bf \ - --hash=sha256:7300c8a6d13335b29bb76d7651c66af6bd8658517c43499f110ddc6717bfc209 \ - --hash=sha256:750db93a81e3e5a9831b534be7b1229df848b2e125a604fe6651e48aa070e5f9 \ - --hash=sha256:777c4d1eff1b67876139d24288aaf1817f6c03d6bae9c5cc8d27b83bcfe38fe3 \ - --hash=sha256:78e696e1cc714e57e8b25760b33a8b1026b7048d270140d25dafe1b0a1ee05a3 \ - --hash=sha256:79060214983769c7ba3f0cee10b54c97609dca4d478fa1aa32b914480fd5738d \ - --hash=sha256:7c8d4bc913dd70b93488d6c496c77f3aff5ea99a07e36a18f865bca55adef8bd \ - --hash=sha256:7f2c47b36fe7709a6e83bfadf4eefb90bd25fbe4014d715224c4316f808e59a2 \ - 
--hash=sha256:800bc829053c80d240a687ceeb927a94fd108bbdc68dfbe505d0d75ab578a882 \ - --hash=sha256:843ea8643cf967d1ac7e8ecd4bb00c99135adf4816c0c0593fdcc47b597fcf09 \ - --hash=sha256:8769751c10f339021e2638cd354e13adeac54004d1941119b2c96fe5276d45ea \ - --hash=sha256:8dd02af98971bdb956363e4827d34425cb3df19ee550ef92855b0acb9c7ce51c \ - --hash=sha256:8fdf453a942c3e4d99bd80088141c4c6960bb232c409d9c3558e2dbaa3998562 \ - --hash=sha256:941617e518602e2d64942c88ec8499f7fbd49d3f6c4327d3a71d43a1973032f3 \ - --hash=sha256:972a9cd27894afe4bc2b1480107054e062df08e671df7c2f18c205e805ccd806 \ - --hash=sha256:9adb6688e3b53adffefd4a52d72cbd8b02602bfb8f74dcd862337182fd4d1a4e \ - --hash=sha256:9b74db26dfea4f4e50d48a4602207cd1e78be33182bc9cbf22da94f332f99878 \ - --hash=sha256:9bb2a28101a443669a423b665939381084412b81c3f8c0fcfbac57f4e30b5b8e \ - --hash=sha256:9d44d7aa963820b1b971dbecd90bfe5fe8f81cff79787eb6cca15750bd2f79b9 \ - --hash=sha256:9dacc2ad679b292709e0f5fc1ac74a6d4d5562e424058962c7bb0c658ad25e45 \ - --hash=sha256:9ddb4f4a5479f2539644be484da179b653273bca1a323947d48ab107b3ed1f29 \ - --hash=sha256:a1a6d79a14e1ec1832cabc833898636ad5f3754a678ef8bb4908515208bf84f4 \ - --hash=sha256:a698e363641b98843c517817db75373c83254781426e94ada3197cabbc2c919c \ - --hash=sha256:ad14385487393e386e2ea988b09d62dd42c397662ac2dabc3832d71253eee479 \ - --hash=sha256:ad146744ca4fd09b50c482650e3c1b1f4dfa1d4792e0a04a369c7f23336f0400 \ - --hash=sha256:b5db73ba3c41c7008037fa731ad5459fc3944cb7452fc0aa9f822ad3533c583c \ - --hash=sha256:bd3a2fbc1c6cccb3c5106140d87cc6a8715110373ef42b63cf5aea29df8c217a \ - --hash=sha256:bdba0a6b8812e8c7df002d908a9a2ea3c36e92611b5708633c50869e6d922fdf \ - --hash=sha256:be3d4bbad9d4b037791794ddeedd7d64a56f5933a2c1373e18e9e568b9141686 \ - --hash=sha256:bf69236a9a81bdca3bff53796237aab096cdbf8d78a66ad61e992d9dac7eb2de \ - --hash=sha256:bff95879c33ec8da99fc9b6fe345ddb5be6414b41d6d1ad1c8f188d26f36e028 \ - --hash=sha256:c555b48be1853fe3997c11c4bd521cdd9a9612352de01fa4508f16ec341e6fe0 \ - 
--hash=sha256:c81f6515c4c40141f83f502b07bbfa5c240ba25bbe73da7b33f1e5b6120ff179 \ - --hash=sha256:c9136ff29c3a91e25b1d1552b5308e53a1e0653a23e53b6366d7c2dcbbaf8a16 \ - --hash=sha256:ce1998c0483007608c8382f4ff50164bfc5bd07a2246dd272aa4043b75e61e85 \ - --hash=sha256:cec2d83125531bd153175354055cdb7a09987af08a9430bd173c937c6d0fba2a \ - --hash=sha256:cff784eef7f0b8f6cb28804fbddcfa99f89efe4cc35fb5627e3ac58f91ed3ac0 \ - --hash=sha256:d2c87e0c473a10bffe991502eac389220533024c8082ec1ce849f4218dded810 \ - --hash=sha256:d7cfad2d6d81dd298ab6b89fe72c3b7b05ec7544bdda3b707ddaecff8d25c161 \ - --hash=sha256:d8a7a2049c14f413163e2bdabd37e41179b1d1ccb10ffc6ccc4b7a718429c607 \ - --hash=sha256:da305e9937617ee95c2e39d8ff9f040e0487cbf1ac174f777ed5eddd7a7c1f26 \ - --hash=sha256:da86cdcf10d2519e10cabb8ac2de03da1bcb6e4853790b7fbd48523332e3a819 \ - --hash=sha256:dc022073d063b25a402454e5712ef9e007113e3a676b96c5f29b2bda29352f40 \ - --hash=sha256:e0723d2c96324561b9aa76fb982406e11d93cdb388a7a7da2b16e04719cf7ca5 \ - --hash=sha256:e092b9499de38ae0fbfbc603a74660eb6ff3e869e507b50d85a13b6db9863e15 \ - --hash=sha256:e0b216a19534b2427cc201a26c25da4a48633f29a487c61258643e89d28200c0 \ - --hash=sha256:e1c85e0b6c05c592ea6d8768a66a254bfb3874b53774b12d4c89c481eb78cb90 \ - --hash=sha256:e301d30dd7e95ae068671d746ba8c34e945a82682e62918e41b2679acd2051a0 \ - --hash=sha256:e808af52a0513762df4d945ea164a24b37f2f518cbe97e03deaa0ee66139b4d6 \ - --hash=sha256:eb07647a5738b89baab047f14edd18ded523de60f3b30e75c2acc826f79c839a \ - --hash=sha256:eb7fdf1ef130660e7415e0253a01a7d5a88c9c4d158bcf75cbbd922fd65a5b58 \ - --hash=sha256:ec10e2a42b41c923c2209b846126c6582db5e43a33157e9870ba9fb70dc7854b \ - --hash=sha256:ee2aa19e03161671ec964004fb74b2257805d9710bf14a5c704558b9d8dbaf17 \ - --hash=sha256:f08fd75c50a760c7eb068ae823777268daaf16a80b918fa58eea888f8e3919f5 \ - --hash=sha256:f4cd16206ad171cbc2470dbea9103cf9a7607d5fe8c242fdf1edf36174020664 \ - --hash=sha256:f70c9ab2595c56f81a89620e22899eea8b212a4041bd728ac6f4a28bf5d3ddd0 \ - 
--hash=sha256:fbabfaceaeb587e16f7008f7795cd80d20ec548dc7f94fbb0d4ec2e038ce563f +coverage==7.14.0 \ + --hash=sha256:057a6af2f160a85384cde4ab36f0d2777bae1057bae255f95413cdd382aa5c74 \ + --hash=sha256:0773d8329cf32b6fd222e4b52622c61fe8d503eb966cfc8d3c3c10c96266d50e \ + --hash=sha256:0a951308cde22cf77f953955a754d04dccb57fe3bb8e345d685778ed9fc1632a \ + --hash=sha256:0c451757d3fa2603354fdc789b5e58a0e327a117c370a40e3476ba4eabab228c \ + --hash=sha256:0f162bc9a15b82d947b02651b0c7e1609d6f7a8735ca330cfadec8481dd97d5a \ + --hash=sha256:15228a6800ce7bdf1b74800595e56db7138cecb338fdbf044806e10dcf182dfe \ + --hash=sha256:1733198802d71ec4c524f322e2867ee05c62e9e75df86bdca545407a221827d1 \ + --hash=sha256:1a0abc7342ea9711c469dd8b821c6c311e6bc6aac1442e5fbd6b27fae0a8f3db \ + --hash=sha256:1b23b0c6f0b1db6ad769b7050c8b641c0bf215ded26c1816955b17b7f26edfa9 \ + --hash=sha256:1c9ed6ef99f88fb8c14aa8e2bf8eb0fe55fa2edfea68f8675d78741df1a5ac0e \ + --hash=sha256:22a7e06a5f11a757cdfe79018e9095f9f69ae283c5cd8123774c788deec8717b \ + --hash=sha256:23b81107f46d3f21d0cbce30664fcec0f5d9f585638a67081750f99738f6bf66 \ + --hash=sha256:29943e552fdc08e082eb51400fb2f58e118a83b5542bd06531214e084399b644 \ + --hash=sha256:29fe3da551dface75deb2ccbf87b6b66e2e7ef38f6d89050b428be94afff3490 \ + --hash=sha256:2fb73254ff43c911c967a899e1359bc5049b4b115d6e8fbdde4937d0a2246cd5 \ + --hash=sha256:3485a836550b303d006d57cc06e3d5afaabc642c77050b7c985a97b13e3776b8 \ + --hash=sha256:362cb78e01a5dc82009d88004cf60f2e6b6d6fcbfdec05b05af73b0abf40118f \ + --hash=sha256:3a5d8e876dfa2f102e970b183863d6dedd023d3c0eeca1fe7a9787bc5f28b212 \ + --hash=sha256:3e7e88110bae996d199d1693ca8ec3fd52441d426401ae963437598667b4c5eb \ + --hash=sha256:3f5549365af25d770e06b1f8f5682d9a5637d06eb494db91c6fa75d3950cc917 \ + --hash=sha256:3fd43f0616e765ab78d069cf8358def7363957a45cee446d65c502dcfeea7893 \ + --hash=sha256:454a380af72c6adada298ed270d38c7a391288198dbfb8467f786f588751a90c \ + 
--hash=sha256:45899ec2138a4346ed34d601dedf5076fb74edf2d1dd9dc76a78e82397edee90 \ + --hash=sha256:45e0f79d8351fa76e256716df91eab12890d32678b9590df7ae1042e4bd4cf5d \ + --hash=sha256:49c005cba1e2f9677fb2845dcdf9a2e72a52a17d63e8231aaaae35d9f50215ef \ + --hash=sha256:4b899594a8b2d81e5cc064a0d7f9cac2081fed91049456cae7676787e41549c9 \ + --hash=sha256:55d3089079ce181a4566b1065ab28d2575eb76d8ac8f81f4fcda2bf037fee087 \ + --hash=sha256:5904abf7e18cddc463219b17552229650c6b79e061d31a1059283051169cf7d5 \ + --hash=sha256:5ac83957a80d0701310e96d8bec68cdcf4f90a7674b7d13f15a344315b41ab27 \ + --hash=sha256:5d4a51aad8ba8bdcd2b8bd8f03d4aca19693fa2327a3470e4718a25b03481020 \ + --hash=sha256:5ebb8f4614a3787d567e610bbfdf96a4798dd69a1afb1bd8ad228d4111fe6ff3 \ + --hash=sha256:63df0fe568e698e1045792399f8ab6da3a6c2dce3182813fb92afa2641087b47 \ + --hash=sha256:65c86fb646d2bd2972e96bd1a8b45817ed907cee68655d6295fe7ec031d04cca \ + --hash=sha256:65f267ca1370726ec2c1aa38bbe4df9a71a740f22878d2d4bf59d71a4cd8d323 \ + --hash=sha256:664123feb0929d7affc135717dbd70d61d98688a08ab1e5ba464739620c6252d \ + --hash=sha256:668b92e6958c4db7cf92e81caac328dfbbdbb215db2850ad28f0cbe1eea0bfbd \ + --hash=sha256:68af363c07ecd8d4b7d4043d85cb376d7d227eceb54e5323ee45da73dbd3e426 \ + --hash=sha256:6a6516b02a6101398e19a3f44820f69bab2590697f7def4331f668b14adaf828 \ + --hash=sha256:6a78e2a9d9c5e3b8d4ab9b9d28c985ea66fced0a7d7c2aec1f216e03a2011480 \ + --hash=sha256:6b9bf47223dd8db3d4c4b2e443b02bace480d428f0822c3f991600448a176c97 \ + --hash=sha256:6d160217ec6fe890f16ad3a9531761589443749e448f91986c972714fad361c8 \ + --hash=sha256:6e57054a583da8ac55edf24117ea4c9133032cfc4cf72aa2d48c1e5d4b52f899 \ + --hash=sha256:70390b0da32cb90b501953716302906e8bcce087cb283e70d8c97729f22e92b2 \ + --hash=sha256:72a305291fa8ee01332f1aaf38b348ca34097f6aa0b0ef627eef2837e57bbba5 \ + --hash=sha256:731dc15b385ac52289743d476245b61e1a2927e803bef655b52bc3b2a75a21f3 \ + --hash=sha256:731e535b1498b27d13594a0527a79b0510867b0ad891532be41cb883f2128e20 \ + 
--hash=sha256:7333cd944ee4393b9b3d3c1b598c936d4fc8d70573a4c7dacfec5590dd50e436 \ + --hash=sha256:741f57cddc9004a8c81b084660215f33a6b597dbe62c31386b983ee26310e327 \ + --hash=sha256:742a73ea621953b012f2c4c2219b512180dd84489acf5b1596b0aafc55b9100b \ + --hash=sha256:7b2bb6c9d7e769360d0f20a0f219603fd64f0c8f97de17ab25853261602be0fb \ + --hash=sha256:7b79d646cf46d5cf9a9f40281d4441df5849e445726e369006d2b117710b33fe \ + --hash=sha256:7bf43e000d24012599b879791cff41589af90674722421ef11b11a5431920bab \ + --hash=sha256:7c843572c605ab51cfdb5c6b5f2586e2a8467c0d28eca4bdef4ec70c5fecbd82 \ + --hash=sha256:7ebb1c6df9f78046a1b1e0a89674cd4bf73b7c648914eebcf976a57fd99a5627 \ + --hash=sha256:7ffd19fc8aed057fd686a17a4935eef5f9859d69208f96310e893e64b9b6ccf5 \ + --hash=sha256:8231ade007f37959fbf58acc677f26b922c02eda6f0428ea307da0fd39681bf3 \ + --hash=sha256:827d6397dbd95144939b18f89edf31f63e1f99633e8d5f32f22ba8bdda567477 \ + --hash=sha256:829994cfe1aeb773ca27bf246d4badc1e764893e3bfb98fff820fcecd1ca4662 \ + --hash=sha256:84c32d90bf4537f0e7b4dec9aaa9a938fb8205136b9d2ecf4d7629d5262dc075 \ + --hash=sha256:8767486808c436f05b23ab98eb963fb29185e32a9357a166971685cb3459900f \ + --hash=sha256:8de5b61163aee3d05c8a2beab6f47913df7981dad1baf82c414d99158c286ab1 \ + --hash=sha256:90c1a51bcfddf645b3bb7ec333d9e94393a8e94f55642380fa8a9a5a9e636cb7 \ + --hash=sha256:9117377b823daa28aa8635fbb08cda1cd6be3d7143257345459559aeef852d52 \ + --hash=sha256:91b993743d959b8be85b4abf9d5478216a69329c321efe5be0433c1a841d691d \ + --hash=sha256:92af52828e7f29d827346b0294e5a0853fa206db77db0395b282918d41e28db9 \ + --hash=sha256:9336e23e8bb3a3925398261385e2a1533957d3e760e91070dcb0e98bfa514eed \ + --hash=sha256:953f521ca9445300397e65fda3dca58b2dbd68fee983777420b57ac3c77e9f90 \ + --hash=sha256:98af83fd65ae24b1fdd03aaead967a9f523bcd2f1aab2d4f3ffda65bb568a6f1 \ + --hash=sha256:9aed9fa983514ca032790f3fe0d1c0e42ca7e16b42432af1706b50a9a46bef5d \ + --hash=sha256:9cd1169b2230f9cbe9c638ba38022ed7a2b1e641cc07f7cea0365e4be2a74980 \ + 
--hash=sha256:9d1aa57a1dc8e05bdc42e81c5d671d849577aeedf279f4c449d6d286f9ed88ca \ + --hash=sha256:9d26ac7f5398bafc5b57421ad994e8a4749e8a7a0e62d05ec7d53014d5963bfa \ + --hash=sha256:9f323af3e1e4f68b60b7b247e37b8515563a61375518fa59de1af48ba28a3db6 \ + --hash=sha256:9fbd898551762dea00d3fef2b1c4f99afd2c6a3ff952ea07d60a9bd5ed4f34bc \ + --hash=sha256:a1816c505187592dcd1c5a5f226601a549f70365fbd00930ac88b0c225b76bb4 \ + --hash=sha256:a2bd259c442cd43c49b30fbafc51776eb19ea396faf159d26a83e6a0a5f13b0c \ + --hash=sha256:a3b5ddfd6aa7ddad53ee3edb231e88a2151507a43229b7d71b953916deca127d \ + --hash=sha256:a706b908dfa85538863504c624b237a3cc34232bf403c057414ebfdb3b4d9f84 \ + --hash=sha256:a841fae2fadcae4f438d43b6ccc4aac2ad609f47cdb6cfdce60cbb3fe5ca7bc2 \ + --hash=sha256:a93bac2cb577ef60074999ed56d8a1535894398e2ed920d4185c3ec0c8864742 \ + --hash=sha256:a9f864ef57b7172e2db87a096642dd51e179e085ab6b2c371c29e885f65c8fb2 \ + --hash=sha256:acebd068fca5512c3a6fde9c045f901613478781a73f0e82b307b214daef23fb \ + --hash=sha256:b34ece8065914f938ed7f2c5872bb865336977a52919149846eac3744327267a \ + --hash=sha256:b4cc4fce8672fffcb09b0eafc167b396b3ba53c4a7230f54b7aaffbf6c835fa9 \ + --hash=sha256:b4e26a0f1b696faf283bffe5b8569e44e336c582439df5d53281ab89ee0cba96 \ + --hash=sha256:b4f07cf7edcb7ec39431a5074d7ea83b29a9f71fcfc494f0f40af4e65180420f \ + --hash=sha256:b812eb847b19876ebf33fb6c4f11819af05ab6050b0bfa1bc53412ae81779adb \ + --hash=sha256:ba3b8390db29296dbbf49e91b6fe08f990743a90c8f447ba4c2ffc29670dfa63 \ + --hash=sha256:bcb2e855b87321259a037429288ae85216d191c74de3e79bf57cd2bc0761992c \ + --hash=sha256:bfb0ed8ec5d25e93face268115d7964db9df8b9aae8edcde9ec6b16c726a7cc1 \ + --hash=sha256:c7492f2d493b976941c7ca050f273cbda2f43c381124f7586a3e3c16d1804fec \ + --hash=sha256:c79d2319cabef1fe8e86df73371126931550804738f78ad7d31e3aad85a67367 \ + --hash=sha256:c83d2399a51bbec8429266905d33616f04bc5726b1138c35844d5fcd896b2e20 \ + --hash=sha256:ca3d9cf2c32b521bd9518385608787fa86f38daf993695307531822c3430ed67 \ + 
--hash=sha256:cc3499459bbcdd51a65b64c35ab7ed2764eaf3cba826e0df3f1d7fe2e102b70b \ + --hash=sha256:d128b1bba9361fbaaf6a19e179e6cfd6a9103ce0c0555876f72780acc93efd85 \ + --hash=sha256:d1bb3543b58fea74d2cd1abc4054cc927e4724687cb4560cd2ed88d2c7d820c0 \ + --hash=sha256:d8b013632cc1ce1d09dbe4f32667b4d320ec2f54fc326ebeffcd0b0bcc2bb6c4 \ + --hash=sha256:d8e1762f0e9cbc26ec315471e7b47855218e833cd5a032d706fbf43845d878c7 \ + --hash=sha256:d9c8ef6ed820c433de075657d72dda1f89a2984955e58b8a75feb3f184250218 \ + --hash=sha256:dc38367eaa2abb1b766ac333142bce7655335a73537f5c8b75aaa89c2b987757 \ + --hash=sha256:f2bbb8254370eb4c628ff3d6fa8a7f74ddc40565394d4f7ab791d1fe568e37ef \ + --hash=sha256:f580f8c80acd94ac72e863efe2cab791d8c38d153e0b463b92dfa000d5c84cd1 \ + --hash=sha256:fab3877e4ebb06bd9d4d4d00ee53309ee5478e66873c66a382272e3ee33eb7ea \ + --hash=sha256:fb609b3658479e33f9516d46f1a89dbb9b6c261366e3a11844a96ec487533dae \ + --hash=sha256:fcaba850dd317c65423a9d63d88f9573c53b00354d6dd95724576cc98a131595 # via pytest-cov iniconfig==2.3.0 \ --hash=sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730 \ @@ -130,9 +130,9 @@ json5==0.14.0 \ --hash=sha256:56cf861bab076b1178eb8c92e1311d273a9b9acea2ccc82c276abf839ebaef3a \ --hash=sha256:b3f492fad9f6cdbced8b7d40b28b9b1c9701c5f561bef0d33b81c2ff433fefcb # via specify-cli (pyproject.toml) -markdown-it-py==4.0.0 \ - --hash=sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147 \ - --hash=sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3 +markdown-it-py==4.2.0 \ + --hash=sha256:04a21681d6fbb623de53f6f364d352309d4094dd4194040a10fd51833e418d49 \ + --hash=sha256:9f7ebbcd14fe59494226453aed97c1070d83f8d24b6fc3a3bcf9a38092641c4a # via rich mdurl==0.1.2 \ --hash=sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8 \ diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml index 1c1c626bab..467c6989d8 100644 --- a/.github/workflows/security.yml +++ 
b/.github/workflows/security.yml @@ -37,7 +37,7 @@ jobs: - name: Compile scheduled audit requirements if: ${{ github.event_name == 'schedule' }} run: | - uv pip compile pyproject.toml --extra test --python-version "${{ matrix.python-version }}" --generate-hashes --quiet --output-file "${{ runner.temp }}/spec-kit-audit-requirements.txt" + uv pip compile pyproject.toml --extra test --python-version "${{ matrix.python-version }}" --upgrade --generate-hashes --quiet --output-file "${{ runner.temp }}/spec-kit-audit-requirements.txt" - name: Run pip-audit (scheduled live resolution) if: ${{ github.event_name == 'schedule' }} @@ -77,9 +77,9 @@ jobs: - name: Run Bandit run: uvx --from bandit==1.9.4 bandit -r src -lll --baseline .github/bandit-baseline.json - # Informative: MEDIUM severity, using the SAME baseline so the - # accepted HIGH finding doesn't re-fire here. Surfaces new MEDIUM-or- - # above findings in the job summary without breaking CI. + # Informative: MEDIUM severity, using the SAME baseline so accepted + # findings do not re-fire here. Surfaces new MEDIUM-or-above findings + # in the job summary without breaking CI. 
- name: Run Bandit medium-severity informational pass id: bandit-medium continue-on-error: true diff --git a/.secrets.baseline b/.secrets.baseline index 42f94920b0..524003da3c 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -90,6 +90,10 @@ { "path": "detect_secrets.filters.allowlist.is_line_allowlisted" }, + { + "path": "detect_secrets.filters.common.is_baseline_file", + "filename": ".secrets.baseline" + }, { "path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies", "min_level": 2 @@ -146,7 +150,7 @@ "filename": ".github/workflows/security.yml", "hashed_secret": "4202a5e0d1da60251e0163e869ae02016bb68767", "is_verified": false, - "line_number": 120 + "line_number": 163 } ], "docs/reference/authentication.md": [ @@ -202,5 +206,5 @@ } ] }, - "generated_at": "2026-05-15T06:22:08Z" + "generated_at": "2026-05-16T06:38:49Z" } diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 910e7242e8..5223cfaa51 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -91,7 +91,7 @@ uvx --from bandit==1.9.4 bandit -r src -lll --baseline .github/bandit-baseline.j Run these before changing dependency metadata, workflow execution code, subprocess usage, or security-sensitive paths. Pull request, push, and manual CI audits use the committed hashed requirements file so they stay deterministic. The scheduled CI audit also resolves the runtime and `test` extra dependency set across the supported Python and OS matrix to catch newly published advisories. 
If dependency metadata changes, refresh the committed audit input before running pip-audit: ```bash -uv pip compile pyproject.toml --extra test --universal --generate-hashes --quiet --no-header --output-file .github/security-audit-requirements.txt +uv pip compile pyproject.toml --extra test --universal --upgrade --generate-hashes --quiet --no-header --output-file .github/security-audit-requirements.txt ``` Upstream package releases drift over time, so even an unrelated PR touching `pyproject.toml` can fail the `dependency-audit` check until the committed file is regenerated with the command above and re-committed. @@ -117,7 +117,7 @@ Audit the new entries before committing — a leaked credential must never be me #### Bandit baseline -The CI `static-analysis` job runs Bandit with `--baseline .github/bandit-baseline.json` (HIGH severity, blocking) plus a second informational pass at MEDIUM severity sharing the same baseline (`continue-on-error`, surfaced in the job summary). If a HIGH finding is intentional, audit it carefully, document the rationale next to the code (regular comment — **not** `# nosec`; see below), and append the entry to `.github/bandit-baseline.json`. Growing the baseline is gated: the `check_bandit_baseline.py` script fails the PR unless it carries the `security-baseline-change` label, so reviewers see the whitelist expansion. +The CI `static-analysis` job runs Bandit with `--baseline .github/bandit-baseline.json` (HIGH severity, blocking) plus a second informational pass at MEDIUM severity sharing the same baseline (`continue-on-error`, surfaced in the job summary). If a finding is intentional, audit it carefully, document the rationale next to the code (regular comment — **not** `# nosec`; see below), and append the entry to `.github/bandit-baseline.json`. Growing the baseline is gated: the `check_bandit_baseline.py` script fails the PR unless it carries the `security-baseline-change` label, so reviewers see the whitelist expansion. 
> **Do not use `# nosec` in `src/`.** The `test_bandit_nosec_is_not_suppressed_in_source` regression test fails any PR that adds one. The supported suppression paths are (a) the bandit baseline (covered above) for HIGH findings, and (b) `# noqa: S6xx` with an inline justification for ruff's subprocess-shell rules (`S602/S604/S605`). Both are visible in review; `# nosec` hides the finding without trace. diff --git a/tests/test_security_workflow.py b/tests/test_security_workflow.py index 4897d59bf1..c5fcdee425 100644 --- a/tests/test_security_workflow.py +++ b/tests/test_security_workflow.py @@ -25,15 +25,15 @@ COMMITTED_AUDIT_REQUIREMENTS = ".github/security-audit-requirements.txt" WORKFLOW_COMPILE_SCHEDULED_TEST_EXTRA_DEPS = ( "uv pip compile pyproject.toml --extra test " - '--python-version "${{ matrix.python-version }}" --generate-hashes --quiet ' + '--python-version "${{ matrix.python-version }}" --upgrade --generate-hashes --quiet ' f"--output-file {WORKFLOW_LIVE_AUDIT_REQUIREMENTS}" ) LOCAL_REFRESH_TEST_EXTRA_DEPS = ( - "uv pip compile pyproject.toml --extra test --universal --generate-hashes " + "uv pip compile pyproject.toml --extra test --universal --upgrade --generate-hashes " f"--quiet --no-header --output-file {COMMITTED_AUDIT_REQUIREMENTS}" ) WORKFLOW_SYNC_COMPILE_TEST_EXTRA_DEPS = ( - "uv pip compile pyproject.toml --extra test --universal --generate-hashes " + "uv pip compile pyproject.toml --extra test --universal --upgrade --generate-hashes " "--quiet --no-header --output-file" ) WORKFLOW_SYNC_SCRIPT = "python .github/scripts/check_security_requirements.py" @@ -223,16 +223,20 @@ def test_bandit_does_not_globally_skip_b602(self): assert "--skip" not in run assert "--skip B602" not in workflow_text - def test_bandit_baseline_only_ignores_shell_step_b602(self): + def test_bandit_baseline_tracks_only_accepted_findings(self): baseline = json.loads(BANDIT_BASELINE.read_text(encoding="utf-8")) results = baseline["results"] - assert len(results) == 1 - 
assert results[0]["test_id"] == "B602" - assert ( - results[0]["filename"] - == "src/specify_cli/workflows/steps/shell/__init__.py" - ) + assert { + (result["filename"], result["line_number"], result["test_id"]) + for result in results + } == { + ("src/specify_cli/_github_http.py", 104, "B310"), + ("src/specify_cli/authentication/azure_devops.py", 114, "B310"), + ("src/specify_cli/authentication/http.py", 171, "B310"), + ("src/specify_cli/workflows/steps/shell/__init__.py", 35, "B602"), + } + assert {result["issue_severity"] for result in results} == {"MEDIUM", "HIGH"} def test_bandit_nosec_is_not_suppressed_in_source(self): nosec_lines = [] From a2e53c26d324097246683985a5ee3b5364c3b4f6 Mon Sep 17 00:00:00 2001 From: Pascal Date: Thu, 21 May 2026 15:27:47 +0200 Subject: [PATCH 19/30] fix: address copilot security review follow-up --- src/specify_cli/_github_http.py | 4 ++- src/specify_cli/_utils.py | 18 +++++++++++-- tests/test_github_http.py | 45 ++++++++++++++++++++++++++++++++- tests/test_security_workflow.py | 7 +++-- 4 files changed, 68 insertions(+), 6 deletions(-) diff --git a/src/specify_cli/_github_http.py b/src/specify_cli/_github_http.py index 23cc747823..70b5bf4290 100644 --- a/src/specify_cli/_github_http.py +++ b/src/specify_cli/_github_http.py @@ -75,7 +75,9 @@ def redirect_request(self, req, fp, code, msg, headers, newurl): f"Refusing unsafe redirect to non-HTTPS URL: {newurl}" ) - original_auth = req.get_header("Authorization") + original_auth = req.get_header("Authorization") or req.unredirected_hdrs.get( + "Authorization" + ) new_req = super().redirect_request(req, fp, code, msg, headers, newurl) if new_req is not None: hostname = (urlparse(newurl).hostname or "").lower() diff --git a/src/specify_cli/_utils.py b/src/specify_cli/_utils.py index 5271cfcb3a..cae681d97f 100644 --- a/src/specify_cli/_utils.py +++ b/src/specify_cli/_utils.py @@ -16,8 +16,22 @@ CLAUDE_NPM_LOCAL_PATH = Path.home() / ".claude" / "local" / "node_modules" / ".bin" / 
"claude" -def run_command(cmd: list[str], check_return: bool = True, capture: bool = False) -> str | None: - """Run a command without invoking a shell and optionally capture output.""" +def run_command( + cmd: list[str], + check_return: bool = True, + capture: bool = False, + shell: bool = False, +) -> str | None: + """Run a command without invoking a shell and optionally capture output. + + ``shell`` remains accepted for public API compatibility, but shell + execution is intentionally unsupported. + """ + if shell: + raise ValueError( + "run_command() does not support shell=True; pass argv as a list" + ) + try: if capture: result = subprocess.run( diff --git a/tests/test_github_http.py b/tests/test_github_http.py index f414aeeb2b..a5d404a8be 100644 --- a/tests/test_github_http.py +++ b/tests/test_github_http.py @@ -1,11 +1,14 @@ """Tests for GitHub-authenticated HTTP request helpers.""" +import io import os from unittest.mock import patch +from urllib.request import Request import pytest from specify_cli._github_http import ( + _StripAuthOnRedirect, build_github_request, ) @@ -76,4 +79,44 @@ def test_no_auth_header_when_no_token(self): def test_missing_hostname_raises_value_error(self): """build_github_request() must reject URLs with valid scheme but no hostname.""" with pytest.raises(ValueError, match="url must include a hostname"): - build_github_request("http://") \ No newline at end of file + build_github_request("http://") + + +class TestGitHubRedirectAuth: + """Tests for GitHub-owned redirect auth handling.""" + + def test_multi_hop_github_redirect_preserves_unredirected_auth(self): + """Auth survives a multi-hop redirect chain within GitHub hosts.""" + handler = _StripAuthOnRedirect() + req1 = Request( + "https://github.com/org/repo", + headers={"Authorization": "Bearer tok"}, + ) + + req2 = handler.redirect_request( + req1, + io.BytesIO(b""), + 302, + "Found", + {}, + "https://codeload.github.com/org/repo/zip", + ) + assert req2 is not None + auth2 = 
req2.get_header("Authorization") or req2.unredirected_hdrs.get( + "Authorization" + ) + assert auth2 == "Bearer tok" + + req3 = handler.redirect_request( + req2, + io.BytesIO(b""), + 302, + "Found", + {}, + "https://raw.githubusercontent.com/org/repo/main/file", + ) + assert req3 is not None + auth3 = req3.get_header("Authorization") or req3.unredirected_hdrs.get( + "Authorization" + ) + assert auth3 == "Bearer tok" diff --git a/tests/test_security_workflow.py b/tests/test_security_workflow.py index c5fcdee425..7105ee7942 100644 --- a/tests/test_security_workflow.py +++ b/tests/test_security_workflow.py @@ -9,6 +9,7 @@ import subprocess from pathlib import Path +import pytest import yaml @@ -250,10 +251,12 @@ def test_bandit_nosec_is_not_suppressed_in_source(self): assert nosec_lines == [] - def test_run_command_does_not_accept_shell_argument(self): + def test_run_command_rejects_shell_execution_compatibly(self): from specify_cli import run_command - assert "shell" not in inspect.signature(run_command).parameters + assert inspect.signature(run_command).parameters["shell"].default is False + with pytest.raises(ValueError, match="does not support shell=True"): + run_command(["echo", "blocked"], shell=True) # noqa: S604 def test_committed_audit_requirements_are_hashed(self): requirements = SECURITY_REQUIREMENTS.read_text(encoding="utf-8") From 2957b839f258d3687ddf049d1ed2641e5122f498 Mon Sep 17 00:00:00 2001 From: Pascal Date: Wed, 27 May 2026 23:59:54 +0200 Subject: [PATCH 20/30] fix: wrap unsafe zip extraction errors --- src/specify_cli/_download_security.py | 81 ++++++++++++++++++++------- tests/test_download_security.py | 52 +++++++++++++++++ 2 files changed, 112 insertions(+), 21 deletions(-) diff --git a/src/specify_cli/_download_security.py b/src/specify_cli/_download_security.py index 6706ec5237..91314e4a0c 100644 --- a/src/specify_cli/_download_security.py +++ b/src/specify_cli/_download_security.py @@ -24,6 +24,10 @@ def _raise(error_type: type[ErrorT], 
message: str) -> None: raise error_type(message) +def _raise_from(error_type: type[ErrorT], message: str, exc: Exception) -> None: + raise error_type(message) from exc + + def read_response_limited( response, *, @@ -80,12 +84,15 @@ def _safe_zip_name(name: str, *, error_type: type[ErrorT]) -> str: normalized = name.replace("\\", "/") path = PurePosixPath(normalized) + raw_parts = normalized.split("/") + if raw_parts and raw_parts[-1] == "": + raw_parts = raw_parts[:-1] has_windows_drive = re.match(r"^[A-Za-z]:", normalized) is not None if ( - not path.parts + not raw_parts or path.is_absolute() or has_windows_drive - or any(part == ".." for part in path.parts) + or any(part in {"", ".", ".."} for part in raw_parts) ): _raise( error_type, @@ -104,10 +111,21 @@ def safe_extract_zip( max_total_bytes: int = MAX_ZIP_TOTAL_BYTES, ) -> None: """Extract a ZIP archive after path, symlink, and size validation.""" - target_root = target_dir.resolve() - - with zipfile.ZipFile(zip_path, "r") as zf: - members = zf.infolist() + try: + target_root = target_dir.resolve() + except OSError as exc: + _raise_from(error_type, f"Invalid ZIP extraction target: {target_dir}", exc) + + try: + zf = zipfile.ZipFile(zip_path, "r") + except (OSError, zipfile.BadZipFile) as exc: + _raise_from(error_type, f"Invalid ZIP archive: {zip_path}", exc) + + with zf: + try: + members = zf.infolist() + except zipfile.BadZipFile as exc: + _raise_from(error_type, f"Invalid ZIP archive: {zip_path}", exc) if len(members) > max_entries: _raise( error_type, @@ -153,21 +171,42 @@ def safe_extract_zip( for member, normalized_name in normalized_members: member_path = target_dir / normalized_name if member.is_dir(): - member_path.mkdir(parents=True, exist_ok=True) + try: + member_path.mkdir(parents=True, exist_ok=True) + except OSError as exc: + _raise_from( + error_type, + f"Failed to create ZIP directory {member.filename}: {exc}", + exc, + ) continue - member_path.parent.mkdir(parents=True, exist_ok=True) + try: + 
member_path.parent.mkdir(parents=True, exist_ok=True) + except OSError as exc: + _raise_from( + error_type, + f"Failed to create parent directory for ZIP member {member.filename}: {exc}", + exc, + ) written = 0 - with zf.open(member, "r") as source, member_path.open("wb") as dest: - while True: - chunk = source.read(READ_CHUNK_SIZE) - if not chunk: - break - written += len(chunk) - if written > max_member_bytes: - _raise( - error_type, - f"ZIP member {member.filename} exceeds maximum size " - f"of {max_member_bytes} bytes", - ) - dest.write(chunk) + try: + with zf.open(member, "r") as source, member_path.open("wb") as dest: + while True: + chunk = source.read(READ_CHUNK_SIZE) + if not chunk: + break + written += len(chunk) + if written > max_member_bytes: + _raise( + error_type, + f"ZIP member {member.filename} exceeds maximum size " + f"of {max_member_bytes} bytes", + ) + dest.write(chunk) + except (OSError, zipfile.BadZipFile, RuntimeError) as exc: + _raise_from( + error_type, + f"Failed to extract ZIP member {member.filename}: {exc}", + exc, + ) diff --git a/tests/test_download_security.py b/tests/test_download_security.py index 6240af7484..40bf081fd1 100644 --- a/tests/test_download_security.py +++ b/tests/test_download_security.py @@ -32,6 +32,10 @@ def read(self, size: int = -1) -> bytes: return self.data if size < 0 else self.data[:size] +class _CustomZipError(ValueError): + pass + + def _constant_int(node: ast.AST) -> int | None: if isinstance(node, ast.Constant) and isinstance(node.value, int): return node.value @@ -126,6 +130,16 @@ def test_safe_extract_zip_rejects_traversal(tmp_path, member_name): safe_extract_zip(zip_path, tmp_path / "out") +@pytest.mark.parametrize("member_name", ["", ".", "./file.txt", "nested/./file.txt", "nested//file.txt"]) +def test_safe_extract_zip_rejects_dot_path_segments(tmp_path, member_name): + zip_path = tmp_path / "bad.zip" + with zipfile.ZipFile(zip_path, "w") as zf: + zf.writestr(member_name, "nope") + + with 
pytest.raises(_CustomZipError, match="Unsafe path"): + safe_extract_zip(zip_path, tmp_path / "out", error_type=_CustomZipError) + + def test_safe_extract_zip_rejects_symlinks(tmp_path): zip_path = tmp_path / "bad.zip" info = zipfile.ZipInfo("link") @@ -167,6 +181,44 @@ def test_safe_extract_zip_rejects_total_uncompressed_size(tmp_path): safe_extract_zip(zip_path, tmp_path / "out", max_total_bytes=5) +def test_safe_extract_zip_wraps_bad_zip_file(tmp_path): + zip_path = tmp_path / "bad.zip" + zip_path.write_bytes(b"not a zip archive") + + with pytest.raises(_CustomZipError, match="Invalid ZIP archive"): + safe_extract_zip(zip_path, tmp_path / "out", error_type=_CustomZipError) + + +def test_safe_extract_zip_wraps_filesystem_errors(tmp_path): + zip_path = tmp_path / "ok.zip" + blocked_parent = tmp_path / "blocked" + blocked_parent.write_text("not a directory", encoding="utf-8") + with zipfile.ZipFile(zip_path, "w") as zf: + zf.writestr("file.txt", "hello") + + with pytest.raises(_CustomZipError, match="Failed to create parent directory"): + safe_extract_zip( + zip_path, + blocked_parent / "out", + error_type=_CustomZipError, + ) + + +def test_safe_extract_zip_wraps_directory_filesystem_errors(tmp_path): + zip_path = tmp_path / "ok.zip" + blocked_parent = tmp_path / "blocked" + blocked_parent.write_text("not a directory", encoding="utf-8") + with zipfile.ZipFile(zip_path, "w") as zf: + zf.mkdir("dir") + + with pytest.raises(_CustomZipError, match="Failed to create ZIP directory"): + safe_extract_zip( + zip_path, + blocked_parent / "out", + error_type=_CustomZipError, + ) + + def test_safe_extract_zip_extracts_safe_archive(tmp_path): zip_path = tmp_path / "ok.zip" out_dir = tmp_path / "out" From 48988e190f5ffdfab6fdb32f523a4807f6086257 Mon Sep 17 00:00:00 2001 From: Pascal Date: Sat, 30 May 2026 00:12:34 +0200 Subject: [PATCH 21/30] fix: redact secrets baseline hash logs --- .github/scripts/check_secrets_baseline.py | 39 ++++++++++++----- tests/test_baseline_gates.py | 
53 +++++++++++++++++++++++ 2 files changed, 82 insertions(+), 10 deletions(-) diff --git a/.github/scripts/check_secrets_baseline.py b/.github/scripts/check_secrets_baseline.py index a42626203d..33453cbf42 100644 --- a/.github/scripts/check_secrets_baseline.py +++ b/.github/scripts/check_secrets_baseline.py @@ -28,6 +28,7 @@ import os import subprocess import sys +from dataclasses import dataclass from pathlib import Path REPO_ROOT = Path(__file__).resolve().parents[2] @@ -35,6 +36,26 @@ ACK_LABEL = "secrets-baseline-change" +@dataclass(frozen=True, order=True) +class SecretIdentity: + """Comparison identity for one detect-secrets baseline entry.""" + + filename: str + line_number: str + secret_type: str + hashed_secret: str + + def log_safe(self) -> str: + return "|".join( + [ + self.filename, + self.line_number, + self.secret_type, + "hashed_secret=", + ] + ) + + def _read_baseline_at(ref: str) -> tuple[dict, bool]: """Return (baseline_json, file_existed_at_ref). Base side only.""" if not ref: @@ -77,9 +98,9 @@ def _read_baseline_from_worktree() -> tuple[dict, bool]: ) -def _identities(baseline: dict) -> set[str]: +def _identities(baseline: dict) -> set[SecretIdentity]: """Flatten detect-secrets results to a set of stable identities.""" - ids: set[str] = set() + ids: set[SecretIdentity] = set() results = baseline.get("results", {}) if not isinstance(results, dict): return ids @@ -90,13 +111,11 @@ def _identities(baseline: dict) -> set[str]: if not isinstance(entry, dict): continue ids.add( - "|".join( - [ - str(filename), - str(entry.get("line_number", "")), - str(entry.get("type", "")), - str(entry.get("hashed_secret", "")), - ] + SecretIdentity( + filename=str(filename), + line_number=str(entry.get("line_number", "")), + secret_type=str(entry.get("type", "")), + hashed_secret=str(entry.get("hashed_secret", "")), ) ) return ids @@ -160,7 +179,7 @@ def main() -> int: file=sys.stderr, ) for identity in sorted(new_ids): - print(f" + {identity}", file=sys.stderr) + 
print(f" + {identity.log_safe()}", file=sys.stderr) return 1 diff --git a/tests/test_baseline_gates.py b/tests/test_baseline_gates.py index 7037d1d008..42714b56e0 100644 --- a/tests/test_baseline_gates.py +++ b/tests/test_baseline_gates.py @@ -368,3 +368,56 @@ def test_whitespace_only_change_does_not_trip(self, gate: GateHandle): result = gate.run(base=base_sha) assert result.returncode == 0, result.stderr + + +class TestSecretsSpecific: + """Cases that only exist for the detect-secrets gate.""" + + @pytest.fixture + def gate(self, tmp_path) -> GateHandle: + repo = _init_repo(tmp_path) + _install_script(repo, SECRETS_SCRIPT) + return GateHandle(config=SECRETS_GATE, repo=repo) + + @staticmethod + def _baseline_with_hash(hashed_secret: str) -> dict: + return { + "version": "1.5.0", + "results": { + "app.py": [ + { + "type": "Secret Keyword", + "filename": "app.py", + "hashed_secret": hashed_secret, + "is_verified": False, + "line_number": 42, + } + ] + }, + } + + def test_same_location_secret_swap_fails_without_leaking_hash( + self, gate: GateHandle + ): + """The hash remains part of the gate identity, but not CI logs.""" + old_hash = "old-sensitive-hash" + new_hash = "new-sensitive-hash" + base_sha = _commit_baseline( + gate.repo, + gate.config.baseline_path, + self._baseline_with_hash(old_hash), + "base", + ) + _commit_baseline( + gate.repo, + gate.config.baseline_path, + self._baseline_with_hash(new_hash), + "secret swap", + ) + + result = gate.run(base=base_sha) + + assert result.returncode == 1, "hashed secret diff must catch swaps" + assert "app.py|42|Secret Keyword|hashed_secret=" in result.stderr + assert old_hash not in result.stderr + assert new_hash not in result.stderr From 5a15b77b3a5cae8c5aa6a8ba62df622e0d68c39f Mon Sep 17 00:00:00 2001 From: Pascal Date: Sat, 30 May 2026 00:15:04 +0200 Subject: [PATCH 22/30] fix: keep secrets baseline hashes out of repr --- .github/scripts/check_secrets_baseline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/.github/scripts/check_secrets_baseline.py b/.github/scripts/check_secrets_baseline.py index 33453cbf42..2865bbde86 100644 --- a/.github/scripts/check_secrets_baseline.py +++ b/.github/scripts/check_secrets_baseline.py @@ -28,7 +28,7 @@ import os import subprocess import sys -from dataclasses import dataclass +from dataclasses import dataclass, field from pathlib import Path REPO_ROOT = Path(__file__).resolve().parents[2] @@ -43,7 +43,7 @@ class SecretIdentity: filename: str line_number: str secret_type: str - hashed_secret: str + hashed_secret: str = field(repr=False) def log_safe(self) -> str: return "|".join( From 68bad0cc6908fdc80cc2becece2837b3b9490d12 Mon Sep 17 00:00:00 2001 From: Pascal Date: Sat, 30 May 2026 16:58:39 +0200 Subject: [PATCH 23/30] fix: address Copilot review on bounded reads and redirect-safety - read_response_limited: read in a loop until EOF or one byte past the limit instead of a single read(max_bytes + 1). A server using chunked transfer encoding can return fewer bytes per read() than requested while streaming more than max_bytes total, defeating the single-read bound. Add regression tests for the short-read and within-limit paths. - _download_security: annotate _raise / _raise_from as NoReturn so type checkers treat call sites as unreachable. - Extract the duplicated is_https_or_localhost_http redirect-safety predicate into _download_security and import it from both _github_http and authentication/http so the rule lives in one place. - azure_devops: stop catching broad ValueError/KeyError around token acquisition; give the bounded read a dedicated _TokenResponseTooLarge type and catch only URLError, OSError, JSONDecodeError, and that type so unrelated programming errors still surface. - tests: make response mocks faithful streams (advancing cursor, b"" at EOF) so the bounded read loop terminates as it would against a real http.client.HTTPResponse. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/specify_cli/_download_security.py | 40 +++++++++++++++---- src/specify_cli/_github_http.py | 10 ++--- .../authentication/azure_devops.py | 12 +++++- src/specify_cli/authentication/http.py | 9 +---- .../integrations/test_integration_catalog.py | 24 ++++++++--- tests/test_authentication.py | 5 ++- tests/test_download_security.py | 29 +++++++++++++- tests/test_extensions.py | 11 ++--- tests/test_presets.py | 9 +++-- tests/test_upgrade.py | 6 ++- tests/test_workflows.py | 13 ++++-- 11 files changed, 123 insertions(+), 45 deletions(-) diff --git a/src/specify_cli/_download_security.py b/src/specify_cli/_download_security.py index 91314e4a0c..40cb687a8e 100644 --- a/src/specify_cli/_download_security.py +++ b/src/specify_cli/_download_security.py @@ -7,7 +7,8 @@ import stat import zipfile from pathlib import Path, PurePosixPath -from typing import TypeVar +from typing import NoReturn, TypeVar +from urllib.parse import urlparse ErrorT = TypeVar("ErrorT", bound=Exception) @@ -20,11 +21,22 @@ SHA256_RE = re.compile(r"^[0-9a-fA-F]{64}$") -def _raise(error_type: type[ErrorT], message: str) -> None: +def is_https_or_localhost_http(url: str) -> bool: + """Return True if *url* is HTTPS, or HTTP limited to loopback hosts. + + Shared redirect-safety predicate used by the GitHub and auth HTTP redirect + handlers so the rule (and any future tightening of it) lives in one place. 
+ """ + parsed = urlparse(url) + is_localhost = parsed.hostname in ("localhost", "127.0.0.1", "::1") + return parsed.scheme == "https" or (parsed.scheme == "http" and is_localhost) + + +def _raise(error_type: type[ErrorT], message: str) -> NoReturn: raise error_type(message) -def _raise_from(error_type: type[ErrorT], message: str, exc: Exception) -> None: +def _raise_from(error_type: type[ErrorT], message: str, exc: Exception) -> NoReturn: raise error_type(message) from exc @@ -35,11 +47,25 @@ def read_response_limited( error_type: type[ErrorT] = ValueError, label: str = "download", ) -> bytes: - """Read at most *max_bytes* from a response object.""" - data = response.read(max_bytes + 1) - if len(data) > max_bytes: + """Read at most *max_bytes* from a response object. + + ``response.read(n)`` is only guaranteed to return *up to* ``n`` bytes and may + return fewer even when more data is pending (e.g. chunked transfer encoding), + so a single ``read(max_bytes + 1)`` cannot enforce the bound on its own. Read + in a loop until EOF or until one byte past the limit has been accumulated. + """ + chunks: list[bytes] = [] + total = 0 + limit = max_bytes + 1 + while total < limit: + chunk = response.read(min(READ_CHUNK_SIZE, limit - total)) + if not chunk: + break + chunks.append(chunk) + total += len(chunk) + if total > max_bytes: _raise(error_type, f"{label} exceeds maximum size of {max_bytes} bytes") - return data + return b"".join(chunks) def normalize_sha256(value: object, *, error_type: type[ErrorT] = ValueError) -> str | None: diff --git a/src/specify_cli/_github_http.py b/src/specify_cli/_github_http.py index 70b5bf4290..481e869aac 100644 --- a/src/specify_cli/_github_http.py +++ b/src/specify_cli/_github_http.py @@ -12,6 +12,8 @@ from typing import Dict from urllib.parse import urlparse +from specify_cli._download_security import is_https_or_localhost_http + # GitHub-owned hostnames that should receive the Authorization header. 
# Includes codeload.github.com because GitHub archive URL downloads # (e.g. /archive/refs/tags/.zip) redirect there and require auth @@ -55,12 +57,6 @@ def build_github_request(url: str) -> urllib.request.Request: return urllib.request.Request(url, headers=headers) -def _is_https_or_localhost_http(url: str) -> bool: - parsed = urlparse(url) - is_localhost = parsed.hostname in ("localhost", "127.0.0.1", "::1") - return parsed.scheme == "https" or (parsed.scheme == "http" and is_localhost) - - class _StripAuthOnRedirect(urllib.request.HTTPRedirectHandler): """Redirect handler that drops the Authorization header when leaving GitHub. @@ -70,7 +66,7 @@ class _StripAuthOnRedirect(urllib.request.HTTPRedirectHandler): """ def redirect_request(self, req, fp, code, msg, headers, newurl): - if not _is_https_or_localhost_http(newurl): + if not is_https_or_localhost_http(newurl): raise urllib.error.URLError( f"Refusing unsafe redirect to non-HTTPS URL: {newurl}" ) diff --git a/src/specify_cli/authentication/azure_devops.py b/src/specify_cli/authentication/azure_devops.py index 2837108261..060d7bbf27 100644 --- a/src/specify_cli/authentication/azure_devops.py +++ b/src/specify_cli/authentication/azure_devops.py @@ -17,6 +17,10 @@ _ADO_RESOURCE_ID = "499b84ac-1321-427f-aa17-267ca6975798" +class _TokenResponseTooLarge(Exception): + """Raised when an Azure AD token response exceeds the bounded read limit.""" + + class AzureDevOpsAuth(AuthProvider): """Azure DevOps authentication provider. 
@@ -115,6 +119,7 @@ def _acquire_via_client_credentials(entry: AuthConfigEntry) -> str | None: payload = _json.loads( read_response_limited( resp, + error_type=_TokenResponseTooLarge, label="Azure DevOps token response", ).decode("utf-8") ) @@ -123,7 +128,10 @@ def _acquire_via_client_credentials(entry: AuthConfigEntry) -> str | None: except ( urllib.error.URLError, OSError, - ValueError, - KeyError, + _json.JSONDecodeError, + _TokenResponseTooLarge, ): + # Network failure, malformed JSON, or an oversized response — fall + # through to the next strategy. Unrelated programming errors (other + # ValueErrors, KeyErrors) intentionally propagate so they surface. return None diff --git a/src/specify_cli/authentication/http.py b/src/specify_cli/authentication/http.py index 34bc35da9e..bcd1815520 100644 --- a/src/specify_cli/authentication/http.py +++ b/src/specify_cli/authentication/http.py @@ -16,6 +16,7 @@ from fnmatch import fnmatch from urllib.parse import urlparse +from .._download_security import is_https_or_localhost_http from . 
import get_provider from .config import AuthConfigEntry, _default_config_path, find_entries_for_url, load_auth_config @@ -56,12 +57,6 @@ def _hostname_in_hosts(hostname: str, hosts: tuple[str, ...]) -> bool: return any(p == hostname or fnmatch(hostname, p) for p in hosts) -def _is_https_or_localhost_http(url: str) -> bool: - parsed = urlparse(url) - is_localhost = parsed.hostname in ("localhost", "127.0.0.1", "::1") - return parsed.scheme == "https" or (parsed.scheme == "http" and is_localhost) - - class _StripAuthOnRedirect(urllib.request.HTTPRedirectHandler): """Drop ``Authorization`` when a redirect leaves the entry's declared hosts.""" @@ -70,7 +65,7 @@ def __init__(self, hosts: tuple[str, ...]) -> None: self._hosts = hosts def redirect_request(self, req, fp, code, msg, headers, newurl): - if not _is_https_or_localhost_http(newurl): + if not is_https_or_localhost_http(newurl): raise urllib.error.URLError( f"Refusing unsafe redirect to non-HTTPS URL: {newurl}" ) diff --git a/tests/integrations/test_integration_catalog.py b/tests/integrations/test_integration_catalog.py index ca2c1875cc..16004caa3e 100644 --- a/tests/integrations/test_integration_catalog.py +++ b/tests/integrations/test_integration_catalog.py @@ -177,9 +177,16 @@ class FakeResponse: def __init__(self, data, url=""): self._data = json.dumps(data).encode() self._url = url if isinstance(url, str) else url.full_url + self._pos = 0 - def read(self, _size=-1): - return self._data + def read(self, size=-1): + # Advance a cursor and return b"" at EOF like a real stream, so + # read_response_limited's bounded loop terminates. 
+ if size is None or size < 0: + size = len(self._data) - self._pos + out = self._data[self._pos : self._pos + size] + self._pos += len(out) + return out def geturl(self): return self._url @@ -553,9 +560,16 @@ class FakeResponse: def __init__(self, data, url=""): self._data = json.dumps(data).encode() self._url = url if isinstance(url, str) else url.full_url - - def read(self, _size=-1): - return self._data + self._pos = 0 + + def read(self, size=-1): + # Advance a cursor and return b"" at EOF like a real stream, so + # read_response_limited's bounded loop terminates. + if size is None or size < 0: + size = len(self._data) - self._pos + out = self._data[self._pos : self._pos + size] + self._pos += len(out) + return out def geturl(self): return self._url diff --git a/tests/test_authentication.py b/tests/test_authentication.py index 213f8625d8..08b361c59f 100644 --- a/tests/test_authentication.py +++ b/tests/test_authentication.py @@ -14,6 +14,7 @@ from __future__ import annotations import base64 +import io import json import os @@ -497,7 +498,7 @@ def test_resolve_token_azure_ad_success(self, monkeypatch): tenant_id="tid", client_id="cid", client_secret_env="MY_SECRET", ) mock_resp = MagicMock() - mock_resp.read.return_value = b'{"access_token": "ad-acquired-token"}' + mock_resp.read.side_effect = io.BytesIO(b'{"access_token": "ad-acquired-token"}').read mock_resp.__enter__ = lambda s: s mock_resp.__exit__ = MagicMock(return_value=False) with patch("urllib.request.urlopen", return_value=mock_resp): @@ -825,7 +826,7 @@ def _capture_request(self): def side_effect(req, timeout=None): captured["request"] = req body = _json.dumps({"tag_name": "v9.9.9"}).encode() - resp = MagicMock(); resp.read.return_value = body + resp = MagicMock(); resp.read.side_effect = io.BytesIO(body).read cm = MagicMock(); cm.__enter__.return_value = resp; cm.__exit__.return_value = False return cm return captured, side_effect diff --git a/tests/test_download_security.py 
b/tests/test_download_security.py index 40bf081fd1..78658e14cf 100644 --- a/tests/test_download_security.py +++ b/tests/test_download_security.py @@ -25,11 +25,23 @@ class _Response: - def __init__(self, data: bytes): + """Faithful stream stand-in: read() advances a cursor and returns b"" at EOF.""" + + def __init__(self, data: bytes, *, chunk: int | None = None): self.data = data + self.pos = 0 + # When set, never return more than *chunk* bytes per call even if more is + # requested — simulates short reads (e.g. chunked transfer encoding). + self.chunk = chunk def read(self, size: int = -1) -> bytes: - return self.data if size < 0 else self.data[:size] + if size < 0: + size = len(self.data) - self.pos + if self.chunk is not None: + size = min(size, self.chunk) + out = self.data[self.pos : self.pos + size] + self.pos += len(out) + return out class _CustomZipError(ValueError): @@ -93,6 +105,19 @@ def test_read_response_limited_rejects_oversized_download(): read_response_limited(_Response(b"abcde"), max_bytes=4) +def test_read_response_limited_returns_full_body_within_limit(): + assert read_response_limited(_Response(b"abcde"), max_bytes=10) == b"abcde" + + +def test_read_response_limited_enforces_bound_under_short_reads(): + # A server that streams more than max_bytes total while every read() returns + # fewer bytes than requested (chunked encoding) must still be rejected — a + # single read(max_bytes + 1) could be fooled, the accumulating loop cannot. 
+ response = _Response(b"x" * 100, chunk=8) + with pytest.raises(ValueError, match="exceeds maximum size"): + read_response_limited(response, max_bytes=16) + + def test_remote_downloads_do_not_use_unbounded_response_reads(): offenders = [] for path in (REPO_ROOT / "src" / "specify_cli").rglob("*.py"): diff --git a/tests/test_extensions.py b/tests/test_extensions.py index 8c6e28b98c..da2539457e 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -10,6 +10,7 @@ """ import pytest +import io import json import hashlib import os @@ -2820,7 +2821,7 @@ def test_fetch_single_catalog_sends_auth_header(self, temp_dir, monkeypatch): catalog_data = {"schema_version": "1.0", "extensions": {}} mock_response = MagicMock() - mock_response.read.return_value = json.dumps(catalog_data).encode() + mock_response.read.side_effect = io.BytesIO(json.dumps(catalog_data).encode()).read mock_response.__enter__ = lambda s: s mock_response.__exit__ = MagicMock(return_value=False) mock_response.geturl.return_value = "https://raw.githubusercontent.com/org/repo/main/catalog.json" @@ -2904,7 +2905,7 @@ def test_download_extension_sends_auth_header(self, temp_dir, monkeypatch): zip_bytes = zip_buf.getvalue() mock_response = MagicMock() - mock_response.read.return_value = zip_bytes + mock_response.read.side_effect = io.BytesIO(zip_bytes).read mock_response.__enter__ = lambda s: s mock_response.__exit__ = MagicMock(return_value=False) @@ -2937,7 +2938,7 @@ def test_download_extension_verifies_sha256(self, temp_dir): catalog = self._make_catalog(temp_dir) zip_bytes = b"fake zip data" mock_response = MagicMock() - mock_response.read.return_value = zip_bytes + mock_response.read.side_effect = io.BytesIO(zip_bytes).read mock_response.__enter__ = lambda s: s mock_response.__exit__ = MagicMock(return_value=False) ext_info = { @@ -2960,7 +2961,7 @@ def test_download_extension_rejects_sha256_mismatch(self, temp_dir): catalog = self._make_catalog(temp_dir) mock_response = MagicMock() - 
mock_response.read.return_value = b"fake zip data" + mock_response.read.side_effect = io.BytesIO(b"fake zip data").read mock_response.__enter__ = lambda s: s mock_response.__exit__ = MagicMock(return_value=False) ext_info = { @@ -4034,7 +4035,7 @@ def test_download_extension_allows_bundled_with_url(self, temp_dir): } mock_response = MagicMock() - mock_response.read.return_value = b"fake zip data" + mock_response.read.side_effect = io.BytesIO(b"fake zip data").read mock_response.__enter__ = lambda s: s mock_response.__exit__ = MagicMock(return_value=False) diff --git a/tests/test_presets.py b/tests/test_presets.py index 0749c4052a..dca8ca3d1f 100644 --- a/tests/test_presets.py +++ b/tests/test_presets.py @@ -11,6 +11,7 @@ """ import pytest +import io import json import hashlib import tempfile @@ -1522,7 +1523,7 @@ def test_fetch_single_catalog_sends_auth_header(self, project_dir, monkeypatch): catalog_data = {"schema_version": "1.0", "presets": {}} mock_response = MagicMock() - mock_response.read.return_value = json.dumps(catalog_data).encode() + mock_response.read.side_effect = io.BytesIO(json.dumps(catalog_data).encode()).read mock_response.__enter__ = lambda s: s mock_response.__exit__ = MagicMock(return_value=False) mock_response.geturl.return_value = "https://raw.githubusercontent.com/org/repo/main/presets/catalog.json" @@ -1563,7 +1564,7 @@ def test_download_pack_sends_auth_header(self, project_dir, monkeypatch): zip_bytes = zip_buf.getvalue() mock_response = MagicMock() - mock_response.read.return_value = zip_bytes + mock_response.read.side_effect = io.BytesIO(zip_bytes).read mock_response.__enter__ = lambda s: s mock_response.__exit__ = MagicMock(return_value=False) @@ -1638,7 +1639,7 @@ def test_download_pack_verifies_sha256(self, project_dir): catalog = PresetCatalog(project_dir) zip_bytes = b"fake zip data" mock_response = MagicMock() - mock_response.read.return_value = zip_bytes + mock_response.read.side_effect = io.BytesIO(zip_bytes).read 
mock_response.__enter__ = lambda s: s mock_response.__exit__ = MagicMock(return_value=False) pack_info = { @@ -1662,7 +1663,7 @@ def test_download_pack_rejects_sha256_mismatch(self, project_dir): catalog = PresetCatalog(project_dir) mock_response = MagicMock() - mock_response.read.return_value = b"fake zip data" + mock_response.read.side_effect = io.BytesIO(b"fake zip data").read mock_response.__enter__ = lambda s: s mock_response.__exit__ = MagicMock(return_value=False) pack_info = { diff --git a/tests/test_upgrade.py b/tests/test_upgrade.py index c125b583fd..f6bd18fbc1 100644 --- a/tests/test_upgrade.py +++ b/tests/test_upgrade.py @@ -8,6 +8,7 @@ (if installed) with `--disable-socket` as an extra safety net. """ +import io import json import urllib.error import importlib.metadata @@ -39,7 +40,10 @@ def _mock_urlopen_response(payload: dict) -> MagicMock: body = json.dumps(payload).encode("utf-8") resp = MagicMock() - resp.read.return_value = body + # Back read() with a real stream so it advances and returns b"" at EOF, + # matching http.client.HTTPResponse (a fixed return_value would loop forever + # under read_response_limited's bounded read loop). + resp.read.side_effect = io.BytesIO(body).read cm = MagicMock() cm.__enter__.return_value = resp cm.__exit__.return_value = False diff --git a/tests/test_workflows.py b/tests/test_workflows.py index 6c7ed3e1ce..f9c8b98237 100644 --- a/tests/test_workflows.py +++ b/tests/test_workflows.py @@ -2621,9 +2621,16 @@ def _spy(response, **kwargs): class _FakeResponse: def __init__(self): self._data = json.dumps({"workflows": []}).encode() - - def read(self, _size=-1): - return self._data + self._pos = 0 + + def read(self, size=-1): + # Advance a cursor and return b"" at EOF like a real stream, so + # read_response_limited's bounded loop terminates. 
+ if size is None or size < 0: + size = len(self._data) - self._pos + out = self._data[self._pos : self._pos + size] + self._pos += len(out) + return out def geturl(self): return entry.url From 6a0bd2cae2feade3fb4ce2e9113b627eddbbdcf7 Mon Sep 17 00:00:00 2001 From: Pascal Date: Sat, 30 May 2026 17:09:41 +0200 Subject: [PATCH 24/30] fix: address follow-up Copilot review (error typing, docs, tests) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - __init__.py preset/extension URL installs: give read_response_limited a domain error_type (PresetError / ExtensionError) and catch that instead of a blanket ValueError, so an oversized body is reported cleanly while unrelated ValueErrors surface as real errors. The extension catch now also covers install_from_zip's ValidationError (an ExtensionError). - _utils.run_command: rewrite the misleading docstring — shell=False is the only honoured mode; shell=True is rejected with ValueError, the parameter is retained only so existing keyword callers don't hit TypeError. - _download_security: document that the loopback allowance is an exact-string match (not an IP-range check), that read_response_limited's max_bytes default is the 50 MiB ceiling (callers with tighter budgets should pass an explicit value), and how _safe_zip_name handles single trailing-slash directory markers vs malformed empty segments. - authentication/http: comment the empty-hosts _StripAuthOnRedirect use as the HTTPS-downgrade guard on the unauthenticated path. - check_security_requirements: document the HEAD^ fallback failing safe (audit anyway) on shallow / single-commit checkouts. - security.yml: document the universal committed snapshot vs per-Python scheduled compile distinction. - tests: add a regression test that a symlink alongside benign members is rejected with no partial extraction to disk. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../scripts/check_security_requirements.py | 5 +++++ .github/workflows/security.yml | 7 +++++++ src/specify_cli/__init__.py | 14 +++++++++++-- src/specify_cli/_download_security.py | 16 ++++++++++++++ src/specify_cli/_utils.py | 7 +++++-- src/specify_cli/authentication/http.py | 3 +++ tests/test_download_security.py | 21 +++++++++++++++++++ 7 files changed, 69 insertions(+), 4 deletions(-) diff --git a/.github/scripts/check_security_requirements.py b/.github/scripts/check_security_requirements.py index 876fbf1a47..38040d7bd9 100644 --- a/.github/scripts/check_security_requirements.py +++ b/.github/scripts/check_security_requirements.py @@ -18,6 +18,11 @@ def _dependency_diff_refs() -> tuple[str, str]: head_ref = os.environ.get("DEPENDENCY_DIFF_HEAD", "").strip() or "HEAD" if base_ref and not set(base_ref) <= {"0"}: return base_ref, head_ref + # Fallback when no usable base is supplied (push with an all-zero + # ``github.event.before``, manual dispatch, etc.). ``HEAD^`` fails on a + # shallow checkout or a single-commit repo; that ``git diff`` error is + # caught by the caller and deliberately treated as "inputs changed" so the + # audit runs anyway — failing safe (audit) rather than skipping silently. return "HEAD^", "HEAD" diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml index 467c6989d8..2e9124a357 100644 --- a/.github/workflows/security.yml +++ b/.github/workflows/security.yml @@ -34,6 +34,13 @@ jobs: with: python-version: ${{ matrix.python-version }} + # The committed .github/security-audit-requirements.txt is generated with + # --universal (resolves across all interpreters/platforms) and is what + # push/PR runs audit. The scheduled job instead compiles per matrix + # entry with --python-version so it can surface advisories in wheels that + # only resolve on a specific interpreter (e.g. 3.11-only) — coverage the + # universal file may not exercise. 
This broadening is intentional; PR runs + # trade that depth for determinism against the committed snapshot. - name: Compile scheduled audit requirements if: ${{ github.event_name == 'schedule' }} run: | diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py index 7039722e9c..ff7f321776 100644 --- a/src/specify_cli/__init__.py +++ b/src/specify_cli/__init__.py @@ -2114,10 +2114,15 @@ def preset_add( zip_path.write_bytes( read_response_limited( response, + error_type=PresetError, label=f"preset {from_url}", ) ) - except (urllib.error.URLError, ValueError) as e: + # The URL scheme is validated above, so the only failures here + # are network errors and an oversized body (raised as PresetError + # via error_type). Catching those specifically lets unrelated + # ValueErrors surface instead of masquerading as download errors. + except (urllib.error.URLError, PresetError) as e: console.print(f"[red]Error:[/red] Failed to download: {e}") raise typer.Exit(1) @@ -3047,13 +3052,18 @@ def extension_add( ) as response: zip_data = read_response_limited( response, + error_type=ExtensionError, label=f"extension {from_url}", ) zip_path.write_bytes(zip_data) # Install from downloaded ZIP manifest = manager.install_from_zip(zip_path, speckit_version, priority=priority) - except (urllib.error.URLError, ValueError) as e: + # ExtensionError covers an oversized body (via error_type) and the + # ValidationError/ExtensionError raised by install_from_zip; URL + # scheme is validated above. Catching these instead of a blanket + # ValueError lets unrelated ValueErrors surface as real errors. 
+ except (urllib.error.URLError, ExtensionError) as e: console.print(f"[red]Error:[/red] Failed to download from {from_url}: {e}") raise typer.Exit(1) finally: diff --git a/src/specify_cli/_download_security.py b/src/specify_cli/_download_security.py index 40cb687a8e..8e9d5dc8d8 100644 --- a/src/specify_cli/_download_security.py +++ b/src/specify_cli/_download_security.py @@ -26,6 +26,12 @@ def is_https_or_localhost_http(url: str) -> bool: Shared redirect-safety predicate used by the GitHub and auth HTTP redirect handlers so the rule (and any future tightening of it) lives in one place. + + The loopback allowance is a deliberate *exact-string* match on + ``localhost`` / ``127.0.0.1`` / ``::1``, not an IP-range check: other + loopback addresses (e.g. ``127.0.0.2``) are intentionally not covered. + ``urlparse`` already lower-cases the hostname, so the comparison is + case-insensitive. """ parsed = urlparse(url) is_localhost = parsed.hostname in ("localhost", "127.0.0.1", "::1") @@ -53,6 +59,12 @@ def read_response_limited( return fewer even when more data is pending (e.g. chunked transfer encoding), so a single ``read(max_bytes + 1)`` cannot enforce the bound on its own. Read in a loop until EOF or until one byte past the limit has been accumulated. + + *max_bytes* is keyword-only. It defaults to the module-wide + ``MAX_DOWNLOAD_BYTES`` (50 MiB) ceiling for archive/payload downloads; + callers with a tighter budget (e.g. small JSON responses) should pass an + explicit value so the intended bound is pinned at the call site rather than + tracking changes to the shared default. """ chunks: list[bytes] = [] total = 0 @@ -111,6 +123,10 @@ def _safe_zip_name(name: str, *, error_type: type[ErrorT]) -> str: normalized = name.replace("\\", "/") path = PurePosixPath(normalized) raw_parts = normalized.split("/") + # Strip a single trailing empty segment, i.e. the one-slash directory + # marker that legitimate ZIPs use ("mydir/", "mydir/subdir/"). 
Anything + # else that produces an empty segment — consecutive slashes ("a//b") or a + # second trailing slash — is left in place and rejected below as malformed. if raw_parts and raw_parts[-1] == "": raw_parts = raw_parts[:-1] has_windows_drive = re.match(r"^[A-Za-z]:", normalized) is not None diff --git a/src/specify_cli/_utils.py b/src/specify_cli/_utils.py index cae681d97f..95ddd25fbc 100644 --- a/src/specify_cli/_utils.py +++ b/src/specify_cli/_utils.py @@ -24,8 +24,11 @@ def run_command( ) -> str | None: """Run a command without invoking a shell and optionally capture output. - ``shell`` remains accepted for public API compatibility, but shell - execution is intentionally unsupported. + The ``shell`` parameter is kept in the signature so existing keyword + callers (and the re-export from ``specify_cli``) don't raise ``TypeError``, + but only the default ``shell=False`` is honoured. ``shell=True`` is + rejected with ``ValueError`` rather than silently ignored, so the + unsupported mode fails loudly instead of running with a different meaning. """ if shell: raise ValueError( diff --git a/src/specify_cli/authentication/http.py b/src/specify_cli/authentication/http.py index bcd1815520..8ee061db76 100644 --- a/src/specify_cli/authentication/http.py +++ b/src/specify_cli/authentication/http.py @@ -161,6 +161,9 @@ def _make_req(auth_headers: dict[str, str]) -> urllib.request.Request: # No entry worked (or none matched) — unauthenticated fallback req = _make_req({}) if strict_redirects: + # No auth is attached on this path, so the handler's host list is empty: + # here it acts purely as the HTTPS-downgrade guard (its redirect_request + # rejects non-HTTPS, non-loopback targets), not as an auth-stripper. 
opener = urllib.request.build_opener(_StripAuthOnRedirect(())) return opener.open(req, timeout=timeout) return urllib.request.urlopen(req, timeout=timeout) # noqa: S310 diff --git a/tests/test_download_security.py b/tests/test_download_security.py index 78658e14cf..41e54f5075 100644 --- a/tests/test_download_security.py +++ b/tests/test_download_security.py @@ -177,6 +177,27 @@ def test_safe_extract_zip_rejects_symlinks(tmp_path): safe_extract_zip(zip_path, tmp_path / "out") +def test_safe_extract_zip_rejects_symlink_without_partial_extraction(tmp_path): + # A symlink sitting next to benign members must be rejected before ANY + # file is written: validation runs over the whole member list first, so an + # unsafe member cannot leak a partially-extracted tree to disk. + zip_path = tmp_path / "mixed.zip" + link = zipfile.ZipInfo("evil-link") + link.external_attr = (stat.S_IFLNK | 0o777) << 16 + with zipfile.ZipFile(zip_path, "w") as zf: + zf.writestr("safe/first.txt", "hello") + zf.writestr(link, "target") + zf.writestr("safe/second.txt", "world") + + out_dir = tmp_path / "out" + with pytest.raises(ValueError, match="Unsafe symlink"): + safe_extract_zip(zip_path, out_dir) + + # Nothing should have been written — not even the benign member that + # precedes the symlink in the archive. + assert not out_dir.exists() or not any(out_dir.rglob("*")) + + def test_safe_extract_zip_rejects_oversized_member(tmp_path): zip_path = tmp_path / "bad.zip" with zipfile.ZipFile(zip_path, "w") as zf: From 85cc129c931d1e6d7871f2b5a0a179aaeec09899 Mon Sep 17 00:00:00 2001 From: Pascal Date: Sat, 6 Jun 2026 07:18:20 +0200 Subject: [PATCH 25/30] fix(security): bound inline ZIP manifest read; guard ADO token redirects Audit follow-up to the download-hardening work, closing similar cases within the scope of this PR: - Add read_zip_member_limited() and use it for the inline extension.yml read in the extension *update* path (__init__.py). 
That read happened before install_from_zip()'s safe_extract_zip(), so a raw zf.open().read() bypassed the per-member size bound: a manifest declaring a huge file_size (few KB compressed, gigabytes uncompressed) would be fully loaded by yaml.safe_load. The helper rejects on declared size and reads bounded. - Route the Azure DevOps OAuth token request through a strict-redirect opener so a 307/308 redirect cannot forward the client_secret POST body to a non-HTTPS, non-loopback host. - Tests for the new helper and the updated ADO opener path. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/specify_cli/__init__.py | 19 ++++--- src/specify_cli/_download_security.py | 53 +++++++++++++++++++ .../authentication/azure_devops.py | 10 +++- tests/test_authentication.py | 7 ++- tests/test_download_security.py | 35 ++++++++++++ 5 files changed, 115 insertions(+), 9 deletions(-) diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py index 5e6edf5316..3cbd0e1402 100644 --- a/src/specify_cli/__init__.py +++ b/src/specify_cli/__init__.py @@ -39,7 +39,7 @@ from rich.panel import Panel from rich.align import Align from rich.table import Table -from ._download_security import read_response_limited +from ._download_security import read_response_limited, read_zip_member_limited from .shared_infra import ( install_shared_infra as _install_shared_infra_impl, refresh_shared_templates as _refresh_shared_templates_impl, @@ -2396,17 +2396,24 @@ def extension_update( manifest_data = None namelist = zf.namelist() - # First try root-level extension.yml + # Read the manifest under a hard size cap: this happens + # before install_from_zip()'s safe_extract_zip(), so a + # raw zf.open().read() here would bypass that bound and + # let a zip-bomb extension.yml exhaust memory. 
+ manifest_member = None if "extension.yml" in namelist: - with zf.open("extension.yml") as f: - manifest_data = yaml.safe_load(f) or {} + manifest_member = "extension.yml" else: # Look for extension.yml in a single top-level subdirectory # (e.g., "repo-name-branch/extension.yml") manifest_paths = [n for n in namelist if n.endswith("/extension.yml") and n.count("/") == 1] if len(manifest_paths) == 1: - with zf.open(manifest_paths[0]) as f: - manifest_data = yaml.safe_load(f) or {} + manifest_member = manifest_paths[0] + + if manifest_member is not None: + manifest_data = yaml.safe_load( + read_zip_member_limited(zf, manifest_member) + ) or {} if manifest_data is None: raise ValueError("Downloaded extension archive is missing 'extension.yml'") diff --git a/src/specify_cli/_download_security.py b/src/specify_cli/_download_security.py index 8e9d5dc8d8..962ccf9f8e 100644 --- a/src/specify_cli/_download_security.py +++ b/src/specify_cli/_download_security.py @@ -115,6 +115,59 @@ def verify_sha256( ) +def read_zip_member_limited( + zf: zipfile.ZipFile, + name: str, + *, + max_bytes: int = MAX_ZIP_MEMBER_BYTES, + error_type: type[ErrorT] = ValueError, + label: str | None = None, +) -> bytes: + """Read a single ZIP member into memory under a hard size cap. + + Reading a member with ``zf.open(name).read()`` is unbounded: a crafted + archive can declare a tiny ``file_size`` yet decompress to many gigabytes (a + "zip bomb"), exhausting memory before the caller ever inspects the data. + This rejects members whose *declared* size already exceeds *max_bytes* and, + to defend against headers that lie, also reads in bounded chunks and stops + one byte past the limit. + + Use this for any inline manifest/metadata read that happens *before* + :func:`safe_extract_zip` (which already enforces the same per-member bound + during extraction); a raw ``zf.open(...).read()`` bypasses that protection. 
+ """ + member_label = label or name + try: + info = zf.getinfo(name) + except KeyError as exc: + _raise_from(error_type, f"ZIP member not found: {name}", exc) + if info.file_size > max_bytes: + _raise( + error_type, + f"ZIP member {member_label} exceeds maximum size of {max_bytes} bytes", + ) + + chunks: list[bytes] = [] + total = 0 + limit = max_bytes + 1 + try: + with zf.open(name, "r") as source: + while total < limit: + chunk = source.read(min(READ_CHUNK_SIZE, limit - total)) + if not chunk: + break + chunks.append(chunk) + total += len(chunk) + except (OSError, zipfile.BadZipFile, RuntimeError) as exc: + _raise_from(error_type, f"Failed to read ZIP member {member_label}: {exc}", exc) + if total > max_bytes: + _raise( + error_type, + f"ZIP member {member_label} exceeds maximum size of {max_bytes} bytes", + ) + return b"".join(chunks) + + def _safe_zip_name(name: str, *, error_type: type[ErrorT]) -> str: """Return a normalized ZIP member name or raise on traversal.""" if "\x00" in name: diff --git a/src/specify_cli/authentication/azure_devops.py b/src/specify_cli/authentication/azure_devops.py index 060d7bbf27..a5ce01b64d 100644 --- a/src/specify_cli/authentication/azure_devops.py +++ b/src/specify_cli/authentication/azure_devops.py @@ -114,8 +114,14 @@ def _acquire_via_client_credentials(entry: AuthConfigEntry) -> str | None: ) try: from specify_cli._download_security import read_response_limited - - with urllib.request.urlopen(req, timeout=30) as resp: # noqa: S310 + from specify_cli.authentication.http import _StripAuthOnRedirect + + # A 307/308 redirect preserves the POST body, which carries the + # client_secret. Reuse the package HTTPS-downgrade guard (empty host + # list ⇒ no auth header to strip, just the scheme check) so the + # secret can never be forwarded to a non-HTTPS, non-loopback host. 
+ opener = urllib.request.build_opener(_StripAuthOnRedirect(())) + with opener.open(req, timeout=30) as resp: # noqa: S310 payload = _json.loads( read_response_limited( resp, diff --git a/tests/test_authentication.py b/tests/test_authentication.py index 6a98127d32..5367c15880 100644 --- a/tests/test_authentication.py +++ b/tests/test_authentication.py @@ -501,7 +501,12 @@ def test_resolve_token_azure_ad_success(self, monkeypatch): mock_resp.read.side_effect = io.BytesIO(b'{"access_token": "ad-acquired-token"}').read mock_resp.__enter__ = lambda s: s mock_resp.__exit__ = MagicMock(return_value=False) - with patch("urllib.request.urlopen", return_value=mock_resp): + # The token request goes through a strict-redirect opener (so a 307/308 + # cannot forward the client_secret body to a non-HTTPS host), not bare + # urlopen — patch the opener it builds. + mock_opener = MagicMock() + mock_opener.open.return_value = mock_resp + with patch("urllib.request.build_opener", return_value=mock_opener): assert AzureDevOpsAuth().resolve_token(entry) == "ad-acquired-token" def test_resolve_token_azure_ad_missing_secret_returns_none(self, monkeypatch): diff --git a/tests/test_download_security.py b/tests/test_download_security.py index 41e54f5075..8acf50c32d 100644 --- a/tests/test_download_security.py +++ b/tests/test_download_security.py @@ -11,6 +11,7 @@ from specify_cli._download_security import ( read_response_limited, + read_zip_member_limited, safe_extract_zip, verify_sha256, ) @@ -265,6 +266,40 @@ def test_safe_extract_zip_wraps_directory_filesystem_errors(tmp_path): ) +def test_read_zip_member_limited_returns_member_within_limit(tmp_path): + zip_path = tmp_path / "ok.zip" + with zipfile.ZipFile(zip_path, "w") as zf: + zf.writestr("extension.yml", "extension:\n id: demo\n") + + with zipfile.ZipFile(zip_path, "r") as zf: + data = read_zip_member_limited(zf, "extension.yml") + + assert data == b"extension:\n id: demo\n" + + +def 
test_read_zip_member_limited_rejects_oversized_member(tmp_path): + # A manifest whose declared size already blows the cap (the zip-bomb shape: + # a few KB compressed that decompresses to gigabytes) is rejected before any + # of it is read into memory. + zip_path = tmp_path / "bomb.zip" + with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf: + zf.writestr("extension.yml", "a" * 5000) + + with zipfile.ZipFile(zip_path, "r") as zf: + with pytest.raises(ValueError, match="exceeds maximum size"): + read_zip_member_limited(zf, "extension.yml", max_bytes=16) + + +def test_read_zip_member_limited_wraps_missing_member(tmp_path): + zip_path = tmp_path / "ok.zip" + with zipfile.ZipFile(zip_path, "w") as zf: + zf.writestr("other.txt", "x") + + with zipfile.ZipFile(zip_path, "r") as zf: + with pytest.raises(_CustomZipError, match="ZIP member not found"): + read_zip_member_limited(zf, "extension.yml", error_type=_CustomZipError) + + def test_safe_extract_zip_extracts_safe_archive(tmp_path): zip_path = tmp_path / "ok.zip" out_dir = tmp_path / "out" From 095b586385708967fd704bd2a5c6045ca2e3a44e Mon Sep 17 00:00:00 2001 From: Pascal Date: Mon, 8 Jun 2026 23:38:03 +0200 Subject: [PATCH 26/30] fix(security): pin tight read bounds on JSON responses; cap actual ZIP bytes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses the Copilot review on PR #2442 and the same pattern elsewhere. - safe_extract_zip(): track the cumulative bytes actually written and fail past max_total_bytes, so the total-size bound holds even if member headers understate file_size (the declared-total check alone could be evaded). Mirrors the existing per-member written guard — defense-in-depth consistency. 
- Pass an explicit max_bytes to read_response_limited() at every JSON call site instead of inheriting the 50 MiB archive/payload default: * MAX_JSON_METADATA_BYTES (1 MiB): Azure AD token, GitHub release metadata, and the existing latest-release fetch (migrated off an inline literal). * MAX_JSON_CATALOG_BYTES (8 MiB): preset, extension, workflow and integration catalog fetches. Binary/archive downloads keep the 50 MiB ceiling. Both ceilings are centralized as documented constants in _download_security.py. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/specify_cli/_download_security.py | 26 +++++++++++ src/specify_cli/_github_http.py | 9 +++- src/specify_cli/_version.py | 4 +- .../authentication/azure_devops.py | 6 ++- src/specify_cli/extensions.py | 3 ++ src/specify_cli/integrations/catalog.py | 3 +- src/specify_cli/presets.py | 3 ++ src/specify_cli/workflows/catalog.py | 3 +- tests/test_download_security.py | 44 +++++++++++++++++++ 9 files changed, 94 insertions(+), 7 deletions(-) diff --git a/src/specify_cli/_download_security.py b/src/specify_cli/_download_security.py index 962ccf9f8e..0e5ffbdd3d 100644 --- a/src/specify_cli/_download_security.py +++ b/src/specify_cli/_download_security.py @@ -18,6 +18,20 @@ MAX_ZIP_MEMBER_BYTES = 10 * 1024 * 1024 MAX_ZIP_TOTAL_BYTES = 50 * 1024 * 1024 READ_CHUNK_SIZE = 1024 * 1024 + +# Tighter ceilings for responses that are read fully into memory and parsed as +# JSON. The 50 MiB MAX_DOWNLOAD_BYTES default is sized for archive/payload +# downloads; JSON responses are far smaller, so capping them close to their real +# size shrinks the memory-DoS surface and keeps the "too large" error reachable +# (rather than only triggering on tens of MiB). Pass the matching constant +# explicitly at each JSON call site so the intended bound is pinned there. +# * METADATA — fixed-shape single-object responses (an OAuth token, one +# release's metadata): a few KiB in practice, 1 MiB is already generous. 
+# * CATALOG — listings that grow with the number of published items. The +# largest bundled catalog is ~130 KiB today, so 8 MiB leaves ~60x headroom +# for growth while staying well under the download ceiling. +MAX_JSON_METADATA_BYTES = 1 * 1024 * 1024 +MAX_JSON_CATALOG_BYTES = 8 * 1024 * 1024 SHA256_RE = re.compile(r"^[0-9a-fA-F]{64}$") @@ -263,6 +277,11 @@ def safe_extract_zip( normalized_members.append((member, normalized_name)) + # The loop above bounds the *declared* total via member.file_size, but a + # crafted archive can understate those headers. Mirror the per-member + # guard below with a cumulative count of the bytes actually written so + # the total-size bound holds even when the headers lie. + total_written = 0 for member, normalized_name in normalized_members: member_path = target_dir / normalized_name if member.is_dir(): @@ -298,6 +317,13 @@ def safe_extract_zip( f"ZIP member {member.filename} exceeds maximum size " f"of {max_member_bytes} bytes", ) + total_written += len(chunk) + if total_written > max_total_bytes: + _raise( + error_type, + f"ZIP archive exceeds maximum uncompressed size " + f"of {max_total_bytes} bytes", + ) dest.write(chunk) except (OSError, zipfile.BadZipFile, RuntimeError) as exc: _raise_from( diff --git a/src/specify_cli/_github_http.py b/src/specify_cli/_github_http.py index 85ec9319eb..852d0f785e 100644 --- a/src/specify_cli/_github_http.py +++ b/src/specify_cli/_github_http.py @@ -121,7 +121,10 @@ def resolve_github_release_asset_api_url( import json import urllib.error - from specify_cli._download_security import read_response_limited + from specify_cli._download_security import ( + MAX_JSON_METADATA_BYTES, + read_response_limited, + ) parsed = urlparse(download_url) parts = [unquote(part) for part in parsed.path.strip("/").split("/")] @@ -152,7 +155,9 @@ def resolve_github_release_asset_api_url( with open_url_fn(release_url, timeout=timeout) as response: release_data = json.loads( read_response_limited( - response, 
label=f"GitHub release metadata {release_url}" + response, + max_bytes=MAX_JSON_METADATA_BYTES, + label=f"GitHub release metadata {release_url}", ) ) # ValueError covers both an oversized body (raised by read_response_limited) diff --git a/src/specify_cli/_version.py b/src/specify_cli/_version.py index 12a63e5392..d2adde1265 100644 --- a/src/specify_cli/_version.py +++ b/src/specify_cli/_version.py @@ -29,7 +29,7 @@ from packaging.version import InvalidVersion, Version from ._console import console -from ._download_security import read_response_limited +from ._download_security import MAX_JSON_METADATA_BYTES, read_response_limited GITHUB_API_LATEST = "https://api.github.com/repos/github/spec-kit/releases/latest" _RESOLUTION_FAILURE_OFFLINE = "offline or timeout" @@ -123,7 +123,7 @@ def _fetch_latest_release_tag() -> tuple[str | None, str | None]: payload = json.loads( read_response_limited( resp, - max_bytes=1024 * 1024, + max_bytes=MAX_JSON_METADATA_BYTES, label="GitHub latest release", ).decode("utf-8") ) diff --git a/src/specify_cli/authentication/azure_devops.py b/src/specify_cli/authentication/azure_devops.py index a5ce01b64d..2e041004db 100644 --- a/src/specify_cli/authentication/azure_devops.py +++ b/src/specify_cli/authentication/azure_devops.py @@ -113,7 +113,10 @@ def _acquire_via_client_credentials(entry: AuthConfigEntry) -> str | None: headers={"Content-Type": "application/x-www-form-urlencoded"}, ) try: - from specify_cli._download_security import read_response_limited + from specify_cli._download_security import ( + MAX_JSON_METADATA_BYTES, + read_response_limited, + ) from specify_cli.authentication.http import _StripAuthOnRedirect # A 307/308 redirect preserves the POST body, which carries the @@ -125,6 +128,7 @@ def _acquire_via_client_credentials(entry: AuthConfigEntry) -> str | None: payload = _json.loads( read_response_limited( resp, + max_bytes=MAX_JSON_METADATA_BYTES, error_type=_TokenResponseTooLarge, label="Azure DevOps token response", 
).decode("utf-8") diff --git a/src/specify_cli/extensions.py b/src/specify_cli/extensions.py index 01c05082d3..dae4d8402a 100644 --- a/src/specify_cli/extensions.py +++ b/src/specify_cli/extensions.py @@ -25,6 +25,7 @@ from packaging.specifiers import SpecifierSet, InvalidSpecifier from ._download_security import ( + MAX_JSON_CATALOG_BYTES, read_response_limited, safe_extract_zip, verify_sha256, @@ -2022,6 +2023,7 @@ def _fetch_single_catalog(self, entry: CatalogEntry, force_refresh: bool = False catalog_data = json.loads( read_response_limited( response, + max_bytes=MAX_JSON_CATALOG_BYTES, error_type=ExtensionError, label=f"extension catalog {entry.url}", ) @@ -2144,6 +2146,7 @@ def fetch_catalog(self, force_refresh: bool = False) -> Dict[str, Any]: catalog_data = json.loads( read_response_limited( response, + max_bytes=MAX_JSON_CATALOG_BYTES, error_type=ExtensionError, label=f"extension catalog {catalog_url}", ) diff --git a/src/specify_cli/integrations/catalog.py b/src/specify_cli/integrations/catalog.py index 83ad446642..6af83762d7 100644 --- a/src/specify_cli/integrations/catalog.py +++ b/src/specify_cli/integrations/catalog.py @@ -21,7 +21,7 @@ import yaml from packaging import version as pkg_version -from .._download_security import read_response_limited +from .._download_security import MAX_JSON_CATALOG_BYTES, read_response_limited from ..catalogs import CatalogEntry, CatalogStackBase @@ -174,6 +174,7 @@ def _fetch_single_catalog( catalog_data = json.loads( read_response_limited( resp, + max_bytes=MAX_JSON_CATALOG_BYTES, error_type=IntegrationCatalogError, label=f"integration catalog {entry.url}", ) diff --git a/src/specify_cli/presets.py b/src/specify_cli/presets.py index 8798e54545..f7f975c572 100644 --- a/src/specify_cli/presets.py +++ b/src/specify_cli/presets.py @@ -27,6 +27,7 @@ from packaging.specifiers import SpecifierSet, InvalidSpecifier from ._download_security import ( + MAX_JSON_CATALOG_BYTES, read_response_limited, safe_extract_zip, 
verify_sha256, @@ -2092,6 +2093,7 @@ def _fetch_single_catalog(self, entry: PresetCatalogEntry, force_refresh: bool = catalog_data = json.loads( read_response_limited( response, + max_bytes=MAX_JSON_CATALOG_BYTES, error_type=PresetError, label=f"preset catalog {entry.url}", ) @@ -2191,6 +2193,7 @@ def fetch_catalog(self, force_refresh: bool = False) -> Dict[str, Any]: catalog_data = json.loads( read_response_limited( response, + max_bytes=MAX_JSON_CATALOG_BYTES, error_type=PresetError, label=f"preset catalog {catalog_url}", ) diff --git a/src/specify_cli/workflows/catalog.py b/src/specify_cli/workflows/catalog.py index c4efca2600..7537555d5c 100644 --- a/src/specify_cli/workflows/catalog.py +++ b/src/specify_cli/workflows/catalog.py @@ -19,7 +19,7 @@ import yaml -from specify_cli._download_security import read_response_limited +from specify_cli._download_security import MAX_JSON_CATALOG_BYTES, read_response_limited # --------------------------------------------------------------------------- @@ -348,6 +348,7 @@ def _validate_catalog_url(url: str) -> None: data = json.loads( read_response_limited( resp, + max_bytes=MAX_JSON_CATALOG_BYTES, error_type=WorkflowCatalogError, label="workflow catalog", ).decode("utf-8") diff --git a/tests/test_download_security.py b/tests/test_download_security.py index 8acf50c32d..0f884c2f87 100644 --- a/tests/test_download_security.py +++ b/tests/test_download_security.py @@ -228,6 +228,50 @@ def test_safe_extract_zip_rejects_total_uncompressed_size(tmp_path): safe_extract_zip(zip_path, tmp_path / "out", max_total_bytes=5) +def test_safe_extract_zip_bounds_actual_written_bytes_when_headers_understate_size( + tmp_path, monkeypatch +): + # Defense in depth: the pre-extraction check sums the *declared* + # member.file_size values, which a crafted archive can understate so that + # check passes. 
If the ZIP reader then yields more bytes than the header + # promised, the extraction loop must still abort once the cumulative bytes + # actually written exceed max_total_bytes. CPython's own zipfile happens to + # bound member reads to file_size and CRC-check them, so we substitute a + # reader that does not — exercising our guard rather than the stdlib's. + zip_path = tmp_path / "liar.zip" + with zipfile.ZipFile(zip_path, "w") as zf: + zf.writestr("a.txt", "") # declared file_size 0 ⇒ declared total stays 0 + zf.writestr("b.txt", "") + + class _OverreadingStream: + """A member reader that yields more bytes than any header declared.""" + + def __init__(self, payload: bytes): + self._remaining = payload + + def read(self, size: int = -1) -> bytes: + if size is None or size < 0: + size = len(self._remaining) + out, self._remaining = self._remaining[:size], self._remaining[size:] + return out + + def __enter__(self): + return self + + def __exit__(self, *exc): + return False + + # Each member streams 8 bytes despite declaring 0; the per-member cap (10 MiB + # default) is untouched, so only the cumulative guard can stop this. + monkeypatch.setattr( + zipfile.ZipFile, "open", lambda self, *a, **k: _OverreadingStream(b"x" * 8) + ) + + # 8 bytes for "a.txt" (total 8 ≤ 12), then "b.txt" busts the 12-byte ceiling. 
+ with pytest.raises(ValueError, match="maximum uncompressed size"): + safe_extract_zip(zip_path, tmp_path / "out", max_total_bytes=12) + + def test_safe_extract_zip_wraps_bad_zip_file(tmp_path): zip_path = tmp_path / "bad.zip" zip_path.write_bytes(b"not a zip archive") From bacddff2840f28fef750672b7463ea2551f7c6b7 Mon Sep 17 00:00:00 2001 From: Pascal Date: Wed, 10 Jun 2026 08:08:36 +0200 Subject: [PATCH 27/30] fix: align checkout pins and centralize loopback predicate - pin actions/checkout to the repo-wide df4cb1c (v6.0.3) in lint.yml and security.yml - replace the ad-hoc ip_address loopback checks in the workflow add URL/catalog flows with the shared is_https_or_localhost_http predicate, so HTTP-on-loopback rules match the redirect handler - drop the empty member name from the zip dot-segment test: zipfile cannot write such an entry, the case crashed in the test itself --- .github/workflows/lint.yml | 2 +- .github/workflows/security.yml | 6 ++-- src/specify_cli/__init__.py | 59 ++++++--------------------------- tests/test_download_security.py | 4 ++- 4 files changed, 17 insertions(+), 54 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 83ccccde7d..5ba2989cc0 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -47,7 +47,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 # shellcheck is preinstalled on ubuntu-latest runners. 
# Start at --severity=error to block real bugs without flagging style diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml index 2e9124a357..a48f40c8e9 100644 --- a/.github/workflows/security.yml +++ b/.github/workflows/security.yml @@ -22,7 +22,7 @@ jobs: python-version: ["3.11", "3.12", "3.13"] steps: - name: Checkout - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 with: fetch-depth: 2 @@ -67,7 +67,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 with: # Need the PR base to compare baseline growth. fetch-depth: 0 @@ -148,7 +148,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 with: # Needed by check_secrets_baseline.py to read the baseline at base ref. 
fetch-depth: 0 diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py index 24d9ae6a02..8f58f881cf 100644 --- a/src/specify_cli/__init__.py +++ b/src/specify_cli/__init__.py @@ -40,7 +40,11 @@ from rich.panel import Panel from rich.align import Align from rich.table import Table -from ._download_security import read_response_limited, read_zip_member_limited +from ._download_security import ( + is_https_or_localhost_http, + read_response_limited, + read_zip_member_limited, +) from .shared_infra import ( install_shared_infra as _install_shared_infra_impl, refresh_shared_templates as _refresh_shared_templates_impl, @@ -3087,20 +3091,9 @@ def _validate_and_install_local(yaml_path: Path, source_label: str) -> None: # Try as URL (http/https) if source.startswith("http://") or source.startswith("https://"): - from ipaddress import ip_address - from urllib.parse import urlparse from specify_cli.authentication.http import open_url as _open_url - parsed_src = urlparse(source) - src_host = parsed_src.hostname or "" - src_loopback = src_host == "localhost" - if not src_loopback: - try: - src_loopback = ip_address(src_host).is_loopback - except ValueError: - # Host is not an IP literal (e.g., a DNS name); keep default non-loopback. - pass - if parsed_src.scheme != "https" and not (parsed_src.scheme == "http" and src_loopback): + if not is_https_or_localhost_http(source): console.print("[red]Error:[/red] Only HTTPS URLs are allowed, except HTTP for localhost.") raise typer.Exit(1) @@ -3121,16 +3114,7 @@ def _validate_and_install_local(yaml_path: Path, source_label: str) -> None: strict_redirects=True, ) as resp: final_url = resp.geturl() - final_parsed = urlparse(final_url) - final_host = final_parsed.hostname or "" - final_lb = final_host == "localhost" - if not final_lb: - try: - final_lb = ip_address(final_host).is_loopback - except ValueError: - # Redirect host is not an IP literal; keep loopback as determined above. 
- pass - if final_parsed.scheme != "https" and not (final_parsed.scheme == "http" and final_lb): + if not is_https_or_localhost_http(final_url): console.print(f"[red]Error:[/red] URL redirected to non-HTTPS: {final_url}") raise typer.Exit(1) with tempfile.NamedTemporaryFile(suffix=".yml", delete=False) as tmp: @@ -3184,24 +3168,10 @@ def _validate_and_install_local(yaml_path: Path, source_label: str) -> None: raise typer.Exit(1) # Validate URL scheme (HTTPS required, HTTP allowed for localhost only) - from ipaddress import ip_address - from urllib.parse import urlparse - - parsed_url = urlparse(workflow_url) - url_host = parsed_url.hostname or "" - is_loopback = False - if url_host == "localhost": - is_loopback = True - else: - try: - is_loopback = ip_address(url_host).is_loopback - except ValueError: - # Host is not an IP literal (e.g., a regular hostname); treat as non-loopback. - pass - if parsed_url.scheme != "https" and not (parsed_url.scheme == "http" and is_loopback): + if not is_https_or_localhost_http(workflow_url): console.print( f"[red]Error:[/red] Workflow '{source}' has an invalid install URL. " - "Only HTTPS URLs are allowed, except HTTP for localhost/loopback." + "Only HTTPS URLs are allowed, except HTTP for localhost." ) raise typer.Exit(1) @@ -3233,16 +3203,7 @@ def _validate_and_install_local(yaml_path: Path, source_label: str) -> None: ) as response: # Validate final URL after redirects final_url = response.geturl() - final_parsed = urlparse(final_url) - final_host = final_parsed.hostname or "" - final_loopback = final_host == "localhost" - if not final_loopback: - try: - final_loopback = ip_address(final_host).is_loopback - except ValueError: - # Host is not an IP literal (e.g., a regular hostname); treat as non-loopback. 
- pass - if final_parsed.scheme != "https" and not (final_parsed.scheme == "http" and final_loopback): + if not is_https_or_localhost_http(final_url): if workflow_dir.exists(): import shutil shutil.rmtree(workflow_dir, ignore_errors=True) diff --git a/tests/test_download_security.py b/tests/test_download_security.py index 0f884c2f87..9c2eb2b18d 100644 --- a/tests/test_download_security.py +++ b/tests/test_download_security.py @@ -156,7 +156,9 @@ def test_safe_extract_zip_rejects_traversal(tmp_path, member_name): safe_extract_zip(zip_path, tmp_path / "out") -@pytest.mark.parametrize("member_name", ["", ".", "./file.txt", "nested/./file.txt", "nested//file.txt"]) +# An empty member name is rejected by _safe_zip_name too, but zipfile cannot +# even write such an entry, so it is not testable through this API. +@pytest.mark.parametrize("member_name", [".", "./file.txt", "nested/./file.txt", "nested//file.txt"]) def test_safe_extract_zip_rejects_dot_path_segments(tmp_path, member_name): zip_path = tmp_path / "bad.zip" with zipfile.ZipFile(zip_path, "w") as zf: From 8994fe2396ae7df962b87c1cff366e40170af08b Mon Sep 17 00:00:00 2001 From: Pascal Date: Wed, 10 Jun 2026 22:50:01 +0200 Subject: [PATCH 28/30] fix: pre-empt review feedback on pins, predicate reuse, and baseline gates - align the setup-uv pin in security.yml with test.yml (v8.2.0) - use is_https_or_localhost_http for the preset_add/extension_add URL checks and pass strict_redirects=True to the latest-release fetch and the release-asset resolver call sites - baseline gate scripts fail closed on unresolvable refs and git read errors instead of treating them as "baseline did not exist"; the security workflow re-runs on labeled/unlabeled so the ack label can turn the gate green without a push - regenerate the bandit baseline against HEAD (two entries referenced removed code, one had drifted); track baseline entries by file+test_id in tests so line drift no longer breaks them - raise ZIP size-limit errors outside the 
broad except in safe_extract_zip so an error_type subclassing OSError/RuntimeError cannot re-wrap them - tests: drop two redirect tests duplicated from test_authentication, move the downgrade test next to its siblings, assert the workflow catalog max_bytes, route OpenerDirector.open through urlopen in the modules that patch urlopen, add set -euo pipefail to the secret scan, misc cleanup (unused helper, redundant imports, EOF-less fake read) --- .github/bandit-baseline.json | 46 +-------------- .github/scripts/check_bandit_baseline.py | 31 +++++++++- .github/scripts/check_secrets_baseline.py | 34 ++++++++++- .github/workflows/security.yml | 10 +++- src/specify_cli/__init__.py | 31 +++++----- src/specify_cli/_download_security.py | 23 +++++--- src/specify_cli/_version.py | 1 + tests/http_helpers.py | 25 ++++++++ .../integrations/test_integration_catalog.py | 8 ++- tests/self_upgrade_helpers.py | 3 +- tests/test_authentication.py | 26 +++++++-- tests/test_baseline_gates.py | 10 ++++ tests/test_extensions.py | 57 ------------------- tests/test_security_workflow.py | 22 +++---- tests/test_self_upgrade_detection.py | 1 + tests/test_self_upgrade_execution.py | 1 + tests/test_self_upgrade_verification.py | 1 + tests/test_upgrade.py | 5 +- tests/test_workflows.py | 6 +- 19 files changed, 191 insertions(+), 150 deletions(-) diff --git a/.github/bandit-baseline.json b/.github/bandit-baseline.json index 345fea6d2d..14ee416174 100644 --- a/.github/bandit-baseline.json +++ b/.github/bandit-baseline.json @@ -1,47 +1,7 @@ { "results": [ { - "code": "103 if not req.get_header(\"Authorization\") and not strict_redirects:\n104 return urllib.request.urlopen(req, timeout=timeout)\n105 \n", - "col_offset": 15, - "end_col_offset": 59, - "filename": "src/specify_cli/_github_http.py", - "issue_confidence": "HIGH", - "issue_cwe": { - "id": 22, - "link": "https://cwe.mitre.org/data/definitions/22.html" - }, - "issue_severity": "MEDIUM", - "issue_text": "Audit url open for permitted schemes. 
Allowing use of file:/ or custom schemes is often unexpected.", - "line_number": 104, - "line_range": [ - 104 - ], - "more_info": "https://bandit.readthedocs.io/en/1.9.4/blacklists/blacklist_calls.html#b310-urllib-urlopen", - "test_id": "B310", - "test_name": "blacklist" - }, - { - "code": "113 \n114 with urllib.request.urlopen(req, timeout=30) as resp: # noqa: S310\n115 payload = _json.loads(\n", - "col_offset": 17, - "end_col_offset": 56, - "filename": "src/specify_cli/authentication/azure_devops.py", - "issue_confidence": "HIGH", - "issue_cwe": { - "id": 22, - "link": "https://cwe.mitre.org/data/definitions/22.html" - }, - "issue_severity": "MEDIUM", - "issue_text": "Audit url open for permitted schemes. Allowing use of file:/ or custom schemes is often unexpected.", - "line_number": 114, - "line_range": [ - 114 - ], - "more_info": "https://bandit.readthedocs.io/en/1.9.4/blacklists/blacklist_calls.html#b310-urllib-urlopen", - "test_id": "B310", - "test_name": "blacklist" - }, - { - "code": "170 return opener.open(req, timeout=timeout)\n171 return urllib.request.urlopen(req, timeout=timeout) # noqa: S310\n", + "code": "168 return opener.open(req, timeout=timeout)\n169 return urllib.request.urlopen(req, timeout=timeout) # noqa: S310\n", "col_offset": 11, "end_col_offset": 55, "filename": "src/specify_cli/authentication/http.py", @@ -52,9 +12,9 @@ }, "issue_severity": "MEDIUM", "issue_text": "Audit url open for permitted schemes. 
Allowing use of file:/ or custom schemes is often unexpected.", - "line_number": 171, + "line_number": 169, "line_range": [ - 171 + 169 ], "more_info": "https://bandit.readthedocs.io/en/1.9.4/blacklists/blacklist_calls.html#b310-urllib-urlopen", "test_id": "B310", diff --git a/.github/scripts/check_bandit_baseline.py b/.github/scripts/check_bandit_baseline.py index e67169feb7..badedc0307 100644 --- a/.github/scripts/check_bandit_baseline.py +++ b/.github/scripts/check_bandit_baseline.py @@ -43,14 +43,38 @@ ACK_LABEL = "security-baseline-change" +def _git_ok(*args: str) -> bool: + """True if the git command exits 0 (output discarded).""" + return ( + subprocess.run( + ["git", *args], + cwd=REPO_ROOT, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ).returncode + == 0 + ) + + def _read_baseline_at(ref: str) -> tuple[dict, bool]: """Return (baseline_json, file_existed_at_ref). Used for the base side. The head side reads the working tree to avoid silently fail-opening on an unfetched/invalid head ref. + + Only a missing *path* at a resolvable ref counts as "did not exist"; + an unresolvable ref or a failing ``git show`` aborts instead, so a + transient git failure cannot silently disable the gate. """ if not ref: return {"results": []}, False + if not _git_ok("rev-parse", "--verify", "--quiet", f"{ref}^{{commit}}"): + raise SystemExit( + f"Base ref {ref!r} cannot be resolved (unfetched or invalid). " + f"Refusing to fail-open on a security gate." + ) + if not _git_ok("cat-file", "-e", f"{ref}:{BASELINE_PATH}"): + return {"results": []}, False try: blob = subprocess.run( ["git", "show", f"{ref}:{BASELINE_PATH}"], @@ -60,8 +84,11 @@ def _read_baseline_at(ref: str) -> tuple[dict, bool]: stderr=subprocess.PIPE, text=True, ).stdout - except subprocess.CalledProcessError: - return {"results": []}, False + except subprocess.CalledProcessError as exc: + raise SystemExit( + f"Could not read baseline at {ref!r}: {exc.stderr.strip()}. 
" + f"Refusing to fail-open on a security gate." + ) try: return json.loads(blob), True except json.JSONDecodeError: diff --git a/.github/scripts/check_secrets_baseline.py b/.github/scripts/check_secrets_baseline.py index 2865bbde86..d16c0cfd47 100644 --- a/.github/scripts/check_secrets_baseline.py +++ b/.github/scripts/check_secrets_baseline.py @@ -56,10 +56,35 @@ def log_safe(self) -> str: ) +def _git_ok(*args: str) -> bool: + """True if the git command exits 0 (output discarded).""" + return ( + subprocess.run( + ["git", *args], + cwd=REPO_ROOT, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ).returncode + == 0 + ) + + def _read_baseline_at(ref: str) -> tuple[dict, bool]: - """Return (baseline_json, file_existed_at_ref). Base side only.""" + """Return (baseline_json, file_existed_at_ref). Base side only. + + Only a missing *path* at a resolvable ref counts as "did not exist"; + an unresolvable ref or a failing ``git show`` aborts instead, so a + transient git failure cannot silently disable the gate. + """ if not ref: return {"results": {}}, False + if not _git_ok("rev-parse", "--verify", "--quiet", f"{ref}^{{commit}}"): + raise SystemExit( + f"Base ref {ref!r} cannot be resolved (unfetched or invalid). " + f"Refusing to fail-open on a security gate." + ) + if not _git_ok("cat-file", "-e", f"{ref}:{BASELINE_PATH}"): + return {"results": {}}, False try: blob = subprocess.run( ["git", "show", f"{ref}:{BASELINE_PATH}"], @@ -69,8 +94,11 @@ def _read_baseline_at(ref: str) -> tuple[dict, bool]: stderr=subprocess.PIPE, text=True, ).stdout - except subprocess.CalledProcessError: - return {"results": {}}, False + except subprocess.CalledProcessError as exc: + raise SystemExit( + f"Could not read baseline at {ref!r}: {exc.stderr.strip()}. " + f"Refusing to fail-open on a security gate." 
+ ) try: return json.loads(blob), True except json.JSONDecodeError: diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml index a48f40c8e9..abdebf77ac 100644 --- a/.github/workflows/security.yml +++ b/.github/workflows/security.yml @@ -7,6 +7,9 @@ on: push: branches: ["main"] pull_request: + # labeled/unlabeled so the baseline-growth gates re-evaluate when the + # acknowledgement label is added or removed, without requiring a push. + types: [opened, synchronize, reopened, labeled, unlabeled] schedule: - cron: "17 4 * * 1" workflow_dispatch: @@ -27,7 +30,7 @@ jobs: fetch-depth: 2 - name: Install uv - uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0 + uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # v8.2.0 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6 @@ -73,7 +76,7 @@ jobs: fetch-depth: 0 - name: Install uv - uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0 + uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # v8.2.0 - name: Set up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6 @@ -154,7 +157,7 @@ jobs: fetch-depth: 0 - name: Install uv - uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0 + uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # v8.2.0 - name: Set up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6 @@ -168,6 +171,7 @@ jobs: # rewriting the baseline file (so there's no spurious git diff). 
- name: Run detect-secrets run: | + set -euo pipefail git ls-files -z \ -- ':!:.secrets.baseline' \ ':!:uv.lock' \ diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py index 6286d8c17f..0e5f594b1a 100644 --- a/src/specify_cli/__init__.py +++ b/src/specify_cli/__init__.py @@ -689,11 +689,8 @@ def preset_add( elif from_url: # Validate URL scheme before downloading - from urllib.parse import urlparse as _urlparse - _parsed = _urlparse(from_url) - _is_localhost = _parsed.hostname in ("localhost", "127.0.0.1", "::1") - if _parsed.scheme != "https" and not (_parsed.scheme == "http" and _is_localhost): - console.print(f"[red]Error:[/red] URL must use HTTPS (got {_parsed.scheme}://). HTTP is only allowed for localhost.") + if not is_https_or_localhost_http(from_url): + console.print("[red]Error:[/red] URL must use HTTPS. HTTP is only allowed for localhost.") raise typer.Exit(1) console.print(f"Installing preset from [cyan]{from_url}[/cyan]...") @@ -703,11 +700,15 @@ def preset_add( with tempfile.TemporaryDirectory() as tmpdir: zip_path = Path(tmpdir) / "preset.zip" try: + from functools import partial + from specify_cli.authentication.http import open_url as _open_url from specify_cli._github_http import resolve_github_release_asset_api_url _preset_extra_headers = None - _resolved_from_url = resolve_github_release_asset_api_url(from_url, _open_url) + _resolved_from_url = resolve_github_release_asset_api_url( + from_url, partial(_open_url, strict_redirects=True) + ) if _resolved_from_url: from_url = _resolved_from_url _preset_extra_headers = {"Accept": "application/octet-stream"} @@ -1601,13 +1602,9 @@ def extension_add( # Guard with ``not dev`` so that --dev + --from does not show a # confusing confirmation for a URL that will be ignored. 
if from_url and not dev: - from urllib.parse import urlparse from rich.markup import escape as _escape_markup - parsed = urlparse(from_url) - is_localhost = parsed.hostname in ("localhost", "127.0.0.1", "::1") - - if parsed.scheme != "https" and not (parsed.scheme == "http" and is_localhost): + if not is_https_or_localhost_http(from_url): console.print("[red]Error:[/red] URL must use HTTPS for security.") console.print("HTTP is only allowed for localhost URLs.") raise typer.Exit(1) @@ -3083,6 +3080,8 @@ def _validate_and_install_local(yaml_path: Path, source_label: str) -> None: # Try as URL (http/https) if source.startswith("http://") or source.startswith("https://"): + from functools import partial + from specify_cli.authentication.http import open_url as _open_url if not is_https_or_localhost_http(source): @@ -3092,7 +3091,9 @@ def _validate_and_install_local(yaml_path: Path, source_label: str) -> None: from specify_cli._github_http import resolve_github_release_asset_api_url as _resolve_gh_asset _wf_url_extra_headers = None - _resolved_wf_url = _resolve_gh_asset(source, _open_url, timeout=30) + _resolved_wf_url = _resolve_gh_asset( + source, partial(_open_url, strict_redirects=True), timeout=30 + ) if _resolved_wf_url: source = _resolved_wf_url _wf_url_extra_headers = {"Accept": "application/octet-stream"} @@ -3177,11 +3178,15 @@ def _validate_and_install_local(yaml_path: Path, source_label: str) -> None: workflow_file = workflow_dir / "workflow.yml" try: + from functools import partial + from specify_cli.authentication.http import open_url as _open_url from specify_cli._github_http import resolve_github_release_asset_api_url as _resolve_gh_asset _wf_cat_extra_headers = None - _resolved_workflow_url = _resolve_gh_asset(workflow_url, _open_url, timeout=30) + _resolved_workflow_url = _resolve_gh_asset( + workflow_url, partial(_open_url, strict_redirects=True), timeout=30 + ) if _resolved_workflow_url: workflow_url = _resolved_workflow_url _wf_cat_extra_headers = 
{"Accept": "application/octet-stream"} diff --git a/src/specify_cli/_download_security.py b/src/specify_cli/_download_security.py index 0e5ffbdd3d..75b8da91df 100644 --- a/src/specify_cli/_download_security.py +++ b/src/specify_cli/_download_security.py @@ -38,8 +38,9 @@ def is_https_or_localhost_http(url: str) -> bool: """Return True if *url* is HTTPS, or HTTP limited to loopback hosts. - Shared redirect-safety predicate used by the GitHub and auth HTTP redirect - handlers so the rule (and any future tightening of it) lives in one place. + Shared scheme-safety predicate used by the auth HTTP redirect handler and + by the direct URL validations in the CLI download flows, so the rule (and + any future tightening of it) lives in one place. The loopback allowance is a deliberate *exact-string* match on ``localhost`` / ``127.0.0.1`` / ``::1``, not an IP-range check: other @@ -304,6 +305,10 @@ def safe_extract_zip( exc, ) written = 0 + # Raised outside the try below: if error_type subclasses OSError or + # RuntimeError, raising inside would re-wrap the limit error as + # "Failed to extract" and lose the size-bound message. 
+ limit_error: str | None = None try: with zf.open(member, "r") as source, member_path.open("wb") as dest: while True: @@ -312,18 +317,18 @@ def safe_extract_zip( break written += len(chunk) if written > max_member_bytes: - _raise( - error_type, + limit_error = ( f"ZIP member {member.filename} exceeds maximum size " - f"of {max_member_bytes} bytes", + f"of {max_member_bytes} bytes" ) + break total_written += len(chunk) if total_written > max_total_bytes: - _raise( - error_type, + limit_error = ( f"ZIP archive exceeds maximum uncompressed size " - f"of {max_total_bytes} bytes", + f"of {max_total_bytes} bytes" ) + break dest.write(chunk) except (OSError, zipfile.BadZipFile, RuntimeError) as exc: _raise_from( @@ -331,3 +336,5 @@ def safe_extract_zip( f"Failed to extract ZIP member {member.filename}: {exc}", exc, ) + if limit_error is not None: + _raise(error_type, limit_error) diff --git a/src/specify_cli/_version.py b/src/specify_cli/_version.py index d2adde1265..2a4f5e2fa7 100644 --- a/src/specify_cli/_version.py +++ b/src/specify_cli/_version.py @@ -119,6 +119,7 @@ def _fetch_latest_release_tag() -> tuple[str | None, str | None]: GITHUB_API_LATEST, timeout=5, extra_headers={"Accept": "application/vnd.github+json"}, + strict_redirects=True, ) as resp: payload = json.loads( read_response_limited( diff --git a/tests/http_helpers.py b/tests/http_helpers.py index 5c1026d385..5effd27e42 100644 --- a/tests/http_helpers.py +++ b/tests/http_helpers.py @@ -2,8 +2,11 @@ import io import json +import urllib.request from unittest.mock import MagicMock +import pytest + def mock_urlopen_response(payload: dict) -> MagicMock: """Build a urlopen context-manager mock whose read returns JSON.""" @@ -14,3 +17,25 @@ def mock_urlopen_response(payload: dict) -> MagicMock: cm.__enter__.return_value = resp cm.__exit__.return_value = False return cm + + +@pytest.fixture(autouse=True) +def route_opener_open_through_urlopen(monkeypatch): + """Route OpenerDirector.open through 
urllib.request.urlopen. + + ``open_url(..., strict_redirects=True)`` fetches via + ``build_opener(...).open()``, which bypasses ``urllib.request.urlopen`` + — and with it the urlopen patches these test modules are built on. + Delegating ``open()`` to urlopen at call time keeps those patches + effective; the redirect handler's own behavior is covered by + ``TestRedirectStripping`` in test_authentication.py. + + Import this fixture into a test module to activate it there. + """ + monkeypatch.setattr( + urllib.request.OpenerDirector, + "open", + lambda self, req, data=None, timeout=None: urllib.request.urlopen( + req, timeout=timeout + ), + ) diff --git a/tests/integrations/test_integration_catalog.py b/tests/integrations/test_integration_catalog.py index 06161c2e40..a2c0fc1d7c 100644 --- a/tests/integrations/test_integration_catalog.py +++ b/tests/integrations/test_integration_catalog.py @@ -1,5 +1,6 @@ """Tests for the integration catalog system (catalog.py).""" +import io import json import os @@ -323,8 +324,11 @@ def test_fetch_single_catalog_uses_bounded_read(self, tmp_path, monkeypatch): ) class FakeResponse: - def read(self, _size=-1): - return b"{}" + def __init__(self): + self._stream = io.BytesIO(b"{}") + + def read(self, size=-1): + return self._stream.read(size) def geturl(self): return entry.url diff --git a/tests/self_upgrade_helpers.py b/tests/self_upgrade_helpers.py index c363f57b13..fc0f339f92 100644 --- a/tests/self_upgrade_helpers.py +++ b/tests/self_upgrade_helpers.py @@ -18,7 +18,7 @@ _verify_upgrade, ) from tests.conftest import strip_ansi -from tests.http_helpers import mock_urlopen_response +from tests.http_helpers import mock_urlopen_response, route_opener_open_through_urlopen __all__ = ( "SENTINEL_GH_TOKEN", @@ -31,6 +31,7 @@ "_verify_upgrade", "mock_urlopen_response", "requires_posix", + "route_opener_open_through_urlopen", "runner", "strip_ansi", ) diff --git a/tests/test_authentication.py b/tests/test_authentication.py index 
5367c15880..15c29633e9 100644 --- a/tests/test_authentication.py +++ b/tests/test_authentication.py @@ -822,6 +822,18 @@ def test_multi_hop_redirect_within_hosts_preserves_auth(self): auth3 = req3.get_header("Authorization") or req3.unredirected_hdrs.get("Authorization") assert auth3 == "Bearer tok" + def test_redirect_rejects_https_downgrade(self): + """HTTPS downloads must not follow redirects to non-local HTTP URLs.""" + from specify_cli.authentication.http import _StripAuthOnRedirect + from urllib.request import Request + import io + import urllib.error + handler = _StripAuthOnRedirect(("example.com",)) + req = Request("https://example.com/archive.zip") + with pytest.raises(urllib.error.URLError, match="unsafe redirect"): + handler.redirect_request(req, io.BytesIO(b""), 302, "Found", {}, + "http://evil.example.com/archive.zip") + # --------------------------------------------------------------------------- # _fetch_latest_release_tag delegation @@ -861,19 +873,25 @@ def test_gh_token_forwarded_when_configured(self, monkeypatch): assert captured["request"].get_header("Authorization") == "Bearer forwarded-sentinel" def test_no_config_means_no_auth(self, monkeypatch): - from unittest.mock import patch + from unittest.mock import MagicMock, patch from specify_cli._version import _fetch_latest_release_tag self._set_config(monkeypatch, []) captured, side_effect = self._capture_request() - with patch("specify_cli.authentication.http.urllib.request.urlopen", side_effect=side_effect): + # The release fetch uses strict_redirects=True, so the unauthenticated + # path goes through build_opener().open(), not urlopen. 
+ mock_opener = MagicMock() + mock_opener.open.side_effect = side_effect + with patch("specify_cli.authentication.http.urllib.request.build_opener", return_value=mock_opener): _fetch_latest_release_tag() assert captured["request"].get_header("Authorization") is None def test_accept_header_present(self, monkeypatch): - from unittest.mock import patch + from unittest.mock import MagicMock, patch from specify_cli._version import _fetch_latest_release_tag self._set_config(monkeypatch, []) captured, side_effect = self._capture_request() - with patch("specify_cli.authentication.http.urllib.request.urlopen", side_effect=side_effect): + mock_opener = MagicMock() + mock_opener.open.side_effect = side_effect + with patch("specify_cli.authentication.http.urllib.request.build_opener", return_value=mock_opener): _fetch_latest_release_tag() assert captured["request"].get_header("Accept") == "application/vnd.github+json" diff --git a/tests/test_baseline_gates.py b/tests/test_baseline_gates.py index 42714b56e0..71f844aa16 100644 --- a/tests/test_baseline_gates.py +++ b/tests/test_baseline_gates.py @@ -320,6 +320,16 @@ def gate(self, tmp_path) -> GateHandle: _install_script(repo, BANDIT_SCRIPT) return GateHandle(config=BANDIT_GATE, repo=repo) + def test_unresolvable_base_ref_fails_closed(self, gate: GateHandle): + # A base ref that cannot be resolved (unfetched, typo) must block + # the gate, not be treated as "baseline did not exist yet". 
+ gate.commit([("a.py", 10)], "base") + + result = gate.run(base="0123456789abcdef0123456789abcdef01234567") + + assert result.returncode == 1 + assert "Refusing to fail-open" in result.stderr + def test_no_base_ref_is_skipped(self, gate: GateHandle): gate.commit([], "init") # need at least one commit so HEAD resolves result = gate.run(base="") diff --git a/tests/test_extensions.py b/tests/test_extensions.py index 9a7017485e..c5ad40971b 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -3091,61 +3091,6 @@ def test_make_request_token_added_for_codeload_github_com(self, temp_dir, monkey req = catalog._make_request("https://codeload.github.com/org/repo/zip/refs/tags/v1.0.0") assert req.get_header("Authorization") == "Bearer ghp_testtoken" - def test_redirect_preserves_auth_for_github_to_codeload(self): - """Auth header is preserved when redirects stay within configured hosts.""" - from specify_cli.authentication.http import _StripAuthOnRedirect - from urllib.request import Request - import io - - handler = _StripAuthOnRedirect(("github.com", "codeload.github.com")) - original_url = "https://github.com/org/repo/archive/refs/tags/v1.zip" - redirect_url = "https://codeload.github.com/org/repo/zip/refs/tags/v1" - req = Request(original_url, headers={"Authorization": "Bearer ghp_test"}) - fp = io.BytesIO(b"") - new_req = handler.redirect_request(req, fp, 302, "Found", {}, redirect_url) - assert new_req is not None - auth = new_req.get_header("Authorization") or new_req.unredirected_hdrs.get("Authorization") - assert auth == "Bearer ghp_test" - - def test_redirect_strips_auth_for_github_to_external(self): - """Auth header is stripped when redirects leave configured hosts.""" - from specify_cli.authentication.http import _StripAuthOnRedirect - from urllib.request import Request - import io - - handler = _StripAuthOnRedirect(("github.com", "codeload.github.com")) - original_url = "https://github.com/org/repo/releases/download/v1/asset.zip" - redirect_url 
= "https://objects.githubusercontent.com/github-production-release-asset/12345" - req = Request(original_url, headers={"Authorization": "Bearer ghp_test"}) - fp = io.BytesIO(b"") - new_req = handler.redirect_request(req, fp, 302, "Found", {}, redirect_url) - assert new_req is not None - auth_header = new_req.headers.get("Authorization") - auth_unredirected = new_req.unredirected_hdrs.get("Authorization") - assert auth_header is None - assert auth_unredirected is None - - def test_redirect_rejects_https_downgrade(self): - """HTTPS downloads must not follow redirects to non-local HTTP URLs.""" - from specify_cli.authentication.http import _StripAuthOnRedirect - from urllib.request import Request - import io - import urllib.error - - handler = _StripAuthOnRedirect(("example.com",)) - req = Request("https://example.com/archive.zip") - fp = io.BytesIO(b"") - - with pytest.raises(urllib.error.URLError, match="unsafe redirect"): - handler.redirect_request( - req, - fp, - 302, - "Found", - {}, - "http://evil.example.com/archive.zip", - ) - def test_fetch_single_catalog_sends_auth_header(self, temp_dir, monkeypatch): """_fetch_single_catalog passes Authorization header when a provider is configured.""" from unittest.mock import patch, MagicMock @@ -3649,7 +3594,6 @@ def test_fetch_catalog_uses_bounded_read(self, temp_dir): def test_download_extension_sends_auth_header(self, temp_dir, monkeypatch): """download_extension passes Authorization header when a provider is configured.""" from unittest.mock import patch, MagicMock - import io import zipfile monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken") @@ -3714,7 +3658,6 @@ def test_download_extension_accepts_direct_github_rest_asset_url(self, temp_dir, """download_extension can use a GitHub REST release asset URL directly.""" from unittest.mock import patch, MagicMock import zipfile - import io monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken") self._inject_github_config(monkeypatch, token_env="GITHUB_TOKEN") diff --git 
a/tests/test_security_workflow.py b/tests/test_security_workflow.py index 7105ee7942..b4e052da33 100644 --- a/tests/test_security_workflow.py +++ b/tests/test_security_workflow.py @@ -69,10 +69,6 @@ def _step(job_name: str, step_name: str) -> dict: raise AssertionError(f"Step {step_name!r} not found in job {job_name!r}.") -def _step_run(job_name: str, step_name: str) -> str: - return _step(job_name, step_name)["run"] - - def _find_step_by_run_signature(job_name: str, marker: str) -> dict: """Locate a step in *job_name* whose ``run`` command contains *marker*. @@ -175,7 +171,11 @@ def test_security_workflow_triggers_are_preserved(self): triggers = _workflow_triggers() assert triggers["push"]["branches"] == ["main"] - assert triggers["pull_request"] is None + # labeled/unlabeled so the baseline-growth gates re-evaluate when the + # acknowledgement label is toggled, without requiring a new push. + assert triggers["pull_request"] == { + "types": ["opened", "synchronize", "reopened", "labeled", "unlabeled"] + } assert triggers["workflow_dispatch"] is None assert triggers["schedule"] == [{"cron": "17 4 * * 1"}] @@ -228,14 +228,14 @@ def test_bandit_baseline_tracks_only_accepted_findings(self): baseline = json.loads(BANDIT_BASELINE.read_text(encoding="utf-8")) results = baseline["results"] + # Identify entries by (filename, test_id), not line number: unrelated + # edits shift lines and force a baseline regen, and the growth gate + # (check_bandit_baseline.py) already guards full identities. 
assert { - (result["filename"], result["line_number"], result["test_id"]) - for result in results + (result["filename"], result["test_id"]) for result in results } == { - ("src/specify_cli/_github_http.py", 104, "B310"), - ("src/specify_cli/authentication/azure_devops.py", 114, "B310"), - ("src/specify_cli/authentication/http.py", 171, "B310"), - ("src/specify_cli/workflows/steps/shell/__init__.py", 35, "B602"), + ("src/specify_cli/authentication/http.py", "B310"), + ("src/specify_cli/workflows/steps/shell/__init__.py", "B602"), } assert {result["issue_severity"] for result in results} == {"MEDIUM", "HIGH"} diff --git a/tests/test_self_upgrade_detection.py b/tests/test_self_upgrade_detection.py index ab575e7435..73b55ebb79 100644 --- a/tests/test_self_upgrade_detection.py +++ b/tests/test_self_upgrade_detection.py @@ -13,6 +13,7 @@ from specify_cli import app from tests.self_upgrade_helpers import ( + route_opener_open_through_urlopen, # noqa: F401 (autouse fixture) _InstallMethod, _assemble_installer_argv, _completed_process, diff --git a/tests/test_self_upgrade_execution.py b/tests/test_self_upgrade_execution.py index 6696b4fc79..5c761014be 100644 --- a/tests/test_self_upgrade_execution.py +++ b/tests/test_self_upgrade_execution.py @@ -7,6 +7,7 @@ from specify_cli import app from tests.self_upgrade_helpers import ( + route_opener_open_through_urlopen, # noqa: F401 (autouse fixture) _completed_process, mock_urlopen_response, requires_posix, diff --git a/tests/test_self_upgrade_verification.py b/tests/test_self_upgrade_verification.py index f1a018f06c..c4e7eecf1b 100644 --- a/tests/test_self_upgrade_verification.py +++ b/tests/test_self_upgrade_verification.py @@ -8,6 +8,7 @@ from specify_cli import app from tests.self_upgrade_helpers import ( + route_opener_open_through_urlopen, # noqa: F401 (autouse fixture) SENTINEL_GH_TOKEN, SENTINEL_GITHUB_TOKEN, _InstallMethod, diff --git a/tests/test_upgrade.py b/tests/test_upgrade.py index f305b27efc..b6f3134286 100644 --- 
a/tests/test_upgrade.py +++ b/tests/test_upgrade.py @@ -25,7 +25,10 @@ _normalize_tag, ) from tests.conftest import strip_ansi -from tests.http_helpers import mock_urlopen_response +from tests.http_helpers import ( + mock_urlopen_response, + route_opener_open_through_urlopen, # noqa: F401 (autouse fixture) +) runner = CliRunner() diff --git a/tests/test_workflows.py b/tests/test_workflows.py index 0ca8a42cd9..59acb32f66 100644 --- a/tests/test_workflows.py +++ b/tests/test_workflows.py @@ -3476,11 +3476,13 @@ def _fake_urlopen(req, timeout=30): # Bounded read was invoked (not raw resp.read()). error_type must # be the WorkflowCatalogError so an oversized response surfaces # as a workflow-catalog domain error, not a generic ValueError - # that callers might miss. The size cap itself relies on the - # module-level default in _download_security.MAX_DOWNLOAD_BYTES. + # that callers might miss. + from specify_cli._download_security import MAX_JSON_CATALOG_BYTES + assert "kwargs" in recorded, "read_response_limited was not called" assert recorded["kwargs"]["error_type"] is WorkflowCatalogError assert recorded["kwargs"]["label"] == "workflow catalog" + assert recorded["kwargs"]["max_bytes"] == MAX_JSON_CATALOG_BYTES # ===== Integration Test ===== From 66fd3e09300af5d1e254ae20b64cb4a600de6ba7 Mon Sep 17 00:00:00 2001 From: Pascal Date: Thu, 11 Jun 2026 06:44:43 +0200 Subject: [PATCH 29/30] fix: error messages and docstring name the exact loopback hosts is_https_or_localhost_http allows HTTP for localhost, 127.0.0.1 and ::1; the user-facing messages and the open_url docstring only said localhost. 
--- src/specify_cli/__init__.py | 8 ++++---- src/specify_cli/authentication/http.py | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py index 3bcdc9b297..7cc4c73801 100644 --- a/src/specify_cli/__init__.py +++ b/src/specify_cli/__init__.py @@ -690,7 +690,7 @@ def preset_add( elif from_url: # Validate URL scheme before downloading if not is_https_or_localhost_http(from_url): - console.print("[red]Error:[/red] URL must use HTTPS. HTTP is only allowed for localhost.") + console.print("[red]Error:[/red] URL must use HTTPS. HTTP is only allowed for localhost (127.0.0.1, ::1).") raise typer.Exit(1) console.print(f"Installing preset from [cyan]{from_url}[/cyan]...") @@ -1606,7 +1606,7 @@ def extension_add( if not is_https_or_localhost_http(from_url): console.print("[red]Error:[/red] URL must use HTTPS for security.") - console.print("HTTP is only allowed for localhost URLs.") + console.print("HTTP is only allowed for localhost (127.0.0.1, ::1) URLs.") raise typer.Exit(1) safe_url = _escape_markup(from_url) @@ -3095,7 +3095,7 @@ def _validate_and_install_local(yaml_path: Path, source_label: str) -> None: from specify_cli.authentication.http import open_url as _open_url if not is_https_or_localhost_http(source): - console.print("[red]Error:[/red] Only HTTPS URLs are allowed, except HTTP for localhost.") + console.print("[red]Error:[/red] Only HTTPS URLs are allowed, except HTTP for localhost (127.0.0.1, ::1).") raise typer.Exit(1) from specify_cli._github_http import resolve_github_release_asset_api_url as _resolve_gh_asset @@ -3174,7 +3174,7 @@ def _validate_and_install_local(yaml_path: Path, source_label: str) -> None: if not is_https_or_localhost_http(workflow_url): console.print( f"[red]Error:[/red] Workflow '{source}' has an invalid install URL. " - "Only HTTPS URLs are allowed, except HTTP for localhost." + "Only HTTPS URLs are allowed, except HTTP for localhost (127.0.0.1, ::1)." 
) raise typer.Exit(1) diff --git a/src/specify_cli/authentication/http.py b/src/specify_cli/authentication/http.py index 8ee061db76..113273916f 100644 --- a/src/specify_cli/authentication/http.py +++ b/src/specify_cli/authentication/http.py @@ -126,7 +126,8 @@ def open_url( *extra_headers* (e.g. ``Accept``) are merged into every attempt. When *strict_redirects* is true, unauthenticated requests also reject - redirects to non-HTTPS URLs except localhost. + redirects to non-HTTPS URLs, except HTTP to localhost / 127.0.0.1 / ::1 + (the exact hosts allowed by ``is_https_or_localhost_http``). """ entries = find_entries_for_url(url, _load_config()) From 6c8851bdefbed07acd144005f0b36f9aeaa3cc0b Mon Sep 17 00:00:00 2001 From: Pascal Date: Thu, 11 Jun 2026 17:09:13 +0200 Subject: [PATCH 30/30] docs(http): clarify redirect scheme guard is unconditional The non-HTTPS redirect rejection in _StripAuthOnRedirect applies to every authenticated attempt regardless of strict_redirects; the flag only extends the same guard to the unauthenticated fallback. Document both guards on the class and correct the open_url docstring, which previously gated the whole scheme restriction under strict_redirects. --- src/specify_cli/authentication/http.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/specify_cli/authentication/http.py b/src/specify_cli/authentication/http.py index 113273916f..c8b3540fab 100644 --- a/src/specify_cli/authentication/http.py +++ b/src/specify_cli/authentication/http.py @@ -58,7 +58,12 @@ def _hostname_in_hosts(hostname: str, hosts: tuple[str, ...]) -> bool: class _StripAuthOnRedirect(urllib.request.HTTPRedirectHandler): - """Drop ``Authorization`` when a redirect leaves the entry's declared hosts.""" + """Redirect handler that guards every redirect it is installed for. + + 1. Reject redirects to non-HTTPS URLs (loopback HTTP excepted, per + ``is_https_or_localhost_http``) — enforced unconditionally. + 2. 
Drop ``Authorization`` when a redirect leaves the entry's declared hosts. + """ def __init__(self, hosts: tuple[str, ...]) -> None: super().__init__() @@ -125,9 +130,13 @@ def open_url( 5. Non-auth errors (404, 500, network) raise immediately. *extra_headers* (e.g. ``Accept``) are merged into every attempt. - When *strict_redirects* is true, unauthenticated requests also reject - redirects to non-HTTPS URLs, except HTTP to localhost / 127.0.0.1 / ::1 - (the exact hosts allowed by ``is_https_or_localhost_http``). + + Redirect scheme safety: every authenticated attempt goes through + ``_StripAuthOnRedirect``, which always rejects redirects to non-HTTPS + URLs (except HTTP to localhost / 127.0.0.1 / ::1, the hosts allowed by + ``is_https_or_localhost_http``). *strict_redirects* extends that same + scheme guard to the unauthenticated fallback; without it, the fallback + follows redirects without the scheme check. """ entries = find_entries_for_url(url, _load_config())