diff --git a/packages/cli/src/commands/scan.ts b/packages/cli/src/commands/scan.ts index 493a914..2b83684 100644 --- a/packages/cli/src/commands/scan.ts +++ b/packages/cli/src/commands/scan.ts @@ -355,7 +355,10 @@ async function buildWrapperContext( ctx.spectral = { contractFiles: await findOpenApiFiles(repoPath) }; } if (ids.has(PIP_AUDIT_SPEC.id)) { - ctx.pipAudit = { requirementsPath: "requirements.txt" }; + // The wrapper auto-detects: a real requirements.txt is audited directly; + // otherwise it bridges pyproject.toml through `uv export`. The transient + // export lands in the gitignored .codehub/ meta dir. + ctx.pipAudit = { exportDir: resolveRepoMetaDir(repoPath) }; } return ctx; } diff --git a/packages/scanners/src/catalog.ts b/packages/scanners/src/catalog.ts index a71605c..69ee80b 100644 --- a/packages/scanners/src/catalog.ts +++ b/packages/scanners/src/catalog.ts @@ -54,7 +54,10 @@ export const BANDIT_SPEC: ScannerSpec = { languages: ["python"], iacTypes: [], sarifNative: true, - installCmd: "pip install 'bandit[sarif]==1.9.4'", + // The `[sarif]` extra pulls in `bandit-sarif-formatter`; without it bandit + // argparse-rejects `-f sarif` (exit 2). `uv tool install` keeps the tool on + // an isolated venv so it never shadows / is shadowed by a project env. 
+ installCmd: "uv tool install 'bandit[sarif]==1.9.4'", version: "1.9.4", offlineCapable: true, priority: 1, @@ -84,7 +87,7 @@ export const PIP_AUDIT_SPEC: ScannerSpec = { languages: ["python"], iacTypes: [], sarifNative: false, - installCmd: "pip install pip-audit==2.10.0", + installCmd: "uv tool install pip-audit==2.10.0", version: "2.10.0", offlineCapable: false, priority: 1, diff --git a/packages/scanners/src/wrappers/p2-wrappers.test.ts b/packages/scanners/src/wrappers/p2-wrappers.test.ts index f55a9e4..88d4543 100644 --- a/packages/scanners/src/wrappers/p2-wrappers.test.ts +++ b/packages/scanners/src/wrappers/p2-wrappers.test.ts @@ -24,12 +24,17 @@ function makeFakeDeps( cmd: string, args: readonly string[], ) => { stdout: string; stderr?: string; exitCode?: number }, - opts: { readonly missing?: readonly string[] } = {}, + opts: { + readonly missing?: readonly string[]; + /** Absolute paths the fake `fileExists` should report as present. */ + readonly existing?: readonly string[]; + } = {}, ): { deps: WrapperDeps; calls: Array<{ cmd: string; args: readonly string[] }>; } { const missing = new Set(opts.missing ?? []); + const existing = new Set(opts.existing ?? []); const calls: Array<{ cmd: string; args: readonly string[] }> = []; const deps: WrapperDeps = { which: async (binary: string) => ({ found: !missing.has(binary) }), @@ -42,6 +47,7 @@ function makeFakeDeps( exitCode: out.exitCode ?? 
0, }; }, + fileExists: async (path: string) => existing.has(path), }; return { deps, calls }; } @@ -250,10 +256,9 @@ test("pip-audit wrapper runs with --format json and converts to SARIF", async () }, ], }; - const { deps, calls } = makeFakeDeps(() => ({ - stdout: JSON.stringify(pipJson), - exitCode: 1, // pip-audit exits 1 on findings - })); + const { deps, calls } = makeFakeDeps(() => ({ stdout: JSON.stringify(pipJson), exitCode: 1 }), { + existing: [`${ctx.projectPath}/requirements.txt`], + }); const out = await createPipAuditWrapper(deps).run(ctx); const args = calls[0]?.args ?? []; assert.equal(calls[0]?.cmd, "pip-audit"); @@ -273,16 +278,92 @@ test("pip-audit wrapper runs with --format json and converts to SARIF", async () assert.deepEqual(ocProps?.["fixVersions"], ["2.20.0"]); }); -test("pip-audit wrapper honours custom requirementsPath", async () => { - const { deps, calls } = makeFakeDeps(() => ({ - stdout: JSON.stringify({ dependencies: [] }), - })); +test("pip-audit wrapper honours custom requirementsPath when it exists", async () => { + const { deps, calls } = makeFakeDeps(() => ({ stdout: JSON.stringify({ dependencies: [] }) }), { + existing: [`${ctx.projectPath}/requirements-dev.txt`], + }); await createPipAuditWrapper(deps, { - requirementsPath: "pyproject.toml", + requirementsPath: "requirements-dev.txt", }).run(ctx); const args = calls[0]?.args ?? []; const idx = args.indexOf("-r"); - assert.equal(args[idx + 1], "pyproject.toml"); + assert.equal(args[idx + 1], "requirements-dev.txt"); +}); + +// pyproject.toml (no requirements.txt) → uv export bridge, then audit the +// export but label findings against pyproject.toml. +test("pip-audit wrapper bridges pyproject.toml via uv export", async () => { + const pipJson = { + dependencies: [{ name: "jinja2", version: "3.1.0", vulns: [{ id: "GHSA-h5c8-rqwp-cp95" }] }], + }; + const { deps, calls } = makeFakeDeps( + (cmd) => { + // uv export writes the file (exit 0, no stdout); pip-audit returns JSON. 
+ if (cmd === "uv") return { stdout: "", exitCode: 0 }; + return { stdout: JSON.stringify(pipJson), exitCode: 1 }; + }, + { existing: [`${ctx.projectPath}/pyproject.toml`] }, + ); + const out = await createPipAuditWrapper(deps, { exportDir: "/tmp/fake-repo/.codehub" }).run(ctx); + + // First call exports via uv; second audits the exported file. + assert.equal(calls[0]?.cmd, "uv"); + assert.ok(calls[0]?.args.includes("export")); + assert.ok(calls[0]?.args.includes("--format")); + assert.ok(calls[0]?.args.includes("requirements-txt")); + const exportIdx = calls[0]?.args.indexOf("-o") ?? -1; + assert.equal( + calls[0]?.args[exportIdx + 1], + "/tmp/fake-repo/.codehub/.pip-audit-requirements.txt", + ); + + assert.equal(calls[1]?.cmd, "pip-audit"); + const auditIdx = calls[1]?.args.indexOf("-r") ?? -1; + assert.equal(calls[1]?.args[auditIdx + 1], "/tmp/fake-repo/.codehub/.pip-audit-requirements.txt"); + + // Finding is labelled against pyproject.toml, NOT the transient export. + const result = out.sarif.runs[0]?.results?.[0]; + assert.equal(result?.ruleId, "GHSA-h5c8-rqwp-cp95"); + assert.equal(result?.locations?.[0]?.physicalLocation?.artifactLocation?.uri, "pyproject.toml"); +}); + +test("pip-audit wrapper warns when pyproject.toml present but uv missing", async () => { + const warnings: string[] = []; + const { deps, calls } = makeFakeDeps(() => ({ stdout: "" }), { + missing: ["uv"], + existing: [`${ctx.projectPath}/pyproject.toml`], + }); + const out = await createPipAuditWrapper(deps).run({ ...ctx, onWarn: (m) => warnings.push(m) }); + // pip-audit is never invoked — only the which("uv") probe runs, no runBinary. 
+ assert.equal(calls.length, 0); + assert.equal(out.sarif.runs[0]?.results?.length, 0); + assert.ok(warnings.join(" | ").includes("uv"), `expected a uv advisory; got: ${warnings}`); +}); + +test("pip-audit wrapper warns when uv export fails", async () => { + const warnings: string[] = []; + const { deps } = makeFakeDeps( + (cmd) => { + if (cmd === "uv") return { stdout: "", stderr: "no lockfile", exitCode: 2 }; + return { stdout: JSON.stringify({ dependencies: [] }) }; + }, + { existing: [`${ctx.projectPath}/pyproject.toml`] }, + ); + const out = await createPipAuditWrapper(deps).run({ ...ctx, onWarn: (m) => warnings.push(m) }); + assert.equal(out.sarif.runs[0]?.results?.length, 0); + assert.ok(warnings.join(" | ").includes("uv export"), `got: ${warnings}`); +}); + +test("pip-audit wrapper warns when neither requirements.txt nor pyproject.toml exists", async () => { + const warnings: string[] = []; + const { deps, calls } = makeFakeDeps(() => ({ stdout: "" }), { existing: [] }); + const out = await createPipAuditWrapper(deps).run({ ...ctx, onWarn: (m) => warnings.push(m) }); + assert.equal(calls.length, 0); + assert.equal(out.sarif.runs[0]?.results?.length, 0); + assert.ok( + warnings.join(" | ").includes("no requirements.txt or pyproject.toml"), + `got: ${warnings}`, + ); }); test("pip-audit wrapper emits empty SARIF when binary missing", async () => { @@ -293,7 +374,9 @@ test("pip-audit wrapper emits empty SARIF when binary missing", async () => { }); test("pip-audit wrapper emits empty SARIF when stdout is garbage", async () => { - const { deps } = makeFakeDeps(() => ({ stdout: "not json at all", exitCode: 2 })); + const { deps } = makeFakeDeps(() => ({ stdout: "not json at all", exitCode: 2 }), { + existing: [`${ctx.projectPath}/requirements.txt`], + }); const out = await createPipAuditWrapper(deps).run(ctx); assert.equal(out.sarif.runs[0]?.results?.length, 0); }); diff --git a/packages/scanners/src/wrappers/pip-audit.ts 
b/packages/scanners/src/wrappers/pip-audit.ts index c7d28d9..a9ec824 100644 --- a/packages/scanners/src/wrappers/pip-audit.ts +++ b/packages/scanners/src/wrappers/pip-audit.ts @@ -1,23 +1,35 @@ /** - * pip-audit wrapper — Python environment / requirements vulnerability - * audit. + * pip-audit wrapper — Python dependency vulnerability audit. * - * Invocation (requirements.txt mode, the ergonomic default for project - * scans): + * Resolution order for what to audit (first hit wins): * - * pip-audit -r requirements.txt --format json --disable-pip - * --cache-dir /.codehub/pip-audit-cache --progress-spinner off + * 1. An explicit / default `requirements.txt` that EXISTS on disk → + * audit it directly: + * pip-audit -r requirements.txt --format json --disable-pip … + * `--disable-pip` skips pip-based dependency resolution: pip-audit reads the + * pinned file as-is (its vulnerability lookups still need network access). * - * When `requirements.txt` is missing, pip-audit falls back to the - * current environment and (empirically) either complains about no - * requirements file or tries to probe the system env; either way it - * returns JSON on stdout. We tolerate non-zero exit (pip-audit exits - * 1 on findings) per the shared `invokeScanner` contract. + * 2. No requirements file, but a `pyproject.toml` exists → bridge via uv. + * pip-audit cannot audit a bare `pyproject.toml` (it would try to build + * a throwaway venv and resolve deps, which fails on locked/offline + * projects with `invalid requirements input`). Instead we export the + * resolved, HASHED dependency set with uv: + * uv export --quiet --format requirements-txt --no-emit-project \ + * -o <exportDir>/.pip-audit-requirements.txt + * then feed that to the same `-r … --disable-pip` path. uv emits hashes + * by default, which `--disable-pip` requires. SARIF findings are still + * labelled against `pyproject.toml` (the file the user recognises), not + * the transient export, via the converter's `requirementsPath` option.
* - * Output is JSON, NOT SARIF — we post-process stdout through - * `pipAuditJsonToSarif` before returning. + * 3. Neither file present → emit empty SARIF with an advisory; there is + * nothing to audit. + * + * We tolerate non-zero exit (pip-audit exits 1 on findings) per the shared + * `invokeScanner` contract. Output is JSON, NOT SARIF — we post-process + * stdout through `pipAuditJsonToSarif` before returning. */ +import { join } from "node:path"; import { PIP_AUDIT_SPEC } from "../catalog.js"; import { type PipAuditConvertOptions, @@ -30,17 +42,27 @@ import { DEFAULT_DEPS, type WrapperDeps } from "./shared.js"; export interface PipAuditWrapperOptions { /** - * Explicit `-r ` to pass to pip-audit. Defaults to - * `requirements.txt` (pip-audit will gracefully no-op if the file is - * missing). + * Explicit `-r ` to pass to pip-audit, relative to the project root. + * Defaults to `requirements.txt`. When the file is absent the wrapper falls + * back to a `pyproject.toml` → uv-export bridge (see module docs). */ readonly requirementsPath?: string; + /** + * Directory to write the transient uv-exported requirements file into. + * Defaults to the project root. Callers should point this at the repo's + * `.codehub/` meta dir so the export lands in a gitignored location. + */ + readonly exportDir?: string; } +/** Filename for the uv-exported requirements bridge file (pyproject case). */ +const EXPORT_FILENAME = ".pip-audit-requirements.txt"; + export function createPipAuditWrapper( deps: WrapperDeps = DEFAULT_DEPS, opts: PipAuditWrapperOptions = {}, ): ScannerWrapper { + const fileExists = deps.fileExists ?? DEFAULT_DEPS.fileExists; return { spec: PIP_AUDIT_SPEC, run: async (ctx: ScannerRunContext): Promise => { @@ -56,42 +78,127 @@ export function createPipAuditWrapper( durationMs: performance.now() - started, }; } + const requirementsPath = opts.requirementsPath ?? 
"requirements.txt"; - const args: readonly string[] = [ - "-r", - requirementsPath, - "--format", - "json", - "--disable-pip", - "--progress-spinner", - "off", - ]; - const result = await deps.runBinary("pip-audit", args, { - timeoutMs: ctx.timeoutMs, - cwd: ctx.projectPath, - }); - const json = tryParseJson(result.stdout); - if (json === undefined) { - ctx.onWarn?.( - `${PIP_AUDIT_SPEC.id}: stdout was not valid JSON (stderr: ${truncate(result.stderr, 200)}); emitting empty SARIF.`, - ); - return { - spec: PIP_AUDIT_SPEC, - sarif: emptySarifFor(PIP_AUDIT_SPEC), - durationMs: performance.now() - started, - }; + const reqAbs = join(ctx.projectPath, requirementsPath); + + // 1. A real requirements file → audit it directly (the original path). + if (fileExists !== undefined && (await fileExists(reqAbs))) { + return await auditRequirementsFile(deps, ctx, started, requirementsPath, requirementsPath); + } + + // 2. No requirements file, but pyproject.toml → bridge through uv export. + const pyprojectAbs = join(ctx.projectPath, "pyproject.toml"); + if (fileExists !== undefined && (await fileExists(pyprojectAbs))) { + const bridged = await auditViaPyprojectBridge(deps, ctx, started, opts); + if (bridged !== undefined) return bridged; + // bridge failed — fall through to the no-input advisory below. } - const convertOpts: PipAuditConvertOptions = { requirementsPath }; - const sarif = pipAuditJsonToSarif(json, convertOpts); + + // 3. Nothing auditable. + ctx.onWarn?.( + `${PIP_AUDIT_SPEC.id}: no requirements.txt or pyproject.toml found in ${ctx.projectPath}; emitting empty SARIF.`, + ); return { spec: PIP_AUDIT_SPEC, - sarif, + sarif: emptySarifFor(PIP_AUDIT_SPEC), durationMs: performance.now() - started, }; }, }; } +/** + * Run pip-audit against a requirements-format file and convert to SARIF. 
+ * `auditPath` is what pip-audit reads (`-r`); `sarifUri` is the file shown in
+ * SARIF locations (so the pyproject bridge can label findings against
+ * `pyproject.toml` while auditing a transient export).
+ */
+async function auditRequirementsFile(
+  deps: WrapperDeps,
+  ctx: ScannerRunContext,
+  started: number,
+  auditPath: string,
+  sarifUri: string,
+): Promise<Awaited<ReturnType<ScannerWrapper["run"]>>> {
+  const args: readonly string[] = [
+    "-r",
+    auditPath,
+    "--format",
+    "json",
+    "--disable-pip", // read the pinned file as-is; never resolve an environment
+    "--progress-spinner",
+    "off",
+  ];
+  const result = await deps.runBinary("pip-audit", args, {
+    timeoutMs: ctx.timeoutMs,
+    cwd: ctx.projectPath,
+  });
+  const json = tryParseJson(result.stdout); // exit code is not checked: pip-audit exits 1 on findings
+  if (json === undefined) {
+    ctx.onWarn?.(
+      `${PIP_AUDIT_SPEC.id}: stdout was not valid JSON (stderr: ${truncate(result.stderr, 200)}); emitting empty SARIF.`,
+    );
+    return {
+      spec: PIP_AUDIT_SPEC,
+      sarif: emptySarifFor(PIP_AUDIT_SPEC),
+      durationMs: performance.now() - started,
+    };
+  }
+  const convertOpts: PipAuditConvertOptions = { requirementsPath: sarifUri };
+  const sarif = pipAuditJsonToSarif(json, convertOpts);
+  return {
+    spec: PIP_AUDIT_SPEC,
+    sarif,
+    durationMs: performance.now() - started,
+  };
+}
+
+/**
+ * Export `pyproject.toml`'s resolved deps to a hashed requirements file via
+ * `uv export`, then audit it. When uv is missing or the export fails, warns
+ * through `ctx.onWarn` and resolves to `undefined`; the caller picks the result.
+ */
+async function auditViaPyprojectBridge(
+  deps: WrapperDeps,
+  ctx: ScannerRunContext,
+  started: number,
+  opts: PipAuditWrapperOptions,
+): Promise<Awaited<ReturnType<ScannerWrapper["run"]>> | undefined> {
+  const uvProbe = await deps.which("uv");
+  if (!uvProbe.found) {
+    ctx.onWarn?.(
+      `${PIP_AUDIT_SPEC.id}: found pyproject.toml but 'uv' is not on PATH to export a lockfile; ` +
+        `install uv (https://docs.astral.sh/uv/) or add a requirements.txt. Emitting empty SARIF.`,
+    );
+    return undefined;
+  }
+  const exportDir = opts.exportDir ?? ctx.projectPath;
+  const exportPath = join(exportDir, EXPORT_FILENAME);
+  const exportArgs: readonly string[] = [
+    "export",
+    "--quiet",
+    "--format",
+    "requirements-txt",
+    "--no-emit-project", // list the dependencies only, not the project itself
+    "-o",
+    exportPath,
+  ];
+  const exportResult = await deps.runBinary("uv", exportArgs, {
+    timeoutMs: ctx.timeoutMs,
+    cwd: ctx.projectPath,
+  });
+  if (exportResult.exitCode !== 0) {
+    ctx.onWarn?.(
+      `${PIP_AUDIT_SPEC.id}: 'uv export' failed (exit ${exportResult.exitCode}: ${truncate(exportResult.stderr, 200)}); emitting empty SARIF.`,
+    );
+    return undefined;
+  }
+  // Audit the export, but label findings against pyproject.toml — the file
+  // the user actually maintains.
+  return await auditRequirementsFile(deps, ctx, started, exportPath, "pyproject.toml");
+}
+
 function truncate(s: string, max: number): string {
   if (s.length <= max) return s.trim();
   return `${s.slice(0, max).trim()}…`;
diff --git a/packages/scanners/src/wrappers/shared.ts b/packages/scanners/src/wrappers/shared.ts
index bf61aa7..35dd093 100644
--- a/packages/scanners/src/wrappers/shared.ts
+++ b/packages/scanners/src/wrappers/shared.ts
@@ -5,6 +5,7 @@
  * tests can mock `runBinary` without pulling in every other wrapper.
  */
 
+import { access } from "node:fs/promises";
 import { type SarifLog, SarifLogSchema } from "@opencodehub/sarif";
 import { type RunBinaryResult, runBinary, tryParseJson, which } from "../exec.js";
 import {
@@ -26,11 +27,26 @@ export interface WrapperDeps {
     args: readonly string[],
     opts: { readonly timeoutMs: number; readonly cwd?: string; readonly env?: NodeJS.ProcessEnv },
   ) => Promise;
+  /**
+   * Optional filesystem existence probe. Wrappers that branch on which
+   * manifest a project ships (e.g. pip-audit: requirements.txt vs
+   * pyproject.toml) use this. Optional so test fakes that only stub
+   * `which`/`runBinary` keep compiling; defaults to a real `access` probe.
+   */
+  readonly fileExists?: (path: string) => Promise<boolean>;
 }
 
 export const DEFAULT_DEPS: WrapperDeps = {
   which,
   runBinary: (cmd, args, opts) => runBinary(cmd, args, opts),
+  fileExists: async (path: string): Promise<boolean> => {
+    try {
+      await access(path); // resolves only when the path is visible to this process
+      return true;
+    } catch {
+      return false; // any rejection (missing path, permission error, …) counts as absent
+    }
+  },
 };
 
 /**