Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
415 changes: 415 additions & 0 deletions .claude/workflows/world-class-code-exploration.mjs

Large diffs are not rendered by default.

18 changes: 18 additions & 0 deletions packages/cli/src/commands/scan.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import { readFileSync } from "node:fs";
import { mkdir, readFile, writeFile } from "node:fs/promises";
import { join, resolve } from "node:path";
import { pipeline } from "@opencodehub/ingestion";
import {
applyBaselineState,
applySuppressions,
Expand All @@ -45,10 +46,13 @@ import {
P1_SPECS,
PIP_AUDIT_SPEC,
type ProjectProfileGate,
RADON_SPEC,
runScanners,
type ScannerSpec,
type ScannerStatus,
SPECTRAL_SPEC,
TY_SPEC,
VULTURE_SPEC,
} from "@opencodehub/scanners";
import { resolveRepoMetaDir } from "@opencodehub/storage";
import { readRegistry } from "../registry.js";
Expand Down Expand Up @@ -360,6 +364,20 @@ async function buildWrapperContext(
// export lands in the gitignored .codehub/ meta dir.
ctx.pipAudit = { exportDir: resolveRepoMetaDir(repoPath) };
}
// Python tree-walking scanners (vulture/radon/ty) descend into `.venv` and
// report library noise unless told to skip the same dirs the indexer
// ignores. Reuse the indexer's single source of truth so the exclude set
// can't drift. Each wrapper anchors / formats these for its own CLI.
const ignoreDirs = pipeline.HARDCODED_IGNORES;
if (ids.has(VULTURE_SPEC.id)) {
ctx.vulture = { excludeGlobs: ignoreDirs };
}
if (ids.has(RADON_SPEC.id)) {
ctx.radon = { ignoreDirs };
}
if (ids.has(TY_SPEC.id)) {
ctx.ty = { excludeGlobs: ignoreDirs };
}
return ctx;
}

Expand Down
18 changes: 12 additions & 6 deletions packages/scanners/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -129,15 +129,15 @@ import { createHadolintWrapper, type HadolintWrapperOptions } from "./wrappers/h
import { createNpmAuditWrapper } from "./wrappers/npm-audit.js";
import { createOsvScannerWrapper } from "./wrappers/osv-scanner.js";
import { createPipAuditWrapper, type PipAuditWrapperOptions } from "./wrappers/pip-audit.js";
import { createRadonWrapper } from "./wrappers/radon.js";
import { createRadonWrapper, type RadonWrapperOptions } from "./wrappers/radon.js";
import { createRuffWrapper } from "./wrappers/ruff.js";
import { createSemgrepWrapper } from "./wrappers/semgrep.js";
import { DEFAULT_DEPS, type WrapperDeps } from "./wrappers/shared.js";
import { createSpectralWrapper, type SpectralWrapperOptions } from "./wrappers/spectral.js";
import { createTflintWrapper } from "./wrappers/tflint.js";
import { createTrivyWrapper } from "./wrappers/trivy.js";
import { createTyWrapper } from "./wrappers/ty.js";
import { createVultureWrapper } from "./wrappers/vulture.js";
import { createTyWrapper, type TyWrapperOptions } from "./wrappers/ty.js";
import { createVultureWrapper, type VultureWrapperOptions } from "./wrappers/vulture.js";

/**
* Per-scanner context passed to `createDefaultWrappers`. Some wrappers
Expand All @@ -157,6 +157,12 @@ export interface DefaultWrapperContext {
readonly hadolint?: HadolintWrapperOptions;
readonly spectral?: SpectralWrapperOptions;
readonly pipAudit?: PipAuditWrapperOptions;
// Python dead-code / complexity / type-check scanners walk the project
// tree directly; without an exclude they descend into `.venv` and report
// library noise. The CLI threads the indexer's ignore dirs in here.
readonly vulture?: VultureWrapperOptions;
readonly radon?: RadonWrapperOptions;
readonly ty?: TyWrapperOptions;
}

/**
Expand Down Expand Up @@ -216,11 +222,11 @@ function createWrapperFor(
case GRYPE_SPEC.id:
return deps ? createGrypeWrapper(deps) : createGrypeWrapper();
case VULTURE_SPEC.id:
return deps ? createVultureWrapper(deps) : createVultureWrapper();
return createVultureWrapper(deps ?? DEFAULT_DEPS, ctx.vulture ?? {});
case RADON_SPEC.id:
return deps ? createRadonWrapper(deps) : createRadonWrapper();
return createRadonWrapper(deps ?? DEFAULT_DEPS, ctx.radon ?? {});
case TY_SPEC.id:
return deps ? createTyWrapper(deps) : createTyWrapper();
return createTyWrapper(deps ?? DEFAULT_DEPS, ctx.ty ?? {});
case CLAMAV_SPEC.id:
return deps ? createClamAvWrapper(deps) : createClamAvWrapper();
case CHECKOV_DOCKER_COMPOSE_SPEC.id:
Expand Down
20 changes: 20 additions & 0 deletions packages/scanners/src/wrappers/extended-wrappers.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,26 @@ test("vulture wrapper emits empty SARIF when binary missing", async () => {
assert.ok(out.skipped?.includes("not found on PATH"));
});

test("vulture wrapper anchors excludeGlobs to path segments (no .venv noise)", async () => {
  const { deps, calls } = makeFakeDeps(() => ({ stdout: "", exitCode: 0 }));
  const wrapper = createVultureWrapper(deps, { excludeGlobs: [".venv", "node_modules"] });
  await wrapper.run(ctx);

  // Locate the flag in the first recorded invocation; its value is the next argv entry.
  const argv = calls[0]?.args ?? [];
  const flagIndex = argv.indexOf("--exclude");
  assert.ok(flagIndex >= 0, "must pass --exclude when excludeGlobs is non-empty");
  const excludeValue = argv[flagIndex + 1] ?? "";

  // Each name must appear as a full, slash-delimited path segment. The bare
  // name is rejected because vulture would substring-match it and so also
  // suppress e.g. src/.venv_helpers.py.
  assert.ok(
    excludeValue.includes("*/.venv/*"),
    `expected anchored .venv glob, got: ${excludeValue}`,
  );
  assert.ok(excludeValue.includes("*/node_modules/*"));
  const patterns = excludeValue.split(",");
  assert.ok(!patterns.includes(".venv"), "must not pass the bare name .venv");
});

test("vulture wrapper omits --exclude when no excludeGlobs given", async () => {
  const { deps, calls } = makeFakeDeps(() => ({ stdout: "", exitCode: 0 }));
  const wrapper = createVultureWrapper(deps);
  await wrapper.run(ctx);

  // With no options supplied, the first recorded invocation must not carry the flag.
  const argv = calls[0]?.args ?? [];
  assert.ok(!argv.includes("--exclude"));
});

// ---------- radon ---------------------------------------------------------

test("radon wrapper parses cc JSON into SARIF results above threshold", async () => {
Expand Down
21 changes: 19 additions & 2 deletions packages/scanners/src/wrappers/radon.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,20 @@ import type { ScannerRunContext, ScannerRunResult, ScannerWrapper } from "../spe
import { emptySarifFor } from "../spec.js";
import { DEFAULT_DEPS, type WrapperDeps } from "./shared.js";

export function createRadonWrapper(deps: WrapperDeps = DEFAULT_DEPS): ScannerWrapper {
/** Options accepted by `createRadonWrapper`. */
export interface RadonWrapperOptions {
/**
* Directory names to skip (e.g. `.venv`, `node_modules`). radon's `-i`
* matches directory BASENAMES (not path globs), so the bare ignore names
* are passed through as-is. radon already skips hidden dirs by default, so
* `-i` mainly helps non-hidden entries (`node_modules`, `dist`, `build`).
*
* The wrapper joins the names with `,` into a single `-i` value. When this
* is undefined or empty, no `-i` flag is passed at all.
*/
readonly ignoreDirs?: readonly string[];
}

export function createRadonWrapper(
deps: WrapperDeps = DEFAULT_DEPS,
opts: RadonWrapperOptions = {},
): ScannerWrapper {
return {
spec: RADON_SPEC,
run: async (ctx: ScannerRunContext): Promise<ScannerRunResult> => {
Expand All @@ -37,7 +50,11 @@ export function createRadonWrapper(deps: WrapperDeps = DEFAULT_DEPS): ScannerWra
durationMs: performance.now() - started,
};
}
const args: readonly string[] = ["cc", "-s", "-j", ctx.projectPath];
const ignoreArgs =
opts.ignoreDirs !== undefined && opts.ignoreDirs.length > 0
? ["-i", opts.ignoreDirs.join(",")]
: [];
const args: readonly string[] = ["cc", "-s", "-j", ...ignoreArgs, ctx.projectPath];
const result = await deps.runBinary("radon", args, {
timeoutMs: ctx.timeoutMs,
cwd: ctx.projectPath,
Expand Down
24 changes: 22 additions & 2 deletions packages/scanners/src/wrappers/ty.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,20 @@ import type { ScannerRunContext, ScannerRunResult, ScannerWrapper } from "../spe
import { emptySarifFor } from "../spec.js";
import { DEFAULT_DEPS, type WrapperDeps } from "./shared.js";

export function createTyWrapper(deps: WrapperDeps = DEFAULT_DEPS): ScannerWrapper {
/** Options accepted by `createTyWrapper`. */
export interface TyWrapperOptions {
/**
* Directory names to exclude (e.g. `.venv`, `node_modules`). ty uses
* gitignore-style excludes; a trailing `/` anchors to a directory. We also
* pass `--force-exclude` so the excludes apply even though the project path
* is given explicitly on the CLI (CLI-named paths bypass excludes otherwise).
*
* Each entry becomes its own `--exclude` argument pair, with a trailing `/`
* appended only when the entry does not already end in one. When this is
* undefined or empty, neither `--exclude` nor `--force-exclude` is passed.
*/
readonly excludeGlobs?: readonly string[];
}

export function createTyWrapper(
deps: WrapperDeps = DEFAULT_DEPS,
opts: TyWrapperOptions = {},
): ScannerWrapper {
return {
spec: TY_SPEC,
run: async (ctx: ScannerRunContext): Promise<ScannerRunResult> => {
Expand All @@ -37,7 +50,14 @@ export function createTyWrapper(deps: WrapperDeps = DEFAULT_DEPS): ScannerWrappe
durationMs: performance.now() - started,
};
}
const args: readonly string[] = ["check", ctx.projectPath];
const excludeArgs =
opts.excludeGlobs !== undefined && opts.excludeGlobs.length > 0
? [
...opts.excludeGlobs.flatMap((g) => ["--exclude", g.endsWith("/") ? g : `${g}/`]),
"--force-exclude",
]
: [];
const args: readonly string[] = ["check", ...excludeArgs, ctx.projectPath];
const result = await deps.runBinary("ty", args, {
timeoutMs: ctx.timeoutMs,
cwd: ctx.projectPath,
Expand Down
39 changes: 37 additions & 2 deletions packages/scanners/src/wrappers/vulture.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,33 @@ import { DEFAULT_DEPS, type WrapperDeps } from "./shared.js";
/** Minimum confidence percentage vulture emits findings at. */
const DEFAULT_MIN_CONFIDENCE = "80";

export function createVultureWrapper(deps: WrapperDeps = DEFAULT_DEPS): ScannerWrapper {
/** Options accepted by `createVultureWrapper`. */
export interface VultureWrapperOptions {
/**
* Directory names the indexer ignores (e.g. `.venv`, `node_modules`).
* Threaded from the CLI so vulture doesn't walk the virtualenv and drown
* real findings in library dead-code. Anchored to path-segment globs
* inside the wrapper so a bare `.venv` can't substring-match
* `src/.venv_helpers.py`.
*
* All entries are joined with `,` into a single `--exclude` value. When
* this is undefined or empty, `--exclude` is omitted entirely.
*/
readonly excludeGlobs?: readonly string[];
}

/**
 * Turn an ignore directory name into a vulture `--exclude` glob anchored to a
 * path segment.
 *
 * vulture matches `--exclude` patterns against ABSOLUTE paths and treats a
 * wildcard-free pattern as a substring match, so the bare name `.venv` would
 * also suppress `src/.venv_helpers.py`. Wrapping it as a slash-delimited glob
 * segment matches only when the name is a full directory segment.
 *
 * Leading and trailing slashes on a plain name are trimmed before wrapping,
 * so a directory-style entry such as `.venv/` yields the same glob as `.venv`
 * rather than one containing a doubled slash.
 *
 * @param name - A directory name, or a ready-made glob pattern.
 * @returns The name wrapped as a full path segment between wildcards, or
 *   `name` untouched when it already contains a glob metacharacter.
 */
function toVultureExcludeGlob(name: string): string {
  // Already a glob (`*`, `?`, `[` or `]` present): the caller owns anchoring.
  if (/[*?[\]]/.test(name)) return name;
  // Trim edge slashes only; interior separators (e.g. `a/b`) are preserved.
  const segment = name.replace(/^\/+|\/+$/g, "");
  return `*/${segment}/*`;
}

export function createVultureWrapper(
deps: WrapperDeps = DEFAULT_DEPS,
opts: VultureWrapperOptions = {},
): ScannerWrapper {
return {
spec: VULTURE_SPEC,
run: async (ctx: ScannerRunContext): Promise<ScannerRunResult> => {
Expand All @@ -38,7 +64,16 @@ export function createVultureWrapper(deps: WrapperDeps = DEFAULT_DEPS): ScannerW
durationMs: performance.now() - started,
};
}
const args: readonly string[] = [ctx.projectPath, "--min-confidence", DEFAULT_MIN_CONFIDENCE];
const excludeArgs =
opts.excludeGlobs !== undefined && opts.excludeGlobs.length > 0
? ["--exclude", opts.excludeGlobs.map(toVultureExcludeGlob).join(",")]
: [];
const args: readonly string[] = [
ctx.projectPath,
"--min-confidence",
DEFAULT_MIN_CONFIDENCE,
...excludeArgs,
];
const result = await deps.runBinary("vulture", args, {
timeoutMs: ctx.timeoutMs,
cwd: ctx.projectPath,
Expand Down
Loading