diff --git a/.cursor/plans/duplicate_detection_design_432b764e.plan.md b/.cursor/plans/duplicate_detection_design_432b764e.plan.md new file mode 100644 index 00000000..4f753389 --- /dev/null +++ b/.cursor/plans/duplicate_detection_design_432b764e.plan.md @@ -0,0 +1,533 @@ +--- +name: Duplicate Detection Design +overview: Create a decision-ready markdown plan for duplicate detection in Codegraph, comparing structural, token, AST, git, and optional embedding approaches, then recommending a deterministic staged pipeline and transparent scoring system for surfacing duplicate-refactor candidates. +todos: + - id: write-plan-md + content: Author docs/superpowers/plans/2026-05-19-duplicate-detection.md with a recommended structural-first design, approach catalog, pipeline diagram, scoring rubric, and rollout phases + status: pending + - id: cross-link-existing + content: Link plan sections to chunkFile, ProjectIndex/SymbolDef, agent search scoring, impact similarityIndex, and agent-search architecture constraints + status: pending + - id: open-questions + content: End doc with concrete defaults plus open questions for granularity, PR-scoped behavior, hotspot weighting, and future embedding integration + status: pending +isProject: false +--- + +# Duplicate Detection Approaches and Scoring Plan + +## Goal + +Create **[docs/superpowers/plans/2026-05-19-duplicate-detection.md](docs/superpowers/plans/2026-05-19-duplicate-detection.md)** as a design document for likely duplicate-code detection in Codegraph. + +The plan should compare several approaches, explain which ones can be combined, and define a scoring model that ranks duplicate-refactor candidates with stable reasons. It should stay design-only: no public README claims, CLI contracts, or skill updates until implementation ships. 
+ +--- + +## Recommendation + +Use a **structural-first, embedding-adjacent** design: + +- **V1 core:** deterministic region hashing, normalized tokens, winnowed token shingles, and symbol/chunk metadata. +- **V1 optional where cheap:** normalized AST fingerprints for languages with reliable parse trees. +- **Not V1 core:** model embeddings, semantic-equivalence claims, or persistent SQLite schema changes. +- **External path:** continue to support semantic/vector workflows through `codegraph chunk` output rather than storing embeddings in Codegraph. + +This fits the existing architecture: Codegraph already centers on fast structural answers, vectorless agent search, Tree-sitter parsing, `ProjectIndex`, and semantic chunks. + +--- + +## Source Anchors + +The design document should explicitly link to these existing surfaces: + +- [docs/superpowers/plans/2026-05-14-agent-search-artifact-mcp.md](docs/superpowers/plans/2026-05-14-agent-search-artifact-mcp.md): establishes that agent search/explain are deterministic and avoid embeddings in the core path. +- [docs/agent-workflows.md](docs/agent-workflows.md): documents vectorless search/explain behavior and bounded agent packets. +- [docs/library-api.md](docs/library-api.md): documents chunking for LLM/vector workflows. +- [docs/how-it-works.md](docs/how-it-works.md): documents content-hash caching, Tree-sitter, `ProjectIndex`, and read-performance constraints. +- [src/chunking/chunkFile.ts](src/chunking/chunkFile.ts): source-level semantic chunk boundaries and `Chunk` metadata. +- [src/chunking/chunkSFC.ts](src/chunking/chunkSFC.ts): Vue/Svelte/Astro-style block chunking. +- [src/indexer/types.ts](src/indexer/types.ts): `ProjectIndex`, `ModuleIndex`, and `SymbolDef` (`kind`, `range`, `lineSpan`, `complexity`, `docstring`). +- [src/agent/search.ts](src/agent/search.ts): deterministic ranking style (`score`, reasons, limits, stable sort). +- [src/graphs/grep.ts](src/graphs/grep.ts): AST/text grep capabilities. 
+- [src/impact/parse.ts](src/impact/parse.ts) and [src/impact/types.ts](src/impact/types.ts): git `similarityIndex` handling for rename/copy metadata. + +--- + +## Problem Statement + +### In Scope + +- Surface likely duplicate or near-duplicate code regions for human/agent review. +- Prioritize refactor candidates where duplication creates maintenance risk. +- Provide explainable `reasons` so users can judge suggestions quickly. +- Keep runtime bounded on large repositories. +- Reuse existing discovery, chunking, symbol, graph, and impact infrastructure. + +### Out of Scope + +- Proving semantic equivalence. +- Cross-language clone detection in V1. +- Embedding-backed storage or model execution inside Codegraph core. +- Replacing `grep`, `search`, `refs`, or IDE clone-detection UX. +- Any persistent SQLite schema change in the first implementation. + +--- + +## Duplication Taxonomy + +| Clone type | Description | V1 support | Primary signals | +|------------|-------------|------------|-----------------| +| Type-1 | Exact duplicated text | Yes | Raw/normalized hash | +| Type-2 | Same structure with renamed identifiers/literals | Yes | Normalized AST hash, normalized token shingles | +| Type-3 | Edited copy with inserted/deleted statements | Partial | Winnowing, MinHash/Jaccard, LCS ratio | +| Type-4 | Semantically equivalent but structurally different | External only | Embeddings or deeper semantic analysis | + +The plan should be explicit that Type-4 is a discovery aid when powered externally, not a correctness claim. + +--- + +## Approach Catalog + +The markdown plan should describe each approach with inputs, Codegraph hooks, cost, strengths, weaknesses, and best clone types. + +### 1. Exact Region Hash + +- **What:** Hash normalized source text for a symbol/chunk region. +- **Inputs:** source text, unit byte/line range. +- **Hooks:** `ProjectIndex` symbols, `chunkFile`, cache hash behavior documented in `docs/how-it-works.md`. 
+- **Cost:** O(bytes), deterministic, no persistent schema needed. +- **Best for:** Type-1. +- **Weakness:** misses identifier renames and small edits. + +### 2. Comment/Whitespace-Normalized Hash + +- **What:** Drop comments and normalize whitespace before hashing. +- **Inputs:** source text plus optional language comment rules. +- **Hooks:** language definitions and Tree-sitter comments when available. +- **Cost:** O(bytes), deterministic. +- **Best for:** Type-1 with formatting drift. +- **Weakness:** comment stripping must avoid corrupting string literals; prefer parser-backed stripping when available. + +### 3. Symbol Metadata Prefilter + +- **What:** Use symbol kind, line span, complexity, exported/local status, and docstring presence to prioritize candidate comparisons. +- **Inputs:** `SymbolDef`, `ModuleIndex`, export metadata. +- **Hooks:** `src/indexer/types.ts`, `src/indexer/locals-and-exports.ts`. +- **Cost:** O(symbols), deterministic. +- **Best for:** reducing comparisons before precise similarity. +- **Weakness:** metadata is not proof of duplication and should never be sufficient alone. + +### 4. Chunk-Body Similarity + +- **What:** Compare semantically bounded chunks using token overlap and token-count similarity. +- **Inputs:** `Chunk` objects from `chunkFile`, `chunkTextFile`, and `chunkSFCFile`. +- **Hooks:** `src/chunking/chunkFile.ts`, `src/chunking/chunkTextFile.ts`, `src/chunking/chunkSFC.ts`. +- **Cost:** O(chunks + candidate pairs), deterministic. +- **Best for:** languages or file types where symbol extraction is weak. +- **Weakness:** chunks can be larger than functions, so scoring must penalize broad boilerplate. + +### 5. Token N-Gram Fingerprints + +- **What:** Normalize tokens, build k-token shingles, and bucket units by shared shingle hashes. +- **Inputs:** source text or parser tokens. +- **Hooks:** language definitions, chunk boundaries, symbol ranges. +- **Cost:** O(tokens); memory proportional to unique shingles. 
+- **Best for:** Type-2 and lightweight Type-3 candidate generation. +- **Weakness:** common boilerplate can create noisy buckets; bucket sizes need caps. + +### 6. Winnowing / MinHash + +- **What:** Keep representative fingerprints from token shingles to estimate similarity without all-pairs comparison. +- **Inputs:** normalized token shingles. +- **Hooks:** duplicate fingerprint index module added in a future implementation. +- **Cost:** O(tokens) indexing plus O(candidates) verification. +- **Best for:** scalable Type-3 near-copy detection. +- **Weakness:** requires tuning window size, shingle size, and banding thresholds. + +### 7. Normalized AST Fingerprint + +- **What:** Serialize normalized node structure while replacing identifiers and literals with placeholders. +- **Inputs:** Tree-sitter parse tree, unit range. +- **Hooks:** native Tree-sitter path, JS fallback where supported. +- **Cost:** parser-dependent; deterministic; good fit for languages with stable grammars. +- **Best for:** Type-2. +- **Weakness:** cross-language parity is harder; AST normalization rules must be explicit per language. + +### 8. AST Structural Grep Recurrence + +- **What:** Reuse AST grep to find repeated structural patterns users care about. +- **Inputs:** Tree-sitter query or text pattern. +- **Hooks:** `src/graphs/grep.ts`. +- **Cost:** O(files) per query. +- **Best for:** targeted repeated anti-patterns, not general clone detection. +- **Weakness:** requires known query shape. + +### 9. Git Similarity Metadata + +- **What:** Use git diff `similarity index` from rename/copy detection to flag changed-file duplication context. +- **Inputs:** git/raw diff. +- **Hooks:** `src/impact/parse.ts`, `src/impact/types.ts`. +- **Cost:** free when impact parsing already runs. +- **Best for:** PR-scoped copy/rename signals. +- **Weakness:** not whole-repo; depends on git diff metadata. + +### 10. 
External Embeddings + +- **What:** Use `codegraph chunk` output with a user-managed embedding model/vector store. +- **Inputs:** chunk text and metadata. +- **Hooks:** `codegraph chunk`, `chunkFile`, `chunkTextFile`. +- **Cost:** external model/runtime/storage; non-deterministic across providers/models. +- **Best for:** Type-4 discovery and natural-language similarity. +- **Weakness:** not suitable as core duplicate proof; should be documented as adjacent. + +--- + +## Recommended Pipeline + +```mermaid +flowchart LR + subgraph collect [CollectUnits] + files[DiscoveredFiles] + symbols[SymbolRanges] + chunks[ChunkRanges] + end + subgraph index [BuildFingerprints] + exact[NormalizedTextHash] + tokens[TokenShingles] + ast[NormalizedASTHash] + end + subgraph pair [GenerateCandidates] + buckets[InvertedBuckets] + caps[BucketAndUnitCaps] + end + subgraph rank [VerifyAndRank] + metrics[PairMetrics] + score[CompositeScore] + reasons[StableReasons] + end + files --> symbols + files --> chunks + symbols --> exact + chunks --> exact + symbols --> tokens + chunks --> tokens + symbols --> ast + exact --> buckets + tokens --> buckets + ast --> buckets + buckets --> caps + caps --> metrics + metrics --> score + score --> reasons +``` + +### Stage 1: Collect Comparable Units + +Default to a hybrid unit strategy: + +- Prefer symbol ranges for functions, methods, classes, interfaces, types, SQL routines, and SQL objects. +- Fall back to semantic chunks for files where symbol coverage is incomplete. +- Preserve both identifiers when a unit has both a symbol and chunk boundary. +- Skip units below `minTokens` (default: 40 for duplicate detection, not the chunking default of 150) unless exact hash matches are requested. +- Skip or split units above `maxTokens` (default: 800) to avoid whole-file false positives. 
+ +Suggested unit type: + +```ts +type DuplicateUnit = { + id: string; + file: string; + languageId: string; + kind: "symbol" | "chunk" | "sql" | "text"; + name?: string; + symbolKind?: string; + startLine: number; + endLine: number; + tokenCount: number; + complexity?: number; +}; +``` + +### Stage 2: Build Fingerprints + +Generate cheap fingerprints first: + +- `rawHash`: exact source region hash. +- `normalizedTextHash`: comment/whitespace-normalized hash. +- `tokenShingles`: normalized token k-grams, default `k = 5`. +- `winnowedSignature`: representative shingle hashes, default window size `4`. +- `astShapeHash`: optional normalized AST structure hash where parse data is available. + +### Stage 3: Generate Candidates + +Avoid all-pairs comparison: + +- Bucket by `normalizedTextHash`, `astShapeHash`, and winnowed shingle hashes. +- Cap buckets larger than `maxBucketSize` (default: 200) or down-weight them as boilerplate. +- Only compare pairs that share enough evidence: + - exact normalized hash match, or + - AST shape hash match, or + - at least `minSharedShingles` (default: 3), or + - PR-scoped git `similarityIndex` signal. +- Preserve deterministic pair identity as sorted `(leftUnitId, rightUnitId)`. + +### Stage 4: Verify Pair Metrics + +For each candidate pair, compute bounded metrics: + +- `tokenJaccard` +- `orderedTokenSimilarity` (optional LCS ratio; only for small enough units) +- `shingleOverlap` +- `lengthRatio` +- `sameSymbolKind` +- `lineSpanRatio` +- `astShapeEqual` +- `sameFile` +- `sharedDependencyContext` (weak hint from graph adjacency) + +### Stage 5: Rank and Report + +Emit bounded, deterministic suggestions with `score`, `confidence`, `cloneType`, metrics, and stable reasons. 
+ +```ts +type DuplicateUnitRef = { + file: string; + startLine: number; + endLine: number; + languageId: string; + kind: "symbol" | "chunk" | "sql" | "text"; + name?: string; + symbolKind?: string; +}; + +type DuplicateSuggestion = { + score: number; + confidence: "high" | "medium" | "low"; + cloneType: "exact" | "renamed" | "near" | "weak"; + left: DuplicateUnitRef; + right: DuplicateUnitRef; + metrics: { + tokenJaccard?: number; + shingleOverlap?: number; + lengthRatio?: number; + lineSpanRatio?: number; + complexityDelta?: number; + similarityIndex?: number; + }; + reasons: string[]; +}; +``` + +--- + +## Scoring System + +Use a transparent additive score, clamped to the range `0..100`. Signals should be explainable and stable; no score should depend on nondeterministic model output in the core path. Where one signal has several thresholds (the Token Jaccard tiers below), only the highest matching tier contributes, so that tiered similarity can never outweigh an exact hash match. + +### Positive Signals + +| Signal | Weight | Reason example | Notes | +|--------|--------|----------------|-------| +| Raw source hash match | +60 | `raw_hash_match` | Exact duplicate, strongest signal | +| Normalized text hash match | +50 | `normalized_text_hash_match` | Formatting/comment-insensitive Type-1 | +| AST shape hash match | +40 | `ast_shape_match` | Strong Type-2 signal | +| Token Jaccard >= 0.95 | +30 | `token_jaccard_0.97` | Strong near-exact signal | +| Token Jaccard >= 0.85 | +22 | `token_jaccard_0.88` | Good Type-2/Type-3 signal | +| Token Jaccard >= 0.70 | +12 | `token_jaccard_0.73` | Weak alone | +| Shingle overlap | +0 to +25 | `shared_shingles_14` | Scale linearly, cap at +25 | +| Ordered token similarity >= 0.80 | +10 | `ordered_similarity_0.84` | Optional expensive metric | +| Same symbol kind | +4 | `same_symbol_kind_function` | Small boost | +| Line span within 15% | +4 | `similar_line_span` | Small boost | +| Complexity within 20% | +3 | `similar_complexity` | Small boost | +| PR git similarity >= 80% | +20 | `git_similarity_92` | Only in impact/review mode | +| Shared dependency context | +3 | `shared_dependency_context` | Weak context hint | + +### 
Negative Signals + +| Signal | Weight | Reason example | Notes | +|--------|--------|----------------|-------| +| Token count below threshold | -25 | `trivial_body_penalty` | Avoid getters, tiny helpers | +| Length ratio outside 0.5..2.0 | -20 | `length_mismatch_penalty` | Avoid broad false positives | +| Boilerplate bucket too large | -20 | `boilerplate_bucket_penalty` | Common generated patterns | +| License/header-only region | -30 | `license_header_penalty` | Likely not actionable | +| Generated or vendored path | -15 | `generated_path_penalty` | Configurable; hard filter if ignored | +| Same file and adjacent regions | -10 | `same_file_adjacent_penalty` | Often internal repetition; still optional | + +### Hard Filters + +Discard pairs regardless of score when: + +- Either file is excluded by discovery config or CLI ignore globs. +- Ranges overlap in the same file. +- One unit fully contains the other and both represent the same enclosing symbol. +- Both units are below `minTokens`, unless raw hashes match and `--include-small` is set. +- Bucket size exceeds `maxBucketSize` and no exact/AST hash signal exists. + +### Confidence Tiers + +- **High:** `score >= 80`, or raw/normalized hash match with `tokenJaccard >= 0.90`, or AST shape match with `tokenJaccard >= 0.85`. +- **Medium:** `score >= 55` and `tokenJaccard >= 0.70`. +- **Low:** `score >= 35`; show only when requested or in verbose JSON. + +### Clone Type Classification + +- **exact:** raw or normalized text hash match. +- **renamed:** AST shape match or very high token similarity with different identifiers/literals. +- **near:** strong shingle/token similarity with edits. +- **weak:** score passes low threshold but lacks a strong structural proof. + +### Sort Order + +Sort deterministically: + +1. `confidence` rank (`high`, `medium`, `low`) +2. `score` descending +3. `tokenJaccard` descending +4. 
`left.file`, `left.startLine`, `right.file`, `right.startLine` + +--- + +## Output and UX Options + +Document these as future implementation choices, not commitments. + +### CLI + +Preferred first surface: + +```bash +codegraph duplicates ./src --min-confidence medium --json +codegraph duplicates ./src --cross-file-only --limit 50 +codegraph duplicates --provider git --base main --head HEAD +``` + +### Library + +```ts +const index = await buildProjectIndex(root); +const result = await findDuplicates(index, { + minConfidence: "medium", + crossFileOnly: true, + limit: 50, +}); +``` + +### Agent and MCP + +Add only after CLI/library behavior is stable: + +- `search` could surface duplicate handles for queries like `duplicate validation logic`. +- `explain` could include `duplicateCandidates` for a file/symbol. +- MCP could expose `duplicates` as a bounded, read-only tool. + +--- + +## Rollout Phases + +### Phase 0: Design Doc Only + +- Add the markdown plan. +- No public docs, CLI help, skill, or API changes. + +### Phase 1: In-Memory Engine + +- Create internal duplicate unit extraction and fingerprinting helpers. +- Implement exact hash, normalized text hash, token shingles, and winnowing. +- Add focused tests with inline fixtures. +- No cache or SQLite persistence. + +### Phase 2: CLI and Library Surface + +- Add `findDuplicates(index, options)` and `codegraph duplicates`. +- Return deterministic JSON with bounded results and omission counts. +- Add CLI regression tests. +- Update `docs/cli.md`, `docs/library-api.md`, and `codegraph-skill/codegraph/SKILL.md`. + +### Phase 3: AST Normalization + +- Add language-aware AST shape hashing for supported source languages. +- Update `docs/language-parity.md` and `docs/scenario-catalog.md`. +- Add per-language tests in `tests/languages/*.test.ts`. + +### Phase 4: PR-Scoped and Agent Integration + +- Use git `similarityIndex` and changed-symbol context in impact/review mode. 
+- Add duplicate candidates to `explain` packets or MCP only after output shape stabilizes. + +### Phase 5: Optional Persistence + +- Consider `.codegraph-cache/duplicates-v1` only if repeated runs need it. +- Avoid SQLite schema changes unless there is a clear query use case; if added, include migration tests per `AGENTS.md`. + +--- + +## Language and Parity Rules + +- V1 supports same-language comparisons only. +- Source languages should use symbol-first units where possible. +- SQL may start with statement/object chunks and SQL symbols where available. +- Vue/Svelte/Astro should compare script/style/template blocks separately through SFC chunking. +- Graph-first formats and config files may be chunk-only. +- If a language lacks AST normalization, token/chunk detection should still work and the limitation must be documented. + +--- + +## Testing Strategy + +When implemented, test real behavior rather than tuning just to pass thresholds. + +Core fixtures: + +- Exact duplicate functions in two files. +- Same function with renamed variables and literals. +- Near-copy function with one edited branch. +- Same file non-overlapping duplicate blocks. +- Negative: similar names with different bodies. +- Negative: tiny trivial helpers below threshold. +- Negative: generated/header boilerplate. +- Determinism: same repo produces the same ordered suggestions. +- Bounds: large repeated boilerplate bucket does not explode comparisons. +- PR mode: git `similarityIndex` boosts changed-file suggestions without requiring whole-repo scan. + +Likely test locations: + +- `tests/duplicates.test.ts` for engine behavior. +- `tests/cli-regressions.test.ts` for CLI output. +- `tests/languages/*.test.ts` for AST normalization parity when added. +- `tests/impact.test.ts` or `tests/impact-streaming.test.ts` for PR-scoped duplicate context. + +--- + +## Open Questions + +--- + +- Should `minTokens` default to 40 for duplicate detection, or stay closer to chunking's 150-token default? 
+- Should same-file duplicates be enabled by default, or should the default focus on cross-file refactor candidates? +- Should high-fan-in/hotspot files increase score because refactors are valuable, or decrease score because utility patterns are noisy? +- Should PR-scoped mode compare changed regions against the whole repo, or only changed files against touched dependency neighborhoods? +- Should embedding integration remain only a documented external workflow, or should a future plugin interface accept externally computed similarity scores? + +--- + +## Non-Goals for the Markdown Plan + +- Do not implement the `duplicates` command or library API. +- Do not change the SQLite schema. +- Do not add embedding dependencies or package installs. +- Do not update public capability docs until the feature exists. +- Do not claim language parity before tests prove it. + +--- + +## Acceptance Criteria + +- [ ] New file `docs/superpowers/plans/2026-05-19-duplicate-detection.md` exists. +- [ ] Document includes recommendation, problem statement, source anchors, taxonomy, approach catalog, pipeline, scoring, rollout phases, parity rules, tests, and open questions. +- [ ] At least 10 approaches are documented with cost/strength tradeoffs. +- [ ] Combined pipeline and mermaid diagram are included. +- [ ] Scoring includes positive signals, negative signals, hard filters, confidence tiers, clone type classification, stable sort order, and example reasons. +- [ ] Plan links directly to `chunkFile`, `ProjectIndex`/`SymbolDef`, agent search scoring, AST grep, and impact `similarityIndex`. +- [ ] Plan clearly separates in-core structural detection from external embedding workflows. +- [ ] Plan does not require package installs, SQLite schema changes, or public documentation updates before implementation. 
diff --git a/REVIEW_ANALYSIS_NEXT.md b/REVIEW_ANALYSIS_NEXT.md new file mode 100644 index 00000000..6e7063d7 --- /dev/null +++ b/REVIEW_ANALYSIS_NEXT.md @@ -0,0 +1,160 @@ +# Follow-up Review Analysis + +Date: 2026-05-20 +Branch analyzed: `complete-review-plan-items` + +This file records a new review pass after the large decomposition branch. The +goal is to find follow-up opportunities to reduce surface area, simplify code, +correct behavior, and add focused regression coverage. + +## Review Evidence + +- `npx tsx src/cli.ts inspect ./src --limit 25` +- `npx tsx src/cli.ts hotspots ./src --limit 40 --json` +- `find src -name '*.ts' -not -path '*/dist/*' -print0 | xargs -0 wc -l | sort -nr` +- `npx tsx src/cli.ts explain src/cli.ts --json` +- `npx tsx src/cli.ts explain src/review.ts --json` +- `npx tsx src/cli.ts explain src/indexer/build-index.ts --json` +- `npx tsx src/cli.ts explain src/util/resolution.ts --json` +- `npx tsx src/cli.ts cycles --sort priority --json` +- `npx tsx src/cli.ts unresolved --json` +- Targeted `rg` searches for duplicated concurrency helpers, CLI option parsing, + relative path formatting, barrel imports, and bounded-output patterns. + +No dependency cycles were reported. The highest remaining concentration is in: + +- `src/cli.ts` - 1825 lines, broad command dispatch and graph/index orchestration. +- `src/review.ts` - 1647 lines, review diff collection, indexing, summaries, risk, and assembly. +- `src/indexer/build-index.ts` - 1471 lines, index build pipeline and incremental/cache orchestration. +- `src/util/resolution.ts` - 1450 lines, mixed TS/PHP/Python/Node/general resolution plus generic concurrency. +- `src/agent/explain.ts`, `src/agent-tools.ts`, and `src/agent/search.ts` - agent-facing output, lookup, bounds, and formatting. + +## Checklist + +### Correctness And Behavior + +- [ ] Fix include-root discovery handling for `inspect --root . ./src`. + - Finding: `npx tsx src/cli.ts inspect --root . 
./src --limit 5` currently fails with a malformed gitignore root path containing mixed POSIX and Windows separators. + - Likely area: `src/cli.ts` include-root discovery setup and `src/config.ts` discovery root normalization. + - Also check recommended commands from `inspect`: `unresolved` and `cycles` recommendations currently omit the include-root target suffix, which can expand a scoped inspect into whole-repo follow-ups. + - Add tests in `tests/cli-regressions.test.ts` for `inspect --root . ./src`, include-root recommendations, and config `ignoreGlobs` with include roots. + +- [ ] Standardize CLI numeric option parsing and validation. + - Finding: several commands still use raw `Number(...)` for user input: `--threads`, `--symbols-detailed-max-edges`, `--max-hits`, `--max-callsites`, `--max-tests`, chunk token bounds, graph query depth, and impact options. + - Risk: invalid values can silently become `NaN`, `0`, or undefined depending on downstream code. + - Add shared helpers in `src/cli/options.ts` for positive integers, non-negative integers, optional integers, and bounded integers. + - Cover invalid and boundary values in `tests/cli-regressions.test.ts`, `tests/cli-command-modules.test.ts`, and command-specific tests. + +- [ ] Add regression coverage for scoped cache behavior in `inspect` and `hotspots`. + - Finding: `buildScopedReportGraph` combines disk cache reuse with include-root restriction. The flow is subtle and easy to regress. + - Add tests proving file counts, hotspots, cycles, and unresolved summaries are scoped when include roots are passed, both with cold builds and warm disk cache. + +### Decomposition And Surface Area + +- [ ] Extract the remaining command handlers from `src/cli.ts`. + - Current shape: `runCliWithActiveRuntime` still owns command context creation, root/discovery resolution, changed-file resolution, graph output, index output, `dumpmod`, `goto`, `refs`, `grep`, `inspect`, and `hotspots`. 
+ - Target split: + - `src/cli/context.ts` for parsed args, runtime writers, root/discovery/include-root resolution, progress setup, and shared stdin/report helpers. + - `src/cli/graph.ts` for graph and SQLite export mode. + - `src/cli/index.ts` for index output. + - `src/cli/navigation.ts` for `dumpmod`, `goto`, and `refs`. + - `src/cli/grep.ts` for AST/text grep. + - `src/cli/inspect.ts` for inspect and hotspots. + - Keep `src/cli.ts` as a thin dispatcher. + - Verify with `tests/cli-regressions.test.ts`, `tests/cli-command-modules.test.ts`, and focused command tests. + +- [ ] Split `src/review.ts` into phase modules. + - Current shape: review options, deleted-file reconstruction, graph delta, SQL context, candidate tests, symbol summaries, risk scoring, and final assembly live in one file. + - Target split: + - `src/review/changes.ts` for changed file and diff collection. + - `src/review/deleted.ts` for deleted snapshots and deleted importer edges. + - `src/review/summaries.ts` for changed symbol and file summaries. + - `src/review/candidates.ts` for candidate test merging and ordering. + - `src/review/risk.ts` for risk summary and review tasks. + - `src/review/report.ts` for final assembly. + - Add focused tests around deleted-file cases, missing explicit files, review presets, candidate ordering, and diagnostics. + +- [ ] Split `src/util/resolution.ts` into language/domain-specific modules. + - Current shape: TS config paths, graph-only resolution, PHP Composer/class discovery, Python module resolution, Node package resolution, cache clearing, and generic `mapLimit` live together. + - Target split: + - `src/util/resolution/tsconfig.ts` + - `src/util/resolution/php.ts` + - `src/util/resolution/python.ts` + - `src/util/resolution/node.ts` + - Keep `src/util/resolution.ts` as a facade only. + - Add tests for cache clearing and language-specific resolution parity after the split. 
+ +- [ ] Move generic concurrency helpers out of resolution code and consolidate duplicates. + - Finding: `src/util/resolution.ts` exports `mapLimit`, `src/review.ts` has a local `runWithConcurrency`, and `src/util/semaphore.ts` has `mapLimitSemaphore`. + - Target: a single `src/util/concurrency.ts` with documented behavior for order preservation, invalid limits, and rejection handling. + - Update callers in `graph-builder`, `indexer/build-index`, impact modules, SQL modules, review, and agent explain. + - Keep `tests/map-limit.test.ts`, and add rejection/concurrency-order coverage if missing. + +- [ ] Reduce internal dependence on broad barrels. + - Finding: many implementation modules import from `../util.js`, `../graphs.js`, and `../indexer.js`, increasing fan-in on broad facade files and making public/internal boundaries blurry. + - Target: implementation modules import direct leaf modules unless they are intentionally using a stable domain facade. + - Add or extend package metadata/source-structure tests to prevent imports through the root public API and to limit broad internal barrel usage where practical. + +- [ ] Audit the public root API in `src/index.ts`. + - Finding: the `package.json` `exports` map exposes only `"."`, while `src/index.ts` re-exports a very broad surface of build, graph, impact, session, agent, MCP, SQLite, chunking, language, and utility APIs. + - Target: classify exports as public-stable, public-legacy, or internal-only. Deprecate internal-only exports, or move them behind narrower subpath exports, in a planned, semver-compatible way. + - Update `docs/library-api.md`, `README.md`, and package metadata tests when the public contract changes. + +### Duplication And Shared Helpers + +- [ ] Unify agent file/path/follow-up helpers. + - Finding: `relativeFile` wrappers remain in `src/agent/search.ts`, `src/agent/explain.ts`, and `src/agent/artifact.ts`; search/explain also duplicate symbol/file target resolution patterns.
+ - Target: expand `src/agent/normalize.ts` or add `src/agent/targets.ts` for file resolution, SQL-object detection, handle resolution, and common follow-up construction. + - Verify with `tests/agent-search.test.ts`, `tests/agent-explain.test.ts`, and `tests/artifact-build.test.ts`. + +- [ ] Centralize presentation bounds for CLI/review/agent output. + - Finding: bounded-output logic is partly shared for agent APIs, but CLI/review still have hard-coded slices such as changed files, review tasks, impact pretty refs, and candidate tests. + - Target: named constants and a small presentation-bound helper for CLI summaries, review summaries, agent changed context, and impact pretty output. + - Add tests that assert omission/truncation behavior and avoid silently changing output volume. + +- [ ] Consolidate relative path display helpers. + - Finding: many modules directly call `path.relative(...).replace(/\\/g, "/")` or local wrappers instead of `toProjectRelativePath` or a display helper. + - Target: use one project-relative display helper for CLI, review, graph rendering, agent output, and index/cache reporting. + - Add path-normalization regression coverage for POSIX, Windows-style absolute paths, and out-of-root paths. + +### Performance And Accuracy + +- [ ] Review `buildProjectIndex` phase boundaries for cache/incremental complexity. + - Current shape: `src/indexer/build-index.ts` still handles worker setup, parsed cache retention, file signatures, manifest snapshots, graph finalization, and incremental entry points. + - Target split: + - worker setup/teardown + - parsed cache retention + - manifest write/read orchestration + - index finalization + - incremental changed-file planning + - Add focused tests around cache strictness, changed/deleted files, worker usage, and report timings. + +- [ ] Improve PHP resolution maintainability and coverage. 
+ - Finding: PHP Composer parsing, token scanning, package symbol indexing, PSR-0/PSR-4 resolution, classmap exclusion, and implicit files are concentrated in `src/util/resolution.ts`. + - Target: split into a PHP resolver module with fixtures for Composer `autoload`, `autoload-dev`, `files`, PSR-0, PSR-4, classmap, and `exclude-from-classmap`. + - Add tests in `tests/languages/php.test.ts`, `tests/goto.test.ts`, and `tests/references.test.ts` for each Composer branch. + +- [ ] Review unresolved-import classification for non-source artifacts. + - Finding: whole-repo `unresolved` reports include script command names, `.cursor` plan links, graph-visualization DOM IDs, Rust crate-relative imports, and sample fixture imports. + - Target: decide whether `unresolved` should be source-code-only by default, scope-aware by command, or classifier-aware for scripts/docs/test fixtures. + - Add docs and tests for intentional unresolved behavior so users can trust the signal. + +### Documentation + +- [ ] Document the intended CLI scoping model. + - Explain `--root`, positional include roots, config `ignoreGlobs`, `--include-glob`, `--ignore-glob`, gitignore handling, and cache reuse. + - Update `docs/cli.md` and `codegraph-skill/codegraph/SKILL.md` if command behavior or examples change. + +- [ ] Document public API boundaries before shrinking exports. + - Add a short public/internal API section to `docs/library-api.md`. + - Note which barrels are stable entry points and which modules are implementation details. + +## Suggested Order + +1. Fix the scoped `inspect --root . ./src` behavior and add regression tests. +2. Centralize CLI numeric parsing because it reduces future command-handler churn. +3. Extract CLI context/inspect/hotspots/graph handlers. +4. Move concurrency helpers to a neutral utility module. +5. Split `util/resolution.ts`, starting with PHP because it is the largest isolated domain. +6. Split `review.ts` by pipeline phase. +7. 
Reduce internal barrel imports and audit the public API surface. diff --git a/REVIEW_PLAN.md b/REVIEW_PLAN.md index 8c96351a..4244440e 100644 --- a/REVIEW_PLAN.md +++ b/REVIEW_PLAN.md @@ -56,35 +56,35 @@ Scope: `src/graphs`, `src/impact`, and `src/indexer`. ## Second-Pass Complexity Findings -- [ ] Split the CLI dispatcher into shared command context plus focused command runners. +- [x] Split the CLI dispatcher into shared command context plus focused command runners. - `src/cli.ts` has a 1,188-line `runCliWithActiveRuntime()` block that mixes argument parsing, discovery setup, graph/index/review command execution, output formatting, report writing, and repeated build-option assembly. - Suggested fix: extract a `CliCommandContext` builder, a `resolveCliScanPlan()` helper that returns files plus deleted/existing git state once, and move the remaining in-file command bodies into focused modules. Start with `graph`, `index`, `impact`, and `review` because they contain the most duplicated build/index/report plumbing. - Tests: CLI regression coverage for `graph --sqlite`, `graph --symbols-detailed`, `index --json`, `impact --pretty`, `review --summary`, and include-root/git-diff combinations. -- [ ] Factor import extraction into language-specific extractors with a shared binding sink. +- [x] Factor import extraction into language-specific extractors with a shared binding sink. - `src/indexer/imports.ts` still has a 1,018-line `collectImportsForFile()` orchestration block. It now shares implicit binding conversion, but graph-only handling, Python regex import extraction, statement overrides, native capture handling, and JS/CJS fallback remain coupled in one function. - Suggested fix: introduce an `ImportExtractionContext` with `resolveFrom()`, `pushBinding()`, fallback reporting, and source/language metadata; move Python, JS/CJS fallback, graph-only, and native statement override logic into separate modules under `src/indexer/imports/`. 
- Tests: import binding parity for TypeScript/JavaScript, Python multiline and alias forms, Java/Kotlin/C#/Go/Rust/PHP native and fallback paths, and graph-only module specifier behavior. -- [ ] Decompose detailed symbol graph collection into reusable AST passes. +- [x] Decompose detailed symbol graph collection into reusable AST passes. - `src/graphs/symbol-graph-detailed.ts` has a 690-line `buildSymbolGraphDetailed()` with nested walkers for definitions, aliases, member chains, calls, class inheritance, decorators, and Rust impls. - Suggested fix: build per-file indexes (`localsByName`, imported alias maps, namespace maps), extract small collectors for `uses/calls`, member chains, decorators, class inheritance, and Rust impls, and run one merged traversal per function body where possible. - Performance opportunity: current function-body processing walks the same subtree separately for alias uses, member uses, and calls; a merged visitor should reduce repeated AST traversal on large functions. - Tests: detailed symbol graph edge cases for namespace members, `membersOnly`, decorator edges, Java/C# inheritance, Rust impls, optional chaining, and max-edge pruning. -- [ ] Split review report generation into staged pipeline helpers. +- [x] Split review report generation into staged pipeline helpers. - `src/review.ts` has a 485-line `buildReviewReport()` that handles change discovery, diff normalization, deleted snapshots, incremental indexing, changed-symbol mapping, reference lookups, file summaries, graph delta, candidate tests, SQL context, risk summary, and final report assembly. - Suggested fix: extract `collectReviewChanges()`, `buildReviewIndex()`, `summarizeChangedFiles()`, `collectReviewGraphDelta()`, and `assembleReviewReport()` so each stage has explicit inputs/outputs and can be tested directly. 
- Correctness opportunity: make deleted/missing/ignored file status transitions explicit in one data structure instead of spreading them across changed-file sets, diff maps, and final summary branches. - Tests: raw diff, git `WORKTREE`, deleted files, missing explicit files, ignored diff files, include-symbol-details, SQL context, and candidate-test sorting. -- [ ] Share full and incremental index-build state machines. +- [x] Share full and incremental index-build state machines. - `src/indexer/build-index.ts` has two large overlapping flows: `buildIndexFromFileListShared()` at 303 lines and `buildProjectIndexIncremental()` at 371 lines. Both initialize reports, graph options, file signatures, worker pools, parsed caches, bloom filters, JSON dependency modules, graph adjacency, manifest writes, and final `ProjectIndex` assembly. - Suggested fix: extract reusable helpers for `IndexBuildRunState`, `prepareFileSignatures()`, `buildFileModules()`, `writeIndexManifest()`, `finalizeProjectIndex()`, and parsed-cache trimming. Keep full and incremental file-selection logic separate, but share execution and finalization. - Performance opportunity: centralizing file signature and cached-edge reuse should make it easier to avoid recomputing graph edges or SQL fact caches when only module cache state changed. - Tests: cache modes, incremental strict, manifest option mismatch, deleted tracked files, SQL corpus signatures, worker pool parity, and parsed-cache reuse. -- [ ] Split multi-language resolution into per-language modules and unify project symbol indexes. +- [x] Split multi-language resolution into per-language modules and unify project symbol indexes. - `src/util/resolution.ts` is 1,872 lines and combines TS path config, generic specifier resolution, node_modules resolution, JVM package indexes, PHP composer/symbol scanning, Go module/workspace handling, and Python package resolution. 
- Suggested fix: move language-specific resolvers into `src/util/resolution/{go,jvm,php,python,node}.ts` and keep `resolveImportSpecifier()` as a small dispatcher. Share a generic project-symbol-index builder for Java, Kotlin, and PHP while keeping PHP's multi-namespace/kind metadata. - Correctness opportunity: PHP, Java, and Kotlin symbol indexers use regex/token scanners independent of native parsed symbols; consider reusing native/local export extraction where available so import resolution and indexed symbols stay aligned. @@ -92,49 +92,49 @@ Scope: `src/graphs`, `src/impact`, and `src/indexer`. ## Third-Pass Complexity Findings -- [ ] Share declaration and export extraction infrastructure across locals, scope, and impact mapping. +- [x] Share declaration and export extraction infrastructure across locals, scope, and impact mapping. - `src/indexer/locals-and-exports.ts` has a 598-line `collectLocalsAndExportsFromSource()` and `src/indexer/scope.ts` has a 263-line `buildScopeIndexFromSource()`; both classify declaration names, compute ranges, walk parameters, and interpret language-specific definition nodes. `src/impact/map.ts` also repeats declaration-name checks while locating changed symbols. - Suggested fix: introduce a shared declaration walker that emits normalized declaration events (`name`, `kind`, `range`, `node`, `scopeRole`) and let locals/export extraction, scope indexing, and impact mapping consume those events. - Correctness opportunity: this would keep language additions from updating locals but not scope, or scope but not impact symbol mapping. - Tests: scope-quality, changed-line mapping, and cross-language local/export extraction for TypeScript, Python, Go, Rust, Kotlin, Swift, C/C++, PHP, and Ruby. -- [ ] Split module specifier extraction from edge resolution and reuse fallback/query handling. +- [x] Split module specifier extraction from edge resolution and reuse fallback/query handling. 
- `src/graphs/specifiers.ts` has a 330-line `collectModuleSpecifiersFromSource()` and `src/graph-edge-collector.ts` has a 109-complexity `collectEdgesForFile()`. Both contain language routing, fallback reporting, graph-only handling, HTML/style extras, and per-language resolution decisions. - Suggested fix: make specifier extraction return normalized `ModuleSpecifier` values plus diagnostics only, then move edge target resolution into a table of language-specific resolver functions. Share helper code for native query execution, JS fallback recovery, HTML/style appended specifiers, and graph-only resolution config. - Performance opportunity: the edge collector currently resolves all specifiers via `Promise.all`, which can burst filesystem work for large files; a bounded resolver queue would make graph builds smoother without changing results. - Tests: fallback import extraction diagnostics, graph-only document links, CSS/SCSS/Less URL imports, HTML/Vue/Svelte/Astro imports, PHP qualified references, JVM package fan-out, and dynamic import heuristics. -- [ ] Unify member-access parsing for goto, references, and detailed symbol graph. +- [x] Unify member-access parsing for goto, references, and detailed symbol graph. - Member/object/property extraction appears in `src/indexer/navigation-goto.ts`, `src/indexer/navigation.ts` (`collectNamespaceMemberRefs()`), and `src/graphs/symbol-graph-detailed.ts`. Each handles Python/Ruby/Go/Java/C#/Kotlin/Swift member shapes with local variations. - Suggested fix: create a `memberAccess.ts` helper that normalizes member nodes into `{ object, property, chain }` and supports language-specific node shapes. Use it in goto, namespace reference collection, and detailed symbol graph member/call collectors. - Correctness opportunity: optional chaining, Kotlin/Swift navigation expressions, Ruby scope resolution, and Go qualified types should resolve consistently across goto, refs, and symbol graph edges. 
- Tests: `goto`, `references`, and detailed-symbol edge cases for namespace members, optional/member chains, Ruby `::`, Go qualified types, Java static members, C# nested members, Kotlin aliases, and Swift static factories. -- [ ] Extract shared SQL lexical helpers and reuse them across facts, navigation, and MCP query checks. +- [x] Extract shared SQL lexical helpers and reuse them across facts, navigation, and MCP query checks. - `splitTopLevelCommaSeparated()` is duplicated in `src/sql/extractFacts.ts` and `src/sql/navigation.ts`; paren-depth logic is also duplicated. `src/mcp/server.ts` has its own SQL comment/literal stripper for resource checks. - Suggested fix: create `src/sql/lex.ts` for top-level splitting, paren depth, comment/string masking, and bounded identifier scanning. Reuse it in fact extraction, SQL navigation, and MCP SQLite query validation. - Correctness opportunity: SQL parsing edge cases such as quoted strings, comments, nested expressions, and CTE clauses should not diverge between review facts and navigation. - Tests: SQL fact extraction, SQL navigation, SQL review context, and MCP `query_sqlite` resource-bound rejection tests. -- [ ] Share native binding contracts between runtime and worker code. +- [x] Share native binding contracts between runtime and worker code. - `NativeBinding` is declared separately in `src/native/treeSitterNative.ts` and `src/worker/nativeExtractWorker.ts`, with slightly different optional capabilities. - Suggested fix: move the binding interface and result/fallback reason contracts into a shared native module imported by both runtime and worker code. - Correctness opportunity: adding a native capability such as syntax-tree parsing or compact queries should update one contract instead of relying on duplicated structural types. - Tests: native binding loader, native worker parity, native runtime mode, and native parser ownership tests. -- [ ] Decompose project-file discovery parsers into manifest-specific helpers. 
+- [x] Decompose project-file discovery parsers into manifest-specific helpers. - `src/util/projectFiles.ts` combines discovery traversal with many manifest-name parsers (`package.json`, TOML, INI, setup.py, Maven, Gradle, .NET, Go, Gem, Swift) and a long `PROJECT_FILE_DEFINITIONS` registry. - Suggested fix: move parser helpers and definitions into `src/util/projectFiles/definitions.ts` and `src/util/projectFiles/parsers.ts`, leaving traversal/path-safety logic in the main module. - Correctness opportunity: parser tests can cover manifest name extraction directly, instead of only through full project discovery. - Tests: project file discovery, workspace detection, language parity docs fixtures where project metadata affects external classification. -- [ ] Split MCP server into tool registry, HTTP transport, file security, and SQL guard modules. +- [x] Split MCP server into tool registry, HTTP transport, file security, and SQL guard modules. - `src/mcp/server.ts` is 1,286 lines and mixes MCP tool handlers, Streamable HTTP session handling, Host-header checks, JSON body parsing, file confinement, file-prefix reads, UTF-8 truncation, SQLite query guarding, and row/byte bounding. - Suggested fix: extract `mcp/tools.ts`, `mcp/http.ts`, `mcp/security.ts`, and `mcp/sqliteGuard.ts`, keeping `server.ts` as composition glue. - Reuse opportunity: UTF-8 truncation and path confinement are general utilities that could also support artifact/file-reading code paths. - Tests: MCP server tests for tool handlers, HTTP Host validation, request size limits, artifact path confinement, file reads, SQLite row/byte limits, and unsupported SQL functions. -- [ ] Centralize agent limit normalization and bounded output shaping. +- [x] Centralize agent limit normalization and bounded output shaping. - `src/agent/search.ts`, `src/agent/explain.ts`, and `src/agent-tools.ts` each normalize limits, bound results, and convert file/module references into agent-facing shapes. 
Handle formatting is shared, but bounded-list and output normalization policies are still scattered. - Suggested fix: introduce `src/agent/bounds.ts` and `src/agent/normalize.ts` for limit clamping, bounded-list metadata, relative path normalization, and common follow-up command shaping. - Correctness opportunity: agent search, explain, artifact questions, and tool wrappers should report omission counts and path shapes consistently. @@ -142,49 +142,49 @@ Scope: `src/graphs`, `src/impact`, and `src/indexer`. ## Fourth-Pass Complexity Findings -- [ ] Split native runtime orchestration from native query execution helpers. +- [x] Split native runtime orchestration from native query execution helpers. - `src/native/treeSitterNative.ts` is a high fan-in hotspot and now owns binding loading, runtime-mode enforcement, normalized query caching, compact/full query execution, single-query execution, JS fallback bridging, and syntax-tree parsing. - Suggested fix: extract native binding state/runtime-mode helpers, normalized query metadata, query execution wrappers, and JS fallback bridging into focused modules under `src/native/`. Keep the public runtime facade small and preserve existing exported entry points. - Correctness opportunity: compact imports, full language queries, ad-hoc queries, and syntax-tree parsing repeat fallback reason/error shaping; one result-normalization helper would keep native-required failures and unsupported-language behavior consistent. - Tests: native runtime mode, native query normalization, compact imports fallback, native parser ownership, native worker parity, and explicit native-required failure paths. -- [ ] Decompose SQLite persistence into schema, write/update, and query modules. +- [x] Decompose SQLite persistence into schema, write/update, and query modules. 
- `src/sqlite.ts` is 920 lines and combines schema creation/migration, insert/delete helpers, full writes, incremental updates, canned graph queries, raw read-only query validation, and snapshot metadata. - Suggested fix: split `sqlite/schema.ts`, `sqlite/write.ts`, `sqlite/update.ts`, `sqlite/query.ts`, and `sqlite/guards.ts`. Keep schema-version handling and migration helpers isolated so persistent storage changes have one upgrade path. - Correctness opportunity: `ensureSchema()` creates tables and patches columns in the same file as query execution; separating schema upgrades would make older on-disk database regression tests easier to maintain. - Tests: SQLite full write, incremental update/delete, schema migration from older fixtures, raw read-only guard behavior, artifact SQLite generation, and MCP SQLite query paths. -- [ ] Extract impact report assembly into reusable compact/full report stages. +- [x] Extract impact report assembly into reusable compact/full report stages. - `src/impact/report.ts` has a 265-line `buildCompactReport()` plus full-report assembly, re-export chain discovery, top impacts, clusters, cycles, and surface-area summaries in one module. - Suggested fix: introduce staged helpers for display-file normalization, file-index construction, compact serializers, graph summary sections, re-export chains, top impacts, clusters, and surface area. Share the precomputed file/symbol indexes between compact and full formats. - Performance opportunity: compact report construction repeatedly calls `displayFile()` and looks up file indexes across each section; a shared serializer context can avoid repeated normalization and make missing-index errors explicit. - Tests: compact/full impact reports, project file metadata, re-export chains, graph cycles, clusters, surface area summaries, top impacts, and schema-version compatibility. -- [ ] Split impact analyzer into direct-reference, transitive, and severity calculators. 
+- [x] Split impact analyzer into direct-reference, transitive, and severity calculators. - `src/impact/analyzer.ts` has a 221-line `analyzeImpact()` that handles option normalization, ignore/test matchers, bounded reference lookup, streaming emission, direct impact merging, file-level changes, and transitive propagation. The same module also owns severity scoring. - Suggested fix: extract `impact/direct.ts`, `impact/transitive.ts`, and `impact/severity.ts` around a shared `ImpactAnalysisContext` containing matchers, dependency stats, diagnostics, and emit hooks. - Correctness opportunity: include-test and ignore-glob policy is rebuilt across direct and transitive phases; one context would keep filtering and diagnostics consistent as new impact reasons are added. - Tests: direct refs with `maxRefs`, ignored/test refs, file-level changes, transitive depth/type-only edges, diagnostics counters, streaming partial items, and severity weight overrides. -- [ ] Centralize build-cache option normalization, manifest comparison, and reports. +- [x] Centralize build-cache option normalization, manifest comparison, and reports. - `src/indexer/build-cache.ts` is 807 lines and mixes workspace manifest edges, memory/disk module cache, file signatures, fallback extraction reports, manifest IO, build-option summaries, diffing, and graph-option equality. - Suggested fix: split cache option normalization/equality into `indexer/build-cache/options.ts`, manifest IO/verification into `manifest.ts`, module cache read/write into `module-cache.ts`, and report shaping into `reports.ts`. - Correctness opportunity: the same normalized option shapes drive manifest writes, manifest diffs, and graph-option equality; a single typed comparer would reduce false rebuilds and missed rebuilds when discovery or graph options change. 
- Tests: cache invalidation, cache strict/off modes, discovery option normalization, graph option equality, fallback extraction report aggregation, disk cache reuse, and manifest mismatch messages. -- [ ] Decompose graph-only document link extraction and chunking by format. +- [x] Decompose graph-only document link extraction and chunking by format. - `src/documentLinks.ts` is 692 lines and routes Markdown, MDX, Astro, Handlebars, reStructuredText, AsciiDoc, HTML attributes, inline scripts, link normalization, and Markdown parsing in one file. `src/chunking/chunkFile.ts` is another large format-sensitive flow for block extraction, splitting, merging, and gap filling. - Suggested fix: move document extractors into `documentLinks/{markdown,html,rst,asciidoc,sfc}.ts` behind a small dispatcher, and split chunking into match collection, block classification, large-block splitting, merge, and gap-fill helpers. - Correctness opportunity: document specifier normalization should match graph-only edge extraction, chunking, and source-style imports for mixed formats like MDX/Astro/SFC files. - Tests: Markdown reference/inline links, MDX/Astro/Handlebars imports, RST toctrees/targets, AsciiDoc xref/include/link forms, HTML `srcset`/inline scripts, CSS-style URLs, and chunk splitting/merge behavior. -- [ ] Split external dependency classification into manifest parsers, stdlib tables, and context lookup. +- [x] Split external dependency classification into manifest parsers, stdlib tables, and context lookup. - `src/graphs/external-classifier.ts` is 743 lines and contains large stdlib tables, manifest parsers for many ecosystems, ancestor-boundary search, package-name matching, caches, and final external classification. - Suggested fix: move ecosystem manifest readers into `graphs/external/manifests.ts`, stdlib/module tables into `stdlib.ts`, context/ancestor lookup into `context.ts`, and leave `classifyExternalSpecifier()` as a small coordinator. 
- Correctness opportunity: manifest parsing overlaps with project-file discovery but uses separate parsing rules; extracting parser units makes it easier to align dependency detection with discovery fixtures and language parity claims. - Tests: unresolved import classification for Node, Python, Ruby, Go, Rust, Zig, Java/Kotlin, .NET, C/C++, Swift, Composer, nested manifests, VCS boundaries, and cache reset/stats. -- [ ] Separate graph query parsing and traversal execution from SQLite query dispatch. +- [x] Separate graph query parsing and traversal execution from SQLite query dispatch. - `src/query.ts`, `src/graphs/queries.ts`, `src/cli/graphQueries.ts`, and `src/sqlite.ts` each participate in graph-query parsing, graph traversal, result bounding, cycle detail construction, and canned SQL-backed query dispatch. - Suggested fix: introduce a typed query AST/parser module and execution modules for in-memory graph queries and SQLite-backed canned queries. Reuse traversal helpers for neighbors, shortest paths, reverse dependencies, cycles, and unresolved imports. - Performance opportunity: `querySymbolNeighbors()` builds incoming/outgoing maps per call and then scans all edges again to materialize results; reusable adjacency indexes would help interactive agents and CLI graph queries on large symbol graphs. 
diff --git a/src/agent-tools.ts b/src/agent-tools.ts index befb1c24..9a2127a1 100644 --- a/src/agent-tools.ts +++ b/src/agent-tools.ts @@ -18,9 +18,9 @@ import { listProjectFiles, normalizePath, resolveFilePathFromRoot, - toProjectRelativePath, } from "./util.js"; -import { getFiniteNonNegativeLimit } from "./graphs/limits.js"; +import { boundAgentList, defaultAgentLimit, normalizeAgentLimit } from "./agent/bounds.js"; +import { normalizeAgentOutputPath } from "./agent/normalize.js"; type ToolRuntimeOptions = { index?: ProjectIndex; @@ -242,7 +242,7 @@ export async function tool_findSymbol( id: symbol.id, name: symbol.name, kind: String(symbol.kind), - file: toProjectRelativePath(root, symbol.file) ?? normalizePath(symbol.file), + file: normalizeToolFileOutput(root, symbol.file), ...(symbol.range ? { range: symbol.range } : {}), line: symbol.range?.start.line ?? 0, exactMatch, @@ -261,8 +261,7 @@ export async function tool_findSymbol( }); const exportedDefinitionsByFile = new Map>(); - const limit = getToolDefaultedLimit(options.maxResults, 20); - const limitedMatches = matches.slice(0, limit).map((match) => { + const limitedMatches = boundAgentList(matches, getToolDefaultedLimit(options.maxResults, 20)).items.map((match) => { const exportedDefinitions = exportedDefinitionsByFile.get(match.symbol.file) ?? getExportedSymbolIdsForFile(index, match.symbol.file); exportedDefinitionsByFile.set(match.symbol.file, exportedDefinitions); @@ -385,7 +384,7 @@ export async function tool_getDependencies( ...(options.depth !== undefined ? 
{ depth: options.depth } : {}), limit: limit + 1, }); - const limited = dependencies.slice(0, limit).map((entry) => ({ + const limited = boundAgentList(dependencies, limit).items.map((entry) => ({ file: normalizeToolFileOutput(root, entry.file), depth: entry.depth, })); @@ -394,7 +393,7 @@ export async function tool_getDependencies( status: "ok", file: resolvedFile.relativeFile, dependencies: limited, - truncated: dependencies.length > limited.length, + truncated: dependencies.length !== limited.length, }; } catch (error) { return { status: "error", error: String(error) }; @@ -449,7 +448,7 @@ export async function tool_getReverseDependencies( ...(options.depth !== undefined ? { depth: options.depth } : {}), limit: limit + 1, }); - const limited = dependents.slice(0, limit).map((entry) => ({ + const limited = boundAgentList(dependents, limit).items.map((entry) => ({ file: normalizeToolFileOutput(root, entry.file), depth: entry.depth, })); @@ -458,7 +457,7 @@ export async function tool_getReverseDependencies( status: "ok", file: resolvedFile.relativeFile, dependents: limited, - truncated: dependents.length > limited.length, + truncated: dependents.length !== limited.length, }; } catch (error) { return { status: "error", error: String(error) }; @@ -526,12 +525,11 @@ async function collectToolGraph( } function getToolLimit(limit: number | undefined): number | undefined { - return getFiniteNonNegativeLimit(limit); + return normalizeAgentLimit(limit); } function getToolDefaultedLimit(limit: number | undefined, fallback: number): number { - const normalizedLimit = getFiniteNonNegativeLimit(limit); - return normalizedLimit ?? fallback; + return defaultAgentLimit(limit, fallback); } function resolveToolFileInput( @@ -559,7 +557,7 @@ function resolveToolFileInput( return { status: "ok", absPath, - relativeFile: toProjectRelativePath(root, absPath) ?? 
normalizePath(filePath), + relativeFile: normalizeToolFileOutput(root, absPath), }; } @@ -660,7 +658,7 @@ function getToolImportDisplayName(entry: ImportBinding): string { } function normalizeToolFileOutput(root: string, filePath: string): string { - return toProjectRelativePath(root, filePath) ?? normalizePath(filePath); + return normalizeAgentOutputPath(root, filePath); } async function getToolMissingFileResult( diff --git a/src/agent/artifact.ts b/src/agent/artifact.ts index f0acc561..9c410e29 100644 --- a/src/agent/artifact.ts +++ b/src/agent/artifact.ts @@ -5,6 +5,7 @@ import { defNodeId } from "../graphs/symbol-graph.js"; import { queryGraphSqliteRaw, writeGraphSqlite } from "../sqlite.js"; import { isFilePathWithinRoot, normalizePath, toProjectRelativePath } from "../util.js"; import { formatAgentSqlHandle, formatAgentSymbolHandle } from "./handles.js"; +import { normalizeAgentFilePath } from "./normalize.js"; import { createAgentSession } from "./session.js"; import type { AgentProjectSnapshot, AgentSession } from "./session.js"; import { quoteShellArg } from "./shell.js"; @@ -704,5 +705,5 @@ async function writeJson(filePath: string, value: unknown): Promise { } function relativeFile(root: string, file: string): string { - return toProjectRelativePath(root, file) ?? 
normalizePath(path.resolve(file)); + return normalizeAgentFilePath(root, file); } diff --git a/src/agent/bounds.ts b/src/agent/bounds.ts new file mode 100644 index 00000000..2363eecc --- /dev/null +++ b/src/agent/bounds.ts @@ -0,0 +1,45 @@ +export type BoundedAgentList = { + items: T[]; + omitted: number; +}; + +export type AgentLimitOptions = { + fallback?: number; + max?: number; +}; + +export function normalizeAgentLimit(limit: number | undefined, options: AgentLimitOptions = {}): number | undefined { + if (typeof limit !== "number" || !Number.isFinite(limit)) { + return options.fallback; + } + const floored = Math.max(0, Math.floor(limit)); + if (options.max === undefined) return floored; + return Math.min(options.max, floored); +} + +export function defaultAgentLimit(limit: number | undefined, fallback: number, max?: number): number { + const options: AgentLimitOptions = { fallback }; + if (max !== undefined) { + options.max = max; + } + return normalizeAgentLimit(limit, options) ?? 
fallback; +} + +export function boundAgentList(items: readonly T[], limit: number): BoundedAgentList { + const boundedItems = items.slice(0, limit); + return { + items: boundedItems, + omitted: countOmitted(items.length, boundedItems.length), + }; +} + +export function emptyAgentBoundedList(): BoundedAgentList { + return { + items: [], + omitted: 0, + }; +} + +export function countOmitted(total: number, visible: number): number { + return Math.max(0, total - visible); +} diff --git a/src/agent/explain.ts b/src/agent/explain.ts index 35b03e3e..71ac78c3 100644 --- a/src/agent/explain.ts +++ b/src/agent/explain.ts @@ -9,8 +9,14 @@ import { buildReviewReport } from "../review.js"; import { extractSqlFactsFromSource, sqlObjectBaseName } from "../sql/extractFacts.js"; import type { SqlStatementFact } from "../sql/types.js"; import type { Range } from "../types.js"; -import { normalizePath, toProjectRelativePath } from "../util.js"; +import { normalizePath } from "../util.js"; import { mapLimit } from "../util/resolution.js"; +import { + boundAgentList, + defaultAgentLimit, + emptyAgentBoundedList, + type BoundedAgentList, +} from "./bounds.js"; import { formatAgentFileHandle, formatAgentSqlHandle, @@ -21,6 +27,11 @@ import { parseAgentSqlHandle, parseAgentSymbolHandle, } from "./handles.js"; +import { + collectDefinitionFollowUps, + collectFileFollowUps as collectCommonFileFollowUps, + normalizeAgentFilePath, +} from "./normalize.js"; import { createAgentSession, type AgentProjectSnapshot, type AgentSession } from "./session.js"; import { quoteShellArg } from "./shell.js"; @@ -121,14 +132,9 @@ type SymbolLookup = { exportedIds: Set; }; -type BoundedList = { - items: T[]; - omitted: number; -}; - type ReferenceContext = { - references: BoundedList; - snippets: BoundedList; + references: BoundedAgentList; + snippets: BoundedAgentList; }; type ResolvedExplainTarget = @@ -373,14 +379,19 @@ async function buildExplanation( resolved: ResolvedExplainTarget, request: 
AgentExplainTarget, ): Promise { - const maxDependencies = normalizeLimit(request.maxDependencies, DEFAULT_MAX_DEPENDENCIES); - const maxReferences = normalizeLimit(request.maxReferences ?? request.maxDependencies, DEFAULT_MAX_DEPENDENCIES); - const maxRelatedSqlObjects = normalizeLimit( + const maxDependencies = defaultAgentLimit(request.maxDependencies, DEFAULT_MAX_DEPENDENCIES, MAX_DEPENDENCIES); + const maxReferences = defaultAgentLimit( + request.maxReferences ?? request.maxDependencies, + DEFAULT_MAX_DEPENDENCIES, + MAX_DEPENDENCIES, + ); + const maxRelatedSqlObjects = defaultAgentLimit( request.maxRelatedSqlObjects ?? request.maxDependencies, DEFAULT_MAX_DEPENDENCIES, + MAX_DEPENDENCIES, ); - const maxSnippets = normalizeLimit(request.maxSnippets, DEFAULT_MAX_SNIPPETS, MAX_SNIPPETS); - const maxSymbols = normalizeLimit(request.maxSymbols, DEFAULT_MAX_SYMBOLS, MAX_SYMBOLS); + const maxSnippets = defaultAgentLimit(request.maxSnippets, DEFAULT_MAX_SNIPPETS, MAX_SNIPPETS); + const maxSymbols = defaultAgentLimit(request.maxSymbols, DEFAULT_MAX_SYMBOLS, MAX_SYMBOLS); if (resolved.kind === "not_found") { return emptyExplanation(snapshot, { @@ -392,7 +403,7 @@ async function buildExplanation( const file = resolved.kind === "file" ? 
resolved.file : normalizePath(resolved.def.file); const relFile = relativeFile(snapshot.root, file); const allSymbols = collectFileSymbols(snapshot, lookup, file); - const symbols = allSymbols.slice(0, maxSymbols); + const boundedSymbols = boundAgentList(allSymbols, maxSymbols); const dependencies = collectDependencies(snapshot, file, maxDependencies, "forward"); const reverseDependencies = collectDependencies(snapshot, file, maxDependencies, "reverse"); const hotspots = collectTargetHotspots(snapshot, file); @@ -403,7 +414,7 @@ async function buildExplanation( const references = referenceContext.references; const snippets = referenceContext.snippets; const relatedSqlObjects = await collectRelatedSqlObjects(snapshot, lookup, resolved, file, maxRelatedSqlObjects); - const followUps = collectFollowUps(snapshot, resolved, symbols, relFile); + const followUps = collectFollowUps(snapshot, resolved, boundedSymbols.items, relFile); const changedContext = await collectChangedContext(request); return { @@ -419,7 +430,7 @@ async function buildExplanation( references.items, relatedSqlObjects.items, ), - symbols, + symbols: boundedSymbols.items, dependencies: dependencies.items, reverseDependencies: reverseDependencies.items, references: references.items, @@ -435,7 +446,7 @@ async function buildExplanation( snippets: maxSnippets, }, omittedCounts: { - symbols: Math.max(0, allSymbols.length - symbols.length), + symbols: boundedSymbols.omitted, dependencies: dependencies.omitted, reverseDependencies: reverseDependencies.omitted, references: references.omitted, @@ -543,23 +554,19 @@ function collectDependencies( file: string, limit: number, direction: "forward" | "reverse", -): BoundedList { +): BoundedAgentList { const startFile = normalizePath(file); const dependencies = direction === "forward" ? 
getDependencies(snapshot.fileGraph, startFile, { depth: 1 }) : getReverseDependencies(snapshot.fileGraph, startFile, { depth: 1 }); - const items = dependencies + const sortedDependencies = dependencies .map((dependency) => ({ file: relativeFile(snapshot.root, dependency.file), depth: dependency.depth, })) - .sort(compareDependencies) - .slice(0, limit); - return { - items, - omitted: Math.max(0, dependencies.length - items.length), - }; + .sort(compareDependencies); + return boundAgentList(sortedDependencies, limit); } function compareDependencies(left: AgentExplanationDependency, right: AgentExplanationDependency): number { @@ -603,7 +610,7 @@ async function collectReferenceContext( if (fileDelta !== 0) return fileDelta; return left.range.start.line - right.range.start.line; }); - const referenceItems = references.slice(0, referenceLimit); + const boundedReferences = boundAgentList(references, referenceLimit); const referencesWithContext = result.references.filter((reference) => reference.context !== undefined); const snippets = referencesWithContext @@ -613,17 +620,11 @@ async function collectReferenceContext( if (fileDelta !== 0) return fileDelta; return left.line - right.line; }); - const snippetItems = snippets.slice(0, snippetLimit); + const boundedSnippets = boundAgentList(snippets, snippetLimit); return { - references: { - items: referenceItems, - omitted: Math.max(0, references.length - referenceItems.length), - }, - snippets: { - items: snippetItems, - omitted: Math.max(0, snippets.length - snippetItems.length), - }, + references: boundedReferences, + snippets: boundedSnippets, }; } @@ -641,8 +642,8 @@ async function collectRelatedSqlObjects( resolved: Exclude, file: string, limit: number, -): Promise> { - if (resolved.kind !== "sql_object" && !isSqlFile(file)) return emptyBoundedList(); +): Promise> { + if (resolved.kind !== "sql_object" && !isSqlFile(file)) return emptyAgentBoundedList(); const sqlObjects = collectSqlObjectNodes(snapshot, lookup); 
const targetName = resolved.kind === "sql_object" ? (resolved.node?.name ?? resolved.def.localName) : undefined; @@ -679,11 +680,7 @@ async function collectRelatedSqlObjects( if (fileDelta !== 0) return fileDelta; return left.name.localeCompare(right.name); }); - const items = matches.slice(0, limit); - return { - items, - omitted: Math.max(0, matches.length - items.length), - }; + return boundAgentList(matches, limit); } type SqlObjectNodeInfo = { @@ -828,17 +825,10 @@ function sqlRelationRank(relation: string): number { return 3; } -function emptyBoundedList(): BoundedList { - return { - items: [], - omitted: 0, - }; -} - function emptyReferenceContext(): ReferenceContext { return { - references: emptyBoundedList(), - snippets: emptyBoundedList(), + references: emptyAgentBoundedList(), + snippets: emptyAgentBoundedList(), }; } @@ -848,11 +838,7 @@ function collectFollowUps( symbols: AgentExplanationSymbol[], relFile: string, ): string[] { - const followUps = new Set([ - `codegraph deps ${quoteShellArg(relFile)} --json`, - `codegraph rdeps ${quoteShellArg(relFile)} --json`, - `codegraph chunk ${quoteShellArg(relFile)}`, - ]); + const followUps = new Set(collectCommonFileFollowUps(relFile)); if (resolved.kind === "file") { for (const symbol of symbols.slice(0, 5)) { @@ -861,12 +847,13 @@ function collectFollowUps( ); } } else { - followUps.add( - `codegraph goto ${quoteShellArg(relFile)} ${resolved.def.range.start.line} ${resolved.def.range.start.column}`, - ); - followUps.add( - `codegraph refs --file ${quoteShellArg(relFile)} --line ${resolved.def.range.start.line} --col ${resolved.def.range.start.column} --pretty`, - ); + for (const command of collectDefinitionFollowUps( + relFile, + resolved.def.range.start.line, + resolved.def.range.start.column, + )) { + followUps.add(command); + } followUps.add( `codegraph search ${quoteShellArg(resolved.node?.name ?? 
resolved.def.localName)} --from ${quoteShellArg(relFile)} --json`, ); @@ -938,11 +925,6 @@ async function collectChangedContext(request: AgentExplainTarget): Promise(); - const limit = normalizeLimit(request.limit); + const limit = defaultAgentLimit(request.limit, DEFAULT_LIMIT, MAX_RESULTS); let fileNeighborIndex: Map | undefined; const getFileNeighborIndex = (): Map => { fileNeighborIndex ??= buildFileNeighborIndex(snapshot); @@ -191,9 +197,8 @@ async function searchSnapshot(snapshot: AgentProjectSnapshot, request: AgentSear const candidates = [...resultMap.values()] .filter((result) => result.score > 0) .sort(compareResults); - const results = candidates - .slice(0, limit) - .map(finalizeResult); + const boundedResults = boundAgentList(candidates, limit); + const results = boundedResults.items.map(finalizeResult); return { schemaVersion: 1, @@ -210,17 +215,12 @@ async function searchSnapshot(snapshot: AgentProjectSnapshot, request: AgentSear resultCount: results.length, totalCandidates: candidates.length, omittedCounts: { - results: Math.max(0, candidates.length - results.length), + results: boundedResults.omitted, }, results, }; } -function normalizeLimit(limit: number | undefined): number { - if (typeof limit !== "number" || !Number.isFinite(limit)) return DEFAULT_LIMIT; - return Math.min(MAX_RESULTS, Math.max(0, Math.floor(limit))); -} - function normalizeDepth(depth: number | undefined): number { if (typeof depth !== "number" || !Number.isFinite(depth)) return 1; return Math.min(MAX_GRAPH_DEPTH, Math.max(0, Math.floor(depth))); @@ -725,16 +725,17 @@ function addFileNeighbors( function addSymbolFollowUps(result: MutableSearchResult, relFile: string, def: SymbolDef | undefined): void { result.followUps.add(`codegraph explain ${quoteShellArg(result.handle)}`); if (def) { - result.followUps.add(`codegraph goto ${quoteShellArg(relFile)} ${def.range.start.line} ${def.range.start.column}`); - result.followUps.add(`codegraph refs --file ${quoteShellArg(relFile)} 
--line ${def.range.start.line} --col ${def.range.start.column} --pretty`); + for (const command of collectDefinitionFollowUps(relFile, def.range.start.line, def.range.start.column)) { + result.followUps.add(command); + } } addFileFollowUps(result, relFile); } function addFileFollowUps(result: MutableSearchResult, relFile: string): void { - result.followUps.add(`codegraph deps ${quoteShellArg(relFile)} --json`); - result.followUps.add(`codegraph rdeps ${quoteShellArg(relFile)} --json`); - result.followUps.add(`codegraph chunk ${quoteShellArg(relFile)}`); + for (const command of collectCommonFileFollowUps(relFile)) { + result.followUps.add(command); + } } function compareResults(left: MutableSearchResult, right: MutableSearchResult): number { @@ -758,6 +759,10 @@ function finalizeResult(result: MutableSearchResult): AgentSearchResult { return left.target.localeCompare(right.target); }); const followUps = [...result.followUps].sort(); + const boundedRankReasons = boundAgentList(rankReasons, MAX_RANK_REASONS_PER_RESULT); + const boundedEvidence = boundAgentList(evidence, MAX_EVIDENCE_PER_RESULT); + const boundedNeighbors = boundAgentList(neighbors, MAX_NEIGHBORS_PER_RESULT); + const boundedFollowUps = boundAgentList(followUps, MAX_FOLLOWUPS_PER_RESULT); return { handle: result.handle, @@ -766,19 +771,19 @@ function finalizeResult(result: MutableSearchResult): AgentSearchResult { file: result.file, ...(result.range ? 
{ range: result.range } : {}), score: Number(result.score.toFixed(3)), - rankReasons: rankReasons.slice(0, MAX_RANK_REASONS_PER_RESULT), - evidence: evidence.slice(0, MAX_EVIDENCE_PER_RESULT), - neighbors: neighbors.slice(0, MAX_NEIGHBORS_PER_RESULT), - followUps: followUps.slice(0, MAX_FOLLOWUPS_PER_RESULT), + rankReasons: boundedRankReasons.items, + evidence: boundedEvidence.items, + neighbors: boundedNeighbors.items, + followUps: boundedFollowUps.items, omittedCounts: { - rankReasons: Math.max(0, rankReasons.length - MAX_RANK_REASONS_PER_RESULT), - evidence: Math.max(0, evidence.length - MAX_EVIDENCE_PER_RESULT), - neighbors: Math.max(0, neighbors.length - MAX_NEIGHBORS_PER_RESULT), - followUps: Math.max(0, followUps.length - MAX_FOLLOWUPS_PER_RESULT), + rankReasons: boundedRankReasons.omitted, + evidence: boundedEvidence.omitted, + neighbors: boundedNeighbors.omitted, + followUps: boundedFollowUps.omitted, }, }; } function relativeFile(root: string, file: string): string { - return toProjectRelativePath(root, file) ?? 
normalizePath(path.resolve(file)); + return normalizeAgentFilePath(root, file); } diff --git a/src/chunking/chunkBlocks.ts b/src/chunking/chunkBlocks.ts new file mode 100644 index 00000000..b5f38991 --- /dev/null +++ b/src/chunking/chunkBlocks.ts @@ -0,0 +1,80 @@ +import type { LanguageConfig } from "./languageConfig.js"; +import type { BlockCandidate, ChunkCapture, ChunkMatch } from "./types.js"; + +export type ChunkBlockGroups = { + mainBlocks: BlockCandidate[]; + innerBlocks: BlockCandidate[]; + comments: BlockCandidate[]; +}; + +export function collectChunkBlockGroups(language: LanguageConfig, matches: ChunkMatch[]): ChunkBlockGroups { + const mainBlocks: BlockCandidate[] = []; + const innerBlocks: BlockCandidate[] = []; + const comments: BlockCandidate[] = []; + + for (const match of matches) { + let nameCapture: ChunkCapture | undefined; + let blockCapture: ChunkCapture | undefined; + let innerCapture: ChunkCapture | undefined; + let blockKind: string | undefined; + + for (const capture of match.captures) { + const { name } = capture; + + if (name === language.captures.name) { + nameCapture = capture; + } + + if (language.captures.comments.includes(name)) { + comments.push({ + kind: name === "chunk.docstring" ? "docstring" : "comment", + startByte: capture.startByte, + endByte: capture.endByte, + startLine: capture.startLine, + endLine: capture.endLine, + }); + } + + if (name === language.captures.innerBlock) { + innerCapture = capture; + } + + if (name.startsWith(language.captures.blockPrefix) && name !== language.captures.innerBlock) { + blockCapture = capture; + blockKind = name.slice(language.captures.blockPrefix.length) || capture.nodeType; + } + } + + if (innerCapture) { + innerBlocks.push({ + kind: "inner", + startByte: innerCapture.startByte, + endByte: innerCapture.endByte, + startLine: innerCapture.startLine, + endLine: innerCapture.endLine, + }); + } + + if (blockCapture) { + const candidate: BlockCandidate = { + kind: blockKind ?? 
"block", + startByte: blockCapture.startByte, + endByte: blockCapture.endByte, + startLine: blockCapture.startLine, + endLine: blockCapture.endLine, + }; + + if (nameCapture) { + candidate.name = nameCapture.text; + } + + mainBlocks.push(candidate); + } + } + + mainBlocks.sort((a, b) => a.startByte - b.startByte || a.endByte - b.endByte); + innerBlocks.sort((a, b) => a.startByte - b.startByte || a.endByte - b.endByte); + comments.sort((a, b) => a.startByte - b.startByte || a.endByte - b.endByte); + + return { mainBlocks, innerBlocks, comments }; +} diff --git a/src/chunking/chunkFile.ts b/src/chunking/chunkFile.ts index f49b2e39..bfd99ee4 100644 --- a/src/chunking/chunkFile.ts +++ b/src/chunking/chunkFile.ts @@ -1,59 +1,11 @@ import type { LanguageConfig } from "./languageConfig.js"; -import { supportById } from "../languages.js"; -import { - executeJsQueryAsNativeMatches, - getNativeSingleQueryExecution, - isNativeBindingLoadedForLanguage, - shouldAvoidJsFallbackForLanguage, - type NativeMatch, -} from "../native/treeSitterNative.js"; +import { collectChunkBlockGroups } from "./chunkBlocks.js"; +import { getChunkMatches } from "./chunkMatches.js"; +import { fillGapsWithMiscChunks, mergeSmallChunks } from "./chunkMerge.js"; +import { splitLargeBlockSimple, splitLargeBlockUsingInnerBlocks } from "./chunkSplit.js"; +import type { Chunk, ChunkTokenizer } from "./types.js"; -/** - * Represents a semantic chunk of code or text, ready for LLM processing or vector embeddings. 
- */ -export interface Chunk { - /** Unique identifier for the chunk */ - id: string; - /** Language identifier (e.g., "javascript", "typescript", "python") */ - languageId: string; - /** Optional source file path */ - filePath?: string | undefined; - /** Chunk type (e.g., "function", "class", "method", "import", "misc") */ - type: string; - /** Symbol name if applicable (e.g., function name, class name) */ - name?: string; - /** 1-based start line number */ - startLine: number; - /** 1-based end line number */ - endLine: number; - /** The chunk content text */ - text: string; - /** Estimated token count */ - tokenCount: number; -} - -interface BlockCandidate { - kind: string; - name?: string; - startByte: number; - endByte: number; - startLine: number; - endLine: number; -} - -type ChunkCapture = { - name: string; - text: string; - startByte: number; - endByte: number; - startLine: number; - endLine: number; - nodeType: string; -}; - -type ChunkMatch = { - captures: ChunkCapture[]; -}; +export type { Chunk } from "./types.js"; /** * Options for semantic code chunking. @@ -70,7 +22,7 @@ export interface ChunkFileOptions { /** Maximum tokens per chunk (default: 400). Larger chunks are split. 
*/ maxTokens?: number; /** Custom token counting function (default: whitespace-based) */ - tokenizer?: ((text: string) => number) | undefined; + tokenizer?: ChunkTokenizer | undefined; } function defaultTokenizer(text: string): number { @@ -87,82 +39,9 @@ function defaultTokenizer(text: string): number { */ export function chunkFile(opts: ChunkFileOptions): Chunk[] { const { language, source, filePath, minTokens = 150, maxTokens = 400, tokenizer = defaultTokenizer } = opts; - const matches = getChunkMatches(language, source, filePath); - - const newlineOffsets: number[] = []; - for (let i = 0; i < source.length; i++) { - if (source[i] === "\n") newlineOffsets.push(i); - } - - const mainBlocks: BlockCandidate[] = []; - const innerBlocks: BlockCandidate[] = []; - const comments: BlockCandidate[] = []; - - for (const match of matches) { - let nameCapture: ChunkCapture | undefined; - let blockCapture: ChunkCapture | undefined; - let innerCapture: ChunkCapture | undefined; - let blockKind: string | undefined; - - for (const capture of match.captures) { - const { name } = capture; - - if (name === language.captures.name) { - nameCapture = capture; - } - - if (language.captures.comments.includes(name)) { - comments.push({ - kind: name === "chunk.docstring" ? 
"docstring" : "comment", - startByte: capture.startByte, - endByte: capture.endByte, - startLine: capture.startLine, - endLine: capture.endLine, - }); - } - - if (name === language.captures.innerBlock) { - innerCapture = capture; - } - - if (name.startsWith(language.captures.blockPrefix) && name !== language.captures.innerBlock) { - blockCapture = capture; - blockKind = name.slice(language.captures.blockPrefix.length) || capture.nodeType; - } - } - - if (innerCapture) { - innerBlocks.push({ - kind: "inner", - startByte: innerCapture.startByte, - endByte: innerCapture.endByte, - startLine: innerCapture.startLine, - endLine: innerCapture.endLine, - }); - } - - if (blockCapture) { - const candidate: BlockCandidate = { - kind: blockKind ?? "block", - startByte: blockCapture.startByte, - endByte: blockCapture.endByte, - startLine: blockCapture.startLine, - endLine: blockCapture.endLine, - }; - - if (nameCapture) { - candidate.name = nameCapture.text; - } - - mainBlocks.push(candidate); - } - } - - mainBlocks.sort((a, b) => a.startByte - b.startByte || a.endByte - b.endByte); - innerBlocks.sort((a, b) => a.startByte - b.startByte || a.endByte - b.endByte); - comments.sort((a, b) => a.startByte - b.startByte || a.endByte - b.endByte); - + const newlineOffsets = collectNewlineOffsets(source); + const { mainBlocks, innerBlocks, comments } = collectChunkBlockGroups(language, matches); const preliminaryChunks: Chunk[] = []; let chunkIdCounter = 0; const makeChunkId = () => `${language.id}:${filePath ?? 
"unknown"}:${chunkIdCounter++}`; @@ -206,16 +85,16 @@ export function chunkFile(opts: ChunkFileOptions): Chunk[] { } } - for (const c of comments) { - const text = source.slice(c.startByte, c.endByte); + for (const comment of comments) { + const text = source.slice(comment.startByte, comment.endByte); const tokens = tokenizer(text); if (tokens === 0) continue; preliminaryChunks.push({ id: makeChunkId(), languageId: language.id, - type: c.kind, - startLine: c.startLine, - endLine: c.endLine, + type: comment.kind, + startLine: comment.startLine, + endLine: comment.endLine, text, tokenCount: tokens, ...(filePath !== undefined ? { filePath } : {}), @@ -226,7 +105,7 @@ export function chunkFile(opts: ChunkFileOptions): Chunk[] { const mergedChunks = mergeSmallChunks(preliminaryChunks, minTokens, maxTokens, tokenizer); - const finalChunks = fillGapsWithMiscChunks( + return fillGapsWithMiscChunks( mergedChunks, source, language.id, @@ -236,302 +115,12 @@ export function chunkFile(opts: ChunkFileOptions): Chunk[] { maxTokens, makeChunkId, ); - - return finalChunks; -} - -function getChunkMatches(language: LanguageConfig, source: string, filePath?: string | undefined): ChunkMatch[] { - const support = supportById(language.supportId); - if (support) { - const nativeExecution = getNativeSingleQueryExecution(source, support, language.queryText); - if (nativeExecution.matches) { - return nativeExecution.matches.map(toChunkMatchFromNative); - } - if (isNativeBindingLoadedForLanguage(support.id)) { - return []; - } - - try { - const matches = executeJsQueryAsNativeMatches( - source, - support, - language.definition.grammar(filePath), - language.queryText, - ); - return matches.map(toChunkMatchFromNative); - } catch { - return []; - } - } - - return []; -} - -function toChunkMatchFromNative(match: NativeMatch): ChunkMatch { - return { - captures: match.captures.map((capture) => ({ - name: capture.name, - text: capture.text, - startByte: capture.start.index, - endByte: 
capture.end.index, - startLine: capture.start.row + 1, - endLine: capture.end.row + 1, - nodeType: capture.nodeType, - })), - }; -} - -function splitLargeBlockSimple( - block: BlockCandidate, - source: string, - tokenizer: (text: string) => number, - maxTokens: number, - makeChunkId: () => string, - out: Chunk[], - languageId: string, - filePath?: string, -): void { - const text = source.slice(block.startByte, block.endByte); - const lines = text.split(/\r?\n/); - - let currentStartLine = block.startLine; - let currentLines: string[] = []; - let currentTokens = 0; - - const flush = () => { - if (!currentLines.length) return; - const chunkText = currentLines.join("\n"); - const tokenCount = tokenizer(chunkText); - const endLine = currentStartLine + currentLines.length - 1; - out.push({ - id: makeChunkId(), - languageId, - type: block.kind, - startLine: currentStartLine, - endLine, - text: chunkText, - tokenCount, - ...(filePath !== undefined ? { filePath } : {}), - ...(block.name !== undefined ? 
{ name: block.name } : {}), - }); - currentLines = []; - currentTokens = 0; - }; - - for (let i = 0; i < lines.length; i++) { - const line = lines[i]!; - const lineTokens = tokenizer(line); - if (currentTokens + lineTokens > maxTokens && currentLines.length) { - flush(); - currentStartLine = block.startLine + i; - } - - currentLines.push(line); - currentTokens += lineTokens; - } - - flush(); -} - -function splitLargeBlockUsingInnerBlocks( - block: BlockCandidate, - innerBlocks: BlockCandidate[], - source: string, - tokenizer: (text: string) => number, - maxTokens: number, - makeChunkId: () => string, - out: Chunk[], - languageId: string, - newlineOffsets: number[], - filePath?: string, -): void { - const boundaries = new Set(); - boundaries.add(block.startByte); - boundaries.add(block.endByte); - for (const ib of innerBlocks) { - boundaries.add(ib.startByte); - boundaries.add(ib.endByte); - } - const sorted = Array.from(boundaries).sort((a, b) => a - b); - - type Segment = { startByte: number; endByte: number }; - const segments: Segment[] = []; - - for (let i = 0; i < sorted.length - 1; i++) { - const startByte = sorted[i]!; - const endByte = sorted[i + 1]!; - if (endByte <= startByte) continue; - const segText = source.slice(startByte, endByte); - if (!segText.trim()) continue; - segments.push({ startByte, endByte }); - } - - if (!segments.length) { - splitLargeBlockSimple(block, source, tokenizer, maxTokens, makeChunkId, out, languageId, filePath); - return; - } - - let currentStart = segments[0]!.startByte; - let currentEnd = segments[0]!.endByte; - let currentText = source.slice(currentStart, currentEnd); - let currentTokens = tokenizer(currentText); - - const pushChunk = () => { - const chunkText = source.slice(currentStart, currentEnd); - const tokenCount = tokenizer(chunkText); - const [startRowZero] = locateLineAndColFromByte(newlineOffsets, currentStart); - const [endRowZero] = locateLineAndColFromByte(newlineOffsets, currentEnd); - - out.push({ - id: 
makeChunkId(), - languageId, - type: block.kind, - startLine: startRowZero + 1, - endLine: endRowZero + 1, - text: chunkText, - tokenCount, - ...(filePath !== undefined ? { filePath } : {}), - ...(block.name !== undefined ? { name: block.name } : {}), - }); - }; - - for (let i = 1; i < segments.length; i++) { - const seg = segments[i]!; - const segText = source.slice(seg.startByte, seg.endByte); - const segTokens = tokenizer(segText); - - if (currentTokens + segTokens > maxTokens && currentTokens > 0) { - pushChunk(); - currentStart = seg.startByte; - currentEnd = seg.endByte; - currentText = segText; - currentTokens = segTokens; - } else { - currentEnd = seg.endByte; - currentText += segText; - currentTokens += segTokens; - } - } - - pushChunk(); -} - -function locateLineAndColFromByte(newlineOffsets: number[], byteOffset: number): [number, number] { - let low = 0; - let high = newlineOffsets.length; - while (low < high) { - const mid = (low + high) >>> 1; - if (newlineOffsets[mid]! < byteOffset) low = mid + 1; - else high = mid; - } - const line = low; - const prevNewline = low > 0 ? newlineOffsets[low - 1]! : -1; - const col = byteOffset - prevNewline - 1; - return [line, col]; -} - -function mergeSmallChunks( - chunks: Chunk[], - minTokens: number, - maxTokens: number, - tokenizer: (text: string) => number, -): Chunk[] { - if (!chunks.length) return []; - - const merged: Chunk[] = []; - let i = 0; - - while (i < chunks.length) { - let current = { ...chunks[i]! }; - i++; - - while (current.tokenCount < minTokens && i < chunks.length) { - const next = chunks[i]!; - const combinedText = `${current.text}\n${next.text}`; - const combinedTokens = tokenizer(combinedText); - if (combinedTokens > maxTokens) break; - - const resolvedName = current.name ?? next.name; - - current = { - ...current, - endLine: next.endLine, - text: combinedText, - tokenCount: combinedTokens, - type: current.type === next.type ? 
current.type : `${current.type}+${next.type}`, - ...(resolvedName !== undefined ? { name: resolvedName } : {}), - }; - i++; - } - - merged.push(current); - } - - return merged; } -function fillGapsWithMiscChunks( - chunks: Chunk[], - source: string, - languageId: string, - filePath: string | undefined, - tokenizer: (text: string) => number, - minTokens: number, - maxTokens: number, - makeChunkId: () => string, -): Chunk[] { - if (!chunks.length) { - const tokens = tokenizer(source); - if (tokens === 0) return []; - return [ - { - id: makeChunkId(), - languageId, - type: "misc", - startLine: 1, - endLine: source.split(/\r?\n/).length, - text: source, - tokenCount: tokens, - ...(filePath !== undefined ? { filePath } : {}), - }, - ]; - } - - const byLine = source.split(/\r?\n/); - const lastLine = byLine.length; - const result: Chunk[] = []; - let currentLine = 1; - - const pushMiscRange = (startLine: number, endLine: number) => { - if (startLine > endLine) return; - const text = byLine.slice(startLine - 1, endLine).join("\n"); - const tokens = tokenizer(text); - if (tokens === 0) return; - - result.push({ - id: makeChunkId(), - languageId, - type: "misc", - startLine, - endLine, - text, - tokenCount: tokens, - ...(filePath !== undefined ? 
{ filePath } : {}), - }); - }; - - for (const chunk of chunks) { - if (chunk.startLine > currentLine) { - pushMiscRange(currentLine, chunk.startLine - 1); - } - result.push(chunk); - currentLine = chunk.endLine + 1; - } - - if (currentLine <= lastLine) { - pushMiscRange(currentLine, lastLine); +function collectNewlineOffsets(source: string): number[] { + const newlineOffsets: number[] = []; + for (let i = 0; i < source.length; i++) { + if (source[i] === "\n") newlineOffsets.push(i); } - - const final = mergeSmallChunks(result, minTokens, maxTokens, tokenizer); - return final; + return newlineOffsets; } diff --git a/src/chunking/chunkMatches.ts b/src/chunking/chunkMatches.ts new file mode 100644 index 00000000..12ca6f12 --- /dev/null +++ b/src/chunking/chunkMatches.ts @@ -0,0 +1,50 @@ +import type { LanguageConfig } from "./languageConfig.js"; +import { supportById } from "../languages.js"; +import { + executeJsQueryAsNativeMatches, + getNativeSingleQueryExecution, + isNativeBindingLoadedForLanguage, + type NativeMatch, +} from "../native/treeSitterNative.js"; +import type { ChunkMatch } from "./types.js"; + +export function getChunkMatches(language: LanguageConfig, source: string, filePath?: string | undefined): ChunkMatch[] { + const support = supportById(language.supportId); + if (support) { + const nativeExecution = getNativeSingleQueryExecution(source, support, language.queryText); + if (nativeExecution.matches) { + return nativeExecution.matches.map(toChunkMatchFromNative); + } + if (isNativeBindingLoadedForLanguage(support.id)) { + return []; + } + + try { + const matches = executeJsQueryAsNativeMatches( + source, + support, + language.definition.grammar(filePath), + language.queryText, + ); + return matches.map(toChunkMatchFromNative); + } catch { + return []; + } + } + + return []; +} + +function toChunkMatchFromNative(match: NativeMatch): ChunkMatch { + return { + captures: match.captures.map((capture) => ({ + name: capture.name, + text: capture.text, + 
startByte: capture.start.index, + endByte: capture.end.index, + startLine: capture.start.row + 1, + endLine: capture.end.row + 1, + nodeType: capture.nodeType, + })), + }; +} diff --git a/src/chunking/chunkMerge.ts b/src/chunking/chunkMerge.ts new file mode 100644 index 00000000..87f2f5e3 --- /dev/null +++ b/src/chunking/chunkMerge.ts @@ -0,0 +1,107 @@ +import type { Chunk, ChunkIdFactory, ChunkTokenizer } from "./types.js"; + +export function mergeSmallChunks( + chunks: Chunk[], + minTokens: number, + maxTokens: number, + tokenizer: ChunkTokenizer, +): Chunk[] { + if (!chunks.length) return []; + + const merged: Chunk[] = []; + let i = 0; + + while (i < chunks.length) { + let current = { ...chunks[i]! }; + i++; + + while (current.tokenCount < minTokens && i < chunks.length) { + const next = chunks[i]!; + const combinedText = `${current.text}\n${next.text}`; + const combinedTokens = tokenizer(combinedText); + if (combinedTokens > maxTokens) break; + + const resolvedName = current.name ?? next.name; + + current = { + ...current, + endLine: next.endLine, + text: combinedText, + tokenCount: combinedTokens, + type: current.type === next.type ? current.type : `${current.type}+${next.type}`, + ...(resolvedName !== undefined ? { name: resolvedName } : {}), + }; + i++; + } + + merged.push(current); + } + + return merged; +} + +export function fillGapsWithMiscChunks( + chunks: Chunk[], + source: string, + languageId: string, + filePath: string | undefined, + tokenizer: ChunkTokenizer, + minTokens: number, + maxTokens: number, + makeChunkId: ChunkIdFactory, +): Chunk[] { + if (!chunks.length) { + const tokens = tokenizer(source); + if (tokens === 0) return []; + return [ + { + id: makeChunkId(), + languageId, + type: "misc", + startLine: 1, + endLine: source.split(/\r?\n/).length, + text: source, + tokenCount: tokens, + ...(filePath !== undefined ? 
/**
 * Splits an oversized block into consecutive, line-aligned chunks.
 *
 * Lines are accumulated greedily: a chunk is flushed as soon as adding the next
 * line would push the running per-line token total above `maxTokens`. A single
 * line that exceeds `maxTokens` on its own is still emitted as its own chunk.
 * Line endings are normalised to `\n` in the emitted chunk text.
 *
 * @param block - Block to split; `startByte`/`endByte` are used as offsets into
 *   `source` and `startLine` is the line number given to the first emitted line.
 * @param source - Full source text that contains the block.
 * @param tokenizer - Returns the token count for a piece of text.
 * @param maxTokens - Soft upper bound for the summed per-line token counts of a chunk.
 * @param makeChunkId - Produces the id for every emitted chunk.
 * @param out - Receives the emitted chunks (appended in source order).
 * @param languageId - Language id copied onto every chunk.
 * @param filePath - Optional file path; the property is omitted when undefined.
 */
export function splitLargeBlockSimple(
  block: BlockCandidate,
  source: string,
  tokenizer: ChunkTokenizer,
  maxTokens: number,
  makeChunkId: ChunkIdFactory,
  out: Chunk[],
  languageId: string,
  filePath?: string,
): void {
  const lines = source.slice(block.startByte, block.endByte).split(/\r?\n/);

  let currentStartLine = block.startLine;
  let currentLines: string[] = [];
  let currentTokens = 0;

  const flush = (): void => {
    if (currentLines.length === 0) return;
    const chunkText = currentLines.join("\n");
    // The stored count is measured on the joined text, not the per-line running sum.
    const tokenCount = tokenizer(chunkText);
    out.push({
      id: makeChunkId(),
      languageId,
      type: block.kind,
      startLine: currentStartLine,
      endLine: currentStartLine + currentLines.length - 1,
      text: chunkText,
      tokenCount,
      ...(filePath !== undefined ? { filePath } : {}),
      ...(block.name !== undefined ? { name: block.name } : {}),
    });
    currentLines = [];
    currentTokens = 0;
  };

  for (const [index, line] of lines.entries()) {
    const lineTokens = tokenizer(line);
    // Never flush an empty buffer: an over-long single line becomes its own chunk.
    if (currentTokens + lineTokens > maxTokens && currentLines.length > 0) {
      flush();
      currentStartLine = block.startLine + index;
    }
    currentLines.push(line);
    currentTokens += lineTokens;
  }

  flush();
}

/**
 * Splits an oversized block at the boundaries of its inner blocks.
 *
 * The block is cut at every inner-block start/end offset, whitespace-only
 * pieces are dropped, and the remaining segments are merged greedily until the
 * next segment would push the running token total above `maxTokens`. Each
 * emitted chunk is the contiguous source text from the first to the last merged
 * segment. When no non-blank segment exists the function falls back to
 * {@link splitLargeBlockSimple}.
 *
 * Inner-block offsets that lie outside `block` are ignored so that emitted
 * chunks can never contain text from beyond the block being split.
 *
 * @param block - Outer block to split.
 * @param innerBlocks - Candidate split points; expected to lie inside `block`.
 * @param source - Full source text that contains the block.
 * @param tokenizer - Returns the token count for a piece of text.
 * @param maxTokens - Soft upper bound for the summed segment token counts of a chunk.
 * @param makeChunkId - Produces the id for every emitted chunk.
 * @param out - Receives the emitted chunks (appended in source order).
 * @param languageId - Language id copied onto every chunk.
 * @param newlineOffsets - Ascending offsets of the newline characters in `source`,
 *   used to turn offsets into line numbers.
 * @param filePath - Optional file path; the property is omitted when undefined.
 */
export function splitLargeBlockUsingInnerBlocks(
  block: BlockCandidate,
  innerBlocks: BlockCandidate[],
  source: string,
  tokenizer: ChunkTokenizer,
  maxTokens: number,
  makeChunkId: ChunkIdFactory,
  out: Chunk[],
  languageId: string,
  newlineOffsets: number[],
  filePath?: string,
): void {
  const boundaries = new Set<number>([block.startByte, block.endByte]);
  for (const inner of innerBlocks) {
    for (const offset of [inner.startByte, inner.endByte]) {
      // Only cut inside the outer block; stray offsets would pull in foreign text.
      if (offset > block.startByte && offset < block.endByte) boundaries.add(offset);
    }
  }
  const sorted = [...boundaries].sort((a, b) => a - b);

  type Segment = { startByte: number; endByte: number };
  const segments: Segment[] = [];
  let previous: number | undefined;
  for (const offset of sorted) {
    // Whitespace-only pieces carry no content and are skipped as split units.
    if (previous !== undefined && source.slice(previous, offset).trim()) {
      segments.push({ startByte: previous, endByte: offset });
    }
    previous = offset;
  }

  const [first, ...rest] = segments;
  if (first === undefined) {
    splitLargeBlockSimple(block, source, tokenizer, maxTokens, makeChunkId, out, languageId, filePath);
    return;
  }

  let currentStart = first.startByte;
  let currentEnd = first.endByte;
  let currentTokens = tokenizer(source.slice(currentStart, currentEnd));

  const pushChunk = (): void => {
    const chunkText = source.slice(currentStart, currentEnd);
    const tokenCount = tokenizer(chunkText);
    const [startRowZero] = locateLineAndColFromByte(newlineOffsets, currentStart);
    const [endRowZero] = locateLineAndColFromByte(newlineOffsets, currentEnd);

    out.push({
      id: makeChunkId(),
      languageId,
      type: block.kind,
      // Rows are zero-based; chunk line numbers are one-based.
      startLine: startRowZero + 1,
      endLine: endRowZero + 1,
      text: chunkText,
      tokenCount,
      ...(filePath !== undefined ? { filePath } : {}),
      ...(block.name !== undefined ? { name: block.name } : {}),
    });
  };

  for (const segment of rest) {
    const segmentTokens = tokenizer(source.slice(segment.startByte, segment.endByte));
    if (currentTokens + segmentTokens > maxTokens && currentTokens > 0) {
      pushChunk();
      currentStart = segment.startByte;
      currentTokens = segmentTokens;
    } else {
      currentTokens += segmentTokens;
    }
    currentEnd = segment.endByte;
  }

  pushChunk();
}

/**
 * Converts an offset into a zero-based `[line, column]` pair.
 *
 * Binary-searches `newlineOffsets` for the number of newlines that lie strictly
 * before `byteOffset`; that count is the line, and the column is the distance
 * from the character after the previous newline.
 *
 * @param newlineOffsets - Ascending offsets of newline characters.
 * @param byteOffset - Offset to locate.
 * @returns Zero-based line and column.
 */
function locateLineAndColFromByte(newlineOffsets: number[], byteOffset: number): [number, number] {
  let low = 0;
  let high = newlineOffsets.length;
  while (low < high) {
    const mid = (low + high) >>> 1;
    if (newlineOffsets[mid]! < byteOffset) low = mid + 1;
    else high = mid;
  }
  const line = low;
  const prevNewline = low > 0 ? newlineOffsets[low - 1]! : -1;
  const col = byteOffset - prevNewline - 1;
  return [line, col];
}
+ */ +export interface Chunk { + /** Unique identifier for the chunk */ + id: string; + /** Language identifier (e.g., "javascript", "typescript", "python") */ + languageId: string; + /** Optional source file path */ + filePath?: string | undefined; + /** Chunk type (e.g., "function", "class", "method", "import", "misc") */ + type: string; + /** Symbol name if applicable (e.g., function name, class name) */ + name?: string; + /** 1-based start line number */ + startLine: number; + /** 1-based end line number */ + endLine: number; + /** The chunk content text */ + text: string; + /** Estimated token count */ + tokenCount: number; +} + +export interface BlockCandidate { + kind: string; + name?: string; + startByte: number; + endByte: number; + startLine: number; + endLine: number; +} + +export type ChunkCapture = { + name: string; + text: string; + startByte: number; + endByte: number; + startLine: number; + endLine: number; + nodeType: string; +}; + +export type ChunkMatch = { + captures: ChunkCapture[]; +}; + +export type ChunkTokenizer = (text: string) => number; +export type ChunkIdFactory = () => string; diff --git a/src/cli.ts b/src/cli.ts index c06ce205..1cceb6cf 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -14,7 +14,7 @@ import { findReferences, } from "./indexer.js"; import type { BuildOptions, BuildReport } from "./indexer/types.js"; -import { buildReviewReport, type ReviewBuildReport, type ReviewDepth } from "./review.js"; +import type { ReviewBuildReport } from "./review.js"; import { collectGraph, graphToMermaid, @@ -33,11 +33,7 @@ import { getHotspots, type GraphBuildOptions, type SymbolGraph, - type SymbolNodeKind, } from "./graphs.js"; -import { analyzeImpactFromDiff } from "./impact/index.js"; -import type { CompactImpactReport, ImpactItem, ImpactOptions, ImpactReport, ChangedSymbol } from "./impact/types.js"; -import type { CandidateTestFile } from "./impact/context.js"; import { writeGraphSqlite, updateGraphSqlite } from "./sqlite.js"; import { 
isNativeTreeSitterAvailable, @@ -53,9 +49,11 @@ import { handleExplainCommand } from "./cli/explain.js"; import { handleGraphDeltaCommand } from "./cli/graphDelta.js"; import { handleGraphQueryCommand } from "./cli/graphQueries.js"; import { CLI_HELP_TEXT, helpTextForCommand, isKnownCliCommand } from "./cli/help.js"; +import { handleImpactCommand } from "./cli/impact.js"; import { handleMcpServeCommand } from "./cli/mcp.js"; import { isCliValueOption, parseCacheModeOption, parsePositiveIntegerOption } from "./cli/options.js"; import { getCodegraphPackageIdentity, getCodegraphVersion } from "./cli/packageInfo.js"; +import { handleReviewCommand } from "./cli/review.js"; import { handleSearchCommand } from "./cli/search.js"; import { handleSkillCommand } from "./cli/skill.js"; import { handleSqlCommand } from "./cli/sql.js"; @@ -804,306 +802,6 @@ function stabilizeSymbolGraph(graph: SymbolGraph): SymbolGraph { return { nodes: new Map(nodeEntries), edges }; } -const SYMBOL_NODE_KINDS: SymbolNodeKind[] = [ - "function", - "class", - "variable", - "interface", - "type", - "default", - "import", - "namespaceImport", -]; - -function symbolNodeKindFromString(kind?: string): SymbolNodeKind { - return kind && SYMBOL_NODE_KINDS.includes(kind as SymbolNodeKind) ? 
(kind as SymbolNodeKind) : "variable"; -} - -function ensureImpactReport(report: ImpactReport | CompactImpactReport): ImpactReport { - if (!("files" in report)) return report; - const files = report.files; - const resolveFilePath = (index: number): string => { - const file = files[index]; - if (!file) { - throw new Error(`Missing file path for index ${index} in compact impact report`); - } - return file; - }; - const resolveSurfaceArea = (surfaceArea: CompactImpactReport["surfaceArea"]) => ({ - files: surfaceArea.files.map((item) => ({ - file: resolveFilePath(item.file), - fanIn: item.fanIn, - fanOut: item.fanOut, - changed: item.changed, - impacted: item.impacted, - })), - topFanIn: surfaceArea.topFanIn.map((file) => resolveFilePath(file)), - topFanOut: surfaceArea.topFanOut.map((file) => resolveFilePath(file)), - }); - const changedFiles = report.changedFiles.map((cf) => ({ - file: resolveFilePath(cf.file), - hunks: cf.hunks, - })); - const changedSymbols = report.changedSymbols.map((cs) => { - const symbol: ChangedSymbol = { - id: cs.id, - file: resolveFilePath(cs.file), - name: cs.name, - kind: cs.kind, - exported: cs.exported, - range: cs.range, - ...(cs.typeOnly !== undefined ? { typeOnly: cs.typeOnly } : {}), - }; - return symbol; - }); - const impacted: ImpactItem[] = report.impacted.map((item) => { - const impact: ImpactItem = { - file: resolveFilePath(item.file), - symbols: item.symbols, - reasons: item.reasons, - severity: item.severity, - }; - if (item.depth !== undefined) impact.depth = item.depth; - if (item.typeOnly !== undefined) impact.typeOnly = item.typeOnly; - if (item.explain !== undefined) impact.explain = item.explain; - const maybeRefs = "refs" in item ? 
(item as { refs?: ImpactItem["refs"] }).refs : undefined; - if (maybeRefs !== undefined) impact.refs = maybeRefs; - return impact; - }); - const suggestions = report.suggestions?.map((suggestion) => ({ - file: resolveFilePath(suggestion.file), - kind: suggestion.kind, - ...(suggestion.range ? { range: suggestion.range } : {}), - ...(suggestion.symbol ? { symbol: suggestion.symbol } : {}), - ...(suggestion.relatedFile !== undefined ? { relatedFile: resolveFilePath(suggestion.relatedFile) } : {}), - ...(suggestion.details ? { details: suggestion.details } : {}), - confidence: suggestion.confidence, - })); - const exportSummary = report.exportSummary?.map((entry) => ({ - file: resolveFilePath(entry.file), - symbols: entry.symbols, - })); - const reexportChains = report.reexportChains - ? { - chains: report.reexportChains.chains.map((entry) => ({ - symbol: entry.symbol, - file: resolveFilePath(entry.file), - paths: entry.paths.map((pathChain) => pathChain.map((file) => resolveFilePath(file))), - })), - } - : undefined; - const topImpacts = report.topImpacts?.map((item) => ({ - file: resolveFilePath(item.file), - symbols: item.symbols, - reasons: item.reasons, - severity: item.severity, - ...(item.depth !== undefined ? { depth: item.depth } : {}), - ...(item.typeOnly !== undefined ? { typeOnly: item.typeOnly } : {}), - ...(item.explain ? { explain: item.explain } : {}), - })); - const clusters = report.clusters.map((cluster) => ({ - id: cluster.id, - files: cluster.files.map((file) => resolveFilePath(file)), - changedFiles: cluster.changedFiles.map((file) => resolveFilePath(file)), - totalSeverity: cluster.totalSeverity, - })); - const fileEdges = report.graph.fileEdges.map((edge) => ({ - from: resolveFilePath(edge.from), - to: resolveFilePath(edge.to), - ...(edge.typeOnly !== undefined ? 
{ typeOnly: edge.typeOnly } : {}), - })); - const symbolEdges = report.graph.symbolEdges.map((edge) => ({ - from: edge.from, - to: edge.to, - label: edge.label, - })); - const result: ImpactReport = { - schemaVersion: report.schemaVersion, - format: "full", - changedFiles, - changedSymbols, - impacted, - ...(suggestions ? { suggestions } : {}), - ...(exportSummary ? { exportSummary } : {}), - ...(reexportChains ? { reexportChains } : {}), - ...(topImpacts ? { topImpacts } : {}), - surfaceArea: resolveSurfaceArea(report.surfaceArea), - clusters, - graph: { - fileEdges, - symbolEdges, - }, - }; - if (report.projectFiles) result.projectFiles = report.projectFiles; - if (report.warning) result.warning = report.warning; - return result; -} - -const IMPACT_REASON_LABELS: Record = { - directRef: "reason: direct reference", - namespaceMember: "reason: namespace member", - importAlias: "reason: import alias", - transitive: "reason: transitive dependency", - exportChain: "reason: export chain", - fileLevelChange: "reason: file-level change", -}; - -function formatImpactReasonLabel(item: Pick): string { - const primaryReason = item.explain?.reason ?? item.reasons[0]; - if (!primaryReason) return "reason: impact"; - return IMPACT_REASON_LABELS[primaryReason]; -} - -function formatImpactMermaid(report: ImpactReport, root: string): string { - const fileGraph: Graph = { nodes: new Set(), edges: [] }; - const ensureFileNode = (file: string) => fileGraph.nodes.add(file); - for (const cf of report.changedFiles) ensureFileNode(cf.file); - for (const item of report.impacted) ensureFileNode(item.file); - for (const symbol of report.changedSymbols) ensureFileNode(symbol.file); - for (const edge of report.graph.fileEdges) { - ensureFileNode(edge.from); - ensureFileNode(edge.to); - fileGraph.edges.push({ - from: edge.from, - to: { type: "file", path: edge.to }, - raw: "", - ...(edge.typeOnly ? 
{ typeOnly: edge.typeOnly } : {}), - }); - } - - const symbolGraph: SymbolGraph = { nodes: new Map(), edges: [] }; - for (const sym of report.changedSymbols) { - symbolGraph.nodes.set(sym.id, { - id: sym.id, - file: sym.file, - name: sym.name, - kind: symbolNodeKindFromString(sym.kind), - }); - } - for (const edge of report.graph.symbolEdges) { - const fromSym = report.changedSymbols[edge.from]; - const toSym = report.changedSymbols[edge.to]; - if (!fromSym || !toSym) continue; - symbolGraph.edges.push({ - from: fromSym.id, - to: toSym.id, - ...(edge.label ? { label: edge.label } : {}), - }); - } - - return graphToMermaidSymbolsWithFiles(symbolGraph, fileGraph, root); -} - -function formatReviewSummary(report: Awaited>): string { - const lines: string[] = []; - const candidateCounts = countCandidateTestsByConfidence(report.candidateTests); - lines.push("Review Summary"); - lines.push("=============="); - lines.push(`Status: ${report.status}`); - lines.push(`Files changed: ${report.summary.filesChanged}`); - lines.push(`Symbols changed: ${report.summary.symbolsChanged}`); - lines.push( - `Candidate tests: ${report.summary.candidateTests} (high: ${candidateCounts.high}, medium: ${candidateCounts.medium}, low: ${candidateCounts.low})`, - ); - lines.push(`Risk: ${report.riskSummary.level} (${report.riskSummary.score})`); - if (report.riskSummary.signals.length) { - lines.push(`Signals: ${report.riskSummary.signals.join(", ")}`); - } - lines.push(""); - lines.push("Changed files:"); - if (!report.changedFiles.length) { - lines.push("- none"); - } else { - for (const file of report.changedFiles.slice(0, 20)) { - const symbolNames = file.symbols.slice(0, 5).map((symbol) => symbol.name); - const symbolSummary = symbolNames.length ? ` (${symbolNames.join(", ")})` : ""; - lines.push(`- ${file.file}: ${file.status}${symbolSummary}`); - } - const remainingFiles = report.changedFiles.length - 20; - if (remainingFiles > 0) { - lines.push(`- ... 
and ${remainingFiles} more`); - } - } - lines.push(""); - lines.push("Candidate tests:"); - if (!report.candidateTests.length) { - lines.push("- none"); - } else { - const listedCandidates = - appendCandidateTestGroup(lines, "High-confidence tests:", report.candidateTests, "high") + - appendCandidateTestGroup(lines, "Medium-confidence tests:", report.candidateTests, "medium"); - if (listedCandidates === 0) { - lines.push("No high- or medium-confidence test candidates found."); - } - appendLowConfidenceCandidateSummary(lines, candidateCounts.low); - } - lines.push(""); - lines.push("Review tasks:"); - if (!report.reviewTasks.length) { - lines.push("- none"); - } else { - for (const task of report.reviewTasks.slice(0, 8)) { - lines.push(`- ${task.id}: ${task.priority} - ${task.title} (${task.reason})`); - } - const remainingTasks = report.reviewTasks.length - 8; - if (remainingTasks > 0) { - lines.push(`- ... and ${remainingTasks} more`); - } - } - if (report.diagnostics) { - lines.push(""); - lines.push("Diagnostics:"); - lines.push(`- missing files: ${report.diagnostics.missingFiles.length}`); - lines.push(`- symbol mapping parse failures: ${report.diagnostics.symbolMappingParseFailures.length}`); - } - return `${lines.join("\n")}\n`; -} - -function countCandidateTestsByConfidence( - candidates: CandidateTestFile[], -): Record { - const counts: Record = { - high: 0, - medium: 0, - low: 0, - }; - for (const candidate of candidates) { - counts[candidate.confidence] += 1; - } - return counts; -} - -function appendCandidateTestGroup( - lines: string[], - title: string, - candidates: CandidateTestFile[], - confidence: CandidateTestFile["confidence"], -): number { - const matches = candidates.filter((candidate) => candidate.confidence === confidence); - if (!matches.length) return 0; - lines.push(title); - for (const candidate of matches.slice(0, 8)) { - lines.push(`- ${candidate.file}: ${candidate.reason}`); - } - const remaining = matches.length - 8; - if (remaining > 
0) { - lines.push(`- ... and ${remaining} more`); - } - return matches.length; -} - -function appendLowConfidenceCandidateSummary(lines: string[], lowConfidenceCount: number): void { - if (lowConfidenceCount === 0) return; - lines.push(`Low-confidence pattern matches: ${lowConfidenceCount} available as breadth hints in full JSON.`); -} - -function parseReviewDepth(value: string): ReviewDepth | null { - if (value === "minimal" || value === "standard" || value === "deep") { - return value; - } - return null; -} - function parseNativeRuntimeMode(value: string | undefined): NativeRuntimeMode { if (value === undefined) return "auto"; if (value === "auto" || value === "on" || value === "off") { @@ -1112,18 +810,6 @@ function parseNativeRuntimeMode(value: string | undefined): NativeRuntimeMode { throw new Error(`Invalid --native value "${value}". Expected auto|on|off.`); } -type ImpactOptionsBuilder = Partial & { - base?: string; - head?: string; - cwd?: string; - pr?: number; - repo?: string; - diffText?: string; - threads?: number; - cache?: string; - cacheStrict?: boolean; -}; - async function runCliWithActiveRuntime(rawArgs: string[]) { const cmd = rawArgs[0] && !rawArgs[0].startsWith("-") ? rawArgs[0] : "graph"; const argTokens = rawArgs[0] && !rawArgs[0].startsWith("-") ? rawArgs.slice(1) : rawArgs; @@ -1940,253 +1626,58 @@ async function runCliWithActiveRuntime(rawArgs: string[]) { } if (cmd === "impact") { - const provider = getOpt("--provider") ?? "git"; - - if (provider !== "git" && provider !== "github" && provider !== "raw") { - throw new Error(`Unsupported provider: ${provider}`); - } - - const options: ImpactOptionsBuilder = { provider }; - - if (provider === "git") { - const base = getOpt("--base"); - const head = getOpt("--head"); - if (!base || !head) { - throw new Error( - "Impact provider 'git' requires --base and --head. 
Example: codegraph impact --provider git --base main --head HEAD", - ); - } - options.base = base; - options.head = head; - options.cwd = projectRootFs; - } else if (provider === "github") { - const pr = getOpt("--pr"); - const repo = getOpt("--repo"); - if (!pr || !repo) { - throw new Error( - "Impact provider 'github' requires --repo owner/name and --pr . Example: codegraph impact --provider github --repo acme/app --pr 42", - ); - } - options.pr = Number(pr); - if (!Number.isFinite(options.pr) || options.pr <= 0) { - throw new Error("Impact provider 'github' expects --pr as a positive integer."); - } - options.repo = repo; - } else if (provider === "raw") { - // For raw provider, diff text would come from stdin or file - // For now, assume stdin - const diffText = await new Promise((resolve) => { - let data = ""; - process.stdin.on("data", (chunk) => (data += chunk.toString())); - process.stdin.on("end", () => resolve(data)); - }); - options.diffText = diffText; - } - - // Parse other options - const threadsRaw = getOpt("--threads"); - const threads = threadsRaw ? 
Number(threadsRaw) : 0; - if (threadsRaw) options.threads = threads; - - const cache = parseCacheModeOption(getOpt("--cache")); - if (cache !== undefined) options.cache = cache; - - const cacheStrict = hasFlag("--cache-strict"); - if (cacheStrict) options.cacheStrict = true; - - if (hasFlag("--compact") || hasFlag("--compact-json")) options.compact = true; - - const maxRefs = getOpt("--max-refs"); - if (maxRefs) options.maxRefs = Number(maxRefs); - - const depth = getOpt("--depth"); - if (depth) options.depth = Number(depth); - - const includeTests = hasFlag("--include-tests"); - const membersOnly = hasFlag("--members-only"); - - const scope = getOpt("--scope"); - if (scope === "all" || scope === "imported") options.scope = scope; - - const refContext = getOpt("--ref-context"); - if (refContext) options.refContext = refContext as "line" | "block"; - - const refContextLines = getOpt("--ref-context-lines"); - if (refContextLines) options.refContextLines = Number(refContextLines); - - const refBlockMaxLines = getOpt("--ref-block-max-lines"); - if (refBlockMaxLines) options.refBlockMaxLines = Number(refBlockMaxLines); - - if (discoveryOptions.ignoreGlobs?.length) { - options.ignoreGlobs = discoveryOptions.ignoreGlobs; - } - - const verifyRefs = hasFlag("--verify-refs"); - if (verifyRefs) options.verifyReferences = true; - - const lcovPaths = parsed.options.get("--lcov"); - if (lcovPaths?.length) { - options.lcovPaths = lcovPaths; - options.testCoverageSuggestions = true; - } - - const coveragePaths = parsed.options.get("--coverage-report"); - if (coveragePaths?.length) { - options.coveragePaths = coveragePaths; - options.testCoverageSuggestions = true; - } - - const testCommandTemplate = getOpt("--test-command-template"); - if (testCommandTemplate) { - options.testCommandTemplate = testCommandTemplate; - options.testCoverageSuggestions = true; - } - - options.includeTests = includeTests; - options.membersOnly = membersOnly; - - const fastGraph = graphFlags.fast; - 
const resolveNodeModules = graphFlags.resolveNodeModules; - const dynamicImportHeuristics = graphFlags.dynamicImportHeuristics; - const resolutionHints = graphFlags.resolutionHints; - - const pretty = hasFlag("--pretty"); - const mermaid = hasFlag("--mermaid"); - - try { - const cacheMode = cache === "off" || cache === "memory" || cache === "disk" ? cache : undefined; - const indexOpts: BuildOptions = { - threads, - ...(nativeMode !== "auto" ? { native: nativeMode } : {}), - ...workerOpts, - ...(cacheMode !== undefined ? { cache: cacheMode } : {}), - ...(cacheStrict ? { cacheStrict: true } : {}), - }; - if (hasGraphOverrides) { - indexOpts.graph = { - fast: fastGraph, - resolveNodeModules, - dynamicImportHeuristics, - ...(resolutionHints.length ? { resolutionHints } : {}), - }; - } - const index = await buildProjectIndex(projectRootFs, { - ...indexOpts, - discovery: discoveryOptions, - onProgress: progressHandler, - }); - const report = await analyzeImpactFromDiff(projectRootFs, index, options as ImpactOptions); - const impactReport = ensureImpactReport(report); - - if (mermaid) { - writeStdoutLine(formatImpactMermaid(impactReport, projectRootFs)); - } else if (pretty) { - writeStdoutLine(`Impact Analysis Report`); - writeStdoutLine(`======================`); - if (impactReport.warning) { - writeStdoutLine(`WARNING: ${impactReport.warning}`); - writeStdoutLine(``); - } - writeStdoutLine(`Changed files: ${impactReport.changedFiles.length}`); - writeStdoutLine(`Changed symbols: ${impactReport.changedSymbols.length}`); - writeStdoutLine(`Impacted items: ${impactReport.impacted.length}`); - writeStdoutLine(``); - for (const item of impactReport.impacted.slice(0, 10)) { - const reasonLabel = formatImpactReasonLabel(item); - writeStdoutLine( - `${item.file}: ${item.symbols.join(", ")} (${reasonLabel}, severity: ${(item.severity * 100).toFixed(1)}%)`, - ); - if ("refs" in item && item.refs?.length) { - const contextsToShow = item.refs.slice(0, 2); - for (const ref of 
contextsToShow) { - writeStdoutLine(` Reference at ${ref.range.start.line}:${ref.range.start.column}:`); - const contextLines = ref.context!.split("\n").slice(0, 5); - for (const line of contextLines) { - writeStdoutLine(` ${line}`); - } - if (ref.context!.split("\n").length > 5) { - writeStdoutLine(` ...`); - } - } - if (item.refs.length > 2) { - writeStdoutLine(` ... and ${item.refs.length - 2} more references`); - } + await handleImpactCommand({ + projectRootFs, + discoveryOptions, + getOpt, + hasFlag, + parsedOptions: parsed.options, + nativeMode, + workerOpts, + graphOptions: hasGraphOverrides + ? { + fast: graphFlags.fast, + resolveNodeModules: graphFlags.resolveNodeModules, + dynamicImportHeuristics: graphFlags.dynamicImportHeuristics, + ...(graphFlags.resolutionHints.length ? { resolutionHints: graphFlags.resolutionHints } : {}), } - } - if (impactReport.impacted.length > 10) { - writeStdoutLine(`... and ${impactReport.impacted.length - 10} more`); - } - } else { - writeJSONLine(report); - } - } catch (error) { - writeStderrLine(`Impact analysis failed: ${error instanceof Error ? error.message : String(error)}`); - exitCli(1); - } + : undefined, + progressHandler, + readStdin: async () => + await new Promise((resolve) => { + let data = ""; + process.stdin.on("data", (chunk) => { + data += chunk.toString(); + }); + process.stdin.on("end", () => resolve(data)); + }), + writeJSONLine, + writeStdoutLine, + writeStderrLine, + exit: exitCli, + }); return; } // Review entry point: CLI workflow for review reports. if (cmd === "review") { const commandReport: CommandReport | undefined = reportEnabled ? { command: "review", timings: {} } : undefined; - const commandStart = performance.now(); - const base = getOpt("--base"); - const head = getOpt("--head"); - const changedSince = getOpt("--changed-since"); - const reviewDepthRaw = getOpt("--review-depth"); - const reviewDepth = reviewDepthRaw !== undefined ? 
parseReviewDepth(reviewDepthRaw) : null; - if (reviewDepthRaw !== undefined && !reviewDepth) { - writeStderrLine(`Invalid --review-depth value "${reviewDepthRaw}". Expected minimal|standard|deep.`); - exitCli(2); - } - const threadsRaw = getOpt("--threads"); - const threads = threadsRaw !== undefined ? Number(threadsRaw) : undefined; - const cache = parseCacheModeOption(getOpt("--cache")); - const cacheStrict = hasFlag("--cache-strict"); - const cacheVerify = hasFlag("--cache-verify"); - const incrementalStrict = hasFlag("--incremental-strict"); - const includeSymbolDetails = hasFlag("--include-symbol-details"); - const maxCallsitesRaw = getOpt("--max-callsites"); - const maxCallsites = maxCallsitesRaw !== undefined ? Number(maxCallsitesRaw) : undefined; - const maxTestsRaw = getOpt("--max-tests"); - const maxTests = maxTestsRaw !== undefined ? Number(maxTestsRaw) : undefined; - const reviewOpts: Parameters[1] = {}; - reviewOpts.discovery = discoveryOptions; - if (reviewDepth) reviewOpts.reviewDepth = reviewDepth; - if (base !== undefined) reviewOpts.gitBase = base; - if (head !== undefined) reviewOpts.gitHead = head; - if (changedSince !== undefined) reviewOpts.changedSince = changedSince; - if (threads !== undefined) reviewOpts.threads = threads; - if (cache === "off" || cache === "memory" || cache === "disk") { - reviewOpts.cache = cache; - } - if (nativeMode !== "auto") reviewOpts.native = nativeMode; - if (useNativeWorkers) reviewOpts.useNativeWorkers = true; - if (cacheStrict) reviewOpts.cacheStrict = true; - if (cacheVerify) reviewOpts.cacheVerify = true; - if (incrementalStrict) reviewOpts.incrementalStrict = true; - if (hasGraphOverrides) reviewOpts.graph = buildGraphOptions(); - if (includeSymbolDetails) { - reviewOpts.includeSymbolDetails = includeSymbolDetails; - } - if (maxCallsites !== undefined) reviewOpts.maxCallsites = maxCallsites; - if (maxTests !== undefined) reviewOpts.maxCandidates = maxTests; - if (commandReport) { - const reviewReport: 
ReviewBuildReport = { timings: {} }; - commandReport.review = reviewReport; - reviewOpts.report = reviewReport; - } - const report = await buildReviewReport(projectRootFs, reviewOpts); - if (hasFlag("--summary") || hasFlag("--pretty")) { - writeStdoutLine(formatReviewSummary(report).trimEnd()); - } else { - writeJSONLine(report); - } - if (commandReport) { - commandReport.timings.commandMs = Math.round(performance.now() - commandStart); - commandReport.timings.totalMs = commandReport.timings.commandMs; - await writeCommandReport(commandReport, reportFile); - } + await handleReviewCommand({ + projectRootFs, + discoveryOptions, + reportFile, + commandReport, + getOpt, + hasFlag, + nativeMode, + useNativeWorkers, + graphOptions: hasGraphOverrides ? buildGraphOptions() : undefined, + writeJSONLine, + writeStdoutLine, + writeStderrLine, + writeCommandReport, + exit: exitCli, + }); return; } diff --git a/src/cli/impact.ts b/src/cli/impact.ts new file mode 100644 index 00000000..d8b95bc7 --- /dev/null +++ b/src/cli/impact.ts @@ -0,0 +1,417 @@ +import { buildProjectIndex } from "../indexer.js"; +import type { BuildOptions } from "../indexer/types.js"; +import { + analyzeImpactFromDiff, + type ChangedSymbol, + type CompactImpactReport, + type ImpactItem, + type ImpactOptions, + type ImpactReport, +} from "../impact/index.js"; +import { graphToMermaidSymbolsWithFiles, type GraphBuildOptions, type SymbolGraph, type SymbolNodeKind } from "../graphs.js"; +import type { NativeRuntimeMode } from "../native/treeSitterNative.js"; +import type { Graph } from "../types.js"; +import type { ProjectFileDiscoveryOptions } from "../util.js"; +import { parseCacheModeOption } from "./options.js"; + +type ImpactOptionsBuilder = Partial & { + base?: string; + head?: string; + cwd?: string; + pr?: number; + repo?: string; + diffText?: string; + threads?: number; + cache?: BuildOptions["cache"]; + cacheStrict?: boolean; +}; + +export type ImpactCommandContext = { + projectRootFs: string; + 
/** Symbol kinds the symbol graph understands; anything else is rendered as a variable. */
const SYMBOL_NODE_KINDS: SymbolNodeKind[] = [
  "function",
  "class",
  "variable",
  "interface",
  "type",
  "default",
  "import",
  "namespaceImport",
];

/**
 * Maps a free-form kind string onto a known symbol node kind.
 *
 * @param kind - Kind reported for a symbol; may be missing.
 * @returns The matching known kind, or `"variable"` when the input is absent or unknown.
 */
function symbolNodeKindFromString(kind?: string): SymbolNodeKind {
  const known = SYMBOL_NODE_KINDS.find((candidate) => candidate === kind);
  return known ?? "variable";
}

/**
 * Returns a full impact report for either report flavour.
 *
 * Reports without a `files` table are already full and are returned unchanged;
 * compact reports are expanded by replacing every file index with its path.
 *
 * @param report - Full or compact impact report.
 * @returns The report in full form.
 * @throws Error when a compact report references a file index with no path.
 */
function ensureImpactReport(report: ImpactReport | CompactImpactReport): ImpactReport {
  return "files" in report ? expandCompactImpactReport(report) : report;
}

/** Expands a compact impact report by resolving all file indices against its `files` table. */
function expandCompactImpactReport(compact: CompactImpactReport): ImpactReport {
  const pathAt = (index: number): string => {
    const path = compact.files[index];
    if (path) return path;
    throw new Error(`Missing file path for index ${index} in compact impact report`);
  };
  const pathsAt = (indices: number[]): string[] => indices.map((index) => pathAt(index));

  // Sections are resolved in a fixed order so the first bad index reported stays stable.
  const changedFiles = compact.changedFiles.map(({ file, hunks }) => ({ file: pathAt(file), hunks }));

  const changedSymbols = compact.changedSymbols.map((entry) => {
    const symbol: ChangedSymbol = {
      id: entry.id,
      file: pathAt(entry.file),
      name: entry.name,
      kind: entry.kind,
      exported: entry.exported,
      range: entry.range,
    };
    if (entry.typeOnly !== undefined) symbol.typeOnly = entry.typeOnly;
    return symbol;
  });

  const impacted: ImpactItem[] = compact.impacted.map((entry): ImpactItem => {
    // `refs` is only read when the entry actually carries the property.
    const refs = "refs" in entry ? (entry as { refs?: ImpactItem["refs"] }).refs : undefined;
    return {
      file: pathAt(entry.file),
      symbols: entry.symbols,
      reasons: entry.reasons,
      severity: entry.severity,
      ...(entry.depth !== undefined ? { depth: entry.depth } : {}),
      ...(entry.typeOnly !== undefined ? { typeOnly: entry.typeOnly } : {}),
      ...(entry.explain !== undefined ? { explain: entry.explain } : {}),
      ...(refs !== undefined ? { refs } : {}),
    };
  });

  const suggestions = compact.suggestions?.map((entry) => ({
    file: pathAt(entry.file),
    kind: entry.kind,
    ...(entry.range ? { range: entry.range } : {}),
    ...(entry.symbol ? { symbol: entry.symbol } : {}),
    ...(entry.relatedFile !== undefined ? { relatedFile: pathAt(entry.relatedFile) } : {}),
    ...(entry.details ? { details: entry.details } : {}),
    confidence: entry.confidence,
  }));

  const exportSummary = compact.exportSummary?.map(({ file, symbols }) => ({ file: pathAt(file), symbols }));

  const reexportChains = compact.reexportChains
    ? {
        chains: compact.reexportChains.chains.map((chain) => ({
          symbol: chain.symbol,
          file: pathAt(chain.file),
          paths: chain.paths.map((hops) => pathsAt(hops)),
        })),
      }
    : undefined;

  const topImpacts = compact.topImpacts?.map((entry) => ({
    file: pathAt(entry.file),
    symbols: entry.symbols,
    reasons: entry.reasons,
    severity: entry.severity,
    ...(entry.depth !== undefined ? { depth: entry.depth } : {}),
    ...(entry.typeOnly !== undefined ? { typeOnly: entry.typeOnly } : {}),
    ...(entry.explain ? { explain: entry.explain } : {}),
  }));

  const clusters = compact.clusters.map((cluster) => ({
    id: cluster.id,
    files: pathsAt(cluster.files),
    changedFiles: pathsAt(cluster.changedFiles),
    totalSeverity: cluster.totalSeverity,
  }));

  const fileEdges = compact.graph.fileEdges.map((edge) => ({
    from: pathAt(edge.from),
    to: pathAt(edge.to),
    ...(edge.typeOnly !== undefined ? { typeOnly: edge.typeOnly } : {}),
  }));

  // Symbol edges keep their numeric endpoints; only file references are expanded.
  const symbolEdges = compact.graph.symbolEdges.map(({ from, to, label }) => ({ from, to, label }));

  const surfaceArea = {
    files: compact.surfaceArea.files.map((entry) => ({
      file: pathAt(entry.file),
      fanIn: entry.fanIn,
      fanOut: entry.fanOut,
      changed: entry.changed,
      impacted: entry.impacted,
    })),
    topFanIn: pathsAt(compact.surfaceArea.topFanIn),
    topFanOut: pathsAt(compact.surfaceArea.topFanOut),
  };

  const full: ImpactReport = {
    schemaVersion: compact.schemaVersion,
    format: "full",
    changedFiles,
    changedSymbols,
    impacted,
    ...(suggestions ? { suggestions } : {}),
    ...(exportSummary ? { exportSummary } : {}),
    ...(reexportChains ? { reexportChains } : {}),
    ...(topImpacts ? { topImpacts } : {}),
    surfaceArea,
    clusters,
    graph: { fileEdges, symbolEdges },
    ...(compact.projectFiles ? { projectFiles: compact.projectFiles } : {}),
    ...(compact.warning ? { warning: compact.warning } : {}),
  };
  return full;
}

/** Human-readable label for each impact reason, as printed in the pretty report. */
const IMPACT_REASON_LABELS: Record<ImpactItem["reasons"][number], string> = {
  directRef: "reason: direct reference",
  namespaceMember: "reason: namespace member",
  importAlias: "reason: import alias",
  transitive: "reason: transitive dependency",
  exportChain: "reason: export chain",
  fileLevelChange: "reason: file-level change",
};
item.reasons[0]; + if (!primaryReason) return "reason: impact"; + return IMPACT_REASON_LABELS[primaryReason]; +} + +function formatImpactMermaid(report: ImpactReport, root: string): string { + const fileGraph: Graph = { nodes: new Set(), edges: [] }; + const ensureFileNode = (file: string) => fileGraph.nodes.add(file); + for (const cf of report.changedFiles) ensureFileNode(cf.file); + for (const item of report.impacted) ensureFileNode(item.file); + for (const symbol of report.changedSymbols) ensureFileNode(symbol.file); + for (const edge of report.graph.fileEdges) { + ensureFileNode(edge.from); + ensureFileNode(edge.to); + fileGraph.edges.push({ + from: edge.from, + to: { type: "file", path: edge.to }, + raw: "", + ...(edge.typeOnly ? { typeOnly: edge.typeOnly } : {}), + }); + } + + const symbolGraph: SymbolGraph = { nodes: new Map(), edges: [] }; + for (const sym of report.changedSymbols) { + symbolGraph.nodes.set(sym.id, { + id: sym.id, + file: sym.file, + name: sym.name, + kind: symbolNodeKindFromString(sym.kind), + }); + } + for (const edge of report.graph.symbolEdges) { + const fromSym = report.changedSymbols[edge.from]; + const toSym = report.changedSymbols[edge.to]; + if (!fromSym || !toSym) continue; + symbolGraph.edges.push({ + from: fromSym.id, + to: toSym.id, + ...(edge.label ? { label: edge.label } : {}), + }); + } + + return graphToMermaidSymbolsWithFiles(symbolGraph, fileGraph, root); +} + +function buildDiffProviderOptions(context: ImpactCommandContext): ImpactOptionsBuilder { + const provider = context.getOpt("--provider") ?? 
"git"; + if (provider !== "git" && provider !== "github" && provider !== "raw") { + throw new Error(`Unsupported provider: ${provider}`); + } + return { provider }; +} + +async function hydrateDiffProviderOptions( + context: ImpactCommandContext, + options: ImpactOptionsBuilder, +): Promise { + if (options.provider === "git") { + const base = context.getOpt("--base"); + const head = context.getOpt("--head"); + if (!base || !head) { + throw new Error( + "Impact provider 'git' requires --base and --head. Example: codegraph impact --provider git --base main --head HEAD", + ); + } + options.base = base; + options.head = head; + options.cwd = context.projectRootFs; + return; + } + + if (options.provider === "github") { + const pr = context.getOpt("--pr"); + const repo = context.getOpt("--repo"); + if (!pr || !repo) { + throw new Error( + "Impact provider 'github' requires --repo owner/name and --pr . Example: codegraph impact --provider github --repo acme/app --pr 42", + ); + } + options.pr = Number(pr); + if (!Number.isFinite(options.pr) || options.pr <= 0) { + throw new Error("Impact provider 'github' expects --pr as a positive integer."); + } + options.repo = repo; + return; + } + + options.diffText = await context.readStdin(); +} + +function applyAnalysisOptions(context: ImpactCommandContext, options: ImpactOptionsBuilder): void { + const threadsRaw = context.getOpt("--threads"); + const threads = threadsRaw ? 
Number(threadsRaw) : 0; + if (threadsRaw) options.threads = threads; + + const cache = parseCacheModeOption(context.getOpt("--cache")); + if (cache !== undefined) options.cache = cache; + + if (context.hasFlag("--cache-strict")) options.cacheStrict = true; + if (context.hasFlag("--compact") || context.hasFlag("--compact-json")) options.compact = true; + + const maxRefs = context.getOpt("--max-refs"); + if (maxRefs) options.maxRefs = Number(maxRefs); + + const depth = context.getOpt("--depth"); + if (depth) options.depth = Number(depth); + + const scope = context.getOpt("--scope"); + if (scope === "all" || scope === "imported") options.scope = scope; + + const refContext = context.getOpt("--ref-context"); + if (refContext) options.refContext = refContext as "line" | "block"; + + const refContextLines = context.getOpt("--ref-context-lines"); + if (refContextLines) options.refContextLines = Number(refContextLines); + + const refBlockMaxLines = context.getOpt("--ref-block-max-lines"); + if (refBlockMaxLines) options.refBlockMaxLines = Number(refBlockMaxLines); + + if (context.discoveryOptions.ignoreGlobs?.length) { + options.ignoreGlobs = context.discoveryOptions.ignoreGlobs; + } + + if (context.hasFlag("--verify-refs")) options.verifyReferences = true; + + const lcovPaths = context.parsedOptions.get("--lcov"); + if (lcovPaths?.length) { + options.lcovPaths = [...lcovPaths]; + options.testCoverageSuggestions = true; + } + + const coveragePaths = context.parsedOptions.get("--coverage-report"); + if (coveragePaths?.length) { + options.coveragePaths = [...coveragePaths]; + options.testCoverageSuggestions = true; + } + + const testCommandTemplate = context.getOpt("--test-command-template"); + if (testCommandTemplate) { + options.testCommandTemplate = testCommandTemplate; + options.testCoverageSuggestions = true; + } + + options.includeTests = context.hasFlag("--include-tests"); + options.membersOnly = context.hasFlag("--members-only"); +} + +function 
buildIndexOptions(context: ImpactCommandContext, options: ImpactOptionsBuilder): BuildOptions { + const cacheMode = options.cache === "off" || options.cache === "memory" || options.cache === "disk" ? options.cache : undefined; + const indexOpts: BuildOptions = { + threads: options.threads ?? 0, + discovery: context.discoveryOptions, + onProgress: context.progressHandler, + ...(context.nativeMode !== "auto" ? { native: context.nativeMode } : {}), + ...context.workerOpts, + ...(cacheMode !== undefined ? { cache: cacheMode } : {}), + ...(options.cacheStrict ? { cacheStrict: true } : {}), + }; + if (context.graphOptions) { + indexOpts.graph = context.graphOptions; + } + return indexOpts; +} + +function writePrettyImpactReport(context: ImpactCommandContext, impactReport: ImpactReport): void { + context.writeStdoutLine("Impact Analysis Report"); + context.writeStdoutLine("======================"); + if (impactReport.warning) { + context.writeStdoutLine(`WARNING: ${impactReport.warning}`); + context.writeStdoutLine(""); + } + context.writeStdoutLine(`Changed files: ${impactReport.changedFiles.length}`); + context.writeStdoutLine(`Changed symbols: ${impactReport.changedSymbols.length}`); + context.writeStdoutLine(`Impacted items: ${impactReport.impacted.length}`); + context.writeStdoutLine(""); + for (const item of impactReport.impacted.slice(0, 10)) { + const reasonLabel = formatImpactReasonLabel(item); + context.writeStdoutLine( + `${item.file}: ${item.symbols.join(", ")} (${reasonLabel}, severity: ${(item.severity * 100).toFixed(1)}%)`, + ); + if ("refs" in item && item.refs?.length) { + const contextsToShow = item.refs.slice(0, 2); + for (const ref of contextsToShow) { + context.writeStdoutLine(` Reference at ${ref.range.start.line}:${ref.range.start.column}:`); + const refContext = ref.context ?? 
""; + const contextLines = refContext.split("\n").slice(0, 5); + for (const line of contextLines) { + context.writeStdoutLine(` ${line}`); + } + if (refContext.split("\n").length > 5) { + context.writeStdoutLine(" ..."); + } + } + if (item.refs.length > 2) { + context.writeStdoutLine(` ... and ${item.refs.length - 2} more references`); + } + } + } + if (impactReport.impacted.length > 10) { + context.writeStdoutLine(`... and ${impactReport.impacted.length - 10} more`); + } +} + +export async function handleImpactCommand(context: ImpactCommandContext): Promise { + const options = buildDiffProviderOptions(context); + await hydrateDiffProviderOptions(context, options); + applyAnalysisOptions(context, options); + + const pretty = context.hasFlag("--pretty"); + const mermaid = context.hasFlag("--mermaid"); + try { + const index = await buildProjectIndex(context.projectRootFs, buildIndexOptions(context, options)); + const report = await analyzeImpactFromDiff(context.projectRootFs, index, options as ImpactOptions); + const impactReport = ensureImpactReport(report); + + if (mermaid) { + context.writeStdoutLine(formatImpactMermaid(impactReport, context.projectRootFs)); + } else if (pretty) { + writePrettyImpactReport(context, impactReport); + } else { + context.writeJSONLine(report); + } + } catch (error) { + context.writeStderrLine(`Impact analysis failed: ${error instanceof Error ? 
error.message : String(error)}`); + context.exit(1); + } +} diff --git a/src/cli/review.ts b/src/cli/review.ts new file mode 100644 index 00000000..0029746f --- /dev/null +++ b/src/cli/review.ts @@ -0,0 +1,209 @@ +import { performance } from "node:perf_hooks"; +import { buildReviewReport, type ReviewBuildReport, type ReviewDepth } from "../review.js"; +import type { CandidateTestFile } from "../impact/context.js"; +import type { BuildReport } from "../indexer/types.js"; +import type { GraphBuildOptions } from "../graphs.js"; +import type { NativeRuntimeMode } from "../native/treeSitterNative.js"; +import type { ProjectFileDiscoveryOptions } from "../util.js"; +import { parseCacheModeOption } from "./options.js"; + +type CommandTimingReport = { + totalMs?: number; + resolveFilesMs?: number; + commandMs?: number; +}; + +type ReviewCommandReport = { + command: string; + timings: CommandTimingReport; + index?: BuildReport; + review?: ReviewBuildReport; +}; + +export type ReviewCommandContext = { + projectRootFs: string; + discoveryOptions: ProjectFileDiscoveryOptions; + reportFile: string | undefined; + commandReport: ReviewCommandReport | undefined; + getOpt: (name: string) => string | undefined; + hasFlag: (name: string) => boolean; + nativeMode: NativeRuntimeMode; + useNativeWorkers: boolean; + graphOptions: GraphBuildOptions | undefined; + writeJSONLine: (value: unknown) => void; + writeStdoutLine: (message: string) => void; + writeStderrLine: (message: string) => void; + writeCommandReport: (report: ReviewCommandReport, reportFile: string | undefined) => Promise; + exit: (code: number) => never; +}; + +function parseReviewDepth(value: string): ReviewDepth | null { + if (value === "minimal" || value === "standard" || value === "deep") { + return value; + } + return null; +} + +function countCandidateTestsByConfidence( + candidates: CandidateTestFile[], +): Record { + const counts: Record = { + high: 0, + medium: 0, + low: 0, + }; + for (const candidate of 
candidates) { + counts[candidate.confidence] += 1; + } + return counts; +} + +function appendCandidateTestGroup( + lines: string[], + title: string, + candidates: CandidateTestFile[], + confidence: CandidateTestFile["confidence"], +): number { + const matches = candidates.filter((candidate) => candidate.confidence === confidence); + if (!matches.length) return 0; + lines.push(title); + for (const candidate of matches.slice(0, 8)) { + lines.push(`- ${candidate.file}: ${candidate.reason}`); + } + const remaining = matches.length - 8; + if (remaining > 0) { + lines.push(`- ... and ${remaining} more`); + } + return matches.length; +} + +function appendLowConfidenceCandidateSummary(lines: string[], lowConfidenceCount: number): void { + if (lowConfidenceCount === 0) return; + lines.push(`Low-confidence pattern matches: ${lowConfidenceCount} available as breadth hints in full JSON.`); +} + +function formatReviewSummary(report: Awaited>): string { + const lines: string[] = []; + const candidateCounts = countCandidateTestsByConfidence(report.candidateTests); + lines.push("Review Summary"); + lines.push("=============="); + lines.push(`Status: ${report.status}`); + lines.push(`Files changed: ${report.summary.filesChanged}`); + lines.push(`Symbols changed: ${report.summary.symbolsChanged}`); + lines.push( + `Candidate tests: ${report.summary.candidateTests} (high: ${candidateCounts.high}, medium: ${candidateCounts.medium}, low: ${candidateCounts.low})`, + ); + lines.push(`Risk: ${report.riskSummary.level} (${report.riskSummary.score})`); + if (report.riskSummary.signals.length) { + lines.push(`Signals: ${report.riskSummary.signals.join(", ")}`); + } + lines.push(""); + lines.push("Changed files:"); + if (!report.changedFiles.length) { + lines.push("- none"); + } else { + for (const file of report.changedFiles.slice(0, 20)) { + const symbolNames = file.symbols.slice(0, 5).map((symbol) => symbol.name); + const symbolSummary = symbolNames.length ? 
` (${symbolNames.join(", ")})` : ""; + lines.push(`- ${file.file}: ${file.status}${symbolSummary}`); + } + const remainingFiles = report.changedFiles.length - 20; + if (remainingFiles > 0) { + lines.push(`- ... and ${remainingFiles} more`); + } + } + lines.push(""); + lines.push("Candidate tests:"); + if (!report.candidateTests.length) { + lines.push("- none"); + } else { + const listedCandidates = + appendCandidateTestGroup(lines, "High-confidence tests:", report.candidateTests, "high") + + appendCandidateTestGroup(lines, "Medium-confidence tests:", report.candidateTests, "medium"); + if (listedCandidates === 0) { + lines.push("No high- or medium-confidence test candidates found."); + } + appendLowConfidenceCandidateSummary(lines, candidateCounts.low); + } + lines.push(""); + lines.push("Review tasks:"); + if (!report.reviewTasks.length) { + lines.push("- none"); + } else { + for (const task of report.reviewTasks.slice(0, 8)) { + lines.push(`- ${task.id}: ${task.priority} - ${task.title} (${task.reason})`); + } + const remainingTasks = report.reviewTasks.length - 8; + if (remainingTasks > 0) { + lines.push(`- ... and ${remainingTasks} more`); + } + } + if (report.diagnostics) { + lines.push(""); + lines.push("Diagnostics:"); + lines.push(`- missing files: ${report.diagnostics.missingFiles.length}`); + lines.push(`- symbol mapping parse failures: ${report.diagnostics.symbolMappingParseFailures.length}`); + } + return `${lines.join("\n")}\n`; +} + +export async function handleReviewCommand(context: ReviewCommandContext): Promise { + const commandStart = performance.now(); + const base = context.getOpt("--base"); + const head = context.getOpt("--head"); + const changedSince = context.getOpt("--changed-since"); + const reviewDepthRaw = context.getOpt("--review-depth"); + const reviewDepth = reviewDepthRaw !== undefined ? 
parseReviewDepth(reviewDepthRaw) : null; + if (reviewDepthRaw !== undefined && !reviewDepth) { + context.writeStderrLine(`Invalid --review-depth value "${reviewDepthRaw}". Expected minimal|standard|deep.`); + context.exit(2); + } + const threadsRaw = context.getOpt("--threads"); + const threads = threadsRaw !== undefined ? Number(threadsRaw) : undefined; + const cache = parseCacheModeOption(context.getOpt("--cache")); + const cacheStrict = context.hasFlag("--cache-strict"); + const cacheVerify = context.hasFlag("--cache-verify"); + const incrementalStrict = context.hasFlag("--incremental-strict"); + const includeSymbolDetails = context.hasFlag("--include-symbol-details"); + const maxCallsitesRaw = context.getOpt("--max-callsites"); + const maxCallsites = maxCallsitesRaw !== undefined ? Number(maxCallsitesRaw) : undefined; + const maxTestsRaw = context.getOpt("--max-tests"); + const maxTests = maxTestsRaw !== undefined ? Number(maxTestsRaw) : undefined; + const reviewOpts: Parameters[1] = {}; + reviewOpts.discovery = context.discoveryOptions; + if (reviewDepth) reviewOpts.reviewDepth = reviewDepth; + if (base !== undefined) reviewOpts.gitBase = base; + if (head !== undefined) reviewOpts.gitHead = head; + if (changedSince !== undefined) reviewOpts.changedSince = changedSince; + if (threads !== undefined) reviewOpts.threads = threads; + if (cache === "off" || cache === "memory" || cache === "disk") { + reviewOpts.cache = cache; + } + if (context.nativeMode !== "auto") reviewOpts.native = context.nativeMode; + if (context.useNativeWorkers) reviewOpts.useNativeWorkers = true; + if (cacheStrict) reviewOpts.cacheStrict = true; + if (cacheVerify) reviewOpts.cacheVerify = true; + if (incrementalStrict) reviewOpts.incrementalStrict = true; + if (context.graphOptions) reviewOpts.graph = context.graphOptions; + if (includeSymbolDetails) { + reviewOpts.includeSymbolDetails = includeSymbolDetails; + } + if (maxCallsites !== undefined) reviewOpts.maxCallsites = maxCallsites; + if 
(maxTests !== undefined) reviewOpts.maxCandidates = maxTests; + if (context.commandReport) { + const reviewReport: ReviewBuildReport = { timings: {} }; + context.commandReport.review = reviewReport; + reviewOpts.report = reviewReport; + } + const report = await buildReviewReport(context.projectRootFs, reviewOpts); + if (context.hasFlag("--summary") || context.hasFlag("--pretty")) { + context.writeStdoutLine(formatReviewSummary(report).trimEnd()); + } else { + context.writeJSONLine(report); + } + if (context.commandReport) { + context.commandReport.timings.commandMs = Math.round(performance.now() - commandStart); + context.commandReport.timings.totalMs = context.commandReport.timings.commandMs; + await context.writeCommandReport(context.commandReport, context.reportFile); + } +} diff --git a/src/documentLinks.ts b/src/documentLinks.ts index 504bf863..02274d6d 100644 --- a/src/documentLinks.ts +++ b/src/documentLinks.ts @@ -1,64 +1,20 @@ -import path from "node:path"; -import { extractJsTsSpecifiers, type ModuleSpecifier } from "./util.js"; +import type { ModuleSpecifier } from "./util.js"; +import { extractAsciidocModuleSpecifiers } from "./documentLinks/asciidoc.js"; +import { extractHtmlAttributeSpecifiers, extractHtmlInlineScriptSpecifiers } from "./documentLinks/html.js"; +import { extractMarkdownModuleSpecifiers, extractMdxModuleSpecifiers } from "./documentLinks/markdown.js"; +import { extractRstModuleSpecifiers } from "./documentLinks/rst.js"; +import { extractAstroModuleSpecifiers, extractHandlebarsModuleSpecifiers } from "./documentLinks/sfc.js"; + +export { extractAsciidocModuleSpecifiers } from "./documentLinks/asciidoc.js"; +export { extractHtmlAttributeSpecifiers, extractHtmlInlineScriptSpecifiers } from "./documentLinks/html.js"; +export { extractMarkdownModuleSpecifiers, extractMdxModuleSpecifiers } from "./documentLinks/markdown.js"; +export { extractRstModuleSpecifiers } from "./documentLinks/rst.js"; +export { extractAstroModuleSpecifiers, 
extractHandlebarsModuleSpecifiers } from "./documentLinks/sfc.js"; export const GRAPH_ONLY_LANGUAGE_IDS = new Set(["markdown", "mdx", "astro", "hbs", "rst", "adoc"]); const GRAPH_ONLY_ALIAS_LANGUAGE_IDS = new Set(["mdx", "astro"]); -const DEFAULT_HTML_TAG_ATTRS: Record = { - script: ["src"], - link: ["href"], - a: ["href"], - img: ["src", "srcset"], - source: ["src", "srcset"], - video: ["src"], - audio: ["src"], - iframe: ["src"], - track: ["src"], -}; - -const HTML_TAG_RE = /<(script|link|a|img|source|video|audio|iframe|track)\b([^>]*)>/gi; - -const DOCUMENT_RELATIVE_EXTENSIONS = new Set([ - ".md", - ".mdx", - ".astro", - ".hbs", - ".handlebars", - ".rst", - ".adoc", - ".asciidoc", - ".html", - ".htm", - ".css", - ".scss", - ".less", - ".js", - ".jsx", - ".ts", - ".tsx", - ".mts", - ".cts", - ".mjs", - ".cjs", - ".json", - ".svg", - ".png", - ".jpg", - ".jpeg", - ".gif", - ".webp", - ".avif", - ".mp4", - ".webm", - ".mp3", - ".wav", - ".ogg", - ".txt", - ".yaml", - ".yml", -]); - export function isGraphOnlyLanguage(languageId: string): boolean { return GRAPH_ONLY_LANGUAGE_IDS.has(languageId); } @@ -79,60 +35,6 @@ export function graphOnlySpecifierNeedsResolutionConfig(specifier: string): bool ); } -export function extractHtmlInlineScriptSpecifiers(source: string): ModuleSpecifier[] { - const out: ModuleSpecifier[] = []; - const inlineScriptRe = /]*)>([\s\S]*?)<\/script>/gi; - for (const match of source.matchAll(inlineScriptRe)) { - const attrs = match[1] ?? ""; - if (/\bsrc\s*=\s*["'][^"']+["']/i.test(attrs)) continue; - const body = match[2] ?? ""; - if (!body.trim()) continue; - out.push(...markResolutionKind(extractJsTsSpecifiers(body), "source")); - } - return dedupeModuleSpecifiers(out); -} - -export function extractHtmlAttributeSpecifiers( - source: string, - tagAttrNames: Record = DEFAULT_HTML_TAG_ATTRS, -): ModuleSpecifier[] { - const out: ModuleSpecifier[] = []; - - for (const match of source.matchAll(HTML_TAG_RE)) { - const tag = (match[1] ?? 
"").toLowerCase(); - const attrs = match[2] ?? ""; - const attrNames = tagAttrNames[tag] ?? []; - - for (const attrName of attrNames) { - const attrRe = new RegExp(`(?:^|\\s)${attrName}\\s*=\\s*(?:"([^"]+)"|'([^']+)'|([^\\s"'=<>\\x60]+))`, "i"); - const attrMatch = attrs.match(attrRe); - const raw = (attrMatch?.[1] ?? attrMatch?.[2] ?? attrMatch?.[3])?.trim(); - if (!raw) continue; - if (attrName === "srcset") { - const candidates = raw - .split(",") - .map((entry) => entry.trim().split(/\s+/)[0]?.trim()) - .filter((entry): entry is string => !!entry); - for (const spec of candidates) { - const normalized = normalizeLinkSpecifier(spec, { - preferRelative: true, - resolutionKind: "document", - }); - if (normalized) out.push(normalized); - } - continue; - } - const normalized = normalizeLinkSpecifier(raw, { - preferRelative: true, - resolutionKind: "document", - }); - if (normalized) out.push(normalized); - } - } - - return dedupeModuleSpecifiers(out); -} - export function extractGraphOnlyModuleSpecifiers(languageId: string, source: string): ModuleSpecifier[] { if (languageId === "markdown") { return extractMarkdownModuleSpecifiers(source); @@ -154,539 +56,3 @@ export function extractGraphOnlyModuleSpecifiers(languageId: string, source: str } return []; } - -export function extractMarkdownModuleSpecifiers(source: string): ModuleSpecifier[] { - const sanitized = stripMarkdownCode(source); - return extractMarkdownModuleSpecifiersFromSanitized(sanitized); -} - -function extractMarkdownModuleSpecifiersFromSanitized(sanitized: string): ModuleSpecifier[] { - const referenceDefs = collectMarkdownReferenceDefinitions(sanitized); - const out: ModuleSpecifier[] = []; - - for (const destination of collectMarkdownInlineLinkDestinations(sanitized)) { - const normalized = normalizeLinkSpecifier(destination, { - preferRelative: true, - resolutionKind: "document", - }); - if (normalized) out.push(normalized); - } - - for (const match of 
sanitized.matchAll(/!?\[([^\]]+)\]\[([^\]]*)\]/g)) { - const fullMatch = match[0] ?? ""; - if (fullMatch.startsWith("!")) continue; - const text = match[1]?.trim(); - const label = match[2]?.trim(); - const resolvedLabel = normalizeReferenceLabel(label || text); - if (!resolvedLabel) continue; - const destination = referenceDefs.get(resolvedLabel); - if (!destination) continue; - out.push(destination); - } - - for (const match of sanitized.matchAll(/<([^>\s]+)>/g)) { - const candidate = match[1]?.trim(); - if (!candidate) continue; - if (candidate.startsWith("/") || candidate.startsWith("?")) continue; - if (!isLikelyMarkdownAutolinkTarget(candidate)) continue; - const normalized = normalizeLinkSpecifier(candidate, { - preferRelative: true, - resolutionKind: "document", - }); - if (normalized) out.push(normalized); - } - - out.push( - ...extractHtmlAttributeSpecifiers(sanitized, { - a: ["href"], - }), - ); - - return dedupeModuleSpecifiers(out); -} - -export function extractMdxModuleSpecifiers(source: string): ModuleSpecifier[] { - const sanitized = stripMarkdownCode(source); - const out = extractMarkdownModuleSpecifiersFromSanitized(sanitized); - out.push(...markResolutionKind(extractJsTsSpecifiers(sanitized), "source")); - return dedupeModuleSpecifiers(out); -} - -export function extractAstroModuleSpecifiers(source: string): ModuleSpecifier[] { - const out: ModuleSpecifier[] = []; - out.push(...extractHtmlAttributeSpecifiers(source)); - out.push(...extractHtmlInlineScriptSpecifiers(source)); - - const frontmatterMatch = source.match(/^---\r?\n([\s\S]*?)\r?\n---/); - if (frontmatterMatch?.[1]) { - out.push(...markResolutionKind(extractJsTsSpecifiers(frontmatterMatch[1]), "source")); - } - - return dedupeModuleSpecifiers(out); -} - -export function extractHandlebarsModuleSpecifiers(source: string): ModuleSpecifier[] { - const out: ModuleSpecifier[] = []; - out.push(...extractHtmlAttributeSpecifiers(source)); - - for (const match of 
source.matchAll(/\{\{\s*>\s*(?:"([^"]+)"|'([^']+)'|([^\s}]+))/g)) { - const rawSpecifier = match[1] ?? match[2] ?? match[3]; - if (!rawSpecifier) continue; - const normalized = normalizeLinkSpecifier(rawSpecifier, { - preferRelative: true, - resolutionKind: "document", - }); - if (normalized) out.push(normalized); - } - - return dedupeModuleSpecifiers(out); -} - -export function extractRstModuleSpecifiers(source: string): ModuleSpecifier[] { - const out: ModuleSpecifier[] = []; - const namedTargets = collectRstTargetDefinitions(source); - - for (const match of source.matchAll(/`[^`<\n]*<([^>\n]+)>`_/g)) { - const rawSpecifier = match[1]?.trim(); - if (!rawSpecifier) continue; - const normalized = normalizeLinkSpecifier(rawSpecifier, { - preferRelative: true, - resolutionKind: "document", - forceRelative: true, - }); - if (normalized) out.push(normalized); - } - - for (const match of source.matchAll(/`([^`\n]+)`_/g)) { - const label = normalizeReferenceLabel(match[1]); - if (!label) continue; - const normalized = namedTargets.get(label); - if (normalized) out.push(normalized); - } - - for (const match of source.matchAll(/^\s*\.\.\s+include::\s+([^\s]+)\s*$/gm)) { - const rawSpecifier = match[1]?.trim(); - if (!rawSpecifier) continue; - const normalized = normalizeLinkSpecifier(rawSpecifier, { - preferRelative: true, - resolutionKind: "document", - forceRelative: true, - }); - if (normalized) out.push(normalized); - } - - out.push(...extractRstToctreeSpecifiers(source)); - - return dedupeModuleSpecifiers(out); -} - -export function extractAsciidocModuleSpecifiers(source: string): ModuleSpecifier[] { - const out: ModuleSpecifier[] = []; - - for (const match of source.matchAll(/\b(xref|link):([^\[\s]+)\[[^\]]*]/g)) { - const directive = (match[1] ?? "").toLowerCase(); - const rawSpecifier = match[2]?.trim(); - if (!rawSpecifier) continue; - const fileLikeTarget = - directive === "xref" ? 
isLikelyAsciidocXrefTarget(rawSpecifier) : isLikelyAsciidocFileTarget(rawSpecifier); - if (!fileLikeTarget) continue; - const ambiguousXrefTarget = directive === "xref" && isAmbiguousAsciidocXrefTarget(rawSpecifier); - const normalized = normalizeLinkSpecifier(rawSpecifier, { - preferRelative: true, - resolutionKind: "document", - forceRelative: true, - }); - if (normalized) { - out.push(ambiguousXrefTarget ? { ...normalized, dropIfUnresolved: true } : normalized); - } - } - - for (const match of source.matchAll(/\binclude::([^\[\n]+)\[[^\]]*]/g)) { - const rawSpecifier = match[1]?.trim(); - if (!rawSpecifier) continue; - const normalized = normalizeLinkSpecifier(rawSpecifier, { - preferRelative: true, - resolutionKind: "document", - forceRelative: true, - }); - if (normalized) out.push(normalized); - } - - for (const match of source.matchAll(/<<([^>,]+)(?:,[^>]*)?>>/g)) { - const rawSpecifier = match[1]?.trim(); - if (!rawSpecifier) continue; - if (!isLikelyAsciidocFileTarget(rawSpecifier)) continue; - const normalized = normalizeLinkSpecifier(rawSpecifier, { - preferRelative: true, - resolutionKind: "document", - forceRelative: true, - }); - if (normalized) out.push(normalized); - } - - out.push( - ...extractHtmlAttributeSpecifiers(source, { - a: ["href"], - }), - ); - - return dedupeModuleSpecifiers(out); -} - -function dedupeModuleSpecifiers(entries: ModuleSpecifier[]): ModuleSpecifier[] { - const out: ModuleSpecifier[] = []; - const seen = new Set(); - for (const entry of entries) { - const key = `${entry.spec}::${entry.typeOnly ? 1 : 0}::${entry.resolutionKind ?? ""}::${entry.dropIfUnresolved ? 
1 : 0}`; - if (seen.has(key)) continue; - seen.add(key); - out.push(entry); - } - return out; -} - -function collectMarkdownReferenceDefinitions(source: string): Map { - const out = new Map(); - const definitionRe = /^\s{0,3}\[([^\]]+)\]:\s*(<[^>\n]+>|[^ \t\n]+)(?:[ \t]+(?:"[^"]*"|'[^']*'|\([^)]*\)))?\s*$/gm; - - for (const match of source.matchAll(definitionRe)) { - const label = normalizeReferenceLabel(match[1]); - const rawDestination = match[2]; - if (!label || !rawDestination) continue; - const normalized = normalizeLinkSpecifier(rawDestination, { - preferRelative: true, - resolutionKind: "document", - }); - if (normalized) out.set(label, normalized); - } - - return out; -} - -function collectRstTargetDefinitions(source: string): Map { - const out = new Map(); - const definitionRe = /^\s*\.\.\s+_([^:]+):\s*(\S+)\s*$/gm; - - for (const match of source.matchAll(definitionRe)) { - const label = normalizeReferenceLabel(match[1]); - const rawSpecifier = match[2]; - if (!label || !rawSpecifier) continue; - const normalized = normalizeLinkSpecifier(rawSpecifier, { - preferRelative: true, - resolutionKind: "document", - forceRelative: true, - }); - if (normalized) out.set(label, normalized); - } - - return out; -} - -function extractRstToctreeSpecifiers(source: string): ModuleSpecifier[] { - const out: ModuleSpecifier[] = []; - const lines = source.split(/\r?\n/); - let inToctree = false; - - for (const line of lines) { - if (/^\s*\.\.\s+toctree::\s*$/.test(line)) { - inToctree = true; - continue; - } - - if (!inToctree) continue; - - if (!line.trim()) { - continue; - } - - const indentMatch = line.match(/^(\s+)(.+)$/); - if (!indentMatch) { - inToctree = false; - continue; - } - - const content = indentMatch[2]?.trim(); - if (!content || content.startsWith(":")) { - continue; - } - - const titledMatch = content.match(/<([^>]+)>/); - const rawSpecifier = titledMatch?.[1]?.trim() ?? 
content; - const normalized = normalizeLinkSpecifier(rawSpecifier, { - preferRelative: true, - resolutionKind: "document", - forceRelative: true, - }); - if (normalized) out.push(normalized); - } - - return out; -} - -function normalizeReferenceLabel(label: string | undefined): string | null { - const normalized = label?.trim().replace(/\s+/g, " ").toLowerCase(); - return normalized ? normalized : null; -} - -function collectMarkdownInlineLinkDestinations(source: string): string[] { - const out: string[] = []; - - for (let index = 0; index < source.length; index += 1) { - if (source[index] !== "[") continue; - if (source[index - 1] === "!") continue; - - const labelEnd = findMarkdownLabelEnd(source, index + 1); - if (labelEnd < 0 || source[labelEnd + 1] !== "(") continue; - - const parsed = parseMarkdownInlineLink(source, labelEnd + 2); - if (!parsed) continue; - - out.push(extractMarkdownDestination(parsed.destination)); - index = parsed.endIndex; - } - - return out; -} - -function extractMarkdownDestination(rawDestination: string): string { - const trimmed = rawDestination.trim(); - if (!trimmed) return trimmed; - if (trimmed.startsWith("<")) { - const endIndex = trimmed.indexOf(">"); - if (endIndex > 0) return trimmed.slice(0, endIndex + 1); - } - const whitespaceIndex = trimmed.search(/\s/); - return whitespaceIndex >= 0 ? 
trimmed.slice(0, whitespaceIndex) : trimmed; -} - -function normalizeLinkSpecifier( - rawSpecifier: string, - opts?: { - preferRelative?: boolean; - forceRelative?: boolean; - resolutionKind?: "document" | "source"; - }, -): ModuleSpecifier | null { - const original = rawSpecifier.trim(); - if (!original) return null; - - let normalized = original; - if (normalized.startsWith("<") && normalized.endsWith(">")) { - normalized = normalized.slice(1, -1).trim(); - } - if (!normalized || normalized.startsWith("#")) return null; - if (isObviouslyDynamicSpecifier(normalized)) return null; - - const hasSchemePrefix = /^[A-Za-z][A-Za-z0-9+.-]*:/.test(normalized); - const isWindowsAbsolutePath = /^[A-Za-z]:[\\/]/.test(normalized); - const isProtocolRelative = normalized.startsWith("//"); - - if (!hasSchemePrefix && !isProtocolRelative && !isWindowsAbsolutePath) { - const hashIndex = normalized.indexOf("#"); - if (hashIndex >= 0) normalized = normalized.slice(0, hashIndex); - const queryIndex = normalized.indexOf("?"); - if (queryIndex >= 0) normalized = normalized.slice(0, queryIndex); - } - - normalized = normalized.trim(); - if (!normalized) return null; - - if (opts?.forceRelative && shouldForceRelativePath(normalized)) { - normalized = `./${normalized}`; - } else if (opts?.preferRelative && shouldPreferRelativePath(normalized)) { - normalized = `./${normalized}`; - } - - if (normalized === original) { - return { - spec: normalized, - ...(opts?.resolutionKind ? { resolutionKind: opts.resolutionKind } : {}), - }; - } - return { - spec: normalized, - raw: original, - ...(opts?.resolutionKind ? 
{ resolutionKind: opts.resolutionKind } : {}), - }; -} - -function markResolutionKind(entries: ModuleSpecifier[], resolutionKind: "document" | "source"): ModuleSpecifier[] { - return entries.map((entry) => ({ - ...entry, - resolutionKind, - })); -} - -function shouldForceRelativePath(specifier: string): boolean { - if ( - specifier.startsWith(".") || - specifier.startsWith("/") || - specifier.startsWith("#") || - specifier.startsWith("@") || - specifier.startsWith("//") - ) { - return false; - } - if (/^[A-Za-z][A-Za-z0-9+.-]*:/.test(specifier)) return false; - if (/^[A-Za-z]:[\\/]/.test(specifier)) return false; - return true; -} - -function shouldPreferRelativePath(specifier: string): boolean { - if ( - specifier.startsWith(".") || - specifier.startsWith("/") || - specifier.startsWith("#") || - specifier.startsWith("@") || - specifier.startsWith("//") - ) { - return false; - } - if (/^[A-Za-z][A-Za-z0-9+.-]*:/.test(specifier)) return false; - if (/^[A-Za-z]:[\\/]/.test(specifier)) return false; - if (specifier.includes("/")) { - const firstSegment = specifier.split(/[\\/]/, 1)[0] ?? 
""; - if (/^[A-Za-z0-9-]+(?:\.[A-Za-z0-9-]+)*\.[A-Za-z]{2,}$/i.test(firstSegment)) { - return false; - } - return true; - } - - const ext = path.extname(specifier).toLowerCase(); - return DOCUMENT_RELATIVE_EXTENSIONS.has(ext); -} - -function findMarkdownLabelEnd(source: string, openIndex: number): number { - let depth = 0; - for (let index = openIndex; index < source.length; index += 1) { - const char = source.charAt(index); - if (char === "\\") { - index += 1; - continue; - } - if (char === "[") { - depth += 1; - continue; - } - if (char !== "]") continue; - if (depth === 0) return index; - depth -= 1; - } - return -1; -} - -function parseMarkdownInlineLink(source: string, startIndex: number): { destination: string; endIndex: number } | null { - let depth = 1; - let destinationEnd = -1; - let quote: '"' | "'" | null = null; - let sawDestinationStart = false; - - for (let index = startIndex; index < source.length; index += 1) { - const char = source.charAt(index); - if (char === "\n") return null; - if (char === "\\") { - index += 1; - continue; - } - - if (!sawDestinationStart) { - if (/\s/.test(char)) continue; - sawDestinationStart = true; - } - - if (destinationEnd >= 0) { - if (quote) { - if (char === quote) quote = null; - continue; - } - if (char === '"' || char === "'") { - quote = char; - continue; - } - } - - if (char === "(") { - depth += 1; - continue; - } - - if (char === ")") { - depth -= 1; - if (depth !== 0) continue; - const destination = source.slice(startIndex, destinationEnd >= 0 ? destinationEnd : index).trim(); - return destination ? 
{ destination, endIndex: index } : null; - } - - if (destinationEnd < 0 && /\s/.test(char) && depth === 1) { - destinationEnd = index; - } - } - - return null; -} - -function isLikelyMarkdownAutolinkTarget(candidate: string): boolean { - if (/^[A-Za-z][A-Za-z0-9+.-]*:/.test(candidate)) return true; - if (candidate.startsWith("//")) return true; - if (candidate.startsWith("./") || candidate.startsWith("../")) return true; - if (candidate.startsWith("/") || candidate.startsWith("\\")) return true; - if (/^[^\s/@]+@[^\s/@]+\.[^\s/@]+$/.test(candidate)) return false; - if (/^[A-Za-z][A-Za-z0-9:_-]*\/?$/.test(candidate)) return false; - if (candidate.includes("/") || candidate.includes("\\")) return true; - return !!path.extname(candidate).length; -} - -function isLikelyAsciidocXrefTarget(rawSpecifier: string): boolean { - if (isLikelyAsciidocFileTarget(rawSpecifier)) return true; - - const withoutFragment = rawSpecifier.trim().split("#", 1)[0]?.split("?", 1)[0]?.trim() ?? ""; - if (!withoutFragment) return false; - return /^[A-Za-z0-9._-]+$/.test(withoutFragment); -} - -function isAmbiguousAsciidocXrefTarget(rawSpecifier: string): boolean { - return isLikelyAsciidocXrefTarget(rawSpecifier) && !isLikelyAsciidocFileTarget(rawSpecifier); -} - -function isLikelyAsciidocFileTarget(rawSpecifier: string): boolean { - const trimmed = rawSpecifier.trim(); - if (!trimmed || trimmed.startsWith("#")) return false; - if (/^[A-Za-z][A-Za-z0-9+.-]*:/.test(trimmed)) return true; - if (trimmed.startsWith("//")) return true; - if (/^[A-Za-z]:[\\/]/.test(trimmed)) return true; - - const withoutFragment = trimmed.split("#", 1)[0]?.split("?", 1)[0]?.trim() ?? 
""; - if (!withoutFragment) return false; - if (withoutFragment.startsWith("./") || withoutFragment.startsWith("../") || withoutFragment.startsWith("/")) { - return true; - } - if (withoutFragment.includes("/") || withoutFragment.includes("\\")) { - return true; - } - return !!path.extname(withoutFragment).length; -} - -function isObviouslyDynamicSpecifier(specifier: string): boolean { - return ( - specifier.includes("{") || - specifier.includes("}") || - specifier.includes("{{") || - specifier.includes("}}") || - specifier.includes("{%") || - specifier.includes("%}") || - specifier.includes("<%") || - specifier.includes("%>") || - specifier.includes("${") - ); -} - -function stripMarkdownCode(source: string): string { - let sanitized = source.replace(/(^|\n)(`{3,}|~{3,})[^\n]*\n[\s\S]*?\n\2[^\n]*(?=\n|$)/g, maskMatch); - sanitized = sanitized.replace(/`[^`\n]*`/g, maskMatch); - sanitized = sanitized.replace(/^(?: {4}|\t).*$/gm, maskMatch); - return sanitized; -} - -function maskMatch(segment: string): string { - return segment.replace(/[^\r\n]/g, " "); -} diff --git a/src/documentLinks/asciidoc.ts b/src/documentLinks/asciidoc.ts new file mode 100644 index 00000000..6e970834 --- /dev/null +++ b/src/documentLinks/asciidoc.ts @@ -0,0 +1,87 @@ +import path from "node:path"; +import type { ModuleSpecifier } from "../util.js"; +import { extractHtmlAttributeSpecifiers } from "./html.js"; +import { dedupeModuleSpecifiers, normalizeLinkSpecifier } from "./shared.js"; + +export function extractAsciidocModuleSpecifiers(source: string): ModuleSpecifier[] { + const out: ModuleSpecifier[] = []; + + for (const match of source.matchAll(/\b(xref|link):([^\[\s]+)\[[^\]]*]/g)) { + const directive = (match[1] ?? "").toLowerCase(); + const rawSpecifier = match[2]?.trim(); + if (!rawSpecifier) continue; + const fileLikeTarget = + directive === "xref" ? 
isLikelyAsciidocXrefTarget(rawSpecifier) : isLikelyAsciidocFileTarget(rawSpecifier); + if (!fileLikeTarget) continue; + const ambiguousXrefTarget = directive === "xref" && isAmbiguousAsciidocXrefTarget(rawSpecifier); + const normalized = normalizeLinkSpecifier(rawSpecifier, { + preferRelative: true, + resolutionKind: "document", + forceRelative: true, + }); + if (normalized) { + out.push(ambiguousXrefTarget ? { ...normalized, dropIfUnresolved: true } : normalized); + } + } + + for (const match of source.matchAll(/\binclude::([^\[\n]+)\[[^\]]*]/g)) { + const rawSpecifier = match[1]?.trim(); + if (!rawSpecifier) continue; + const normalized = normalizeLinkSpecifier(rawSpecifier, { + preferRelative: true, + resolutionKind: "document", + forceRelative: true, + }); + if (normalized) out.push(normalized); + } + + for (const match of source.matchAll(/<<([^>,]+)(?:,[^>]*)?>>/g)) { + const rawSpecifier = match[1]?.trim(); + if (!rawSpecifier) continue; + if (!isLikelyAsciidocFileTarget(rawSpecifier)) continue; + const normalized = normalizeLinkSpecifier(rawSpecifier, { + preferRelative: true, + resolutionKind: "document", + forceRelative: true, + }); + if (normalized) out.push(normalized); + } + + out.push( + ...extractHtmlAttributeSpecifiers(source, { + a: ["href"], + }), + ); + + return dedupeModuleSpecifiers(out); +} + +function isLikelyAsciidocXrefTarget(rawSpecifier: string): boolean { + if (isLikelyAsciidocFileTarget(rawSpecifier)) return true; + + const withoutFragment = rawSpecifier.trim().split("#", 1)[0]?.split("?", 1)[0]?.trim() ?? 
""; + if (!withoutFragment) return false; + return /^[A-Za-z0-9._-]+$/.test(withoutFragment); +} + +function isAmbiguousAsciidocXrefTarget(rawSpecifier: string): boolean { + return isLikelyAsciidocXrefTarget(rawSpecifier) && !isLikelyAsciidocFileTarget(rawSpecifier); +} + +function isLikelyAsciidocFileTarget(rawSpecifier: string): boolean { + const trimmed = rawSpecifier.trim(); + if (!trimmed || trimmed.startsWith("#")) return false; + if (/^[A-Za-z][A-Za-z0-9+.-]*:/.test(trimmed)) return true; + if (trimmed.startsWith("//")) return true; + if (/^[A-Za-z]:[\\/]/.test(trimmed)) return true; + + const withoutFragment = trimmed.split("#", 1)[0]?.split("?", 1)[0]?.trim() ?? ""; + if (!withoutFragment) return false; + if (withoutFragment.startsWith("./") || withoutFragment.startsWith("../") || withoutFragment.startsWith("/")) { + return true; + } + if (withoutFragment.includes("/") || withoutFragment.includes("\\")) { + return true; + } + return !!path.extname(withoutFragment).length; +} diff --git a/src/documentLinks/html.ts b/src/documentLinks/html.ts new file mode 100644 index 00000000..833a7da1 --- /dev/null +++ b/src/documentLinks/html.ts @@ -0,0 +1,70 @@ +import { extractJsTsSpecifiers, type ModuleSpecifier } from "../util.js"; +import { dedupeModuleSpecifiers, markResolutionKind, normalizeLinkSpecifier } from "./shared.js"; + +const DEFAULT_HTML_TAG_ATTRS: Record = { + script: ["src"], + link: ["href"], + a: ["href"], + img: ["src", "srcset"], + source: ["src", "srcset"], + video: ["src"], + audio: ["src"], + iframe: ["src"], + track: ["src"], +}; + +const HTML_TAG_RE = /<(script|link|a|img|source|video|audio|iframe|track)\b([^>]*)>/gi; + +export function extractHtmlInlineScriptSpecifiers(source: string): ModuleSpecifier[] { + const out: ModuleSpecifier[] = []; + const inlineScriptRe = /]*)>([\s\S]*?)<\/script>/gi; + for (const match of source.matchAll(inlineScriptRe)) { + const attrs = match[1] ?? 
""; + if (/\bsrc\s*=\s*["'][^"']+["']/i.test(attrs)) continue; + const body = match[2] ?? ""; + if (!body.trim()) continue; + out.push(...markResolutionKind(extractJsTsSpecifiers(body), "source")); + } + return dedupeModuleSpecifiers(out); +} + +export function extractHtmlAttributeSpecifiers( + source: string, + tagAttrNames: Record = DEFAULT_HTML_TAG_ATTRS, +): ModuleSpecifier[] { + const out: ModuleSpecifier[] = []; + + for (const match of source.matchAll(HTML_TAG_RE)) { + const tag = (match[1] ?? "").toLowerCase(); + const attrs = match[2] ?? ""; + const attrNames = tagAttrNames[tag] ?? []; + + for (const attrName of attrNames) { + const attrRe = new RegExp(`(?:^|\\s)${attrName}\\s*=\\s*(?:"([^"]+)"|'([^']+)'|([^\\s"'=<>\\x60]+))`, "i"); + const attrMatch = attrs.match(attrRe); + const raw = (attrMatch?.[1] ?? attrMatch?.[2] ?? attrMatch?.[3])?.trim(); + if (!raw) continue; + if (attrName === "srcset") { + const candidates = raw + .split(",") + .map((entry) => entry.trim().split(/\s+/)[0]?.trim()) + .filter((entry): entry is string => !!entry); + for (const spec of candidates) { + const normalized = normalizeLinkSpecifier(spec, { + preferRelative: true, + resolutionKind: "document", + }); + if (normalized) out.push(normalized); + } + continue; + } + const normalized = normalizeLinkSpecifier(raw, { + preferRelative: true, + resolutionKind: "document", + }); + if (normalized) out.push(normalized); + } + } + + return dedupeModuleSpecifiers(out); +} diff --git a/src/documentLinks/markdown.ts b/src/documentLinks/markdown.ts new file mode 100644 index 00000000..9ccaed9b --- /dev/null +++ b/src/documentLinks/markdown.ts @@ -0,0 +1,206 @@ +import path from "node:path"; +import { extractJsTsSpecifiers, type ModuleSpecifier } from "../util.js"; +import { extractHtmlAttributeSpecifiers } from "./html.js"; +import { + dedupeModuleSpecifiers, + markResolutionKind, + normalizeLinkSpecifier, + normalizeReferenceLabel, +} from "./shared.js"; + +export function 
extractMarkdownModuleSpecifiers(source: string): ModuleSpecifier[] { + const sanitized = stripMarkdownCode(source); + return extractMarkdownModuleSpecifiersFromSanitized(sanitized); +} + +function extractMarkdownModuleSpecifiersFromSanitized(sanitized: string): ModuleSpecifier[] { + const referenceDefs = collectMarkdownReferenceDefinitions(sanitized); + const out: ModuleSpecifier[] = []; + + for (const destination of collectMarkdownInlineLinkDestinations(sanitized)) { + const normalized = normalizeLinkSpecifier(destination, { + preferRelative: true, + resolutionKind: "document", + }); + if (normalized) out.push(normalized); + } + + for (const match of sanitized.matchAll(/!?\[([^\]]+)\]\[([^\]]*)\]/g)) { + const fullMatch = match[0] ?? ""; + if (fullMatch.startsWith("!")) continue; + const text = match[1]?.trim(); + const label = match[2]?.trim(); + const resolvedLabel = normalizeReferenceLabel(label || text); + if (!resolvedLabel) continue; + const destination = referenceDefs.get(resolvedLabel); + if (!destination) continue; + out.push(destination); + } + + for (const match of sanitized.matchAll(/<([^>\s]+)>/g)) { + const candidate = match[1]?.trim(); + if (!candidate) continue; + if (candidate.startsWith("/") || candidate.startsWith("?")) continue; + if (!isLikelyMarkdownAutolinkTarget(candidate)) continue; + const normalized = normalizeLinkSpecifier(candidate, { + preferRelative: true, + resolutionKind: "document", + }); + if (normalized) out.push(normalized); + } + + out.push( + ...extractHtmlAttributeSpecifiers(sanitized, { + a: ["href"], + }), + ); + + return dedupeModuleSpecifiers(out); +} + +export function extractMdxModuleSpecifiers(source: string): ModuleSpecifier[] { + const sanitized = stripMarkdownCode(source); + const out = extractMarkdownModuleSpecifiersFromSanitized(sanitized); + out.push(...markResolutionKind(extractJsTsSpecifiers(sanitized), "source")); + return dedupeModuleSpecifiers(out); +} + +function collectMarkdownReferenceDefinitions(source: 
string): Map { + const out = new Map(); + const definitionRe = /^\s{0,3}\[([^\]]+)\]:\s*(<[^>\n]+>|[^ \t\n]+)(?:[ \t]+(?:"[^"]*"|'[^']*'|\([^)]*\)))?\s*$/gm; + + for (const match of source.matchAll(definitionRe)) { + const label = normalizeReferenceLabel(match[1]); + const rawDestination = match[2]; + if (!label || !rawDestination) continue; + const normalized = normalizeLinkSpecifier(rawDestination, { + preferRelative: true, + resolutionKind: "document", + }); + if (normalized) out.set(label, normalized); + } + + return out; +} + +function collectMarkdownInlineLinkDestinations(source: string): string[] { + const out: string[] = []; + + for (let index = 0; index < source.length; index += 1) { + if (source[index] !== "[") continue; + if (source[index - 1] === "!") continue; + + const labelEnd = findMarkdownLabelEnd(source, index + 1); + if (labelEnd < 0 || source[labelEnd + 1] !== "(") continue; + + const parsed = parseMarkdownInlineLink(source, labelEnd + 2); + if (!parsed) continue; + + out.push(extractMarkdownDestination(parsed.destination)); + index = parsed.endIndex; + } + + return out; +} + +function extractMarkdownDestination(rawDestination: string): string { + const trimmed = rawDestination.trim(); + if (!trimmed) return trimmed; + if (trimmed.startsWith("<")) { + const endIndex = trimmed.indexOf(">"); + if (endIndex > 0) return trimmed.slice(0, endIndex + 1); + } + const whitespaceIndex = trimmed.search(/\s/); + return whitespaceIndex >= 0 ? 
trimmed.slice(0, whitespaceIndex) : trimmed; +} + +function findMarkdownLabelEnd(source: string, openIndex: number): number { + let depth = 0; + for (let index = openIndex; index < source.length; index += 1) { + const char = source.charAt(index); + if (char === "\\") { + index += 1; + continue; + } + if (char === "[") { + depth += 1; + continue; + } + if (char !== "]") continue; + if (depth === 0) return index; + depth -= 1; + } + return -1; +} + +function parseMarkdownInlineLink(source: string, startIndex: number): { destination: string; endIndex: number } | null { + let depth = 1; + let destinationEnd = -1; + let quote: '"' | "'" | null = null; + let sawDestinationStart = false; + + for (let index = startIndex; index < source.length; index += 1) { + const char = source.charAt(index); + if (char === "\n") return null; + if (char === "\\") { + index += 1; + continue; + } + + if (!sawDestinationStart) { + if (/\s/.test(char)) continue; + sawDestinationStart = true; + } + + if (destinationEnd >= 0) { + if (quote) { + if (char === quote) quote = null; + continue; + } + if (char === '"' || char === "'") { + quote = char; + continue; + } + } + + if (char === "(") { + depth += 1; + continue; + } + + if (char === ")") { + depth -= 1; + if (depth !== 0) continue; + const destination = source.slice(startIndex, destinationEnd >= 0 ? destinationEnd : index).trim(); + return destination ? 
{ destination, endIndex: index } : null; + } + + if (destinationEnd < 0 && /\s/.test(char) && depth === 1) { + destinationEnd = index; + } + } + + return null; +} + +function isLikelyMarkdownAutolinkTarget(candidate: string): boolean { + if (/^[A-Za-z][A-Za-z0-9+.-]*:/.test(candidate)) return true; + if (candidate.startsWith("//")) return true; + if (candidate.startsWith("./") || candidate.startsWith("../")) return true; + if (candidate.startsWith("/") || candidate.startsWith("\\")) return true; + if (/^[^\s/@]+@[^\s/@]+\.[^\s/@]+$/.test(candidate)) return false; + if (/^[A-Za-z][A-Za-z0-9:_-]*\/?$/.test(candidate)) return false; + if (candidate.includes("/") || candidate.includes("\\")) return true; + return !!path.extname(candidate).length; +} + +function stripMarkdownCode(source: string): string { + let sanitized = source.replace(/(^|\n)(`{3,}|~{3,})[^\n]*\n[\s\S]*?\n\2[^\n]*(?=\n|$)/g, maskMatch); + sanitized = sanitized.replace(/`[^`\n]*`/g, maskMatch); + sanitized = sanitized.replace(/^(?: {4}|\t).*$/gm, maskMatch); + return sanitized; +} + +function maskMatch(segment: string): string { + return segment.replace(/[^\r\n]/g, " "); +} diff --git a/src/documentLinks/rst.ts b/src/documentLinks/rst.ts new file mode 100644 index 00000000..7f8b7be2 --- /dev/null +++ b/src/documentLinks/rst.ts @@ -0,0 +1,104 @@ +import type { ModuleSpecifier } from "../util.js"; +import { + dedupeModuleSpecifiers, + normalizeLinkSpecifier, + normalizeReferenceLabel, +} from "./shared.js"; + +export function extractRstModuleSpecifiers(source: string): ModuleSpecifier[] { + const out: ModuleSpecifier[] = []; + const namedTargets = collectRstTargetDefinitions(source); + + for (const match of source.matchAll(/`[^`<\n]*<([^>\n]+)>`_/g)) { + const rawSpecifier = match[1]?.trim(); + if (!rawSpecifier) continue; + const normalized = normalizeLinkSpecifier(rawSpecifier, { + preferRelative: true, + resolutionKind: "document", + forceRelative: true, + }); + if (normalized) out.push(normalized); 
+ } + + for (const match of source.matchAll(/`([^`\n]+)`_/g)) { + const label = normalizeReferenceLabel(match[1]); + if (!label) continue; + const normalized = namedTargets.get(label); + if (normalized) out.push(normalized); + } + + for (const match of source.matchAll(/^\s*\.\.\s+include::\s+([^\s]+)\s*$/gm)) { + const rawSpecifier = match[1]?.trim(); + if (!rawSpecifier) continue; + const normalized = normalizeLinkSpecifier(rawSpecifier, { + preferRelative: true, + resolutionKind: "document", + forceRelative: true, + }); + if (normalized) out.push(normalized); + } + + out.push(...extractRstToctreeSpecifiers(source)); + + return dedupeModuleSpecifiers(out); +} + +function collectRstTargetDefinitions(source: string): Map { + const out = new Map(); + const definitionRe = /^\s*\.\.\s+_([^:]+):\s*(\S+)\s*$/gm; + + for (const match of source.matchAll(definitionRe)) { + const label = normalizeReferenceLabel(match[1]); + const rawSpecifier = match[2]; + if (!label || !rawSpecifier) continue; + const normalized = normalizeLinkSpecifier(rawSpecifier, { + preferRelative: true, + resolutionKind: "document", + forceRelative: true, + }); + if (normalized) out.set(label, normalized); + } + + return out; +} + +function extractRstToctreeSpecifiers(source: string): ModuleSpecifier[] { + const out: ModuleSpecifier[] = []; + const lines = source.split(/\r?\n/); + let inToctree = false; + + for (const line of lines) { + if (/^\s*\.\.\s+toctree::\s*$/.test(line)) { + inToctree = true; + continue; + } + + if (!inToctree) continue; + + if (!line.trim()) { + continue; + } + + const indentMatch = line.match(/^(\s+)(.+)$/); + if (!indentMatch) { + inToctree = false; + continue; + } + + const content = indentMatch[2]?.trim(); + if (!content || content.startsWith(":")) { + continue; + } + + const titledMatch = content.match(/<([^>]+)>/); + const rawSpecifier = titledMatch?.[1]?.trim() ?? 
content; + const normalized = normalizeLinkSpecifier(rawSpecifier, { + preferRelative: true, + resolutionKind: "document", + forceRelative: true, + }); + if (normalized) out.push(normalized); + } + + return out; +} diff --git a/src/documentLinks/sfc.ts b/src/documentLinks/sfc.ts new file mode 100644 index 00000000..e15a7134 --- /dev/null +++ b/src/documentLinks/sfc.ts @@ -0,0 +1,33 @@ +import { extractJsTsSpecifiers, type ModuleSpecifier } from "../util.js"; +import { extractHtmlAttributeSpecifiers, extractHtmlInlineScriptSpecifiers } from "./html.js"; +import { dedupeModuleSpecifiers, markResolutionKind, normalizeLinkSpecifier } from "./shared.js"; + +export function extractAstroModuleSpecifiers(source: string): ModuleSpecifier[] { + const out: ModuleSpecifier[] = []; + out.push(...extractHtmlAttributeSpecifiers(source)); + out.push(...extractHtmlInlineScriptSpecifiers(source)); + + const frontmatterMatch = source.match(/^---\r?\n([\s\S]*?)\r?\n---/); + if (frontmatterMatch?.[1]) { + out.push(...markResolutionKind(extractJsTsSpecifiers(frontmatterMatch[1]), "source")); + } + + return dedupeModuleSpecifiers(out); +} + +export function extractHandlebarsModuleSpecifiers(source: string): ModuleSpecifier[] { + const out: ModuleSpecifier[] = []; + out.push(...extractHtmlAttributeSpecifiers(source)); + + for (const match of source.matchAll(/\{\{\s*>\s*(?:"([^"]+)"|'([^']+)'|([^\s}]+))/g)) { + const rawSpecifier = match[1] ?? match[2] ?? 
match[3]; + if (!rawSpecifier) continue; + const normalized = normalizeLinkSpecifier(rawSpecifier, { + preferRelative: true, + resolutionKind: "document", + }); + if (normalized) out.push(normalized); + } + + return dedupeModuleSpecifiers(out); +} diff --git a/src/documentLinks/shared.ts b/src/documentLinks/shared.ts new file mode 100644 index 00000000..a936af9e --- /dev/null +++ b/src/documentLinks/shared.ts @@ -0,0 +1,173 @@ +import path from "node:path"; +import type { ModuleSpecifier } from "../util.js"; + +const DOCUMENT_RELATIVE_EXTENSIONS = new Set([ + ".md", + ".mdx", + ".astro", + ".hbs", + ".handlebars", + ".rst", + ".adoc", + ".asciidoc", + ".html", + ".htm", + ".css", + ".scss", + ".less", + ".js", + ".jsx", + ".ts", + ".tsx", + ".mts", + ".cts", + ".mjs", + ".cjs", + ".json", + ".svg", + ".png", + ".jpg", + ".jpeg", + ".gif", + ".webp", + ".avif", + ".mp4", + ".webm", + ".mp3", + ".wav", + ".ogg", + ".txt", + ".yaml", + ".yml", +]); + +export function dedupeModuleSpecifiers(entries: ModuleSpecifier[]): ModuleSpecifier[] { + const out: ModuleSpecifier[] = []; + const seen = new Set(); + for (const entry of entries) { + const key = `${entry.spec}::${entry.typeOnly ? 1 : 0}::${entry.resolutionKind ?? ""}::${entry.dropIfUnresolved ? 
1 : 0}`; + if (seen.has(key)) continue; + seen.add(key); + out.push(entry); + } + return out; +} + +export function normalizeLinkSpecifier( + rawSpecifier: string, + opts?: { + preferRelative?: boolean; + forceRelative?: boolean; + resolutionKind?: "document" | "source"; + }, +): ModuleSpecifier | null { + const original = rawSpecifier.trim(); + if (!original) return null; + + let normalized = original; + if (normalized.startsWith("<") && normalized.endsWith(">")) { + normalized = normalized.slice(1, -1).trim(); + } + if (!normalized || normalized.startsWith("#")) return null; + if (isObviouslyDynamicSpecifier(normalized)) return null; + + const hasSchemePrefix = /^[A-Za-z][A-Za-z0-9+.-]*:/.test(normalized); + const isWindowsAbsolutePath = /^[A-Za-z]:[\\/]/.test(normalized); + const isProtocolRelative = normalized.startsWith("//"); + + if (!hasSchemePrefix && !isProtocolRelative && !isWindowsAbsolutePath) { + const hashIndex = normalized.indexOf("#"); + if (hashIndex >= 0) normalized = normalized.slice(0, hashIndex); + const queryIndex = normalized.indexOf("?"); + if (queryIndex >= 0) normalized = normalized.slice(0, queryIndex); + } + + normalized = normalized.trim(); + if (!normalized) return null; + + if (opts?.forceRelative && shouldForceRelativePath(normalized)) { + normalized = `./${normalized}`; + } else if (opts?.preferRelative && shouldPreferRelativePath(normalized)) { + normalized = `./${normalized}`; + } + + if (normalized === original) { + return { + spec: normalized, + ...(opts?.resolutionKind ? { resolutionKind: opts.resolutionKind } : {}), + }; + } + return { + spec: normalized, + raw: original, + ...(opts?.resolutionKind ? 
{ resolutionKind: opts.resolutionKind } : {}), + }; +} + +export function markResolutionKind( + entries: ModuleSpecifier[], + resolutionKind: "document" | "source", +): ModuleSpecifier[] { + return entries.map((entry) => ({ + ...entry, + resolutionKind, + })); +} + +export function normalizeReferenceLabel(label: string | undefined): string | null { + const normalized = label?.trim().replace(/\s+/g, " ").toLowerCase(); + return normalized ? normalized : null; +} + +function shouldForceRelativePath(specifier: string): boolean { + if ( + specifier.startsWith(".") || + specifier.startsWith("/") || + specifier.startsWith("#") || + specifier.startsWith("@") || + specifier.startsWith("//") + ) { + return false; + } + if (/^[A-Za-z][A-Za-z0-9+.-]*:/.test(specifier)) return false; + if (/^[A-Za-z]:[\\/]/.test(specifier)) return false; + return true; +} + +function shouldPreferRelativePath(specifier: string): boolean { + if ( + specifier.startsWith(".") || + specifier.startsWith("/") || + specifier.startsWith("#") || + specifier.startsWith("@") || + specifier.startsWith("//") + ) { + return false; + } + if (/^[A-Za-z][A-Za-z0-9+.-]*:/.test(specifier)) return false; + if (/^[A-Za-z]:[\\/]/.test(specifier)) return false; + if (specifier.includes("/")) { + const firstSegment = specifier.split(/[\\/]/, 1)[0] ?? 
""; + if (/^[A-Za-z0-9-]+(?:\.[A-Za-z0-9-]+)*\.[A-Za-z]{2,}$/i.test(firstSegment)) { + return false; + } + return true; + } + + const ext = path.extname(specifier).toLowerCase(); + return DOCUMENT_RELATIVE_EXTENSIONS.has(ext); +} + +export function isObviouslyDynamicSpecifier(specifier: string): boolean { + return ( + specifier.includes("{") || + specifier.includes("}") || + specifier.includes("{{") || + specifier.includes("}}") || + specifier.includes("{%") || + specifier.includes("%}") || + specifier.includes("<%") || + specifier.includes("%>") || + specifier.includes("${") + ); +} diff --git a/src/graph-edge-collector.ts b/src/graph-edge-collector.ts index 8fa20af4..8d49983d 100644 --- a/src/graph-edge-collector.ts +++ b/src/graph-edge-collector.ts @@ -2,16 +2,10 @@ import path from "node:path"; import { type JsLanguage } from "./jsFallback.js"; import { prepareSourceInput } from "./languages/filePrep.js"; import { type LanguageSupport } from "./languages.js"; -import type { EdgeTo, Edge } from "./types.js"; +import type { Edge } from "./types.js"; import { loadNearestTsconfigFor, - getGraphOnlyResolutionExtensions, type WorkspaceConfig, - resolveSpecifier, - resolveImportSpecifier, - resolvePythonModule, - resolveJvmPackageImportPaths, - getPhpComposerImplicitFiles, extractJsTsDynamicSpecifiers, } from "./util.js"; import { logWithLevel, type LogLevel } from "./logging.js"; @@ -28,6 +22,7 @@ import { } from "./native/treeSitterNative.js"; import { recordNativeExecutionOutcome } from "./native/nativeBackendReport.js"; import { collectModuleSpecifiersFromSource, type FallbackImportExtractionEvent } from "./graphs/specifiers.js"; +import { collectPhpComposerImplicitEdges, resolveModuleSpecifierEdges } from "./graphs/edgeResolution.js"; import type { GraphCacheEntry } from "./graphs/types.js"; import type { BuildReport } from "./indexer/types.js"; import type { SyntaxTreeLike } from "./languages/types.js"; @@ -153,7 +148,6 @@ export async function 
collectEdgesForFile( } } - const graphOnlyLanguage = isGraphOnlyLanguage(sup.id); const graphOnlyAliasLanguage = graphOnlyLanguageSupportsImportAliases(sup.id); const needsGraphOnlyResolutionConfig = graphOnlyAliasLanguage && specs.some(({ spec }) => graphOnlySpecifierNeedsResolutionConfig(spec)); @@ -163,104 +157,15 @@ export async function collectEdgesForFile( : { matchPath: undefined }; const edges: Edge[] = []; const edgeResolutionTasks = specs.map(async (entry) => { - const { spec, raw, typeOnly, phpImportType, resolved, confidence, resolutionKind, dropIfUnresolved } = entry; - let to: EdgeTo; - const resolutionExtensions = graphOnlyLanguage - ? getGraphOnlyResolutionExtensions(sup.id, resolutionKind ?? "document") - : undefined; - if (sup.id === "python") { - const relDotsMatch = spec.startsWith(".") ? spec.match(/^\.+/) : null; - const relDots = relDotsMatch ? relDotsMatch[0].length : 0; - const isDotsOnly = /^\.+$/.test(spec); - const res = await resolvePythonModule(projectRoot, file, isDotsOnly ? null : spec, relDots); - to = - typeof res === "string" - ? { type: "file", path: res.replace(/\\/g, "/") } - : { type: "external", name: res.external }; - } else if (sup.id === "go") { - const res = await resolveImportSpecifier(projectRoot, file, spec, sup.id, { - ...(matchPath ? { matchPath } : {}), - ...(workspaceConfig ? { workspaceConfig } : {}), - resolveNodeModules: !!opts.resolveNodeModules, - ...(opts.resolutionHints ? { resolutionHints: opts.resolutionHints } : {}), - }); - to = - typeof res === "string" - ? 
{ type: "file", path: res.replace(/\\/g, "/") } - : { type: "external", name: res.external }; - } else if (sup.id === "java" || sup.id === "kotlin") { - const packageTargets = await resolveJvmPackageImportPaths(projectRoot, spec, sup.id); - if (packageTargets.length) { - return packageTargets.map((targetPath) => ({ - to: { type: "file", path: targetPath.replace(/\\/g, "/") } as EdgeTo, - spec, - ...(raw !== undefined && { raw }), - ...(typeOnly !== undefined && { typeOnly }), - ...(resolved !== undefined && { resolved }), - ...(confidence !== undefined && { confidence }), - })); - } - const res = await resolveImportSpecifier(projectRoot, file, spec, sup.id, { - ...(matchPath ? { matchPath } : {}), - ...(workspaceConfig ? { workspaceConfig } : {}), - resolveNodeModules: !!opts.resolveNodeModules, - ...(opts.resolutionHints ? { resolutionHints: opts.resolutionHints } : {}), - }); - to = - typeof res === "string" - ? { type: "file", path: res.replace(/\\/g, "/") } - : { type: "external", name: raw ?? res.external }; - } else if (["csharp", "ruby", "rust", "php"].includes(sup.id)) { - const { resolvePathLikeModule } = await import("./util.js"); - const res = - sup.id === "php" - ? await resolveImportSpecifier(projectRoot, file, spec, sup.id, { - ...(matchPath ? { matchPath } : {}), - ...(workspaceConfig ? { workspaceConfig } : {}), - resolveNodeModules: !!opts.resolveNodeModules, - ...(opts.resolutionHints ? { resolutionHints: opts.resolutionHints } : {}), - ...(phpImportType ? { phpImportType } : {}), - }) - : await resolvePathLikeModule(projectRoot, spec); - if (res && typeof res === "string") { - to = { type: "file", path: res.replace(/\\/g, "/") }; - } else { - // Fallback to resolveSpecifier for relative paths like ./foo - const res2 = await resolveSpecifier(file, spec, projectRoot, matchPath, workspaceConfig, { - resolveNodeModules: !!opts.resolveNodeModules, - ...(resolutionExtensions ? { resolutionExtensions } : {}), - ...(opts.resolutionHints ? 
{ resolutionHints: opts.resolutionHints } : {}), - }); - to = - typeof res2 === "string" - ? { type: "file", path: res2.replace(/\\/g, "/") } - : { type: "external", name: raw ?? res2.external }; - } - } else { - const res = await resolveSpecifier(file, spec, projectRoot, matchPath, workspaceConfig, { - resolveNodeModules: !!opts.resolveNodeModules, - ...(resolutionExtensions ? { resolutionExtensions } : {}), - ...(opts.resolutionHints ? { resolutionHints: opts.resolutionHints } : {}), - ...(sup.id === "scss" && resolutionKind !== "document" ? { allowScssPartialResolution: true } : {}), - }); - to = - typeof res === "string" - ? { type: "file", path: res.replace(/\\/g, "/") } - : { type: "external", name: raw ?? res.external }; - } - if (to.type === "external" && dropIfUnresolved) { - return null; - } - return [ - { - to, - spec, - ...(raw !== undefined && { raw }), - ...(typeOnly !== undefined && { typeOnly }), - ...(resolved !== undefined && { resolved }), - ...(confidence !== undefined && { confidence }), - }, - ]; + return await resolveModuleSpecifierEdges(entry, { + support: sup, + file, + projectRoot, + workspaceConfig, + matchPath, + resolveNodeModules: !!opts.resolveNodeModules, + ...(opts.resolutionHints ? { resolutionHints: opts.resolutionHints } : {}), + }); }); for (const resolvedEdge of await Promise.all(edgeResolutionTasks)) { @@ -279,26 +184,7 @@ export async function collectEdgesForFile( } if (sup.id === "php") { - const implicitFiles = await getPhpComposerImplicitFiles(projectRoot, file); - const seenFileTargets = new Set( - edges - .map((edge) => (edge.to.type === "file" ? 
edge.to.path : null)) - .filter((target): target is string => !!target), - ); - for (const implicitFile of implicitFiles) { - const normalizedTarget = implicitFile.replace(/\\/g, "/"); - if (normalizedTarget === normalizedFile || seenFileTargets.has(normalizedTarget)) { - continue; - } - - const relativeRaw = path.relative(path.dirname(file), implicitFile).replace(/\\/g, "/"); - edges.push({ - from: normalizedFile, - to: { type: "file", path: normalizedTarget }, - raw: relativeRaw.startsWith(".") || relativeRaw.startsWith("/") ? relativeRaw : `./${relativeRaw}`, - }); - seenFileTargets.add(normalizedTarget); - } + edges.push(...(await collectPhpComposerImplicitEdges({ projectRoot, file, normalizedFile, existingEdges: edges }))); } emitCacheEntry(edges); return edges; diff --git a/src/graphs/cycles.ts b/src/graphs/cycles.ts new file mode 100644 index 00000000..1c4eda95 --- /dev/null +++ b/src/graphs/cycles.ts @@ -0,0 +1,184 @@ +import type { FileId, Graph } from "../types.js"; + +export type CycleInternalEdge = { + from: FileId; + to: FileId; + raw: string; + typeOnly?: boolean; +}; + +export type DetailedCycle = { + files: FileId[]; + entryEdges: CycleInternalEdge[]; + internalEdges: CycleInternalEdge[]; + fileCount: number; + internalEdgeCount: number; + fanInFromOutside: number; + priorityScore: number; + remediationHint: string; +}; + +export type CycleSortMode = "priority" | "size" | "fanin"; + +const DOCUMENT_ONLY_CYCLE_EXTENSIONS = new Set([".md", ".mdx", ".rst", ".adoc", ".asciidoc"]); + +function isDocumentOnlyCycleFile(file: string): boolean { + const normalized = file.toLowerCase().split(/[?#]/, 1)[0] ?? 
""; + for (const extension of DOCUMENT_ONLY_CYCLE_EXTENSIONS) { + if (normalized.endsWith(extension)) { + return true; + } + } + return false; +} + +export function findCycles(graph: Graph): FileId[][] { + return findDetailedCycles(graph).map((cycle) => cycle.files); +} + +export function sortDetailedCycles(cycles: DetailedCycle[], mode: CycleSortMode = "priority"): DetailedCycle[] { + const sorted = [...cycles]; + sorted.sort((left, right) => { + if (mode === "size") { + if (right.fileCount !== left.fileCount) return right.fileCount - left.fileCount; + return right.priorityScore - left.priorityScore; + } + if (mode === "fanin") { + if (right.fanInFromOutside !== left.fanInFromOutside) { + return right.fanInFromOutside - left.fanInFromOutside; + } + return right.priorityScore - left.priorityScore; + } + return right.priorityScore - left.priorityScore; + }); + return sorted; +} + +export function findDetailedCycles( + graph: Graph, + options: { symbolCoupling?: Map } = {}, +): DetailedCycle[] { + const nodes = Array.from(graph.nodes); + const indexByNode = new Map(); + nodes.forEach((node, index) => indexByNode.set(node, index)); + + const adjacency = nodes.map(() => [] as number[]); + for (const edge of graph.edges) { + if (edge.to.type !== "file") continue; + const fromIndex = indexByNode.get(edge.from); + const toIndex = indexByNode.get(edge.to.path); + if (fromIndex !== undefined && toIndex !== undefined) { + adjacency[fromIndex]!.push(toIndex); + } + } + + const nodeCount = nodes.length; + const indices: number[] = new Array(nodeCount).fill(-1); + const lowlink: number[] = new Array(nodeCount).fill(-1); + const onStack = new Array(nodeCount).fill(false); + const stack: number[] = []; + let nextIndex = 0; + const stronglyConnectedComponents: number[][] = []; + + function strongconnect(vertex: number) { + indices[vertex] = nextIndex; + lowlink[vertex] = nextIndex; + nextIndex++; + stack.push(vertex); + onStack[vertex] = true; + + for (const neighbor of 
adjacency[vertex]!) { + if (indices[neighbor] === -1) { + strongconnect(neighbor); + lowlink[vertex] = Math.min(lowlink[vertex], lowlink[neighbor]!); + } else if (onStack[neighbor]) { + lowlink[vertex] = Math.min(lowlink[vertex], indices[neighbor]!); + } + } + + if (lowlink[vertex] === indices[vertex]) { + const component: number[] = []; + let popped: number; + do { + popped = stack.pop()!; + onStack[popped] = false; + component.push(popped); + } while (popped !== vertex); + if (component.length > 1 || adjacency[vertex]!.includes(vertex)) { + stronglyConnectedComponents.push(component); + } + } + } + + for (let index = 0; index < nodeCount; index++) { + if (indices[index] === -1) strongconnect(index); + } + + const cycleDetails: DetailedCycle[] = []; + for (const component of stronglyConnectedComponents) { + const files = component.map((index) => nodes[index]!); + if (files.every(isDocumentOnlyCycleFile)) { + continue; + } + const componentFiles = new Set(files); + const internalEdges: CycleInternalEdge[] = []; + const entryEdges: CycleInternalEdge[] = []; + let internalEdgeCount = 0; + let fanInFromOutside = 0; + + for (const edge of graph.edges) { + if (edge.to.type !== "file") continue; + const fromInComponent = componentFiles.has(edge.from); + const toInComponent = componentFiles.has(edge.to.path); + if (fromInComponent && toInComponent) { + internalEdgeCount += 1; + internalEdges.push({ + from: edge.from, + to: edge.to.path, + raw: edge.raw, + ...(edge.typeOnly !== undefined ? { typeOnly: edge.typeOnly } : {}), + }); + } + if (!fromInComponent && toInComponent) { + fanInFromOutside += 1; + entryEdges.push({ + from: edge.from, + to: edge.to.path, + raw: edge.raw, + ...(edge.typeOnly !== undefined ? { typeOnly: edge.typeOnly } : {}), + }); + } + } + + const priorityScore = files.length * 3 + fanInFromOutside * 2 + internalEdgeCount; + const couplingForEdge = (edge: CycleInternalEdge): number => + options.symbolCoupling?.get(`${edge.from} -> ${edge.to}`) ?? 
0; + const weakestEdge = internalEdges.reduce((best, edge) => { + if (!best) return edge; + const bestCoupling = couplingForEdge(best); + const edgeCoupling = couplingForEdge(edge); + if (edgeCoupling !== bestCoupling) { + return edgeCoupling < bestCoupling ? edge : best; + } + if (!!edge.typeOnly && !best.typeOnly) return edge; + return best; + }, null); + + const remediationHint = weakestEdge + ? `Break ${weakestEdge.from} -> ${weakestEdge.to} (import ${weakestEdge.raw}) to reduce SCC coupling; estimated symbol coupling=${couplingForEdge(weakestEdge)}.` + : `Break one import edge in this ${files.length}-file SCC to remove the cycle.`; + + cycleDetails.push({ + files, + entryEdges, + internalEdges, + fileCount: files.length, + internalEdgeCount, + fanInFromOutside, + priorityScore, + remediationHint, + }); + } + + return sortDetailedCycles(cycleDetails, "priority"); +} diff --git a/src/graphs/edgeResolution.ts b/src/graphs/edgeResolution.ts new file mode 100644 index 00000000..2241c2d4 --- /dev/null +++ b/src/graphs/edgeResolution.ts @@ -0,0 +1,158 @@ +import path from "node:path"; +import type { LanguageSupport } from "../languages.js"; +import type { Edge, EdgeTo } from "../types.js"; +import { + getGraphOnlyResolutionExtensions, + getPhpComposerImplicitFiles, + resolveImportSpecifier, + resolveJvmPackageImportPaths, + resolvePythonModule, + resolveSpecifier, + type MatchPathFn, + type ModuleSpecifier, + type WorkspaceConfig, +} from "../util.js"; +import { isGraphOnlyLanguage } from "../documentLinks.js"; + +type ResolvedSpecifierEdge = { + to: EdgeTo; + spec: string; + raw?: string; + typeOnly?: boolean; + resolved?: ModuleSpecifier["resolved"]; + confidence?: number; +}; + +export type ModuleSpecifierResolutionContext = { + support: LanguageSupport; + file: string; + projectRoot: string; + workspaceConfig: WorkspaceConfig | undefined; + matchPath: MatchPathFn | undefined; + resolveNodeModules?: boolean; + resolutionHints?: string[]; +}; + +function 
edgeToResolvedFile(resolved: string): EdgeTo { + return { type: "file", path: resolved.replace(/\\/g, "/") }; +} + +function edgeToExternal(name: string): EdgeTo { + return { type: "external", name }; +} + +function withSpecifierMetadata(entry: ModuleSpecifier, to: EdgeTo): ResolvedSpecifierEdge { + return { + to, + spec: entry.spec, + ...(entry.raw !== undefined ? { raw: entry.raw } : {}), + ...(entry.typeOnly !== undefined ? { typeOnly: entry.typeOnly } : {}), + ...(entry.resolved !== undefined ? { resolved: entry.resolved } : {}), + ...(entry.confidence !== undefined ? { confidence: entry.confidence } : {}), + }; +} + +async function resolveGenericSpecifier( + entry: ModuleSpecifier, + context: ModuleSpecifierResolutionContext, + resolutionExtensions?: readonly string[], +): Promise { + const res = await resolveSpecifier( + context.file, + entry.spec, + context.projectRoot, + context.matchPath, + context.workspaceConfig, + { + resolveNodeModules: !!context.resolveNodeModules, + ...(resolutionExtensions ? { resolutionExtensions } : {}), + ...(context.resolutionHints ? { resolutionHints: context.resolutionHints } : {}), + ...(context.support.id === "scss" && entry.resolutionKind !== "document" + ? { allowScssPartialResolution: true } + : {}), + }, + ); + return typeof res === "string" ? edgeToResolvedFile(res) : edgeToExternal(entry.raw ?? res.external); +} + +async function resolveImportSpecifierEdge( + entry: ModuleSpecifier, + context: ModuleSpecifierResolutionContext, +): Promise { + const res = await resolveImportSpecifier(context.projectRoot, context.file, entry.spec, context.support.id, { + ...(context.matchPath ? { matchPath: context.matchPath } : {}), + ...(context.workspaceConfig ? { workspaceConfig: context.workspaceConfig } : {}), + resolveNodeModules: !!context.resolveNodeModules, + ...(context.resolutionHints ? { resolutionHints: context.resolutionHints } : {}), + ...(entry.phpImportType ? 
{ phpImportType: entry.phpImportType } : {}), + }); + return typeof res === "string" ? edgeToResolvedFile(res) : edgeToExternal(entry.raw ?? res.external); +} + +export async function resolveModuleSpecifierEdges( + entry: ModuleSpecifier, + context: ModuleSpecifierResolutionContext, +): Promise { + const graphOnlyLanguage = isGraphOnlyLanguage(context.support.id); + const resolutionExtensions = graphOnlyLanguage + ? getGraphOnlyResolutionExtensions(context.support.id, entry.resolutionKind ?? "document") + : undefined; + + let to: EdgeTo; + if (context.support.id === "python") { + const relDotsMatch = entry.spec.startsWith(".") ? entry.spec.match(/^\.+/) : null; + const relDots = relDotsMatch ? relDotsMatch[0].length : 0; + const isDotsOnly = /^\.+$/.test(entry.spec); + const res = await resolvePythonModule(context.projectRoot, context.file, isDotsOnly ? null : entry.spec, relDots); + to = typeof res === "string" ? edgeToResolvedFile(res) : edgeToExternal(res.external); + } else if (context.support.id === "java" || context.support.id === "kotlin") { + const packageTargets = await resolveJvmPackageImportPaths(context.projectRoot, entry.spec, context.support.id); + if (packageTargets.length) { + return packageTargets.map((targetPath) => withSpecifierMetadata(entry, edgeToResolvedFile(targetPath))); + } + to = await resolveImportSpecifierEdge(entry, context); + } else if (context.support.id === "go" || context.support.id === "php") { + to = await resolveImportSpecifierEdge(entry, context); + } else if (["csharp", "ruby", "rust"].includes(context.support.id)) { + const { resolvePathLikeModule } = await import("../util.js"); + const pathLike = await resolvePathLikeModule(context.projectRoot, entry.spec); + to = pathLike ? 
edgeToResolvedFile(pathLike) : await resolveGenericSpecifier(entry, context, resolutionExtensions); + } else { + to = await resolveGenericSpecifier(entry, context, resolutionExtensions); + } + + if (to.type === "external" && entry.dropIfUnresolved) { + return null; + } + return [withSpecifierMetadata(entry, to)]; +} + +export async function collectPhpComposerImplicitEdges(args: { + projectRoot: string; + file: string; + normalizedFile: string; + existingEdges: readonly Edge[]; +}): Promise { + const implicitFiles = await getPhpComposerImplicitFiles(args.projectRoot, args.file); + const seenFileTargets = new Set( + args.existingEdges + .map((edge) => (edge.to.type === "file" ? edge.to.path : null)) + .filter((target): target is string => !!target), + ); + const edges: Edge[] = []; + for (const implicitFile of implicitFiles) { + const normalizedTarget = implicitFile.replace(/\\/g, "/"); + if (normalizedTarget === args.normalizedFile || seenFileTargets.has(normalizedTarget)) { + continue; + } + + const relativeRaw = path.relative(path.dirname(args.file), implicitFile).replace(/\\/g, "/"); + edges.push({ + from: args.normalizedFile, + to: { type: "file", path: normalizedTarget }, + raw: relativeRaw.startsWith(".") || relativeRaw.startsWith("/") ? 
relativeRaw : `./${relativeRaw}`, + }); + seenFileTargets.add(normalizedTarget); + } + return edges; +} diff --git a/src/graphs/external-classifier.ts b/src/graphs/external-classifier.ts index af0120af..fd941f7f 100644 --- a/src/graphs/external-classifier.ts +++ b/src/graphs/external-classifier.ts @@ -1,6 +1,10 @@ -import fs from "node:fs"; -import path from "node:path"; -import { builtinModules } from "node:module"; +import { declaredPackagesForContext } from "./external/context.js"; +import { isSupportedStdlib, isUrlSpecifier } from "./external/stdlib.js"; + +export { + getExternalClassifierCacheStats, + resetExternalClassifierCaches, +} from "./external/context.js"; export type ExternalResolutionStatus = "declared-package" | "stdlib" | "url" | "unresolved"; @@ -13,654 +17,6 @@ export type ExternalSpecifierClassificationOptions = { projectRoot?: string; }; -type DependencyManifest = { - declaredPackages: Set; - hasManifest: boolean; -}; - -type ExternalClassifierCacheStats = { - dependencyManifests: number; - declaredPackageContexts: number; -}; - -const MAX_EXTERNAL_CLASSIFIER_CACHE_ENTRIES = 512; -const MAX_MANIFEST_ANCESTOR_SEARCH_DEPTH = 64; - -class BoundedCacheMap extends Map { - constructor(private readonly maxEntries: number) { - super(); - } - - override set(key: K, value: V): this { - if (super.has(key)) { - super.delete(key); - } - super.set(key, value); - while (this.size > this.maxEntries) { - const oldest = this.keys().next(); - if (oldest.done) break; - super.delete(oldest.value); - } - return this; - } -} - -const NODE_BUILTIN_MODULES = new Set([ - ...builtinModules, - ...builtinModules.filter((name) => !name.startsWith("node:")).map((name) => `node:${name}`), -]); - -const PYTHON_STDLIB_MODULES = new Set([ - "__future__", - "abc", - "argparse", - "asyncio", - "collections", - "contextlib", - "dataclasses", - "datetime", - "decimal", - "functools", - "itertools", - "json", - "logging", - "math", - "os", - "pathlib", - "re", - "shutil", - 
"sqlite3", - "statistics", - "string", - "subprocess", - "sys", - "tempfile", - "time", - "typing", - "unittest", - "urllib", -]); - -const RUBY_STDLIB_MODULES = new Set([ - "date", - "digest", - "fileutils", - "json", - "logger", - "pathname", - "set", - "time", - "uri", - "yaml", -]); - -const GO_STDLIB_IMPORTS = new Set([ - "bufio", - "bytes", - "context", - "crypto", - "database", - "encoding", - "errors", - "fmt", - "io", - "log", - "math", - "net", - "net/http", - "os", - "path", - "path/filepath", - "reflect", - "regexp", - "sort", - "strconv", - "strings", - "sync", - "testing", - "time", -]); - -const CPP_STDLIB_HEADERS = new Set([ - "algorithm", - "array", - "chrono", - "cstdint", - "cstdio", - "cstdlib", - "exception", - "filesystem", - "fstream", - "functional", - "iostream", - "map", - "memory", - "optional", - "set", - "sstream", - "stdexcept", - "string", - "string_view", - "tuple", - "type_traits", - "unordered_map", - "unordered_set", - "utility", - "vector", -]); - -const C_STDLIB_HEADERS = new Set([ - "assert.h", - "ctype.h", - "errno.h", - "float.h", - "limits.h", - "math.h", - "setjmp.h", - "signal.h", - "stdarg.h", - "stdbool.h", - "stddef.h", - "stdint.h", - "stdio.h", - "stdlib.h", - "string.h", - "time.h", -]); - -const SWIFT_SDK_MODULES = new Set(["Foundation", "Dispatch", "Darwin", "Glibc", "SwiftUI", "UIKit"]); - -const dependencyManifestCache = new BoundedCacheMap(MAX_EXTERNAL_CLASSIFIER_CACHE_ENTRIES); -const declaredPackagesByContextCache = new BoundedCacheMap>(MAX_EXTERNAL_CLASSIFIER_CACHE_ENTRIES); - -export function resetExternalClassifierCaches(): void { - dependencyManifestCache.clear(); - declaredPackagesByContextCache.clear(); -} - -export function getExternalClassifierCacheStats(): ExternalClassifierCacheStats { - return { - dependencyManifests: dependencyManifestCache.size, - declaredPackageContexts: declaredPackagesByContextCache.size, - }; -} - -function directoryExists(directory: string): boolean { - if 
(!fs.existsSync(directory)) return false; - try { - return fs.statSync(directory).isDirectory(); - } catch { - return false; - } -} - -function pathExists(filePath: string): boolean { - return fs.existsSync(filePath); -} - -function readText(filePath: string): string | null { - if (!pathExists(filePath)) return null; - try { - return fs.readFileSync(filePath, "utf8"); - } catch { - return null; - } -} - -function readJsonObject(filePath: string): Record | null { - const raw = readText(filePath); - if (raw === null) return null; - try { - const parsed = JSON.parse(raw) as unknown; - if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return null; - return parsed as Record; - } catch { - return null; - } -} - -function readStringRecord(value: unknown): Record { - if (!value || typeof value !== "object" || Array.isArray(value)) return {}; - const entries = Object.entries(value).filter(([, entry]) => typeof entry === "string"); - return Object.fromEntries(entries) as Record; -} - -function addPackageWithSeparators(packageName: string, declaredPackages: Set): void { - const trimmed = packageName.trim(); - if (!trimmed) return; - declaredPackages.add(trimmed); - declaredPackages.add(trimmed.replace(/-/g, "_")); -} - -function addPackageJsonDependencies(filePath: string, declaredPackages: Set): boolean { - const parsed = readJsonObject(filePath); - if (!parsed) return false; - for (const field of ["dependencies", "devDependencies", "peerDependencies", "optionalDependencies"] as const) { - for (const packageName of Object.keys(readStringRecord(parsed[field]))) { - declaredPackages.add(packageName); - } - } - return true; -} - -function addComposerDependencies(filePath: string, declaredPackages: Set): boolean { - const parsed = readJsonObject(filePath); - if (!parsed) return false; - for (const field of ["require", "require-dev"] as const) { - for (const packageName of Object.keys(readStringRecord(parsed[field]))) { - if (packageName !== "php" && 
!packageName.startsWith("ext-")) { - declaredPackages.add(packageName); - } - } - } - return true; -} - -function addPythonPackageName(rawName: string, declaredPackages: Set): void { - const normalizedName = rawName.trim().split("[")[0]?.toLowerCase(); - if (!normalizedName || normalizedName === "python") return; - addPackageWithSeparators(normalizedName, declaredPackages); -} - -function addRequirementsDependencies(filePath: string, declaredPackages: Set): boolean { - const raw = readText(filePath); - if (raw === null) return false; - for (const line of raw.split(/\r?\n/)) { - const trimmed = line.trim(); - if (!trimmed || trimmed.startsWith("#") || trimmed.startsWith("-")) continue; - const dependency = trimmed.match(/^([A-Za-z0-9_.-]+)/); - if (dependency?.[1]) { - addPythonPackageName(dependency[1], declaredPackages); - } - } - return true; -} - -function addSetupCfgDependencies(filePath: string, declaredPackages: Set): boolean { - const raw = readText(filePath); - if (raw === null) return false; - let inInstallRequires = false; - for (const line of raw.split(/\r?\n/)) { - const trimmed = line.trim(); - if (!trimmed || trimmed.startsWith("#")) continue; - if (/^\[/.test(trimmed)) { - inInstallRequires = false; - continue; - } - if (/^install_requires\s*=/.test(trimmed)) { - inInstallRequires = true; - const inlineValue = trimmed.split("=").slice(1).join("=").trim(); - if (inlineValue) { - const packageName = inlineValue.match(/^([A-Za-z0-9_.-]+)/)?.[1]; - if (packageName) addPythonPackageName(packageName, declaredPackages); - } - continue; - } - if (inInstallRequires) { - const packageName = trimmed.match(/^([A-Za-z0-9_.-]+)/)?.[1]; - if (packageName) addPythonPackageName(packageName, declaredPackages); - } - } - return true; -} - -function addPipfileDependencies(filePath: string, declaredPackages: Set): boolean { - const raw = readText(filePath); - if (raw === null) return false; - let inPackageSection = false; - for (const line of raw.split(/\r?\n/)) { - 
const trimmed = line.trim(); - if (!trimmed || trimmed.startsWith("#")) continue; - const section = trimmed.match(/^\[([^\]]+)\]$/); - if (section) { - inPackageSection = ["packages", "dev-packages"].includes(section[1] ?? ""); - continue; - } - if (!inPackageSection) continue; - const dependency = trimmed.match(/^["']?([A-Za-z0-9_.-]+)["']?\s*=/); - if (dependency?.[1]) { - addPythonPackageName(dependency[1], declaredPackages); - } - } - return true; -} - -function addPyprojectDependencies(filePath: string, declaredPackages: Set): boolean { - const raw = readText(filePath); - if (raw === null) return false; - const dependencyArrays = raw.matchAll(/(?:^|\n)\s*[A-Za-z0-9_-]*dependencies\s*=\s*\[([\s\S]*?)\]/g); - for (const dependencyArray of dependencyArrays) { - for (const dependency of (dependencyArray[1] ?? "").matchAll(/["']([^"']+)["']/g)) { - const packageName = dependency[1]?.match(/^([A-Za-z0-9_.-]+)/)?.[1]; - if (packageName) { - addPythonPackageName(packageName, declaredPackages); - } - } - } - - let inPoetryDependencySection = false; - for (const line of raw.split(/\r?\n/)) { - const trimmed = line.trim(); - const section = trimmed.match(/^\[([^\]]+)\]$/); - if (section) { - inPoetryDependencySection = - section[1] === "tool.poetry.dependencies" || - /^tool\.poetry\.group\.[^.]+\.dependencies$/.test(section[1] ?? 
""); - continue; - } - if (!inPoetryDependencySection || !trimmed || trimmed.startsWith("#")) continue; - const dependency = trimmed.match(/^["']?([A-Za-z0-9_.-]+)["']?\s*=/); - if (dependency?.[1]) { - addPythonPackageName(dependency[1], declaredPackages); - } - } - return true; -} - -function addCargoDependencies(filePath: string, declaredPackages: Set): boolean { - const raw = readText(filePath); - if (raw === null) return false; - let inDependencySection = false; - for (const line of raw.split(/\r?\n/)) { - const trimmed = line.trim(); - if (!trimmed || trimmed.startsWith("#")) continue; - const section = trimmed.match(/^\[([^\]]+)\]$/); - if (section) { - inDependencySection = /(^|\.)(dependencies|dev-dependencies|build-dependencies)$/.test(section[1] ?? ""); - continue; - } - if (!inDependencySection) continue; - const dependency = trimmed.match(/^([A-Za-z0-9_-]+)\s*=/); - if (dependency?.[1]) { - addPackageWithSeparators(dependency[1], declaredPackages); - } - } - return true; -} - -function addGoDependencies(filePath: string, declaredPackages: Set): boolean { - const raw = readText(filePath); - if (raw === null) return false; - let inRequireBlock = false; - for (const line of raw.split(/\r?\n/)) { - const trimmed = line.trim(); - if (!trimmed || trimmed.startsWith("//")) continue; - if (trimmed === "require (") { - inRequireBlock = true; - continue; - } - if (inRequireBlock) { - if (trimmed === ")") { - inRequireBlock = false; - continue; - } - const blockRequireMatch = trimmed.match(/^([^\s]+)\s+/); - if (blockRequireMatch?.[1]) declaredPackages.add(blockRequireMatch[1]); - continue; - } - const requireMatch = trimmed.match(/^require\s+([^\s]+)\s+/); - if (requireMatch?.[1]) declaredPackages.add(requireMatch[1]); - const moduleMatch = trimmed.match(/^module\s+([^\s]+)/); - if (moduleMatch?.[1]) declaredPackages.add(moduleMatch[1]); - } - return true; -} - -function countMatches(value: string, pattern: RegExp): number { - return value.match(pattern)?.length 
?? 0; -} - -function addZigDependencies(filePath: string, declaredPackages: Set): boolean { - const raw = readText(filePath); - if (raw === null) return false; - let inDependencySection = false; - let dependencyDepth = 0; - for (const line of raw.split(/\r?\n/)) { - const trimmed = line.trim(); - if (!inDependencySection && trimmed.startsWith(".dependencies") && trimmed.includes(".{")) { - inDependencySection = true; - dependencyDepth = countMatches(trimmed, /\{/g) - countMatches(trimmed, /\}/g); - continue; - } - if (!inDependencySection) continue; - if (dependencyDepth === 1) { - const dependency = trimmed.match(/^\.((?:[A-Za-z0-9_]+)|(?:"[^"]+"))\s*=/); - if (dependency?.[1]) declaredPackages.add(dependency[1].replace(/^"|"$/g, "")); - } - dependencyDepth += countMatches(trimmed, /\{/g) - countMatches(trimmed, /\}/g); - if (dependencyDepth <= 0) { - inDependencySection = false; - } - } - return true; -} - -function addGemfileDependencies(filePath: string, declaredPackages: Set): boolean { - const raw = readText(filePath); - if (raw === null) return false; - for (const line of raw.split(/\r?\n/)) { - const dependency = line.match(/^\s*gem\s+["']([^"']+)["']/); - if (dependency?.[1]) { - declaredPackages.add(dependency[1]); - } - } - return true; -} - -function addGemspecDependencies(filePath: string, declaredPackages: Set): boolean { - const raw = readText(filePath); - if (raw === null) return false; - for (const dependency of raw.matchAll(/add(?:_runtime)?_dependency|add_development_dependency/g)) { - if (dependency.index === undefined) continue; - const afterMatch = raw.slice(dependency.index); - const packageName = afterMatch.match(/["']([^"']+)["']/)?.[1]; - if (packageName) declaredPackages.add(packageName); - } - return true; -} - -function addMavenDependencies(filePath: string, declaredPackages: Set): boolean { - const raw = readText(filePath); - if (raw === null) return false; - for (const dependencyBlock of raw.matchAll(/([\s\S]*?)<\/dependency>/g)) { - 
const groupId = dependencyBlock[1]?.match(/\s*([^<\s]+)\s*<\/groupId>/)?.[1]; - const artifactId = dependencyBlock[1]?.match(/\s*([^<\s]+)\s*<\/artifactId>/)?.[1]; - if (groupId) declaredPackages.add(groupId); - if (groupId && artifactId) declaredPackages.add(`${groupId}.${artifactId}`); - if (artifactId) declaredPackages.add(artifactId); - } - return true; -} - -function addGradleDependencies(filePath: string, declaredPackages: Set): boolean { - const raw = readText(filePath); - if (raw === null) return false; - for (const dependency of raw.matchAll(/["']([^:"']+):([^:"']+):[^"']+["']/g)) { - const groupId = dependency[1]; - const artifactId = dependency[2]; - if (groupId) declaredPackages.add(groupId); - if (groupId && artifactId) declaredPackages.add(`${groupId}.${artifactId}`); - if (artifactId) declaredPackages.add(artifactId); - } - return true; -} - -function addDotnetDependencies(filePath: string, declaredPackages: Set): boolean { - const raw = readText(filePath); - if (raw === null) return false; - for (const dependency of raw.matchAll(/]*\bInclude=["']([^"']+)["']/g)) { - if (dependency[1]) declaredPackages.add(dependency[1]); - } - return true; -} - -function addVcpkgDependencies(filePath: string, declaredPackages: Set): boolean { - const parsed = readJsonObject(filePath); - if (!parsed) return false; - const dependencies = parsed.dependencies; - if (!Array.isArray(dependencies)) return true; - for (const dependency of dependencies) { - if (typeof dependency === "string") { - declaredPackages.add(dependency); - } else if ( - dependency && - typeof dependency === "object" && - !Array.isArray(dependency) && - typeof dependency.name === "string" - ) { - declaredPackages.add(dependency.name); - } - } - return true; -} - -function addSwiftPackageDependencies(filePath: string, declaredPackages: Set): boolean { - const raw = readText(filePath); - if (raw === null) return false; - for (const dependency of raw.matchAll(/\.package\s*\([^)]*\bname:\s*"([^"]+)"/g)) 
{ - if (dependency[1]) declaredPackages.add(dependency[1]); - } - for (const product of raw.matchAll(/\.product\s*\([^)]*\bname:\s*"([^"]+)"/g)) { - if (product[1]) declaredPackages.add(product[1]); - } - return true; -} - -function addGemspecs(directory: string, declaredPackages: Set): boolean { - if (!directoryExists(directory)) return false; - let found = false; - try { - for (const dirent of fs.readdirSync(directory, { withFileTypes: true })) { - if (!dirent.isFile() || !dirent.name.endsWith(".gemspec")) continue; - found = addGemspecDependencies(path.join(directory, dirent.name), declaredPackages) || found; - } - } catch { - return false; - } - return found; -} - -function addDotnetProjectFiles(directory: string, declaredPackages: Set): boolean { - if (!directoryExists(directory)) return false; - let found = false; - try { - for (const dirent of fs.readdirSync(directory, { withFileTypes: true })) { - if (!dirent.isFile() || !/\.(?:csproj|fsproj|vbproj)$/i.test(dirent.name)) continue; - found = addDotnetDependencies(path.join(directory, dirent.name), declaredPackages) || found; - } - } catch { - return false; - } - return found; -} - -function dependencyManifestForDirectory(directory: string): DependencyManifest { - const resolvedRoot = path.resolve(directory); - const cached = dependencyManifestCache.get(resolvedRoot); - if (cached) return cached; - - const declaredPackages = new Set(); - let hasManifest = false; - hasManifest = addPackageJsonDependencies(path.join(resolvedRoot, "package.json"), declaredPackages) || hasManifest; - hasManifest = - addRequirementsDependencies(path.join(resolvedRoot, "requirements.txt"), declaredPackages) || hasManifest; - hasManifest = - addRequirementsDependencies(path.join(resolvedRoot, "requirements.in"), declaredPackages) || hasManifest; - hasManifest = addPyprojectDependencies(path.join(resolvedRoot, "pyproject.toml"), declaredPackages) || hasManifest; - hasManifest = addSetupCfgDependencies(path.join(resolvedRoot, 
"setup.cfg"), declaredPackages) || hasManifest; - hasManifest = addPipfileDependencies(path.join(resolvedRoot, "Pipfile"), declaredPackages) || hasManifest; - hasManifest = addComposerDependencies(path.join(resolvedRoot, "composer.json"), declaredPackages) || hasManifest; - hasManifest = addCargoDependencies(path.join(resolvedRoot, "Cargo.toml"), declaredPackages) || hasManifest; - hasManifest = addGoDependencies(path.join(resolvedRoot, "go.mod"), declaredPackages) || hasManifest; - hasManifest = addZigDependencies(path.join(resolvedRoot, "build.zig.zon"), declaredPackages) || hasManifest; - hasManifest = addGemfileDependencies(path.join(resolvedRoot, "Gemfile"), declaredPackages) || hasManifest; - hasManifest = addGemspecs(resolvedRoot, declaredPackages) || hasManifest; - hasManifest = addMavenDependencies(path.join(resolvedRoot, "pom.xml"), declaredPackages) || hasManifest; - hasManifest = addGradleDependencies(path.join(resolvedRoot, "build.gradle"), declaredPackages) || hasManifest; - hasManifest = addGradleDependencies(path.join(resolvedRoot, "build.gradle.kts"), declaredPackages) || hasManifest; - hasManifest = addDotnetProjectFiles(resolvedRoot, declaredPackages) || hasManifest; - hasManifest = addVcpkgDependencies(path.join(resolvedRoot, "vcpkg.json"), declaredPackages) || hasManifest; - hasManifest = addSwiftPackageDependencies(path.join(resolvedRoot, "Package.swift"), declaredPackages) || hasManifest; - - const manifest = { declaredPackages, hasManifest }; - dependencyManifestCache.set(resolvedRoot, manifest); - return manifest; -} - -function parentDirectory(directory: string): string | null { - const parent = path.dirname(directory); - return parent === directory ? 
null : parent; -} - -function nearestVcsAncestor(startDirectory: string): string | null { - let current: string | null = path.resolve(startDirectory); - while (current) { - if (pathExists(path.join(current, ".git"))) return current; - current = parentDirectory(current); - } - return null; -} - -function nearestManifestAncestor(startDirectory: string, stopDirectory: string | null): string | null { - let depth = 0; - let current: string | null = path.resolve(startDirectory); - const resolvedStop = stopDirectory ? path.resolve(stopDirectory) : null; - while (current && depth <= MAX_MANIFEST_ANCESTOR_SEARCH_DEPTH) { - if (dependencyManifestForDirectory(current).hasManifest) return current; - if (resolvedStop && current === resolvedStop) break; - current = parentDirectory(current); - depth += 1; - } - return null; -} - -function isSameOrInside(directory: string, possibleAncestor: string): boolean { - const relative = path.relative(possibleAncestor, directory); - return relative === "" || (!relative.startsWith("..") && !path.isAbsolute(relative)); -} - -function declaredPackagesFromAncestors(startDirectory: string, stopDirectory: string): Set { - const resolvedStart = path.resolve(startDirectory); - const resolvedStop = path.resolve(stopDirectory); - const cacheKey = `${resolvedStart}\0${resolvedStop}`; - const cached = declaredPackagesByContextCache.get(cacheKey); - if (cached) return cached; - - const declaredPackages = new Set(); - let current: string | null = resolvedStart; - while (current && isSameOrInside(current, resolvedStop)) { - for (const packageName of dependencyManifestForDirectory(current).declaredPackages) { - declaredPackages.add(packageName); - } - if (current === resolvedStop) break; - current = parentDirectory(current); - } - - declaredPackagesByContextCache.set(cacheKey, declaredPackages); - return declaredPackages; -} - -function importerDirectoryForFile(importerFile: string): string { - if (directoryExists(importerFile)) return importerFile; - 
return path.dirname(importerFile); -} - -function declaredPackagesForContext(importerFile: string, projectRoot: string | undefined): Set { - const importerDirectory = path.resolve(importerDirectoryForFile(importerFile)); - const ancestorSearchStart = path.resolve(projectRoot ?? importerDirectory); - const vcsBoundary = nearestVcsAncestor(ancestorSearchStart); - const boundary = - nearestManifestAncestor(ancestorSearchStart, vcsBoundary) ?? path.resolve(projectRoot ?? importerDirectory); - if (!isSameOrInside(importerDirectory, boundary)) { - return dependencyManifestForDirectory(boundary).declaredPackages; - } - return declaredPackagesFromAncestors(importerDirectory, boundary); -} - -function isUrlSpecifier(specifier: string): boolean { - return /^[a-z][a-z0-9+.-]*:\/\//i.test(specifier) || specifier.startsWith("data:"); -} - function packageNameForSpecifier(specifier: string): string { if (specifier.startsWith("@")) { const [scope, name] = specifier.split("/"); @@ -676,43 +32,6 @@ function isDeclaredPackageSpecifier(specifier: string, declaredPackage: string): return packageNameForSpecifier(specifier) === declaredPackage; } -function extensionForFile(filePath: string): string { - return path.extname(filePath).toLowerCase(); -} - -function isSupportedStdlib(specifier: string, importerFile: string): boolean { - const ext = extensionForFile(importerFile); - const firstSegment = specifier.split(/[.:/]/)[0] ?? 
specifier; - if (NODE_BUILTIN_MODULES.has(specifier)) return true; - if ([".py", ".pyw"].includes(ext)) return PYTHON_STDLIB_MODULES.has(firstSegment); - if ([".rb"].includes(ext)) return RUBY_STDLIB_MODULES.has(specifier) || RUBY_STDLIB_MODULES.has(firstSegment); - if (ext === ".zig") return specifier === "std"; - if (ext === ".go") return GO_STDLIB_IMPORTS.has(specifier) || GO_STDLIB_IMPORTS.has(firstSegment); - if (ext === ".rs") - return ( - specifier === "std" || - specifier.startsWith("std::") || - specifier.startsWith("core::") || - specifier.startsWith("alloc::") - ); - if ([".java"].includes(ext)) - return ( - specifier.startsWith("java.") || - specifier.startsWith("javax.") || - specifier.startsWith("org.w3c.") || - specifier.startsWith("org.xml.") - ); - if ([".kt", ".kts"].includes(ext)) return specifier === "kotlin" || specifier.startsWith("kotlin."); - if (ext === ".cs") - return specifier === "System" || specifier.startsWith("System.") || specifier.startsWith("Microsoft."); - if (ext === ".swift") return SWIFT_SDK_MODULES.has(firstSegment); - if ([".c", ".h", ".i"].includes(ext)) return C_STDLIB_HEADERS.has(specifier); - if ([".cc", ".cpp", ".cxx", ".c++", ".hpp", ".hh", ".hxx", ".ipp", ".tpp", ".inl"].includes(ext)) { - return CPP_STDLIB_HEADERS.has(specifier) || C_STDLIB_HEADERS.has(specifier); - } - return false; -} - function isDeclaredPackage(specifier: string, importerFile: string, projectRoot: string | undefined): boolean { for (const declaredPackage of declaredPackagesForContext(importerFile, projectRoot)) { if (isDeclaredPackageSpecifier(specifier, declaredPackage)) { diff --git a/src/graphs/external/cache.ts b/src/graphs/external/cache.ts new file mode 100644 index 00000000..f3863992 --- /dev/null +++ b/src/graphs/external/cache.ts @@ -0,0 +1,18 @@ +export class BoundedCacheMap extends Map { + constructor(private readonly maxEntries: number) { + super(); + } + + override set(key: K, value: V): this { + if (super.has(key)) { + 
super.delete(key); + } + super.set(key, value); + while (this.size > this.maxEntries) { + const oldest = this.keys().next(); + if (oldest.done) break; + super.delete(oldest.value); + } + return this; + } +} diff --git a/src/graphs/external/context.ts b/src/graphs/external/context.ts new file mode 100644 index 00000000..e67bec93 --- /dev/null +++ b/src/graphs/external/context.ts @@ -0,0 +1,110 @@ +import path from "node:path"; +import { BoundedCacheMap } from "./cache.js"; +import { + createDependencyManifestForDirectory, + directoryExists, + pathExists, + type DependencyManifest, +} from "./manifests.js"; + +type ExternalClassifierCacheStats = { + dependencyManifests: number; + declaredPackageContexts: number; +}; + +const MAX_EXTERNAL_CLASSIFIER_CACHE_ENTRIES = 512; +const MAX_MANIFEST_ANCESTOR_SEARCH_DEPTH = 64; + +const dependencyManifestCache = new BoundedCacheMap(MAX_EXTERNAL_CLASSIFIER_CACHE_ENTRIES); +const declaredPackagesByContextCache = new BoundedCacheMap>(MAX_EXTERNAL_CLASSIFIER_CACHE_ENTRIES); + +export function resetExternalClassifierCaches(): void { + dependencyManifestCache.clear(); + declaredPackagesByContextCache.clear(); +} + +export function getExternalClassifierCacheStats(): ExternalClassifierCacheStats { + return { + dependencyManifests: dependencyManifestCache.size, + declaredPackageContexts: declaredPackagesByContextCache.size, + }; +} + +function dependencyManifestForDirectory(directory: string): DependencyManifest { + const resolvedRoot = path.resolve(directory); + const cached = dependencyManifestCache.get(resolvedRoot); + if (cached) return cached; + const manifest = createDependencyManifestForDirectory(resolvedRoot); + dependencyManifestCache.set(resolvedRoot, manifest); + return manifest; +} + +function parentDirectory(directory: string): string | null { + const parent = path.dirname(directory); + return parent === directory ? 
null : parent; +} + +function nearestVcsAncestor(startDirectory: string): string | null { + let current: string | null = path.resolve(startDirectory); + while (current) { + if (pathExists(path.join(current, ".git"))) return current; + current = parentDirectory(current); + } + return null; +} + +function nearestManifestAncestor(startDirectory: string, stopDirectory: string | null): string | null { + let depth = 0; + let current: string | null = path.resolve(startDirectory); + const resolvedStop = stopDirectory ? path.resolve(stopDirectory) : null; + while (current && depth <= MAX_MANIFEST_ANCESTOR_SEARCH_DEPTH) { + if (dependencyManifestForDirectory(current).hasManifest) return current; + if (resolvedStop && current === resolvedStop) break; + current = parentDirectory(current); + depth += 1; + } + return null; +} + +function isSameOrInside(directory: string, possibleAncestor: string): boolean { + const relative = path.relative(possibleAncestor, directory); + return relative === "" || (!relative.startsWith("..") && !path.isAbsolute(relative)); +} + +function declaredPackagesFromAncestors(startDirectory: string, stopDirectory: string): Set { + const resolvedStart = path.resolve(startDirectory); + const resolvedStop = path.resolve(stopDirectory); + const cacheKey = `${resolvedStart}\0${resolvedStop}`; + const cached = declaredPackagesByContextCache.get(cacheKey); + if (cached) return cached; + + const declaredPackages = new Set(); + let current: string | null = resolvedStart; + while (current && isSameOrInside(current, resolvedStop)) { + for (const packageName of dependencyManifestForDirectory(current).declaredPackages) { + declaredPackages.add(packageName); + } + if (current === resolvedStop) break; + current = parentDirectory(current); + } + + declaredPackagesByContextCache.set(cacheKey, declaredPackages); + return declaredPackages; +} + +function importerDirectoryForFile(importerFile: string): string { + if (directoryExists(importerFile)) return importerFile; + 
return path.dirname(importerFile); +} + +export function declaredPackagesForContext(importerFile: string, projectRoot: string | undefined): Set { + const importerDirectory = path.resolve(importerDirectoryForFile(importerFile)); + const ancestorSearchStart = path.resolve(projectRoot ?? importerDirectory); + const vcsBoundary = nearestVcsAncestor(ancestorSearchStart); + const boundary = + nearestManifestAncestor(ancestorSearchStart, vcsBoundary) ?? path.resolve(projectRoot ?? importerDirectory); + if (!isSameOrInside(importerDirectory, boundary)) { + return dependencyManifestForDirectory(boundary).declaredPackages; + } + return declaredPackagesFromAncestors(importerDirectory, boundary); +} diff --git a/src/graphs/external/manifests.ts b/src/graphs/external/manifests.ts new file mode 100644 index 00000000..c9714b31 --- /dev/null +++ b/src/graphs/external/manifests.ts @@ -0,0 +1,402 @@ +import fs from "node:fs"; +import path from "node:path"; + +export type DependencyManifest = { + declaredPackages: Set; + hasManifest: boolean; +}; + +export function directoryExists(directory: string): boolean { + if (!fs.existsSync(directory)) return false; + try { + return fs.statSync(directory).isDirectory(); + } catch { + return false; + } +} + +export function pathExists(filePath: string): boolean { + return fs.existsSync(filePath); +} + +function readText(filePath: string): string | null { + if (!pathExists(filePath)) return null; + try { + return fs.readFileSync(filePath, "utf8"); + } catch { + return null; + } +} + +function readJsonObject(filePath: string): Record | null { + const raw = readText(filePath); + if (raw === null) return null; + try { + const parsed: unknown = JSON.parse(raw); + if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return null; + return parsed as Record; + } catch { + return null; + } +} + +function readStringRecord(value: unknown): Record { + if (!value || typeof value !== "object" || Array.isArray(value)) return {}; + const entries 
= Object.entries(value).filter(([, entry]) => typeof entry === "string"); + return Object.fromEntries(entries) as Record; +} + +function addPackageWithSeparators(packageName: string, declaredPackages: Set): void { + const trimmed = packageName.trim(); + if (!trimmed) return; + declaredPackages.add(trimmed); + declaredPackages.add(trimmed.replace(/-/g, "_")); +} + +function addPackageJsonDependencies(filePath: string, declaredPackages: Set): boolean { + const parsed = readJsonObject(filePath); + if (!parsed) return false; + for (const field of ["dependencies", "devDependencies", "peerDependencies", "optionalDependencies"] as const) { + for (const packageName of Object.keys(readStringRecord(parsed[field]))) { + declaredPackages.add(packageName); + } + } + return true; +} + +function addComposerDependencies(filePath: string, declaredPackages: Set): boolean { + const parsed = readJsonObject(filePath); + if (!parsed) return false; + for (const field of ["require", "require-dev"] as const) { + for (const packageName of Object.keys(readStringRecord(parsed[field]))) { + if (packageName !== "php" && !packageName.startsWith("ext-")) { + declaredPackages.add(packageName); + } + } + } + return true; +} + +function addPythonPackageName(rawName: string, declaredPackages: Set): void { + const normalizedName = rawName.trim().split("[")[0]?.toLowerCase(); + if (!normalizedName || normalizedName === "python") return; + addPackageWithSeparators(normalizedName, declaredPackages); +} + +function addRequirementsDependencies(filePath: string, declaredPackages: Set): boolean { + const raw = readText(filePath); + if (raw === null) return false; + for (const line of raw.split(/\r?\n/)) { + const trimmed = line.trim(); + if (!trimmed || trimmed.startsWith("#") || trimmed.startsWith("-")) continue; + const dependency = trimmed.match(/^([A-Za-z0-9_.-]+)/); + if (dependency?.[1]) { + addPythonPackageName(dependency[1], declaredPackages); + } + } + return true; +} + +function 
addSetupCfgDependencies(filePath: string, declaredPackages: Set): boolean { + const raw = readText(filePath); + if (raw === null) return false; + let inInstallRequires = false; + for (const line of raw.split(/\r?\n/)) { + const trimmed = line.trim(); + if (!trimmed || trimmed.startsWith("#")) continue; + if (/^\[/.test(trimmed)) { + inInstallRequires = false; + continue; + } + if (/^install_requires\s*=/.test(trimmed)) { + inInstallRequires = true; + const inlineValue = trimmed.split("=").slice(1).join("=").trim(); + if (inlineValue) { + const packageName = inlineValue.match(/^([A-Za-z0-9_.-]+)/)?.[1]; + if (packageName) addPythonPackageName(packageName, declaredPackages); + } + continue; + } + if (inInstallRequires) { + const packageName = trimmed.match(/^([A-Za-z0-9_.-]+)/)?.[1]; + if (packageName) addPythonPackageName(packageName, declaredPackages); + } + } + return true; +} + +function addPipfileDependencies(filePath: string, declaredPackages: Set): boolean { + const raw = readText(filePath); + if (raw === null) return false; + let inPackageSection = false; + for (const line of raw.split(/\r?\n/)) { + const trimmed = line.trim(); + if (!trimmed || trimmed.startsWith("#")) continue; + const section = trimmed.match(/^\[([^\]]+)\]$/); + if (section) { + inPackageSection = ["packages", "dev-packages"].includes(section[1] ?? ""); + continue; + } + if (!inPackageSection) continue; + const dependency = trimmed.match(/^["']?([A-Za-z0-9_.-]+)["']?\s*=/); + if (dependency?.[1]) { + addPythonPackageName(dependency[1], declaredPackages); + } + } + return true; +} + +function addPyprojectDependencies(filePath: string, declaredPackages: Set): boolean { + const raw = readText(filePath); + if (raw === null) return false; + const dependencyArrays = raw.matchAll(/(?:^|\n)\s*[A-Za-z0-9_-]*dependencies\s*=\s*\[([\s\S]*?)\]/g); + for (const dependencyArray of dependencyArrays) { + for (const dependency of (dependencyArray[1] ?? 
"").matchAll(/["']([^"']+)["']/g)) { + const packageName = dependency[1]?.match(/^([A-Za-z0-9_.-]+)/)?.[1]; + if (packageName) { + addPythonPackageName(packageName, declaredPackages); + } + } + } + + let inPoetryDependencySection = false; + for (const line of raw.split(/\r?\n/)) { + const trimmed = line.trim(); + const section = trimmed.match(/^\[([^\]]+)\]$/); + if (section) { + inPoetryDependencySection = + section[1] === "tool.poetry.dependencies" || + /^tool\.poetry\.group\.[^.]+\.dependencies$/.test(section[1] ?? ""); + continue; + } + if (!inPoetryDependencySection || !trimmed || trimmed.startsWith("#")) continue; + const dependency = trimmed.match(/^["']?([A-Za-z0-9_.-]+)["']?\s*=/); + if (dependency?.[1]) { + addPythonPackageName(dependency[1], declaredPackages); + } + } + return true; +} + +function addCargoDependencies(filePath: string, declaredPackages: Set): boolean { + const raw = readText(filePath); + if (raw === null) return false; + let inDependencySection = false; + for (const line of raw.split(/\r?\n/)) { + const trimmed = line.trim(); + if (!trimmed || trimmed.startsWith("#")) continue; + const section = trimmed.match(/^\[([^\]]+)\]$/); + if (section) { + inDependencySection = /(^|\.)(dependencies|dev-dependencies|build-dependencies)$/.test(section[1] ?? 
""); + continue; + } + if (!inDependencySection) continue; + const dependency = trimmed.match(/^([A-Za-z0-9_-]+)\s*=/); + if (dependency?.[1]) { + addPackageWithSeparators(dependency[1], declaredPackages); + } + } + return true; +} + +function addGoDependencies(filePath: string, declaredPackages: Set): boolean { + const raw = readText(filePath); + if (raw === null) return false; + let inRequireBlock = false; + for (const line of raw.split(/\r?\n/)) { + const trimmed = line.trim(); + if (!trimmed || trimmed.startsWith("//")) continue; + if (trimmed === "require (") { + inRequireBlock = true; + continue; + } + if (inRequireBlock) { + if (trimmed === ")") { + inRequireBlock = false; + continue; + } + const blockRequireMatch = trimmed.match(/^([^\s]+)\s+/); + if (blockRequireMatch?.[1]) declaredPackages.add(blockRequireMatch[1]); + continue; + } + const requireMatch = trimmed.match(/^require\s+([^\s]+)\s+/); + if (requireMatch?.[1]) declaredPackages.add(requireMatch[1]); + const moduleMatch = trimmed.match(/^module\s+([^\s]+)/); + if (moduleMatch?.[1]) declaredPackages.add(moduleMatch[1]); + } + return true; +} + +function countMatches(value: string, pattern: RegExp): number { + return value.match(pattern)?.length ?? 
0; +} + +function addZigDependencies(filePath: string, declaredPackages: Set): boolean { + const raw = readText(filePath); + if (raw === null) return false; + let inDependencySection = false; + let dependencyDepth = 0; + for (const line of raw.split(/\r?\n/)) { + const trimmed = line.trim(); + if (!inDependencySection && trimmed.startsWith(".dependencies") && trimmed.includes(".{")) { + inDependencySection = true; + dependencyDepth = countMatches(trimmed, /\{/g) - countMatches(trimmed, /\}/g); + continue; + } + if (!inDependencySection) continue; + if (dependencyDepth === 1) { + const dependency = trimmed.match(/^\.((?:[A-Za-z0-9_]+)|(?:"[^"]+"))\s*=/); + if (dependency?.[1]) declaredPackages.add(dependency[1].replace(/^"|"$/g, "")); + } + dependencyDepth += countMatches(trimmed, /\{/g) - countMatches(trimmed, /\}/g); + if (dependencyDepth <= 0) { + inDependencySection = false; + } + } + return true; +} + +function addGemfileDependencies(filePath: string, declaredPackages: Set): boolean { + const raw = readText(filePath); + if (raw === null) return false; + for (const line of raw.split(/\r?\n/)) { + const dependency = line.match(/^\s*gem\s+["']([^"']+)["']/); + if (dependency?.[1]) { + declaredPackages.add(dependency[1]); + } + } + return true; +} + +function addGemspecDependencies(filePath: string, declaredPackages: Set): boolean { + const raw = readText(filePath); + if (raw === null) return false; + for (const dependency of raw.matchAll(/add(?:_runtime)?_dependency|add_development_dependency/g)) { + if (dependency.index === undefined) continue; + const afterMatch = raw.slice(dependency.index); + const packageName = afterMatch.match(/["']([^"']+)["']/)?.[1]; + if (packageName) declaredPackages.add(packageName); + } + return true; +} + +function addMavenDependencies(filePath: string, declaredPackages: Set): boolean { + const raw = readText(filePath); + if (raw === null) return false; + for (const dependencyBlock of raw.matchAll(/([\s\S]*?)<\/dependency>/g)) { + 
const groupId = dependencyBlock[1]?.match(/\s*([^<\s]+)\s*<\/groupId>/)?.[1]; + const artifactId = dependencyBlock[1]?.match(/\s*([^<\s]+)\s*<\/artifactId>/)?.[1]; + if (groupId) declaredPackages.add(groupId); + if (groupId && artifactId) declaredPackages.add(`${groupId}.${artifactId}`); + if (artifactId) declaredPackages.add(artifactId); + } + return true; +} + +function addGradleDependencies(filePath: string, declaredPackages: Set): boolean { + const raw = readText(filePath); + if (raw === null) return false; + for (const dependency of raw.matchAll(/["']([^:"']+):([^:"']+):[^"']+["']/g)) { + const groupId = dependency[1]; + const artifactId = dependency[2]; + if (groupId) declaredPackages.add(groupId); + if (groupId && artifactId) declaredPackages.add(`${groupId}.${artifactId}`); + if (artifactId) declaredPackages.add(artifactId); + } + return true; +} + +function addDotnetDependencies(filePath: string, declaredPackages: Set): boolean { + const raw = readText(filePath); + if (raw === null) return false; + for (const dependency of raw.matchAll(/]*\bInclude=["']([^"']+)["']/g)) { + if (dependency[1]) declaredPackages.add(dependency[1]); + } + return true; +} + +function addVcpkgDependencies(filePath: string, declaredPackages: Set): boolean { + const parsed = readJsonObject(filePath); + if (!parsed) return false; + const dependencies = parsed.dependencies; + if (!Array.isArray(dependencies)) return true; + for (const dependency of dependencies) { + if (typeof dependency === "string") { + declaredPackages.add(dependency); + } else if ( + dependency && + typeof dependency === "object" && + !Array.isArray(dependency) && + typeof dependency.name === "string" + ) { + declaredPackages.add(dependency.name); + } + } + return true; +} + +function addSwiftPackageDependencies(filePath: string, declaredPackages: Set): boolean { + const raw = readText(filePath); + if (raw === null) return false; + for (const dependency of raw.matchAll(/\.package\s*\([^)]*\bname:\s*"([^"]+)"/g)) 
{ + if (dependency[1]) declaredPackages.add(dependency[1]); + } + for (const product of raw.matchAll(/\.product\s*\([^)]*\bname:\s*"([^"]+)"/g)) { + if (product[1]) declaredPackages.add(product[1]); + } + return true; +} + +function addGemspecs(directory: string, declaredPackages: Set): boolean { + if (!directoryExists(directory)) return false; + let found = false; + try { + for (const dirent of fs.readdirSync(directory, { withFileTypes: true })) { + if (!dirent.isFile() || !dirent.name.endsWith(".gemspec")) continue; + found = addGemspecDependencies(path.join(directory, dirent.name), declaredPackages) || found; + } + } catch { + return false; + } + return found; +} + +function addDotnetProjectFiles(directory: string, declaredPackages: Set): boolean { + if (!directoryExists(directory)) return false; + let found = false; + try { + for (const dirent of fs.readdirSync(directory, { withFileTypes: true })) { + if (!dirent.isFile() || !/\.(?:csproj|fsproj|vbproj)$/i.test(dirent.name)) continue; + found = addDotnetDependencies(path.join(directory, dirent.name), declaredPackages) || found; + } + } catch { + return false; + } + return found; +} + +export function createDependencyManifestForDirectory(directory: string): DependencyManifest { + const resolvedRoot = path.resolve(directory); + const declaredPackages = new Set(); + let hasManifest = false; + hasManifest = addPackageJsonDependencies(path.join(resolvedRoot, "package.json"), declaredPackages) || hasManifest; + hasManifest = addRequirementsDependencies(path.join(resolvedRoot, "requirements.txt"), declaredPackages) || hasManifest; + hasManifest = addRequirementsDependencies(path.join(resolvedRoot, "requirements.in"), declaredPackages) || hasManifest; + hasManifest = addPyprojectDependencies(path.join(resolvedRoot, "pyproject.toml"), declaredPackages) || hasManifest; + hasManifest = addSetupCfgDependencies(path.join(resolvedRoot, "setup.cfg"), declaredPackages) || hasManifest; + hasManifest = 
addPipfileDependencies(path.join(resolvedRoot, "Pipfile"), declaredPackages) || hasManifest; + hasManifest = addComposerDependencies(path.join(resolvedRoot, "composer.json"), declaredPackages) || hasManifest; + hasManifest = addCargoDependencies(path.join(resolvedRoot, "Cargo.toml"), declaredPackages) || hasManifest; + hasManifest = addGoDependencies(path.join(resolvedRoot, "go.mod"), declaredPackages) || hasManifest; + hasManifest = addZigDependencies(path.join(resolvedRoot, "build.zig.zon"), declaredPackages) || hasManifest; + hasManifest = addGemfileDependencies(path.join(resolvedRoot, "Gemfile"), declaredPackages) || hasManifest; + hasManifest = addGemspecs(resolvedRoot, declaredPackages) || hasManifest; + hasManifest = addMavenDependencies(path.join(resolvedRoot, "pom.xml"), declaredPackages) || hasManifest; + hasManifest = addGradleDependencies(path.join(resolvedRoot, "build.gradle"), declaredPackages) || hasManifest; + hasManifest = addGradleDependencies(path.join(resolvedRoot, "build.gradle.kts"), declaredPackages) || hasManifest; + hasManifest = addDotnetProjectFiles(resolvedRoot, declaredPackages) || hasManifest; + hasManifest = addVcpkgDependencies(path.join(resolvedRoot, "vcpkg.json"), declaredPackages) || hasManifest; + hasManifest = addSwiftPackageDependencies(path.join(resolvedRoot, "Package.swift"), declaredPackages) || hasManifest; + + return { declaredPackages, hasManifest }; +} diff --git a/src/graphs/external/stdlib.ts b/src/graphs/external/stdlib.ts new file mode 100644 index 00000000..31263713 --- /dev/null +++ b/src/graphs/external/stdlib.ts @@ -0,0 +1,171 @@ +import path from "node:path"; +import { builtinModules } from "node:module"; + +const NODE_BUILTIN_MODULES = new Set([ + ...builtinModules, + ...builtinModules.filter((name) => !name.startsWith("node:")).map((name) => `node:${name}`), +]); + +const PYTHON_STDLIB_MODULES = new Set([ + "__future__", + "abc", + "argparse", + "asyncio", + "collections", + "contextlib", + "dataclasses", + 
"datetime", + "decimal", + "functools", + "itertools", + "json", + "logging", + "math", + "os", + "pathlib", + "re", + "shutil", + "sqlite3", + "statistics", + "string", + "subprocess", + "sys", + "tempfile", + "time", + "typing", + "unittest", + "urllib", +]); + +const RUBY_STDLIB_MODULES = new Set([ + "date", + "digest", + "fileutils", + "json", + "logger", + "pathname", + "set", + "time", + "uri", + "yaml", +]); + +const GO_STDLIB_IMPORTS = new Set([ + "bufio", + "bytes", + "context", + "crypto", + "database", + "encoding", + "errors", + "fmt", + "io", + "log", + "math", + "net", + "net/http", + "os", + "path", + "path/filepath", + "reflect", + "regexp", + "sort", + "strconv", + "strings", + "sync", + "testing", + "time", +]); + +const CPP_STDLIB_HEADERS = new Set([ + "algorithm", + "array", + "chrono", + "cstdint", + "cstdio", + "cstdlib", + "exception", + "filesystem", + "fstream", + "functional", + "iostream", + "map", + "memory", + "optional", + "set", + "sstream", + "stdexcept", + "string", + "string_view", + "tuple", + "type_traits", + "unordered_map", + "unordered_set", + "utility", + "vector", +]); + +const C_STDLIB_HEADERS = new Set([ + "assert.h", + "ctype.h", + "errno.h", + "float.h", + "limits.h", + "math.h", + "setjmp.h", + "signal.h", + "stdarg.h", + "stdbool.h", + "stddef.h", + "stdint.h", + "stdio.h", + "stdlib.h", + "string.h", + "time.h", +]); + +const SWIFT_SDK_MODULES = new Set(["Foundation", "Dispatch", "Darwin", "Glibc", "SwiftUI", "UIKit"]); + +export function isUrlSpecifier(specifier: string): boolean { + return /^[a-z][a-z0-9+.-]*:\/\//i.test(specifier) || specifier.startsWith("data:"); +} + +function extensionForFile(filePath: string): string { + return path.extname(filePath).toLowerCase(); +} + +export function isSupportedStdlib(specifier: string, importerFile: string): boolean { + const ext = extensionForFile(importerFile); + const firstSegment = specifier.split(/[.:/]/)[0] ?? 
specifier; + if (NODE_BUILTIN_MODULES.has(specifier)) return true; + if ([".py", ".pyw"].includes(ext)) return PYTHON_STDLIB_MODULES.has(firstSegment); + if ([".rb"].includes(ext)) return RUBY_STDLIB_MODULES.has(specifier) || RUBY_STDLIB_MODULES.has(firstSegment); + if (ext === ".zig") return specifier === "std"; + if (ext === ".go") return GO_STDLIB_IMPORTS.has(specifier) || GO_STDLIB_IMPORTS.has(firstSegment); + if (ext === ".rs") { + return ( + specifier === "std" || + specifier.startsWith("std::") || + specifier.startsWith("core::") || + specifier.startsWith("alloc::") + ); + } + if ([".java"].includes(ext)) { + return ( + specifier.startsWith("java.") || + specifier.startsWith("javax.") || + specifier.startsWith("org.w3c.") || + specifier.startsWith("org.xml.") + ); + } + if ([".kt", ".kts"].includes(ext)) return specifier === "kotlin" || specifier.startsWith("kotlin."); + if (ext === ".cs") { + return specifier === "System" || specifier.startsWith("System.") || specifier.startsWith("Microsoft."); + } + if (ext === ".swift") return SWIFT_SDK_MODULES.has(firstSegment); + if ([".c", ".h", ".i"].includes(ext)) return C_STDLIB_HEADERS.has(specifier); + if ([".cc", ".cpp", ".cxx", ".c++", ".hpp", ".hh", ".hxx", ".ipp", ".tpp", ".inl"].includes(ext)) { + return CPP_STDLIB_HEADERS.has(specifier) || C_STDLIB_HEADERS.has(specifier); + } + return false; +} diff --git a/src/graphs/queries.ts b/src/graphs/queries.ts index a9d55ba7..58b4b9df 100644 --- a/src/graphs/queries.ts +++ b/src/graphs/queries.ts @@ -1,353 +1,18 @@ -import type { FileId, Graph } from "../types.js"; -import { - getForwardNeighbors, - getReverseNeighbors, - graphAdjacencyFor, - type GraphAdjacencyIndex, -} from "./adjacency.js"; -import { getFiniteNonNegativeLimit } from "./limits.js"; -import { builtinModules } from "node:module"; -import { - classifyExternalSpecifier, - type ExternalSpecifierClassification, - type ExternalSpecifierClassificationOptions, -} from "./external-classifier.js"; - -export 
type DependencyNode = { file: FileId; depth: number }; - -export type CycleInternalEdge = { - from: FileId; - to: FileId; - raw: string; - typeOnly?: boolean; -}; - -export type DetailedCycle = { - files: FileId[]; - entryEdges: CycleInternalEdge[]; - internalEdges: CycleInternalEdge[]; - fileCount: number; - internalEdgeCount: number; - fanInFromOutside: number; - priorityScore: number; - remediationHint: string; -}; - -export type CycleSortMode = "priority" | "size" | "fanin"; - -const NODE_BUILTIN_MODULES = new Set([ - ...builtinModules, - ...builtinModules.filter((name) => !name.startsWith("node:")).map((name) => `node:${name}`), -]); - -const DOCUMENT_ONLY_CYCLE_EXTENSIONS = new Set([".md", ".mdx", ".rst", ".adoc", ".asciidoc"]); - -function isNodeBuiltinSpecifier(specifier: string): boolean { - return NODE_BUILTIN_MODULES.has(specifier); -} - -function isDocumentOnlyCycleFile(file: string): boolean { - const normalized = file.toLowerCase().split(/[?#]/, 1)[0] ?? ""; - for (const extension of DOCUMENT_ONLY_CYCLE_EXTENSIONS) { - if (normalized.endsWith(extension)) { - return true; - } - } - return false; -} - -export function getDependencies( - graph: Graph, - startFile: FileId, - opts: { depth?: number; limit?: number; adjacency?: GraphAdjacencyIndex } = {}, -): DependencyNode[] { - const maxDepth = opts.depth ?? Number.POSITIVE_INFINITY; - const finiteLimit = getFiniteNonNegativeLimit(opts.limit); - const maxResults = finiteLimit ?? Number.POSITIVE_INFINITY; - if (maxResults === 0) { - return []; - } - const out: DependencyNode[] = []; - const visited = new Set(); - const queue: Array<{ file: string; depth: number }> = [{ file: startFile, depth: 0 }]; - const adjacency = opts.adjacency ?? 
graphAdjacencyFor(graph); - visited.add(startFile); - - let index = 0; - while (index < queue.length) { - const { file, depth } = queue[index++]!; - if (depth > 0) { - out.push({ file, depth }); - if (out.length >= maxResults) { - break; - } - } - if (depth >= maxDepth) continue; - - for (const neighbor of getForwardNeighbors(adjacency, file)) { - if (!visited.has(neighbor)) { - visited.add(neighbor); - queue.push({ file: neighbor, depth: depth + 1 }); - } - } - } - return out; -} - -export function getReverseDependencies( - graph: Graph, - targetFile: FileId, - opts: { depth?: number; limit?: number; adjacency?: GraphAdjacencyIndex } = {}, -): DependencyNode[] { - const maxDepth = opts.depth ?? Number.POSITIVE_INFINITY; - const finiteLimit = getFiniteNonNegativeLimit(opts.limit); - const maxResults = finiteLimit ?? Number.POSITIVE_INFINITY; - if (maxResults === 0) { - return []; - } - const out: DependencyNode[] = []; - const visited = new Set(); - const queue: Array<{ file: string; depth: number }> = [{ file: targetFile, depth: 0 }]; - const adjacency = opts.adjacency ?? graphAdjacencyFor(graph); - visited.add(targetFile); - - let index = 0; - while (index < queue.length) { - const { file, depth } = queue[index++]!; - if (depth > 0) { - out.push({ file, depth }); - if (out.length >= maxResults) { - break; - } - } - if (depth >= maxDepth) continue; - - for (const neighbor of getReverseNeighbors(adjacency, file)) { - if (!visited.has(neighbor)) { - visited.add(neighbor); - queue.push({ file: neighbor, depth: depth + 1 }); - } - } - } - return out; -} - -export function getShortestPath( - graph: Graph, - from: FileId, - to: FileId, - opts: { adjacency?: GraphAdjacencyIndex } = {}, -): FileId[] | null { - const visited = new Map(); - const queue: string[] = [from]; - const adjacency = opts.adjacency ?? 
graphAdjacencyFor(graph); - visited.set(from, null); - - let index = 0; - while (index < queue.length) { - const current = queue[index++]!; - if (current === to) { - const path: string[] = []; - let pointer: string | null = current; - while (pointer !== null) { - path.push(pointer); - pointer = visited.get(pointer)!; - } - return path.reverse(); - } - - for (const neighbor of getForwardNeighbors(adjacency, current)) { - if (!visited.has(neighbor)) { - visited.set(neighbor, current); - queue.push(neighbor); - } - } - } - return null; -} - -export function findCycles(graph: Graph): FileId[][] { - return findDetailedCycles(graph).map((cycle) => cycle.files); -} - -export function sortDetailedCycles(cycles: DetailedCycle[], mode: CycleSortMode = "priority"): DetailedCycle[] { - const sorted = [...cycles]; - sorted.sort((left, right) => { - if (mode === "size") { - if (right.fileCount !== left.fileCount) return right.fileCount - left.fileCount; - return right.priorityScore - left.priorityScore; - } - if (mode === "fanin") { - if (right.fanInFromOutside !== left.fanInFromOutside) { - return right.fanInFromOutside - left.fanInFromOutside; - } - return right.priorityScore - left.priorityScore; - } - return right.priorityScore - left.priorityScore; - }); - return sorted; -} - -export function findDetailedCycles( - graph: Graph, - options: { symbolCoupling?: Map } = {}, -): DetailedCycle[] { - const nodes = Array.from(graph.nodes); - const indexByNode = new Map(); - nodes.forEach((node, index) => indexByNode.set(node, index)); - - const adjacency = nodes.map(() => [] as number[]); - for (const edge of graph.edges) { - if (edge.to.type !== "file") continue; - const fromIndex = indexByNode.get(edge.from); - const toIndex = indexByNode.get(edge.to.path); - if (fromIndex !== undefined && toIndex !== undefined) { - adjacency[fromIndex]!.push(toIndex); - } - } - - const nodeCount = nodes.length; - const indices: number[] = new Array(nodeCount).fill(-1); - const lowlink: number[] = 
new Array(nodeCount).fill(-1); - const onStack = new Array(nodeCount).fill(false); - const stack: number[] = []; - let nextIndex = 0; - const stronglyConnectedComponents: number[][] = []; - - function strongconnect(vertex: number) { - indices[vertex] = nextIndex; - lowlink[vertex] = nextIndex; - nextIndex++; - stack.push(vertex); - onStack[vertex] = true; - - for (const neighbor of adjacency[vertex]!) { - if (indices[neighbor] === -1) { - strongconnect(neighbor); - lowlink[vertex] = Math.min(lowlink[vertex], lowlink[neighbor]!); - } else if (onStack[neighbor]) { - lowlink[vertex] = Math.min(lowlink[vertex], indices[neighbor]!); - } - } - - if (lowlink[vertex] === indices[vertex]) { - const component: number[] = []; - let popped: number; - do { - popped = stack.pop()!; - onStack[popped] = false; - component.push(popped); - } while (popped !== vertex); - if (component.length > 1 || adjacency[vertex]!.includes(vertex)) { - stronglyConnectedComponents.push(component); - } - } - } - - for (let index = 0; index < nodeCount; index++) { - if (indices[index] === -1) strongconnect(index); - } - - const cycleDetails: DetailedCycle[] = []; - for (const component of stronglyConnectedComponents) { - const files = component.map((index) => nodes[index]!); - if (files.every(isDocumentOnlyCycleFile)) { - continue; - } - const componentFiles = new Set(files); - const internalEdges: CycleInternalEdge[] = []; - const entryEdges: CycleInternalEdge[] = []; - let internalEdgeCount = 0; - let fanInFromOutside = 0; - - for (const edge of graph.edges) { - if (edge.to.type !== "file") continue; - const fromInComponent = componentFiles.has(edge.from); - const toInComponent = componentFiles.has(edge.to.path); - if (fromInComponent && toInComponent) { - internalEdgeCount += 1; - internalEdges.push({ - from: edge.from, - to: edge.to.path, - raw: edge.raw, - ...(edge.typeOnly !== undefined ? 
{ typeOnly: edge.typeOnly } : {}), - }); - } - if (!fromInComponent && toInComponent) { - fanInFromOutside += 1; - entryEdges.push({ - from: edge.from, - to: edge.to.path, - raw: edge.raw, - ...(edge.typeOnly !== undefined ? { typeOnly: edge.typeOnly } : {}), - }); - } - } - - const priorityScore = files.length * 3 + fanInFromOutside * 2 + internalEdgeCount; - const couplingForEdge = (edge: CycleInternalEdge): number => - options.symbolCoupling?.get(`${edge.from} -> ${edge.to}`) ?? 0; - const weakestEdge = internalEdges.reduce((best, edge) => { - if (!best) return edge; - const bestCoupling = couplingForEdge(best); - const edgeCoupling = couplingForEdge(edge); - if (edgeCoupling !== bestCoupling) { - return edgeCoupling < bestCoupling ? edge : best; - } - if (!!edge.typeOnly && !best.typeOnly) return edge; - return best; - }, null); - - const remediationHint = weakestEdge - ? `Break ${weakestEdge.from} -> ${weakestEdge.to} (import ${weakestEdge.raw}) to reduce SCC coupling; estimated symbol coupling=${couplingForEdge(weakestEdge)}.` - : `Break one import edge in this ${files.length}-file SCC to remove the cycle.`; - - cycleDetails.push({ - files, - entryEdges, - internalEdges, - fileCount: files.length, - internalEdgeCount, - fanInFromOutside, - priorityScore, - remediationHint, - }); - } - - return sortDetailedCycles(cycleDetails, "priority"); -} - -export type UnresolvedImportOptions = ExternalSpecifierClassificationOptions; - -export function getUnresolvedImports( - graph: Graph, - opts: UnresolvedImportOptions = {}, -): Array<{ - name: string; - importers: Array<{ file: FileId; raw: string }>; -}> { - const unresolved = new Map>(); - const classificationCache = new Map(); - for (const edge of graph.edges) { - if (edge.to.type !== "external") continue; - if (isNodeBuiltinSpecifier(edge.to.name) || isNodeBuiltinSpecifier(edge.raw)) continue; - const classificationKey = `${edge.from}\0${edge.to.name}\0${edge.raw}\0${opts.projectRoot ?? 
""}`; - let classification = classificationCache.get(classificationKey); - if (!classification) { - classification = classifyExternalSpecifier({ - raw: edge.raw, - externalName: edge.to.name, - importerFile: edge.from, - options: opts, - }); - classificationCache.set(classificationKey, classification); - } - if (classification.status !== "unresolved") continue; - const importers = unresolved.get(edge.to.name) ?? []; - importers.push({ file: edge.from, raw: edge.raw }); - unresolved.set(edge.to.name, importers); - } - return Array.from(unresolved.entries()) - .map(([name, importers]) => ({ name, importers })) - .sort((left, right) => right.importers.length - left.importers.length); -} +export { + findCycles, + findDetailedCycles, + sortDetailedCycles, + type CycleInternalEdge, + type CycleSortMode, + type DetailedCycle, +} from "./cycles.js"; +export { + getDependencies, + getReverseDependencies, + getShortestPath, + type DependencyNode, +} from "./traversal.js"; +export { + getUnresolvedImports, + type UnresolvedImportOptions, +} from "./unresolved.js"; diff --git a/src/graphs/symbol-graph-detailed.ts b/src/graphs/symbol-graph-detailed.ts index f79ac42a..4357a312 100644 --- a/src/graphs/symbol-graph-detailed.ts +++ b/src/graphs/symbol-graph-detailed.ts @@ -1,20 +1,24 @@ import { isJsFallbackAvailable, parseWithJsLanguage } from "../jsFallback.js"; import { type LanguageSupport } from "../languages.js"; import { isUnsupportedParserInputError, prepareSourceInput } from "../languages/filePrep.js"; -import type { SyntaxNodeLike, SyntaxTreeLike } from "../languages/types.js"; +import type { SyntaxTreeLike } from "../languages/types.js"; import { logWithLevel, type LogLevel } from "../logging.js"; import { ProjectedSyntaxTree } from "../native/projectedTree.js"; import { getNativeSyntaxTreeExecution } from "../native/treeSitterNative.js"; -import { - SymbolKind, - type ImportBinding, - type ProjectIndex, - type ResolvedExport, - type SymbolDef, -} from 
"../indexer/types.js"; +import { SymbolKind, type ProjectIndex, type ResolvedExport, type SymbolDef } from "../indexer/types.js"; import type { FileId } from "../types.js"; -import { sliceText, unquote } from "../util.js"; -import { buildSymbolGraph, defNodeId, nodeForDef, type SymbolGraph } from "./symbol-graph.js"; +import { buildSymbolGraph, type SymbolGraph } from "./symbol-graph.js"; +import { + collectDetailedDeclarations, +} from "./symbol-graph-detailed/ast.js"; +import { + emitClassInheritanceEdges, + emitFunctionBodyEdges, + emitPythonDecoratorEdges, + emitRustImplEdges, +} from "./symbol-graph-detailed/edgePasses.js"; +import { buildImportAliasMaps } from "./symbol-graph-detailed/importAliases.js"; +import { createMemberChainResolver } from "./symbol-graph-detailed/memberChains.js"; type BuildDetailedSymbolGraphOptions = { scope?: "all" | "imported"; @@ -28,9 +32,6 @@ type ResolvedDetailedExport = ResolvedExport; const normalizePath = (file: string) => file.replace(/\\/g, "/"); -const isIdentifierType = (sup: LanguageSupport, type: string) => - Array.isArray(sup.nodeTypes?.identifier) && sup.nodeTypes.identifier.includes(type); - export async function buildSymbolGraphDetailed( index: ProjectIndex, opts?: BuildDetailedSymbolGraphOptions, @@ -223,176 +224,29 @@ export async function buildSymbolGraphDetailed( throw new Error(`Failed to parse ${file}`); } - const aliasToTargetDef = new Map(); - const aliasToTargetModule = new Map(); - const targetModOf = (imp: ImportBinding) => { - const targetFile = typeof imp.resolved === "string" ? normalizePath(imp.resolved) : undefined; - return targetFile ? index.byFile.get(targetFile) : undefined; - }; - for (const imp of moduleEntry.imports) { - if (!imp) continue; - const targetModule = targetModOf(imp); - const targetFile = typeof imp.resolved === "string" ? 
normalizePath(imp.resolved) : undefined; - if (!targetModule || !targetFile) continue; - if (imp.kind === "named") { - const localFallback = targetModule.locals.find((local) => local.localName === imp.imported); - const resolved = - resolveExportNamespace(targetFile, imp.imported) ?? - (localFallback - ? { - kind: "resolved" as const, - def: localFallback, - } - : null); - if (resolved?.kind === "resolved") { - aliasToTargetDef.set(imp.local, resolved.def); - } else if (resolved?.kind === "namespace") { - aliasToTargetModule.set(imp.local, normalizePath(resolved.file)); - } - } else if (imp.kind === "default") { - const defaultExport = resolveExportFrom(targetFile, "default"); - const fallbackExport = targetModule.exports.find((entry) => entry.type === "local")?.target; - const def = defaultExport ?? fallbackExport; - if (def) aliasToTargetDef.set(imp.local, def); - aliasToTargetModule.set(imp.local, targetFile); - } else if (imp.kind === "namespace") { - aliasToTargetModule.set(imp.localNS, targetFile); - } - } - - const functionNodes: Array<{ name: string; node: SyntaxNodeLike; def: SymbolDef }> = []; - const classNodes: Array<{ name: string; node: SyntaxNodeLike; def: SymbolDef }> = []; - const constStringOf = new Map(); - const collectConsts = (node: SyntaxNodeLike) => { - if (node.type === "variable_declarator") { - const nameNode = node.childForFieldName("name"); - const valueNode = node.childForFieldName("value"); - if (nameNode && valueNode && valueNode.type === "string") { - const name = sliceText(nameNode, src); - const value = unquote(sliceText(valueNode, src)); - constStringOf.set(name, value); - } - } - for (const child of node.namedChildren) collectConsts(child); - }; - collectConsts(tree.rootNode); + const { aliasToTargetDef, aliasToTargetModule } = buildImportAliasMaps( + index, + moduleEntry, + resolveExportNamespace, + resolveExportFrom, + ); - const memberExpressionType = sup.nodeTypes.memberExpression ?? 
"member_expression"; - const propertyIdentifierTypes: string[] = sup.nodeTypes.propertyIdentifier ?? ["property_identifier"]; - const optionalMemberTypes = new Set([ - memberExpressionType, - "optional_member_expression", - "subscript_expression", - "optional_chain", - sup.id === "python" ? "attribute" : "", - ]); - const resolveMemberChainTarget = (chainNode: SyntaxNodeLike): SymbolDef | null => { - const names: string[] = []; - let current: SyntaxNodeLike | null = chainNode; - let base: SyntaxNodeLike | null = null; - const pushProp = (propNode: SyntaxNodeLike | null) => { - if (!propNode) return; - if (propertyIdentifierTypes.includes(propNode.type)) names.push(sliceText(propNode, src)); - else if (propNode.type === "string") names.push(unquote(sliceText(propNode, src))); - else if (propNode.type === "identifier") { - const keyName = sliceText(propNode, src); - const value = constStringOf.get(keyName); - if (typeof value === "string") names.push(value); - } - }; - while (current && optionalMemberTypes.has(current.type)) { - if (current.type === "subscript_expression") { - base = current.child(0) ?? base; - const indexNode = current.child(2); - pushProp(indexNode); - current = base; - } else if ( - current.type === memberExpressionType || - current.type === "optional_member_expression" || - current.type === "attribute" - ) { - base = current.child(0) ?? base; - const propNode = - current.childForFieldName?.("property") ?? current.child(2) ?? 
current.childForFieldName?.("attribute"); - pushProp(propNode); - current = base; - } else if (current.type === "optional_chain") { - current = current.child(0); - } else { - break; - } - } - if (!current || !isIdentifierType(sup, current.type)) return null; - const alias = sliceText(current, src); - const targetFile = aliasToTargetModule.get(alias); - if (!targetFile || !names.length) return null; - return resolveMemberPathFromModule(targetFile, names); - }; - const walkCollect = (node: SyntaxNodeLike) => { - if ( - node.type === "function_declaration" || - node.type === "function_definition" || - node.type === "method_declaration" || - node.type === "constructor_declaration" || - node.type === "function_item" || - node.type === "method" || - node.type === "singleton_method" - ) { - const nameNode = node.childForFieldName("name") ?? node.childForFieldName("type"); - const name = nameNode ? sliceText(nameNode, src) : undefined; - if (name) { - const def = moduleEntry.locals.find((local) => local.localName === name); - if (def) functionNodes.push({ name, node, def }); - } - } else if (node.type === "class_declaration" || node.type === "class_definition" || node.type === "class") { - const nameNode = node.childForFieldName("name"); - const name = nameNode ? 
sliceText(nameNode, src) : undefined; - if (name) { - const def = moduleEntry.locals.find((local) => local.localName === name); - if (def) classNodes.push({ name, node, def }); - } - } else if (node.type === "variable_declarator") { - const nameNode = node.childForFieldName("name"); - const valueNode = node.childForFieldName("value"); - if (nameNode && valueNode) { - const valueType = String(valueNode.type || ""); - if (/arrow_function|function/.test(valueType)) { - const name = sliceText(nameNode, src); - const def = moduleEntry.locals.find((local) => local.localName === name); - if (def) functionNodes.push({ name, node: valueNode, def }); - } - } - } else if (node.type === "assignment_expression") { - const left = node.childForFieldName("left"); - const right = node.childForFieldName("right"); - if (left && right) { - const valueType = String(right.type || ""); - if (/arrow_function|function/.test(valueType)) { - let name: string | null = null; - if (left.type === memberExpressionType) { - const prop = left.child(2); - if (prop && propertyIdentifierTypes.includes(prop.type)) name = sliceText(prop, src); - } else if (left.type === "identifier") { - name = sliceText(left, src); - } - if (name) { - const def = moduleEntry.locals.find((local) => local.localName === name); - if (def) functionNodes.push({ name, node: right, def }); - } - } - } - } - for (const child of node.namedChildren) walkCollect(child); - }; - walkCollect(tree.rootNode); + const { functionNodes, classNodes, constStringOf } = collectDetailedDeclarations( + tree.rootNode, + sup, + src, + moduleEntry.locals, + ); - const scanForAliasUse = (node: SyntaxNodeLike, cb: (name: string, atNode: SyntaxNodeLike) => void) => { - if (isIdentifierType(sup, node.type)) { - const name = sliceText(node, src); - cb(name, node); - } - for (const child of node.namedChildren) scanForAliasUse(child, cb); - }; + const memberResolver = createMemberChainResolver({ + sup, + source: src, + constStringOf, + 
aliasToTargetModule, + resolveMemberPathFromModule, + }); + const { memberExpressionType, optionalMemberTypes, propertyIdentifierTypes, resolveMemberChainTarget } = + memberResolver; const resolveIdentifier = (name: string): SymbolDef | null => { const fromAlias = aliasToTargetDef.get(name); @@ -400,309 +254,27 @@ export async function buildSymbolGraphDetailed( return moduleEntry.locals.find((local) => local.localName === name) ?? null; }; - const tryResolveNode = (node: SyntaxNodeLike, fromId: string, label: string) => { - if (isIdentifierType(sup, node.type) || node.type === "type_identifier") { - const name = sliceText(node, src); - const target = resolveIdentifier(name); - if (target) { - const toId = defNodeId(target); - if (!nodes.has(toId)) nodes.set(toId, nodeForDef(target)); - recordEdge(fromId, toId, label); - return; - } - } - if (optionalMemberTypes.has(node.type)) { - tryResolveChain(node, fromId, label); - } - }; - - const callNodeTypes = new Set(["call_expression", "call", "method_invocation", "invocation_expression"]); - const newNodeTypes = new Set([ - "new_expression", - "object_creation_expression", - "struct_expression", - "composite_literal", - ]); - - const getCallTarget = (node: SyntaxNodeLike): SyntaxNodeLike | null => { - const explicitTarget = - node.childForFieldName("function") ?? - node.childForFieldName("callee") ?? - node.childForFieldName("name") ?? - node.childForFieldName("method") ?? - node.childForFieldName("member") ?? - node.childForFieldName("expression"); - if (explicitTarget) return explicitTarget; - const nonArgumentChildren = node.namedChildren.filter((child) => child.type !== "argument_list"); - return nonArgumentChildren.length === 1 ? (nonArgumentChildren[0] ?? null) : null; - }; - - const getNewTarget = (node: SyntaxNodeLike) => - node.childForFieldName("constructor") ?? - node.childForFieldName("type") ?? - node.childForFieldName("name") ?? - node.namedChildren.find((child) => child.type === "type_identifier") ?? 
- node.child(0); - - const tryResolveChain = (node: SyntaxNodeLike, fromId?: string, label = "uses") => { - const targetDef = resolveMemberChainTarget(node); - if (targetDef && fromId) { - const toId = defNodeId(targetDef); - if (!nodes.has(toId)) nodes.set(toId, nodeForDef(targetDef)); - if (!recordEdge(fromId, toId, label)) return true; - return true; - } - return !!targetDef; - }; - - if (sup.id === "python") { - const addDecoratorUses = (node: SyntaxNodeLike) => { - if (node.type === "decorated_definition") { - const fn = node.namedChildren.find((child) => child.type === "function_definition"); - if (fn) addDecoratorUses(fn); - for (const decoratorChild of node.namedChildren) { - if (decoratorChild.type !== "decorator") continue; - const nameNode = fn?.childForFieldName("name"); - if (!nameNode) continue; - const name = sliceText(nameNode, src); - const def = moduleEntry.locals.find((local) => local.localName === name); - if (!def) continue; - const fromId = defNodeId(def); - if (!nodes.has(fromId)) nodes.set(fromId, nodeForDef(def)); - const expr = - decoratorChild.childForFieldName?.("name") ?? - decoratorChild.namedChildren?.[0] ?? - decoratorChild.child(1); - if (expr) tryResolveNode(expr, fromId, "decorates"); - } - } else if (node.type === "function_definition") { - const nameNode = node.childForFieldName("name"); - if (nameNode) { - const name = sliceText(nameNode, src); - const def = moduleEntry.locals.find((local) => local.localName === name); - if (def) { - const fromId = defNodeId(def); - if (!nodes.has(fromId)) nodes.set(fromId, nodeForDef(def)); - let prev = node.previousSibling; - while (prev) { - if (prev.type === "decorated_definition") { - for (const decoratorChild of prev.namedChildren) { - if (decoratorChild.type === "decorator") { - const expr = - decoratorChild.childForFieldName?.("name") ?? - decoratorChild.namedChildren?.[0] ?? 
- decoratorChild.child(1); - if (expr) tryResolveNode(expr, fromId, "decorates"); - } else if (decoratorChild.type === "attribute") { - tryResolveNode(decoratorChild, fromId, "decorates"); - } - } - } else if (prev.type === "decorator") { - const expr = prev.childForFieldName?.("name") ?? prev.namedChildren?.[0] ?? prev.child(1); - if (expr) tryResolveNode(expr, fromId, "decorates"); - } - prev = prev.previousSibling; - } - } - } - } - for (const child of node.namedChildren) addDecoratorUses(child); - }; - addDecoratorUses(tree.rootNode); - } - - for (const fn of functionNodes) { - const fromId = defNodeId(fn.def); - if (!nodes.has(fromId)) nodes.set(fromId, nodeForDef(fn.def)); - const seenAliases = new Set(); - if (!membersOnly) { - scanForAliasUse(fn.node, (name: string, atNode: SyntaxNodeLike) => { - if (seenAliases.has(name)) return; - let target: SymbolDef | null = aliasToTargetDef.get(name) ?? null; - if (!target) { - const modFile = aliasToTargetModule.get(name); - if (modFile) { - let exportedName: string | null = null; - const parent = atNode.parent; - if (parent && (parent.type === memberExpressionType || parent.type === "optional_member_expression")) { - const prop = parent.childForFieldName?.("property") ?? parent.child(2); - if (prop && propertyIdentifierTypes.includes(prop.type)) exportedName = sliceText(prop, src); - } - if (exportedName) { - target = resolveExportFrom(modFile, exportedName); - if (!target) { - const targetModule = index.byFile.get(modFile); - target = (targetModule?.locals ?? []).find((local) => local.localName === exportedName) ?? 
null; - } - } - } - } - if (!target) return; - seenAliases.add(name); - const toId = defNodeId(target); - if (!nodes.has(toId)) nodes.set(toId, nodeForDef(target)); - if (!recordEdge(fromId, toId, "uses")) return; - }); - } - - const walkForMembers = (node: SyntaxNodeLike) => { - const tryResolveChainLocal = (chainNode: SyntaxNodeLike) => { - const targetDef = resolveMemberChainTarget(chainNode); - if (targetDef) { - const toId = defNodeId(targetDef); - if (!nodes.has(toId)) nodes.set(toId, nodeForDef(targetDef)); - if (!recordEdge(fromId, toId, "uses")) return; - } - }; - - if (optionalMemberTypes.has(node.type)) tryResolveChainLocal(node); - for (const child of node.namedChildren ?? []) walkForMembers(child); - }; - walkForMembers(fn.node); - - const walkForCalls = (node: SyntaxNodeLike) => { - if (callNodeTypes.has(node.type)) { - if (sup.id === "go") { - const callTarget = getCallTarget(node); - const calleeName = - callTarget && isIdentifierType(sup, callTarget.type) ? sliceText(callTarget, src) : null; - if (calleeName === "new" || calleeName === "make") { - const argList = node.childForFieldName("arguments") ?? node.childForFieldName("argument_list"); - const typeNode = argList?.namedChildren?.find((child) => child.type === "type_identifier") ?? null; - if (typeNode) { - tryResolveNode(typeNode, fromId, "instantiates"); - } - return; - } - } - if (sup.id === "ruby" && node.type === "call") { - const methodNode = node.childForFieldName("method"); - const receiverNode = node.childForFieldName("receiver"); - const methodName = methodNode ? 
sliceText(methodNode, src) : null; - if (methodName === "new" && receiverNode) { - tryResolveNode(receiverNode, fromId, "instantiates"); - return; - } - if (methodNode) { - tryResolveNode(methodNode, fromId, "calls"); - return; - } - } - const callee = getCallTarget(node); - if (callee) tryResolveNode(callee, fromId, "calls"); - } - if (newNodeTypes.has(node.type)) { - const target = getNewTarget(node); - if (target) tryResolveNode(target, fromId, "instantiates"); - } - for (const child of node.namedChildren ?? []) walkForCalls(child); - }; - walkForCalls(fn.node); - } - - const collectIdentifiers = (node: SyntaxNodeLike, out: string[]) => { - if (isIdentifierType(sup, node.type) || node.type === "type_identifier") { - out.push(sliceText(node, src)); - } - for (const child of node.namedChildren ?? []) collectIdentifiers(child, out); - }; - - const findFirstNodeByType = (node: SyntaxNodeLike, type: string): SyntaxNodeLike | null => { - for (const child of node.namedChildren ?? []) { - if (child.type === type) return child; - const found = findFirstNodeByType(child, type); - if (found) return found; - } - return null; - }; - - const collectNodesByType = (node: SyntaxNodeLike, type: string, out: SyntaxNodeLike[]) => { - for (const child of node.namedChildren ?? []) { - if (child.type === type) out.push(child); - collectNodesByType(child, type, out); - } + const edgePassContext = { + index, + sup, + source: src, + moduleEntry, + nodes, + membersOnly, + memberExpressionType, + propertyIdentifierTypes, + optionalMemberTypes, + aliasToTargetDef, + aliasToTargetModule, + resolveIdentifier, + resolveExportFrom, + resolveMemberChainTarget, + recordEdge, }; - - for (const cls of classNodes) { - const fromId = defNodeId(cls.def); - if (!nodes.has(fromId)) nodes.set(fromId, nodeForDef(cls.def)); - if (sup.id === "java") { - const superClass = findFirstNodeByType(cls.node, "superclass"); - const superNode = superClass?.childForFieldName("name") ?? 
superClass?.namedChildren?.[0] ?? null; - if (superNode) tryResolveNode(superNode, fromId, "extends"); - - const interfaces = findFirstNodeByType(cls.node, "super_interfaces"); - if (interfaces) { - const names: string[] = []; - collectIdentifiers(interfaces, names); - for (const name of names) { - const target = resolveIdentifier(name); - if (!target) continue; - const toId = defNodeId(target); - if (!nodes.has(toId)) nodes.set(toId, nodeForDef(target)); - recordEdge(fromId, toId, "implements"); - } - } - continue; - } - - if (sup.id === "csharp") { - const baseList = findFirstNodeByType(cls.node, "base_list"); - if (baseList) { - const names: string[] = []; - collectIdentifiers(baseList, names); - names.forEach((name, indexWithinList) => { - const target = resolveIdentifier(name); - if (!target) return; - const toId = defNodeId(target); - if (!nodes.has(toId)) nodes.set(toId, nodeForDef(target)); - recordEdge(fromId, toId, indexWithinList === 0 ? "extends" : "implements"); - }); - } - continue; - } - - const superClause = findFirstNodeByType(cls.node, "extends_clause"); - const superNode = superClause?.namedChildren?.[0] ?? superClause?.child(1); - if (superNode) tryResolveNode(superNode, fromId, "extends"); - - const implementsClauses: SyntaxNodeLike[] = []; - collectNodesByType(cls.node, "implements_clause", implementsClauses); - for (const clause of implementsClauses) { - const names: string[] = []; - collectIdentifiers(clause, names); - for (const name of names) { - const target = resolveIdentifier(name); - if (!target) continue; - const toId = defNodeId(target); - if (!nodes.has(toId)) nodes.set(toId, nodeForDef(target)); - recordEdge(fromId, toId, "implements"); - } - } - } - - if (sup.id === "rust") { - const walkImpls = (node: SyntaxNodeLike) => { - if (node.type === "impl_item") { - const typeIdentifiers = node.namedChildren?.filter((child) => child.type === "type_identifier") ?? 
[]; - if (typeIdentifiers.length >= 2) { - const traitName = sliceText(typeIdentifiers[0], src); - const typeName = sliceText(typeIdentifiers[1], src); - const typeDef = resolveIdentifier(typeName); - const traitDef = resolveIdentifier(traitName); - if (typeDef && traitDef) { - const fromId = defNodeId(typeDef); - const toId = defNodeId(traitDef); - if (!nodes.has(fromId)) nodes.set(fromId, nodeForDef(typeDef)); - if (!nodes.has(toId)) nodes.set(toId, nodeForDef(traitDef)); - recordEdge(fromId, toId, "implements"); - } - } - } - for (const child of node.namedChildren ?? []) walkImpls(child); - }; - walkImpls(tree.rootNode); - } + emitPythonDecoratorEdges(edgePassContext, tree.rootNode); + emitFunctionBodyEdges(edgePassContext, functionNodes); + emitClassInheritanceEdges(edgePassContext, classNodes); + emitRustImplEdges(edgePassContext, tree.rootNode); } catch (error) { if (isUnsupportedParserInputError(error)) { continue; diff --git a/src/graphs/symbol-graph-detailed/ast.ts b/src/graphs/symbol-graph-detailed/ast.ts new file mode 100644 index 00000000..d14d3c22 --- /dev/null +++ b/src/graphs/symbol-graph-detailed/ast.ts @@ -0,0 +1,145 @@ +import type { LanguageSupport } from "../../languages.js"; +import type { SyntaxNodeLike } from "../../languages/types.js"; +import type { SymbolDef } from "../../indexer/types.js"; +import { sliceText, unquote } from "../../util.js"; +import { + getMemberAccessParts, + memberExpressionTypeFor, + memberPropertyIdentifierTypes, +} from "../../util/memberAccess.js"; + +export type DetailedFunctionNode = { + name: string; + node: SyntaxNodeLike; + def: SymbolDef; +}; + +export type DetailedClassNode = { + name: string; + node: SyntaxNodeLike; + def: SymbolDef; +}; + +export type DetailedDeclarationPassResult = { + functionNodes: DetailedFunctionNode[]; + classNodes: DetailedClassNode[]; + constStringOf: Map; +}; + +export const isIdentifierType = (sup: LanguageSupport, type: string): boolean => + 
Array.isArray(sup.nodeTypes?.identifier) && sup.nodeTypes.identifier.includes(type); + +export function collectDetailedDeclarations( + rootNode: SyntaxNodeLike, + sup: LanguageSupport, + source: string, + locals: SymbolDef[], +): DetailedDeclarationPassResult { + const functionNodes: DetailedFunctionNode[] = []; + const classNodes: DetailedClassNode[] = []; + const constStringOf = new Map(); + const memberExpressionType = memberExpressionTypeFor(sup); + const propertyIdentifierTypes = memberPropertyIdentifierTypes(sup); + + const walk = (node: SyntaxNodeLike): void => { + if ( + node.type === "function_declaration" || + node.type === "function_definition" || + node.type === "method_declaration" || + node.type === "constructor_declaration" || + node.type === "function_item" || + node.type === "method" || + node.type === "singleton_method" + ) { + const nameNode = node.childForFieldName("name") ?? node.childForFieldName("type"); + const name = nameNode ? sliceText(nameNode, source) : undefined; + if (name) { + const def = locals.find((local) => local.localName === name); + if (def) functionNodes.push({ name, node, def }); + } + } else if (node.type === "class_declaration" || node.type === "class_definition" || node.type === "class") { + const nameNode = node.childForFieldName("name"); + const name = nameNode ? 
sliceText(nameNode, source) : undefined; + if (name) { + const def = locals.find((local) => local.localName === name); + if (def) classNodes.push({ name, node, def }); + } + } else if (node.type === "variable_declarator") { + const nameNode = node.childForFieldName("name"); + const valueNode = node.childForFieldName("value"); + if (nameNode && valueNode) { + if (valueNode.type === "string") { + const name = sliceText(nameNode, source); + const value = unquote(sliceText(valueNode, source)); + constStringOf.set(name, value); + } + const valueType = String(valueNode.type || ""); + if (/arrow_function|function/.test(valueType)) { + const name = sliceText(nameNode, source); + const def = locals.find((local) => local.localName === name); + if (def) functionNodes.push({ name, node: valueNode, def }); + } + } + } else if (node.type === "assignment_expression") { + const left = node.childForFieldName("left"); + const right = node.childForFieldName("right"); + if (left && right) { + const valueType = String(right.type || ""); + if (/arrow_function|function/.test(valueType)) { + let name: string | null = null; + if (left.type === memberExpressionType) { + const { property: prop } = getMemberAccessParts(sup, left); + if (prop && propertyIdentifierTypes.includes(prop.type)) name = sliceText(prop, source); + } else if (left.type === "identifier") { + name = sliceText(left, source); + } + if (name) { + const def = locals.find((local) => local.localName === name); + if (def) functionNodes.push({ name, node: right, def }); + } + } + } + } + + for (const child of node.namedChildren) walk(child); + }; + + walk(rootNode); + return { functionNodes, classNodes, constStringOf }; +} + +export function collectIdentifiers(node: SyntaxNodeLike, sup: LanguageSupport, source: string, out: string[]): void { + if (isIdentifierType(sup, node.type) || node.type === "type_identifier") { + out.push(sliceText(node, source)); + } + for (const child of node.namedChildren ?? 
[]) collectIdentifiers(child, sup, source, out); +} + +export function findFirstNodeByType(node: SyntaxNodeLike, type: string): SyntaxNodeLike | null { + for (const child of node.namedChildren ?? []) { + if (child.type === type) return child; + const found = findFirstNodeByType(child, type); + if (found) return found; + } + return null; +} + +export function collectNodesByType(node: SyntaxNodeLike, type: string, out: SyntaxNodeLike[]): void { + for (const child of node.namedChildren ?? []) { + if (child.type === type) out.push(child); + collectNodesByType(child, type, out); + } +} + +export function scanForAliasUse( + node: SyntaxNodeLike, + sup: LanguageSupport, + source: string, + cb: (name: string, atNode: SyntaxNodeLike) => void, +): void { + if (isIdentifierType(sup, node.type)) { + const name = sliceText(node, source); + cb(name, node); + } + for (const child of node.namedChildren) scanForAliasUse(child, sup, source, cb); +} diff --git a/src/graphs/symbol-graph-detailed/edgePasses.ts b/src/graphs/symbol-graph-detailed/edgePasses.ts new file mode 100644 index 00000000..33d05bd4 --- /dev/null +++ b/src/graphs/symbol-graph-detailed/edgePasses.ts @@ -0,0 +1,314 @@ +import type { ModuleIndex, ProjectIndex, SymbolDef } from "../../indexer/types.js"; +import type { LanguageSupport } from "../../languages.js"; +import type { SyntaxNodeLike } from "../../languages/types.js"; +import { sliceText } from "../../util.js"; +import { getMemberAccessParts } from "../../util/memberAccess.js"; +import { defNodeId, nodeForDef, type SymbolGraph } from "../symbol-graph.js"; +import type { DetailedClassNode, DetailedFunctionNode } from "./ast.js"; +import { collectIdentifiers, collectNodesByType, findFirstNodeByType, isIdentifierType } from "./ast.js"; + +type EdgePassContext = { + index: ProjectIndex; + sup: LanguageSupport; + source: string; + moduleEntry: ModuleIndex; + nodes: SymbolGraph["nodes"]; + membersOnly: boolean; + memberExpressionType: string; + 
propertyIdentifierTypes: string[]; + optionalMemberTypes: Set; + aliasToTargetDef: Map; + aliasToTargetModule: Map; + resolveIdentifier: (name: string) => SymbolDef | null; + resolveExportFrom: (file: string, exportedName: string) => SymbolDef | null; + resolveMemberChainTarget: (chainNode: SyntaxNodeLike) => SymbolDef | null; + recordEdge: (fromId: string, toId: string, label?: string) => boolean; +}; + +function ensureNode(context: EdgePassContext, def: SymbolDef): string { + const id = defNodeId(def); + if (!context.nodes.has(id)) context.nodes.set(id, nodeForDef(def)); + return id; +} + +function recordDefEdge(context: EdgePassContext, fromId: string, target: SymbolDef, label: string): boolean { + const toId = ensureNode(context, target); + return context.recordEdge(fromId, toId, label); +} + +function tryResolveChain(context: EdgePassContext, node: SyntaxNodeLike, fromId?: string, label = "uses"): boolean { + const targetDef = context.resolveMemberChainTarget(node); + if (targetDef && fromId) { + recordDefEdge(context, fromId, targetDef, label); + return true; + } + return !!targetDef; +} + +function tryResolveNode(context: EdgePassContext, node: SyntaxNodeLike, fromId: string, label: string): void { + if (isIdentifierType(context.sup, node.type) || node.type === "type_identifier") { + const name = sliceText(node, context.source); + const target = context.resolveIdentifier(name); + if (target) { + recordDefEdge(context, fromId, target, label); + return; + } + } + if (context.optionalMemberTypes.has(node.type)) { + tryResolveChain(context, node, fromId, label); + } +} + +function getCallTarget(node: SyntaxNodeLike): SyntaxNodeLike | null { + const explicitTarget = + node.childForFieldName("function") ?? + node.childForFieldName("callee") ?? + node.childForFieldName("name") ?? + node.childForFieldName("method") ?? + node.childForFieldName("member") ?? 
+ node.childForFieldName("expression"); + if (explicitTarget) return explicitTarget; + const nonArgumentChildren = node.namedChildren.filter((child) => child.type !== "argument_list"); + return nonArgumentChildren.length === 1 ? (nonArgumentChildren[0] ?? null) : null; +} + +function getNewTarget(node: SyntaxNodeLike): SyntaxNodeLike | null { + return ( + node.childForFieldName("constructor") ?? + node.childForFieldName("type") ?? + node.childForFieldName("name") ?? + node.namedChildren.find((child) => child.type === "type_identifier") ?? + node.child(0) + ); +} + +export function emitPythonDecoratorEdges(context: EdgePassContext, rootNode: SyntaxNodeLike): void { + if (context.sup.id !== "python") return; + + const addDecoratorUses = (node: SyntaxNodeLike): void => { + if (node.type === "decorated_definition") { + const fn = node.namedChildren.find((child) => child.type === "function_definition"); + if (fn) addDecoratorUses(fn); + for (const decoratorChild of node.namedChildren) { + if (decoratorChild.type !== "decorator") continue; + const nameNode = fn?.childForFieldName("name"); + if (!nameNode) continue; + const name = sliceText(nameNode, context.source); + const def = context.moduleEntry.locals.find((local) => local.localName === name); + if (!def) continue; + const fromId = ensureNode(context, def); + const expr = + decoratorChild.childForFieldName?.("name") ?? decoratorChild.namedChildren?.[0] ?? 
decoratorChild.child(1); + if (expr) tryResolveNode(context, expr, fromId, "decorates"); + } + } else if (node.type === "function_definition") { + const nameNode = node.childForFieldName("name"); + if (nameNode) { + const name = sliceText(nameNode, context.source); + const def = context.moduleEntry.locals.find((local) => local.localName === name); + if (def) { + const fromId = ensureNode(context, def); + let prev = node.previousSibling; + while (prev) { + if (prev.type === "decorated_definition") { + for (const decoratorChild of prev.namedChildren) { + if (decoratorChild.type === "decorator") { + const expr = + decoratorChild.childForFieldName?.("name") ?? + decoratorChild.namedChildren?.[0] ?? + decoratorChild.child(1); + if (expr) tryResolveNode(context, expr, fromId, "decorates"); + } else if (decoratorChild.type === "attribute") { + tryResolveNode(context, decoratorChild, fromId, "decorates"); + } + } + } else if (prev.type === "decorator") { + const expr = prev.childForFieldName?.("name") ?? prev.namedChildren?.[0] ?? 
prev.child(1); + if (expr) tryResolveNode(context, expr, fromId, "decorates"); + } + prev = prev.previousSibling; + } + } + } + } + for (const child of node.namedChildren) addDecoratorUses(child); + }; + + addDecoratorUses(rootNode); +} + +export function emitFunctionBodyEdges(context: EdgePassContext, functionNodes: DetailedFunctionNode[]): void { + const callNodeTypes = new Set(["call_expression", "call", "method_invocation", "invocation_expression"]); + const newNodeTypes = new Set([ + "new_expression", + "object_creation_expression", + "struct_expression", + "composite_literal", + ]); + + for (const fn of functionNodes) { + const fromId = ensureNode(context, fn.def); + const seenAliases = new Set(); + + const recordAliasUse = (node: SyntaxNodeLike): void => { + if (context.membersOnly || !isIdentifierType(context.sup, node.type)) return; + const name = sliceText(node, context.source); + if (seenAliases.has(name)) return; + let target: SymbolDef | null = context.aliasToTargetDef.get(name) ?? null; + if (!target) { + const modFile = context.aliasToTargetModule.get(name); + if (modFile) { + let exportedName: string | null = null; + const parent = node.parent; + if ( + parent && + (parent.type === context.memberExpressionType || parent.type === "optional_member_expression") + ) { + const { property: prop } = getMemberAccessParts(context.sup, parent); + if (prop && context.propertyIdentifierTypes.includes(prop.type)) { + exportedName = sliceText(prop, context.source); + } + } + if (exportedName) { + target = context.resolveExportFrom(modFile, exportedName); + if (!target) { + const targetModule = context.index.byFile.get(modFile); + target = (targetModule?.locals ?? []).find((local) => local.localName === exportedName) ?? 
null; + } + } + } + } + if (!target) return; + seenAliases.add(name); + recordDefEdge(context, fromId, target, "uses"); + }; + + const recordMemberUse = (node: SyntaxNodeLike): void => { + if (!context.optionalMemberTypes.has(node.type)) return; + const targetDef = context.resolveMemberChainTarget(node); + if (targetDef) { + recordDefEdge(context, fromId, targetDef, "uses"); + } + }; + + const recordCallOrInstantiation = (node: SyntaxNodeLike): boolean => { + if (callNodeTypes.has(node.type)) { + if (context.sup.id === "go") { + const callTarget = getCallTarget(node); + const calleeName = + callTarget && isIdentifierType(context.sup, callTarget.type) ? sliceText(callTarget, context.source) : null; + if (calleeName === "new" || calleeName === "make") { + const argList = node.childForFieldName("arguments") ?? node.childForFieldName("argument_list"); + const typeNode = argList?.namedChildren?.find((child) => child.type === "type_identifier") ?? null; + if (typeNode) { + tryResolveNode(context, typeNode, fromId, "instantiates"); + } + return false; + } + } + if (context.sup.id === "ruby" && node.type === "call") { + const methodNode = node.childForFieldName("method"); + const receiverNode = node.childForFieldName("receiver"); + const methodName = methodNode ? 
sliceText(methodNode, context.source) : null; + if (methodName === "new" && receiverNode) { + tryResolveNode(context, receiverNode, fromId, "instantiates"); + return false; + } + if (methodNode) { + tryResolveNode(context, methodNode, fromId, "calls"); + return false; + } + } + const callee = getCallTarget(node); + if (callee) tryResolveNode(context, callee, fromId, "calls"); + } + if (newNodeTypes.has(node.type)) { + const target = getNewTarget(node); + if (target) tryResolveNode(context, target, fromId, "instantiates"); + } + return true; + }; + + const walkFunctionBody = (node: SyntaxNodeLike, allowCallProcessing: boolean): void => { + recordAliasUse(node); + recordMemberUse(node); + const allowChildCallProcessing = allowCallProcessing ? recordCallOrInstantiation(node) : false; + for (const child of node.namedChildren ?? []) walkFunctionBody(child, allowChildCallProcessing); + }; + + walkFunctionBody(fn.node, true); + } +} + +export function emitClassInheritanceEdges(context: EdgePassContext, classNodes: DetailedClassNode[]): void { + for (const cls of classNodes) { + const fromId = ensureNode(context, cls.def); + if (context.sup.id === "java") { + const superClass = findFirstNodeByType(cls.node, "superclass"); + const superNode = superClass?.childForFieldName("name") ?? superClass?.namedChildren?.[0] ?? 
null; + if (superNode) tryResolveNode(context, superNode, fromId, "extends"); + + const interfaces = findFirstNodeByType(cls.node, "super_interfaces"); + if (interfaces) { + const names: string[] = []; + collectIdentifiers(interfaces, context.sup, context.source, names); + for (const name of names) { + const target = context.resolveIdentifier(name); + if (target) recordDefEdge(context, fromId, target, "implements"); + } + } + continue; + } + + if (context.sup.id === "csharp") { + const baseList = findFirstNodeByType(cls.node, "base_list"); + if (baseList) { + const names: string[] = []; + collectIdentifiers(baseList, context.sup, context.source, names); + names.forEach((name, indexWithinList) => { + const target = context.resolveIdentifier(name); + if (!target) return; + recordDefEdge(context, fromId, target, indexWithinList === 0 ? "extends" : "implements"); + }); + } + continue; + } + + const superClause = findFirstNodeByType(cls.node, "extends_clause"); + const superNode = superClause?.namedChildren?.[0] ?? superClause?.child(1); + if (superNode) tryResolveNode(context, superNode, fromId, "extends"); + + const implementsClauses: SyntaxNodeLike[] = []; + collectNodesByType(cls.node, "implements_clause", implementsClauses); + for (const clause of implementsClauses) { + const names: string[] = []; + collectIdentifiers(clause, context.sup, context.source, names); + for (const name of names) { + const target = context.resolveIdentifier(name); + if (target) recordDefEdge(context, fromId, target, "implements"); + } + } + } +} + +export function emitRustImplEdges(context: EdgePassContext, rootNode: SyntaxNodeLike): void { + if (context.sup.id !== "rust") return; + + const walkImpls = (node: SyntaxNodeLike): void => { + if (node.type === "impl_item") { + const typeIdentifiers = node.namedChildren?.filter((child) => child.type === "type_identifier") ?? 
[]; + if (typeIdentifiers.length >= 2) { + const traitName = sliceText(typeIdentifiers[0], context.source); + const typeName = sliceText(typeIdentifiers[1], context.source); + const typeDef = context.resolveIdentifier(typeName); + const traitDef = context.resolveIdentifier(traitName); + if (typeDef && traitDef) { + const fromId = ensureNode(context, typeDef); + recordDefEdge(context, fromId, traitDef, "implements"); + } + } + } + for (const child of node.namedChildren ?? []) walkImpls(child); + }; + walkImpls(rootNode); +} diff --git a/src/graphs/symbol-graph-detailed/importAliases.ts b/src/graphs/symbol-graph-detailed/importAliases.ts new file mode 100644 index 00000000..5bd6e71a --- /dev/null +++ b/src/graphs/symbol-graph-detailed/importAliases.ts @@ -0,0 +1,60 @@ +import type { ModuleIndex, ProjectIndex, ResolvedExport, SymbolDef } from "../../indexer/types.js"; +import type { ImportBinding } from "../../indexer/types.js"; + +export type ImportAliasMaps = { + aliasToTargetDef: Map; + aliasToTargetModule: Map; +}; + +const normalizePath = (file: string) => file.replace(/\\/g, "/"); + +type ResolveExportNamespace = (file: string, exportedName: string) => ResolvedExport | null; +type ResolveExportFrom = (file: string, exportedName: string) => SymbolDef | null; + +function targetModuleForImport(index: ProjectIndex, imp: ImportBinding): ModuleIndex | undefined { + const targetFile = typeof imp.resolved === "string" ? normalizePath(imp.resolved) : undefined; + return targetFile ? index.byFile.get(targetFile) : undefined; +} + +export function buildImportAliasMaps( + index: ProjectIndex, + moduleEntry: ModuleIndex, + resolveExportNamespace: ResolveExportNamespace, + resolveExportFrom: ResolveExportFrom, +): ImportAliasMaps { + const aliasToTargetDef = new Map(); + const aliasToTargetModule = new Map(); + + for (const imp of moduleEntry.imports) { + const targetModule = targetModuleForImport(index, imp); + const targetFile = typeof imp.resolved === "string" ? 
normalizePath(imp.resolved) : undefined; + if (!targetModule || !targetFile) continue; + if (imp.kind === "named") { + const localFallback = targetModule.locals.find((local) => local.localName === imp.imported); + const fallbackResolved: ResolvedExport | null = localFallback + ? { + kind: "resolved", + def: localFallback, + } + : null; + const resolved = + resolveExportNamespace(targetFile, imp.imported) ?? + fallbackResolved; + if (resolved?.kind === "resolved") { + aliasToTargetDef.set(imp.local, resolved.def); + } else if (resolved?.kind === "namespace") { + aliasToTargetModule.set(imp.local, normalizePath(resolved.file)); + } + } else if (imp.kind === "default") { + const defaultExport = resolveExportFrom(targetFile, "default"); + const fallbackExport = targetModule.exports.find((entry) => entry.type === "local")?.target; + const def = defaultExport ?? fallbackExport; + if (def) aliasToTargetDef.set(imp.local, def); + aliasToTargetModule.set(imp.local, targetFile); + } else if (imp.kind === "namespace") { + aliasToTargetModule.set(imp.localNS, targetFile); + } + } + + return { aliasToTargetDef, aliasToTargetModule }; +} diff --git a/src/graphs/symbol-graph-detailed/memberChains.ts b/src/graphs/symbol-graph-detailed/memberChains.ts new file mode 100644 index 00000000..ebd4d1ac --- /dev/null +++ b/src/graphs/symbol-graph-detailed/memberChains.ts @@ -0,0 +1,51 @@ +import type { LanguageSupport } from "../../languages.js"; +import type { SyntaxNodeLike } from "../../languages/types.js"; +import type { SymbolDef } from "../../indexer/types.js"; +import { sliceText } from "../../util.js"; +import { + collectMemberAccessChain, + memberAccessTraversalTypes, + memberExpressionTypeFor, + memberPropertyIdentifierTypes, +} from "../../util/memberAccess.js"; +import { isIdentifierType } from "./ast.js"; + +export type MemberChainResolver = { + memberExpressionType: string; + propertyIdentifierTypes: string[]; + optionalMemberTypes: Set; + resolveMemberChainTarget: 
(chainNode: SyntaxNodeLike) => SymbolDef | null; +}; + +export function createMemberChainResolver(args: { + sup: LanguageSupport; + source: string; + constStringOf: Map; + aliasToTargetModule: Map; + resolveMemberPathFromModule: (startFile: string, names: string[]) => SymbolDef | null; +}): MemberChainResolver { + const memberExpressionType = memberExpressionTypeFor(args.sup); + const propertyIdentifierTypes = memberPropertyIdentifierTypes(args.sup); + const optionalMemberTypes = memberAccessTraversalTypes(args.sup); + + const resolveMemberChainTarget = (chainNode: SyntaxNodeLike): SymbolDef | null => { + const chain = collectMemberAccessChain({ + sup: args.sup, + source: args.source, + chainNode, + constStringOf: args.constStringOf, + }); + if (!chain || !isIdentifierType(args.sup, chain.base.type)) return null; + const alias = sliceText(chain.base, args.source); + const targetFile = args.aliasToTargetModule.get(alias); + if (!targetFile) return null; + return args.resolveMemberPathFromModule(targetFile, chain.names); + }; + + return { + memberExpressionType, + propertyIdentifierTypes, + optionalMemberTypes, + resolveMemberChainTarget, + }; +} diff --git a/src/graphs/traversal.ts b/src/graphs/traversal.ts new file mode 100644 index 00000000..63ba80f7 --- /dev/null +++ b/src/graphs/traversal.ts @@ -0,0 +1,120 @@ +import type { FileId, Graph } from "../types.js"; +import { + getForwardNeighbors, + getReverseNeighbors, + graphAdjacencyFor, + type GraphAdjacencyIndex, +} from "./adjacency.js"; +import { getFiniteNonNegativeLimit } from "./limits.js"; + +export type DependencyNode = { file: FileId; depth: number }; + +export function getDependencies( + graph: Graph, + startFile: FileId, + opts: { depth?: number; limit?: number; adjacency?: GraphAdjacencyIndex } = {}, +): DependencyNode[] { + const maxDepth = opts.depth ?? Number.POSITIVE_INFINITY; + const finiteLimit = getFiniteNonNegativeLimit(opts.limit); + const maxResults = finiteLimit ?? 
Number.POSITIVE_INFINITY; + if (maxResults === 0) { + return []; + } + const out: DependencyNode[] = []; + const visited = new Set(); + const queue: Array<{ file: string; depth: number }> = [{ file: startFile, depth: 0 }]; + const adjacency = opts.adjacency ?? graphAdjacencyFor(graph); + visited.add(startFile); + + let index = 0; + while (index < queue.length) { + const { file, depth } = queue[index++]!; + if (depth > 0) { + out.push({ file, depth }); + if (out.length >= maxResults) { + break; + } + } + if (depth >= maxDepth) continue; + + for (const neighbor of getForwardNeighbors(adjacency, file)) { + if (!visited.has(neighbor)) { + visited.add(neighbor); + queue.push({ file: neighbor, depth: depth + 1 }); + } + } + } + return out; +} + +export function getReverseDependencies( + graph: Graph, + targetFile: FileId, + opts: { depth?: number; limit?: number; adjacency?: GraphAdjacencyIndex } = {}, +): DependencyNode[] { + const maxDepth = opts.depth ?? Number.POSITIVE_INFINITY; + const finiteLimit = getFiniteNonNegativeLimit(opts.limit); + const maxResults = finiteLimit ?? Number.POSITIVE_INFINITY; + if (maxResults === 0) { + return []; + } + const out: DependencyNode[] = []; + const visited = new Set(); + const queue: Array<{ file: string; depth: number }> = [{ file: targetFile, depth: 0 }]; + const adjacency = opts.adjacency ?? 
graphAdjacencyFor(graph); + visited.add(targetFile); + + let index = 0; + while (index < queue.length) { + const { file, depth } = queue[index++]!; + if (depth > 0) { + out.push({ file, depth }); + if (out.length >= maxResults) { + break; + } + } + if (depth >= maxDepth) continue; + + for (const neighbor of getReverseNeighbors(adjacency, file)) { + if (!visited.has(neighbor)) { + visited.add(neighbor); + queue.push({ file: neighbor, depth: depth + 1 }); + } + } + } + return out; +} + +export function getShortestPath( + graph: Graph, + from: FileId, + to: FileId, + opts: { adjacency?: GraphAdjacencyIndex } = {}, +): FileId[] | null { + const visited = new Map(); + const queue: string[] = [from]; + const adjacency = opts.adjacency ?? graphAdjacencyFor(graph); + visited.set(from, null); + + let index = 0; + while (index < queue.length) { + const current = queue[index++]!; + if (current === to) { + const path: string[] = []; + let pointer: string | null = current; + while (pointer !== null) { + path.push(pointer); + pointer = visited.get(pointer)!; + } + return path.reverse(); + } + + for (const neighbor of getForwardNeighbors(adjacency, current)) { + if (!visited.has(neighbor)) { + visited.set(neighbor, current); + queue.push(neighbor); + } + } + } + return null; +} diff --git a/src/graphs/unresolved.ts b/src/graphs/unresolved.ts new file mode 100644 index 00000000..e7061d0d --- /dev/null +++ b/src/graphs/unresolved.ts @@ -0,0 +1,51 @@ +import { builtinModules } from "node:module"; +import type { FileId, Graph } from "../types.js"; +import { + classifyExternalSpecifier, + type ExternalSpecifierClassification, + type ExternalSpecifierClassificationOptions, +} from "./external-classifier.js"; + +const NODE_BUILTIN_MODULES = new Set([ + ...builtinModules, + ...builtinModules.filter((name) => !name.startsWith("node:")).map((name) => `node:${name}`), +]); + +function isNodeBuiltinSpecifier(specifier: string): boolean { + return NODE_BUILTIN_MODULES.has(specifier); +} + 
+export type UnresolvedImportOptions = ExternalSpecifierClassificationOptions; + +export function getUnresolvedImports( + graph: Graph, + opts: UnresolvedImportOptions = {}, +): Array<{ + name: string; + importers: Array<{ file: FileId; raw: string }>; +}> { + const unresolved = new Map>(); + const classificationCache = new Map(); + for (const edge of graph.edges) { + if (edge.to.type !== "external") continue; + if (isNodeBuiltinSpecifier(edge.to.name) || isNodeBuiltinSpecifier(edge.raw)) continue; + const classificationKey = `${edge.from}\0${edge.to.name}\0${edge.raw}\0${opts.projectRoot ?? ""}`; + let classification = classificationCache.get(classificationKey); + if (!classification) { + classification = classifyExternalSpecifier({ + raw: edge.raw, + externalName: edge.to.name, + importerFile: edge.from, + options: opts, + }); + classificationCache.set(classificationKey, classification); + } + if (classification.status !== "unresolved") continue; + const importers = unresolved.get(edge.to.name) ?? 
[]; + importers.push({ file: edge.from, raw: edge.raw }); + unresolved.set(edge.to.name, importers); + } + return Array.from(unresolved.entries()) + .map(([name, importers]) => ({ name, importers })) + .sort((left, right) => right.importers.length - left.importers.length); +} diff --git a/src/impact/analyzer.ts b/src/impact/analyzer.ts index 2b1fbd49..364a4ecc 100644 --- a/src/impact/analyzer.ts +++ b/src/impact/analyzer.ts @@ -1,118 +1,13 @@ -import type { FileId, Edge } from "../types.js"; -import type { ProjectIndex, SymbolDef, Reference } from "../indexer.js"; +import type { FileId } from "../types.js"; +import type { ProjectIndex } from "../indexer.js"; import { compileTestPatterns, createIndexTestFileMatcher } from "./testPatterns.js"; -import type { ChangedSymbol, ImpactItem, ImpactReason, ImpactOptions, FileChange, SeverityWeights } from "./types.js"; -import { DEFAULT_SEVERITY_WEIGHTS } from "./types.js"; -import { findReferences } from "../indexer.js"; -import { Semaphore } from "../util/semaphore.js"; +import type { ChangedSymbol, ImpactItem, ImpactOptions, FileChange } from "./types.js"; import { createImpactIgnoreMatcher } from "./path.js"; - -/** - * Priority order for ImpactReason — higher number wins when merging explain.reason. - * Typed as Record so TypeScript enforces exhaustiveness: - * adding a new ImpactReason value will cause a compile error here until it is listed. 
- */ -const REASON_PRIORITY: Readonly> = { - directRef: 4, - namespaceMember: 3, - importAlias: 2, - exportChain: 1, - transitive: 0, - fileLevelChange: 0, -}; - -/** Explain object for impact severity calculation */ -type SeverityExplain = { - reason?: ImpactReason; - exported?: boolean; - fanIn?: number; - sameFile?: boolean; - typeOnly?: boolean; - depth?: number; - hints?: string[]; -}; - -/** Result of severity calculation with confidence */ -type SeverityResult = { - severity: number; - confidence: number; - explain: SeverityExplain; -}; - -type DependencyStats = { - fanInByFile: Map; - reverseDeps: Map; -}; - -const cachedFanInByGraph = new WeakMap>(); - -const severityWeightKeys: ReadonlyArray = [ - "directRef", - "namespaceMember", - "importAlias", - "transitive", - "exported", - "sameFile", - "typeOnly", - "depthDecay", -]; - -function referenceScanLimitForKeptRefs(maxRefs: number): number { - return Math.max(maxRefs + 50, maxRefs * 4); -} - -function normalizeSeverityWeights(weights: SeverityWeights): SeverityWeights { - const normalized: SeverityWeights = { ...DEFAULT_SEVERITY_WEIGHTS }; - const invalidEntries: string[] = []; - - for (const key of severityWeightKeys) { - const value = weights[key]; - if (!Number.isFinite(value) || value <= 0) { - invalidEntries.push(`${key}=${String(value)}`); - continue; - } - normalized[key] = value; - } - - if (normalized.depthDecay >= 1) { - invalidEntries.push(`depthDecay=${String(weights.depthDecay)}`); - } - - if (invalidEntries.length) { - throw new RangeError(`Invalid severity weights: ${invalidEntries.join(", ")}`); - } - - return normalized; -} - -function getCachedFanInByFile(index: ProjectIndex): Map { - const cached = cachedFanInByGraph.get(index.graph); - if (cached) return cached; - const { fanInByFile } = buildDependencyStats(index.graph.edges); - cachedFanInByGraph.set(index.graph, fanInByFile); - return fanInByFile; -} - -function buildDependencyStats(edges: Edge[]): DependencyStats { - const 
fanInByFile = new Map(); - const reverseDeps = new Map(); - - for (const edge of edges) { - if (edge.to.type !== "file") continue; - - const nextCount = (fanInByFile.get(edge.to.path) ?? 0) + 1; - fanInByFile.set(edge.to.path, nextCount); - - const incoming = reverseDeps.get(edge.to.path); - if (incoming) { - incoming.push(edge); - continue; - } - reverseDeps.set(edge.to.path, [edge]); - } - - return { fanInByFile, reverseDeps }; -} +import { analyzeDirectReferences } from "./direct.js"; +import { analyzeTransitiveImpact, seedTransitiveFromFiles } from "./transitive.js"; +import { buildDependencyStats } from "./severity.js"; +export { calculateSeverity, calculateTransitiveSeverity } from "./severity.js"; +export { seedTransitiveFromFiles } from "./transitive.js"; export async function analyzeImpact( index: ProjectIndex, @@ -177,143 +72,26 @@ export async function analyzeImpact( // Filter out changed symbols in ignored files const filteredChangedSymbols = changedSymbols.filter((s) => !isIgnored(s.file)); + const directOptions = { + maxRefs, + includeTests, + ...(refContext !== undefined ? { refContext } : {}), + ...(refContextLines !== undefined ? { refContextLines } : {}), + ...(refBlockMaxLines !== undefined ? { refBlockMaxLines } : {}), + ...(diagnostics !== undefined ? { diagnostics } : {}), + }; - // Direct impact analysis with bounded concurrency. - // Use a Semaphore so that slow tasks release their slot immediately rather than - // holding up a whole batch (which the old slice-based loop would do). 
- const semaphore = new Semaphore(8); - const tasks: Array> = []; - - for (const changedSymbol of filteredChangedSymbols) { - if (processedSymbols.has(changedSymbol.id)) continue; - processedSymbols.add(changedSymbol.id); - - tasks.push( - semaphore.withPermit(async () => { - const refs = await findReferences( - index, - { - def: { - file: changedSymbol.file, - localName: changedSymbol.name, - kind: changedSymbol.kind, - range: changedSymbol.range, - } as SymbolDef, - }, - refContext - ? { - context: refContext, - ...(refContextLines !== undefined && { - lines: refContextLines, - }), - ...(refBlockMaxLines !== undefined && { - blockMaxLines: refBlockMaxLines, - }), - maxReferences: referenceScanLimitForKeptRefs(maxRefs), - } - : { maxReferences: referenceScanLimitForKeptRefs(maxRefs) }, - ); - - if (refs.status === "ok") { - let keptRefs = 0; - for (let refIndex = 0; refIndex < refs.references.length; refIndex += 1) { - const ref = refs.references[refIndex]!; - if (diagnostics) diagnostics.refsScanned += 1; - if (!includeTests && isIndexTestFile(ref.file)) { - if (diagnostics) diagnostics.refsFilteredTests += 1; - continue; - } - if (isIgnored(ref.file)) { - if (diagnostics) diagnostics.refsFilteredIgnored += 1; - continue; - } - if (keptRefs >= maxRefs) { - if (diagnostics) { - diagnostics.refsDroppedByMaxRefs += refs.references.length - refIndex; - } - break; - } - keptRefs += 1; - - // Determine the reason for this reference (sync, before await) - let reason: ImpactReason = "directRef"; - if (ref.via?.namespaceMember) { - reason = "namespaceMember"; - } else if (ref.via?.import) { - reason = "importAlias"; - } - - const severityResult = calculateSeverity(changedSymbol, ref, [reason], 0, index, fanInByFile); - - // Re-read existing AFTER the await: concurrent semaphore tasks may - // have written to the same file entry while we were awaiting above. - const existing = impacted.get(ref.file); - const reasons: ImpactReason[] = existing?.reasons ? 
[...existing.reasons] : []; - if (!reasons.includes(reason)) { - reasons.push(reason); - } - - const symbols = existing?.symbols ? [...existing.symbols] : []; - if (!symbols.includes(changedSymbol.name)) { - symbols.push(changedSymbol.name); - } - - const existingRefs = existing?.refs ? [...existing.refs] : []; - if (refContext && ref.context !== undefined) { - existingRefs.push({ range: ref.range, context: ref.context }); - } - - // Merge hints from existing explain with new hints so no - // accumulated hint is lost when multiple symbols impact the same file. - const existingHints = existing?.explain?.hints ?? []; - const newHints = severityResult.explain.hints ?? []; - const mergedHints = - !existingHints.length && !newHints.length - ? undefined - : [...new Set([...existingHints, ...newHints])]; - - // Preserve the strongest explain.reason seen so far. Spreading - // severityResult.explain unconditionally could downgrade a prior - // directRef reason to importAlias when a weaker ref is processed later. - const existingReason = existing?.explain?.reason; - const newReason = severityResult.explain.reason; - let bestReason = existingReason; - if (bestReason === undefined) { - bestReason = newReason; - } else if (newReason !== undefined && REASON_PRIORITY[newReason] > REASON_PRIORITY[bestReason]) { - bestReason = newReason; - } - - const impactItem: ImpactItem = { - file: ref.file, - symbols, - reasons, - severity: Math.max(existing?.severity ?? 0, severityResult.severity), - depth: 0, - ...(refContext && existingRefs.length ? { refs: existingRefs } : {}), - explain: { - ...existing?.explain, - ...severityResult.explain, - ...(bestReason !== undefined && { reason: bestReason }), - ...(mergedHints && { hints: mergedHints }), - refsCount: (existing?.explain?.refsCount ?? 0) + 1, - }, - confidence: Math.max(existing?.confidence ?? 
0, severityResult.confidence), - }; - - if (changedSymbol.typeOnly !== undefined) { - impactItem.typeOnly = changedSymbol.typeOnly; - } - - impacted.set(ref.file, impactItem); - emitImpactItem(impactItem, "partial"); - } - } - }), - ); - } - - await Promise.all(tasks); + await analyzeDirectReferences({ + index, + changedSymbols: filteredChangedSymbols, + impacted, + processedSymbols, + isIndexTestFile, + isIgnored, + fanInByFile, + options: directOptions, + emitImpactItem, + }); // Seed transitive impact from changed files. This is NOT redundant with // analyzeTransitiveImpact below: deleted/renamed files produce no changedSymbols @@ -335,312 +113,3 @@ export async function analyzeImpact( } return sorted; } - -function getDependentFiles(index: ProjectIndex, filePath: FileId, reverseDeps?: Map): FileId[] { - if (reverseDeps) { - return reverseDeps.get(filePath)?.map((edge) => edge.from) ?? []; - } - return index.graph.edges - .filter((edge) => edge.to.type === "file" && edge.to.path === filePath) - .map((edge) => edge.from); -} - -export function seedTransitiveFromFiles( - index: ProjectIndex, - impacted: Map, - changedFiles: FileChange[], - options: Partial & { projectRoot?: string }, - reverseDeps?: Map, - emitImpactItem?: (item: ImpactItem, phase: "partial" | "final") => void, -): void { - const { includeTests = false, testPatterns, ignoreGlobs = [] } = options; - const projectRoot = - options.projectRoot ?? index.projectRoot ?? index.projectFiles?.find((entry) => entry.projectRoot)?.projectRoot; - const patternMatchers = compileTestPatterns(testPatterns); - const isIndexTestFile = createIndexTestFileMatcher(index, patternMatchers, projectRoot); - const fallbackPathSet = new Set(options.fileLevelFallbackPaths ?? []); - const diagnostics = options.diagnostics; - const isIgnored = projectRoot ? 
createImpactIgnoreMatcher(projectRoot, ignoreGlobs) : () => false; - - for (const fileChange of changedFiles) { - if (isIgnored(fileChange.path)) continue; - - // Seed impact for modified (file-level fallback), deleted, and renamed files based on dependents - - const shouldSeedModifiedFallback = - fileChange.kind === "modified" && - options.fileLevelFallback && - (fallbackPathSet.has(fileChange.path) || - fileChange.isBinary || - fileChange.modeChanged || - !fileChange.hunks.length); - - if (shouldSeedModifiedFallback) { - if (impacted.has(fileChange.path)) continue; - const dependents = getDependentFiles(index, fileChange.path, reverseDeps); - if (dependents.length) { - if (diagnostics) diagnostics.fallbackSeededFiles += 1; - } - - for (const dependent of dependents) { - if (!includeTests && isIndexTestFile(dependent)) continue; - if (impacted.has(dependent) || isIgnored(dependent)) continue; - - impacted.set(dependent, { - file: dependent, - symbols: [], - reasons: ["fileLevelChange"], - severity: 0.45, - depth: 1, - explain: { - reason: "fileLevelChange", - depth: 1, - hints: ["changedFileNoSymbols"], - }, - confidence: 0.5, - }); - emitImpactItem?.(impacted.get(dependent)!, "partial"); - if (diagnostics) diagnostics.fallbackSeededDependents += 1; - } - } else if (fileChange.kind === "deleted" || fileChange.kind === "renamed") { - const lookupPaths = - fileChange.kind === "renamed" && fileChange.oldPath ? 
[fileChange.oldPath, fileChange.path] : [fileChange.path]; - const dependentSet = new Set(); - for (const lookupPath of lookupPaths) { - for (const dependent of getDependentFiles(index, lookupPath, reverseDeps)) { - dependentSet.add(dependent); - } - } - const dependents = [...dependentSet]; - if (dependents.length) { - if (diagnostics) diagnostics.fallbackSeededFiles += 1; - } - - for (const dependent of dependents) { - if (!includeTests && isIndexTestFile(dependent)) continue; - if (impacted.has(dependent) || isIgnored(dependent)) continue; - - const hints = ["fileDeleted"]; - if (fileChange.kind === "renamed") { - hints.push("fileRenamed"); - } - - const impactItem: ImpactItem = { - file: dependent, - symbols: [], - reasons: ["transitive"], - severity: 0.6, // Moderate severity for file-level changes - depth: 1, - explain: { - reason: "transitive", - depth: 1, - hints, - }, - confidence: 0.5, - }; - - impacted.set(dependent, impactItem); - emitImpactItem?.(impactItem, "partial"); - if (diagnostics) diagnostics.fallbackSeededDependents += 1; - } - } - } -} - -function analyzeTransitiveImpact( - impacted: Map, - maxDepth: number, - options: Partial & { projectRoot?: string }, - isIndexTestFile: (file: FileId) => boolean, - reverseDeps: Map, - emitImpactItem?: (item: ImpactItem, phase: "partial" | "final") => void, -): void { - const { ignoreGlobs = [] } = options; - const isIgnored = options.projectRoot ? 
createImpactIgnoreMatcher(options.projectRoot, ignoreGlobs) : () => false; - - const visited = new Set(); - const queue: Array<{ file: FileId; depth: number; reason: ImpactReason }> = []; - - // Initialize queue with directly impacted files - for (const [file] of impacted) { - if (isIgnored(file)) continue; - visited.add(file); - queue.push({ file, depth: 0, reason: "transitive" }); - } - - let qi = 0; - while (qi < queue.length) { - const { file, depth, reason } = queue[qi++]!; - if (depth >= maxDepth) continue; - - // Find files that depend on this file using reverse index - const edgesIn = reverseDeps.get(file) || []; - for (const edge of edgesIn) { - const dependentFile = edge.from; - if ( - visited.has(dependentFile) || - (!options.includeTests && isIndexTestFile(dependentFile)) || - isIgnored(dependentFile) - ) - continue; - - visited.add(dependentFile); - - const existing = impacted.get(dependentFile); - const reasons = existing?.reasons || []; - if (!reasons.includes(reason)) { - reasons.push(reason); - } - - const severity = calculateTransitiveSeverity(edge, depth + 1); - const upstreamConfidence = impacted.get(file)?.confidence ?? 0.6; - const nextConfidence = Math.max( - 0.2, - Math.min(1, upstreamConfidence * (edge.typeOnly ? 0.75 : 0.85) * Math.pow(0.95, depth)), - ); - - // Calculate fan-in for transitive items too - const fanIn = reverseDeps.get(dependentFile)?.length || 0; - - const transitiveItem: ImpactItem = { - file: dependentFile, - symbols: existing?.symbols || [], - reasons, - severity: Math.max(existing?.severity || 0, severity), - depth: depth + 1, - explain: { - ...existing?.explain, - reason, - depth: depth + 1, - ...(fanIn > 0 && { fanIn }), - }, - confidence: Math.max(existing?.confidence ?? 
0, nextConfidence), - }; - - if (edge.typeOnly !== undefined) { - transitiveItem.typeOnly = edge.typeOnly; - if (transitiveItem.explain) { - transitiveItem.explain.typeOnly = edge.typeOnly; - } - } - - impacted.set(dependentFile, transitiveItem); - emitImpactItem?.(transitiveItem, "partial"); - - queue.push({ - file: dependentFile, - depth: depth + 1, - reason: "exportChain", - }); - } - } -} - -export function calculateSeverity( - changedSymbol: ChangedSymbol, - ref: Reference, - reasons: ImpactReason[], - depth: number, - index: ProjectIndex, - fanInByFile?: Map, - weights: SeverityWeights = DEFAULT_SEVERITY_WEIGHTS, -): SeverityResult { - const validatedWeights = normalizeSeverityWeights(weights); - - let score = 1.0; - let confidence = 1.0; // Start with high confidence - const explain: SeverityExplain = {}; - const hints: string[] = []; - - // Primary reason (use configurable weights) - if (reasons.includes("directRef")) { - score *= validatedWeights.directRef; - explain.reason = "directRef"; - confidence = 1.0; // Direct reference = highest confidence - } else if (reasons.includes("namespaceMember")) { - score *= validatedWeights.namespaceMember; - explain.reason = "namespaceMember"; - confidence = 0.9; // Namespace access is fairly reliable - } else if (reasons.includes("importAlias")) { - score *= validatedWeights.importAlias; - explain.reason = "importAlias"; - confidence = 0.85; // Import alias tracking is reliable - } else if (reasons.includes("fileLevelChange")) { - score *= validatedWeights.transitive * 0.9; - explain.reason = "fileLevelChange"; - confidence = 0.5; - } else { - score *= validatedWeights.transitive; - explain.reason = "transitive"; - confidence = 0.6; // Transitive impact is less certain - } - - // Exported symbols are more important (configurable) - if (changedSymbol.exported) { - score *= validatedWeights.exported; - explain.exported = true; - } - - // Calculate fan-in (how many files depend on the impacted file) - const fanInCounts = 
fanInByFile ?? getCachedFanInByFile(index); - const fanIn = fanInCounts.get(ref.file) ?? 0; - if (fanIn > 0) { - const fanInFactor = 1 + Math.min(Math.log10(fanIn + 1), 1); // Cap at doubling - score *= fanInFactor; - explain.fanIn = fanIn; - } - - // Same-file references are more important (configurable) - if (ref.file === changedSymbol.file) { - score *= validatedWeights.sameFile; - explain.sameFile = true; - } - - // Type-only changes are less severe (configurable) - if (changedSymbol.typeOnly) { - score *= validatedWeights.typeOnly; - explain.typeOnly = true; - } - - // Generate hints based on changed symbol characteristics - if (changedSymbol.exported) { - hints.push("exportChanged"); - } - - // signatureChanged is pre-computed once per symbol in locateChangedSymbolsWithLines - // (via computeSignatureChanged) so we don't re-parse the AST for every reference. - if (changedSymbol.signatureChanged) { - hints.push("signatureChanged"); - } - - if (hints.length) { - explain.hints = hints; - } - - // Depth decay (configurable) - score *= Math.pow(validatedWeights.depthDecay, depth); - explain.depth = depth; - - // Reduce confidence for deeper transitive impacts - confidence *= Math.pow(0.9, depth); - - return { - severity: Math.min(1.0, Math.max(0.0, score)), - confidence: Math.min(1.0, Math.max(0.0, confidence)), - explain, - }; -} - -function calculateTransitiveSeverity(edge: Edge, depth: number): number { - let score = 0.3; // Base transitive score - - // Type-only edges are less severe - if (edge.typeOnly) { - score *= 0.6; - } - - // Depth decay - score *= Math.pow(0.7, depth); - - return score; -} diff --git a/src/impact/direct.ts b/src/impact/direct.ts new file mode 100644 index 00000000..b4847219 --- /dev/null +++ b/src/impact/direct.ts @@ -0,0 +1,154 @@ +import type { FileId } from "../types.js"; +import type { ProjectIndex, SymbolDef } from "../indexer.js"; +import { findReferences } from "../indexer.js"; +import { Semaphore } from "../util/semaphore.js"; 
+import type { ChangedSymbol, ImpactItem, ImpactOptions, ImpactReason } from "./types.js"; +import { calculateSeverity, selectStrongerImpactReason } from "./severity.js"; + +type ImpactEmitter = (item: ImpactItem, phase: "partial" | "final") => void; + +export type DirectImpactOptions = Pick< + ImpactOptions, + "refContext" | "refContextLines" | "refBlockMaxLines" | "diagnostics" +> & { + maxRefs: number; + includeTests: boolean; +}; + +export type DirectImpactContext = { + index: ProjectIndex; + changedSymbols: ChangedSymbol[]; + impacted: Map; + processedSymbols: Set; + isIndexTestFile: (file: FileId) => boolean; + isIgnored: (file: FileId) => boolean; + fanInByFile: Map; + options: DirectImpactOptions; + emitImpactItem: ImpactEmitter; +}; + +function referenceScanLimitForKeptRefs(maxRefs: number): number { + return Math.max(maxRefs + 50, maxRefs * 4); +} + +export async function analyzeDirectReferences(context: DirectImpactContext): Promise { + const semaphore = new Semaphore(8); + const tasks: Array> = []; + + for (const changedSymbol of context.changedSymbols) { + if (context.processedSymbols.has(changedSymbol.id)) continue; + context.processedSymbols.add(changedSymbol.id); + + tasks.push(semaphore.withPermit(async () => analyzeChangedSymbolReferences(context, changedSymbol))); + } + + await Promise.all(tasks); +} + +async function analyzeChangedSymbolReferences( + context: DirectImpactContext, + changedSymbol: ChangedSymbol, +): Promise { + const { index, options } = context; + const refs = await findReferences( + index, + { + def: { + file: changedSymbol.file, + localName: changedSymbol.name, + kind: changedSymbol.kind, + range: changedSymbol.range, + } as SymbolDef, + }, + options.refContext + ? 
{ + context: options.refContext, + ...(options.refContextLines !== undefined && { + lines: options.refContextLines, + }), + ...(options.refBlockMaxLines !== undefined && { + blockMaxLines: options.refBlockMaxLines, + }), + maxReferences: referenceScanLimitForKeptRefs(options.maxRefs), + } + : { maxReferences: referenceScanLimitForKeptRefs(options.maxRefs) }, + ); + + if (refs.status !== "ok") return; + + let keptRefs = 0; + for (let refIndex = 0; refIndex < refs.references.length; refIndex += 1) { + const ref = refs.references[refIndex]!; + const diagnostics = options.diagnostics; + if (diagnostics) diagnostics.refsScanned += 1; + if (!options.includeTests && context.isIndexTestFile(ref.file)) { + if (diagnostics) diagnostics.refsFilteredTests += 1; + continue; + } + if (context.isIgnored(ref.file)) { + if (diagnostics) diagnostics.refsFilteredIgnored += 1; + continue; + } + if (keptRefs >= options.maxRefs) { + if (diagnostics) { + diagnostics.refsDroppedByMaxRefs += refs.references.length - refIndex; + } + break; + } + keptRefs += 1; + + let reason: ImpactReason = "directRef"; + if (ref.via?.namespaceMember) { + reason = "namespaceMember"; + } else if (ref.via?.import) { + reason = "importAlias"; + } + + const severityResult = calculateSeverity(changedSymbol, ref, [reason], 0, index, context.fanInByFile); + const existing = context.impacted.get(ref.file); + const reasons: ImpactReason[] = existing?.reasons ? [...existing.reasons] : []; + if (!reasons.includes(reason)) { + reasons.push(reason); + } + + const symbols = existing?.symbols ? [...existing.symbols] : []; + if (!symbols.includes(changedSymbol.name)) { + symbols.push(changedSymbol.name); + } + + const existingRefs = existing?.refs ? [...existing.refs] : []; + if (options.refContext && ref.context !== undefined) { + existingRefs.push({ range: ref.range, context: ref.context }); + } + + const existingHints = existing?.explain?.hints ?? []; + const newHints = severityResult.explain.hints ?? 
[]; + const mergedHints = + !existingHints.length && !newHints.length ? undefined : [...new Set([...existingHints, ...newHints])]; + const bestReason = selectStrongerImpactReason(existing?.explain?.reason, severityResult.explain.reason); + + const impactItem: ImpactItem = { + file: ref.file, + symbols, + reasons, + severity: Math.max(existing?.severity ?? 0, severityResult.severity), + depth: 0, + ...(options.refContext && existingRefs.length ? { refs: existingRefs } : {}), + explain: { + ...existing?.explain, + ...severityResult.explain, + ...(bestReason !== undefined && { reason: bestReason }), + ...(mergedHints && { hints: mergedHints }), + refsCount: (existing?.explain?.refsCount ?? 0) + 1, + }, + confidence: Math.max(existing?.confidence ?? 0, severityResult.confidence), + }; + + if (changedSymbol.typeOnly !== undefined) { + impactItem.typeOnly = changedSymbol.typeOnly; + } + + context.impacted.set(ref.file, impactItem); + context.emitImpactItem(impactItem, "partial"); + } +} diff --git a/src/impact/map.ts b/src/impact/map.ts index d0f32013..6065c470 100644 --- a/src/impact/map.ts +++ b/src/impact/map.ts @@ -1,6 +1,13 @@ import type { FileId } from "../types.js"; import type { ProjectIndex, SymbolDef, SymbolHandle } from "../indexer.js"; import { ensureParsedContext } from "../indexer.js"; +import { + buildTrackedSymbolPositions, + findLocalByStartPosition, + findTrackedDeclarationNameInAncestors, + isProjectSymbolExported, + symbolHandleFromLocal, +} from "../indexer/declarations.js"; import { isGraphOnlyLanguage } from "../documentLinks.js"; import { supportForFile } from "../languages.js"; import type { LanguageSupport } from "../languages.js"; @@ -10,11 +17,6 @@ import { collectChangedLines } from "./hunks.js"; export { collectChangedLines } from "./hunks.js"; -function symbolHandleFromLocal(file: FileId, local: SymbolDef): string { - const index = local.range.start.index ?? 
0; - return `${file}::${local.localName}::${index}`; -} - export async function locateChangedSymbols( index: ProjectIndex, file: FileId, @@ -69,7 +71,7 @@ export async function locateChangedSymbolsWithLines( // Pre-build an O(1) position lookup so findDeclarationNameInAncestors does // not do an O(locals) scan for every candidate declaration name node. - const trackedPositions = mod ? buildTrackedPositions(mod.locals) : undefined; + const trackedPositions = mod ? buildTrackedSymbolPositions(mod.locals) : undefined; for (const node of changedNodes) { const classification = classifyChangedNode(node, source, sup); @@ -112,7 +114,7 @@ export async function locateChangedSymbolsWithLines( file, name: entry.symbolDef.localName, kind: entry.symbolDef.kind, - exported: isExported(index, file, entry.symbolDef), + exported: isProjectSymbolExported(index, file, entry.symbolDef), range: entry.symbolDef.range, typeOnly: entry.typeOnly, changedLines: [...entry.lines].sort((a, b) => a - b), @@ -146,7 +148,7 @@ export async function mapChangedLinesToSymbols( const changedLines = changedLinesOverride ?? collectChangedLines(hunks); const mod = index.byFile.get(file); - const trackedPositions = mod ? buildTrackedPositions(mod.locals) : undefined; + const trackedPositions = mod ? buildTrackedSymbolPositions(mod.locals) : undefined; const nodes = findNodesInLines(tree, changedLines); const linesByHandle = new Map>(); @@ -291,41 +293,6 @@ function isTypeOnlyDeclaration(node: SyntaxNodeLike, source: string): boolean { return false; } -/** Build an O(1)-lookup set of tracked symbol positions ("line:col") from locals. 
*/ -function buildTrackedPositions(locals: readonly SymbolDef[]): Set { - const set = new Set(); - for (const l of locals) { - set.add(`${l.range.start.line}:${l.range.start.column}`); - } - return set; -} - -function findDeclarationNameInAncestors( - node: SyntaxNodeLike, - sup: LanguageSupport, - trackedPositions?: ReadonlySet, -): SyntaxNodeLike | null { - let cur: SyntaxNodeLike | null = node; - while (cur) { - for (const ch of cur.namedChildren || []) { - if (sup.isDeclarationName?.(ch)) { - // If we have a tracked-position set, only stop at names that are - // actually in the index. This prevents the search from halting at - // declaration names for symbols not tracked as separate locals (e.g. - // class method names) and allows climbing to a tracked ancestor instead. - if (trackedPositions) { - const line = (ch.startPosition?.row ?? 0) + 1; - const col = (ch.startPosition?.column ?? 0) + 1; - if (!trackedPositions.has(`${line}:${col}`)) continue; - } - return ch; - } - } - cur = cur.parent; - } - return null; -} - const SIGNATURE_DECL_TYPES = new Set([ "function_declaration", "function_definition", @@ -491,9 +458,7 @@ function findSymbolHandleForNode( if (classification?.type === "definition" && isDefinitionNameNode(node, sup, source)) { const definitionLine = node.startPosition?.row + 1; const definitionColumn = node.startPosition?.column + 1; - const local = mod.locals.find( - (l) => l.range.start.line === definitionLine && l.range.start.column === definitionColumn, - ); + const local = findLocalByStartPosition(mod.locals, definitionLine, definitionColumn); if (local) { return symbolHandleFromLocal(file, local); } @@ -507,32 +472,17 @@ function findSymbolHandleForNode( // Pass trackedPositions (pre-built from mod.locals) so the search skips // untracked names (e.g., method names when methods are not in locals) and // continues climbing to a tracked ancestor. 
- const nameNode = findDeclarationNameInAncestors(node, sup, trackedPositions); + const nameNode = findTrackedDeclarationNameInAncestors(node, sup, trackedPositions); if (nameNode) { const ancestorLine = nameNode.startPosition?.row + 1; const ancestorColumn = nameNode.startPosition?.column + 1; - const local = mod.locals.find( - (l) => l.range.start.line === ancestorLine && l.range.start.column === ancestorColumn, - ); + const local = findLocalByStartPosition(mod.locals, ancestorLine, ancestorColumn); return local ? symbolHandleFromLocal(file, local) : null; } return null; } -function isExported(index: ProjectIndex, file: FileId, symbolDef: SymbolDef): boolean { - const mod = index.byFile.get(file); - if (!mod) return false; - - const symbolIndex = symbolDef.range.start.index ?? 0; - return mod.exports.some( - (e) => - e.type === "local" && - e.target.localName === symbolDef.localName && - (e.target.range.start.index ?? 0) === symbolIndex, - ); -} - function isStyleDefinitionNode(node: SyntaxNodeLike, sup: LanguageSupport): boolean { const parentType = node.parent?.type ?? 
""; if (sup.id === "css" || sup.id === "less") { diff --git a/src/impact/report.ts b/src/impact/report.ts index 60133791..ae00400c 100644 --- a/src/impact/report.ts +++ b/src/impact/report.ts @@ -5,7 +5,6 @@ import type { FileChange, ChangedSymbol, ImpactItem, - ImpactReason, ImpactReport, CompactImpactReport, ImpactOptions, @@ -14,17 +13,16 @@ import type { ReexportChainEntry, ImpactTopItem, ImpactSurfaceArea, - CompactImpactSurfaceArea, ImpactCluster, - CompactImpactCluster, ImpactCycle, ImpactDiagnostics, } from "./types.js"; -import { IMPACT_SCHEMA_VERSION } from "./types.js"; import { buildSymbolGraphDetailed, findDetailedCycles } from "../graphs.js"; import { discoverProjectFiles, normalizePath, resolveFilePathFromRoot } from "../util.js"; import { newFileRangeForHunk } from "./hunks.js"; import { createGraphFileResolver, normalizeImpactFileChange, toImpactReportFilePath } from "./path.js"; +import { buildCompactImpactReport } from "./reportCompact.js"; +import { buildFullImpactReport } from "./reportFull.js"; export { newFileRangeForHunk } from "./hunks.js"; export async function buildImpactReport( @@ -124,7 +122,7 @@ export async function buildImpactReport( // Check if compact format is requested if (options.compact) { - const report = buildCompactReport( + const report = buildCompactImpactReport({ changedFiles, changedSymbols, impactedItems, @@ -139,104 +137,30 @@ export async function buildImpactReport( symbolEdges, projectFiles, displayFile, - ); + }); if (options.warning) report.warning = options.warning; if (diagnostics) report.diagnostics = diagnostics; return report; } - const report: ImpactReport = { - schemaVersion: IMPACT_SCHEMA_VERSION, - format: "full", + return buildFullImpactReport({ projectFiles, changedFiles, - changedSymbols: changedSymbols.map((symbol) => ({ - ...symbol, - file: displayFile(symbol.file), - })), - impacted: impactedItems.map((item) => ({ - ...item, - file: displayFile(item.file), - })), - ...(suggestions.length - ? 
{ - suggestions: suggestions.map((suggestion) => ({ - ...suggestion, - file: displayFile(suggestion.file), - ...(suggestion.relatedFile ? { relatedFile: displayFile(suggestion.relatedFile) } : {}), - })), - } - : {}), - ...(exportSummary.length - ? { - exportSummary: exportSummary.map((entry) => ({ - ...entry, - file: displayFile(entry.file), - })), - } - : {}), - ...(reexportChains - ? { - reexportChains: { - chains: reexportChains.chains.map((entry) => ({ - ...entry, - file: displayFile(entry.file), - paths: entry.paths.map((pathChain) => pathChain.map((file) => displayFile(file))), - })), - }, - } - : {}), - ...(topImpacts.length - ? { - topImpacts: topImpacts.map((item) => ({ - ...item, - file: displayFile(item.file), - })), - } - : {}), - surfaceArea: { - files: surfaceArea.files.map((item) => ({ - ...item, - file: displayFile(item.file), - })), - topFanIn: surfaceArea.topFanIn.map((file) => displayFile(file)), - topFanOut: surfaceArea.topFanOut.map((file) => displayFile(file)), - }, - clusters: clusters.map((cluster) => ({ - ...cluster, - files: cluster.files.map((file) => displayFile(file)), - changedFiles: cluster.changedFiles.map((file) => displayFile(file)), - })), - ...(cycles.length - ? 
{ - cycles: cycles.map((cycle) => ({ - ...cycle, - files: cycle.files.map((file) => displayFile(file)), - entryEdges: cycle.entryEdges.map((edge) => ({ - ...edge, - from: displayFile(edge.from), - to: displayFile(edge.to), - })), - internalEdges: cycle.internalEdges.map((edge) => ({ - ...edge, - from: displayFile(edge.from), - to: displayFile(edge.to), - })), - })), - } - : {}), - graph: { - fileEdges: fileEdges.map((edge) => ({ - ...edge, - from: displayFile(edge.from), - to: displayFile(edge.to), - })), - symbolEdges, - }, - }; - if (diagnostics) report.diagnostics = diagnostics; - if (options.warning) report.warning = options.warning; - return report; + changedSymbols, + impactedItems, + suggestions, + exportSummary, + reexportChains, + topImpacts, + surfaceArea, + clusters, + cycles, + fileEdges, + symbolEdges, + displayFile, + diagnostics, + warning: options.warning, + }); } function buildImpactCycles( @@ -271,272 +195,6 @@ function buildImpactCycles( return out; } -function buildCompactReport( - changedFiles: Array<{ - file: FileId; - hunks: Array<{ start: number; end: number }>; - }>, - changedSymbols: ChangedSymbol[], - impactedItems: ImpactItem[], - suggestions: ImpactSuggestion[], - exportSummary: ExportSummaryEntry[], - reexportChains: { chains: ReexportChainEntry[] } | undefined, - topImpacts: ImpactTopItem[], - surfaceArea: ImpactSurfaceArea, - clusters: ImpactCluster[], - cycles: ImpactCycle[], - fileEdges: Array<{ - from: FileId; - to: FileId; - typeOnly?: boolean | undefined; - }>, - symbolEdges: Array<{ from: number; to: number; label: string }>, - projectFiles: ProjectIndex["projectFiles"], - displayFile: (file: FileId) => FileId, -): CompactImpactReport { - // Collect all unique file paths - const allFiles = new Set(); - - // Add files from changedFiles - for (const cf of changedFiles) { - allFiles.add(displayFile(cf.file)); - } - - // Add files from changedSymbols - for (const cs of changedSymbols) { - allFiles.add(displayFile(cs.file)); - } - - 
// Add files from impactedItems - for (const ii of impactedItems) { - allFiles.add(displayFile(ii.file)); - } - - // Add files from fileEdges - for (const fe of fileEdges) { - allFiles.add(displayFile(fe.from)); - allFiles.add(displayFile(fe.to)); - } - - // Add files from surface area - for (const item of surfaceArea.files) { - allFiles.add(displayFile(item.file)); - } - for (const file of surfaceArea.topFanIn) { - allFiles.add(displayFile(file)); - } - for (const file of surfaceArea.topFanOut) { - allFiles.add(displayFile(file)); - } - - for (const cycle of cycles) { - for (const file of cycle.files) allFiles.add(displayFile(file)); - } - - // Add files from suggestions - for (const suggestion of suggestions) { - allFiles.add(displayFile(suggestion.file)); - if (suggestion.relatedFile) allFiles.add(displayFile(suggestion.relatedFile)); - } - - if (reexportChains) { - for (const chain of reexportChains.chains) { - allFiles.add(displayFile(chain.file)); - for (const pathChain of chain.paths) { - for (const file of pathChain) { - allFiles.add(displayFile(file)); - } - } - } - } - - const filesArray = Array.from(allFiles); - const fileIndex = new Map(); - for (let i = 0; i < filesArray.length; i++) { - fileIndex.set(filesArray[i]!, i); - } - - // Convert to compact format - const compactChangedFiles = changedFiles.map((cf) => ({ - file: fileIndex.get(displayFile(cf.file))!, - hunks: cf.hunks, - })); - - const compactChangedSymbols = changedSymbols.map((cs) => { - const symbol: { - id: string; - file: number; - name: string; - kind: typeof cs.kind; - exported: boolean; - range: typeof cs.range; - typeOnly?: boolean; - } = { - id: cs.id, - file: fileIndex.get(displayFile(cs.file))!, - name: cs.name, - kind: cs.kind, - exported: cs.exported, - range: cs.range, - }; - - if (cs.typeOnly !== undefined) { - symbol.typeOnly = cs.typeOnly; - } - - return symbol; - }); - - const compactImpacted = impactedItems.map((ii) => { - const item: { - file: number; - symbols: string[]; 
- reasons: ImpactReason[]; - severity: number; - confidence?: number; - depth?: number; - typeOnly?: boolean; - explain?: NonNullable; - } = { - file: fileIndex.get(displayFile(ii.file))!, - symbols: ii.symbols, - reasons: ii.reasons, - severity: ii.severity, - ...(ii.confidence !== undefined ? { confidence: ii.confidence } : {}), - ...(ii.depth !== undefined ? { depth: ii.depth } : {}), - ...(ii.typeOnly !== undefined ? { typeOnly: ii.typeOnly } : {}), - ...(ii.explain !== undefined ? { explain: ii.explain } : {}), - }; - - return item; - }); - - const compactSuggestions = - suggestions.length - ? suggestions.map((suggestion) => ({ - file: fileIndex.get(displayFile(suggestion.file))!, - kind: suggestion.kind, - ...(suggestion.range ? { range: suggestion.range } : {}), - ...(suggestion.symbol ? { symbol: suggestion.symbol } : {}), - ...(suggestion.relatedFile !== undefined - ? { relatedFile: fileIndex.get(displayFile(suggestion.relatedFile))! } - : {}), - ...(suggestion.details ? { details: suggestion.details } : {}), - confidence: suggestion.confidence, - })) - : undefined; - - const compactExportSummary = - exportSummary.length - ? exportSummary.map((entry) => ({ - file: fileIndex.get(displayFile(entry.file))!, - symbols: entry.symbols, - })) - : undefined; - - const compactReexportChains = reexportChains - ? { - chains: reexportChains.chains.map((entry) => ({ - symbol: entry.symbol, - file: fileIndex.get(displayFile(entry.file))!, - paths: entry.paths.map((pathChain) => pathChain.map((file) => fileIndex.get(displayFile(file))!)), - })), - } - : undefined; - - const compactTopImpacts = - topImpacts.length - ? topImpacts.map((item) => ({ - file: fileIndex.get(displayFile(item.file))!, - symbols: item.symbols, - reasons: item.reasons, - severity: item.severity, - ...(item.confidence !== undefined ? { confidence: item.confidence } : {}), - ...(item.depth !== undefined ? { depth: item.depth } : {}), - ...(item.typeOnly !== undefined ? 
{ typeOnly: item.typeOnly } : {}), - ...(item.explain ? { explain: item.explain } : {}), - })) - : undefined; - - const compactSurfaceArea: CompactImpactSurfaceArea = { - files: surfaceArea.files.map((item) => ({ - file: fileIndex.get(displayFile(item.file))!, - fanIn: item.fanIn, - fanOut: item.fanOut, - changed: item.changed, - impacted: item.impacted, - })), - topFanIn: surfaceArea.topFanIn.map((file) => fileIndex.get(displayFile(file))!), - topFanOut: surfaceArea.topFanOut.map((file) => fileIndex.get(displayFile(file))!), - }; - - const compactClusters: CompactImpactCluster[] = clusters.map((cluster) => ({ - id: cluster.id, - files: cluster.files.map((file) => fileIndex.get(displayFile(file))!), - changedFiles: cluster.changedFiles.map((file) => fileIndex.get(displayFile(file))!), - totalSeverity: cluster.totalSeverity, - })); - - const compactCycles = - cycles.length - ? cycles.map((cycle) => ({ - files: cycle.files.map((file) => fileIndex.get(displayFile(file))!), - entryEdges: cycle.entryEdges.map((edge) => ({ - from: fileIndex.get(displayFile(edge.from))!, - to: fileIndex.get(displayFile(edge.to))!, - raw: edge.raw, - ...(edge.typeOnly !== undefined ? { typeOnly: edge.typeOnly } : {}), - })), - internalEdges: cycle.internalEdges.map((edge) => ({ - from: fileIndex.get(displayFile(edge.from))!, - to: fileIndex.get(displayFile(edge.to))!, - raw: edge.raw, - ...(edge.typeOnly !== undefined ? 
{ typeOnly: edge.typeOnly } : {}), - })), - fileCount: cycle.fileCount, - internalEdgeCount: cycle.internalEdgeCount, - fanInFromOutside: cycle.fanInFromOutside, - priorityScore: cycle.priorityScore, - remediationHint: cycle.remediationHint, - touchesChangedFile: cycle.touchesChangedFile, - touchesImpactedFile: cycle.touchesImpactedFile, - severity: cycle.severity, - })) - : undefined; - - const compactFileEdges = fileEdges.map((fe) => { - const edge: { from: number; to: number; typeOnly?: boolean } = { - from: fileIndex.get(displayFile(fe.from))!, - to: fileIndex.get(displayFile(fe.to))!, - }; - if (fe.typeOnly !== undefined) { - edge.typeOnly = fe.typeOnly; - } - return edge; - }); - - return { - schemaVersion: IMPACT_SCHEMA_VERSION, - format: "compact", - ...(projectFiles ? { projectFiles } : {}), - files: filesArray, - changedFiles: compactChangedFiles, - changedSymbols: compactChangedSymbols, - impacted: compactImpacted, - ...(compactSuggestions ? { suggestions: compactSuggestions } : {}), - ...(compactExportSummary ? { exportSummary: compactExportSummary } : {}), - ...(compactReexportChains ? { reexportChains: compactReexportChains } : {}), - ...(compactTopImpacts ? { topImpacts: compactTopImpacts } : {}), - surfaceArea: compactSurfaceArea, - clusters: compactClusters, - ...(compactCycles ? 
{ cycles: compactCycles } : {}), - graph: { - fileEdges: compactFileEdges, - symbolEdges, - }, - }; -} - type ReexportEdge = { exporter: FileId; type: "reexport" | "exportStar" | "namespaceReexport"; diff --git a/src/impact/reportCompact.ts b/src/impact/reportCompact.ts new file mode 100644 index 00000000..4484726d --- /dev/null +++ b/src/impact/reportCompact.ts @@ -0,0 +1,298 @@ +import type { FileId } from "../types.js"; +import type { ProjectIndex } from "../indexer.js"; +import { IMPACT_SCHEMA_VERSION } from "./types.js"; +import type { + ChangedSymbol, + CompactImpactCluster, + CompactImpactReport, + CompactImpactSurfaceArea, + ExportSummaryEntry, + ImpactCluster, + ImpactCycle, + ImpactItem, + ImpactReason, + ImpactSuggestion, + ImpactSurfaceArea, + ImpactTopItem, + ReexportChainEntry, +} from "./types.js"; + +export type CompactImpactReportParts = { + changedFiles: Array<{ + file: FileId; + hunks: Array<{ start: number; end: number }>; + }>; + changedSymbols: ChangedSymbol[]; + impactedItems: ImpactItem[]; + suggestions: ImpactSuggestion[]; + exportSummary: ExportSummaryEntry[]; + reexportChains: { chains: ReexportChainEntry[] } | undefined; + topImpacts: ImpactTopItem[]; + surfaceArea: ImpactSurfaceArea; + clusters: ImpactCluster[]; + cycles: ImpactCycle[]; + fileEdges: Array<{ + from: FileId; + to: FileId; + typeOnly?: boolean | undefined; + }>; + symbolEdges: Array<{ from: number; to: number; label: string }>; + projectFiles: ProjectIndex["projectFiles"]; + displayFile: (file: FileId) => FileId; +}; + +export function buildCompactImpactReport(parts: CompactImpactReportParts): CompactImpactReport { + const context = buildCompactSerializerContext(parts); + + return { + schemaVersion: IMPACT_SCHEMA_VERSION, + format: "compact", + ...(parts.projectFiles ? 
{ projectFiles: parts.projectFiles } : {}), + files: context.files, + changedFiles: parts.changedFiles.map((fileChange) => ({ + file: context.fileId(fileChange.file), + hunks: fileChange.hunks, + })), + changedSymbols: parts.changedSymbols.map((symbol) => compactChangedSymbol(symbol, context.fileId(symbol.file))), + impacted: parts.impactedItems.map((item) => compactImpactItem(item, context.fileId(item.file))), + ...buildCompactSuggestions(parts.suggestions, context.fileId), + ...buildCompactExportSummary(parts.exportSummary, context.fileId), + ...buildCompactReexportChains(parts.reexportChains, context.fileId), + ...buildCompactTopImpacts(parts.topImpacts, context.fileId), + surfaceArea: buildCompactSurfaceArea(parts.surfaceArea, context.fileId), + clusters: buildCompactClusters(parts.clusters, context.fileId), + ...buildCompactCycles(parts.cycles, context.fileId), + graph: { + fileEdges: parts.fileEdges.map((edge) => { + const compactEdge: { from: number; to: number; typeOnly?: boolean } = { + from: context.fileId(edge.from), + to: context.fileId(edge.to), + }; + if (edge.typeOnly !== undefined) { + compactEdge.typeOnly = edge.typeOnly; + } + return compactEdge; + }), + symbolEdges: parts.symbolEdges, + }, + }; +} + +type CompactSerializerContext = { + files: FileId[]; + fileId: (file: FileId) => number; +}; + +function buildCompactSerializerContext(parts: CompactImpactReportParts): CompactSerializerContext { + const allFiles = new Set(); + const addFile = (file: FileId): void => { + allFiles.add(parts.displayFile(file)); + }; + + for (const fileChange of parts.changedFiles) addFile(fileChange.file); + for (const symbol of parts.changedSymbols) addFile(symbol.file); + for (const item of parts.impactedItems) addFile(item.file); + for (const edge of parts.fileEdges) { + addFile(edge.from); + addFile(edge.to); + } + for (const item of parts.surfaceArea.files) addFile(item.file); + for (const file of parts.surfaceArea.topFanIn) addFile(file); + for (const file of 
parts.surfaceArea.topFanOut) addFile(file); + for (const cycle of parts.cycles) { + for (const file of cycle.files) addFile(file); + } + for (const suggestion of parts.suggestions) { + addFile(suggestion.file); + if (suggestion.relatedFile) addFile(suggestion.relatedFile); + } + if (parts.reexportChains) { + for (const chain of parts.reexportChains.chains) { + addFile(chain.file); + for (const pathChain of chain.paths) { + for (const file of pathChain) addFile(file); + } + } + } + + const files = Array.from(allFiles); + const fileIndex = new Map(); + for (let i = 0; i < files.length; i++) { + fileIndex.set(files[i]!, i); + } + return { + files, + fileId: (file: FileId): number => { + const id = fileIndex.get(parts.displayFile(file)); + if (id === undefined) { + throw new Error(`Missing file path in compact impact report index: ${file}`); + } + return id; + }, + }; +} + +function compactChangedSymbol(symbol: ChangedSymbol, file: number): CompactImpactReport["changedSymbols"][number] { + const compact: CompactImpactReport["changedSymbols"][number] = { + id: symbol.id, + file, + name: symbol.name, + kind: symbol.kind, + exported: symbol.exported, + range: symbol.range, + }; + if (symbol.typeOnly !== undefined) { + compact.typeOnly = symbol.typeOnly; + } + return compact; +} + +function compactImpactItem(item: ImpactItem, file: number): CompactImpactReport["impacted"][number] { + const compact: { + file: number; + symbols: string[]; + reasons: ImpactReason[]; + severity: number; + confidence?: number; + depth?: number; + typeOnly?: boolean; + explain?: NonNullable; + } = { + file, + symbols: item.symbols, + reasons: item.reasons, + severity: item.severity, + ...(item.confidence !== undefined ? { confidence: item.confidence } : {}), + ...(item.depth !== undefined ? { depth: item.depth } : {}), + ...(item.typeOnly !== undefined ? { typeOnly: item.typeOnly } : {}), + ...(item.explain !== undefined ? 
{ explain: item.explain } : {}), + }; + return compact; +} + +function buildCompactSuggestions( + suggestions: ImpactSuggestion[], + fileId: (file: FileId) => number, +): Pick { + if (!suggestions.length) return {}; + return { + suggestions: suggestions.map((suggestion) => ({ + file: fileId(suggestion.file), + kind: suggestion.kind, + ...(suggestion.range ? { range: suggestion.range } : {}), + ...(suggestion.symbol ? { symbol: suggestion.symbol } : {}), + ...(suggestion.relatedFile !== undefined ? { relatedFile: fileId(suggestion.relatedFile) } : {}), + ...(suggestion.details ? { details: suggestion.details } : {}), + confidence: suggestion.confidence, + })), + }; +} + +function buildCompactExportSummary( + exportSummary: ExportSummaryEntry[], + fileId: (file: FileId) => number, +): Pick { + if (!exportSummary.length) return {}; + return { + exportSummary: exportSummary.map((entry) => ({ + file: fileId(entry.file), + symbols: entry.symbols, + })), + }; +} + +function buildCompactReexportChains( + reexportChains: { chains: ReexportChainEntry[] } | undefined, + fileId: (file: FileId) => number, +): Pick { + if (!reexportChains) return {}; + return { + reexportChains: { + chains: reexportChains.chains.map((entry) => ({ + symbol: entry.symbol, + file: fileId(entry.file), + paths: entry.paths.map((pathChain) => pathChain.map((file) => fileId(file))), + })), + }, + }; +} + +function buildCompactTopImpacts( + topImpacts: ImpactTopItem[], + fileId: (file: FileId) => number, +): Pick { + if (!topImpacts.length) return {}; + return { + topImpacts: topImpacts.map((item) => ({ + file: fileId(item.file), + symbols: item.symbols, + reasons: item.reasons, + severity: item.severity, + ...(item.confidence !== undefined ? { confidence: item.confidence } : {}), + ...(item.depth !== undefined ? { depth: item.depth } : {}), + ...(item.typeOnly !== undefined ? { typeOnly: item.typeOnly } : {}), + ...(item.explain ? 
{ explain: item.explain } : {}), + })), + }; +} + +function buildCompactSurfaceArea( + surfaceArea: ImpactSurfaceArea, + fileId: (file: FileId) => number, +): CompactImpactSurfaceArea { + return { + files: surfaceArea.files.map((item) => ({ + file: fileId(item.file), + fanIn: item.fanIn, + fanOut: item.fanOut, + changed: item.changed, + impacted: item.impacted, + })), + topFanIn: surfaceArea.topFanIn.map((file) => fileId(file)), + topFanOut: surfaceArea.topFanOut.map((file) => fileId(file)), + }; +} + +function buildCompactClusters( + clusters: ImpactCluster[], + fileId: (file: FileId) => number, +): CompactImpactCluster[] { + return clusters.map((cluster) => ({ + id: cluster.id, + files: cluster.files.map((file) => fileId(file)), + changedFiles: cluster.changedFiles.map((file) => fileId(file)), + totalSeverity: cluster.totalSeverity, + })); +} + +function buildCompactCycles( + cycles: ImpactCycle[], + fileId: (file: FileId) => number, +): Pick { + if (!cycles.length) return {}; + return { + cycles: cycles.map((cycle) => ({ + files: cycle.files.map((file) => fileId(file)), + entryEdges: cycle.entryEdges.map((edge) => ({ + from: fileId(edge.from), + to: fileId(edge.to), + raw: edge.raw, + ...(edge.typeOnly !== undefined ? { typeOnly: edge.typeOnly } : {}), + })), + internalEdges: cycle.internalEdges.map((edge) => ({ + from: fileId(edge.from), + to: fileId(edge.to), + raw: edge.raw, + ...(edge.typeOnly !== undefined ? 
{ typeOnly: edge.typeOnly } : {}), + })), + fileCount: cycle.fileCount, + internalEdgeCount: cycle.internalEdgeCount, + fanInFromOutside: cycle.fanInFromOutside, + priorityScore: cycle.priorityScore, + remediationHint: cycle.remediationHint, + touchesChangedFile: cycle.touchesChangedFile, + touchesImpactedFile: cycle.touchesImpactedFile, + severity: cycle.severity, + })), + }; +} diff --git a/src/impact/reportFull.ts b/src/impact/reportFull.ts new file mode 100644 index 00000000..e80320d9 --- /dev/null +++ b/src/impact/reportFull.ts @@ -0,0 +1,172 @@ +import type { FileId } from "../types.js"; +import type { ProjectIndex } from "../indexer.js"; +import { IMPACT_SCHEMA_VERSION } from "./types.js"; +import type { + ChangedSymbol, + ExportSummaryEntry, + ImpactCluster, + ImpactCycle, + ImpactDiagnostics, + ImpactItem, + ImpactReport, + ImpactSuggestion, + ImpactSurfaceArea, + ImpactTopItem, + ReexportChainEntry, +} from "./types.js"; + +export type FullImpactReportParts = { + changedFiles: Array<{ + file: FileId; + hunks: Array<{ start: number; end: number }>; + }>; + changedSymbols: ChangedSymbol[]; + impactedItems: ImpactItem[]; + suggestions: ImpactSuggestion[]; + exportSummary: ExportSummaryEntry[]; + reexportChains: { chains: ReexportChainEntry[] } | undefined; + topImpacts: ImpactTopItem[]; + surfaceArea: ImpactSurfaceArea; + clusters: ImpactCluster[]; + cycles: ImpactCycle[]; + fileEdges: Array<{ + from: FileId; + to: FileId; + typeOnly?: boolean | undefined; + }>; + symbolEdges: Array<{ from: number; to: number; label: string }>; + projectFiles: ProjectIndex["projectFiles"]; + displayFile: (file: FileId) => FileId; + diagnostics?: ImpactDiagnostics | undefined; + warning?: string | undefined; +}; + +export function buildFullImpactReport(parts: FullImpactReportParts): ImpactReport { + const report: ImpactReport = { + schemaVersion: IMPACT_SCHEMA_VERSION, + format: "full", + ...(parts.projectFiles ? 
{ projectFiles: parts.projectFiles } : {}), + changedFiles: parts.changedFiles, + changedSymbols: parts.changedSymbols.map((symbol) => ({ + ...symbol, + file: parts.displayFile(symbol.file), + })), + impacted: parts.impactedItems.map((item) => ({ + ...item, + file: parts.displayFile(item.file), + })), + ...buildFullSuggestions(parts.suggestions, parts.displayFile), + ...buildFullExportSummary(parts.exportSummary, parts.displayFile), + ...buildFullReexportChains(parts.reexportChains, parts.displayFile), + ...buildFullTopImpacts(parts.topImpacts, parts.displayFile), + surfaceArea: { + files: parts.surfaceArea.files.map((item) => ({ + ...item, + file: parts.displayFile(item.file), + })), + topFanIn: parts.surfaceArea.topFanIn.map((file) => parts.displayFile(file)), + topFanOut: parts.surfaceArea.topFanOut.map((file) => parts.displayFile(file)), + }, + clusters: parts.clusters.map((cluster) => ({ + ...cluster, + files: cluster.files.map((file) => parts.displayFile(file)), + changedFiles: cluster.changedFiles.map((file) => parts.displayFile(file)), + })), + ...buildFullCycles(parts.cycles, parts.displayFile), + graph: { + fileEdges: parts.fileEdges.map((edge) => { + const fileEdge: { from: FileId; to: FileId; typeOnly?: boolean } = { + from: parts.displayFile(edge.from), + to: parts.displayFile(edge.to), + }; + if (edge.typeOnly !== undefined) { + fileEdge.typeOnly = edge.typeOnly; + } + return fileEdge; + }), + symbolEdges: parts.symbolEdges, + }, + }; + if (parts.diagnostics) report.diagnostics = parts.diagnostics; + if (parts.warning) report.warning = parts.warning; + return report; +} + +function buildFullSuggestions( + suggestions: ImpactSuggestion[], + displayFile: (file: FileId) => FileId, +): Pick { + if (!suggestions.length) return {}; + return { + suggestions: suggestions.map((suggestion) => ({ + ...suggestion, + file: displayFile(suggestion.file), + ...(suggestion.relatedFile ? 
{ relatedFile: displayFile(suggestion.relatedFile) } : {}), + })), + }; +} + +function buildFullExportSummary( + exportSummary: ExportSummaryEntry[], + displayFile: (file: FileId) => FileId, +): Pick { + if (!exportSummary.length) return {}; + return { + exportSummary: exportSummary.map((entry) => ({ + ...entry, + file: displayFile(entry.file), + })), + }; +} + +function buildFullReexportChains( + reexportChains: { chains: ReexportChainEntry[] } | undefined, + displayFile: (file: FileId) => FileId, +): Pick { + if (!reexportChains) return {}; + return { + reexportChains: { + chains: reexportChains.chains.map((entry) => ({ + ...entry, + file: displayFile(entry.file), + paths: entry.paths.map((pathChain) => pathChain.map((file) => displayFile(file))), + })), + }, + }; +} + +function buildFullTopImpacts( + topImpacts: ImpactTopItem[], + displayFile: (file: FileId) => FileId, +): Pick { + if (!topImpacts.length) return {}; + return { + topImpacts: topImpacts.map((item) => ({ + ...item, + file: displayFile(item.file), + })), + }; +} + +function buildFullCycles( + cycles: ImpactCycle[], + displayFile: (file: FileId) => FileId, +): Pick { + if (!cycles.length) return {}; + return { + cycles: cycles.map((cycle) => ({ + ...cycle, + files: cycle.files.map((file) => displayFile(file)), + entryEdges: cycle.entryEdges.map((edge) => ({ + ...edge, + from: displayFile(edge.from), + to: displayFile(edge.to), + })), + internalEdges: cycle.internalEdges.map((edge) => ({ + ...edge, + from: displayFile(edge.from), + to: displayFile(edge.to), + })), + })), + }; +} diff --git a/src/impact/severity.ts b/src/impact/severity.ts new file mode 100644 index 00000000..ec05aeed --- /dev/null +++ b/src/impact/severity.ts @@ -0,0 +1,202 @@ +import type { FileId, Edge } from "../types.js"; +import type { ProjectIndex, Reference } from "../indexer.js"; +import type { ChangedSymbol, ImpactReason, SeverityWeights } from "./types.js"; +import { DEFAULT_SEVERITY_WEIGHTS } from "./types.js"; + +const 
REASON_PRIORITY: Readonly> = { + directRef: 4, + namespaceMember: 3, + importAlias: 2, + exportChain: 1, + transitive: 0, + fileLevelChange: 0, +}; + +export type SeverityExplain = { + reason?: ImpactReason; + exported?: boolean; + fanIn?: number; + sameFile?: boolean; + typeOnly?: boolean; + depth?: number; + hints?: string[]; +}; + +export type SeverityResult = { + severity: number; + confidence: number; + explain: SeverityExplain; +}; + +export type DependencyStats = { + fanInByFile: Map; + reverseDeps: Map; +}; + +const cachedFanInByGraph = new WeakMap>(); + +const severityWeightKeys: ReadonlyArray = [ + "directRef", + "namespaceMember", + "importAlias", + "transitive", + "exported", + "sameFile", + "typeOnly", + "depthDecay", +]; + +export function selectStrongerImpactReason( + existingReason: ImpactReason | undefined, + newReason: ImpactReason | undefined, +): ImpactReason | undefined { + if (existingReason === undefined) return newReason; + if (newReason !== undefined && REASON_PRIORITY[newReason] > REASON_PRIORITY[existingReason]) { + return newReason; + } + return existingReason; +} + +function normalizeSeverityWeights(weights: SeverityWeights): SeverityWeights { + const normalized: SeverityWeights = { ...DEFAULT_SEVERITY_WEIGHTS }; + const invalidEntries: string[] = []; + + for (const key of severityWeightKeys) { + const value = weights[key]; + if (!Number.isFinite(value) || value <= 0) { + invalidEntries.push(`${key}=${String(value)}`); + continue; + } + normalized[key] = value; + } + + if (normalized.depthDecay >= 1) { + invalidEntries.push(`depthDecay=${String(weights.depthDecay)}`); + } + + if (invalidEntries.length) { + throw new RangeError(`Invalid severity weights: ${invalidEntries.join(", ")}`); + } + + return normalized; +} + +function getCachedFanInByFile(index: ProjectIndex): Map { + const cached = cachedFanInByGraph.get(index.graph); + if (cached) return cached; + const { fanInByFile } = buildDependencyStats(index.graph.edges); + 
cachedFanInByGraph.set(index.graph, fanInByFile); + return fanInByFile; +} + +export function buildDependencyStats(edges: Edge[]): DependencyStats { + const fanInByFile = new Map(); + const reverseDeps = new Map(); + + for (const edge of edges) { + if (edge.to.type !== "file") continue; + + const nextCount = (fanInByFile.get(edge.to.path) ?? 0) + 1; + fanInByFile.set(edge.to.path, nextCount); + + const incoming = reverseDeps.get(edge.to.path); + if (incoming) { + incoming.push(edge); + continue; + } + reverseDeps.set(edge.to.path, [edge]); + } + + return { fanInByFile, reverseDeps }; +} + +export function calculateSeverity( + changedSymbol: ChangedSymbol, + ref: Reference, + reasons: ImpactReason[], + depth: number, + index: ProjectIndex, + fanInByFile?: Map, + weights: SeverityWeights = DEFAULT_SEVERITY_WEIGHTS, +): SeverityResult { + const validatedWeights = normalizeSeverityWeights(weights); + + let score = 1.0; + let confidence = 1.0; + const explain: SeverityExplain = {}; + const hints: string[] = []; + + if (reasons.includes("directRef")) { + score *= validatedWeights.directRef; + explain.reason = "directRef"; + confidence = 1.0; + } else if (reasons.includes("namespaceMember")) { + score *= validatedWeights.namespaceMember; + explain.reason = "namespaceMember"; + confidence = 0.9; + } else if (reasons.includes("importAlias")) { + score *= validatedWeights.importAlias; + explain.reason = "importAlias"; + confidence = 0.85; + } else if (reasons.includes("fileLevelChange")) { + score *= validatedWeights.transitive * 0.9; + explain.reason = "fileLevelChange"; + confidence = 0.5; + } else { + score *= validatedWeights.transitive; + explain.reason = "transitive"; + confidence = 0.6; + } + + if (changedSymbol.exported) { + score *= validatedWeights.exported; + explain.exported = true; + } + + const fanInCounts = fanInByFile ?? getCachedFanInByFile(index); + const fanIn = fanInCounts.get(ref.file) ?? 
0; + if (fanIn > 0) { + const fanInFactor = 1 + Math.min(Math.log10(fanIn + 1), 1); + score *= fanInFactor; + explain.fanIn = fanIn; + } + + if (ref.file === changedSymbol.file) { + score *= validatedWeights.sameFile; + explain.sameFile = true; + } + + if (changedSymbol.typeOnly) { + score *= validatedWeights.typeOnly; + explain.typeOnly = true; + } + + if (changedSymbol.exported) { + hints.push("exportChanged"); + } + if (changedSymbol.signatureChanged) { + hints.push("signatureChanged"); + } + if (hints.length) { + explain.hints = hints; + } + + score *= Math.pow(validatedWeights.depthDecay, depth); + explain.depth = depth; + confidence *= Math.pow(0.9, depth); + + return { + severity: Math.min(1.0, Math.max(0.0, score)), + confidence: Math.min(1.0, Math.max(0.0, confidence)), + explain, + }; +} + +export function calculateTransitiveSeverity(edge: Edge, depth: number): number { + let score = 0.3; + if (edge.typeOnly) { + score *= 0.6; + } + score *= Math.pow(0.7, depth); + return score; +} diff --git a/src/impact/transitive.ts b/src/impact/transitive.ts new file mode 100644 index 00000000..6ff41d70 --- /dev/null +++ b/src/impact/transitive.ts @@ -0,0 +1,206 @@ +import type { FileId, Edge } from "../types.js"; +import type { ProjectIndex } from "../indexer.js"; +import { compileTestPatterns, createIndexTestFileMatcher } from "./testPatterns.js"; +import type { FileChange, ImpactItem, ImpactOptions, ImpactReason } from "./types.js"; +import { createImpactIgnoreMatcher } from "./path.js"; +import { calculateTransitiveSeverity } from "./severity.js"; + +type ImpactEmitter = (item: ImpactItem, phase: "partial" | "final") => void; + +function getDependentFiles(index: ProjectIndex, filePath: FileId, reverseDeps?: Map): FileId[] { + if (reverseDeps) { + return reverseDeps.get(filePath)?.map((edge) => edge.from) ?? 
[]; + } + return index.graph.edges + .filter((edge) => edge.to.type === "file" && edge.to.path === filePath) + .map((edge) => edge.from); +} + +export function seedTransitiveFromFiles( + index: ProjectIndex, + impacted: Map, + changedFiles: FileChange[], + options: Partial & { projectRoot?: string }, + reverseDeps?: Map, + emitImpactItem?: ImpactEmitter, +): void { + const { includeTests = false, testPatterns, ignoreGlobs = [] } = options; + const projectRoot = + options.projectRoot ?? index.projectRoot ?? index.projectFiles?.find((entry) => entry.projectRoot)?.projectRoot; + const patternMatchers = compileTestPatterns(testPatterns); + const isIndexTestFile = createIndexTestFileMatcher(index, patternMatchers, projectRoot); + const fallbackPathSet = new Set(options.fileLevelFallbackPaths ?? []); + const diagnostics = options.diagnostics; + const isIgnored = projectRoot ? createImpactIgnoreMatcher(projectRoot, ignoreGlobs) : () => false; + + for (const fileChange of changedFiles) { + if (isIgnored(fileChange.path)) continue; + + const shouldSeedModifiedFallback = + fileChange.kind === "modified" && + options.fileLevelFallback && + (fallbackPathSet.has(fileChange.path) || + fileChange.isBinary || + fileChange.modeChanged || + !fileChange.hunks.length); + + if (shouldSeedModifiedFallback) { + if (impacted.has(fileChange.path)) continue; + const dependents = getDependentFiles(index, fileChange.path, reverseDeps); + if (dependents.length) { + if (diagnostics) diagnostics.fallbackSeededFiles += 1; + } + + for (const dependent of dependents) { + if (!includeTests && isIndexTestFile(dependent)) continue; + if (impacted.has(dependent) || isIgnored(dependent)) continue; + + impacted.set(dependent, { + file: dependent, + symbols: [], + reasons: ["fileLevelChange"], + severity: 0.45, + depth: 1, + explain: { + reason: "fileLevelChange", + depth: 1, + hints: ["changedFileNoSymbols"], + }, + confidence: 0.5, + }); + emitImpactItem?.(impacted.get(dependent)!, "partial"); + if 
(diagnostics) diagnostics.fallbackSeededDependents += 1; + } + continue; + } + + if (fileChange.kind !== "deleted" && fileChange.kind !== "renamed") continue; + + const lookupPaths = + fileChange.kind === "renamed" && fileChange.oldPath ? [fileChange.oldPath, fileChange.path] : [fileChange.path]; + const dependentSet = new Set(); + for (const lookupPath of lookupPaths) { + for (const dependent of getDependentFiles(index, lookupPath, reverseDeps)) { + dependentSet.add(dependent); + } + } + const dependents = [...dependentSet]; + if (dependents.length) { + if (diagnostics) diagnostics.fallbackSeededFiles += 1; + } + + for (const dependent of dependents) { + if (!includeTests && isIndexTestFile(dependent)) continue; + if (impacted.has(dependent) || isIgnored(dependent)) continue; + + const hints = ["fileDeleted"]; + if (fileChange.kind === "renamed") { + hints.push("fileRenamed"); + } + + const impactItem: ImpactItem = { + file: dependent, + symbols: [], + reasons: ["transitive"], + severity: 0.6, + depth: 1, + explain: { + reason: "transitive", + depth: 1, + hints, + }, + confidence: 0.5, + }; + + impacted.set(dependent, impactItem); + emitImpactItem?.(impactItem, "partial"); + if (diagnostics) diagnostics.fallbackSeededDependents += 1; + } + } +} + +export function analyzeTransitiveImpact( + impacted: Map, + maxDepth: number, + options: Partial & { projectRoot?: string }, + isIndexTestFile: (file: FileId) => boolean, + reverseDeps: Map, + emitImpactItem?: ImpactEmitter, +): void { + const { ignoreGlobs = [] } = options; + const isIgnored = options.projectRoot ? 
createImpactIgnoreMatcher(options.projectRoot, ignoreGlobs) : () => false; + + const visited = new Set(); + const queue: Array<{ file: FileId; depth: number; reason: ImpactReason }> = []; + + for (const [file] of impacted) { + if (isIgnored(file)) continue; + visited.add(file); + queue.push({ file, depth: 0, reason: "transitive" }); + } + + let qi = 0; + while (qi < queue.length) { + const { file, depth, reason } = queue[qi++]!; + if (depth >= maxDepth) continue; + + const edgesIn = reverseDeps.get(file) || []; + for (const edge of edgesIn) { + const dependentFile = edge.from; + if ( + visited.has(dependentFile) || + (!options.includeTests && isIndexTestFile(dependentFile)) || + isIgnored(dependentFile) + ) + continue; + + visited.add(dependentFile); + + const existing = impacted.get(dependentFile); + const reasons = existing?.reasons || []; + if (!reasons.includes(reason)) { + reasons.push(reason); + } + + const severity = calculateTransitiveSeverity(edge, depth + 1); + const upstreamConfidence = impacted.get(file)?.confidence ?? 0.6; + const nextConfidence = Math.max( + 0.2, + Math.min(1, upstreamConfidence * (edge.typeOnly ? 0.75 : 0.85) * Math.pow(0.95, depth)), + ); + + const fanIn = reverseDeps.get(dependentFile)?.length || 0; + + const transitiveItem: ImpactItem = { + file: dependentFile, + symbols: existing?.symbols || [], + reasons, + severity: Math.max(existing?.severity || 0, severity), + depth: depth + 1, + explain: { + ...existing?.explain, + reason, + depth: depth + 1, + ...(fanIn > 0 && { fanIn }), + }, + confidence: Math.max(existing?.confidence ?? 
0, nextConfidence), + }; + + if (edge.typeOnly !== undefined) { + transitiveItem.typeOnly = edge.typeOnly; + if (transitiveItem.explain) { + transitiveItem.explain.typeOnly = edge.typeOnly; + } + } + + impacted.set(dependentFile, transitiveItem); + emitImpactItem?.(transitiveItem, "partial"); + + queue.push({ + file: dependentFile, + depth: depth + 1, + reason: "exportChain", + }); + } + } +} diff --git a/src/indexer/build-cache.ts b/src/indexer/build-cache.ts index e70ce323..068ac50b 100644 --- a/src/indexer/build-cache.ts +++ b/src/indexer/build-cache.ts @@ -1,807 +1,37 @@ -import fs from "node:fs"; -import fsp from "node:fs/promises"; -import path from "node:path"; -import fg from "fast-glob"; -import crypto from "node:crypto"; -import { supportForFile } from "../languages.js"; -import { logWithLevel, type LogLevel } from "../logging.js"; -import { shouldAvoidJsFallbackForLanguage } from "../native/treeSitterNative.js"; -import { buildBloomFilterFromSource } from "../util/bloomFilter.js"; -import { SqliteDatabase } from "../sqlite-driver.js"; -import type { FallbackImportExtractionEvent } from "../graphs/specifiers.js"; -import type { GraphCacheEntry, GraphBuildOptions } from "../graphs/types.js"; -import type { Edge } from "../types.js"; -import { - DEFAULT_PROJECT_MANIFESTS, - assertFilePathWithinRoot, - getGitBlobHashes, - isFilePathWithinRoot, - listProjectFiles, - normalizePath, - normalizeResolutionHints, - stringifyUnknown, - type ProjectFileDiscoveryOptions, -} from "../util.js"; -import type { - BuildFileReport, - BuildOptions, - BuildReport, - CacheReport, - FallbackImportExtractionReport, - ManifestReport, - ModuleIndex, -} from "./types.js"; - -const PARSED_CACHE_VERSION = 1; -type ModuleCacheEntry = { - version: number; - sig: string; - mod: ModuleIndex; -}; -const memoryCache = new Map(); - -type PackageJsonDependencyInfo = { - name?: string; - dependencies?: Record; - devDependencies?: Record; - peerDependencies?: Record; - optionalDependencies?: 
Record; -}; - -export async function collectWorkspaceManifestDependencyEdges( - projectRoot: string, - discovery?: ProjectFileDiscoveryOptions, - allowedManifestFiles?: ReadonlySet, - logLevel?: LogLevel, -): Promise { - const manifestPaths = await listProjectFiles(projectRoot, ["**/package.json"], { - ...discovery, - ...(logLevel ? { logLevel } : {}), - }); - const scopedManifestPaths = allowedManifestFiles - ? manifestPaths.filter((manifestPath) => allowedManifestFiles.has(manifestPath)) - : manifestPaths; - if (!scopedManifestPaths.length) return []; - - const manifestByPackageName = new Map(); - const parsedByPath = new Map(); - - for (const manifestPath of scopedManifestPaths) { - try { - const raw = await fsp.readFile(manifestPath, "utf8"); - const parsed = JSON.parse(raw) as PackageJsonDependencyInfo; - parsedByPath.set(manifestPath, parsed); - if (typeof parsed.name === "string" && parsed.name.trim()) { - manifestByPackageName.set(parsed.name, manifestPath); - } - } catch { - continue; - } - } - - const edges: Edge[] = []; - for (const [fromManifest, parsed] of parsedByPath.entries()) { - const dependencySets = [ - parsed.dependencies, - parsed.devDependencies, - parsed.peerDependencies, - parsed.optionalDependencies, - ]; - for (const dependencySet of dependencySets) { - if (!dependencySet) continue; - for (const dependencyName of Object.keys(dependencySet)) { - const toManifest = manifestByPackageName.get(dependencyName); - if (!toManifest) continue; - edges.push({ - from: fromManifest, - to: { type: "file", path: toManifest }, - raw: dependencyName, - }); - } - } - } - - return edges; -} - -const diskCacheDatabases = new Map(); - -function cacheRoot(projectRoot: string, opts?: BuildOptions): string { - return opts?.cacheDir || path.join(projectRoot, ".codegraph-cache", "index-v1"); -} - -function diskCacheDatabasePath(projectRoot: string, opts?: BuildOptions): string { - return path.join(cacheRoot(projectRoot, opts), "index-cache.sqlite").replace(/\\/g, 
"/"); -} - -function getDiskCacheDatabase(projectRoot: string, opts?: BuildOptions): SqliteDatabase { - const dbPath = diskCacheDatabasePath(projectRoot, opts); - const existing = diskCacheDatabases.get(dbPath); - if (existing) return existing; - fs.mkdirSync(path.dirname(dbPath), { recursive: true }); - const db = new SqliteDatabase(dbPath); - db.pragma("journal_mode = WAL"); - db.pragma("synchronous = NORMAL"); - db.exec(` - CREATE TABLE IF NOT EXISTS module_cache ( - file TEXT PRIMARY KEY, - sig TEXT NOT NULL, - version INTEGER NOT NULL, - payload TEXT NOT NULL, - updated_at INTEGER NOT NULL - ); - CREATE INDEX IF NOT EXISTS idx_module_cache_sig ON module_cache(sig); - `); - diskCacheDatabases.set(dbPath, db); - return db; -} - -export function closeDiskCacheDatabase(projectRoot: string, opts?: BuildOptions): void { - const dbPath = diskCacheDatabasePath(projectRoot, opts); - const db = diskCacheDatabases.get(dbPath); - if (!db) return; - try { - db.pragma("wal_checkpoint(TRUNCATE)"); - } catch { - // checkpoint best-effort - } - try { - db.close(); - diskCacheDatabases.delete(dbPath); - } catch { - // Keep handle for later retry if close fails. 
- } -} - -export const MANIFEST_VERSION = 2; - -export type ManifestFileEntry = GraphCacheEntry; - -type ManifestBuildOptions = { - cache?: BuildOptions["cache"]; - cacheStrict?: boolean; - useBloomFilters?: boolean; - preset?: BuildOptions["preset"]; - incrementalStrict?: boolean; - discovery?: { - includeGlobs?: string[]; - ignoreGlobs?: string[]; - globRoot?: string; - gitignoreRoot?: string; - useGitignore: boolean; - }; -}; - -export type IndexManifest = { - version: number; - projectRoot: string; - updatedAt: number; - lastCommit?: string; - configHash?: string; - graphOptions?: GraphBuildOptions; - buildOptions?: ManifestBuildOptions; - files: Record; -}; - -type ConfigHashResult = { - hash: string; - error?: string; -}; - -export function normalizeIndexedFileInputs(projectRoot: string, files: readonly string[], label: string): string[] { - return Array.from(new Set(files.filter(Boolean).map((file) => assertFilePathWithinRoot(projectRoot, file, label)))); -} - -export function sanitizeManifestEntriesForRoot( - projectRoot: string, - files: Record | undefined, -): Record { - const sanitizedEntries: Record = {}; - for (const [file, entry] of Object.entries(files ?? 
{})) { - if (!isFilePathWithinRoot(projectRoot, file)) continue; - sanitizedEntries[file] = entry; - } - return sanitizedEntries; -} - -export async function computeConfigHash(projectRoot: string, logLevel?: LogLevel): Promise { - try { - const configFiles = await fg([...DEFAULT_PROJECT_MANIFESTS, "**/.gitignore"], { - cwd: projectRoot, - absolute: true, - dot: true, - ignore: [ - "**/node_modules/**", - "**/.git/**", - "**/dist/**", - "**/build/**", - "**/target/**", - "**/.venv/**", - "**/__pycache__/**", - ], - }); - configFiles.sort(); - const hash = crypto.createHash("sha1"); - let firstError: string | undefined; - for (const file of configFiles) { - try { - const content = await fsp.readFile(file, "utf8"); - const relative = path.relative(projectRoot, file).replace(/\\/g, "/"); - hash.update(relative); - hash.update(content); - } catch (error) { - const message = `Failed to read config file "${file}": ${stringifyUnknown(error)}`; - if (!firstError) firstError = message; - logWithLevel(logLevel, "debug", "computeConfigHash:", message); - } - } - return { - hash: hash.digest("hex"), - ...(firstError ? 
{ error: firstError } : {}), - }; - } catch (error) { - return { - hash: "", - error: `Failed to enumerate config files: ${stringifyUnknown(error)}`, - }; - } -} - -export function recordConfigHashResult( - manifestReport: ManifestReport | undefined, - configHashResult: { hash: string; error?: string }, - logLevel: LogLevel | undefined, -): string { - if (!configHashResult.error) return configHashResult.hash; - if (manifestReport) { - manifestReport.configHashError = configHashResult.error; - } - logWithLevel(logLevel, "warn", `Warning: ${configHashResult.error}`); - return configHashResult.hash; -} - -export type FileSignature = { - sig: string; - gitSig?: string; - cacheSig: string; - contentHash?: string; -}; - -export function initCacheReport( - report: BuildReport | undefined, - mode: BuildOptions["cache"] | undefined, -): CacheReport | undefined { - if (!report) return undefined; - if (!report.cache) { - report.cache = { mode: mode ?? "off", hits: 0, misses: 0 }; - } - return report.cache; -} - -export function initFileReport(report: BuildReport | undefined): BuildFileReport | undefined { - if (!report) return undefined; - if (!report.files) { - report.files = { total: 0, cached: 0, parsed: 0 }; - } - return report.files; -} - -export function recordFileFailure(report: BuildReport | undefined, file: string, error: unknown): void { - const fileReport = initFileReport(report); - if (!fileReport) return; - fileReport.failed = (fileReport.failed ?? 0) + 1; - const errors = fileReport.errors ?? 
[]; - if (errors.length < 20) { - errors.push({ - file: file.replace(/\\/g, "/"), - message: stringifyUnknown(error), - }); - } - fileReport.errors = errors; -} - -function initFallbackImportExtractionReport( - report: BuildReport | undefined, -): FallbackImportExtractionReport | undefined { - if (!report) return undefined; - if (!report.graph) { - report.graph = { - fallbackImportExtraction: { - total: 0, - byLanguage: {}, - byReason: { - fast: 0, - "js-fallback-unavailable": 0, - "query-error": 0, - "query-empty": 0, - }, - files: {}, - }, - }; - } else if (!report.graph.fallbackImportExtraction) { - report.graph.fallbackImportExtraction = { - total: 0, - byLanguage: {}, - byReason: { - fast: 0, - "js-fallback-unavailable": 0, - "query-error": 0, - "query-empty": 0, - }, - files: {}, - }; - } - return report.graph.fallbackImportExtraction; -} - -export function createFallbackImportExtractionHandler( - report: BuildReport | undefined, - opts?: BuildOptions, -): ((event: FallbackImportExtractionEvent) => void) | undefined { - const fallbackReport = initFallbackImportExtractionReport(report); - const warned = new Set(); - const logLevel = opts?.logLevel ?? "warn"; - const shouldLog = logLevel !== "silent" && logLevel !== "error"; - - return (event: FallbackImportExtractionEvent) => { - const filePath = event.file ? event.file.replace(/\\/g, "/") : "unknown"; - if (fallbackReport) { - if (!fallbackReport.files[filePath]) { - fallbackReport.total += 1; - fallbackReport.byLanguage[event.language] = (fallbackReport.byLanguage[event.language] ?? 
0) + 1; - fallbackReport.byReason ??= { - fast: 0, - "js-fallback-unavailable": 0, - "query-error": 0, - "query-empty": 0, - }; - fallbackReport.byReason[event.reason] += 1; - } - fallbackReport.files[filePath] = { - language: event.language, - reason: event.reason, - }; - } - if (!shouldLog) return; - const warningKey = `${event.language}:${event.reason}`; - if (warned.has(warningKey)) return; - warned.add(warningKey); - const severity = - event.reason === "fast" || - event.reason === "js-fallback-unavailable" || - shouldAvoidJsFallbackForLanguage(event.language) - ? "debug" - : "warn"; - let message = "Regex fallback import extraction"; - if (event.reason === "js-fallback-unavailable") { - message = `JS fallback unavailable for ${event.language} query recovery; using regex import extraction.`; - } else if (shouldAvoidJsFallbackForLanguage(event.language)) { - message = `Native import recovery degraded for ${event.language}; using native-owned fallback extraction.`; - } - logWithLevel(opts?.logLevel, severity, message, { - language: event.language, - reason: event.reason, - }); - }; -} - -export function initManifestReport( - report: BuildReport | undefined, - used: boolean, - reused: boolean, -): ManifestReport | undefined { - if (!report) return undefined; - if (!report.manifest) { - report.manifest = { used, reused }; - } else { - report.manifest.used = used; - report.manifest.reused = reused; - } - return report.manifest; -} - -async function fileContentHash(file: string): Promise { - const buffer = await fsp.readFile(file); - const hash = crypto.createHash("sha1"); - hash.update(buffer); - return hash.digest("hex"); -} - -async function fileStatSignature( - file: string, - strict?: boolean, - opts?: { includeContentHash?: boolean }, -): Promise<{ sig: string; contentHash?: string }> { - try { - const stat = await fsp.stat(file); - const useStrict = strict ?? true; - const shouldHash = useStrict || !!opts?.includeContentHash; - const contentHash = shouldHash ? 
await fileContentHash(file) : undefined; - if (!useStrict) { - return contentHash - ? { sig: `${stat.mtimeMs}:${stat.size}`, contentHash } - : { sig: `${stat.mtimeMs}:${stat.size}` }; - } - if (contentHash) { - return { - sig: `${stat.mtimeMs}:${stat.size}:${contentHash}`, - contentHash, - }; - } - return { sig: `${stat.mtimeMs}:${stat.size}` }; - } catch { - return { sig: "0:0" }; - } -} - -export async function fileSignature( - file: string, - strict?: boolean, - gitSig?: string, - opts?: { forceContentHash?: boolean }, -): Promise { - const includeContentHash = !!opts?.forceContentHash; - const statOpts = includeContentHash ? { includeContentHash: true } : undefined; - const { sig, contentHash } = await fileStatSignature(file, strict, statOpts); - const cacheSig = gitSig ?? contentHash ?? sig; - if (gitSig) { - return { - sig, - gitSig, - cacheSig, - ...(contentHash ? { contentHash } : {}), - }; - } - return { sig, cacheSig, ...(contentHash ? { contentHash } : {}) }; -} - -export async function cacheSignatureForFile(file: string, sigInfo: FileSignature): Promise { - if (sigInfo.gitSig) return sigInfo.gitSig; - if (sigInfo.contentHash) return sigInfo.contentHash; - const contentHash = await fileContentHash(file); - sigInfo.contentHash = contentHash; - return contentHash; -} - -export async function buildBloomFilterForFile( - file: string, -): Promise { - try { - const source = await fsp.readFile(file, "utf8"); - const support = supportForFile(file); - if (!support) return null; - return buildBloomFilterFromSource(source, support.id); - } catch { - return null; - } -} - -function isModuleIndex(value: unknown): value is ModuleIndex { - if (!value || typeof value !== "object") return false; - const mod = value as { - file?: unknown; - exports?: unknown; - imports?: unknown; - locals?: unknown; - }; - return ( - typeof mod.file === "string" && - Array.isArray(mod.exports) && - Array.isArray(mod.imports) && - Array.isArray(mod.locals) - ); -} - -export function 
tryLoadFromCache( - projectRoot: string, - file: string, - sig: string, - opts?: BuildOptions, - report?: BuildReport, -): ModuleIndex | null { - const mode = opts?.cache ?? "off"; - const cacheReport = initCacheReport(report, mode); - const cacheEnabled = mode !== "off"; - if (mode === "memory") { - const entry = memoryCache.get(file); - if (entry && entry.sig === sig) { - if (cacheEnabled && cacheReport) cacheReport.hits += 1; - return entry.mod; - } - if (cacheEnabled && cacheReport) cacheReport.misses += 1; - return null; - } - if (mode === "disk") { - try { - const db = getDiskCacheDatabase(projectRoot, opts); - const row = db.prepare("SELECT sig, version, payload FROM module_cache WHERE file = ?").get(file) as - | { sig: string; version: number; payload: string } - | undefined; - if (row && row.sig === sig && row.version === PARSED_CACHE_VERSION) { - const parsed = JSON.parse(row.payload) as unknown; - if (isModuleIndex(parsed)) { - if (cacheEnabled && cacheReport) cacheReport.hits += 1; - return parsed; - } - } - } catch { - // cache read failed - } - if (cacheEnabled && cacheReport) cacheReport.misses += 1; - } - return null; -} - -export function writeToCache( - projectRoot: string, - file: string, - sig: string, - mod: ModuleIndex, - opts?: BuildOptions, -): void { - const mode = opts?.cache ?? "off"; - if (mode === "memory") { - memoryCache.set(file, { version: PARSED_CACHE_VERSION, sig, mod }); - } else if (mode === "disk") { - try { - const db = getDiskCacheDatabase(projectRoot, opts); - db.prepare( - `INSERT INTO module_cache (file, sig, version, payload, updated_at) - VALUES (?, ?, ?, ?, ?) 
- ON CONFLICT(file) DO UPDATE SET - sig = excluded.sig, - version = excluded.version, - payload = excluded.payload, - updated_at = excluded.updated_at`, - ).run(file, sig, PARSED_CACHE_VERSION, JSON.stringify(mod), Date.now()); - } catch (error) { - logWithLevel(opts?.logLevel, "warn", "Warning: Failed to write to cache:", error); - } - } -} - -function manifestFilePath(projectRoot: string, opts?: BuildOptions): string { - return path.join(cacheRoot(projectRoot, opts), "manifest.json"); -} - -function isTransientFileContentionError(error: unknown): boolean { - if (!error || typeof error !== "object") return false; - const code = (error as NodeJS.ErrnoException).code; - return code === "EBUSY" || code === "EPERM" || code === "ENOTEMPTY"; -} - -function manifestTempFilePath(manifestPath: string): string { - const dir = path.dirname(manifestPath); - const base = path.basename(manifestPath); - return path.join(dir, `.${base}.${process.pid}.${crypto.randomUUID()}.tmp`); -} - -async function wait(ms: number): Promise { - await new Promise((resolve) => { - setTimeout(resolve, ms); - }); -} - -async function writeManifestAtomically(manifestPath: string, payload: string): Promise { - const retryDelays = [10, 25, 50, 100]; - for (let attempt = 0; attempt <= retryDelays.length; attempt += 1) { - const tempPath = manifestTempFilePath(manifestPath); - try { - await fsp.writeFile(tempPath, payload, "utf8"); - await fsp.rename(tempPath, manifestPath); - return; - } catch (error) { - try { - await fsp.rm(tempPath, { force: true }); - } catch { - // Cleanup is best-effort; the next attempt uses a fresh temp path. 
- } - const canRetry = attempt < retryDelays.length && isTransientFileContentionError(error); - if (!canRetry) throw error; - await wait(retryDelays[attempt]!); - } - } -} - -export async function loadManifest(projectRoot: string, opts?: BuildOptions): Promise { - try { - const manifestPath = manifestFilePath(projectRoot, opts); - const raw = await fsp.readFile(manifestPath, "utf8"); - const parsed = JSON.parse(raw) as IndexManifest; - if (parsed.version !== MANIFEST_VERSION) return null; - return parsed; - } catch { - return null; - } -} - -export async function writeManifest( - projectRoot: string, - opts: BuildOptions | undefined, - manifest: IndexManifest, -): Promise { - try { - const manifestPath = manifestFilePath(projectRoot, opts); - await fsp.mkdir(path.dirname(manifestPath), { recursive: true }); - await writeManifestAtomically(manifestPath, JSON.stringify(manifest, null, 2)); - } catch (error) { - logWithLevel(opts?.logLevel, "warn", "Warning: Failed to write manifest:", error); - } -} - -export async function verifyManifestEntries( - projectRoot: string, - manifest: IndexManifest, - opts: BuildOptions | undefined, - gitAvailable: boolean, -): Promise<{ mismatches: number; missing: number }> { - const entries = manifest.files ?? {}; - const files = Object.keys(entries); - const existingFiles = files.filter((file) => fs.existsSync(file)); - const missing = files.length - existingFiles.length; - const gitSigMap = gitAvailable - ? 
await getGitBlobHashes(projectRoot, existingFiles, { gitAvailable }) - : new Map(); - let mismatches = 0; - for (const file of existingFiles) { - const entry = entries[file]; - if (!entry) continue; - const sigInfo = await fileSignature(file, opts?.cacheStrict, gitSigMap.get(file)); - const matchesGitSig = !!entry.gitSig && !!sigInfo.gitSig && entry.gitSig === sigInfo.gitSig; - const matchesSig = entry.sig === sigInfo.sig; - if (!matchesGitSig && !matchesSig) mismatches += 1; - } - return { mismatches, missing }; -} - -function normalizeManifestBuildOptions(opts?: ManifestBuildOptions): ManifestBuildOptions { - return { - cache: opts?.cache ?? "off", - cacheStrict: opts?.cacheStrict ?? true, - useBloomFilters: opts?.useBloomFilters ?? true, - preset: opts?.preset, - incrementalStrict: opts?.incrementalStrict ?? false, - ...(opts?.discovery ? { discovery: opts.discovery } : {}), - }; -} - -function normalizeDiscoveryOptions(discovery?: ProjectFileDiscoveryOptions): ManifestBuildOptions["discovery"] { - if (!discovery) return undefined; - const normalizeGlob = (glob: string) => glob.trim().replace(/\\/g, "/"); - const includeGlobs = Array.from( - new Set((discovery.includeGlobs ?? []).map(normalizeGlob).filter(Boolean)), - ).sort(); - const ignoreGlobs = Array.from( - new Set((discovery.ignoreGlobs ?? []).map(normalizeGlob).filter(Boolean)), - ).sort(); - const globRoot = discovery.globRoot ? normalizePath(path.resolve(discovery.globRoot)) : undefined; - const gitignoreRoot = discovery.gitignoreRoot ? normalizePath(path.resolve(discovery.gitignoreRoot)) : undefined; - const useGitignore = discovery.useGitignore ?? true; - if (!includeGlobs.length && !ignoreGlobs.length && !globRoot && !gitignoreRoot && useGitignore) { - return undefined; - } - return { - ...(includeGlobs.length ? { includeGlobs } : {}), - ...(ignoreGlobs.length ? { ignoreGlobs } : {}), - ...(globRoot ? { globRoot } : {}), - ...(gitignoreRoot ? 
{ gitignoreRoot } : {}), - useGitignore, - }; -} - -function normalizeBuildOptions(opts?: BuildOptions): ManifestBuildOptions { - const discovery = normalizeDiscoveryOptions(opts?.discovery); - return { - cache: opts?.cache ?? "off", - cacheStrict: opts?.cacheStrict ?? true, - useBloomFilters: opts?.useBloomFilters ?? true, - preset: opts?.preset, - incrementalStrict: opts?.incrementalStrict ?? false, - ...(discovery ? { discovery } : {}), - }; -} - -export function summarizeBuildOptions(opts?: BuildOptions): ManifestBuildOptions { - return normalizeBuildOptions(opts); -} - -function normalizeLanguageList(list?: string[]): string[] { - const out: string[] = []; - const seen = new Set(); - for (const entry of list ?? []) { - const normalized = entry.trim().toLowerCase(); - if (!normalized || seen.has(normalized)) continue; - seen.add(normalized); - out.push(normalized); - } - out.sort(); - return out; -} - -function normalizedDiscoveryOptionsEqual( - a: ManifestBuildOptions["discovery"], - b: ManifestBuildOptions["discovery"], -): boolean { - const normalizedA = a ?? { useGitignore: true }; - const normalizedB = b ?? { useGitignore: true }; - if (normalizedA.useGitignore !== normalizedB.useGitignore) return false; - if (normalizedA.globRoot !== normalizedB.globRoot) return false; - if (normalizedA.gitignoreRoot !== normalizedB.gitignoreRoot) return false; - const includeA = normalizedA.includeGlobs ?? []; - const includeB = normalizedB.includeGlobs ?? []; - if (includeA.length !== includeB.length) return false; - for (let i = 0; i < includeA.length; i++) { - if (includeA[i] !== includeB[i]) return false; - } - const ignoreA = normalizedA.ignoreGlobs ?? []; - const ignoreB = normalizedB.ignoreGlobs ?? 
[]; - if (ignoreA.length !== ignoreB.length) return false; - for (let i = 0; i < ignoreA.length; i++) { - if (ignoreA[i] !== ignoreB[i]) return false; - } - return true; -} - -export function diffBuildOptions( - manifestOpts: ManifestBuildOptions | undefined, - currentOpts: BuildOptions | undefined, -): string[] { - if (!manifestOpts) return []; - const normalizedManifest = normalizeManifestBuildOptions(manifestOpts); - const normalizedCurrent = normalizeBuildOptions(currentOpts); - const diffs: string[] = []; - if (normalizedManifest.cache !== normalizedCurrent.cache) diffs.push("cache"); - if (normalizedManifest.cacheStrict !== normalizedCurrent.cacheStrict) { - diffs.push("cacheStrict"); - } - if (normalizedManifest.useBloomFilters !== normalizedCurrent.useBloomFilters) { - diffs.push("useBloomFilters"); - } - if (normalizedManifest.preset !== normalizedCurrent.preset) diffs.push("preset"); - if (normalizedManifest.incrementalStrict !== normalizedCurrent.incrementalStrict) { - diffs.push("incrementalStrict"); - } - if (!normalizedDiscoveryOptionsEqual(normalizedManifest.discovery, normalizedCurrent.discovery)) { - diffs.push("discovery"); - } - return diffs; -} - -export function normalizeGraphOptions(opts?: GraphBuildOptions): GraphBuildOptions { - const resolutionHints = normalizeResolutionHints(opts?.resolutionHints); - const fastRegexDisabledLanguages = normalizeLanguageList(opts?.fastRegexDisabledLanguages); - return { - fast: !!opts?.fast, - ...(fastRegexDisabledLanguages.length ? { fastRegexDisabledLanguages } : {}), - resolveNodeModules: !!opts?.resolveNodeModules, - dynamicImportHeuristics: !!opts?.dynamicImportHeuristics, - ...(resolutionHints.length ? 
{ resolutionHints } : {}), - }; -} - -export function graphOptionsEqual(a?: GraphBuildOptions, b?: GraphBuildOptions): boolean { - if (!a && !b) return true; - if (!a || !b) return false; - const normalizedA = normalizeGraphOptions(a); - const normalizedB = normalizeGraphOptions(b); - if (!!normalizedA.fast !== !!normalizedB.fast) return false; - if (!!normalizedA.resolveNodeModules !== !!normalizedB.resolveNodeModules) { - return false; - } - if (!!normalizedA.dynamicImportHeuristics !== !!normalizedB.dynamicImportHeuristics) { - return false; - } - const disabledA = normalizedA.fastRegexDisabledLanguages ?? []; - const disabledB = normalizedB.fastRegexDisabledLanguages ?? []; - if (disabledA.length !== disabledB.length) return false; - for (let i = 0; i < disabledA.length; i++) { - if (disabledA[i] !== disabledB[i]) return false; - } - const hintsA = normalizedA.resolutionHints ?? []; - const hintsB = normalizedB.resolutionHints ?? []; - if (hintsA.length !== hintsB.length) return false; - for (let i = 0; i < hintsA.length; i++) { - if (hintsA[i] !== hintsB[i]) return false; - } - return true; -} +export { + MANIFEST_VERSION, + collectWorkspaceManifestDependencyEdges, + computeConfigHash, + loadManifest, + normalizeIndexedFileInputs, + sanitizeManifestEntriesForRoot, + verifyManifestEntries, + writeManifest, + type IndexManifest, + type ManifestFileEntry, +} from "./build-cache/manifest.js"; +export { + buildBloomFilterForFile, + cacheRoot, + cacheSignatureForFile, + closeDiskCacheDatabase, + fileSignature, + tryLoadFromCache, + writeToCache, + type FileSignature, +} from "./build-cache/module-cache.js"; +export { + diffBuildOptions, + graphOptionsEqual, + normalizeGraphOptions, + summarizeBuildOptions, + type ManifestBuildOptions, +} from "./build-cache/options.js"; +export { + createFallbackImportExtractionHandler, + initCacheReport, + initFileReport, + initManifestReport, + recordConfigHashResult, + recordFileFailure, +} from "./build-cache/reports.js"; diff 
--git a/src/indexer/build-cache/manifest.ts b/src/indexer/build-cache/manifest.ts new file mode 100644 index 00000000..81299aa8 --- /dev/null +++ b/src/indexer/build-cache/manifest.ts @@ -0,0 +1,257 @@ +import crypto from "node:crypto"; +import fs from "node:fs"; +import fsp from "node:fs/promises"; +import path from "node:path"; +import fg from "fast-glob"; +import type { GraphCacheEntry, GraphBuildOptions } from "../../graphs/types.js"; +import { logWithLevel, type LogLevel } from "../../logging.js"; +import type { Edge } from "../../types.js"; +import { + DEFAULT_PROJECT_MANIFESTS, + assertFilePathWithinRoot, + getGitBlobHashes, + isFilePathWithinRoot, + listProjectFiles, + stringifyUnknown, + type ProjectFileDiscoveryOptions, +} from "../../util.js"; +import type { BuildOptions } from "../types.js"; +import { cacheRoot, fileSignature } from "./module-cache.js"; +import type { ManifestBuildOptions } from "./options.js"; + +type PackageJsonDependencyInfo = { + name?: string; + dependencies?: Record; + devDependencies?: Record; + peerDependencies?: Record; + optionalDependencies?: Record; +}; + +export async function collectWorkspaceManifestDependencyEdges( + projectRoot: string, + discovery?: ProjectFileDiscoveryOptions, + allowedManifestFiles?: ReadonlySet, + logLevel?: LogLevel, +): Promise { + const manifestPaths = await listProjectFiles(projectRoot, ["**/package.json"], { + ...discovery, + ...(logLevel ? { logLevel } : {}), + }); + const scopedManifestPaths = allowedManifestFiles + ? 
manifestPaths.filter((manifestPath) => allowedManifestFiles.has(manifestPath)) + : manifestPaths; + if (!scopedManifestPaths.length) return []; + + const manifestByPackageName = new Map(); + const parsedByPath = new Map(); + + for (const manifestPath of scopedManifestPaths) { + try { + const raw = await fsp.readFile(manifestPath, "utf8"); + const parsed = JSON.parse(raw) as PackageJsonDependencyInfo; + parsedByPath.set(manifestPath, parsed); + if (typeof parsed.name === "string" && parsed.name.trim()) { + manifestByPackageName.set(parsed.name, manifestPath); + } + } catch { + continue; + } + } + + const edges: Edge[] = []; + for (const [fromManifest, parsed] of parsedByPath.entries()) { + const dependencySets = [ + parsed.dependencies, + parsed.devDependencies, + parsed.peerDependencies, + parsed.optionalDependencies, + ]; + for (const dependencySet of dependencySets) { + if (!dependencySet) continue; + for (const dependencyName of Object.keys(dependencySet)) { + const toManifest = manifestByPackageName.get(dependencyName); + if (!toManifest) continue; + edges.push({ + from: fromManifest, + to: { type: "file", path: toManifest }, + raw: dependencyName, + }); + } + } + } + + return edges; +} + +export const MANIFEST_VERSION = 2; + +export type ManifestFileEntry = GraphCacheEntry; + +export type IndexManifest = { + version: number; + projectRoot: string; + updatedAt: number; + lastCommit?: string; + configHash?: string; + graphOptions?: GraphBuildOptions; + buildOptions?: ManifestBuildOptions; + files: Record; +}; + +type ConfigHashResult = { + hash: string; + error?: string; +}; + +export function normalizeIndexedFileInputs(projectRoot: string, files: readonly string[], label: string): string[] { + return Array.from(new Set(files.filter(Boolean).map((file) => assertFilePathWithinRoot(projectRoot, file, label)))); +} + +export function sanitizeManifestEntriesForRoot( + projectRoot: string, + files: Record | undefined, +): Record { + const sanitizedEntries: Record = 
{}; + for (const [file, entry] of Object.entries(files ?? {})) { + if (!isFilePathWithinRoot(projectRoot, file)) continue; + sanitizedEntries[file] = entry; + } + return sanitizedEntries; +} + +export async function computeConfigHash(projectRoot: string, logLevel?: LogLevel): Promise { + try { + const configFiles = await fg([...DEFAULT_PROJECT_MANIFESTS, "**/.gitignore"], { + cwd: projectRoot, + absolute: true, + dot: true, + ignore: [ + "**/node_modules/**", + "**/.git/**", + "**/dist/**", + "**/build/**", + "**/target/**", + "**/.venv/**", + "**/__pycache__/**", + ], + }); + configFiles.sort(); + const hash = crypto.createHash("sha1"); + let firstError: string | undefined; + for (const file of configFiles) { + try { + const content = await fsp.readFile(file, "utf8"); + const relative = path.relative(projectRoot, file).replace(/\\/g, "/"); + hash.update(relative); + hash.update(content); + } catch (error) { + const message = `Failed to read config file "${file}": ${stringifyUnknown(error)}`; + if (!firstError) firstError = message; + logWithLevel(logLevel, "debug", "computeConfigHash:", message); + } + } + return { + hash: hash.digest("hex"), + ...(firstError ? 
{ error: firstError } : {}), + }; + } catch (error) { + return { + hash: "", + error: `Failed to enumerate config files: ${stringifyUnknown(error)}`, + }; + } +} + +function manifestFilePath(projectRoot: string, opts?: BuildOptions): string { + return path.join(cacheRoot(projectRoot, opts), "manifest.json"); +} + +function isTransientFileContentionError(error: unknown): boolean { + if (!error || typeof error !== "object") return false; + const code = (error as NodeJS.ErrnoException).code; + return code === "EBUSY" || code === "EPERM" || code === "ENOTEMPTY"; +} + +function manifestTempFilePath(manifestPath: string): string { + const dir = path.dirname(manifestPath); + const base = path.basename(manifestPath); + return path.join(dir, `.${base}.${process.pid}.${crypto.randomUUID()}.tmp`); +} + +async function wait(ms: number): Promise { + await new Promise((resolve) => { + setTimeout(resolve, ms); + }); +} + +async function writeManifestAtomically(manifestPath: string, payload: string): Promise { + const retryDelays = [10, 25, 50, 100]; + for (let attempt = 0; attempt <= retryDelays.length; attempt += 1) { + const tempPath = manifestTempFilePath(manifestPath); + try { + await fsp.writeFile(tempPath, payload, "utf8"); + await fsp.rename(tempPath, manifestPath); + return; + } catch (error) { + try { + await fsp.rm(tempPath, { force: true }); + } catch { + // Cleanup is best-effort; the next attempt uses a fresh temp path. 
+ } + const canRetry = attempt < retryDelays.length && isTransientFileContentionError(error); + if (!canRetry) throw error; + await wait(retryDelays[attempt]!); + } + } +} + +export async function loadManifest(projectRoot: string, opts?: BuildOptions): Promise { + try { + const manifestPath = manifestFilePath(projectRoot, opts); + const raw = await fsp.readFile(manifestPath, "utf8"); + const parsed = JSON.parse(raw) as IndexManifest; + if (parsed.version !== MANIFEST_VERSION) return null; + return parsed; + } catch { + return null; + } +} + +export async function writeManifest( + projectRoot: string, + opts: BuildOptions | undefined, + manifest: IndexManifest, +): Promise { + try { + const manifestPath = manifestFilePath(projectRoot, opts); + await fsp.mkdir(path.dirname(manifestPath), { recursive: true }); + await writeManifestAtomically(manifestPath, JSON.stringify(manifest, null, 2)); + } catch (error) { + logWithLevel(opts?.logLevel, "warn", "Warning: Failed to write manifest:", error); + } +} + +export async function verifyManifestEntries( + projectRoot: string, + manifest: IndexManifest, + opts: BuildOptions | undefined, + gitAvailable: boolean, +): Promise<{ mismatches: number; missing: number }> { + const entries = manifest.files ?? {}; + const files = Object.keys(entries); + const existingFiles = files.filter((file) => fs.existsSync(file)); + const missing = files.length - existingFiles.length; + const gitSigMap = gitAvailable + ? 
await getGitBlobHashes(projectRoot, existingFiles, { gitAvailable }) + : new Map(); + let mismatches = 0; + for (const file of existingFiles) { + const entry = entries[file]; + if (!entry) continue; + const sigInfo = await fileSignature(file, opts?.cacheStrict, gitSigMap.get(file)); + const matchesGitSig = !!entry.gitSig && !!sigInfo.gitSig && entry.gitSig === sigInfo.gitSig; + const matchesSig = entry.sig === sigInfo.sig; + if (!matchesGitSig && !matchesSig) mismatches += 1; + } + return { mismatches, missing }; +} diff --git a/src/indexer/build-cache/module-cache.ts b/src/indexer/build-cache/module-cache.ts new file mode 100644 index 00000000..dad35939 --- /dev/null +++ b/src/indexer/build-cache/module-cache.ts @@ -0,0 +1,235 @@ +import crypto from "node:crypto"; +import fs from "node:fs"; +import fsp from "node:fs/promises"; +import path from "node:path"; +import { supportForFile } from "../../languages.js"; +import { logWithLevel } from "../../logging.js"; +import { SqliteDatabase } from "../../sqlite-driver.js"; +import { buildBloomFilterFromSource } from "../../util/bloomFilter.js"; +import type { BuildOptions, BuildReport, ModuleIndex } from "../types.js"; +import { initCacheReport } from "./reports.js"; + +const PARSED_CACHE_VERSION = 1; + +type ModuleCacheEntry = { + version: number; + sig: string; + mod: ModuleIndex; +}; + +const memoryCache = new Map(); +const diskCacheDatabases = new Map(); + +export function cacheRoot(projectRoot: string, opts?: BuildOptions): string { + return opts?.cacheDir || path.join(projectRoot, ".codegraph-cache", "index-v1"); +} + +function diskCacheDatabasePath(projectRoot: string, opts?: BuildOptions): string { + return path.join(cacheRoot(projectRoot, opts), "index-cache.sqlite").replace(/\\/g, "/"); +} + +function getDiskCacheDatabase(projectRoot: string, opts?: BuildOptions): SqliteDatabase { + const dbPath = diskCacheDatabasePath(projectRoot, opts); + const existing = diskCacheDatabases.get(dbPath); + if (existing) return 
existing; + fs.mkdirSync(path.dirname(dbPath), { recursive: true }); + const db = new SqliteDatabase(dbPath); + db.pragma("journal_mode = WAL"); + db.pragma("synchronous = NORMAL"); + db.exec(` + CREATE TABLE IF NOT EXISTS module_cache ( + file TEXT PRIMARY KEY, + sig TEXT NOT NULL, + version INTEGER NOT NULL, + payload TEXT NOT NULL, + updated_at INTEGER NOT NULL + ); + CREATE INDEX IF NOT EXISTS idx_module_cache_sig ON module_cache(sig); + `); + diskCacheDatabases.set(dbPath, db); + return db; +} + +export function closeDiskCacheDatabase(projectRoot: string, opts?: BuildOptions): void { + const dbPath = diskCacheDatabasePath(projectRoot, opts); + const db = diskCacheDatabases.get(dbPath); + if (!db) return; + try { + db.pragma("wal_checkpoint(TRUNCATE)"); + } catch { + // checkpoint best-effort + } + try { + db.close(); + diskCacheDatabases.delete(dbPath); + } catch { + // Keep handle for later retry if close fails. + } +} + +async function fileContentHash(file: string): Promise { + const buffer = await fsp.readFile(file); + const hash = crypto.createHash("sha1"); + hash.update(buffer); + return hash.digest("hex"); +} + +async function fileStatSignature( + file: string, + strict?: boolean, + opts?: { includeContentHash?: boolean }, +): Promise<{ sig: string; contentHash?: string }> { + try { + const stat = await fsp.stat(file); + const useStrict = strict ?? true; + const shouldHash = useStrict || !!opts?.includeContentHash; + const contentHash = shouldHash ? await fileContentHash(file) : undefined; + if (!useStrict) { + return contentHash + ? 
{ sig: `${stat.mtimeMs}:${stat.size}`, contentHash } + : { sig: `${stat.mtimeMs}:${stat.size}` }; + } + if (contentHash) { + return { + sig: `${stat.mtimeMs}:${stat.size}:${contentHash}`, + contentHash, + }; + } + return { sig: `${stat.mtimeMs}:${stat.size}` }; + } catch { + return { sig: "0:0" }; + } +} + +export type FileSignature = { + sig: string; + gitSig?: string; + cacheSig: string; + contentHash?: string; +}; + +export async function fileSignature( + file: string, + strict?: boolean, + gitSig?: string, + opts?: { forceContentHash?: boolean }, +): Promise { + const includeContentHash = !!opts?.forceContentHash; + const statOpts = includeContentHash ? { includeContentHash: true } : undefined; + const { sig, contentHash } = await fileStatSignature(file, strict, statOpts); + const cacheSig = gitSig ?? contentHash ?? sig; + if (gitSig) { + return { + sig, + gitSig, + cacheSig, + ...(contentHash ? { contentHash } : {}), + }; + } + return { sig, cacheSig, ...(contentHash ? { contentHash } : {}) }; +} + +export async function cacheSignatureForFile(file: string, sigInfo: FileSignature): Promise { + if (sigInfo.gitSig) return sigInfo.gitSig; + if (sigInfo.contentHash) return sigInfo.contentHash; + const contentHash = await fileContentHash(file); + sigInfo.contentHash = contentHash; + return contentHash; +} + +export async function buildBloomFilterForFile( + file: string, +): Promise { + try { + const source = await fsp.readFile(file, "utf8"); + const support = supportForFile(file); + if (!support) return null; + return buildBloomFilterFromSource(source, support.id); + } catch { + return null; + } +} + +function isModuleIndex(value: unknown): value is ModuleIndex { + if (!value || typeof value !== "object") return false; + const mod = value as { + file?: unknown; + exports?: unknown; + imports?: unknown; + locals?: unknown; + }; + return ( + typeof mod.file === "string" && + Array.isArray(mod.exports) && + Array.isArray(mod.imports) && + Array.isArray(mod.locals) + ); 
+} + +export function tryLoadFromCache( + projectRoot: string, + file: string, + sig: string, + opts?: BuildOptions, + report?: BuildReport, +): ModuleIndex | null { + const mode = opts?.cache ?? "off"; + const cacheReport = initCacheReport(report, mode); + const cacheEnabled = mode !== "off"; + if (mode === "memory") { + const entry = memoryCache.get(file); + if (entry && entry.sig === sig) { + if (cacheEnabled && cacheReport) cacheReport.hits += 1; + return entry.mod; + } + if (cacheEnabled && cacheReport) cacheReport.misses += 1; + return null; + } + if (mode === "disk") { + try { + const db = getDiskCacheDatabase(projectRoot, opts); + const row = db.prepare("SELECT sig, version, payload FROM module_cache WHERE file = ?").get(file) as + | { sig: string; version: number; payload: string } + | undefined; + if (row && row.sig === sig && row.version === PARSED_CACHE_VERSION) { + const parsed: unknown = JSON.parse(row.payload); + if (isModuleIndex(parsed)) { + if (cacheEnabled && cacheReport) cacheReport.hits += 1; + return parsed; + } + } + } catch { + // cache read failed + } + if (cacheEnabled && cacheReport) cacheReport.misses += 1; + } + return null; +} + +export function writeToCache( + projectRoot: string, + file: string, + sig: string, + mod: ModuleIndex, + opts?: BuildOptions, +): void { + const mode = opts?.cache ?? "off"; + if (mode === "memory") { + memoryCache.set(file, { version: PARSED_CACHE_VERSION, sig, mod }); + } else if (mode === "disk") { + try { + const db = getDiskCacheDatabase(projectRoot, opts); + db.prepare( + `INSERT INTO module_cache (file, sig, version, payload, updated_at) + VALUES (?, ?, ?, ?, ?) 
+ ON CONFLICT(file) DO UPDATE SET + sig = excluded.sig, + version = excluded.version, + payload = excluded.payload, + updated_at = excluded.updated_at`, + ).run(file, sig, PARSED_CACHE_VERSION, JSON.stringify(mod), Date.now()); + } catch (error) { + logWithLevel(opts?.logLevel, "warn", "Warning: Failed to write to cache:", error); + } + } +} diff --git a/src/indexer/build-cache/options.ts b/src/indexer/build-cache/options.ts new file mode 100644 index 00000000..cf2f50b3 --- /dev/null +++ b/src/indexer/build-cache/options.ts @@ -0,0 +1,167 @@ +import path from "node:path"; +import type { GraphBuildOptions } from "../../graphs/types.js"; +import { + normalizePath, + normalizeResolutionHints, + type ProjectFileDiscoveryOptions, +} from "../../util.js"; +import type { BuildOptions } from "../types.js"; + +export type ManifestBuildOptions = { + cache?: BuildOptions["cache"]; + cacheStrict?: boolean; + useBloomFilters?: boolean; + preset?: BuildOptions["preset"]; + incrementalStrict?: boolean; + discovery?: { + includeGlobs?: string[]; + ignoreGlobs?: string[]; + globRoot?: string; + gitignoreRoot?: string; + useGitignore: boolean; + }; +}; + +function normalizeManifestBuildOptions(opts?: ManifestBuildOptions): ManifestBuildOptions { + return { + cache: opts?.cache ?? "off", + cacheStrict: opts?.cacheStrict ?? true, + useBloomFilters: opts?.useBloomFilters ?? true, + preset: opts?.preset, + incrementalStrict: opts?.incrementalStrict ?? false, + ...(opts?.discovery ? { discovery: opts.discovery } : {}), + }; +} + +function normalizeDiscoveryOptions(discovery?: ProjectFileDiscoveryOptions): ManifestBuildOptions["discovery"] { + if (!discovery) return undefined; + const normalizeGlob = (glob: string) => glob.trim().replace(/\\/g, "/"); + const includeGlobs = Array.from( + new Set((discovery.includeGlobs ?? []).map(normalizeGlob).filter(Boolean)), + ).sort(); + const ignoreGlobs = Array.from( + new Set((discovery.ignoreGlobs ?? 
[]).map(normalizeGlob).filter(Boolean)), + ).sort(); + const globRoot = discovery.globRoot ? normalizePath(path.resolve(discovery.globRoot)) : undefined; + const gitignoreRoot = discovery.gitignoreRoot ? normalizePath(path.resolve(discovery.gitignoreRoot)) : undefined; + const useGitignore = discovery.useGitignore ?? true; + if (!includeGlobs.length && !ignoreGlobs.length && !globRoot && !gitignoreRoot && useGitignore) { + return undefined; + } + return { + ...(includeGlobs.length ? { includeGlobs } : {}), + ...(ignoreGlobs.length ? { ignoreGlobs } : {}), + ...(globRoot ? { globRoot } : {}), + ...(gitignoreRoot ? { gitignoreRoot } : {}), + useGitignore, + }; +} + +function normalizeBuildOptions(opts?: BuildOptions): ManifestBuildOptions { + const discovery = normalizeDiscoveryOptions(opts?.discovery); + return { + cache: opts?.cache ?? "off", + cacheStrict: opts?.cacheStrict ?? true, + useBloomFilters: opts?.useBloomFilters ?? true, + preset: opts?.preset, + incrementalStrict: opts?.incrementalStrict ?? false, + ...(discovery ? { discovery } : {}), + }; +} + +export function summarizeBuildOptions(opts?: BuildOptions): ManifestBuildOptions { + return normalizeBuildOptions(opts); +} + +function normalizeLanguageList(list?: string[]): string[] { + const out: string[] = []; + const seen = new Set(); + for (const entry of list ?? []) { + const normalized = entry.trim().toLowerCase(); + if (!normalized || seen.has(normalized)) continue; + seen.add(normalized); + out.push(normalized); + } + out.sort(); + return out; +} + +function orderedListsEqual(a: readonly string[], b: readonly string[]): boolean { + if (a.length !== b.length) return false; + for (let i = 0; i < a.length; i++) { + if (a[i] !== b[i]) return false; + } + return true; +} + +function normalizedDiscoveryOptionsEqual( + a: ManifestBuildOptions["discovery"], + b: ManifestBuildOptions["discovery"], +): boolean { + const normalizedA = a ?? { useGitignore: true }; + const normalizedB = b ?? 
{ useGitignore: true }; + if (normalizedA.useGitignore !== normalizedB.useGitignore) return false; + if (normalizedA.globRoot !== normalizedB.globRoot) return false; + if (normalizedA.gitignoreRoot !== normalizedB.gitignoreRoot) return false; + if (!orderedListsEqual(normalizedA.includeGlobs ?? [], normalizedB.includeGlobs ?? [])) return false; + if (!orderedListsEqual(normalizedA.ignoreGlobs ?? [], normalizedB.ignoreGlobs ?? [])) return false; + return true; +} + +export function diffBuildOptions( + manifestOpts: ManifestBuildOptions | undefined, + currentOpts: BuildOptions | undefined, +): string[] { + if (!manifestOpts) return []; + const normalizedManifest = normalizeManifestBuildOptions(manifestOpts); + const normalizedCurrent = normalizeBuildOptions(currentOpts); + const diffs: string[] = []; + if (normalizedManifest.cache !== normalizedCurrent.cache) diffs.push("cache"); + if (normalizedManifest.cacheStrict !== normalizedCurrent.cacheStrict) { + diffs.push("cacheStrict"); + } + if (normalizedManifest.useBloomFilters !== normalizedCurrent.useBloomFilters) { + diffs.push("useBloomFilters"); + } + if (normalizedManifest.preset !== normalizedCurrent.preset) diffs.push("preset"); + if (normalizedManifest.incrementalStrict !== normalizedCurrent.incrementalStrict) { + diffs.push("incrementalStrict"); + } + if (!normalizedDiscoveryOptionsEqual(normalizedManifest.discovery, normalizedCurrent.discovery)) { + diffs.push("discovery"); + } + return diffs; +} + +export function normalizeGraphOptions(opts?: GraphBuildOptions): GraphBuildOptions { + const resolutionHints = normalizeResolutionHints(opts?.resolutionHints); + const fastRegexDisabledLanguages = normalizeLanguageList(opts?.fastRegexDisabledLanguages); + return { + fast: !!opts?.fast, + ...(fastRegexDisabledLanguages.length ? { fastRegexDisabledLanguages } : {}), + resolveNodeModules: !!opts?.resolveNodeModules, + dynamicImportHeuristics: !!opts?.dynamicImportHeuristics, + ...(resolutionHints.length ? 
{ resolutionHints } : {}), + }; +} + +export function graphOptionsEqual(a?: GraphBuildOptions, b?: GraphBuildOptions): boolean { + if (!a && !b) return true; + if (!a || !b) return false; + const normalizedA = normalizeGraphOptions(a); + const normalizedB = normalizeGraphOptions(b); + if (!!normalizedA.fast !== !!normalizedB.fast) return false; + if (!!normalizedA.resolveNodeModules !== !!normalizedB.resolveNodeModules) { + return false; + } + if (!!normalizedA.dynamicImportHeuristics !== !!normalizedB.dynamicImportHeuristics) { + return false; + } + if (!orderedListsEqual(normalizedA.fastRegexDisabledLanguages ?? [], normalizedB.fastRegexDisabledLanguages ?? [])) { + return false; + } + if (!orderedListsEqual(normalizedA.resolutionHints ?? [], normalizedB.resolutionHints ?? [])) { + return false; + } + return true; +} diff --git a/src/indexer/build-cache/reports.ts b/src/indexer/build-cache/reports.ts new file mode 100644 index 00000000..8b034b1a --- /dev/null +++ b/src/indexer/build-cache/reports.ts @@ -0,0 +1,158 @@ +import { shouldAvoidJsFallbackForLanguage } from "../../native/treeSitterNative.js"; +import type { FallbackImportExtractionEvent } from "../../graphs/specifiers.js"; +import { logWithLevel, type LogLevel } from "../../logging.js"; +import { stringifyUnknown } from "../../util.js"; +import type { + BuildFileReport, + BuildOptions, + BuildReport, + CacheReport, + FallbackImportExtractionReport, + ManifestReport, +} from "../types.js"; + +export function initCacheReport( + report: BuildReport | undefined, + mode: BuildOptions["cache"] | undefined, +): CacheReport | undefined { + if (!report) return undefined; + if (!report.cache) { + report.cache = { mode: mode ?? 
"off", hits: 0, misses: 0 }; + } + return report.cache; +} + +export function initFileReport(report: BuildReport | undefined): BuildFileReport | undefined { + if (!report) return undefined; + if (!report.files) { + report.files = { total: 0, cached: 0, parsed: 0 }; + } + return report.files; +} + +export function recordFileFailure(report: BuildReport | undefined, file: string, error: unknown): void { + const fileReport = initFileReport(report); + if (!fileReport) return; + fileReport.failed = (fileReport.failed ?? 0) + 1; + const errors = fileReport.errors ?? []; + if (errors.length < 20) { + errors.push({ + file: file.replace(/\\/g, "/"), + message: stringifyUnknown(error), + }); + } + fileReport.errors = errors; +} + +function initFallbackImportExtractionReport( + report: BuildReport | undefined, +): FallbackImportExtractionReport | undefined { + if (!report) return undefined; + if (!report.graph) { + report.graph = { + fallbackImportExtraction: { + total: 0, + byLanguage: {}, + byReason: { + fast: 0, + "js-fallback-unavailable": 0, + "query-error": 0, + "query-empty": 0, + }, + files: {}, + }, + }; + } else if (!report.graph.fallbackImportExtraction) { + report.graph.fallbackImportExtraction = { + total: 0, + byLanguage: {}, + byReason: { + fast: 0, + "js-fallback-unavailable": 0, + "query-error": 0, + "query-empty": 0, + }, + files: {}, + }; + } + return report.graph.fallbackImportExtraction; +} + +export function createFallbackImportExtractionHandler( + report: BuildReport | undefined, + opts?: BuildOptions, +): ((event: FallbackImportExtractionEvent) => void) | undefined { + const fallbackReport = initFallbackImportExtractionReport(report); + const warned = new Set(); + const logLevel = opts?.logLevel ?? "warn"; + const shouldLog = logLevel !== "silent" && logLevel !== "error"; + + return (event: FallbackImportExtractionEvent) => { + const filePath = event.file ? 
event.file.replace(/\\/g, "/") : "unknown"; + if (fallbackReport) { + if (!fallbackReport.files[filePath]) { + fallbackReport.total += 1; + fallbackReport.byLanguage[event.language] = (fallbackReport.byLanguage[event.language] ?? 0) + 1; + fallbackReport.byReason ??= { + fast: 0, + "js-fallback-unavailable": 0, + "query-error": 0, + "query-empty": 0, + }; + fallbackReport.byReason[event.reason] += 1; + } + fallbackReport.files[filePath] = { + language: event.language, + reason: event.reason, + }; + } + if (!shouldLog) return; + const warningKey = `${event.language}:${event.reason}`; + if (warned.has(warningKey)) return; + warned.add(warningKey); + const severity = + event.reason === "fast" || + event.reason === "js-fallback-unavailable" || + shouldAvoidJsFallbackForLanguage(event.language) + ? "debug" + : "warn"; + let message = "Regex fallback import extraction"; + if (event.reason === "js-fallback-unavailable") { + message = `JS fallback unavailable for ${event.language} query recovery; using regex import extraction.`; + } else if (shouldAvoidJsFallbackForLanguage(event.language)) { + message = `Native import recovery degraded for ${event.language}; using native-owned fallback extraction.`; + } + logWithLevel(opts?.logLevel, severity, message, { + language: event.language, + reason: event.reason, + }); + }; +} + +export function initManifestReport( + report: BuildReport | undefined, + used: boolean, + reused: boolean, +): ManifestReport | undefined { + if (!report) return undefined; + if (!report.manifest) { + report.manifest = { used, reused }; + } else { + report.manifest.used = used; + report.manifest.reused = reused; + } + return report.manifest; +} + +export function recordConfigHashResult( + manifestReport: ManifestReport | undefined, + configHashResult: { hash: string; error?: string }, + logLevel: LogLevel | undefined, +): string { + if (!configHashResult.error) return configHashResult.hash; + if (manifestReport) { + manifestReport.configHashError = 
configHashResult.error; + } + logWithLevel(logLevel, "warn", `Warning: ${configHashResult.error}`); + return configHashResult.hash; +} diff --git a/src/indexer/build-index.ts b/src/indexer/build-index.ts index 1d8982e7..db6aa2f2 100644 --- a/src/indexer/build-index.ts +++ b/src/indexer/build-index.ts @@ -17,7 +17,6 @@ import { normalizePath, resolveSpecifier, resolveWorkspacePackage, - type ProjectFileInfo, } from "../util.js"; import { logWithLevel, type LogLevel } from "../logging.js"; import { collectGraph, collectEdgesForFile } from "../graphs.js"; @@ -79,6 +78,7 @@ import { type GraphDeltaReport, type ImportBinding, type IncrementalBuildOptions, + type ManifestReport, type ModuleIndex, type NativeBackendFallbackReason, type ParserBackendDegradationReport, @@ -546,6 +546,152 @@ type BuildIndexHelperOptions = { ignoreExistingManifest?: boolean; }; +type IndexBuildRunState = { + normalizedProjectRoot: string; + report: BuildReport | undefined; + timings: BuildReport["timings"] | undefined; + totalStart: number; + cacheMode: NonNullable; + cacheEnabled: boolean; + graphOptions: GraphBuildOptions; + onFallbackImportExtraction: ((event: FallbackImportExtractionEvent) => void) | undefined; +}; + +function createIndexBuildRunState( + projectRoot: string, + opts: BuildOptions | undefined, + graphOptions = normalizeGraphOptions(opts?.graph), +): IndexBuildRunState { + const report = opts?.report; + initNativeBackendReport(report); + const cacheMode = opts?.cache ?? 
"off"; + return { + normalizedProjectRoot: normalizePath(projectRoot), + report, + timings: report?.timings, + totalStart: performance.now(), + cacheMode, + cacheEnabled: cacheMode !== "off", + graphOptions, + onFallbackImportExtraction: createFallbackImportExtractionHandler(report, opts), + }; +} + +function buildConcurrency(opts: BuildOptions | undefined): number { + return Math.max(1, Math.min(Number(opts?.threads || 0) || 8, 64)); +} + +function parsedCacheMaxEntries(opts: BuildOptions | undefined): number { + return Math.max(1, opts?.parsedCacheMaxEntries ?? 1024); +} + +async function prepareFileSignatures(args: { + files: string[]; + opts: BuildOptions | undefined; + gitSigMap: Map; + cacheEnabled: boolean; + concurrency: number; +}): Promise> { + const entries = await mapLimit(args.files, args.concurrency, async (file) => { + const sigInfo = await fileSignature(file, args.opts?.cacheStrict, args.gitSigMap.get(file), { + forceContentHash: args.cacheEnabled, + }); + return [file, sigInfo] as const; + }); + return new Map(entries); +} + +function toManifestFileEntry(entry: GraphCacheEntry): ManifestFileEntry | undefined { + if (!entry.sig) return undefined; + return { + sig: entry.sig, + ...(entry.gitSig ? { gitSig: entry.gitSig } : {}), + ...(entry.sqlCorpusSig ? { sqlCorpusSig: entry.sqlCorpusSig } : {}), + edges: entry.edges, + }; +} + +async function writeIndexManifestSnapshot(args: { + projectRoot: string; + opts: BuildOptions | undefined; + graphOptions: GraphBuildOptions; + files: Map | Record; + timings: BuildReport["timings"] | undefined; + manifestReport: ManifestReport | undefined; + allowEmpty?: boolean; +}): Promise { + const files = + args.files instanceof Map ? 
Object.fromEntries(args.files) : args.files; + if (!Object.keys(files).length && !args.allowEmpty) return; + const writeManifestStart = performance.now(); + const lastCommit = await getGitHead(args.projectRoot); + const configHashResult = await computeConfigHash(args.projectRoot, args.opts?.logLevel); + const configHash = recordConfigHashResult(args.manifestReport, configHashResult, args.opts?.logLevel); + const manifestData: IndexManifest = { + version: MANIFEST_VERSION, + projectRoot: path.resolve(args.projectRoot).replace(/\\/g, "/"), + updatedAt: Date.now(), + ...(lastCommit ? { lastCommit } : {}), + ...(configHash ? { configHash } : {}), + graphOptions: args.graphOptions, + buildOptions: summarizeBuildOptions(args.opts), + files, + }; + await writeManifest(args.projectRoot, args.opts, manifestData); + if (args.timings) { + args.timings.writeManifestMs = Math.round(performance.now() - writeManifestStart); + } +} + +function retainedParsedCache( + parsedMap: Map, + opts: BuildOptions | undefined, +): Map | undefined { + const keepParsed = opts?.keepParsed ?? false; + const maxParsedEntries = parsedCacheMaxEntries(opts); + if (!keepParsed) { + parsedMap.clear(); + return undefined; + } + while (parsedMap.size > maxParsedEntries) { + const oldest = parsedMap.keys().next().value; + if (!oldest) break; + parsedMap.delete(oldest); + } + return parsedMap; +} + +async function finalizeProjectIndex(args: { + projectRoot: string; + normalizedProjectRoot: string; + opts: BuildOptions | undefined; + timings: BuildReport["timings"] | undefined; + totalStart: number; + graph: Graph; + modules: Map; + parsedMap: Map; + bloomFilterCache: import("../util/bloomFilter.js").BloomFilterCache | undefined; +}): Promise { + if (args.timings) args.timings.totalMs = Math.round(performance.now() - args.totalStart); + const projectFiles = await discoverProjectFiles(args.projectRoot, { + ...(args.opts?.logLevel ? 
{ logLevel: args.opts.logLevel } : {}), + }); + const parsed = retainedParsedCache(args.parsedMap, args.opts); + return { + graph: args.graph, + graphAdjacency: buildGraphAdjacency(args.graph), + modules: args.modules, + byFile: args.modules, + projectRoot: args.normalizedProjectRoot, + ...(args.opts?.native ? { nativeMode: args.opts.native } : {}), + exportCache: new Map(), + scopeCache: new Map(), + ...(parsed ? { parsed } : {}), + ...(args.bloomFilterCache ? { bloomFilters: args.bloomFilterCache } : {}), + projectFiles, + }; +} + async function buildProjectIndexFromExport( projectRoot: string, opts?: BuildOptions, @@ -561,24 +707,25 @@ async function buildIndexFromFileListShared( helperOpts?: BuildIndexHelperOptions, ): Promise { clearImportResolutionCaches(); - const normalizedProjectRoot = normalizePath(projectRoot); - const report = opts?.report; - const timings = report?.timings; - const totalStart = performance.now(); + const { + normalizedProjectRoot, + report, + timings, + totalStart, + cacheMode, + cacheEnabled, + graphOptions, + onFallbackImportExtraction, + } = createIndexBuildRunState(projectRoot, opts); const manifestMode: ManifestMode = helperOpts?.manifestMode ?? "off"; const useManifest = manifestMode !== "off"; const shouldWriteManifest = manifestMode === "read-write"; - const cacheMode = opts?.cache ?? "off"; - const cacheEnabled = cacheMode !== "off"; - const graphOptions = normalizeGraphOptions(opts?.graph); initManifestReport(report, useManifest, false); - initNativeBackendReport(report); const normalizedFiles = Array.from(new Set(normalizeIndexedFileInputs(projectRoot, rawFiles ?? 
[], "Index file"))); if (!normalizedFiles.length && helperOpts?.warnNoFilesMessage) { logWithLevel(opts?.logLevel, "warn", helperOpts.warnNoFilesMessage); } const fileReport = initFileReport(report); - const onFallbackImportExtraction = createFallbackImportExtractionHandler(report, opts); if (fileReport) fileReport.total = normalizedFiles.length; const manifestStart = performance.now(); const manifest = useManifest && !helperOpts?.ignoreExistingManifest ? await loadManifest(projectRoot, opts) : null; @@ -605,25 +752,22 @@ async function buildIndexFromFileListShared( } const manifestEntries = shouldWriteManifest ? new Map() : undefined; const modules = new Map(); - const fileSignatures = new Map(); const gitAvailable = await isGitRepo(projectRoot); const useGitSignatures = gitAvailable && (cacheMode !== "off" || opts?.cacheStrict); const gitSigMap = useGitSignatures ? await getGitBlobHashes(projectRoot, normalizedFiles, { gitAvailable }) : new Map(); - const conc = Math.max(1, Math.min(Number(opts?.threads || 0) || 8, 64)); + const conc = buildConcurrency(opts); const sqlFiles = normalizedFiles .filter((file) => path.extname(file).toLowerCase() === ".sql") .sort((left, right) => left.localeCompare(right)); - const sqlFileSignatureEntries = await mapLimit(sqlFiles, conc, async (file) => { - const sigInfo = await fileSignature(file, opts?.cacheStrict, gitSigMap.get(file), { - forceContentHash: cacheEnabled, - }); - return [file, sigInfo] as const; + const fileSignatures = await prepareFileSignatures({ + files: sqlFiles, + opts, + gitSigMap, + cacheEnabled, + concurrency: conc, }); - for (const [file, sigInfo] of sqlFileSignatureEntries) { - fileSignatures.set(file, sigInfo); - } const sqlCorpusSig = sqlCorpusSignature(sqlFiles, fileSignatures); let sqlFactCachePromise: Promise | undefined; const getSqlFactCache = (): Promise => { @@ -653,13 +797,9 @@ async function buildIndexFromFileListShared( const graph: Graph = { nodes: new Set(normalizedFiles), edges: [] }; const 
onFileEdges = manifestEntries ? (file: string, entry: GraphCacheEntry) => { - if (!entry?.sig) return; - manifestEntries.set(file, { - sig: entry.sig, - ...(entry.gitSig ? { gitSig: entry.gitSig } : {}), - ...(entry.sqlCorpusSig ? { sqlCorpusSig: entry.sqlCorpusSig } : {}), - edges: entry.edges, - }); + const manifestEntry = toManifestFileEntry(entry); + if (!manifestEntry) return; + manifestEntries.set(file, manifestEntry); } : undefined; let processedFiles = 0; @@ -726,7 +866,7 @@ async function buildIndexFromFileListShared( workspaceConfig, workerSetup, parsedMap, - parsedCacheMaxEntries: Math.max(1, opts?.parsedCacheMaxEntries ?? 1024), + parsedCacheMaxEntries: parsedCacheMaxEntries(opts), jsonDependencies, bloomFilterCache, onFallbackImportExtraction, @@ -805,54 +945,27 @@ async function buildIndexFromFileListShared( ensureJsonModule(modules, jsonPath); } expandStarImports(modules); - if (manifestEntries && manifestEntries.size > 0) { - const writeManifestStart = performance.now(); - const lastCommit = await getGitHead(projectRoot); - const configHashResult = await computeConfigHash(projectRoot, opts?.logLevel); - const configHash = recordConfigHashResult(report?.manifest, configHashResult, opts?.logLevel); - const manifestData: IndexManifest = { - version: MANIFEST_VERSION, - projectRoot: path.resolve(projectRoot).replace(/\\/g, "/"), - updatedAt: Date.now(), - ...(lastCommit ? { lastCommit } : {}), - ...(configHash ? { configHash } : {}), + if (manifestEntries) { + await writeIndexManifestSnapshot({ + projectRoot, + opts, graphOptions, - buildOptions: summarizeBuildOptions(opts), - files: Object.fromEntries(manifestEntries), - }; - await writeManifest(projectRoot, opts, manifestData); - if (timings) { - timings.writeManifestMs = Math.round(performance.now() - writeManifestStart); - } - } - if (timings) timings.totalMs = Math.round(performance.now() - totalStart); - const projectFiles = await discoverProjectFiles(projectRoot, { - ...(opts?.logLevel ? 
{ logLevel: opts.logLevel } : {}), - }); - const keepParsed = opts?.keepParsed ?? false; - const maxParsedEntries = Math.max(1, opts?.parsedCacheMaxEntries ?? 1024); - if (!keepParsed) { - parsedMap.clear(); - } else { - while (parsedMap.size > maxParsedEntries) { - const oldest = parsedMap.keys().next().value; - if (!oldest) break; - parsedMap.delete(oldest); - } + files: manifestEntries, + timings, + manifestReport: report?.manifest, + }); } - return { + return finalizeProjectIndex({ + projectRoot, + normalizedProjectRoot, + opts, + timings, + totalStart, graph, - graphAdjacency: buildGraphAdjacency(graph), modules, - byFile: modules, - projectRoot: normalizedProjectRoot, - ...(opts?.native ? { nativeMode: opts.native } : {}), - exportCache: new Map(), - scopeCache: new Map(), - parsed: keepParsed ? parsedMap : undefined, - ...(bloomFilterCache ? { bloomFilters: bloomFilterCache } : {}), - projectFiles, - }; + parsedMap, + bloomFilterCache, + }); } finally { await teardownWorkerPool(workerSetup, report); } @@ -928,21 +1041,22 @@ export async function buildProjectIndexIncremental( opts?: IncrementalBuildOptions, ): Promise { clearImportResolutionCaches(); - const normalizedProjectRoot = normalizePath(projectRoot); - const report = opts?.report; - initNativeBackendReport(report); - const timings = report?.timings; - const totalStart = performance.now(); - const cacheMode = opts?.cache ?? "off"; - const cacheEnabled = cacheMode !== "off"; + const graphOptions = normalizeGraphOptions(opts?.graph); + const strictIncremental = opts?.incrementalStrict ?? 
false; + if (strictIncremental && graphOptions.fast) graphOptions.fast = false; + const { + normalizedProjectRoot, + report, + timings, + totalStart, + cacheMode, + cacheEnabled, + onFallbackImportExtraction, + } = createIndexBuildRunState(projectRoot, opts, graphOptions); try { - const onFallbackImportExtraction = createFallbackImportExtractionHandler(report, opts); const manifestStart = performance.now(); const manifest = await loadManifest(projectRoot, opts); if (timings) timings.manifestMs = Math.round(performance.now() - manifestStart); - const graphOptions = normalizeGraphOptions(opts?.graph); - const strictIncremental = opts?.incrementalStrict ?? false; - if (strictIncremental && graphOptions.fast) graphOptions.fast = false; const manifestUsed = !!manifest; const manifestReport = initManifestReport(report, manifestUsed, false); if (manifestReport && !manifestUsed) manifestReport.reason = "missing"; @@ -1050,24 +1164,15 @@ export async function buildProjectIndexIncremental( } } if (allFiles.size === 0) { - const writeManifestStart = performance.now(); - const lastCommit = await getGitHead(projectRoot); - const configHashResult = await computeConfigHash(projectRoot, opts?.logLevel); - const configHash = recordConfigHashResult(manifestReport, configHashResult, opts?.logLevel); - const manifestData: IndexManifest = { - version: MANIFEST_VERSION, - projectRoot: path.resolve(projectRoot).replace(/\\/g, "/"), - updatedAt: Date.now(), - ...(lastCommit ? { lastCommit } : {}), - ...(configHash ? 
{ configHash } : {}), + await writeIndexManifestSnapshot({ + projectRoot, + opts, graphOptions, - buildOptions: summarizeBuildOptions(opts), files: {}, - }; - await writeManifest(projectRoot, opts, manifestData); - if (timings) { - timings.writeManifestMs = Math.round(performance.now() - writeManifestStart); - } + timings, + manifestReport, + allowEmpty: true, + }); return { graph: { nodes: new Set(), edges: [] }, graphAdjacency: buildGraphAdjacency({ nodes: new Set(), edges: [] }), @@ -1080,14 +1185,20 @@ export async function buildProjectIndexIncremental( parsed: new Map(), }; } - const conc = Math.max(1, Math.min(Number(opts?.threads || 0) || 8, 64)); + const conc = buildConcurrency(opts); const workerSetup = await setupWorkerPool(opts); try { - const fileSignatures = new Map(); const useGitSignatures = gitAvailable; const gitSigMap = useGitSignatures ? await getGitBlobHashes(projectRoot, Array.from(allFiles), { gitAvailable }) : new Map(); + const fileSignatures = await prepareFileSignatures({ + files: Array.from(allFiles), + opts, + gitSigMap, + cacheEnabled, + concurrency: conc, + }); const changedFiles = new Set(); const modules = new Map(); const parsedMap = new Map(); @@ -1105,10 +1216,8 @@ export async function buildProjectIndexIncremental( dependentFilesOfDeletedTracked.forEach(markAsChanged); if (fileReport) fileReport.changed = changedFiles.size; for (const file of allFiles) { - const sigInfo = await fileSignature(file, opts?.cacheStrict, gitSigMap.get(file), { - forceContentHash: cacheEnabled, - }); - fileSignatures.set(file, sigInfo); + const sigInfo = fileSignatures.get(file); + if (!sigInfo) continue; const entry = trackedEntries[file]; const hasMatchingGitSig = !!entry?.gitSig && !!sigInfo.gitSig && entry.gitSig === sigInfo.gitSig; const hasMatchingSig = entry?.sig === sigInfo.sig; @@ -1154,7 +1263,7 @@ export async function buildProjectIndexIncremental( workspaceConfig, workerSetup, parsedMap, - parsedCacheMaxEntries: Math.max(1, 
opts?.parsedCacheMaxEntries ?? 1024), + parsedCacheMaxEntries: parsedCacheMaxEntries(opts), jsonDependencies, bloomFilterCache, onFallbackImportExtraction, @@ -1229,64 +1338,31 @@ export async function buildProjectIndexIncremental( ...(baseGraph ? { baseGraph } : {}), replaceFiles: new Set(changedFiles), onFileEdges: (file, entry) => { - if (!entry?.sig) return; - manifestEntries.set(file, { - sig: entry.sig, - ...(entry.gitSig ? { gitSig: entry.gitSig } : {}), - ...(entry.sqlCorpusSig ? { sqlCorpusSig: entry.sqlCorpusSig } : {}), - edges: entry.edges, - }); + const manifestEntry = toManifestFileEntry(entry); + if (!manifestEntry) return; + manifestEntries.set(file, manifestEntry); }, }); if (timings) timings.graphMs = Math.round(performance.now() - graphStart); - if (manifestEntries.size > 0) { - const writeManifestStart = performance.now(); - const lastCommit = await getGitHead(projectRoot); - const configHashResult = await computeConfigHash(projectRoot, opts?.logLevel); - const configHash = recordConfigHashResult(manifestReport, configHashResult, opts?.logLevel); - const manifestData: IndexManifest = { - version: MANIFEST_VERSION, - projectRoot: path.resolve(projectRoot).replace(/\\/g, "/"), - updatedAt: Date.now(), - ...(lastCommit ? { lastCommit } : {}), - ...(configHash ? { configHash } : {}), - graphOptions, - buildOptions: summarizeBuildOptions(opts), - files: Object.fromEntries(manifestEntries), - }; - await writeManifest(projectRoot, opts, manifestData); - if (timings) { - timings.writeManifestMs = Math.round(performance.now() - writeManifestStart); - } - } - if (timings) timings.totalMs = Math.round(performance.now() - totalStart); - const projectFiles: ProjectFileInfo[] = await discoverProjectFiles(projectRoot, { - ...(opts?.logLevel ? { logLevel: opts.logLevel } : {}), + await writeIndexManifestSnapshot({ + projectRoot, + opts, + graphOptions, + files: manifestEntries, + timings, + manifestReport, }); - const keepParsed = opts?.keepParsed ?? 
false; - const maxParsedEntries = Math.max(1, opts?.parsedCacheMaxEntries ?? 1024); - if (!keepParsed) { - parsedMap.clear(); - } else { - while (parsedMap.size > maxParsedEntries) { - const oldest = parsedMap.keys().next().value; - if (!oldest) break; - parsedMap.delete(oldest); - } - } - return { + return finalizeProjectIndex({ + projectRoot, + normalizedProjectRoot, + opts, + timings, + totalStart, graph, - graphAdjacency: buildGraphAdjacency(graph), modules, - byFile: modules, - projectRoot: normalizedProjectRoot, - ...(opts?.native ? { nativeMode: opts.native } : {}), - exportCache: new Map(), - scopeCache: new Map(), - parsed: keepParsed ? parsedMap : undefined, - ...(bloomFilterCache ? { bloomFilters: bloomFilterCache } : {}), - projectFiles, - }; + parsedMap, + bloomFilterCache, + }); } finally { await teardownWorkerPool(workerSetup, report); } diff --git a/src/indexer/declarations.ts b/src/indexer/declarations.ts new file mode 100644 index 00000000..b3456946 --- /dev/null +++ b/src/indexer/declarations.ts @@ -0,0 +1,83 @@ +import type { FileId } from "../types.js"; +import type { LanguageSupport } from "../languages.js"; +import type { SyntaxNodeLike } from "../languages/types.js"; +import { SymbolKind, type ExportEntry, type ProjectIndex, type SymbolDef, type SymbolHandle } from "./types.js"; +import type { BindingKind } from "./scope-types.js"; + +export function declarationKindToBindingKind(kind: string): BindingKind { + if (kind === "function") return "function"; + if (kind === "class" || kind === "interface") return "class"; + if (kind === "type") return "type"; + return "local"; +} + +export function bindingKindToSymbolKind(kind: BindingKind): SymbolKind { + if (kind === "function") return SymbolKind.Function; + if (kind === "class") return SymbolKind.Class; + if (kind === "type") return SymbolKind.TypeAlias; + return SymbolKind.Variable; +} + +export function symbolHandleFromLocal(file: FileId, local: SymbolDef): SymbolHandle { + const index = 
local.range.start.index ?? 0; + return `${file}::${local.localName}::${index}`; +} + +export function buildTrackedSymbolPositions(locals: readonly SymbolDef[]): Set { + const positions = new Set(); + for (const local of locals) { + positions.add(`${local.range.start.line}:${local.range.start.column}`); + } + return positions; +} + +export function findTrackedDeclarationNameInAncestors( + node: SyntaxNodeLike, + support: LanguageSupport, + trackedPositions?: ReadonlySet, +): SyntaxNodeLike | null { + let current: SyntaxNodeLike | null = node; + while (current) { + for (const child of current.namedChildren || []) { + if (!support.isDeclarationName?.(child)) continue; + if (trackedPositions) { + const line = (child.startPosition?.row ?? 0) + 1; + const column = (child.startPosition?.column ?? 0) + 1; + if (!trackedPositions.has(`${line}:${column}`)) continue; + } + return child; + } + current = current.parent; + } + return null; +} + +export function findLocalByStartPosition( + locals: readonly SymbolDef[], + line: number | undefined, + column: number | undefined, +): SymbolDef | undefined { + if (!line || !column) return undefined; + return locals.find((local) => local.range.start.line === line && local.range.start.column === column); +} + +export function isLocalSymbolExported(exports: readonly ExportEntry[], symbolDef: SymbolDef): boolean { + const symbolIndex = symbolDef.range.start.index ?? 0; + return exports.some( + (entry) => + entry.type === "local" && + entry.target.localName === symbolDef.localName && + (entry.target.range.start.index ?? 
0) === symbolIndex, + ); +} + +export function isSymbolHandleExported(exports: readonly ExportEntry[], handle: SymbolHandle): boolean { + return exports.some( + (entry) => entry.type === "local" && symbolHandleFromLocal(entry.target.file, entry.target) === handle, + ); +} + +export function isProjectSymbolExported(index: ProjectIndex, file: FileId, symbolDef: SymbolDef): boolean { + const mod = index.byFile.get(file); + return mod ? isLocalSymbolExported(mod.exports, symbolDef) : false; +} diff --git a/src/indexer/imports.ts b/src/indexer/imports.ts index 67b01a46..801c98a3 100644 --- a/src/indexer/imports.ts +++ b/src/indexer/imports.ts @@ -1,5 +1,3 @@ -import fs from "node:fs"; -import path from "node:path"; import { isJsFallbackAvailable, isJsFallbackUnavailableError, @@ -8,35 +6,14 @@ import { } from "../jsFallback.js"; import { prepareSourceInput } from "../languages/filePrep.js"; import { - parseCsharpUsingDirective, - parseJavaImportStatement, - parseKotlinImportStatement, - parsePhpImportStatement, - parseRustImportStatement, -} from "../languages/importStatementParsers.js"; -import { - getGraphOnlyResolutionExtensions, - getPhpComposerImplicitFiles, loadNearestTsconfigFor, loadWorkspaceConfig, resolveImportSpecifier, - resolvePythonModule, - resolveSpecifier, - sliceText, - stripJsLikeComments, - stripPythonCommentsAndStrings, - unquote, } from "../util.js"; import { logWithLevel, type LogLevel } from "../logging.js"; import { type FallbackImportExtractionEvent, type FallbackImportExtractionReason } from "../graphs/specifiers.js"; import type { GraphBuildOptions } from "../graphs/types.js"; -import { - extractGraphOnlyModuleSpecifiers, - graphOnlyLanguageSupportsImportAliases, - graphOnlySpecifierNeedsResolutionConfig, - isGraphOnlyLanguage, -} from "../documentLinks.js"; -import { capturesByName, capturesNamed, rangeFromNativeCapture } from "../native/queryResults.js"; +import { isGraphOnlyLanguage } from "../documentLinks.js"; import { 
executeJsQueryAsNativeMatches, isNativeQueryAuthoritative, @@ -44,34 +21,20 @@ import { shouldAvoidJsFallbackForLanguage, type NativeQueryResults, } from "../native/treeSitterNative.js"; -import { parseGoImportAlias } from "./shared.js"; +import type { ResolvedImportTarget } from "./imports/context.js"; +import { collectGraphOnlyImports } from "./imports/graphOnly.js"; +import { collectJsTextImports } from "./imports/jsFallback.js"; +import { + applyStatementImportOverride, + createStatementImportOverrideState, + finalizeLanguageSpecificImports, +} from "./imports/languageSpecific.js"; +import { collectJsQueryCaptureImportBindings, collectNativeCaptureImportBindings } from "./imports/nativeCaptures.js"; +import { collectPythonImportsFromSource } from "./imports/python.js"; import type { LanguageSupport } from "../languages.js"; import type { JsLanguage } from "../languages/types.js"; import type { ImportBinding } from "./types.js"; -type ResolvedImportTarget = Exclude; - -function parseObjectPatternBindings(patternText: string): Array<{ imported: string; local: string }> { - const trimmed = patternText.trim(); - if (!trimmed.startsWith("{") || !trimmed.endsWith("}")) return []; - const body = trimmed.slice(1, -1).trim(); - if (!body) return []; - const parts = body - .split(",") - .map((part) => part.trim()) - .filter(Boolean); - const out: Array<{ imported: string; local: string }> = []; - for (const part of parts) { - const withoutDefault = part.replace(/\s*=\s*.+$/, "").trim(); - const match = withoutDefault.match(/^([A-Za-z_$][\w$]*)(?::\s*([A-Za-z_$][\w$]*))?$/); - if (!match) continue; - const imported = match[1]!; - const local = match[2] ?? 
imported; - out.push({ imported, local }); - } - return out; -} - export async function collectImportsForFile( file: string, projectRoot: string, @@ -100,42 +63,13 @@ export async function collectImportsForFile( const resolvedSup = sup; let resolvedLang = lang; if (isGraphOnlyLanguage(resolvedSup.id)) { - const entries = Array.from(extractGraphOnlyModuleSpecifiers(resolvedSup.id, resolvedSource)); - const needsGraphOnlyResolutionConfig = - graphOnlyLanguageSupportsImportAliases(resolvedSup.id) && - entries.some(({ spec }) => graphOnlySpecifierNeedsResolutionConfig(spec)); - const { matchPath } = needsGraphOnlyResolutionConfig - ? await loadNearestTsconfigFor(file, opts?.logLevel) - : { matchPath: undefined }; - const workspaceConfig = needsGraphOnlyResolutionConfig ? await loadWorkspaceConfig(projectRoot) : undefined; - const resolutionHints = opts?.graphOptions?.resolutionHints; - const resolvedSpecifiers = await Promise.all( - entries.map((entry) => - resolveSpecifier(file, entry.spec, projectRoot, matchPath, workspaceConfig, { - resolveNodeModules: !!opts?.graphOptions?.resolveNodeModules, - resolutionExtensions: getGraphOnlyResolutionExtensions(resolvedSup.id, entry.resolutionKind ?? "document"), - ...(resolutionHints ? { resolutionHints } : {}), - }), - ), - ); - return entries.flatMap((entry, index) => { - const resolved = resolvedSpecifiers[index]; - if (resolved === undefined) { - throw new Error(`Missing graph-only resolution result for ${resolvedSup.id}:${entry.spec}`); - } - if (typeof resolved !== "string" && entry.dropIfUnresolved) { - return []; - } - const from = entry.raw ?? entry.spec; - return [ - { - kind: "star" as const, - from, - ...(typeof resolved === "string" - ? { resolved: resolved.replace(/\\/g, "/") } - : { resolved: { ...resolved, external: from } }), - }, - ]; + return await collectGraphOnlyImports({ + file, + projectRoot, + source: resolvedSource, + languageId: resolvedSup.id, + ...(opts?.graphOptions ? 
{ graphOptions: opts.graphOptions } : {}), + ...(opts?.logLevel ? { logLevel: opts.logLevel } : {}), }); } @@ -153,437 +87,17 @@ export async function collectImportsForFile( reason, }); }; - const normalizeGoImports = (): void => { - if (resolvedSup.id !== "go" || !imports.length) { - return; - } - const aliasByFrom = new Map(); - const importPattern = /^\s*(?:import\s+)?(?:(?[._A-Za-z][\w]*)\s+)?["'`](?[^"'`]+)["'`]/gm; - for (const match of resolvedSource.matchAll(importPattern)) { - const from = match.groups?.from; - if (!from) continue; - const alias = match.groups?.alias; - if (alias) { - aliasByFrom.set(from, alias); - } - } - - if (aliasByFrom.size === 0) { - return; - } - - const normalized: ImportBinding[] = []; - const seen = new Set(); - for (const imp of imports) { - const alias = aliasByFrom.get(imp.from); - let next: ImportBinding | null = imp; - if (alias === ".") { - next = { - kind: "star", - from: imp.from, - ...(imp.resolved !== undefined ? { resolved: imp.resolved } : {}), - ...(imp.typeOnly ? { typeOnly: imp.typeOnly } : {}), - }; - } else if (alias === "_") { - next = null; - } else if (alias && imp.kind === "namespace") { - next = { - ...imp, - localNS: alias, - }; - } - if (!next) continue; - const key = JSON.stringify(next); - if (seen.has(key)) continue; - seen.add(key); - normalized.push(next); - } - - imports.splice(0, imports.length, ...normalized); - }; - const appendJavaTextImports = async (): Promise => { - if (resolvedSup.id !== "java" || imports.length) { - return; - } - const importPattern = /^\s*import\s+(static\s+)?([A-Za-z_][\w.]*(?:\.\*)?)\s*;/gm; - for (const match of resolvedSource.matchAll(importPattern)) { - const isStatic = !!match[1]; - const rawSpec = match[2]; - if (!rawSpec) continue; - if (rawSpec.endsWith(".*")) { - const resolved = await resolveFrom(isStatic ? 
rawSpec.slice(0, -2) : rawSpec); - imports.push({ - kind: "star", - from: rawSpec, - resolved, - typeOnly: false, - }); - continue; - } - - const parts = rawSpec.split("."); - const imported = parts[parts.length - 1]; - if (!imported) continue; - const fromValue = isStatic ? parts.slice(0, -1).join(".") : rawSpec; - const resolved = await resolveFrom(fromValue); - imports.push({ - kind: "named", - local: imported, - imported, - from: fromValue, - resolved, - typeOnly: false, - }); - } - }; - const appendKotlinTextImports = async (): Promise => { - if (resolvedSup.id !== "kotlin" || imports.length) { - return; - } - const importPattern = /^\s*import\s+([A-Za-z_][\w.]*(?:\.\*)?)(?:\s+as\s+([A-Za-z_][\w]*))?\s*$/gm; - for (const match of resolvedSource.matchAll(importPattern)) { - const rawSpec = match[1]; - if (!rawSpec) continue; - if (rawSpec.endsWith(".*")) { - const fromValue = rawSpec.slice(0, -2); - const resolved = await resolveFrom(fromValue); - imports.push({ - kind: "star", - from: fromValue, - resolved, - typeOnly: false, - }); - continue; - } - - const parts = rawSpec.split("."); - const imported = parts[parts.length - 1]; - if (!imported) continue; - const resolved = await resolveFrom(rawSpec); - imports.push({ - kind: "named", - local: match[2] ?? imported, - imported, - from: rawSpec, - resolved, - typeOnly: false, - }); - } - }; - const appendPhpComposerImplicitImports = async (): Promise => { - if (resolvedSup.id !== "php") { - return; - } - - const implicitFiles = await getPhpComposerImplicitFiles(projectRoot, file); - const seenResolved = new Set( - imports - .map((entry) => (typeof entry.resolved === "string" ? 
entry.resolved : null)) - .filter((entry): entry is string => !!entry), - ); - - for (const implicitFile of implicitFiles) { - const normalizedResolved = implicitFile.replace(/\\/g, "/"); - if (normalizedResolved === file.replace(/\\/g, "/")) { - continue; - } - if (seenResolved.has(normalizedResolved)) { - continue; - } - - const relativeFrom = path.relative(path.dirname(file), implicitFile).replace(/\\/g, "/"); - const from = relativeFrom.startsWith(".") || relativeFrom.startsWith("/") ? relativeFrom : `./${relativeFrom}`; - imports.push({ - kind: "star", - from, - resolved: normalizedResolved, - mechanism: "php", - }); - seenResolved.add(normalizedResolved); - } - }; - const finalizeLanguageSpecificImports = async (): Promise => { - normalizeGoImports(); - await appendJavaTextImports(); - await appendKotlinTextImports(); - await appendPhpComposerImplicitImports(); - }; - const handledStatementImports = new Set(); - const applyStatementImportOverride = async (stmtText: string, typeOnly: boolean): Promise => { - const normalizedStmt = stmtText.trim(); - if (!normalizedStmt) return false; - - if (resolvedSup.id === "csharp") { - const parsed = parseCsharpUsingDirective(normalizedStmt); - if (!parsed) return false; - if (handledStatementImports.has(normalizedStmt)) return true; - handledStatementImports.add(normalizedStmt); - - let fromValue = parsed.from; - let resolved = await resolveFrom(fromValue); - if (parsed.alias) { - const fromParts = parsed.from.split("."); - const imported = fromParts[fromParts.length - 1] ?? 
parsed.alias; - if (typeof resolved !== "string" && fromParts.length > 1) { - const fallbackFrom = fromParts.slice(0, -1).join("."); - if (fallbackFrom) { - const fallbackResolved = await resolveFrom(fallbackFrom); - if (typeof fallbackResolved === "string") { - fromValue = fallbackFrom; - resolved = fallbackResolved; - } - } - } - imports.push({ - kind: "named", - local: parsed.alias, - imported, - from: fromValue, - resolved, - typeOnly, - }); - } else { - imports.push({ - kind: "star", - from: fromValue, - resolved, - typeOnly, - }); - } - return true; - } - - if (resolvedSup.id === "java") { - const parsed = parseJavaImportStatement(normalizedStmt); - if (!parsed) return false; - if (handledStatementImports.has(normalizedStmt)) return true; - handledStatementImports.add(normalizedStmt); - - const resolved = await resolveFrom(parsed.from); - if (parsed.kind === "star") { - imports.push({ - kind: "star", - from: parsed.from, - resolved, - typeOnly, - }); - } else { - imports.push({ - kind: "named", - local: parsed.imported, - imported: parsed.imported, - from: parsed.from, - resolved, - typeOnly, - }); - } - return true; - } - - if (resolvedSup.id === "kotlin") { - const parsed = parseKotlinImportStatement(normalizedStmt); - if (!parsed) return false; - if (handledStatementImports.has(normalizedStmt)) return true; - handledStatementImports.add(normalizedStmt); - - const resolved = await resolveFrom(parsed.from); - if (parsed.kind === "star") { - imports.push({ - kind: "star", - from: parsed.from, - resolved, - typeOnly, - }); - } else { - imports.push({ - kind: "named", - local: parsed.local, - imported: parsed.imported, - from: parsed.from, - resolved, - typeOnly, - }); - } - return true; - } - - if (resolvedSup.id === "rust") { - const parsed = parseRustImportStatement(normalizedStmt); - if (!parsed) return false; - if (handledStatementImports.has(normalizedStmt)) return true; - handledStatementImports.add(normalizedStmt); - - const resolved = await 
resolveFrom(parsed.from); - if (parsed.kind === "member") { - imports.push({ - kind: "named", - local: parsed.local, - imported: parsed.imported, - from: parsed.from, - resolved, - typeOnly, - }); - } else if (parsed.kind === "module") { - imports.push({ - kind: "namespace", - localNS: parsed.local, - from: parsed.from, - resolved, - typeOnly, - }); - } else { - imports.push({ - kind: "star", - from: parsed.from, - resolved, - typeOnly, - }); - } - return true; - } - - if (resolvedSup.id === "php") { - const parsed = parsePhpImportStatement(normalizedStmt, file); - if (!parsed.length) return false; - if (handledStatementImports.has(normalizedStmt)) return true; - handledStatementImports.add(normalizedStmt); - - for (const entry of parsed) { - if (entry.kind === "include") { - const resolved = await resolveFrom(entry.from); - imports.push({ - kind: "star", - from: entry.from, - resolved, - typeOnly, - mechanism: "php", - }); - continue; - } - const resolved = await resolveFrom(entry.from, entry.importType); - imports.push({ - kind: "named", - local: entry.local, - imported: entry.imported, - from: entry.from, - phpImportType: entry.importType, - resolved, - typeOnly, - mechanism: "php", - }); - } - return true; - } - - return false; - }; if (resolvedSup.id === "python") { - const pySrc = stripPythonCommentsAndStrings(resolvedSource); - const pushStar = async (moduleSpec: string) => { - const m = moduleSpec.match(/^(\.+)(.*)$/); - const relDots = m ? m[1]!.length : 0; - const mod = m ? m[2] || null : moduleSpec; - const resolved = await resolvePythonModule(projectRoot, file, mod, relDots); - imports.push({ - kind: "star", - from: moduleSpec, - resolved, - mechanism: "python", - }); - }; - const pushNamed = async (moduleSpec: string, imported: string, local: string) => { - const m = moduleSpec.match(/^(\.+)(.*)$/); - const relDots = m ? m[1]!.length : 0; - const mod = m ? 
m[2] || null : moduleSpec; - const resolved = await resolvePythonModule(projectRoot, file, mod, relDots); - let nsResolved: string | undefined; - if (typeof resolved === "string") { - let baseDir = resolved; - try { - const st = fs.statSync(baseDir); - if (!st.isDirectory() && baseDir.toLowerCase().endsWith("__init__.py")) baseDir = path.dirname(baseDir); - } catch { - /* stat failed */ - } - const sub = [ - path.join(baseDir, `${imported}.py`), - path.join(baseDir, imported, "__init__.py"), - path.join(baseDir, imported), - ]; - for (const c of sub) { - try { - if (fs.existsSync(c)) { - nsResolved = c.replace(/\\/g, "/"); - break; - } - } catch { - /* existsSync/stat: ignore */ - } - } - } - if (nsResolved) { - imports.push({ - kind: "namespace", - localNS: local, - from: moduleSpec, - resolved: nsResolved, - mechanism: "python", - }); - } else { - imports.push({ - kind: "named", - local, - imported, - from: moduleSpec, - resolved, - mechanism: "python", - }); - } - }; - const pushDefault = async (dotted: string, local: string) => { - const resolved = await resolvePythonModule(projectRoot, file, dotted, 0); - imports.push({ - kind: "namespace", - localNS: local, - from: dotted, - resolved, - mechanism: "python", - }); - }; - - const reFromLine = /^\s*from\s+([^\s]+)\s+import\s+([^\n#]+)/gm; - for (const m of pySrc.matchAll(reFromLine)) { - const mod = m[1]!.trim(); - const items = m[2]!.split(",").map((s) => s.trim()); - for (const it of items) { - if (it === "*") { - await pushStar(mod); - continue; - } - const am = it.match(/^([A-Za-z_][\w_]*)(?:\s+as\s+([A-Za-z_][\w_]*))?$/); - if (am) { - const imported = am[1]!; - const local = am[2] ?? imported; - await pushNamed(mod, imported, local); - } - } - } - const reImp = /^(?:\s*)import\s+([A-Za-z_][\w.]*)\s*(?:as\s+([A-Za-z_][\w_]*))?/gm; - for (const m of pySrc.matchAll(reImp)) { - const dotted = m[1]!; - const local = (m[2] ?? 
dotted.split(".")[0]) as string; - await pushDefault(dotted, local); - } + await collectPythonImportsFromSource({ + file, + projectRoot, + source: resolvedSource, + pushBinding: (binding) => imports.push(binding), + }); return imports; } - let key: "py" | "js" | "ts" = "ts"; - if (resolvedSup.id === "python") { - key = "py"; - } else if (resolvedSup.id === "js") { - key = "js"; - } const tsCfg = resolvedSup.id === "ts" || resolvedSup.id === "tsx" ? await loadNearestTsconfigFor(file, opts?.logLevel) @@ -612,189 +126,33 @@ export async function collectImportsForFile( resolvedImportCache.set(cacheKey, resolved); return await resolved; }; - const appendImplicitImportBinding = (args: { - from: string; - resolved: ResolvedImportTarget; - typeOnly: boolean; - stmtText: string; - alias?: string; - wildcard?: boolean; - }): void => { - const { from, resolved, typeOnly, stmtText, alias, wildcard } = args; - if (resolvedSup.id === "java") { - const parts = from.split("."); - const last = parts[parts.length - 1]; - if (last === "*") { - imports.push({ kind: "star", from, resolved, typeOnly }); - } else if (last && /^[A-Z]/.test(last)) { - imports.push({ kind: "named", local: last, imported: last, from, resolved, typeOnly }); - } - } else if (resolvedSup.id === "csharp") { - if (alias) { - const fromParts = from.split("."); - const imported = fromParts[fromParts.length - 1] ?? 
alias; - imports.push({ kind: "named", local: alias, imported, from, resolved, typeOnly }); - } else { - imports.push({ kind: "star", from, resolved, typeOnly }); - } - } else if (resolvedSup.id === "ruby") { - imports.push({ kind: "star", from, resolved }); - } else if (resolvedSup.id === "go") { - if (alias) { - if (alias === "_") return; - if (alias === ".") { - imports.push({ kind: "star", from, resolved }); - return; - } - imports.push({ kind: "namespace", localNS: alias, from, resolved }); - } else { - const parts = from.replace(/"/g, "").split("/"); - const last = parts[parts.length - 1]; - if (last) imports.push({ kind: "namespace", localNS: last, from, resolved }); - } - } else if (resolvedSup.id === "rust") { - if (stmtText.startsWith("mod ")) { - imports.push({ kind: "namespace", localNS: from, from, resolved }); - } else { - const parts = from.split("::"); - const last = parts[parts.length - 1]; - if (!last) return; - if (last === "*") { - imports.push({ kind: "star", from, resolved }); - } else { - imports.push({ kind: "named", local: last, imported: last, from, resolved }); - } - } - } else if (resolvedSup.id === "kotlin") { - if (wildcard || from.endsWith(".*")) { - imports.push({ kind: "star", from, resolved, typeOnly }); - } else { - const parts = from.split("."); - const imported = parts[parts.length - 1]; - if (imported) imports.push({ kind: "named", local: alias ?? 
imported, imported, from, resolved, typeOnly }); - } - } else if (resolvedSup.id === "swift") { - const parts = from.split("."); - const last = parts[parts.length - 1]; - if (!last) return; - if (parts.length === 1) { - imports.push({ kind: "namespace", localNS: last, from, resolved, typeOnly }); - imports.push({ kind: "star", from, resolved, typeOnly }); - } else { - imports.push({ kind: "named", local: last, imported: last, from, resolved, typeOnly }); - } - } else if (resolvedSup.id === "zig") { - if (alias) imports.push({ kind: "namespace", localNS: alias, from, resolved, typeOnly }); - } else if (resolvedSup.id === "c" || resolvedSup.id === "cpp") { - imports.push({ kind: "star", from, resolved, typeOnly }); - } + const languageContext = { + file, + projectRoot, + source: resolvedSource, + languageId: resolvedSup.id, + resolveFrom, + pushBinding: (binding: ImportBinding) => imports.push(binding), + getBindings: () => imports, + replaceBindings: (bindings: ImportBinding[]) => imports.splice(0, imports.length, ...bindings), + }; + const statementOverrideState = createStatementImportOverrideState(); + + const finalizeImports = async (): Promise => { + await finalizeLanguageSpecificImports(languageContext); + }; + + const applyStatementOverride = async (stmtText: string, typeOnly: boolean): Promise => { + return await applyStatementImportOverride(languageContext, statementOverrideState, stmtText, typeOnly); }; const runFallback = async () => { - const src = - resolvedSup.id === "ts" || resolvedSup.id === "tsx" || resolvedSup.id === "js" - ? 
stripJsLikeComments(resolvedSource) - : resolvedSource; - const typeOnlyImport = /\bimport\s+type\b/; - const reFrom = /^\s*import\s+([^\n;]*?)\s+from\s+(["'])(?[^"']+)\2/gm; - for (const m of src.matchAll(reFrom)) { - const clause = m[1]!.trim(); - const mod = m.groups?.m as string; - const typeOnly = typeOnlyImport.test(m[0]); - const resolved = await resolveFrom(mod); - const ns = clause.match(/^\*\s+as\s+([A-Za-z_$][\w$]*)$/); - if (ns) { - imports.push({ - kind: "namespace", - localNS: ns[1]!, - from: mod, - resolved, - typeOnly, - }); - continue; - } - const parts = clause.split(","); - if (parts.length) { - const first = parts[0]!.trim(); - if (first && !first.startsWith("{")) - imports.push({ - kind: "default", - local: first, - from: mod, - resolved, - typeOnly, - }); - const namedBlock = parts.slice(1).join(",").trim() || (first.startsWith("{") ? first : ""); - const names = namedBlock - .replace(/[{}]/g, "") - .split(",") - .map((s) => s.trim()) - .filter(Boolean); - for (const spec of names) { - const nm = spec.match(/^([A-Za-z_$][\w$]*)(?:\s+as\s+([A-Za-z_$][\w$]*))?$/); - if (!nm) continue; - const imported = nm[1]!; - const local = nm[2] ?? imported; - imports.push({ - kind: "named", - local, - imported, - from: mod, - resolved, - typeOnly, - }); - } - } - } - const reReqDefault = /\b(?:const|let|var)\s+([A-Za-z_$][\w$]*)\s*=\s*require\(\s*(["'])(?[^"']+)\2\s*\)/g; - for (const m of src.matchAll(reReqDefault)) { - const local = m[1]!; - const mod = m.groups?.m as string; - const resolved = await resolveFrom(mod); - imports.push({ - kind: "default", - local, - from: mod, - resolved, - mechanism: "cjs", - }); - } - const reReqNamed = /\b(?:const|let|var)\s*\{([^}]+)\}\s*=\s*require\(\s*(["'])(?[^"']+)\2\s*\)/g; - for (const m of src.matchAll(reReqNamed)) { - const specs = m[1]! 
- .split(",") - .map((s) => s.trim()) - .filter(Boolean); - const mod = m.groups?.m as string; - const resolved = await resolveFrom(mod); - for (const spec of specs) { - const nm = spec.match(/^([A-Za-z_$][\w$]*)(?::\s*([A-Za-z_$][\w$]*))?$/); - if (!nm) continue; - const imported = nm[1]!; - const local = nm[2] ?? imported; - imports.push({ - kind: "named", - local, - imported, - from: mod, - resolved, - mechanism: "cjs", - }); - } - } - const reImportEquals = /\bimport\s+([A-Za-z_$][\w$]*)\s*=\s*require\(\s*(["'])(?[^"']+)\2\s*\)/g; - for (const m of src.matchAll(reImportEquals)) { - const local = m[1]!; - const mod = m.groups?.m as string; - const resolved = await resolveFrom(mod); - imports.push({ - kind: "default", - local, - from: mod, - resolved, - mechanism: "cjs", - }); - } + await collectJsTextImports({ + source: resolvedSource, + languageId: resolvedSup.id, + resolveFrom, + pushBinding: (binding) => imports.push(binding), + }); }; const shouldUseTextImportRecoveryOnly = shouldAvoidJsFallbackForLanguage(resolvedSup.id); @@ -806,7 +164,7 @@ export async function collectImportsForFile( if (hasPotentialTextImportRecovery) { await runFallback(); } - await finalizeLanguageSpecificImports(); + await finalizeImports(); if (imports.length > importCountBeforeFallback && !isJsFallbackAvailable()) { reportFallback("js-fallback-unavailable"); } @@ -815,99 +173,19 @@ export async function collectImportsForFile( if (resolvedNativeQueries) { try { - for (const match of resolvedNativeQueries.importBindings) { - const caps = capturesByName(match); - const stmtText = caps["stmt"]?.text ?? ""; - const typeOnly = resolvedSup.isTypeOnly(stmtText); - if (await applyStatementImportOverride(stmtText, typeOnly)) { - continue; - } - const from = caps["from"] ? 
unquote(caps["from"].text) : undefined; - const patterns = capturesNamed(match, "pattern"); - - for (const pattern of patterns) { - if (pattern.nodeType !== "object_pattern" || !from) continue; - const resolved = await resolveFrom(from); - for (const binding of parseObjectPatternBindings(pattern.text)) { - imports.push({ - kind: "named", - local: binding.local, - imported: binding.imported, - from, - resolved, - typeOnly, - }); - } - } - - if (!from) continue; - const resolved = await resolveFrom(from); - if (caps["def"]) { - imports.push({ - kind: "default", - local: caps["def"].text, - from, - resolved, - typeOnly, - }); - } - if (caps["ns"]) { - if (resolvedSup.id === "go") { - const alias = parseGoImportAlias(stmtText); - if (alias === ".") { - imports.push({ - kind: "star", - from, - resolved, - typeOnly, - }); - } else if (alias !== "_") { - imports.push({ - kind: "namespace", - localNS: alias ?? caps["ns"].text, - from, - resolved, - typeOnly, - }); - } - } else { - imports.push({ - kind: "namespace", - localNS: caps["ns"].text, - from, - resolved, - typeOnly, - }); - } - } - - const inames = capturesNamed(match, "iname"); - const aliases = capturesNamed(match, "alias"); - for (let i = 0; i < inames.length; i++) { - const imported = inames[i]!.text; - const alias = aliases[i]?.text ?? imported; - imports.push({ - kind: "named", - local: alias, - imported, - from, - resolved, - typeOnly, - }); - } - - if (!caps["def"] && !caps["ns"] && !inames.length && !patterns.length) { - appendImplicitImportBinding({ - from, - resolved, - typeOnly, - stmtText, - ...(caps["alias"]?.text ? { alias: caps["alias"].text } : {}), - ...(caps["wild"] ? 
{ wildcard: true } : {}), - }); - } - } - await finalizeLanguageSpecificImports(); + await collectNativeCaptureImportBindings( + { + source: resolvedSource, + languageId: resolvedSup.id, + isTypeOnly: (stmtText) => resolvedSup.isTypeOnly(stmtText), + resolveFrom, + pushBinding: (binding) => imports.push(binding), + languageContext, + applyStatementOverride, + }, + resolvedNativeQueries.importBindings, + ); + await finalizeImports(); // Native succeeded -- treat the result as authoritative even if empty, // but only when the importBindings query was not modified by // normalization. Languages whose importBindings query is normalized @@ -934,7 +212,7 @@ export async function collectImportsForFile( `JS fallback unavailable for ${resolvedSup.id} import-binding recovery; using regex import extraction.`, ); await runFallback(); - await finalizeLanguageSpecificImports(); + await finalizeImports(); return imports; } throw error; @@ -948,128 +226,19 @@ export async function collectImportsForFile( resolvedSup.queries.importBindings, tree, ); - for (const match of matches) { - const caps = Object.fromEntries(match.captures.map((capture) => [capture.name, capture] as const)); - const stmtText = caps["stmt"]?.text ?? ""; - const typeOnly = resolvedSup.isTypeOnly(stmtText); - if (await applyStatementImportOverride(stmtText, typeOnly)) { - continue; - } - const from: string | undefined = caps["from"] ? unquote(caps["from"].text) : undefined; - - const patterns = match.captures.filter((capture) => capture.name === "pattern"); - for (const pattern of patterns) { - const patternRange = rangeFromNativeCapture(pattern); - const patternNode = tree.rootNode.descendantForIndex( - patternRange.start.index ?? 0, - patternRange.end.index ?? 
0, - ); - if (patternNode.type === "object_pattern" && from) { - for (const child of patternNode.namedChildren) { - if ( - child.type === "shorthand_property_identifier" || - child.type === "shorthand_property_identifier_pattern" - ) { - const name = sliceText(child, source); - const resolved = await resolveFrom(from); - imports.push({ - kind: "named", - local: name, - imported: name, - from, - resolved, - typeOnly, - }); - } else if (child.type === "pair_pattern") { - const key = child.childForFieldName("key"); - const value = child.childForFieldName("value"); - if (key && value && key.type === "property_identifier" && value.type === "identifier") { - const imported = sliceText(key, source); - const local = sliceText(value, source); - const resolved = await resolveFrom(from); - imports.push({ - kind: "named", - local, - imported, - from, - resolved, - typeOnly, - }); - } - } - } - } - } - - if (!from) continue; - const fromValue = from; - const resolved = await resolveFrom(fromValue); - if (caps["def"]) { - imports.push({ - kind: "default", - local: caps["def"].text, - from: fromValue, - resolved, - typeOnly, - }); - } - if (caps["ns"]) { - const nsName = caps["ns"].text; - if (resolvedSup.id === "go") { - const alias = parseGoImportAlias(stmtText); - if (alias === ".") { - imports.push({ - kind: "star", - from: fromValue, - resolved, - typeOnly, - }); - } else if (alias !== "_") { - imports.push({ - kind: "namespace", - localNS: alias ?? nsName, - from: fromValue, - resolved, - typeOnly, - }); - } - } else { - imports.push({ - kind: "namespace", - localNS: nsName, - from: fromValue, - resolved, - typeOnly, - }); - } - } - const inames = match.captures.filter((capture) => capture.name === "iname"); - const aliases = match.captures.filter((capture) => capture.name === "alias"); - for (let i = 0; i < inames.length; i++) { - const imported = inames[i]!.text; - const alias = aliases[i]?.text ?? 
imported; - imports.push({ - kind: "named", - local: alias, - imported, - from: fromValue, - resolved, - typeOnly, - }); - } - - // Heuristics for languages where we captured @from but no explicit bindings - if (fromValue && !caps["def"] && !caps["ns"] && !inames.length && !patterns.length) { - appendImplicitImportBinding({ - from: fromValue, - resolved, - typeOnly, - stmtText, - ...(caps["alias"]?.text ? { alias: caps["alias"].text } : {}), - ...(caps["wild"] ? { wildcard: true } : {}), - }); - } - } + await collectJsQueryCaptureImportBindings( + { + source: resolvedSource, + languageId: resolvedSup.id, + isTypeOnly: (stmtText) => resolvedSup.isTypeOnly(stmtText), + resolveFrom, + pushBinding: (binding) => imports.push(binding), + languageContext, + applyStatementOverride, + }, + matches, + tree, + ); } catch (error) { if (isNativeRequiredUnavailableError(error)) throw error; if (isJsFallbackUnavailableError(error)) { @@ -1078,11 +247,11 @@ export async function collectImportsForFile( await runFallback(); ranFallback = true; } - await finalizeLanguageSpecificImports(); + await finalizeImports(); // Only run fallback when query path produced no results if (!ranFallback && !imports.length) { await runFallback(); - await finalizeLanguageSpecificImports(); + await finalizeImports(); } return imports; } finally { diff --git a/src/indexer/imports/context.ts b/src/indexer/imports/context.ts new file mode 100644 index 00000000..b0b07c13 --- /dev/null +++ b/src/indexer/imports/context.ts @@ -0,0 +1,12 @@ +import type { ImportBinding } from "../types.js"; + +export type ResolvedImportTarget = Exclude; + +export type ImportResolver = ( + from: string, + phpImportType?: "class" | "function" | "const", +) => Promise; + +export type ImportBindingSink = { + pushBinding: (binding: ImportBinding) => void; +}; diff --git a/src/indexer/imports/graphOnly.ts b/src/indexer/imports/graphOnly.ts new file mode 100644 index 00000000..25694a09 --- /dev/null +++ 
b/src/indexer/imports/graphOnly.ts @@ -0,0 +1,64 @@ +import { + extractGraphOnlyModuleSpecifiers, + graphOnlyLanguageSupportsImportAliases, + graphOnlySpecifierNeedsResolutionConfig, +} from "../../documentLinks.js"; +import type { GraphBuildOptions } from "../../graphs/types.js"; +import type { LogLevel } from "../../logging.js"; +import { getGraphOnlyResolutionExtensions, loadNearestTsconfigFor, loadWorkspaceConfig, resolveSpecifier } from "../../util.js"; +import type { ImportBinding } from "../types.js"; + +export type GraphOnlyImportExtractionContext = { + file: string; + projectRoot: string; + source: string; + languageId: string; + graphOptions?: GraphBuildOptions; + logLevel?: LogLevel; +}; + +export async function collectGraphOnlyImports(context: GraphOnlyImportExtractionContext): Promise { + const entries = Array.from(extractGraphOnlyModuleSpecifiers(context.languageId, context.source)); + const needsResolutionConfig = + graphOnlyLanguageSupportsImportAliases(context.languageId) && + entries.some(({ spec }) => graphOnlySpecifierNeedsResolutionConfig(spec)); + + let matchPath: Awaited>["matchPath"] | undefined; + let workspaceConfig: Awaited> | undefined; + if (needsResolutionConfig) { + const tsconfig = await loadNearestTsconfigFor(context.file, context.logLevel); + matchPath = tsconfig.matchPath; + workspaceConfig = await loadWorkspaceConfig(context.projectRoot); + } + + const resolutionHints = context.graphOptions?.resolutionHints; + const resolvedSpecifiers = await Promise.all( + entries.map((entry) => + resolveSpecifier(context.file, entry.spec, context.projectRoot, matchPath, workspaceConfig, { + resolveNodeModules: !!context.graphOptions?.resolveNodeModules, + resolutionExtensions: getGraphOnlyResolutionExtensions(context.languageId, entry.resolutionKind ?? "document"), + ...(resolutionHints ? 
{ resolutionHints } : {}), + }), + ), + ); + + const bindings: ImportBinding[] = []; + entries.forEach((entry, index) => { + const resolved = resolvedSpecifiers[index]; + if (resolved === undefined) { + throw new Error(`Missing graph-only resolution result for ${context.languageId}:${entry.spec}`); + } + if (typeof resolved !== "string" && entry.dropIfUnresolved) { + return; + } + + const from = entry.raw ?? entry.spec; + const normalizedResolved = typeof resolved === "string" ? resolved.replace(/\\/g, "/") : { ...resolved, external: from }; + bindings.push({ + kind: "star", + from, + resolved: normalizedResolved, + }); + }); + return bindings; +} diff --git a/src/indexer/imports/jsFallback.ts b/src/indexer/imports/jsFallback.ts new file mode 100644 index 00000000..b8242372 --- /dev/null +++ b/src/indexer/imports/jsFallback.ts @@ -0,0 +1,138 @@ +import { stripJsLikeComments } from "../../util.js"; +import type { ImportBindingSink, ImportResolver } from "./context.js"; + +export type JsTextImportExtractionContext = ImportBindingSink & { + source: string; + languageId: string; + resolveFrom: ImportResolver; +}; + +function sourceForTextImportExtraction(context: JsTextImportExtractionContext): string { + if (context.languageId === "ts" || context.languageId === "tsx" || context.languageId === "js") { + return stripJsLikeComments(context.source); + } + return context.source; +} + +function splitNamedImports(namedBlock: string): string[] { + return namedBlock + .replace(/[{}]/g, "") + .split(",") + .map((spec) => spec.trim()) + .filter(Boolean); +} + +async function collectEsImports(context: JsTextImportExtractionContext, source: string): Promise { + const typeOnlyImport = /\bimport\s+type\b/; + const fromPattern = /^\s*import\s+([^\n;]*?)\s+from\s+(["'])(?[^"']+)\2/gm; + for (const match of source.matchAll(fromPattern)) { + const clause = match[1]!.trim(); + const moduleSpecifier = match.groups?.module; + if (!moduleSpecifier) continue; + const typeOnly = 
typeOnlyImport.test(match[0]); + const resolved = await context.resolveFrom(moduleSpecifier); + const namespaceMatch = clause.match(/^\*\s+as\s+([A-Za-z_$][\w$]*)$/); + if (namespaceMatch) { + context.pushBinding({ + kind: "namespace", + localNS: namespaceMatch[1]!, + from: moduleSpecifier, + resolved, + typeOnly, + }); + continue; + } + + const parts = clause.split(","); + if (!parts.length) continue; + const first = parts[0]!.trim(); + if (first && !first.startsWith("{")) { + context.pushBinding({ + kind: "default", + local: first, + from: moduleSpecifier, + resolved, + typeOnly, + }); + } + const namedBlock = parts.slice(1).join(",").trim() || (first.startsWith("{") ? first : ""); + for (const spec of splitNamedImports(namedBlock)) { + const namedMatch = spec.match(/^([A-Za-z_$][\w$]*)(?:\s+as\s+([A-Za-z_$][\w$]*))?$/); + if (!namedMatch) continue; + const imported = namedMatch[1]!; + const local = namedMatch[2] ?? imported; + context.pushBinding({ + kind: "named", + local, + imported, + from: moduleSpecifier, + resolved, + typeOnly, + }); + } + } +} + +async function collectCommonJsImports(context: JsTextImportExtractionContext, source: string): Promise { + const defaultRequirePattern = + /\b(?:const|let|var)\s+([A-Za-z_$][\w$]*)\s*=\s*require\(\s*(["'])(?[^"']+)\2\s*\)/g; + for (const match of source.matchAll(defaultRequirePattern)) { + const local = match[1]!; + const moduleSpecifier = match.groups?.module; + if (!moduleSpecifier) continue; + const resolved = await context.resolveFrom(moduleSpecifier); + context.pushBinding({ + kind: "default", + local, + from: moduleSpecifier, + resolved, + mechanism: "cjs", + }); + } + + const namedRequirePattern = /\b(?:const|let|var)\s*\{([^}]+)\}\s*=\s*require\(\s*(["'])(?[^"']+)\2\s*\)/g; + for (const match of source.matchAll(namedRequirePattern)) { + const specs = match[1]! 
+ .split(",") + .map((spec) => spec.trim()) + .filter(Boolean); + const moduleSpecifier = match.groups?.module; + if (!moduleSpecifier) continue; + const resolved = await context.resolveFrom(moduleSpecifier); + for (const spec of specs) { + const namedMatch = spec.match(/^([A-Za-z_$][\w$]*)(?::\s*([A-Za-z_$][\w$]*))?$/); + if (!namedMatch) continue; + const imported = namedMatch[1]!; + const local = namedMatch[2] ?? imported; + context.pushBinding({ + kind: "named", + local, + imported, + from: moduleSpecifier, + resolved, + mechanism: "cjs", + }); + } + } + + const importEqualsPattern = /\bimport\s+([A-Za-z_$][\w$]*)\s*=\s*require\(\s*(["'])(?[^"']+)\2\s*\)/g; + for (const match of source.matchAll(importEqualsPattern)) { + const local = match[1]!; + const moduleSpecifier = match.groups?.module; + if (!moduleSpecifier) continue; + const resolved = await context.resolveFrom(moduleSpecifier); + context.pushBinding({ + kind: "default", + local, + from: moduleSpecifier, + resolved, + mechanism: "cjs", + }); + } +} + +export async function collectJsTextImports(context: JsTextImportExtractionContext): Promise { + const source = sourceForTextImportExtraction(context); + await collectEsImports(context, source); + await collectCommonJsImports(context, source); +} diff --git a/src/indexer/imports/languageSpecific.ts b/src/indexer/imports/languageSpecific.ts new file mode 100644 index 00000000..befd72a8 --- /dev/null +++ b/src/indexer/imports/languageSpecific.ts @@ -0,0 +1,489 @@ +import path from "node:path"; +import { + parseCsharpUsingDirective, + parseJavaImportStatement, + parseKotlinImportStatement, + parsePhpImportStatement, + parseRustImportStatement, +} from "../../languages/importStatementParsers.js"; +import { getPhpComposerImplicitFiles } from "../../util.js"; +import type { ImportBinding } from "../types.js"; +import type { ImportBindingSink, ImportResolver, ResolvedImportTarget } from "./context.js"; + +export type LanguageSpecificImportContext = 
ImportBindingSink & { + file: string; + projectRoot: string; + source: string; + languageId: string; + resolveFrom: ImportResolver; + getBindings: () => ImportBinding[]; + replaceBindings: (bindings: ImportBinding[]) => void; +}; + +export type StatementImportOverrideState = { + handledStatements: Set; +}; + +export function createStatementImportOverrideState(): StatementImportOverrideState { + return { handledStatements: new Set() }; +} + +function normalizeGoImports(context: LanguageSpecificImportContext): void { + const imports = context.getBindings(); + if (context.languageId !== "go" || !imports.length) { + return; + } + const aliasByFrom = new Map(); + const importPattern = /^\s*(?:import\s+)?(?:(?[._A-Za-z][\w]*)\s+)?["'`](?[^"'`]+)["'`]/gm; + for (const match of context.source.matchAll(importPattern)) { + const from = match.groups?.from; + if (!from) continue; + const alias = match.groups?.alias; + if (alias) { + aliasByFrom.set(from, alias); + } + } + + if (!aliasByFrom.size) { + return; + } + + const normalized: ImportBinding[] = []; + const seen = new Set(); + for (const imp of imports) { + const alias = aliasByFrom.get(imp.from); + let next: ImportBinding | null = imp; + if (alias === ".") { + next = { + kind: "star", + from: imp.from, + ...(imp.resolved !== undefined ? { resolved: imp.resolved } : {}), + ...(imp.typeOnly ? 
{ typeOnly: imp.typeOnly } : {}), + }; + } else if (alias === "_") { + next = null; + } else if (alias && imp.kind === "namespace") { + next = { + ...imp, + localNS: alias, + }; + } + if (!next) continue; + const key = JSON.stringify(next); + if (seen.has(key)) continue; + seen.add(key); + normalized.push(next); + } + + context.replaceBindings(normalized); +} + +async function appendJavaTextImports(context: LanguageSpecificImportContext): Promise { + if (context.languageId !== "java" || context.getBindings().length) { + return; + } + const importPattern = /^\s*import\s+(static\s+)?([A-Za-z_][\w.]*(?:\.\*)?)\s*;/gm; + for (const match of context.source.matchAll(importPattern)) { + const isStatic = !!match[1]; + const rawSpec = match[2]; + if (!rawSpec) continue; + if (rawSpec.endsWith(".*")) { + const resolved = await context.resolveFrom(isStatic ? rawSpec.slice(0, -2) : rawSpec); + context.pushBinding({ + kind: "star", + from: rawSpec, + resolved, + typeOnly: false, + }); + continue; + } + + const parts = rawSpec.split("."); + const imported = parts[parts.length - 1]; + if (!imported) continue; + const fromValue = isStatic ? 
parts.slice(0, -1).join(".") : rawSpec; + const resolved = await context.resolveFrom(fromValue); + context.pushBinding({ + kind: "named", + local: imported, + imported, + from: fromValue, + resolved, + typeOnly: false, + }); + } +} + +async function appendKotlinTextImports(context: LanguageSpecificImportContext): Promise { + if (context.languageId !== "kotlin" || context.getBindings().length) { + return; + } + const importPattern = /^\s*import\s+([A-Za-z_][\w.]*(?:\.\*)?)(?:\s+as\s+([A-Za-z_][\w]*))?\s*$/gm; + for (const match of context.source.matchAll(importPattern)) { + const rawSpec = match[1]; + if (!rawSpec) continue; + if (rawSpec.endsWith(".*")) { + const fromValue = rawSpec.slice(0, -2); + const resolved = await context.resolveFrom(fromValue); + context.pushBinding({ + kind: "star", + from: fromValue, + resolved, + typeOnly: false, + }); + continue; + } + + const parts = rawSpec.split("."); + const imported = parts[parts.length - 1]; + if (!imported) continue; + const resolved = await context.resolveFrom(rawSpec); + context.pushBinding({ + kind: "named", + local: match[2] ?? imported, + imported, + from: rawSpec, + resolved, + typeOnly: false, + }); + } +} + +async function appendPhpComposerImplicitImports(context: LanguageSpecificImportContext): Promise { + if (context.languageId !== "php") { + return; + } + + const implicitFiles = await getPhpComposerImplicitFiles(context.projectRoot, context.file); + const seenResolved = new Set( + context + .getBindings() + .map((entry) => (typeof entry.resolved === "string" ? 
entry.resolved : null)) + .filter((entry): entry is string => !!entry), + ); + + for (const implicitFile of implicitFiles) { + const normalizedResolved = implicitFile.replace(/\\/g, "/"); + if (normalizedResolved === context.file.replace(/\\/g, "/")) { + continue; + } + if (seenResolved.has(normalizedResolved)) { + continue; + } + + const relativeFrom = path.relative(path.dirname(context.file), implicitFile).replace(/\\/g, "/"); + const from = relativeFrom.startsWith(".") || relativeFrom.startsWith("/") ? relativeFrom : `./${relativeFrom}`; + context.pushBinding({ + kind: "star", + from, + resolved: normalizedResolved, + mechanism: "php", + }); + seenResolved.add(normalizedResolved); + } +} + +export async function finalizeLanguageSpecificImports(context: LanguageSpecificImportContext): Promise { + normalizeGoImports(context); + await appendJavaTextImports(context); + await appendKotlinTextImports(context); + await appendPhpComposerImplicitImports(context); +} + +function pushCsharpOverride( + context: LanguageSpecificImportContext, + parsed: NonNullable>, + typeOnly: boolean, + fromValue: string, + resolved: ResolvedImportTarget, +): void { + if (parsed.alias) { + const fromParts = parsed.from.split("."); + const imported = fromParts[fromParts.length - 1] ?? 
parsed.alias; + context.pushBinding({ + kind: "named", + local: parsed.alias, + imported, + from: fromValue, + resolved, + typeOnly, + }); + return; + } + + context.pushBinding({ + kind: "star", + from: fromValue, + resolved, + typeOnly, + }); +} + +async function applyCsharpStatementOverride( + context: LanguageSpecificImportContext, + normalizedStmt: string, + typeOnly: boolean, +): Promise { + const parsed = parseCsharpUsingDirective(normalizedStmt); + if (!parsed) return false; + + let fromValue = parsed.from; + let resolved = await context.resolveFrom(fromValue); + if (parsed.alias) { + const fromParts = parsed.from.split("."); + if (typeof resolved !== "string" && fromParts.length > 1) { + const fallbackFrom = fromParts.slice(0, -1).join("."); + if (fallbackFrom) { + const fallbackResolved = await context.resolveFrom(fallbackFrom); + if (typeof fallbackResolved === "string") { + fromValue = fallbackFrom; + resolved = fallbackResolved; + } + } + } + } + pushCsharpOverride(context, parsed, typeOnly, fromValue, resolved); + return true; +} + +async function applyJavaStatementOverride( + context: LanguageSpecificImportContext, + normalizedStmt: string, + typeOnly: boolean, +): Promise { + const parsed = parseJavaImportStatement(normalizedStmt); + if (!parsed) return false; + + const resolved = await context.resolveFrom(parsed.from); + if (parsed.kind === "star") { + context.pushBinding({ + kind: "star", + from: parsed.from, + resolved, + typeOnly, + }); + return true; + } + + context.pushBinding({ + kind: "named", + local: parsed.imported, + imported: parsed.imported, + from: parsed.from, + resolved, + typeOnly, + }); + return true; +} + +async function applyKotlinStatementOverride( + context: LanguageSpecificImportContext, + normalizedStmt: string, + typeOnly: boolean, +): Promise { + const parsed = parseKotlinImportStatement(normalizedStmt); + if (!parsed) return false; + + const resolved = await context.resolveFrom(parsed.from); + if (parsed.kind === "star") { 
+ context.pushBinding({ + kind: "star", + from: parsed.from, + resolved, + typeOnly, + }); + return true; + } + + context.pushBinding({ + kind: "named", + local: parsed.local, + imported: parsed.imported, + from: parsed.from, + resolved, + typeOnly, + }); + return true; +} + +async function applyRustStatementOverride( + context: LanguageSpecificImportContext, + normalizedStmt: string, + typeOnly: boolean, +): Promise { + const parsed = parseRustImportStatement(normalizedStmt); + if (!parsed) return false; + + const resolved = await context.resolveFrom(parsed.from); + if (parsed.kind === "member") { + context.pushBinding({ + kind: "named", + local: parsed.local, + imported: parsed.imported, + from: parsed.from, + resolved, + typeOnly, + }); + } else if (parsed.kind === "module") { + context.pushBinding({ + kind: "namespace", + localNS: parsed.local, + from: parsed.from, + resolved, + typeOnly, + }); + } else { + context.pushBinding({ + kind: "star", + from: parsed.from, + resolved, + typeOnly, + }); + } + return true; +} + +async function applyPhpStatementOverride( + context: LanguageSpecificImportContext, + normalizedStmt: string, + typeOnly: boolean, +): Promise { + const parsed = parsePhpImportStatement(normalizedStmt, context.file); + if (!parsed.length) return false; + + for (const entry of parsed) { + if (entry.kind === "include") { + const resolved = await context.resolveFrom(entry.from); + context.pushBinding({ + kind: "star", + from: entry.from, + resolved, + typeOnly, + mechanism: "php", + }); + continue; + } + const resolved = await context.resolveFrom(entry.from, entry.importType); + context.pushBinding({ + kind: "named", + local: entry.local, + imported: entry.imported, + from: entry.from, + phpImportType: entry.importType, + resolved, + typeOnly, + mechanism: "php", + }); + } + return true; +} + +export async function applyStatementImportOverride( + context: LanguageSpecificImportContext, + state: StatementImportOverrideState, + stmtText: string, + 
typeOnly: boolean, +): Promise { + const normalizedStmt = stmtText.trim(); + if (!normalizedStmt) return false; + if (state.handledStatements.has(normalizedStmt)) return true; + + let handled = false; + if (context.languageId === "csharp") { + handled = await applyCsharpStatementOverride(context, normalizedStmt, typeOnly); + } else if (context.languageId === "java") { + handled = await applyJavaStatementOverride(context, normalizedStmt, typeOnly); + } else if (context.languageId === "kotlin") { + handled = await applyKotlinStatementOverride(context, normalizedStmt, typeOnly); + } else if (context.languageId === "rust") { + handled = await applyRustStatementOverride(context, normalizedStmt, typeOnly); + } else if (context.languageId === "php") { + handled = await applyPhpStatementOverride(context, normalizedStmt, typeOnly); + } + + if (!handled) return false; + state.handledStatements.add(normalizedStmt); + return true; +} + +export function appendImplicitImportBinding( + context: LanguageSpecificImportContext, + args: { + from: string; + resolved: ResolvedImportTarget; + typeOnly: boolean; + stmtText: string; + alias?: string; + wildcard?: boolean; + }, +): void { + const { from, resolved, typeOnly, stmtText, alias, wildcard } = args; + if (context.languageId === "java") { + const parts = from.split("."); + const last = parts[parts.length - 1]; + if (last === "*") { + context.pushBinding({ kind: "star", from, resolved, typeOnly }); + } else if (last && /^[A-Z]/.test(last)) { + context.pushBinding({ kind: "named", local: last, imported: last, from, resolved, typeOnly }); + } + } else if (context.languageId === "csharp") { + if (alias) { + const fromParts = from.split("."); + const imported = fromParts[fromParts.length - 1] ?? 
alias; + context.pushBinding({ kind: "named", local: alias, imported, from, resolved, typeOnly }); + } else { + context.pushBinding({ kind: "star", from, resolved, typeOnly }); + } + } else if (context.languageId === "ruby") { + context.pushBinding({ kind: "star", from, resolved }); + } else if (context.languageId === "go") { + const goAlias = alias; + if (goAlias === "_") return; + if (goAlias === ".") { + context.pushBinding({ kind: "star", from, resolved }); + return; + } + if (goAlias) { + context.pushBinding({ kind: "namespace", localNS: goAlias, from, resolved }); + return; + } + const parts = from.replace(/"/g, "").split("/"); + const last = parts[parts.length - 1]; + if (last) context.pushBinding({ kind: "namespace", localNS: last, from, resolved }); + } else if (context.languageId === "rust") { + if (stmtText.startsWith("mod ")) { + context.pushBinding({ kind: "namespace", localNS: from, from, resolved }); + } else { + const parts = from.split("::"); + const last = parts[parts.length - 1]; + if (!last) return; + if (last === "*") { + context.pushBinding({ kind: "star", from, resolved }); + } else { + context.pushBinding({ kind: "named", local: last, imported: last, from, resolved }); + } + } + } else if (context.languageId === "kotlin") { + if (wildcard || from.endsWith(".*")) { + context.pushBinding({ kind: "star", from, resolved, typeOnly }); + } else { + const parts = from.split("."); + const imported = parts[parts.length - 1]; + if (imported) context.pushBinding({ kind: "named", local: alias ?? 
imported, imported, from, resolved, typeOnly }); + } + } else if (context.languageId === "swift") { + const parts = from.split("."); + const last = parts[parts.length - 1]; + if (!last) return; + if (parts.length === 1) { + context.pushBinding({ kind: "namespace", localNS: last, from, resolved, typeOnly }); + context.pushBinding({ kind: "star", from, resolved, typeOnly }); + } else { + context.pushBinding({ kind: "named", local: last, imported: last, from, resolved, typeOnly }); + } + } else if (context.languageId === "zig") { + if (alias) context.pushBinding({ kind: "namespace", localNS: alias, from, resolved, typeOnly }); + } else if (context.languageId === "c" || context.languageId === "cpp") { + context.pushBinding({ kind: "star", from, resolved, typeOnly }); + } +} diff --git a/src/indexer/imports/nativeCaptures.ts b/src/indexer/imports/nativeCaptures.ts new file mode 100644 index 00000000..922837dc --- /dev/null +++ b/src/indexer/imports/nativeCaptures.ts @@ -0,0 +1,242 @@ +import type { JsSyntaxTree } from "../../jsFallback.js"; +import { capturesByName, capturesNamed, rangeFromNativeCapture } from "../../native/queryResults.js"; +import type { NativeCapture, NativeMatch } from "../../native/treeSitterNative.js"; +import { sliceText, unquote } from "../../util.js"; +import { parseGoImportAlias } from "../shared.js"; +import type { ImportBinding } from "../types.js"; +import type { ImportResolver, ResolvedImportTarget } from "./context.js"; +import { appendImplicitImportBinding, type LanguageSpecificImportContext } from "./languageSpecific.js"; + +type ImportCaptureExtractionContext = { + source: string; + languageId: string; + isTypeOnly: (stmtText: string) => boolean; + resolveFrom: ImportResolver; + pushBinding: (binding: ImportBinding) => void; + languageContext: LanguageSpecificImportContext; + applyStatementOverride: (stmtText: string, typeOnly: boolean) => Promise; +}; + +function parseObjectPatternBindings(patternText: string): Array<{ imported: 
string; local: string }> { + const trimmed = patternText.trim(); + if (!trimmed.startsWith("{") || !trimmed.endsWith("}")) return []; + const body = trimmed.slice(1, -1).trim(); + if (!body) return []; + const parts = body + .split(",") + .map((part) => part.trim()) + .filter(Boolean); + const out: Array<{ imported: string; local: string }> = []; + for (const part of parts) { + const withoutDefault = part.replace(/\s*=\s*.+$/, "").trim(); + const match = withoutDefault.match(/^([A-Za-z_$][\w$]*)(?::\s*([A-Za-z_$][\w$]*))?$/); + if (!match) continue; + const imported = match[1]!; + const local = match[2] ?? imported; + out.push({ imported, local }); + } + return out; +} + +async function pushTextObjectPatternBindings( + context: ImportCaptureExtractionContext, + patterns: NativeCapture[], + from: string | undefined, + typeOnly: boolean, +): Promise { + if (!from) return; + for (const pattern of patterns) { + if (pattern.nodeType !== "object_pattern") continue; + const resolved = await context.resolveFrom(from); + for (const binding of parseObjectPatternBindings(pattern.text)) { + context.pushBinding({ + kind: "named", + local: binding.local, + imported: binding.imported, + from, + resolved, + typeOnly, + }); + } + } +} + +async function pushTreeObjectPatternBindings( + context: ImportCaptureExtractionContext, + tree: JsSyntaxTree, + patterns: NativeCapture[], + from: string | undefined, + typeOnly: boolean, +): Promise { + if (!from) return; + for (const pattern of patterns) { + const patternRange = rangeFromNativeCapture(pattern); + const patternNode = tree.rootNode.descendantForIndex( + patternRange.start.index ?? 0, + patternRange.end.index ?? 
0, + ); + if (patternNode.type !== "object_pattern") continue; + + for (const child of patternNode.namedChildren) { + if ( + child.type === "shorthand_property_identifier" || + child.type === "shorthand_property_identifier_pattern" + ) { + const name = sliceText(child, context.source); + const resolved = await context.resolveFrom(from); + context.pushBinding({ + kind: "named", + local: name, + imported: name, + from, + resolved, + typeOnly, + }); + } else if (child.type === "pair_pattern") { + const key = child.childForFieldName("key"); + const value = child.childForFieldName("value"); + if (key && value && key.type === "property_identifier" && value.type === "identifier") { + const imported = sliceText(key, context.source); + const local = sliceText(value, context.source); + const resolved = await context.resolveFrom(from); + context.pushBinding({ + kind: "named", + local, + imported, + from, + resolved, + typeOnly, + }); + } + } + } + } +} + +function pushNamespaceBinding( + context: ImportCaptureExtractionContext, + caps: Record, + stmtText: string, + from: string, + resolved: ResolvedImportTarget, + typeOnly: boolean, +): void { + const namespaceCapture = caps["ns"]; + if (!namespaceCapture) return; + if (context.languageId === "go") { + const alias = parseGoImportAlias(stmtText); + if (alias === ".") { + context.pushBinding({ + kind: "star", + from, + resolved, + typeOnly, + }); + } else if (alias !== "_") { + context.pushBinding({ + kind: "namespace", + localNS: alias ?? 
namespaceCapture.text, + from, + resolved, + typeOnly, + }); + } + return; + } + + context.pushBinding({ + kind: "namespace", + localNS: namespaceCapture.text, + from, + resolved, + typeOnly, + }); +} + +async function pushStandardBindings( + context: ImportCaptureExtractionContext, + match: NativeMatch, + caps: Record, + stmtText: string, + from: string | undefined, + patternCount: number, + typeOnly: boolean, +): Promise { + if (!from) return; + const resolved = await context.resolveFrom(from); + const defaultCapture = caps["def"]; + if (defaultCapture) { + context.pushBinding({ + kind: "default", + local: defaultCapture.text, + from, + resolved, + typeOnly, + }); + } + + pushNamespaceBinding(context, caps, stmtText, from, resolved, typeOnly); + + const inames = capturesNamed(match, "iname"); + const aliases = capturesNamed(match, "alias"); + for (let i = 0; i < inames.length; i++) { + const imported = inames[i]!.text; + const alias = aliases[i]?.text ?? imported; + context.pushBinding({ + kind: "named", + local: alias, + imported, + from, + resolved, + typeOnly, + }); + } + + if (!defaultCapture && !caps["ns"] && !inames.length && !patternCount) { + appendImplicitImportBinding(context.languageContext, { + from, + resolved, + typeOnly, + stmtText, + ...(caps["alias"]?.text ? { alias: caps["alias"].text } : {}), + ...(caps["wild"] ? { wildcard: true } : {}), + }); + } +} + +export async function collectNativeCaptureImportBindings( + context: ImportCaptureExtractionContext, + matches: NativeMatch[], +): Promise { + for (const match of matches) { + const caps = capturesByName(match); + const stmtText = caps["stmt"]?.text ?? ""; + const typeOnly = context.isTypeOnly(stmtText); + if (await context.applyStatementOverride(stmtText, typeOnly)) { + continue; + } + const from = caps["from"] ? 
/**
 * Splits a Python module specifier into its count of leading dots and the
 * remaining dotted name.
 *
 * A specifier without leading dots is returned whole with `relDots: 0`.
 * When only dots are present (e.g. "." or ".."), `mod` is `null`.
 */
function splitRelativeModuleSpec(moduleSpec: string): { relDots: number; mod: string | null } {
  const parsed = /^(\.+)(.*)$/.exec(moduleSpec);
  if (parsed === null) {
    return { relDots: 0, mod: moduleSpec };
  }
  const [, dots = "", remainder = ""] = parsed;
  return { relDots: dots.length, mod: remainder === "" ? null : remainder };
}
/**
 * Records a `from <moduleSpec> import *` statement as a "star" binding
 * pointing at the resolved module.
 */
async function pushStarImport(context: PythonImportExtractionContext, moduleSpec: string): Promise<void> {
  const spec = splitRelativeModuleSpec(moduleSpec);
  const target = await resolvePythonModule(context.projectRoot, context.file, spec.mod, spec.relDots);
  context.pushBinding({
    kind: "star",
    from: moduleSpec,
    resolved: target,
    mechanism: "python",
  });
}

/**
 * Records one name imported via `from <moduleSpec> import <imported> [as <local>]`.
 *
 * When the imported name itself resolves to a file or directory under the
 * resolved module, a "namespace" binding pointing at that path is pushed;
 * otherwise a "named" binding against the resolved module is pushed.
 */
async function pushNamedImport(
  context: PythonImportExtractionContext,
  moduleSpec: string,
  imported: string,
  local: string,
): Promise<void> {
  const spec = splitRelativeModuleSpec(moduleSpec);
  const resolved = await resolvePythonModule(context.projectRoot, context.file, spec.mod, spec.relDots);
  const memberPath = resolvePythonNamespaceMember(resolved, imported);

  if (!memberPath) {
    context.pushBinding({
      kind: "named",
      local,
      imported,
      from: moduleSpec,
      resolved,
      mechanism: "python",
    });
    return;
  }

  context.pushBinding({
    kind: "namespace",
    localNS: local,
    from: moduleSpec,
    resolved: memberPath,
    mechanism: "python",
  });
}

/**
 * Records a plain `import <dotted> [as <local>]` statement as a "namespace"
 * binding. The module is resolved with a relative-dot count of 0.
 */
async function pushDefaultImport(
  context: PythonImportExtractionContext,
  dotted: string,
  local: string,
): Promise<void> {
  const target = await resolvePythonModule(context.projectRoot, context.file, dotted, 0);
  context.pushBinding({
    kind: "namespace",
    localNS: local,
    from: dotted,
    resolved: target,
    mechanism: "python",
  });
}
/**
 * Scans Python source text for import statements and reports every imported
 * name to the binding sink on `context`.
 *
 * Handled forms:
 * - `from <module> import a, b as c` on one line;
 * - `from <module> import (a, b as c)` with the parenthesised list on one or
 *   several lines (a trailing comma is allowed);
 * - `from <module> import *`;
 * - `import a.b`, `import a.b as c`, and comma-separated lists such as
 *   `import a, b as c`.
 *
 * List items that are not a plain identifier with an optional `as` alias are
 * skipped. Bindings are pushed in source order: all `from` imports first,
 * then all plain `import` statements.
 *
 * @param context Supplies the source text, file location and binding sink.
 */
export async function collectPythonImportsFromSource(context: PythonImportExtractionContext): Promise<void> {
  const pySrc = stripPythonCommentsAndStrings(context.source);

  // The name list is either a parenthesised group (which may span lines) or the
  // rest of the line. The parenthesised alternative is tried first; an
  // unclosed "(" falls through to the single-line alternative.
  const fromLinePattern = /^\s*from\s+([^\s]+)\s+import\s+(?:\(([^)]*)\)|([^\n#]+))/gm;
  const namePattern = /^([A-Za-z_][\w_]*)(?:\s+as\s+([A-Za-z_][\w_]*))?$/;
  for (const match of pySrc.matchAll(fromLinePattern)) {
    const mod = match[1]!.trim();
    const rawList = match[2] ?? match[3] ?? "";
    const items = rawList
      // Defensive: presumably comments are already removed by the stripping
      // step above, but a leftover "# ..." inside parentheses must not be
      // glued onto the next name.
      .replace(/#[^\n]*/g, "")
      .split(",")
      .map((item) => item.trim())
      .filter(Boolean);
    for (const item of items) {
      if (item === "*") {
        await pushStarImport(context, mod);
        continue;
      }
      const aliasMatch = item.match(namePattern);
      if (!aliasMatch) continue;
      const imported = aliasMatch[1]!;
      const local = aliasMatch[2] ?? imported;
      await pushNamedImport(context, mod, imported, local);
    }
  }

  // `import a, b.c as d` binds each listed module; without an alias the local
  // name is the first dotted segment.
  const importPattern = /^\s*import[ \t]+([^\n#;]+)/gm;
  const modulePattern = /^([A-Za-z_][\w.]*)(?:\s+as\s+([A-Za-z_][\w_]*))?$/;
  for (const match of pySrc.matchAll(importPattern)) {
    for (const item of match[1]!.split(",")) {
      const moduleMatch = item.trim().match(modulePattern);
      if (!moduleMatch) continue;
      const dotted = moduleMatch[1]!;
      const local = moduleMatch[2] ?? dotted.split(".")[0]!;
      await pushDefaultImport(context, dotted, local);
    }
  }
}
"class") kind = SymbolKind.Class; - else if (b.kind === "type") kind = SymbolKind.TypeAlias; + const kind = bindingKindToSymbolKind(b.kind); pushLocal(b.name, kind, b.def, b.node); } } diff --git a/src/indexer/navigation-goto.ts b/src/indexer/navigation-goto.ts index fa03834d..0770967f 100644 --- a/src/indexer/navigation-goto.ts +++ b/src/indexer/navigation-goto.ts @@ -1,127 +1,17 @@ import type { LanguageSupport } from "../languages.js"; import type { SyntaxNodeLike } from "../languages/types.js"; import { sliceText } from "../util.js"; +import { + getMemberAccessParts, + getNavigationExpressionProperty, + isMemberAccessNode, + memberAccessTraversalTypes, +} from "../util/memberAccess.js"; import { ensureParsedContext } from "./parse-context.js"; import { okGoToResult } from "./navigation-provenance.js"; import { resolveExport, resolveImported } from "./navigation-resolve.js"; import type { GoToResult, ModuleIndex, ProjectIndex, ResolvedExport, SymbolDef } from "./types.js"; -type MemberAccessTarget = { - obj: SyntaxNodeLike | null; - prop: SyntaxNodeLike | null; -}; - -function getMemberAccessTarget(supId: string, memberNode: SyntaxNodeLike): MemberAccessTarget { - if (supId === "python") { - return { - obj: memberNode.childForFieldName("object") ?? memberNode.child(0), - prop: memberNode.childForFieldName("attribute") ?? memberNode.child(2), - }; - } - if (supId === "csharp") { - return { - obj: memberNode.child(0), - prop: memberNode.child(2), - }; - } - if (supId === "java") { - if (memberNode.type === "method_invocation") { - return { - obj: memberNode.childForFieldName("object") ?? memberNode.child(0), - prop: memberNode.childForFieldName("name") ?? memberNode.child(2), - }; - } - if (memberNode.type === "scoped_identifier" || memberNode.type === "scoped_type_identifier") { - return { - obj: memberNode.childForFieldName("scope") ?? memberNode.child(0), - prop: memberNode.childForFieldName("name") ?? 
memberNode.child(2), - }; - } - } - if (supId === "ruby") { - if (memberNode.type === "scope_resolution") { - return { - obj: memberNode.childForFieldName("scope") ?? memberNode.child(0), - prop: memberNode.childForFieldName("name") ?? memberNode.child(2), - }; - } - return { - obj: memberNode.childForFieldName("receiver") ?? memberNode.child(0), - prop: memberNode.childForFieldName("method") ?? memberNode.child(2), - }; - } - if (supId === "rust") { - if (memberNode.type === "scoped_identifier") { - return { - obj: memberNode.childForFieldName("path") ?? memberNode.child(0), - prop: memberNode.childForFieldName("name") ?? memberNode.child(2), - }; - } - } - if (supId === "go") { - if (memberNode.type === "qualified_type") { - return { - obj: memberNode.namedChildren[0] ?? memberNode.child(0), - prop: memberNode.namedChildren[1] ?? memberNode.child(1), - }; - } - } - if (supId === "kotlin" || supId === "swift") { - if (memberNode.type === "navigation_expression") { - const obj = memberNode.namedChildren[0] ?? memberNode.child(0); - const suffix = - memberNode.namedChildren.find((child) => child.type === "navigation_suffix") ?? memberNode.child(1); - if (suffix) { - return { - obj, - prop: - suffix.childForFieldName("suffix") ?? - suffix.childForFieldName("name") ?? - suffix.namedChildren[0] ?? - suffix.child(0), - }; - } - return { obj, prop: null }; - } - } - return { - obj: memberNode.child(0), - prop: memberNode.child(2), - }; -} - -function getNavigationSubProperty(expr: SyntaxNodeLike): SyntaxNodeLike | null { - const suffix = expr.namedChildren.find((child) => child.type === "navigation_suffix") ?? expr.child(1); - if (!suffix) return null; - return ( - suffix.childForFieldName?.("suffix") ?? - suffix.childForFieldName?.("name") ?? - suffix.namedChildren[0] ?? 
- suffix.child(0) - ); -} - -function isMemberAccessNode( - sup: { id: string; nodeTypes: { memberExpression?: string } }, - node: SyntaxNodeLike, -): boolean { - const memberExpressionType = sup.nodeTypes.memberExpression ?? "member_expression"; - return ( - node.type === memberExpressionType || - (sup.id === "go" && node.type === "qualified_type") || - node.type === "member_access_expression" || - node.type === "qualified_name" || - node.type === "field_access" || - node.type === "method_invocation" || - node.type === "scoped_identifier" || - node.type === "scoped_type_identifier" || - node.type === "call" || - node.type === "scope_resolution" || - node.type === "field_expression" || - node.type === "attribute" - ); -} - export async function resolveMemberAccessDefinition(params: { index: ProjectIndex; mod: ModuleIndex; @@ -136,16 +26,8 @@ export async function resolveMemberAccessDefinition(params: { } const memberNode = parent; - const { obj, prop } = getMemberAccessTarget(sup.id, memberNode); - const memberExpressionType = sup.nodeTypes.memberExpression ?? "member_expression"; - const optionalMemberTypes = new Set([ - memberExpressionType, - sup.id === "go" ? "qualified_type" : "", - "optional_member_expression", - "subscript_expression", - "optional_chain", - sup.id === "python" ? "attribute" : "", - ]); + const { object: obj, property: prop } = getMemberAccessParts(sup, memberNode); + const optionalMemberTypes = memberAccessTraversalTypes(sup); const resolveExpression = async (expr: SyntaxNodeLike): Promise => { const exprName = sliceText(expr, source); @@ -189,13 +71,11 @@ export async function resolveMemberAccessDefinition(params: { } if (optionalMemberTypes.has(expr.type)) { - const subObj = expr.child(0); - let subProp = - expr.type === "qualified_type" - ? (expr.namedChildren[1] ?? expr.child(1)) - : (expr.childForFieldName?.("property") ?? expr.child(2) ?? 
expr.childForFieldName?.("attribute")); + const parts = getMemberAccessParts(sup, expr); + const subObj = parts.object; + let subProp = parts.property; if (!subProp && expr.type === "navigation_expression") { - subProp = getNavigationSubProperty(expr); + subProp = getNavigationExpressionProperty(expr); } if (subObj && subProp) { const base = await resolveExpression(subObj); diff --git a/src/indexer/navigation.ts b/src/indexer/navigation.ts index 5f761be8..19059f73 100644 --- a/src/indexer/navigation.ts +++ b/src/indexer/navigation.ts @@ -27,6 +27,12 @@ import { DEFAULT_REF_CONTEXT_LINES } from "./shared.js"; import { type ScopeIndex } from "./scope.js"; import { type FileId, type Range } from "../types.js"; import { resolveImportSpecifier, sliceText, toRange } from "../util.js"; +import { + getMemberAccessParts, + isMemberAccessNode, + isMemberObjectIdentifier, + isMemberReferencePropertyIdentifier, +} from "../util/memberAccess.js"; import { type FindReferencesResult, type GoToRequest, @@ -424,54 +430,11 @@ export async function collectNamespaceMemberRefs( const source = parsed.source; const tree = parsed.tree; const ranges: Range[] = []; - const isRuby = sup.id === "ruby"; - let memberExpressionType = sup.nodeTypes.memberExpression; - if (!memberExpressionType) { - if (sup.id === "python") { - memberExpressionType = "attribute"; - } else if (sup.id === "ruby") { - memberExpressionType = "call"; - } else { - memberExpressionType = "member_expression"; - } - } - const isPropertyIdentifier = (nodeType: string): boolean => - (sup.nodeTypes.propertyIdentifier ?? 
["property_identifier"]).includes(nodeType) || - nodeType === "field_identifier" || - nodeType === "type_identifier" || - nodeType === "identifier" || - nodeType === "constant"; - const isObjectIdentifier = (nodeType: string): boolean => - nodeType === "identifier" || - nodeType === "type_identifier" || - nodeType === "package_identifier" || - nodeType === "constant" || - nodeType === "namespace_identifier"; const walk = (node: SyntaxNodeLike): void => { - if ( - node.type === memberExpressionType || - (sup.id === "go" && node.type === "qualified_type") || - (isRuby && (node.type === "call" || node.type === "scope_resolution")) - ) { - let obj: SyntaxNodeLike | null = null; - let prop: SyntaxNodeLike | null = null; - if (isRuby) { - if (node.type === "scope_resolution") { - obj = node.childForFieldName("scope") ?? node.child(0); - prop = node.childForFieldName("name") ?? node.child(2); - } else { - obj = node.childForFieldName("receiver") ?? node.child(0); - prop = node.childForFieldName("method") ?? node.child(2); - } - } else if (sup.id === "go" && node.type === "qualified_type") { - obj = node.namedChildren[0] ?? node.child(0); - prop = node.namedChildren[1] ?? node.child(1); - } else { - obj = node.childForFieldName("object") ?? node.child(0); - prop = node.childForFieldName("property") ?? node.childForFieldName("attribute") ?? 
node.child(2); - } - if (obj && prop && isObjectIdentifier(obj.type) && isPropertyIdentifier(prop.type)) { + if (isMemberAccessNode(sup, node)) { + const { object: obj, property: prop } = getMemberAccessParts(sup, node); + if (obj && prop && isMemberObjectIdentifier(obj.type) && isMemberReferencePropertyIdentifier(sup, prop.type)) { const objectName = sliceText(obj, source); const propertyName = sliceText(prop, source); if (objectName === ns && propertyName === member) { diff --git a/src/indexer/parse-context.ts b/src/indexer/parse-context.ts index 0a07909b..084e1133 100644 --- a/src/indexer/parse-context.ts +++ b/src/indexer/parse-context.ts @@ -6,13 +6,12 @@ import { type NativeQueryResults, type NativeRuntimeMode, } from "../native/treeSitterNative.js"; +import type { NativeFallbackReason } from "../native/contracts.js"; import { ProjectedSyntaxTree } from "../native/projectedTree.js"; import { stringifyUnknown } from "../util.js"; import type { LanguageSupport } from "../languages.js"; import type { JsLanguage, SyntaxTreeLike } from "../languages/types.js"; -type NativeFallbackReason = "unavailable" | "unsupportedLanguage" | "queryFailure"; - export type ParsedFileContext = { source: string; tree: SyntaxTreeLike; diff --git a/src/indexer/scope.ts b/src/indexer/scope.ts index 11fc7ff3..d3bfa584 100644 --- a/src/indexer/scope.ts +++ b/src/indexer/scope.ts @@ -2,6 +2,7 @@ import { parseWithJsLanguage } from "../jsFallback.js"; import { sliceText, toRange } from "../util.js"; import { getNativeSyntaxTreeExecution, type NativeRuntimeMode } from "../native/treeSitterNative.js"; import { ProjectedSyntaxTree } from "../native/projectedTree.js"; +import { declarationKindToBindingKind } from "./declarations.js"; import type { LanguageSupport } from "../languages.js"; import type { JsLanguage, SyntaxNodeLike, SyntaxTreeLike } from "../languages/types.js"; import type { Range } from "../types.js"; @@ -70,13 +71,6 @@ export function buildScopeIndexFromSource( const 
customDeclLanguages = new Set(["c", "cpp", "kotlin", "swift"]); const paramParentTypes = new Set(["parameter_declaration", "parameter", "class_parameter", "lambda_parameters"]); - const toBindingKind = (kind: string): BindingKind => { - if (kind === "function") return "function"; - if (kind === "class" || kind === "interface") return "class"; - if (kind === "type") return "type"; - return "local"; - }; - const isParamNode = (node: SyntaxNodeLike): boolean => { let current: SyntaxNodeLike | null = node.parent; while (current) { @@ -219,7 +213,7 @@ export function buildScopeIndexFromSource( } if (customDeclLanguages.has(support.id) && idSet.has(node.type) && support.isDeclarationName(node)) { - const kind = isParamNode(node) ? "param" : toBindingKind(support.classifyDefinition(node)); + const kind = isParamNode(node) ? "param" : declarationKindToBindingKind(support.classifyDefinition(node)); addDecl(node, kind); } diff --git a/src/indexer/types.ts b/src/indexer/types.ts index 685cbde8..cacb9e35 100644 --- a/src/indexer/types.ts +++ b/src/indexer/types.ts @@ -2,7 +2,7 @@ import type { FallbackImportExtractionReason } from "../graphs/specifiers.js"; import type { GraphAdjacencyIndex } from "../graphs/adjacency.js"; import type { GraphBuildOptions } from "../graphs/types.js"; import type { LogLevel } from "../logging.js"; -import type { NativeRuntimeMode } from "../native/treeSitterNative.js"; +import type { NativeFallbackReason, NativeRuntimeMode } from "../native/contracts.js"; import type { ScopeIndex } from "./scope-types.js"; import type { ParsedFileContext } from "./parse-context.js"; import type { Edge, FileId, Graph, Range } from "../types.js"; @@ -181,7 +181,7 @@ export type ManifestReport = { configHashError?: string; }; -export type NativeBackendFallbackReason = "unavailable" | "unsupportedLanguage" | "queryFailure"; +export type NativeBackendFallbackReason = NativeFallbackReason; export type NativeBackendLanguageReport = { filesSeen: number; diff --git 
/**
 * Returns the path component of the request target, without query string or
 * fragment. A missing `request.url` is treated as "/".
 *
 * The target is parsed relative to a fixed placeholder origin; only the
 * resulting `pathname` is used.
 */
export function getRequestPath(request: IncomingMessage): string {
  const target = request.url ?? "/";
  const parsed = new URL(target, "http://127.0.0.1");
  return parsed.pathname;
}
/**
 * Wraps a host containing ":" in square brackets so it can be embedded in a
 * URL or Host header; hosts that already start with "[" or contain no ":"
 * are returned unchanged.
 */
export function formatHostForUrl(host: string): string {
  const needsBrackets = host.indexOf(":") !== -1 && host[0] !== "[";
  return needsBrackets ? `[${host}]` : host;
}
/**
 * Resolves when the server emits "close" and rejects when it emits "error",
 * whichever happens first.
 *
 * Whichever listener fires also detaches the other one, so no stale listener
 * is left on the server after the promise settles (a leftover "error"
 * listener would otherwise silently absorb a later error event).
 *
 * @param server HTTP server to observe; it is not closed by this function.
 */
export async function waitForHttpServerClose(server: HttpServer): Promise<void> {
  await new Promise<void>((resolve, reject) => {
    const onClose = (): void => {
      server.off("error", onError);
      resolve();
    };
    const onError = (error: Error): void => {
      server.off("close", onClose);
      reject(error);
    };
    server.once("close", onClose);
    server.once("error", onError);
  });
}
/**
 * Reports whether a socket's remote address is a loopback address.
 *
 * Accepts the IPv6 loopback `::1`, any IPv4 address starting with `127.`, and
 * the IPv4-mapped IPv6 form of those addresses (`::ffff:127.x.y.z`), so the
 * mapped and plain IPv4 spellings are treated the same way.
 *
 * @param address Remote address as reported by the socket; `undefined` or an
 *   empty string is never loopback.
 */
function isLoopbackRemoteAddress(address: string | undefined): boolean {
  if (!address) return false;
  if (address === "::1") return true;
  const mappedPrefix = "::ffff:";
  // Strip the IPv4-mapped prefix so both spellings share one check.
  const ipv4 = address.toLowerCase().startsWith(mappedPrefix) ? address.slice(mappedPrefix.length) : address;
  return ipv4.startsWith("127.");
}
/**
 * Reads at most `maxBytes` bytes from the start of a file and decodes them as
 * UTF-8.
 *
 * One byte more than `maxBytes` is requested so that `truncated` can report
 * whether the read returned more than `maxBytes` bytes. The kept bytes are
 * passed through `trimToUtf8Boundary` before decoding. The file handle is
 * always closed.
 *
 * @param filePath Path of the file to open for reading.
 * @param maxBytes Upper bound on the number of bytes kept.
 * @returns The decoded text and whether more data than `maxBytes` was read.
 */
export async function readFilePrefix(
  filePath: string,
  maxBytes: number,
): Promise<{ text: string; truncated: boolean }> {
  const handle = await fs.open(filePath, "r");
  try {
    const probe = Buffer.alloc(maxBytes + 1);
    const { bytesRead } = await handle.read(probe, 0, probe.length, 0);
    const truncated = bytesRead > maxBytes;
    const keptLength = truncated ? maxBytes : bytesRead;
    const text = trimToUtf8Boundary(probe.subarray(0, keptLength)).toString("utf8");
    return { text, truncated };
  } finally {
    await handle.close();
  }
}
/**
 * Drops a trailing, incomplete UTF-8 sequence from `buffer`.
 *
 * The buffer is scanned backwards over continuation bytes (`10xxxxxx`) to the
 * last lead byte. If that lead byte is not a valid UTF-8 lead, or fewer
 * continuation bytes follow it than it announces, everything from the lead
 * byte onward is cut off. A buffer made only of continuation bytes becomes
 * empty. Otherwise the buffer is returned as is.
 */
function trimToUtf8Boundary(buffer: Buffer): Buffer {
  const size = buffer.length;
  if (size === 0) return buffer;

  // Count the continuation bytes at the very end of the buffer.
  let trailing = 0;
  while (trailing < size && ((buffer[size - 1 - trailing] ?? 0) & 0xc0) === 0x80) {
    trailing += 1;
  }
  if (trailing === size) return buffer.subarray(0, 0);

  const leadIndex = size - 1 - trailing;
  const announced = expectedUtf8ContinuationBytes(buffer[leadIndex] ?? 0);
  const sequenceComplete = announced !== null && trailing >= announced;
  return sequenceComplete ? buffer : buffer.subarray(0, leadIndex);
}

/**
 * Returns how many continuation bytes a UTF-8 lead byte announces (0 to 3),
 * or `null` when the byte matches none of the lead-byte bit patterns.
 */
function expectedUtf8ContinuationBytes(byte: number): number | null {
  // [mask, expected masked value, continuation count]
  const leadPatterns: ReadonlyArray<readonly [number, number, number]> = [
    [0x80, 0x00, 0],
    [0xe0, 0xc0, 1],
    [0xf0, 0xe0, 2],
    [0xf8, 0xf0, 3],
  ];
  for (const [mask, expected, count] of leadPatterns) {
    if ((byte & mask) === expected) return count;
  }
  return null;
}
/**
 * Reports whether `error` is an `Error` instance carrying a `code` property
 * equal to "ENOENT".
 */
function isMissingPathError(error: unknown): boolean {
  if (!(error instanceof Error)) return false;
  if (!("code" in error)) return false;
  return error.code === "ENOENT";
}
AgentSession } from "../agent/session.js"; +import { + assertMcpSqliteQueryResourceBounded, + boundRawSqlResult, + DEFAULT_SQLITE_BYTE_LIMIT, + normalizeSqliteRowLimit, +} from "./sqliteGuard.js"; +import { + DEFAULT_FILE_BYTES, + DEFAULT_MCP_COLLECTION_LIMIT, + listCodegraphMcpTools, + MAX_FILE_BYTES, + MAX_MCP_COLLECTION_LIMIT, + MCP_TOOLS, +} from "./tools.js"; +import { + buildAllowedHostHeaders, + closeHttpServer, + emptyAllowedHostHeaderRules, + formatHostForUrl, + getHeaderValue, + getHttpServerPort, + getRequestPath, + isAllowedHostHeader, + listenOnHttpServer, + readJsonRequestBody, + waitForHttpServerClose, + writeJsonResponse, + writeJsonRpcError, + type AllowedHostHeaderRules, +} from "./http.js"; + +export { listCodegraphMcpTools } from "./tools.js"; +import { + assertRealPathCandidateWithinRoot, + assertWritableDirectoryRealPathWithinRoot, + readFilePrefix, + resolveArtifactSqlitePathCandidate, + resolveProjectFile, + resolveReadableFile, +} from "./security.js"; export type CodegraphMcpServerOptions = { root: string; @@ -107,28 +139,8 @@ export type CodegraphMcpHandlers = { }) => Promise; }; -const DEFAULT_FILE_BYTES = 80_000; -const MAX_FILE_BYTES = 500_000; -const DEFAULT_SQLITE_ROW_LIMIT = 100; -const MAX_SQLITE_ROW_LIMIT = 500; -const DEFAULT_SQLITE_BYTE_LIMIT = 200_000; -const MAX_SQLITE_CELL_BYTES = 8_000; -const DEFAULT_MCP_COLLECTION_LIMIT = 100; -const MAX_MCP_COLLECTION_LIMIT = 500; const MCP_HTTP_PATH = "/mcp"; const MAX_MCP_HTTP_BODY_BYTES = 1_000_000; -const DISALLOWED_MCP_SQLITE_FUNCTIONS = new Set([ - "format", - "group_concat", - "hex", - "json_group_array", - "json_group_object", - "printf", - "quote", - "randomblob", - "string_agg", - "zeroblob", -]); export function createCodegraphMcpHandlers(options: CodegraphMcpServerOptions): CodegraphMcpHandlers { const root = path.resolve(options.root); @@ -221,12 +233,10 @@ export function createCodegraphMcpHandlers(options: CodegraphMcpServerOptions): snapshot.fileGraph, await 
resolveProjectFile(await realRoot, root, request.file), queryOptions, - ).map( - (dependency) => ({ - file: relative(dependency.file), - depth: dependency.depth, - }), - ); + ).map((dependency) => ({ + file: relative(dependency.file), + depth: dependency.depth, + })); return { dependencies }; }, @@ -277,11 +287,7 @@ export function createCodegraphMcpHandlers(options: CodegraphMcpServerOptions): if (!sqlitePath) { throw new Error("No SQLite artifact is available. Run artifact_build first or pass artifactPath."); } - const realSqlitePath = await assertRealPathCandidateWithinRoot( - await realRoot, - sqlitePath, - "SQLite artifact", - ); + const realSqlitePath = await assertRealPathCandidateWithinRoot(await realRoot, sqlitePath, "SQLite artifact"); assertMcpSqliteQueryResourceBounded(request.query); const result = await queryGraphSqliteRaw(realSqlitePath, request.query, request.params ?? [], { maxRows: normalizeSqliteRowLimit(request.limit), @@ -322,12 +328,15 @@ export function createCodegraphMcpHandlers(options: CodegraphMcpServerOptions): } function createCodegraphMcpProtocolServer(handlers: CodegraphMcpHandlers): McpServer { - const server = new McpServer({ - name: "codegraph", - version: "1.0.0", - }, { - capabilities: { tools: {} }, - }); + const server = new McpServer( + { + name: "codegraph", + version: "1.0.0", + }, + { + capabilities: { tools: {} }, + }, + ); server.server.setRequestHandler(ListToolsRequestSchema, () => ({ tools: MCP_TOOLS })); server.server.setRequestHandler(CallToolRequestSchema, async (request): Promise => { @@ -525,197 +534,6 @@ async function closeMcpSession(session: { await Promise.allSettled([session.transport.close(), session.server.close()]); } -function getRequestPath(request: IncomingMessage): string { - const rawUrl = request.url ?? 
"/"; - return new URL(rawUrl, "http://127.0.0.1").pathname; -} - -type ParsedJsonBody = - | { status: "ok"; body: unknown } - | { status: "too_large" } - | { status: "invalid_json" }; - -type AllowedHostHeaderRules = { - exact: Set; - loopbackOnly: Set; -}; - -async function readJsonRequestBody(request: IncomingMessage, maxBytes: number): Promise { - const contentLength = getContentLength(request); - if (contentLength !== undefined && contentLength > maxBytes) { - request.resume(); - return { status: "too_large" }; - } - - const chunks: Buffer[] = []; - let bytes = 0; - for await (const chunk of request) { - const buffer = typeof chunk === "string" ? Buffer.from(chunk) : chunk; - bytes += buffer.byteLength; - if (bytes > maxBytes) { - return { status: "too_large" }; - } - chunks.push(buffer); - } - - const rawBody = Buffer.concat(chunks).toString("utf8"); - try { - const body: unknown = rawBody.length ? JSON.parse(rawBody) : null; - return { status: "ok", body }; - } catch { - return { status: "invalid_json" }; - } -} - -function getHeaderValue(value: string | string[] | undefined): string | undefined { - if (Array.isArray(value)) { - return value[0]; - } - return value; -} - -function getContentLength(request: IncomingMessage): number | undefined { - const contentLength = getHeaderValue(request.headers["content-length"]); - if (contentLength === undefined) return undefined; - const parsedLength = Number(contentLength); - if (!Number.isFinite(parsedLength) || parsedLength < 0) return undefined; - return parsedLength; -} - -function emptyAllowedHostHeaderRules(): AllowedHostHeaderRules { - return { - exact: new Set(), - loopbackOnly: new Set(), - }; -} - -function isAllowedHostHeader(request: IncomingMessage, allowedHostHeaders: AllowedHostHeaderRules): boolean { - const host = getHeaderValue(request.headers.host); - if (host === undefined) return false; - const normalizedHost = host.toLowerCase(); - if (allowedHostHeaders.exact.has(normalizedHost)) return true; - 
return allowedHostHeaders.loopbackOnly.has(normalizedHost) && isLoopbackRemoteAddress(request.socket.remoteAddress); -} - -function buildAllowedHostHeaders(host: string, port: number): AllowedHostHeaderRules { - const allowed = emptyAllowedHostHeaderRules(); - allowed.exact.add(formatHostHeader(host, port).toLowerCase()); - if (isWildcardBindHost(host)) { - allowed.loopbackOnly.add(`127.0.0.1:${port}`); - allowed.loopbackOnly.add(`localhost:${port}`); - allowed.loopbackOnly.add(`[::1]:${port}`); - for (const localHost of localInterfaceHostHeaders(port)) { - allowed.exact.add(localHost); - } - } - if (host === "127.0.0.1") { - allowed.exact.add(`localhost:${port}`); - } - if (host === "::1" || host === "[::1]") { - allowed.exact.add(`[::1]:${port}`); - allowed.exact.add(`localhost:${port}`); - } - return allowed; -} - -function isWildcardBindHost(host: string): boolean { - return host === "0.0.0.0" || host === "::" || host === "[::]"; -} - -function isLoopbackRemoteAddress(address: string | undefined): boolean { - if (address === undefined) return false; - const normalized = address.toLowerCase(); - return normalized === "::1" || normalized === "::ffff:127.0.0.1" || normalized.startsWith("127."); -} - -function localInterfaceHostHeaders(port: number): Set { - const hosts = new Set(); - const hostname = os.hostname().trim().toLowerCase(); - if (hostname) { - hosts.add(`${hostname}:${port}`); - } - for (const interfaces of Object.values(os.networkInterfaces())) { - for (const entry of interfaces ?? []) { - if (entry.internal) continue; - const address = entry.address.split("%")[0] ?? 
entry.address; - hosts.add(formatHostHeader(address, port).toLowerCase()); - } - } - return hosts; -} - -function formatHostForUrl(host: string): string { - if (host.includes(":") && !host.startsWith("[")) { - return `[${host}]`; - } - return host; -} - -function formatHostHeader(host: string, port: number): string { - return `${formatHostForUrl(host)}:${port}`; -} - -function writeJsonRpcError( - response: ServerResponse, - statusCode: number, - message: string, - code = -32000, -): void { - writeJsonResponse(response, statusCode, { - jsonrpc: "2.0", - error: { code, message }, - id: null, - }); -} - -function writeJsonResponse(response: ServerResponse, statusCode: number, body: unknown): void { - response.writeHead(statusCode, { "content-type": "application/json" }); - response.end(JSON.stringify(body)); -} - -async function listenOnHttpServer(server: HttpServer, port: number, host: string): Promise { - await new Promise((resolve, reject) => { - const onError = (error: Error) => { - server.off("listening", onListening); - reject(error); - }; - const onListening = () => { - server.off("error", onError); - resolve(); - }; - server.once("error", onError); - server.once("listening", onListening); - server.listen(port, host); - }); -} - -function getHttpServerPort(address: string | AddressInfo | null): number { - if (address === null || typeof address === "string") { - throw new Error("HTTP server did not expose a TCP port."); - } - return address.port; -} - -async function closeHttpServer(server: HttpServer): Promise { - if (!server.listening) return; - await new Promise((resolve, reject) => { - server.close((error) => { - if (error) { - reject(error); - return; - } - resolve(); - }); - }); -} - -async function waitForHttpServerClose(server: HttpServer): Promise { - await new Promise((resolve, reject) => { - server.once("close", resolve); - server.once("error", reject); - }); -} - async function callMcpTool(handlers: CodegraphMcpHandlers, name: string, input: 
unknown): Promise { switch (name) { case "search": @@ -780,15 +598,6 @@ function toToolResult(value: unknown): CallToolResult { }; } -function resolveArtifactSqlitePathCandidate(root: string, artifactPath: string): string { - const resolved = path.isAbsolute(artifactPath) ? artifactPath : path.resolve(root, artifactPath); - const sqlitePath = - resolved.toLowerCase().endsWith(".sqlite") || resolved.toLowerCase().endsWith(".db") - ? resolved - : path.join(resolved, "codegraph.sqlite"); - return normalizePath(sqlitePath); -} - const searchSchema = z.object({ query: z.string(), mode: z.enum(["hybrid", "symbol", "path", "text", "graph", "sql"]).optional(), @@ -869,418 +678,3 @@ const artifactBuildSchema = z.object({ questions: z.boolean().optional(), force: z.boolean().optional(), }); - -function objectSchema(properties: Record, required: string[] = []): Tool["inputSchema"] { - return required.length ? { type: "object", properties, required } : { type: "object", properties }; -} - -const stringProperty = { type: "string" }; -const booleanProperty = { type: "boolean" }; - -const MCP_TOOLS: Tool[] = [ - { - name: "search", - description: "Deterministic ranked search across files, symbols, chunks, SQL objects, and graph context.", - inputSchema: objectSchema( - { - query: stringProperty, - mode: { type: "string", enum: ["hybrid", "symbol", "path", "text", "graph", "sql"] }, - from: stringProperty, - depth: { type: "integer", minimum: 0, default: 1, description: "Graph neighborhood depth." 
}, - limit: { type: "integer", minimum: 0, maximum: 100, default: 20 }, - }, - ["query"], - ), - }, - { - name: "get_file", - description: "Read a bounded project file by relative path.", - inputSchema: objectSchema( - { file: stringProperty, maxBytes: { type: "integer", minimum: 1, maximum: MAX_FILE_BYTES } }, - ["file"], - ), - }, - { - name: "get_symbol", - description: "Resolve a stable search or explain handle.", - inputSchema: objectSchema({ handle: stringProperty }, ["handle"]), - }, - { - name: "goto", - description: "Resolve the definition at a file position.", - inputSchema: objectSchema( - { file: stringProperty, line: { type: "integer", minimum: 1 }, column: { type: "integer", minimum: 0 } }, - ["file", "line", "column"], - ), - }, - { - name: "refs", - description: "Find references by stable handle or file position.", - inputSchema: { - type: "object", - properties: { - handle: stringProperty, - file: stringProperty, - line: { type: "integer", minimum: 1 }, - column: { type: "integer", minimum: 0 }, - limit: { - type: "integer", - minimum: 0, - maximum: MAX_MCP_COLLECTION_LIMIT, - default: DEFAULT_MCP_COLLECTION_LIMIT, - }, - }, - oneOf: [ - { - required: ["handle"], - not: { - anyOf: [{ required: ["file"] }, { required: ["line"] }, { required: ["column"] }], - }, - }, - { - required: ["file", "line", "column"], - not: { required: ["handle"] }, - }, - ], - }, - }, - { - name: "deps", - description: "List file dependencies.", - inputSchema: objectSchema( - { - file: stringProperty, - depth: { type: "integer", minimum: 0, default: 1 }, - limit: { - type: "integer", - minimum: 0, - maximum: MAX_MCP_COLLECTION_LIMIT, - default: DEFAULT_MCP_COLLECTION_LIMIT, - }, - }, - ["file"], - ), - }, - { - name: "rdeps", - description: "List reverse file dependencies.", - inputSchema: objectSchema( - { - file: stringProperty, - depth: { type: "integer", minimum: 0, default: 1 }, - limit: { - type: "integer", - minimum: 0, - maximum: MAX_MCP_COLLECTION_LIMIT, - 
default: DEFAULT_MCP_COLLECTION_LIMIT, - }, - }, - ["file"], - ), - }, - { - name: "path", - description: "Find the shortest dependency path between two files.", - inputSchema: objectSchema({ from: stringProperty, to: stringProperty }, ["from", "to"]), - }, - { - name: "impact", - description: "Build compact impact context for a git range.", - inputSchema: objectSchema({ base: stringProperty, head: stringProperty }, ["base", "head"]), - }, - { - name: "review", - description: "Build review context for a git range.", - inputSchema: objectSchema( - { - base: stringProperty, - head: stringProperty, - reviewDepth: { type: "string", enum: ["minimal", "standard", "deep"] }, - }, - ["base", "head"], - ), - }, - { - name: "query_sqlite", - description: "Run a bounded read-only SQL query against the graph SQLite artifact.", - inputSchema: objectSchema( - { - query: stringProperty, - params: { - type: "array", - items: { oneOf: [{ type: "string" }, { type: "number" }, { type: "null" }] }, - }, - limit: { type: "integer", minimum: 0, maximum: MAX_SQLITE_ROW_LIMIT, default: DEFAULT_SQLITE_ROW_LIMIT }, - }, - ["query"], - ), - }, - { - name: "artifact_build", - description: "Build Codegraph artifacts when write tools are explicitly enabled.", - inputSchema: objectSchema({ - outDir: stringProperty, - sqlite: booleanProperty, - graphJson: booleanProperty, - report: booleanProperty, - questions: booleanProperty, - force: booleanProperty, - }), - }, -]; - -export function listCodegraphMcpTools(): Tool[] { - return MCP_TOOLS.map((tool) => ({ ...tool })); -} - -function normalizeSqliteRowLimit(limit: number | undefined): number { - if (typeof limit !== "number" || !Number.isFinite(limit)) return DEFAULT_SQLITE_ROW_LIMIT; - return Math.min(MAX_SQLITE_ROW_LIMIT, Math.max(0, Math.floor(limit))); -} - -function assertMcpSqliteQueryResourceBounded(sql: string): void { - const searchableSql = stripSqlCommentsAndLiterals(sql).toLowerCase(); - if (/\bwith\s+recursive\b/.test(searchableSql)) 
{ - throw new Error("MCP query_sqlite does not support recursive SQLite queries."); - } - const functionPattern = /\b([a-z_][a-z0-9_]*)\s*\(/gi; - for (const match of searchableSql.matchAll(functionPattern)) { - const functionName = match[1]; - if (functionName !== undefined && DISALLOWED_MCP_SQLITE_FUNCTIONS.has(functionName)) { - throw new Error(`MCP query_sqlite rejected unsupported SQLite function ${functionName}.`); - } - } - const quotedFunctionPattern = /(?:"((?:[^"]|"")*)"|`((?:[^`]|``)*)`|\[([^\]]*)\])\s*\(/g; - for (const match of searchableSql.matchAll(quotedFunctionPattern)) { - const functionName = (match[1] ?? match[2] ?? match[3] ?? "").replace(/""|``/g, (escaped) => escaped[0] ?? ""); - if (DISALLOWED_MCP_SQLITE_FUNCTIONS.has(functionName)) { - throw new Error(`MCP query_sqlite rejected unsupported SQLite function ${functionName}.`); - } - } -} - -function stripSqlCommentsAndLiterals(sql: string): string { - let output = ""; - let index = 0; - while (index < sql.length) { - const char = sql[index]; - const next = sql[index + 1]; - if (char === "-" && next === "-") { - index += 2; - while (index < sql.length && sql[index] !== "\n") { - output += " "; - index += 1; - } - continue; - } - if (char === "/" && next === "*") { - index += 2; - while (index < sql.length && !(sql[index] === "*" && sql[index + 1] === "/")) { - output += " "; - index += 1; - } - index = Math.min(sql.length, index + 2); - continue; - } - if (char === "'") { - output += " "; - index += 1; - while (index < sql.length) { - if (sql[index] === "'") { - if (sql[index + 1] === "'") { - output += " "; - index += 2; - continue; - } - index += 1; - break; - } - output += " "; - index += 1; - } - continue; - } - output += char; - index += 1; - } - return output; -} - -function boundRawSqlResult(result: RawSqlResult, byteLimit: number): RawSqlResult { - const rows: Array> = []; - let bytes = Buffer.byteLength(JSON.stringify({ columns: result.columns, rows: [] }), "utf8"); - let truncated = 
result.truncated ?? false; - - for (const rawRow of result.rows) { - if (rowContainsTruncatedValue(rawRow)) { - truncated = true; - } - const row = rawRow.map(normalizeSqliteValue); - const rowBytes = Buffer.byteLength(JSON.stringify(row), "utf8"); - if (bytes + rowBytes > byteLimit) { - truncated = true; - break; - } - rows.push(row); - bytes += rowBytes; - } - - return { - ...result, - rows, - byteLimit, - bytes, - truncated, - }; -} - -function rowContainsTruncatedValue(row: Array): boolean { - return row.some( - (value) => - (typeof value === "string" && Buffer.byteLength(value, "utf8") > MAX_SQLITE_CELL_BYTES) || - value instanceof Uint8Array, - ); -} - -function normalizeSqliteValue(value: unknown): unknown { - if (typeof value === "string") return truncateUtf8(value, MAX_SQLITE_CELL_BYTES); - if (typeof value === "bigint") return value.toString(); - if (value instanceof Uint8Array) return `<${value.byteLength} bytes>`; - return value; -} - -function truncateUtf8(value: string, maxBytes: number): string { - if (Buffer.byteLength(value, "utf8") <= maxBytes) return value; - let output = ""; - let bytes = 0; - for (const char of value) { - const charBytes = Buffer.byteLength(char, "utf8"); - if (bytes + charBytes > maxBytes) break; - output += char; - bytes += charBytes; - } - return `${output}...[truncated]`; -} - -async function resolveReadableFile( - realRoot: string, - root: string, - filePath: string, -): Promise<{ realPath: string; displayPath: string }> { - const candidatePath = path.isAbsolute(filePath) ? path.resolve(filePath) : path.resolve(root, filePath); - const realPath = await assertRealPathCandidateWithinRoot(realRoot, candidatePath, "File"); - const displayPath = - toProjectRelativePath(root, candidatePath) ?? toProjectRelativePath(realRoot, realPath) ?? 
normalizePath(realPath); - return { realPath, displayPath }; -} - -async function resolveProjectFile(realRoot: string, root: string, filePath: string): Promise { - const candidatePath = path.isAbsolute(filePath) ? path.resolve(filePath) : path.resolve(root, filePath); - const realPath = await assertRealPathCandidateWithinRoot(realRoot, candidatePath, "File"); - const lexicalRelativePath = toProjectRelativePath(root, candidatePath); - if (lexicalRelativePath) return normalizePath(candidatePath); - const realRelativePath = toProjectRelativePath(realRoot, realPath); - if (realRelativePath) return normalizePath(path.resolve(root, realRelativePath)); - throw new Error(`File is outside project root: ${normalizePath(realPath)} (root: ${normalizePath(realRoot)})`); -} - -async function readFilePrefix(filePath: string, maxBytes: number): Promise<{ text: string; truncated: boolean }> { - const handle = await fs.open(filePath, "r"); - try { - const readLimit = maxBytes + 1; - const buffer = Buffer.alloc(readLimit); - const { bytesRead } = await handle.read(buffer, 0, readLimit, 0); - const outputBytes = Math.min(bytesRead, maxBytes); - const outputBuffer = trimToUtf8Boundary(buffer.subarray(0, outputBytes)); - return { - text: outputBuffer.toString("utf8"), - truncated: bytesRead > maxBytes, - }; - } finally { - await handle.close(); - } -} - -function trimToUtf8Boundary(buffer: Buffer): Buffer { - if (!buffer.length) return buffer; - let leadIndex = buffer.length - 1; - while (leadIndex >= 0) { - const byte = buffer[leadIndex]; - if (byte === undefined || (byte & 0xc0) !== 0x80) break; - leadIndex -= 1; - } - if (leadIndex < 0) return buffer.subarray(0, 0); - const leadByte = buffer[leadIndex]; - if (leadByte === undefined) return buffer.subarray(0, 0); - const continuationBytes = buffer.length - leadIndex - 1; - const expectedContinuationBytes = expectedUtf8ContinuationBytes(leadByte); - if (expectedContinuationBytes === null) return buffer.subarray(0, leadIndex); - if 
(continuationBytes < expectedContinuationBytes) return buffer.subarray(0, leadIndex); - return buffer; -} - -function expectedUtf8ContinuationBytes(byte: number): number | null { - if ((byte & 0x80) === 0) return 0; - if ((byte & 0xe0) === 0xc0) return 1; - if ((byte & 0xf0) === 0xe0) return 2; - if ((byte & 0xf8) === 0xf0) return 3; - return null; -} - -async function assertRealPathCandidateWithinRoot( - realRoot: string, - filePath: string, - label: string, -): Promise { - const existingPath = await nearestExistingPath(filePath); - const realExistingPath = await fs.realpath(existingPath); - const relativeSuffix = path.relative(existingPath, filePath); - const realTargetPath = path.resolve(realExistingPath, relativeSuffix); - if (!isFilePathWithinRoot(realRoot, realTargetPath)) { - throw new Error( - `${label} is outside project root: ${normalizePath(realTargetPath)} (root: ${normalizePath(realRoot)})`, - ); - } - const finalRealPath = normalizePath(await fs.realpath(filePath)); - if (!isFilePathWithinRoot(realRoot, finalRealPath)) { - throw new Error(`${label} is outside project root: ${finalRealPath} (root: ${normalizePath(realRoot)})`); - } - return finalRealPath; -} - -async function assertWritableDirectoryRealPathWithinRoot( - realRoot: string, - root: string, - requestedPath: string, - label: string, -): Promise { - const lexicalPath = path.isAbsolute(requestedPath) ? 
requestedPath : path.resolve(root, requestedPath); - const existingPath = await nearestExistingPath(lexicalPath); - const realExistingPath = await fs.realpath(existingPath); - const relativeSuffix = path.relative(existingPath, lexicalPath); - const realTargetPath = path.resolve(realExistingPath, relativeSuffix); - if (!isFilePathWithinRoot(realRoot, realTargetPath)) { - throw new Error( - `${label} is outside project root: ${normalizePath(realTargetPath)} (root: ${normalizePath(realRoot)})`, - ); - } - return normalizePath(realTargetPath); -} - -async function nearestExistingPath(filePath: string): Promise { - let current = filePath; - while (current !== path.dirname(current)) { - try { - await fs.stat(current); - return current; - } catch (error) { - if (!isMissingPathError(error)) throw error; - current = path.dirname(current); - } - } - return current; -} - -function isMissingPathError(error: unknown): boolean { - return error instanceof Error && "code" in error && error.code === "ENOENT"; -} diff --git a/src/mcp/sqliteGuard.ts b/src/mcp/sqliteGuard.ts new file mode 100644 index 00000000..880958d9 --- /dev/null +++ b/src/mcp/sqliteGuard.ts @@ -0,0 +1,102 @@ +import { maskSqlStringsAndComments } from "../sql/lex.js"; +import type { RawSqlResult } from "../sqlite.js"; + +export const DEFAULT_SQLITE_ROW_LIMIT = 100; +export const MAX_SQLITE_ROW_LIMIT = 500; +export const DEFAULT_SQLITE_BYTE_LIMIT = 200_000; +const MAX_SQLITE_CELL_BYTES = 8_000; + +const DISALLOWED_MCP_SQLITE_FUNCTIONS = new Set([ + "format", + "group_concat", + "hex", + "json_group_array", + "json_group_object", + "printf", + "quote", + "randomblob", + "string_agg", + "zeroblob", +]); + +export function normalizeSqliteRowLimit(limit: number | undefined): number { + if (typeof limit !== "number" || !Number.isFinite(limit)) return DEFAULT_SQLITE_ROW_LIMIT; + return Math.min(MAX_SQLITE_ROW_LIMIT, Math.max(0, Math.floor(limit))); +} + +export function assertMcpSqliteQueryResourceBounded(sql: string): 
void { + const searchableSql = maskSqlStringsAndComments(sql).toLowerCase(); + if (/\bwith\s+recursive\b/.test(searchableSql)) { + throw new Error("MCP query_sqlite does not support recursive SQLite queries."); + } + const functionPattern = /\b([a-z_][a-z0-9_]*)\s*\(/gi; + for (const match of searchableSql.matchAll(functionPattern)) { + const functionName = match[1]; + if (functionName !== undefined && DISALLOWED_MCP_SQLITE_FUNCTIONS.has(functionName)) { + throw new Error(`MCP query_sqlite rejected unsupported SQLite function ${functionName}.`); + } + } + const quotedFunctionPattern = /(?:"((?:[^"]|"")*)"|`((?:[^`]|``)*)`|\[([^\]]*)\])\s*\(/g; + for (const match of searchableSql.matchAll(quotedFunctionPattern)) { + const functionName = (match[1] ?? match[2] ?? match[3] ?? "").replace(/""|``/g, (escaped) => escaped[0] ?? ""); + if (DISALLOWED_MCP_SQLITE_FUNCTIONS.has(functionName)) { + throw new Error(`MCP query_sqlite rejected unsupported SQLite function ${functionName}.`); + } + } +} + +export function boundRawSqlResult(result: RawSqlResult, byteLimit: number): RawSqlResult { + const rows: Array> = []; + let bytes = Buffer.byteLength(JSON.stringify({ columns: result.columns, rows: [] }), "utf8"); + let truncated = result.truncated ?? 
false; + + for (const rawRow of result.rows) { + if (rowContainsTruncatedValue(rawRow)) { + truncated = true; + } + const row = rawRow.map(normalizeSqliteValue); + const rowBytes = Buffer.byteLength(JSON.stringify(row), "utf8"); + if (bytes + rowBytes > byteLimit) { + truncated = true; + break; + } + rows.push(row); + bytes += rowBytes; + } + + return { + ...result, + rows, + byteLimit, + bytes, + truncated, + }; +} + +function rowContainsTruncatedValue(row: Array): boolean { + return row.some( + (value) => + (typeof value === "string" && Buffer.byteLength(value, "utf8") > MAX_SQLITE_CELL_BYTES) || + value instanceof Uint8Array, + ); +} + +function normalizeSqliteValue(value: unknown): unknown { + if (typeof value === "string") return truncateUtf8(value, MAX_SQLITE_CELL_BYTES); + if (typeof value === "bigint") return value.toString(); + if (value instanceof Uint8Array) return `<${value.byteLength} bytes>`; + return value; +} + +function truncateUtf8(value: string, maxBytes: number): string { + if (Buffer.byteLength(value, "utf8") <= maxBytes) return value; + let output = ""; + let bytes = 0; + for (const char of value) { + const charBytes = Buffer.byteLength(char, "utf8"); + if (bytes + charBytes > maxBytes) break; + output += char; + bytes += charBytes; + } + return `${output}...[truncated]`; +} diff --git a/src/mcp/tools.ts b/src/mcp/tools.ts new file mode 100644 index 00000000..3c1cbf04 --- /dev/null +++ b/src/mcp/tools.ts @@ -0,0 +1,171 @@ +import type { Tool } from "@modelcontextprotocol/sdk/types.js"; + +import { DEFAULT_SQLITE_ROW_LIMIT, MAX_SQLITE_ROW_LIMIT } from "./sqliteGuard.js"; + +export const DEFAULT_FILE_BYTES = 80_000; +export const MAX_FILE_BYTES = 500_000; +export const DEFAULT_MCP_COLLECTION_LIMIT = 100; +export const MAX_MCP_COLLECTION_LIMIT = 500; + +function objectSchema(properties: Record, required: string[] = []): Tool["inputSchema"] { + return required.length ? 
{ type: "object", properties, required } : { type: "object", properties }; +} + +const stringProperty = { type: "string" }; +const booleanProperty = { type: "boolean" }; + +export const MCP_TOOLS: Tool[] = [ + { + name: "search", + description: "Deterministic ranked search across files, symbols, chunks, SQL objects, and graph context.", + inputSchema: objectSchema( + { + query: stringProperty, + mode: { type: "string", enum: ["hybrid", "symbol", "path", "text", "graph", "sql"] }, + from: stringProperty, + depth: { type: "integer", minimum: 0, default: 1, description: "Graph neighborhood depth." }, + limit: { type: "integer", minimum: 0, maximum: 100, default: 20 }, + }, + ["query"], + ), + }, + { + name: "get_file", + description: "Read a bounded project file by relative path.", + inputSchema: objectSchema( + { file: stringProperty, maxBytes: { type: "integer", minimum: 1, maximum: MAX_FILE_BYTES } }, + ["file"], + ), + }, + { + name: "get_symbol", + description: "Resolve a stable search or explain handle.", + inputSchema: objectSchema({ handle: stringProperty }, ["handle"]), + }, + { + name: "goto", + description: "Resolve the definition at a file position.", + inputSchema: objectSchema( + { file: stringProperty, line: { type: "integer", minimum: 1 }, column: { type: "integer", minimum: 0 } }, + ["file", "line", "column"], + ), + }, + { + name: "refs", + description: "Find references by stable handle or file position.", + inputSchema: { + type: "object", + properties: { + handle: stringProperty, + file: stringProperty, + line: { type: "integer", minimum: 1 }, + column: { type: "integer", minimum: 0 }, + limit: { + type: "integer", + minimum: 0, + maximum: MAX_MCP_COLLECTION_LIMIT, + default: DEFAULT_MCP_COLLECTION_LIMIT, + }, + }, + oneOf: [ + { + required: ["handle"], + not: { + anyOf: [{ required: ["file"] }, { required: ["line"] }, { required: ["column"] }], + }, + }, + { + required: ["file", "line", "column"], + not: { required: ["handle"] }, + }, + ], + }, + 
}, + { + name: "deps", + description: "List file dependencies.", + inputSchema: objectSchema( + { + file: stringProperty, + depth: { type: "integer", minimum: 0, default: 1 }, + limit: { + type: "integer", + minimum: 0, + maximum: MAX_MCP_COLLECTION_LIMIT, + default: DEFAULT_MCP_COLLECTION_LIMIT, + }, + }, + ["file"], + ), + }, + { + name: "rdeps", + description: "List reverse file dependencies.", + inputSchema: objectSchema( + { + file: stringProperty, + depth: { type: "integer", minimum: 0, default: 1 }, + limit: { + type: "integer", + minimum: 0, + maximum: MAX_MCP_COLLECTION_LIMIT, + default: DEFAULT_MCP_COLLECTION_LIMIT, + }, + }, + ["file"], + ), + }, + { + name: "path", + description: "Find the shortest dependency path between two files.", + inputSchema: objectSchema({ from: stringProperty, to: stringProperty }, ["from", "to"]), + }, + { + name: "impact", + description: "Build compact impact context for a git range.", + inputSchema: objectSchema({ base: stringProperty, head: stringProperty }, ["base", "head"]), + }, + { + name: "review", + description: "Build review context for a git range.", + inputSchema: objectSchema( + { + base: stringProperty, + head: stringProperty, + reviewDepth: { type: "string", enum: ["minimal", "standard", "deep"] }, + }, + ["base", "head"], + ), + }, + { + name: "query_sqlite", + description: "Run a bounded read-only SQL query against the graph SQLite artifact.", + inputSchema: objectSchema( + { + query: stringProperty, + params: { + type: "array", + items: { oneOf: [{ type: "string" }, { type: "number" }, { type: "null" }] }, + }, + limit: { type: "integer", minimum: 0, maximum: MAX_SQLITE_ROW_LIMIT, default: DEFAULT_SQLITE_ROW_LIMIT }, + }, + ["query"], + ), + }, + { + name: "artifact_build", + description: "Build Codegraph artifacts when write tools are explicitly enabled.", + inputSchema: objectSchema({ + outDir: stringProperty, + sqlite: booleanProperty, + graphJson: booleanProperty, + report: booleanProperty, + questions: 
booleanProperty, + force: booleanProperty, + }), + }, +]; + +export function listCodegraphMcpTools(): Tool[] { + return MCP_TOOLS.map((tool) => ({ ...tool })); +} diff --git a/src/native/contracts.ts b/src/native/contracts.ts new file mode 100644 index 00000000..bf31a878 --- /dev/null +++ b/src/native/contracts.ts @@ -0,0 +1,117 @@ +export type NativePoint = { + row: number; + column: number; + index: number; +}; + +export type NativeCapture = { + name: string; + text: string; + nodeType: string; + start: NativePoint; + end: NativePoint; +}; + +export type NativeMatch = { + patternIndex: number; + captures: NativeCapture[]; +}; + +export type NativeQueryResults = { + imports: NativeMatch[]; + exports: NativeMatch[]; + locals: NativeMatch[]; + importBindings: NativeMatch[]; +}; + +export type NativeSyntaxNode = { + id: number; + parentId: number; + nodeType: string; + named: boolean; + start: NativePoint; + end: NativePoint; + childIds: number[]; + namedChildIds: number[]; + childFieldNames: string[]; +}; + +export type NativeSyntaxTree = { + rootId: number; + nodes: NativeSyntaxNode[]; +}; + +export type CompactCapture = { + name: string; + text: string; +}; + +export type CompactMatch = { + patternIndex: number; + captures: CompactCapture[]; +}; + +export type CompactQueryResults = { + imports: CompactMatch[]; +}; + +export type NativeFallbackReason = "unavailable" | "unsupportedLanguage" | "queryFailure"; + +export type NativeQueryExecution = { + results: NativeQueryResults | null; + fallbackReason?: NativeFallbackReason; + error?: string; +}; + +export type CompactImportsExecution = { + results: CompactQueryResults | null; + fallbackReason?: NativeFallbackReason; + error?: string; +}; + +export type NativeSingleQueryExecution = { + matches: NativeMatch[] | null; + fallbackReason?: NativeFallbackReason; + error?: string; +}; + +export type UnifiedQueryExecution = { + matches: NativeMatch[] | null; + backend: "native" | "js"; + fallbackReason?: 
NativeFallbackReason; + error?: string; +}; + +export type NativeSyntaxTreeExecution = { + tree: NativeSyntaxTree | null; + fallbackReason?: NativeFallbackReason; + error?: string; +}; + +export type NativeRuntimeMode = "auto" | "on" | "off"; + +/** + * Controls which query kinds are executed in a native call. + * - "imports": only run the imports query (used by graph mode) + * - "full": run all query kinds (used by full indexing) + */ +export type NativeQueryScope = "imports" | "full"; + +export type NativeBinding = { + runLanguageQueries: ( + source: string, + languageId: string, + importsQuery: string, + exportsQuery: string, + localsQuery: string, + importBindingsQuery: string, + ) => NativeQueryResults; + runImportsQueryCompact?: (source: string, languageId: string, importsQuery: string) => CompactQueryResults; + runQuery?: (source: string, languageId: string, queryText: string) => { matches: NativeMatch[] }; + parseSyntaxTree?: (source: string, languageId: string) => NativeSyntaxTree; + supportedLanguageIds: () => string[]; +}; + +export type NativeBindingState = + | { loaded: true; binding: NativeBinding; supportedLanguageIds: Set } + | { loaded: false; error?: unknown }; diff --git a/src/native/execution.ts b/src/native/execution.ts new file mode 100644 index 00000000..9823c47a --- /dev/null +++ b/src/native/execution.ts @@ -0,0 +1,224 @@ +import type { LanguageSupport } from "../languages.js"; +import { stringifyUnknown } from "../util.js"; +import type { + CompactImportsExecution, + NativeBindingState, + NativeQueryExecution, + NativeQueryResults, + NativeQueryScope, + NativeRuntimeMode, + NativeSingleQueryExecution, + NativeSyntaxTreeExecution, +} from "./contracts.js"; +import { getCachedNormalizedQuery, normalizeNativeQueryForSupport } from "./queries.js"; +import { loadBinding, resolveNativeBindingState, throwIfNativeRequiredUnavailable } from "./runtime.js"; + +export function runNativeLanguageQueries( + source: string, + support: LanguageSupport, + 
mode?: NativeRuntimeMode, +): NativeQueryResults | null { + return getNativeQueryExecution(source, support, mode).results; +} + +export function getNativeQueryExecutionForState( + source: string, + support: LanguageSupport, + state: NativeBindingState = loadBinding(), + scope: NativeQueryScope = "full", +): NativeQueryExecution { + if (!state.loaded) { + return unavailableQueryExecution(state); + } + if (!state.supportedLanguageIds.has(support.id)) { + return { results: null, fallbackReason: "unsupportedLanguage" }; + } + const importsOnly = scope === "imports"; + try { + return { + results: state.binding.runLanguageQueries( + source, + support.id, + getCachedNormalizedQuery(support, "imports"), + importsOnly ? "" : getCachedNormalizedQuery(support, "exports"), + importsOnly ? "" : getCachedNormalizedQuery(support, "locals"), + importsOnly ? "" : getCachedNormalizedQuery(support, "importBindings"), + ), + }; + } catch (error) { + return { + results: null, + fallbackReason: "queryFailure", + error: error instanceof Error ? error.message : String(error), + }; + } +} + +export function getNativeQueryExecution( + source: string, + support: LanguageSupport, + mode?: NativeRuntimeMode, + scope: NativeQueryScope = "full", +): NativeQueryExecution { + const state = resolveNativeBindingState(mode); + throwIfNativeRequiredUnavailable(mode, state); + return getNativeQueryExecutionForState(source, support, state, scope); +} + +/** + * Run only the imports query with a compact payload (name + text only). + * Falls back to the full execution path if the compact entrypoint is not + * available in the native binding. 
+ */ +export function getCompactImportsExecution( + source: string, + support: LanguageSupport, + mode?: NativeRuntimeMode, +): CompactImportsExecution { + const state = resolveNativeBindingState(mode); + throwIfNativeRequiredUnavailable(mode, state); + if (!state.loaded) { + return unavailableCompactExecution(state); + } + if (!state.supportedLanguageIds.has(support.id)) { + return { results: null, fallbackReason: "unsupportedLanguage" }; + } + const importsQuery = getCachedNormalizedQuery(support, "imports"); + try { + if (state.binding.runImportsQueryCompact) { + return { + results: state.binding.runImportsQueryCompact(source, support.id, importsQuery), + }; + } + const full = getNativeQueryExecutionForState(source, support, state, "imports"); + if (!full.results) return full; + return { + results: { + imports: full.results.imports.map((match) => ({ + patternIndex: match.patternIndex, + captures: match.captures.map((capture) => ({ name: capture.name, text: capture.text })), + })), + }, + }; + } catch (error) { + return { + results: null, + fallbackReason: "queryFailure", + error: error instanceof Error ? 
error.message : String(error), + }; + } +} + +export function getNativeSingleQueryExecution( + source: string, + support: LanguageSupport, + queryText: string, + mode?: NativeRuntimeMode, +): NativeSingleQueryExecution { + const state = resolveNativeBindingState(mode); + throwIfNativeRequiredUnavailable(mode, state); + if (!state.loaded) { + return unavailableSingleQueryExecution(state); + } + if (!state.supportedLanguageIds.has(support.id)) { + return { matches: null, fallbackReason: "unsupportedLanguage" }; + } + if (!state.binding.runQuery) { + return { + matches: null, + fallbackReason: "unavailable", + error: "native binding does not expose runQuery", + }; + } + const normalizedQuery = normalizeNativeQueryForSupport(support, "adHoc", queryText); + try { + return { + matches: state.binding.runQuery(source, support.id, normalizedQuery).matches, + }; + } catch (error) { + return { + matches: null, + fallbackReason: "queryFailure", + error: error instanceof Error ? error.message : String(error), + }; + } +} + +export function getNativeSyntaxTreeExecution( + source: string, + support: LanguageSupport, + mode?: NativeRuntimeMode, +): NativeSyntaxTreeExecution { + const state = resolveNativeBindingState(mode); + throwIfNativeRequiredUnavailable(mode, state); + if (!state.loaded) { + return unavailableSyntaxTreeExecution(state); + } + if (!state.supportedLanguageIds.has(support.id)) { + return { tree: null, fallbackReason: "unsupportedLanguage" }; + } + if (!state.binding.parseSyntaxTree) { + return { + tree: null, + fallbackReason: "unavailable", + error: "native binding does not expose parseSyntaxTree", + }; + } + try { + return { + tree: state.binding.parseSyntaxTree(source, support.id), + }; + } catch (error) { + return { + tree: null, + fallbackReason: "queryFailure", + error: error instanceof Error ? 
error.message : String(error), + }; + } +} + +function unavailableQueryExecution(state: Extract): NativeQueryExecution { + return { + results: null, + fallbackReason: "unavailable", + ...nativeError(state), + }; +} + +function unavailableCompactExecution(state: Extract): CompactImportsExecution { + return { + results: null, + fallbackReason: "unavailable", + ...nativeError(state), + }; +} + +function unavailableSingleQueryExecution( + state: Extract, +): NativeSingleQueryExecution { + return { + matches: null, + fallbackReason: "unavailable", + ...nativeError(state), + }; +} + +function unavailableSyntaxTreeExecution( + state: Extract, +): NativeSyntaxTreeExecution { + return { + tree: null, + fallbackReason: "unavailable", + ...nativeError(state), + }; +} + +function nativeError(state: Extract): { error?: string } { + if (!state.error) { + return {}; + } + if (state.error instanceof Error) { + return { error: state.error.message }; + } + return { error: stringifyUnknown(state.error) }; +} diff --git a/src/native/jsBridge.ts b/src/native/jsBridge.ts new file mode 100644 index 00000000..6800a9e7 --- /dev/null +++ b/src/native/jsBridge.ts @@ -0,0 +1,74 @@ +import { + executeJsQueryAsNativeMatches as executeJsQueryAsNativeMatchesViaPackage, + type JsLanguage, + type JsNativeMatch, + type JsSyntaxTree, +} from "../jsFallback.js"; +import type { LanguageSupport } from "../languages.js"; +import type { NativeMatch, NativeRuntimeMode, UnifiedQueryExecution } from "./contracts.js"; +import { getNativeSingleQueryExecution } from "./execution.js"; +import { isNativeBindingLoadedForLanguage } from "./runtime.js"; + +const NATIVE_ONLY_JS_FAMILY_LANGUAGE_IDS = new Set(["js", "ts", "tsx"]); + +export function shouldAvoidJsFallbackForLanguage(languageId: string): boolean { + return NATIVE_ONLY_JS_FAMILY_LANGUAGE_IDS.has(languageId); +} + +export function executeJsQueryAsNativeMatches( + source: string, + support: LanguageSupport, + lang: JsLanguage, + queryText: string, + tree?: 
JsSyntaxTree, +): NativeMatch[] { + return executeJsQueryAsNativeMatchesViaPackage(source, lang, queryText, tree) as NativeMatch[] & JsNativeMatch[]; +} + +export function getUnifiedQueryExecution( + source: string, + support: LanguageSupport, + queryText: string, + opts?: { + tree?: JsSyntaxTree; + mode?: NativeRuntimeMode; + lang?: JsLanguage; + getLanguage?: () => JsLanguage; + }, +): UnifiedQueryExecution { + const nativeExecution = getNativeSingleQueryExecution(source, support, queryText, opts?.mode); + if (nativeExecution.matches) { + return { + matches: nativeExecution.matches, + backend: "native", + }; + } + if (shouldAvoidJsFallbackForLanguage(support.id) && isNativeBindingLoadedForLanguage(support.id, opts?.mode)) { + return { + matches: null, + backend: "native", + ...(nativeExecution.fallbackReason ? { fallbackReason: nativeExecution.fallbackReason } : {}), + ...(nativeExecution.error ? { error: nativeExecution.error } : {}), + }; + } + try { + const resolvedLang = opts?.lang ?? opts?.getLanguage?.(); + if (!resolvedLang) { + throw new Error("JS query fallback requires a language"); + } + const matches = executeJsQueryAsNativeMatches(source, support, resolvedLang, queryText, opts?.tree); + return { + matches, + backend: "js", + ...(nativeExecution.fallbackReason ? { fallbackReason: nativeExecution.fallbackReason } : {}), + ...(nativeExecution.error ? { error: nativeExecution.error } : {}), + }; + } catch (error) { + return { + matches: null, + backend: "js", + fallbackReason: nativeExecution.fallbackReason ?? "queryFailure", + error: error instanceof Error ? 
error.message : String(error), + }; + } +} diff --git a/src/native/queries.ts b/src/native/queries.ts new file mode 100644 index 00000000..c015ea7d --- /dev/null +++ b/src/native/queries.ts @@ -0,0 +1,88 @@ +import type { LanguageSupport } from "../languages.js"; +import type { NativeCompatibilityQueryKind, NativeQueryKind } from "../languages/types.js"; + +export const NATIVE_QUERY_KINDS: NativeQueryKind[] = ["imports", "exports", "locals", "importBindings"]; + +/** + * Per-language cache of normalized query text and modification status. + * Normalization is constant for a given (support.id, queryKind) pair, + * so we compute it once per language per kind. + */ +const normalizedQueryCache = new Map>(); + +export function normalizeNativeQueryForSupport( + support: LanguageSupport, + kind: NativeCompatibilityQueryKind, + queryText: string, +): string { + return support.native?.normalizeQuery?.(kind, queryText) ?? queryText; +} + +function getOrComputeNormalizedEntry( + support: LanguageSupport, + kind: NativeQueryKind, +): { text: string; wasModified: boolean } { + let byKind = normalizedQueryCache.get(support.id); + if (!byKind) { + byKind = new Map(); + normalizedQueryCache.set(support.id, byKind); + } + let entry = byKind.get(kind); + if (!entry) { + const original = support.queries[kind]; + const normalized = normalizeNativeQueryForSupport(support, kind, original); + entry = { text: normalized, wasModified: normalized !== original }; + byKind.set(kind, entry); + } + return entry; +} + +/** + * Returns the normalized query text for the support's own query. + * Cached per (support.id, kind) to avoid re-running regex normalization + * on every file. 
+ */ +export function getCachedNormalizedQuery(support: LanguageSupport, kind: NativeQueryKind): string { + return getOrComputeNormalizedEntry(support, kind).text; +} + +/** + * Returns true when the native query for this (support, kind) differs from + * the original JS query - meaning the language has grammar divergence and + * empty native results should NOT be treated as authoritative. + */ +export function isNativeQueryModified(support: LanguageSupport, kind: NativeQueryKind): boolean { + return getOrComputeNormalizedEntry(support, kind).wasModified; +} + +export function isNativeQueryAuthoritative(support: LanguageSupport, kind: NativeQueryKind): boolean { + if (!isNativeQueryModified(support, kind)) { + return true; + } + return support.native?.authoritativeKinds?.includes(kind) ?? false; +} + +export function getNativeQueryMetadataForSupport(support: LanguageSupport): { + normalizedQueryKinds: NativeQueryKind[]; + skippedQueryKinds: NativeQueryKind[]; +} { + const normalizedQueryKinds: NativeQueryKind[] = []; + const skippedQueryKinds: NativeQueryKind[] = []; + + for (const kind of NATIVE_QUERY_KINDS) { + if (!isNativeQueryModified(support, kind)) { + continue; + } + normalizedQueryKinds.push(kind); + const originalQuery = support.queries[kind]; + const normalized = normalizeNativeQueryForSupport(support, kind, originalQuery); + if (originalQuery.trim().length && !normalized.trim().length) { + skippedQueryKinds.push(kind); + } + } + + return { + normalizedQueryKinds, + skippedQueryKinds, + }; +} diff --git a/src/native/runtime.ts b/src/native/runtime.ts new file mode 100644 index 00000000..af13680e --- /dev/null +++ b/src/native/runtime.ts @@ -0,0 +1,102 @@ +import path from "node:path"; +import { createRequire } from "node:module"; +import { fileURLToPath } from "node:url"; +import { stringifyUnknown } from "../util.js"; +import { loadNativeBinding } from "./bindingLoader.js"; +import type { NativeBinding, NativeBindingState, NativeRuntimeMode } from 
"./contracts.js"; + +const require = createRequire(import.meta.url); +const localNativePackageRoot = path.resolve( + path.dirname(fileURLToPath(import.meta.url)), + "../../packages/codegraph-native", +); + +const NATIVE_REQUIRED_ERROR_PREFIX = "native tree-sitter required by explicit option but unavailable"; + +let bindingState: NativeBindingState | undefined; + +export function __resetNativeTreeSitterBindingForTests(): void { + bindingState = undefined; +} + +export function isNativeTreeSitterDisabledByEnv(env: NodeJS.ProcessEnv = process.env): boolean { + const rawValue = env.CODEGRAPH_DISABLE_NATIVE; + if (typeof rawValue !== "string") { + return false; + } + const normalized = rawValue.trim().toLowerCase(); + return normalized === "1" || normalized === "true" || normalized === "yes"; +} + +export function normalizeNativeRuntimeMode(mode?: NativeRuntimeMode): NativeRuntimeMode { + return mode ?? "auto"; +} + +export function loadBinding(): NativeBindingState { + if (bindingState) return bindingState; + const loaded = loadNativeBinding({ + packageName: "@lzehrung/codegraph-native", + localPackageRoot: localNativePackageRoot, + requireFn: require, + resolveFn: require.resolve, + }); + if (loaded.binding) { + bindingState = { + loaded: true, + binding: loaded.binding, + supportedLanguageIds: new Set(loaded.binding.supportedLanguageIds()), + }; + return bindingState; + } + bindingState = { loaded: false, error: loaded.error }; + return bindingState; +} + +export function resolveNativeBindingState( + mode?: NativeRuntimeMode, + env: NodeJS.ProcessEnv = process.env, +): NativeBindingState { + const normalizedMode = normalizeNativeRuntimeMode(mode); + if (normalizedMode === "off") { + return { + loaded: false, + error: new Error("native tree-sitter disabled by explicit option"), + }; + } + if (normalizedMode === "auto" && isNativeTreeSitterDisabledByEnv(env)) { + return { + loaded: false, + error: new Error("native tree-sitter disabled by CODEGRAPH_DISABLE_NATIVE"), + 
}; + } + return loadBinding(); +} + +export function isNativeTreeSitterAvailable(mode?: NativeRuntimeMode): boolean { + return resolveNativeBindingState(mode).loaded; +} + +export function getNativeTreeSitterLoadError(mode?: NativeRuntimeMode): unknown { + const state = resolveNativeBindingState(mode); + return state.loaded ? undefined : state.error; +} + +export function getNativeTreeSitterSupportedLanguageIds(mode?: NativeRuntimeMode): string[] { + const state = resolveNativeBindingState(mode); + return state.loaded ? Array.from(state.supportedLanguageIds).sort() : []; +} + +export function isNativeRequiredUnavailableError(error: unknown): boolean { + return error instanceof Error && error.message.startsWith(NATIVE_REQUIRED_ERROR_PREFIX); +} + +export function throwIfNativeRequiredUnavailable(mode: NativeRuntimeMode | undefined, state: NativeBindingState): void { + if (normalizeNativeRuntimeMode(mode) !== "on" || state.loaded) return; + const suffix = state.error ? `: ${stringifyUnknown(state.error)}` : ""; + throw new Error(`${NATIVE_REQUIRED_ERROR_PREFIX}${suffix}`); +} + +export function isNativeBindingLoadedForLanguage(languageId: string, mode?: NativeRuntimeMode): boolean { + const state = resolveNativeBindingState(mode); + return state.loaded && state.supportedLanguageIds.has(languageId); +} diff --git a/src/native/treeSitterNative.ts b/src/native/treeSitterNative.ts index 04b9285e..80af6bb2 100644 --- a/src/native/treeSitterNative.ts +++ b/src/native/treeSitterNative.ts @@ -1,574 +1,54 @@ -import path from "node:path"; -import { fileURLToPath } from "node:url"; -import { createRequire } from "node:module"; -import { - executeJsQueryAsNativeMatches as executeJsQueryAsNativeMatchesViaPackage, - type JsLanguage, - type JsNativeMatch, - type JsSyntaxTree, -} from "../jsFallback.js"; -import type { LanguageSupport } from "../languages.js"; -import type { NativeCompatibilityQueryKind, NativeQueryKind } from "../languages/types.js"; -import { stringifyUnknown } 
from "../util.js"; -import { loadNativeBinding } from "./bindingLoader.js"; - -export const NATIVE_QUERY_KINDS: NativeQueryKind[] = ["imports", "exports", "locals", "importBindings"]; - -export type NativePoint = { - row: number; - column: number; - index: number; -}; - -export type NativeCapture = { - name: string; - text: string; - nodeType: string; - start: NativePoint; - end: NativePoint; -}; - -export type NativeMatch = { - patternIndex: number; - captures: NativeCapture[]; -}; - -export type NativeQueryResults = { - imports: NativeMatch[]; - exports: NativeMatch[]; - locals: NativeMatch[]; - importBindings: NativeMatch[]; -}; - -export type NativeSyntaxNode = { - id: number; - parentId: number; - nodeType: string; - named: boolean; - start: NativePoint; - end: NativePoint; - childIds: number[]; - namedChildIds: number[]; - childFieldNames: string[]; -}; - -export type NativeSyntaxTree = { - rootId: number; - nodes: NativeSyntaxNode[]; -}; - -export type CompactCapture = { - name: string; - text: string; -}; - -export type CompactMatch = { - patternIndex: number; - captures: CompactCapture[]; -}; - -export type CompactQueryResults = { - imports: CompactMatch[]; -}; - -export type NativeQueryExecution = { - results: NativeQueryResults | null; - fallbackReason?: "unavailable" | "unsupportedLanguage" | "queryFailure"; - error?: string; -}; - -export type NativeRuntimeMode = "auto" | "on" | "off"; - -/** - * Controls which query kinds are executed in a native call. 
- * - "imports": only run the imports query (used by graph mode) - * - "full": run all query kinds (used by full indexing) - */ -export type NativeQueryScope = "imports" | "full"; - -type NativeBinding = { - runLanguageQueries: ( - source: string, - languageId: string, - importsQuery: string, - exportsQuery: string, - localsQuery: string, - importBindingsQuery: string, - ) => NativeQueryResults; - runImportsQueryCompact?: (source: string, languageId: string, importsQuery: string) => CompactQueryResults; - runQuery?: (source: string, languageId: string, queryText: string) => { matches: NativeMatch[] }; - parseSyntaxTree?: (source: string, languageId: string) => NativeSyntaxTree; - supportedLanguageIds: () => string[]; -}; - -const require = createRequire(import.meta.url); -const localNativePackageRoot = path.resolve( - path.dirname(fileURLToPath(import.meta.url)), - "../../packages/codegraph-native", -); - -let bindingState: - | { loaded: true; binding: NativeBinding; supportedLanguageIds: Set } - | { loaded: false; error?: unknown } - | undefined; - -export function __resetNativeTreeSitterBindingForTests(): void { - bindingState = undefined; -} - -export function isNativeTreeSitterDisabledByEnv(env: NodeJS.ProcessEnv = process.env): boolean { - const rawValue = env.CODEGRAPH_DISABLE_NATIVE; - if (typeof rawValue !== "string") { - return false; - } - const normalized = rawValue.trim().toLowerCase(); - return normalized === "1" || normalized === "true" || normalized === "yes"; -} - -function normalizeNativeRuntimeMode(mode?: NativeRuntimeMode): NativeRuntimeMode { - return mode ?? 
"auto"; -} - -function loadBinding(): - | { loaded: true; binding: NativeBinding; supportedLanguageIds: Set } - | { loaded: false; error?: unknown } { - if (bindingState) return bindingState; - const loaded = loadNativeBinding({ - packageName: "@lzehrung/codegraph-native", - localPackageRoot: localNativePackageRoot, - requireFn: require, - resolveFn: require.resolve, - }); - if (loaded.binding) { - bindingState = { - loaded: true, - binding: loaded.binding, - supportedLanguageIds: new Set(loaded.binding.supportedLanguageIds()), - }; - return bindingState; - } - bindingState = { loaded: false, error: loaded.error }; - return bindingState; -} - -function resolveNativeBindingState(mode?: NativeRuntimeMode, env: NodeJS.ProcessEnv = process.env): NativeBindingState { - const normalizedMode = normalizeNativeRuntimeMode(mode); - if (normalizedMode === "off") { - return { - loaded: false, - error: new Error("native tree-sitter disabled by explicit option"), - }; - } - if (normalizedMode === "auto" && isNativeTreeSitterDisabledByEnv(env)) { - return { - loaded: false, - error: new Error("native tree-sitter disabled by CODEGRAPH_DISABLE_NATIVE"), - }; - } - return loadBinding(); -} - -export function normalizeNativeQueryForSupport( - support: LanguageSupport, - kind: NativeCompatibilityQueryKind, - queryText: string, -): string { - return support.native?.normalizeQuery?.(kind, queryText) ?? queryText; -} - -/** - * Per-language cache of normalized query text and modification status. - * Normalization is constant for a given (support.id, queryKind) pair, - * so we compute it once per language per kind. 
- */ -const normalizedQueryCache = new Map>(); - -function getOrComputeNormalizedEntry( - support: LanguageSupport, - kind: NativeQueryKind, -): { text: string; wasModified: boolean } { - let byKind = normalizedQueryCache.get(support.id); - if (!byKind) { - byKind = new Map(); - normalizedQueryCache.set(support.id, byKind); - } - let entry = byKind.get(kind); - if (!entry) { - const original = support.queries[kind]; - const normalized = normalizeNativeQueryForSupport(support, kind, original); - entry = { text: normalized, wasModified: normalized !== original }; - byKind.set(kind, entry); - } - return entry; -} - -/** - * Returns the normalized query text for the support's own query. - * Cached per (support.id, kind) to avoid re-running regex normalization - * on every file. - */ -export function getCachedNormalizedQuery(support: LanguageSupport, kind: NativeQueryKind): string { - return getOrComputeNormalizedEntry(support, kind).text; -} - -/** - * Returns true when the native query for this (support, kind) differs from - * the original JS query - meaning the language has grammar divergence and - * empty native results should NOT be treated as authoritative. - */ -export function isNativeQueryModified(support: LanguageSupport, kind: NativeQueryKind): boolean { - return getOrComputeNormalizedEntry(support, kind).wasModified; -} - -export function isNativeQueryAuthoritative(support: LanguageSupport, kind: NativeQueryKind): boolean { - if (!isNativeQueryModified(support, kind)) { - return true; - } - return support.native?.authoritativeKinds?.includes(kind) ?? 
false; -} - -export function getNativeQueryMetadataForSupport(support: LanguageSupport): { - normalizedQueryKinds: NativeQueryKind[]; - skippedQueryKinds: NativeQueryKind[]; -} { - const normalizedQueryKinds: NativeQueryKind[] = []; - const skippedQueryKinds: NativeQueryKind[] = []; - - for (const kind of NATIVE_QUERY_KINDS) { - if (!isNativeQueryModified(support, kind)) { - continue; - } - normalizedQueryKinds.push(kind); - const originalQuery = support.queries[kind]; - const normalized = normalizeNativeQueryForSupport(support, kind, originalQuery); - if (originalQuery.trim().length && !normalized.trim().length) { - skippedQueryKinds.push(kind); - } - } - - return { - normalizedQueryKinds, - skippedQueryKinds, - }; -} - -export function isNativeTreeSitterAvailable(mode?: NativeRuntimeMode): boolean { - return resolveNativeBindingState(mode).loaded; -} - -export function getNativeTreeSitterLoadError(mode?: NativeRuntimeMode): unknown { - const state = resolveNativeBindingState(mode); - return state.loaded ? undefined : state.error; -} - -export function getNativeTreeSitterSupportedLanguageIds(mode?: NativeRuntimeMode): string[] { - const state = resolveNativeBindingState(mode); - return state.loaded ? 
Array.from(state.supportedLanguageIds).sort() : []; -} - -export function runNativeLanguageQueries( - source: string, - support: LanguageSupport, - mode?: NativeRuntimeMode, -): NativeQueryResults | null { - return getNativeQueryExecution(source, support, mode).results; -} - -type NativeBindingState = - | { loaded: true; binding: NativeBinding; supportedLanguageIds: Set } - | { loaded: false; error?: unknown }; - -const NATIVE_REQUIRED_ERROR_PREFIX = "native tree-sitter required by explicit option but unavailable"; - -export function isNativeRequiredUnavailableError(error: unknown): boolean { - return error instanceof Error && error.message.startsWith(NATIVE_REQUIRED_ERROR_PREFIX); -} - -function throwIfNativeRequiredUnavailable(mode: NativeRuntimeMode | undefined, state: NativeBindingState): void { - if (normalizeNativeRuntimeMode(mode) !== "on" || state.loaded) return; - const suffix = state.error ? `: ${stringifyUnknown(state.error)}` : ""; - throw new Error(`${NATIVE_REQUIRED_ERROR_PREFIX}${suffix}`); -} - -export function getNativeQueryExecutionForState( - source: string, - support: LanguageSupport, - state: NativeBindingState = loadBinding(), - scope: NativeQueryScope = "full", -): NativeQueryExecution { - if (!state.loaded) { - return { - results: null, - fallbackReason: "unavailable", - ...(state.error - ? { - error: state.error instanceof Error ? state.error.message : stringifyUnknown(state.error), - } - : {}), - }; - } - if (!state.supportedLanguageIds.has(support.id)) { - return { results: null, fallbackReason: "unsupportedLanguage" }; - } - const importsOnly = scope === "imports"; - try { - return { - results: state.binding.runLanguageQueries( - source, - support.id, - getCachedNormalizedQuery(support, "imports"), - importsOnly ? "" : getCachedNormalizedQuery(support, "exports"), - importsOnly ? "" : getCachedNormalizedQuery(support, "locals"), - importsOnly ? 
"" : getCachedNormalizedQuery(support, "importBindings"), - ), - }; - } catch (error) { - return { - results: null, - fallbackReason: "queryFailure", - error: error instanceof Error ? error.message : String(error), - }; - } -} - -export function getNativeQueryExecution( - source: string, - support: LanguageSupport, - mode?: NativeRuntimeMode, - scope: NativeQueryScope = "full", -): NativeQueryExecution { - const state = resolveNativeBindingState(mode); - throwIfNativeRequiredUnavailable(mode, state); - return getNativeQueryExecutionForState(source, support, state, scope); -} - -export type CompactImportsExecution = { - results: CompactQueryResults | null; - fallbackReason?: "unavailable" | "unsupportedLanguage" | "queryFailure"; - error?: string; -}; - -export type NativeSingleQueryExecution = { - matches: NativeMatch[] | null; - fallbackReason?: "unavailable" | "unsupportedLanguage" | "queryFailure"; - error?: string; -}; - -export type UnifiedQueryExecution = { - matches: NativeMatch[] | null; - backend: "native" | "js"; - fallbackReason?: "unavailable" | "unsupportedLanguage" | "queryFailure"; - error?: string; -}; - -export type NativeSyntaxTreeExecution = { - tree: NativeSyntaxTree | null; - fallbackReason?: "unavailable" | "unsupportedLanguage" | "queryFailure"; - error?: string; -}; - -const NATIVE_ONLY_JS_FAMILY_LANGUAGE_IDS = new Set(["js", "ts", "tsx"]); - -export function shouldAvoidJsFallbackForLanguage(languageId: string): boolean { - return NATIVE_ONLY_JS_FAMILY_LANGUAGE_IDS.has(languageId); -} - -export function isNativeBindingLoadedForLanguage(languageId: string, mode?: NativeRuntimeMode): boolean { - const state = resolveNativeBindingState(mode); - return state.loaded && state.supportedLanguageIds.has(languageId); -} - -/** - * Run only the imports query with a compact payload (name + text only). - * Falls back to the full execution path if the compact entrypoint is not - * available in the native binding. 
- */ -export function getCompactImportsExecution( - source: string, - support: LanguageSupport, - mode?: NativeRuntimeMode, -): CompactImportsExecution { - const state = resolveNativeBindingState(mode); - throwIfNativeRequiredUnavailable(mode, state); - if (!state.loaded) { - return { - results: null, - fallbackReason: "unavailable", - ...(state.error - ? { - error: state.error instanceof Error ? state.error.message : stringifyUnknown(state.error), - } - : {}), - }; - } - if (!state.supportedLanguageIds.has(support.id)) { - return { results: null, fallbackReason: "unsupportedLanguage" }; - } - const importsQuery = getCachedNormalizedQuery(support, "imports"); - try { - if (state.binding.runImportsQueryCompact) { - return { - results: state.binding.runImportsQueryCompact(source, support.id, importsQuery), - }; - } - // Fallback: use full execution with imports scope - const full = getNativeQueryExecutionForState(source, support, state, "imports"); - if (!full.results) return full; - return { - results: { - imports: full.results.imports.map((m) => ({ - patternIndex: m.patternIndex, - captures: m.captures.map((c) => ({ name: c.name, text: c.text })), - })), - }, - }; - } catch (error) { - return { - results: null, - fallbackReason: "queryFailure", - error: error instanceof Error ? error.message : String(error), - }; - } -} - -export function getNativeSingleQueryExecution( - source: string, - support: LanguageSupport, - queryText: string, - mode?: NativeRuntimeMode, -): NativeSingleQueryExecution { - const state = resolveNativeBindingState(mode); - throwIfNativeRequiredUnavailable(mode, state); - if (!state.loaded) { - return { - matches: null, - fallbackReason: "unavailable", - ...(state.error - ? { - error: state.error instanceof Error ? 
state.error.message : stringifyUnknown(state.error), - } - : {}), - }; - } - if (!state.supportedLanguageIds.has(support.id)) { - return { matches: null, fallbackReason: "unsupportedLanguage" }; - } - if (!state.binding.runQuery) { - return { - matches: null, - fallbackReason: "unavailable", - error: "native binding does not expose runQuery", - }; - } - const normalizedQuery = normalizeNativeQueryForSupport(support, "adHoc", queryText); - try { - return { - matches: state.binding.runQuery(source, support.id, normalizedQuery).matches, - }; - } catch (error) { - return { - matches: null, - fallbackReason: "queryFailure", - error: error instanceof Error ? error.message : String(error), - }; - } -} - -export function executeJsQueryAsNativeMatches( - source: string, - support: LanguageSupport, - lang: JsLanguage, - queryText: string, - tree?: JsSyntaxTree, -): NativeMatch[] { - return executeJsQueryAsNativeMatchesViaPackage(source, lang, queryText, tree) as NativeMatch[] & JsNativeMatch[]; -} - -export function getUnifiedQueryExecution( - source: string, - support: LanguageSupport, - queryText: string, - opts?: { - tree?: JsSyntaxTree; - mode?: NativeRuntimeMode; - lang?: JsLanguage; - getLanguage?: () => JsLanguage; - }, -): UnifiedQueryExecution { - const nativeExecution = getNativeSingleQueryExecution(source, support, queryText, opts?.mode); - if (nativeExecution.matches) { - return { - matches: nativeExecution.matches, - backend: "native", - }; - } - if (shouldAvoidJsFallbackForLanguage(support.id) && isNativeBindingLoadedForLanguage(support.id, opts?.mode)) { - return { - matches: null, - backend: "native", - ...(nativeExecution.fallbackReason ? { fallbackReason: nativeExecution.fallbackReason } : {}), - ...(nativeExecution.error ? { error: nativeExecution.error } : {}), - }; - } - try { - const resolvedLang = opts?.lang ?? 
opts?.getLanguage?.(); - if (!resolvedLang) { - throw new Error("JS query fallback requires a language"); - } - const matches = executeJsQueryAsNativeMatches(source, support, resolvedLang, queryText, opts?.tree); - return { - matches, - backend: "js", - ...(nativeExecution.fallbackReason ? { fallbackReason: nativeExecution.fallbackReason } : {}), - ...(nativeExecution.error ? { error: nativeExecution.error } : {}), - }; - } catch (error) { - return { - matches: null, - backend: "js", - fallbackReason: nativeExecution.fallbackReason ?? "queryFailure", - error: error instanceof Error ? error.message : String(error), - }; - } -} - -export function getNativeSyntaxTreeExecution( - source: string, - support: LanguageSupport, - mode?: NativeRuntimeMode, -): NativeSyntaxTreeExecution { - const state = resolveNativeBindingState(mode); - throwIfNativeRequiredUnavailable(mode, state); - if (!state.loaded) { - return { - tree: null, - fallbackReason: "unavailable", - ...(state.error - ? { - error: state.error instanceof Error ? state.error.message : stringifyUnknown(state.error), - } - : {}), - }; - } - if (!state.supportedLanguageIds.has(support.id)) { - return { tree: null, fallbackReason: "unsupportedLanguage" }; - } - if (!state.binding.parseSyntaxTree) { - return { - tree: null, - fallbackReason: "unavailable", - error: "native binding does not expose parseSyntaxTree", - }; - } - try { - return { - tree: state.binding.parseSyntaxTree(source, support.id), - }; - } catch (error) { - return { - tree: null, - fallbackReason: "queryFailure", - error: error instanceof Error ? 
error.message : String(error), - }; - } -} +export type { + CompactCapture, + CompactImportsExecution, + CompactMatch, + CompactQueryResults, + NativeBinding, + NativeCapture, + NativeFallbackReason, + NativeMatch, + NativePoint, + NativeQueryExecution, + NativeQueryResults, + NativeQueryScope, + NativeRuntimeMode, + NativeSingleQueryExecution, + NativeSyntaxNode, + NativeSyntaxTree, + NativeSyntaxTreeExecution, + UnifiedQueryExecution, +} from "./contracts.js"; + +export { + __resetNativeTreeSitterBindingForTests, + getNativeTreeSitterLoadError, + getNativeTreeSitterSupportedLanguageIds, + isNativeBindingLoadedForLanguage, + isNativeRequiredUnavailableError, + isNativeTreeSitterAvailable, + isNativeTreeSitterDisabledByEnv, +} from "./runtime.js"; + +export { + getCachedNormalizedQuery, + getNativeQueryMetadataForSupport, + isNativeQueryAuthoritative, + isNativeQueryModified, + NATIVE_QUERY_KINDS, + normalizeNativeQueryForSupport, +} from "./queries.js"; + +export { + getCompactImportsExecution, + getNativeQueryExecution, + getNativeQueryExecutionForState, + getNativeSingleQueryExecution, + getNativeSyntaxTreeExecution, + runNativeLanguageQueries, +} from "./execution.js"; + +export { + executeJsQueryAsNativeMatches, + getUnifiedQueryExecution, + shouldAvoidJsFallbackForLanguage, +} from "./jsBridge.js"; diff --git a/src/query.ts b/src/query.ts index 90ea8ac8..a8a6c029 100644 --- a/src/query.ts +++ b/src/query.ts @@ -1,210 +1,12 @@ -import type { SymbolGraph, SymbolNode, SymbolEdge, SymbolNodeKind } from "./graphs.js"; - -export type SymbolQuery = { - text?: string; - nameIncludes?: string; - fileIncludes?: string; - docstringIncludes?: string; - kinds?: SymbolNodeKind[]; -}; - -export type GraphQuery = - | { kind: "mostCalledMethods"; limit: number } - | { kind: "dependencyChain"; className: string } - | { kind: "controllersMostEndpoints"; limit: number } - | { kind: "classesImplementing"; interfaceName: string } - | { kind: "affectedFunctionsForModule"; 
modulePath: string } - | { kind: "highestComplexityClasses"; limit: number } - | { kind: "highestComplexityFunctions"; limit: number }; - -const tokenize = (input: string): string[] => - input.match(/[^\s"]+:"[^"]+"|"[^"]+"|\S+/g)?.map((token) => token.trim()) ?? []; - -const normalizeToken = (token: string): string => - token.startsWith('"') && token.endsWith('"') ? token.slice(1, -1) : token; - -export function parseSymbolQuery(input: string): SymbolQuery { - const query: SymbolQuery = {}; - const residual: string[] = []; - for (const raw of tokenize(input)) { - const token = normalizeToken(raw); - const idx = token.indexOf(":"); - if (idx <= 0) { - if (token) residual.push(token); - continue; - } - const key = token.slice(0, idx).toLowerCase(); - let value = token.slice(idx + 1); - if (value.startsWith('"') && value.endsWith('"')) { - value = value.slice(1, -1); - } - if (!value) continue; - if (key === "kind" || key === "kinds") { - const kinds = value - .split(",") - .map((k) => k.trim()) - .filter(Boolean) as SymbolNodeKind[]; - if (kinds.length) query.kinds = kinds; - continue; - } - if (key === "name") { - query.nameIncludes = value; - continue; - } - if (key === "file") { - query.fileIncludes = value; - continue; - } - if (key === "doc" || key === "docstring") { - query.docstringIncludes = value; - continue; - } - residual.push(token); - } - if (residual.length) query.text = residual.join(" "); - return query; -} - -const normalizePhrase = (value: string): string => - value - .trim() - .replace(/^["']|["']$/g, "") - .replace(/^the\s+/i, ""); - -const parseLimit = (input: string, fallback: number): number => { - const match = /(?:top|most)\s+(\d+)/i.exec(input); - if (!match) return fallback; - const limit = Number(match[1]); - return Number.isFinite(limit) && limit > 0 ? 
limit : fallback; -}; - -export function parseGraphQuery(input: string): GraphQuery | null { - const text = input.trim(); - const lower = text.toLowerCase(); - - if (lower.includes("most called methods")) { - return { kind: "mostCalledMethods", limit: parseLimit(text, 10) }; - } - if (lower.includes("dependency chain")) { - const match = /dependency chain for (.+?) class/i.exec(text); - if (!match) return null; - return { - kind: "dependencyChain", - className: normalizePhrase(match[1] ?? ""), - }; - } - if (lower.includes("controllers have the most endpoints")) { - return { kind: "controllersMostEndpoints", limit: parseLimit(text, 10) }; - } - if (lower.includes("implement") && lower.includes("interface")) { - const match = /implement(?:s)? (.+?) interface/i.exec(text); - if (!match) return null; - return { - kind: "classesImplementing", - interfaceName: normalizePhrase(match[1] ?? ""), - }; - } - if (lower.includes("affected") && lower.includes("module")) { - const match = - /change (?:this )?module\s+["']?([^"']+)["']?/i.exec(text) ?? /module\s+["']?([^"']+)["']?/i.exec(text); - if (!match) return null; - return { - kind: "affectedFunctionsForModule", - modulePath: normalizePhrase(match[1] ?? 
""), - }; - } - if (lower.includes("highest complexity") && lower.includes("function")) { - return { kind: "highestComplexityFunctions", limit: parseLimit(text, 10) }; - } - if (lower.includes("highest complexity")) { - return { kind: "highestComplexityClasses", limit: parseLimit(text, 10) }; - } - return null; -} - -const includesFolded = (value: string | undefined, needle: string): boolean => { - if (!value) return false; - return value.toLowerCase().includes(needle.toLowerCase()); -}; - -export function querySymbols(sg: SymbolGraph, query: SymbolQuery): SymbolNode[] { - const textNeedle = query.text?.trim(); - return [...sg.nodes.values()].filter((node) => { - if (query.kinds && !query.kinds.includes(node.kind)) return false; - if (query.nameIncludes && !includesFolded(node.name, query.nameIncludes)) return false; - if (query.fileIncludes && !includesFolded(node.file, query.fileIncludes)) return false; - if (query.docstringIncludes && !includesFolded(node.docstring, query.docstringIncludes)) return false; - if (textNeedle) { - const haystack = [node.name, node.file, node.docstring].filter(Boolean).join(" "); - if (!includesFolded(haystack, textNeedle)) return false; - } - return true; - }); -} - -export type NeighborQuery = { - symbolId: string; - direction?: "out" | "in" | "both"; - maxDepth?: number; - edgeLabels?: string[]; -}; - -export type NeighborResult = { - nodes: SymbolNode[]; - edges: SymbolEdge[]; -}; - -export function querySymbolNeighbors(sg: SymbolGraph, query: NeighborQuery): NeighborResult { - const direction = query.direction ?? "both"; - const maxDepth = typeof query.maxDepth === "number" && query.maxDepth > 0 ? query.maxDepth : 1; - const labelFilter = query.edgeLabels?.length ? new Set(query.edgeLabels) : null; - - const outgoing = new Map(); - const incoming = new Map(); - for (const edge of sg.edges) { - if (labelFilter && edge.label && !labelFilter.has(edge.label)) continue; - const outList = outgoing.get(edge.from) ?? 
[]; - outList.push(edge); - outgoing.set(edge.from, outList); - const inList = incoming.get(edge.to) ?? []; - inList.push(edge); - incoming.set(edge.to, inList); - } - - const visited = new Set(); - const frontier: Array<{ id: string; depth: number }> = [{ id: query.symbolId, depth: 0 }]; - visited.add(query.symbolId); - - const edgeSet = new Set(); - let frontierIndex = 0; - while (frontierIndex < frontier.length) { - const current = frontier[frontierIndex++]; - if (!current || current.depth >= maxDepth) continue; - const expandOut = direction === "out" || direction === "both"; - const expandIn = direction === "in" || direction === "both"; - if (expandOut) { - for (const edge of outgoing.get(current.id) ?? []) { - const key = `${edge.from}->${edge.to}::${edge.label ?? ""}`; - edgeSet.add(key); - if (!visited.has(edge.to)) { - visited.add(edge.to); - frontier.push({ id: edge.to, depth: current.depth + 1 }); - } - } - } - if (expandIn) { - for (const edge of incoming.get(current.id) ?? []) { - const key = `${edge.from}->${edge.to}::${edge.label ?? ""}`; - edgeSet.add(key); - if (!visited.has(edge.from)) { - visited.add(edge.from); - frontier.push({ id: edge.from, depth: current.depth + 1 }); - } - } - } - } - - const edges = sg.edges.filter((edge) => edgeSet.has(`${edge.from}->${edge.to}::${edge.label ?? 
""}`)); - const nodes = [...visited].map((id) => sg.nodes.get(id)).filter((node): node is SymbolNode => !!node); - return { nodes, edges }; -} +export { + parseGraphQuery, + parseSymbolQuery, + type GraphQuery, + type SymbolQuery, +} from "./query/parser.js"; +export { + querySymbolNeighbors, + querySymbols, + type NeighborQuery, + type NeighborResult, +} from "./query/symbols.js"; diff --git a/src/query/parser.ts b/src/query/parser.ts new file mode 100644 index 00000000..0a2faa0e --- /dev/null +++ b/src/query/parser.ts @@ -0,0 +1,123 @@ +import type { SymbolNodeKind } from "../graphs.js"; + +export type SymbolQuery = { + text?: string; + nameIncludes?: string; + fileIncludes?: string; + docstringIncludes?: string; + kinds?: SymbolNodeKind[]; +}; + +export type GraphQuery = + | { kind: "mostCalledMethods"; limit: number } + | { kind: "dependencyChain"; className: string } + | { kind: "controllersMostEndpoints"; limit: number } + | { kind: "classesImplementing"; interfaceName: string } + | { kind: "affectedFunctionsForModule"; modulePath: string } + | { kind: "highestComplexityClasses"; limit: number } + | { kind: "highestComplexityFunctions"; limit: number }; + +const tokenize = (input: string): string[] => + input.match(/[^\s"]+:"[^"]+"|"[^"]+"|\S+/g)?.map((token) => token.trim()) ?? []; + +const normalizeToken = (token: string): string => + token.startsWith('"') && token.endsWith('"') ? 
token.slice(1, -1) : token; + +export function parseSymbolQuery(input: string): SymbolQuery { + const query: SymbolQuery = {}; + const residual: string[] = []; + for (const raw of tokenize(input)) { + const token = normalizeToken(raw); + const idx = token.indexOf(":"); + if (idx <= 0) { + if (token) residual.push(token); + continue; + } + const key = token.slice(0, idx).toLowerCase(); + let value = token.slice(idx + 1); + if (value.startsWith('"') && value.endsWith('"')) { + value = value.slice(1, -1); + } + if (!value) continue; + if (key === "kind" || key === "kinds") { + const kinds = value + .split(",") + .map((k) => k.trim()) + .filter(Boolean) as SymbolNodeKind[]; + if (kinds.length) query.kinds = kinds; + continue; + } + if (key === "name") { + query.nameIncludes = value; + continue; + } + if (key === "file") { + query.fileIncludes = value; + continue; + } + if (key === "doc" || key === "docstring") { + query.docstringIncludes = value; + continue; + } + residual.push(token); + } + if (residual.length) query.text = residual.join(" "); + return query; +} + +const normalizePhrase = (value: string): string => + value + .trim() + .replace(/^["']|["']$/g, "") + .replace(/^the\s+/i, ""); + +const parseLimit = (input: string, fallback: number): number => { + const match = /(?:top|most)\s+(\d+)/i.exec(input); + if (!match) return fallback; + const limit = Number(match[1]); + return Number.isFinite(limit) && limit > 0 ? limit : fallback; +}; + +export function parseGraphQuery(input: string): GraphQuery | null { + const text = input.trim(); + const lower = text.toLowerCase(); + + if (lower.includes("most called methods")) { + return { kind: "mostCalledMethods", limit: parseLimit(text, 10) }; + } + if (lower.includes("dependency chain")) { + const match = /dependency chain for (.+?) class/i.exec(text); + if (!match) return null; + return { + kind: "dependencyChain", + className: normalizePhrase(match[1] ?? 
""), + }; + } + if (lower.includes("controllers have the most endpoints")) { + return { kind: "controllersMostEndpoints", limit: parseLimit(text, 10) }; + } + if (lower.includes("implement") && lower.includes("interface")) { + const match = /implement(?:s)? (.+?) interface/i.exec(text); + if (!match) return null; + return { + kind: "classesImplementing", + interfaceName: normalizePhrase(match[1] ?? ""), + }; + } + if (lower.includes("affected") && lower.includes("module")) { + const match = + /change (?:this )?module\s+["']?([^"']+)["']?/i.exec(text) ?? /module\s+["']?([^"']+)["']?/i.exec(text); + if (!match) return null; + return { + kind: "affectedFunctionsForModule", + modulePath: normalizePhrase(match[1] ?? ""), + }; + } + if (lower.includes("highest complexity") && lower.includes("function")) { + return { kind: "highestComplexityFunctions", limit: parseLimit(text, 10) }; + } + if (lower.includes("highest complexity")) { + return { kind: "highestComplexityClasses", limit: parseLimit(text, 10) }; + } + return null; +} diff --git a/src/query/symbols.ts b/src/query/symbols.ts new file mode 100644 index 00000000..c37cd6a5 --- /dev/null +++ b/src/query/symbols.ts @@ -0,0 +1,89 @@ +import type { SymbolGraph, SymbolNode, SymbolEdge } from "../graphs.js"; +import type { SymbolQuery } from "./parser.js"; + +const includesFolded = (value: string | undefined, needle: string): boolean => { + if (!value) return false; + return value.toLowerCase().includes(needle.toLowerCase()); +}; + +export function querySymbols(sg: SymbolGraph, query: SymbolQuery): SymbolNode[] { + const textNeedle = query.text?.trim(); + return [...sg.nodes.values()].filter((node) => { + if (query.kinds && !query.kinds.includes(node.kind)) return false; + if (query.nameIncludes && !includesFolded(node.name, query.nameIncludes)) return false; + if (query.fileIncludes && !includesFolded(node.file, query.fileIncludes)) return false; + if (query.docstringIncludes && !includesFolded(node.docstring, 
query.docstringIncludes)) return false; + if (textNeedle) { + const haystack = [node.name, node.file, node.docstring].filter(Boolean).join(" "); + if (!includesFolded(haystack, textNeedle)) return false; + } + return true; + }); +} + +export type NeighborQuery = { + symbolId: string; + direction?: "out" | "in" | "both"; + maxDepth?: number; + edgeLabels?: string[]; +}; + +export type NeighborResult = { + nodes: SymbolNode[]; + edges: SymbolEdge[]; +}; + +export function querySymbolNeighbors(sg: SymbolGraph, query: NeighborQuery): NeighborResult { + const direction = query.direction ?? "both"; + const maxDepth = typeof query.maxDepth === "number" && query.maxDepth > 0 ? query.maxDepth : 1; + const labelFilter = query.edgeLabels?.length ? new Set(query.edgeLabels) : null; + + const outgoing = new Map(); + const incoming = new Map(); + for (const edge of sg.edges) { + if (labelFilter && edge.label && !labelFilter.has(edge.label)) continue; + const outList = outgoing.get(edge.from) ?? []; + outList.push(edge); + outgoing.set(edge.from, outList); + const inList = incoming.get(edge.to) ?? []; + inList.push(edge); + incoming.set(edge.to, inList); + } + + const visited = new Set(); + const frontier: Array<{ id: string; depth: number }> = [{ id: query.symbolId, depth: 0 }]; + visited.add(query.symbolId); + + const edgeSet = new Set(); + let frontierIndex = 0; + while (frontierIndex < frontier.length) { + const current = frontier[frontierIndex++]; + if (!current || current.depth >= maxDepth) continue; + const expandOut = direction === "out" || direction === "both"; + const expandIn = direction === "in" || direction === "both"; + if (expandOut) { + for (const edge of outgoing.get(current.id) ?? []) { + const key = `${edge.from}->${edge.to}::${edge.label ?? ""}`; + edgeSet.add(key); + if (!visited.has(edge.to)) { + visited.add(edge.to); + frontier.push({ id: edge.to, depth: current.depth + 1 }); + } + } + } + if (expandIn) { + for (const edge of incoming.get(current.id) ?? 
[]) { + const key = `${edge.from}->${edge.to}::${edge.label ?? ""}`; + edgeSet.add(key); + if (!visited.has(edge.from)) { + visited.add(edge.from); + frontier.push({ id: edge.from, depth: current.depth + 1 }); + } + } + } + } + + const edges = sg.edges.filter((edge) => edgeSet.has(`${edge.from}->${edge.to}::${edge.label ?? ""}`)); + const nodes = [...visited].map((id) => sg.nodes.get(id)).filter((node): node is SymbolNode => !!node); + return { nodes, edges }; +} diff --git a/src/review.ts b/src/review.ts index 06fc09ad..01eb9b43 100644 --- a/src/review.ts +++ b/src/review.ts @@ -19,6 +19,7 @@ import { type SymbolDef, symbolId, } from "./indexer.js"; +import { isSymbolHandleExported } from "./indexer/declarations.js"; import type { GraphBuildOptions } from "./graphs/types.js"; import { locateChangedSymbolsWithLines, mapChangedLinesToSymbols } from "./impact/map.js"; import { parseUnifiedDiff } from "./impact/parse.js"; @@ -608,7 +609,7 @@ function isRiskRelevantSymbolMappingFile(file: string): boolean { } function isExported(mod: { exports: ExportEntry[] }, handle: string): boolean { - return mod.exports.some((e) => e.type === "local" && symbolId(e.target) === handle); + return isSymbolHandleExported(mod.exports, handle); } function listReviewableExports(mod: ModuleIndex): ReviewableExportEntry[] { @@ -927,19 +928,19 @@ async function runWithConcurrency(items: T[], limit: number, worker: (item return results; } -/** - * Build the structured review report used by programmatic review agents. - * - * The report keeps changed files, changed symbols, graph deltas, candidate tests, - * risk signals, review tasks, diagnostics, and optional snippets as data instead - * of terminal prose. Prefer this API over CLI summary output when composing - * deterministic model context or review file packs. 
- */ -export async function buildReviewReport(projectRoot: string, opts: ReviewOptions = {}): Promise { - const appliedOptions = applyReviewPresetOptions(opts); - const reviewReport = appliedOptions.report; - const reviewTimings = reviewReport?.timings; - const totalStart = performance.now(); +type ReviewChangeCollection = { + changedFiles: Set; + explicitFiles: Set; + diffHunksByFile: Map; + diffKindsByFile: Map; + diffChangesByFile: Map; +}; + +async function collectReviewChanges( + projectRoot: string, + appliedOptions: ReviewOptions, + reviewTimings?: ReviewTimingReport, +): Promise { const normalizeFile = (file: string, label: string) => assertFilePathWithinRoot(projectRoot, file, label); const discoveryIgnoreGlobs = appliedOptions.discovery?.ignoreGlobs ?? []; const discoveryGlobRoot = appliedOptions.discovery?.globRoot ?? projectRoot; @@ -976,9 +977,10 @@ export async function buildReviewReport(projectRoot: string, opts: ReviewOptions } const diffStart = performance.now(); + const shouldLoadGitDiff = (appliedOptions.gitBase || appliedOptions.changedSince) && changedFiles.size; const diffText = appliedOptions.diffText ?? - ((appliedOptions.gitBase || appliedOptions.changedSince) && changedFiles.size > 0 + (shouldLoadGitDiff ? 
await getUnifiedDiff(projectRoot, { base: appliedOptions.gitBase, head: appliedOptions.gitHead, @@ -989,6 +991,7 @@ export async function buildReviewReport(projectRoot: string, opts: ReviewOptions if (reviewTimings) { reviewTimings.diffMs = Math.round(performance.now() - diffStart); } + const diffHunksByFile = new Map(); const diffKindsByFile = new Map(); const diffChangesByFile = new Map(); @@ -1015,68 +1018,47 @@ export async function buildReviewReport(projectRoot: string, opts: ReviewOptions } } - if (changedFiles.size === 0) { - const riskSummary = computeRiskSummary({ - filesChanged: 0, - symbolsChanged: 0, - exportedChanged: 0, - missingFiles: 0, - parseFailures: 0, - }); - const projectFiles = await discoverProjectFiles(projectRoot); - const report: ReviewReport = { - schemaVersion: REVIEW_SCHEMA_VERSION, - status: "no_changes", - projectFiles, - summary: { filesChanged: 0, symbolsChanged: 0, candidateTests: 0 }, - riskSummary, - reviewTasks: buildReviewTasks({ - filesChanged: 0, - symbolsChanged: 0, - exportedChanged: 0, - candidateTests: 0, - missingFiles: 0, - parseFailures: 0, - }), - changedFiles: [], - graphDelta: [], - candidateTests: [], - }; - if (appliedOptions.gitBase !== undefined) report.base = appliedOptions.gitBase; - if (appliedOptions.gitHead !== undefined) report.head = appliedOptions.gitHead; - if (reviewTimings) reviewTimings.totalMs = Math.round(performance.now() - totalStart); - return report; - } - - const changedFileList = Array.from(changedFiles).sort(comparePaths); - const diagnostics: ReviewDiagnostics = { - missingFiles: [], - symbolMappingParseFailures: [], + return { + changedFiles, + explicitFiles, + diffHunksByFile, + diffKindsByFile, + diffChangesByFile, }; +} + +type ReviewIndexStage = { + index: ProjectIndex; + existenceByFile: Map; + deletedFiles: string[]; + deletedSnapshots: Map; + graphOptions: GraphBuildOptions; +}; + +async function buildReviewIndex(input: { + projectRoot: string; + appliedOptions: ReviewOptions; + 
changedFileList: string[]; + diffKindsByFile: ReadonlyMap; + diffChangesByFile: ReadonlyMap; + includeSymbolDetails: boolean; + maxCallsites: number; + reviewReport?: ReviewBuildReport; + reviewTimings?: ReviewTimingReport; +}): Promise { + const { + projectRoot, + appliedOptions, + changedFileList, + diffKindsByFile, + diffChangesByFile, + includeSymbolDetails, + maxCallsites, + reviewReport, + reviewTimings, + } = input; const fastGraphRequested = appliedOptions.graph?.fast ?? false; const graphOptions = appliedOptions.graph ? { ...appliedOptions.graph, fast: fastGraphRequested } : { fast: false }; - const includeSymbolDetails = appliedOptions.includeSymbolDetails ?? false; - const diffContextLines = - typeof appliedOptions.diffContextLines === "number" && appliedOptions.diffContextLines >= 0 - ? appliedOptions.diffContextLines - : 2; - const maxCallsites = - typeof appliedOptions.maxCallsites === "number" && appliedOptions.maxCallsites >= 0 - ? appliedOptions.maxCallsites - : 5; - const referenceConcurrency = - typeof appliedOptions.referenceConcurrency === "number" && appliedOptions.referenceConcurrency > 0 - ? appliedOptions.referenceConcurrency - : 8; - const sourceCache = new Map(); - const loadSource = async (file: string): Promise => { - const cached = sourceCache.get(file); - if (cached !== undefined) return cached; - const parsed = index.parsed?.get(file); - const source = parsed?.source ?? 
(await fsp.readFile(file, "utf8")); - sourceCache.set(file, source); - return source; - }; const existenceChecks = await Promise.all( changedFileList.map(async (file) => ({ file, @@ -1114,12 +1096,200 @@ export async function buildReviewReport(projectRoot: string, opts: ReviewOptions reviewTimings.indexMs = Math.round(performance.now() - indexStart); } + return { + index, + existenceByFile, + deletedFiles, + deletedSnapshots, + graphOptions, + }; +} + +async function collectReviewGraphDelta(input: { + projectRoot: string; + index: ProjectIndex; + changedFiles: ReadonlySet; + deletedFiles: readonly string[]; + deletedSnapshots: ReadonlyMap; +}): Promise { + const graphEdges = new Map(); + for (const edge of input.index.graph.edges.filter((entry) => input.changedFiles.has(entry.from))) { + const relativeEdge = toRelativeEdge(input.projectRoot, edge); + graphEdges.set(edgeKey(relativeEdge), relativeEdge); + } + for (const edge of await collectDeletedImporterEdges(input.index, input.deletedFiles, input.projectRoot)) { + const relativeEdge = toRelativeEdge(input.projectRoot, edge); + graphEdges.set(edgeKey(relativeEdge), relativeEdge); + } + for (const edge of await collectDeletedSnapshotEdges(input.deletedSnapshots, input.projectRoot)) { + const relativeEdge = toRelativeEdge(input.projectRoot, edge); + graphEdges.set(edgeKey(relativeEdge), relativeEdge); + } + return Array.from(graphEdges.values()).sort(compareEdges); +} + +async function collectReviewCandidateTests(input: { + projectRoot: string; + index: ProjectIndex; + changedFileList: string[]; + changedSymbolIds: string[]; + deletedFiles: readonly string[]; + appliedOptions: ReviewOptions; + reviewTimings?: ReviewTimingReport; +}): Promise { + const candidateStart = performance.now(); + const candidateTests = mergeCandidateTestEntries( + listCandidateTestFiles(input.index, input.changedFileList, input.changedSymbolIds, { + maxCandidates: input.appliedOptions.maxCandidates ?? 
50, + ...(input.appliedOptions.testPatterns ? { testPatterns: input.appliedOptions.testPatterns } : {}), + projectRoot: input.projectRoot, + }), + await listDirectDeletedFileTestImporters( + input.index, + input.deletedFiles, + input.appliedOptions.testPatterns, + input.projectRoot, + ), + ) + .map((candidate) => ({ + ...candidate, + file: relativePath(input.projectRoot, candidate.file), + })) + .sort((left, right) => { + const confidenceCompare = confidenceRank(right.confidence) - confidenceRank(left.confidence); + if (confidenceCompare !== 0) return confidenceCompare; + const fileCompare = comparePaths(left.file, right.file); + if (fileCompare !== 0) return fileCompare; + return left.reason.localeCompare(right.reason); + }) + .slice(0, input.appliedOptions.maxCandidates ?? 50); + if (input.reviewTimings) { + input.reviewTimings.candidatesMs = Math.round(performance.now() - candidateStart); + } + return candidateTests; +} + +async function collectReviewSqlContext(input: { + projectRoot: string; + index: ProjectIndex; + changedFileList: string[]; +}): Promise { + const indexedFiles = Array.from(input.index.byFile.keys()); + const normalizedChangedFiles = new Set(input.changedFileList.map(normalizePath)); + const indexedFilesCoverMoreThanReviewSet = indexedFiles.some((file) => !normalizedChangedFiles.has(normalizePath(file))); + const sqlContextProjectFiles = + indexedFilesCoverMoreThanReviewSet && indexedFiles.some((file) => path.extname(file).toLowerCase() === ".sql") + ? indexedFiles + : undefined; + return await collectSqlReviewContext(input.projectRoot, { + changedFiles: input.changedFileList, + ...(sqlContextProjectFiles ? 
{ projectFiles: sqlContextProjectFiles } : {}), + }); +} + +function assembleReviewReport(input: { + appliedOptions: ReviewOptions; + projectFiles: ProjectFileInfo[]; + summaries: ReviewFileSummary[]; + changedSymbolIds: string[]; + candidateTests: CandidateTestFile[]; + graphDelta: Edge[]; + sqlContext?: SqlReviewContext; + diagnostics: ReviewDiagnostics; + riskRelevantParseFailures: number; + exportedChangedCount: number; +}): ReviewReport { + const report: ReviewReport = { + schemaVersion: REVIEW_SCHEMA_VERSION, + status: "ok", + projectFiles: input.projectFiles, + summary: { + filesChanged: input.summaries.length, + symbolsChanged: input.changedSymbolIds.length, + candidateTests: input.candidateTests.length, + }, + riskSummary: computeRiskSummary({ + filesChanged: input.summaries.length, + symbolsChanged: input.changedSymbolIds.length, + exportedChanged: input.exportedChangedCount, + missingFiles: input.diagnostics.missingFiles.length, + parseFailures: input.riskRelevantParseFailures, + }), + reviewTasks: buildReviewTasks({ + filesChanged: input.summaries.length, + symbolsChanged: input.changedSymbolIds.length, + exportedChanged: input.exportedChangedCount, + candidateTests: input.candidateTests.length, + missingFiles: input.diagnostics.missingFiles.length, + parseFailures: input.riskRelevantParseFailures, + }), + changedFiles: input.summaries, + graphDelta: input.graphDelta, + candidateTests: input.candidateTests, + ...(input.sqlContext ? { sqlContext: input.sqlContext } : {}), + ...(hasDiagnostics(input.diagnostics) ? { diagnostics: input.diagnostics } : {}), + }; + if (input.appliedOptions.gitBase !== undefined) report.base = input.appliedOptions.gitBase; + report.head = input.appliedOptions.gitHead ?? 
"HEAD"; + return report; +} + +type ReviewChangedFileSummaries = { + summaries: ReviewFileSummary[]; + changedSymbolIds: string[]; + exportedChangedCount: number; + riskRelevantParseFailures: number; +}; + +async function summarizeChangedFiles(input: { + projectRoot: string; + index: ProjectIndex; + changedFileList: string[]; + diffHunksByFile: ReadonlyMap; + diffKindsByFile: ReadonlyMap; + explicitFiles: ReadonlySet; + existenceByFile: ReadonlyMap; + deletedSnapshots: ReadonlyMap; + includeSymbolDetails: boolean; + includeDiffContext: boolean; + diffContextLines: number; + maxCallsites: number; + referenceConcurrency: number; + diagnostics: ReviewDiagnostics; + reviewTimings?: ReviewTimingReport; +}): Promise { + const { + projectRoot, + index, + changedFileList, + diffHunksByFile, + diffKindsByFile, + explicitFiles, + existenceByFile, + deletedSnapshots, + includeSymbolDetails, + includeDiffContext, + diffContextLines, + maxCallsites, + referenceConcurrency, + diagnostics, + reviewTimings, + } = input; + const sourceCache = new Map(); + const loadSource = async (file: string): Promise => { + const cached = sourceCache.get(file); + if (cached !== undefined) return cached; + const parsed = index.parsed?.get(file); + const source = parsed?.source ?? (await fsp.readFile(file, "utf8")); + sourceCache.set(file, source); + return source; + }; + const filesWithModules = changedFileList.map((file) => ({ file, mod: index.byFile.get(file), hunks: diffHunksByFile.get(file), })); - const includeDiffContext = appliedOptions.includeDiffContext ?? (includeSymbolDetails && diffHunksByFile.size > 0); const fileEntries = await Promise.all( filesWithModules.map(async ({ file, mod, hunks }) => { @@ -1215,7 +1385,7 @@ export async function buildReviewReport(projectRoot: string, opts: ReviewOptions const definitionSnippet = snippet ? { definitionSnippet: snippet } : {}; const diffLines = diffLinesByHandle.get(handle) ?? 
new Set(); const diffSnippets = - includeDiffContext && diffLines.size > 0 + includeDiffContext && diffLines.size ? collectDiffSnippets(source, local.range, diffLines, diffContextLines) : []; @@ -1323,6 +1493,7 @@ export async function buildReviewReport(projectRoot: string, opts: ReviewOptions }; }), ); + const summaries = summariesWithHandles.map((entry) => entry.summary); const changedSymbolIds = summariesWithHandles.flatMap((entry) => entry.handles); const exportedChangedCount = summaries.reduce((count, summary) => { @@ -1333,90 +1504,144 @@ export async function buildReviewReport(projectRoot: string, opts: ReviewOptions isRiskRelevantSymbolMappingFile(path.join(projectRoot, file)), ).length; - const graphEdges = new Map(); - for (const edge of index.graph.edges.filter((entry) => changedFiles.has(entry.from))) { - const relativeEdge = toRelativeEdge(projectRoot, edge); - graphEdges.set(edgeKey(relativeEdge), relativeEdge); - } - for (const edge of await collectDeletedImporterEdges(index, deletedFiles, projectRoot)) { - const relativeEdge = toRelativeEdge(projectRoot, edge); - graphEdges.set(edgeKey(relativeEdge), relativeEdge); - } - for (const edge of await collectDeletedSnapshotEdges(deletedSnapshots, projectRoot)) { - const relativeEdge = toRelativeEdge(projectRoot, edge); - graphEdges.set(edgeKey(relativeEdge), relativeEdge); - } - const graphDelta = Array.from(graphEdges.values()).sort(compareEdges); + return { + summaries, + changedSymbolIds, + exportedChangedCount, + riskRelevantParseFailures, + }; +} - const candidateStart = performance.now(); - const candidateTests = mergeCandidateTestEntries( - listCandidateTestFiles(index, changedFileList, changedSymbolIds, { - maxCandidates: appliedOptions.maxCandidates ?? 50, - ...(appliedOptions.testPatterns ? 
{ testPatterns: appliedOptions.testPatterns } : {}), - projectRoot, - }), - await listDirectDeletedFileTestImporters(index, deletedFiles, appliedOptions.testPatterns, projectRoot), - ) - .map((candidate) => ({ - ...candidate, - file: relativePath(projectRoot, candidate.file), - })) - .sort((left, right) => { - const confidenceCompare = confidenceRank(right.confidence) - confidenceRank(left.confidence); - if (confidenceCompare !== 0) return confidenceCompare; - const fileCompare = comparePaths(left.file, right.file); - if (fileCompare !== 0) return fileCompare; - return left.reason.localeCompare(right.reason); - }) - .slice(0, appliedOptions.maxCandidates ?? 50); - if (reviewTimings) { - reviewTimings.candidatesMs = Math.round(performance.now() - candidateStart); +/** + * Build the structured review report used by programmatic review agents. + * + * The report keeps changed files, changed symbols, graph deltas, candidate tests, + * risk signals, review tasks, diagnostics, and optional snippets as data instead + * of terminal prose. Prefer this API over CLI summary output when composing + * deterministic model context or review file packs. 
+ */ +export async function buildReviewReport(projectRoot: string, opts: ReviewOptions = {}): Promise { + const appliedOptions = applyReviewPresetOptions(opts); + const reviewReport = appliedOptions.report; + const reviewTimings = reviewReport?.timings; + const totalStart = performance.now(); + const { changedFiles, explicitFiles, diffHunksByFile, diffKindsByFile, diffChangesByFile } = + await collectReviewChanges(projectRoot, appliedOptions, reviewTimings); + + if (changedFiles.size === 0) { + const riskSummary = computeRiskSummary({ + filesChanged: 0, + symbolsChanged: 0, + exportedChanged: 0, + missingFiles: 0, + parseFailures: 0, + }); + const projectFiles = await discoverProjectFiles(projectRoot); + const report: ReviewReport = { + schemaVersion: REVIEW_SCHEMA_VERSION, + status: "no_changes", + projectFiles, + summary: { filesChanged: 0, symbolsChanged: 0, candidateTests: 0 }, + riskSummary, + reviewTasks: buildReviewTasks({ + filesChanged: 0, + symbolsChanged: 0, + exportedChanged: 0, + candidateTests: 0, + missingFiles: 0, + parseFailures: 0, + }), + changedFiles: [], + graphDelta: [], + candidateTests: [], + }; + if (appliedOptions.gitBase !== undefined) report.base = appliedOptions.gitBase; + if (appliedOptions.gitHead !== undefined) report.head = appliedOptions.gitHead; + if (reviewTimings) reviewTimings.totalMs = Math.round(performance.now() - totalStart); + return report; } - const projectFiles = index.projectFiles ?? (await discoverProjectFiles(projectRoot)); - const indexedFiles = Array.from(index.byFile.keys()); - const normalizedChangedFiles = new Set(changedFileList.map(normalizePath)); - const indexedFilesCoverMoreThanReviewSet = indexedFiles.some((file) => !normalizedChangedFiles.has(normalizePath(file))); - const sqlContextProjectFiles = - indexedFilesCoverMoreThanReviewSet && indexedFiles.some((file) => path.extname(file).toLowerCase() === ".sql") - ? 
indexedFiles - : undefined; - const sqlContext = await collectSqlReviewContext(projectRoot, { - changedFiles: changedFileList, - ...(sqlContextProjectFiles ? { projectFiles: sqlContextProjectFiles } : {}), + const changedFileList = Array.from(changedFiles).sort(comparePaths); + const diagnostics: ReviewDiagnostics = { + missingFiles: [], + symbolMappingParseFailures: [], + }; + const includeSymbolDetails = appliedOptions.includeSymbolDetails ?? false; + const diffContextLines = + typeof appliedOptions.diffContextLines === "number" && appliedOptions.diffContextLines >= 0 + ? appliedOptions.diffContextLines + : 2; + const maxCallsites = + typeof appliedOptions.maxCallsites === "number" && appliedOptions.maxCallsites >= 0 + ? appliedOptions.maxCallsites + : 5; + const referenceConcurrency = + typeof appliedOptions.referenceConcurrency === "number" && appliedOptions.referenceConcurrency > 0 + ? appliedOptions.referenceConcurrency + : 8; + const { index, existenceByFile, deletedFiles, deletedSnapshots } = await buildReviewIndex({ + projectRoot, + appliedOptions, + changedFileList, + diffKindsByFile, + diffChangesByFile, + includeSymbolDetails, + maxCallsites, + ...(reviewReport ? { reviewReport } : {}), + ...(reviewTimings ? { reviewTimings } : {}), }); - const report: ReviewReport = { - schemaVersion: REVIEW_SCHEMA_VERSION, - status: "ok", + const includeDiffContext = appliedOptions.includeDiffContext ?? (includeSymbolDetails && diffHunksByFile.size > 0); + + const { summaries, changedSymbolIds, exportedChangedCount, riskRelevantParseFailures } = await summarizeChangedFiles({ + projectRoot, + index, + changedFileList, + diffHunksByFile, + diffKindsByFile, + explicitFiles, + existenceByFile, + deletedSnapshots, + includeSymbolDetails, + includeDiffContext, + diffContextLines, + maxCallsites, + referenceConcurrency, + diagnostics, + ...(reviewTimings ? 
{ reviewTimings } : {}), + }); + + const graphDelta = await collectReviewGraphDelta({ + projectRoot, + index, + changedFiles, + deletedFiles, + deletedSnapshots, + }); + + const candidateTests = await collectReviewCandidateTests({ + projectRoot, + index, + changedFileList, + changedSymbolIds, + deletedFiles, + appliedOptions, + ...(reviewTimings ? { reviewTimings } : {}), + }); + + const projectFiles = index.projectFiles ?? (await discoverProjectFiles(projectRoot)); + const sqlContext = await collectReviewSqlContext({ projectRoot, index, changedFileList }); + const report = assembleReviewReport({ + appliedOptions, projectFiles, - summary: { - filesChanged: summaries.length, - symbolsChanged: changedSymbolIds.length, - candidateTests: candidateTests.length, - }, - riskSummary: computeRiskSummary({ - filesChanged: summaries.length, - symbolsChanged: changedSymbolIds.length, - exportedChanged: exportedChangedCount, - missingFiles: diagnostics.missingFiles.length, - parseFailures: riskRelevantParseFailures, - }), - reviewTasks: buildReviewTasks({ - filesChanged: summaries.length, - symbolsChanged: changedSymbolIds.length, - exportedChanged: exportedChangedCount, - candidateTests: candidateTests.length, - missingFiles: diagnostics.missingFiles.length, - parseFailures: riskRelevantParseFailures, - }), - changedFiles: summaries, - graphDelta, + summaries, + changedSymbolIds, candidateTests, + graphDelta, ...(sqlContext ? { sqlContext } : {}), - ...(hasDiagnostics(diagnostics) ? { diagnostics } : {}), - }; - if (appliedOptions.gitBase !== undefined) report.base = appliedOptions.gitBase; - report.head = appliedOptions.gitHead ?? 
"HEAD"; + diagnostics, + riskRelevantParseFailures, + exportedChangedCount, + }); if (reviewTimings) reviewTimings.totalMs = Math.round(performance.now() - totalStart); return report; } diff --git a/src/sql/extractFacts.ts b/src/sql/extractFacts.ts index 2107649b..6cb4e928 100644 --- a/src/sql/extractFacts.ts +++ b/src/sql/extractFacts.ts @@ -1,7 +1,18 @@ import { normalizePath } from "../util/paths.js"; import { classifySqlFile } from "./classifySqlFile.js"; +import { + createSqlObjectNameRegExp, + maskSqlStringsAndComments, + normalizeSqlObjectName, + SQL_OBJECT_NAME_PATTERN, + splitTopLevelCommaSeparated, + sqlObjectBaseName, + sqlParenDepthAt, +} from "./lex.js"; import type { SqlFactKind, SqlFileRole, SqlStatementFact } from "./types.js"; +export { maskSqlStringsAndComments, normalizeSqlObjectName, sqlObjectBaseName } from "./lex.js"; + type SqlFactDraft = { kind: SqlFactKind; objectName: string | null; @@ -18,9 +29,7 @@ type SqlStatementSlice = { endIndex: number; }; -const IDENTIFIER_PART = String.raw`(?:"(?:""|[^"])+"|` + "`[^`]+`" + String.raw`|\[[^\]]+\]|[A-Za-z_][\w$]*)`; -const OBJECT_NAME = String.raw`${IDENTIFIER_PART}(?:\s*\.\s*${IDENTIFIER_PART}){0,2}`; -const OBJECT_NAME_RE = new RegExp(OBJECT_NAME, "iy"); +const OBJECT_NAME_RE = createSqlObjectNameRegExp("iy"); const SQL_KEYWORDS = new Set([ "select", "from", @@ -49,7 +58,11 @@ function lineStartsFor(source: string): number[] { return lineStarts; } -function positionAt(lineStarts: readonly number[], sourceLength: number, index: number): { line: number; column: number } { +function positionAt( + lineStarts: readonly number[], + sourceLength: number, + index: number, +): { line: number; column: number } { const boundedIndex = Math.max(0, Math.min(index, sourceLength)); let low = 0; let high = lineStarts.length - 1; @@ -266,104 +279,6 @@ function splitSqlStatements(source: string): SqlStatementSlice[] { return statements; } -export function maskSqlStringsAndComments(statement: string): string { - let 
out = ""; - let i = 0; - let singleQuoted = false; - let doubleQuoted = false; - let backtickQuoted = false; - let bracketQuoted = false; - let lineComment = false; - let blockComment = false; - let dollarQuote: string | null = null; - - while (i < statement.length) { - const char = statement[i] ?? ""; - const next = statement[i + 1] ?? ""; - - if (char === "\n") { - lineComment = false; - out += "\n"; - i += 1; - continue; - } - - if (lineComment || blockComment || dollarQuote || singleQuoted) { - if (blockComment && char === "*" && next === "/") { - blockComment = false; - out += " "; - i += 2; - continue; - } - if (dollarQuote && statement.startsWith(dollarQuote, i)) { - out += " ".repeat(dollarQuote.length); - i += dollarQuote.length; - dollarQuote = null; - continue; - } - if (singleQuoted && char === "'" && next === "'") { - out += " "; - i += 2; - continue; - } - if (singleQuoted && char === "'") { - singleQuoted = false; - } - out += char === "\n" ? "\n" : " "; - i += 1; - continue; - } - - if (doubleQuoted || backtickQuoted || bracketQuoted) { - out += char; - if (doubleQuoted && char === '"' && next === '"') { - out += next; - i += 2; - continue; - } - if (doubleQuoted && char === '"') doubleQuoted = false; - if (backtickQuoted && char === "`") backtickQuoted = false; - if (bracketQuoted && char === "]") bracketQuoted = false; - i += 1; - continue; - } - - if (char === "-" && next === "-") { - lineComment = true; - out += " "; - i += 2; - continue; - } - if (char === "/" && next === "*") { - blockComment = true; - out += " "; - i += 2; - continue; - } - if (char === "'") { - singleQuoted = true; - out += " "; - i += 1; - continue; - } - if (char === '"') doubleQuoted = true; - if (char === "`") backtickQuoted = true; - if (char === "[") bracketQuoted = true; - if (char === "$") { - const tagMatch = statement.slice(i).match(/^\$[A-Za-z_][\w$]*\$|^\$\$/); - if (tagMatch?.[0]) { - dollarQuote = tagMatch[0]; - out += " ".repeat(dollarQuote.length); - i += 
dollarQuote.length; - continue; - } - } - out += char; - i += 1; - } - return out; -} - const SQL_OBJECT_MODIFIERS = new Set(["lateral", "only"]); function skipWhitespace(text: string, index: number): number { @@ -394,44 +309,6 @@ function findObjectAfter(text: string, pattern: RegExp): string | null { return objectAt(text, match.index + match[0].length); } -function normalizeSqlIdentifierPart(raw: string): string { - const trimmed = raw.trim(); - if (trimmed.startsWith('"') && trimmed.endsWith('"')) { - return trimmed.slice(1, -1).replace(/""/g, '"'); - } - if (trimmed.startsWith("`") && trimmed.endsWith("`")) { - return trimmed.slice(1, -1); - } - if (trimmed.startsWith("[") && trimmed.endsWith("]")) { - return trimmed.slice(1, -1); - } - return trimmed; -} - -export function normalizeSqlObjectName(raw: string | undefined): string | null { - const trimmed = raw?.trim(); - if (!trimmed) return null; - const parts = trimmed.match(new RegExp(IDENTIFIER_PART, "g")) ?? []; - const normalizedParts = parts.map(normalizeSqlIdentifierPart).filter(Boolean); - if (!normalizedParts.length) return null; - return normalizedParts.join("."); -} - -export function sqlObjectBaseName(name: string): string { - const parts = name.split(".").filter(Boolean); - return parts.at(-1) ?? name; -} - -function parenDepthAt(text: string, index: number): number { - let depth = 0; - for (let cursor = 0; cursor < index; cursor += 1) { - const char = text[cursor]; - if (char === "(") depth += 1; - if (char === ")") depth = Math.max(0, depth - 1); - } - return depth; -} - function collectObjectsAfterKeywords( text: string, keywords: readonly string[], @@ -441,7 +318,7 @@ function collectObjectsAfterKeywords( const keywordPattern = keywords.map((keyword) => keyword.replace(/\s+/g, String.raw`\s+`)).join("|"); const re = new RegExp(String.raw`\b(?:${keywordPattern})\s+`, "gi"); for (const match of text.matchAll(re)) { - if (opts?.topLevelOnly && parenDepthAt(text, match.index ?? 
0) > 0) continue; + if (opts?.topLevelOnly && sqlParenDepthAt(text, match.index ?? 0) > 0) continue; const name = objectAt(text, (match.index ?? 0) + match[0].length); if (name && !SQL_KEYWORDS.has(name.toLowerCase())) names.push(name); } @@ -456,29 +333,12 @@ function clauseEndIndex(text: string, start: number): number { return boundary < 0 ? text.length : start + boundary; } -function splitTopLevelCommaSeparated(text: string): string[] { - const parts: string[] = []; - let start = 0; - let depth = 0; - for (let index = 0; index < text.length; index += 1) { - const char = text[index]; - if (char === "(") depth += 1; - if (char === ")") depth = Math.max(0, depth - 1); - if (char === "," && depth === 0) { - parts.push(text.slice(start, index)); - start = index + 1; - } - } - parts.push(text.slice(start)); - return parts; -} - function collectCommaSeparatedObjectsAfterKeywords(text: string, keywords: readonly string[]): string[] { const names: string[] = []; const keywordPattern = keywords.map((keyword) => keyword.replace(/\s+/g, String.raw`\s+`)).join("|"); const re = new RegExp(String.raw`\b(?:${keywordPattern})\s+`, "gi"); for (const match of text.matchAll(re)) { - if (parenDepthAt(text, match.index ?? 0) > 0) continue; + if (sqlParenDepthAt(text, match.index ?? 0) > 0) continue; const start = (match.index ?? 0) + match[0].length; const clause = text.slice(start, clauseEndIndex(text, start)); for (const part of splitTopLevelCommaSeparated(clause)) { @@ -512,12 +372,12 @@ function collectCteReads(text: string): { names: Set; facts: SqlFactDraf const names = new Set(); const facts: SqlFactDraft[] = []; const ctePattern = new RegExp( - String.raw`(?:\bwith\s+(?:recursive\s+)?|,\s*)(${OBJECT_NAME})(?:\s*\([^)]*\))?\s+as\s*\(`, + String.raw`(?:\bwith\s+(?:recursive\s+)?|,\s*)(${SQL_OBJECT_NAME_PATTERN})(?:\s*\([^)]*\))?\s+as\s*\(`, "gi", ); for (const match of text.matchAll(ctePattern)) { - if (parenDepthAt(text, match.index ?? 
0) > 0) continue; + if (sqlParenDepthAt(text, match.index ?? 0) > 0) continue; const name = normalizeSqlObjectName(match[1]); if (!name) continue; for (const key of cteNameKeys(name)) names.add(key); @@ -531,13 +391,22 @@ function collectCteReads(text: string): { names: Set; facts: SqlFactDraf } function createDefinitionFact(text: string): SqlFactDraft | null { - const tableName = findObjectAfter(text, /\bcreate\s+(?:(?:temporary|temp|unlogged|global\s+temporary|local\s+temporary)\s+)*table\s+(?:if\s+not\s+exists\s+)?/i); + const tableName = findObjectAfter( + text, + /\bcreate\s+(?:(?:temporary|temp|unlogged|global\s+temporary|local\s+temporary)\s+)*table\s+(?:if\s+not\s+exists\s+)?/i, + ); if (tableName) return { kind: "defines_table", objectName: tableName, relatedObjectName: null }; - const viewName = findObjectAfter(text, /\bcreate\s+(?:or\s+replace\s+)?(?:materialized\s+)?view\s+(?:if\s+not\s+exists\s+)?/i); + const viewName = findObjectAfter( + text, + /\bcreate\s+(?:or\s+replace\s+)?(?:materialized\s+)?view\s+(?:if\s+not\s+exists\s+)?/i, + ); if (viewName) return { kind: "defines_view", objectName: viewName, relatedObjectName: null }; - const indexName = findObjectAfter(text, /\bcreate\s+(?:unique\s+)?index\s+(?:concurrently\s+)?(?:if\s+not\s+exists\s+)?/i); + const indexName = findObjectAfter( + text, + /\bcreate\s+(?:unique\s+)?index\s+(?:concurrently\s+)?(?:if\s+not\s+exists\s+)?/i, + ); if (indexName) { return { kind: "defines_index", @@ -695,10 +564,9 @@ export function extractSqlFactsFromSource(filePath: string, source: string): Sql const facts: SqlStatementFact[] = []; for (const statement of splitSqlStatements(source)) { const drafts = extractStatementFactDrafts(statement.text); - const resolvedDrafts = - drafts.length - ? drafts - : [{ kind: "unknown_statement", objectName: null, relatedObjectName: null } satisfies SqlFactDraft]; + const resolvedDrafts = drafts.length + ? 
drafts + : [{ kind: "unknown_statement", objectName: null, relatedObjectName: null } satisfies SqlFactDraft]; for (const draft of resolvedDrafts) { facts.push(toFact(filePath, role, statement, draft, facts.length)); } diff --git a/src/sql/lex.ts b/src/sql/lex.ts new file mode 100644 index 00000000..123e9604 --- /dev/null +++ b/src/sql/lex.ts @@ -0,0 +1,161 @@ +export const SQL_IDENTIFIER_PART_PATTERN = + String.raw`(?:"(?:""|[^"])+"|` + "`[^`]+`" + String.raw`|\[[^\]]+\]|[A-Za-z_][\w$]*)`; + +export const SQL_OBJECT_NAME_PATTERN = String.raw`${SQL_IDENTIFIER_PART_PATTERN}(?:\s*\.\s*${SQL_IDENTIFIER_PART_PATTERN}){0,2}`; + +export function createSqlObjectNameRegExp(flags = "iy"): RegExp { + return new RegExp(SQL_OBJECT_NAME_PATTERN, flags); +} + +export function normalizeSqlIdentifierPart(raw: string): string { + const trimmed = raw.trim(); + if (trimmed.startsWith('"') && trimmed.endsWith('"')) { + return trimmed.slice(1, -1).replace(/""/g, '"'); + } + if (trimmed.startsWith("`") && trimmed.endsWith("`")) { + return trimmed.slice(1, -1); + } + if (trimmed.startsWith("[") && trimmed.endsWith("]")) { + return trimmed.slice(1, -1); + } + return trimmed; +} + +export function normalizeSqlObjectName(raw: string | undefined): string | null { + const trimmed = raw?.trim(); + if (!trimmed) return null; + const parts = trimmed.match(new RegExp(SQL_IDENTIFIER_PART_PATTERN, "g")) ?? []; + const normalizedParts = parts.map(normalizeSqlIdentifierPart).filter(Boolean); + if (!normalizedParts.length) return null; + return normalizedParts.join("."); +} + +export function sqlObjectBaseName(name: string): string { + const parts = name.split(".").filter(Boolean); + return parts.at(-1) ?? 
name; +} + +export function sqlParenDepthAt(text: string, index: number): number { + let depth = 0; + for (let cursor = 0; cursor < index; cursor += 1) { + const char = text[cursor]; + if (char === "(") depth += 1; + if (char === ")") depth = Math.max(0, depth - 1); + } + return depth; +} + +export function splitTopLevelCommaSeparated(text: string): string[] { + const parts: string[] = []; + let start = 0; + let depth = 0; + for (let index = 0; index < text.length; index += 1) { + const char = text[index]; + if (char === "(") depth += 1; + if (char === ")") depth = Math.max(0, depth - 1); + if (char === "," && depth === 0) { + parts.push(text.slice(start, index)); + start = index + 1; + } + } + parts.push(text.slice(start)); + return parts; +} + +export function maskSqlStringsAndComments(statement: string): string { + let out = ""; + let i = 0; + let singleQuoted = false; + let doubleQuoted = false; + let backtickQuoted = false; + let bracketQuoted = false; + let lineComment = false; + let blockComment = false; + let dollarQuote: string | null = null; + + while (i < statement.length) { + const char = statement[i] ?? ""; + const next = statement[i + 1] ?? ""; + + if (char === "\n") { + lineComment = false; + out += "\n"; + i += 1; + continue; + } + + if (lineComment || blockComment || dollarQuote || singleQuoted) { + if (blockComment && char === "*" && next === "/") { + blockComment = false; + out += " "; + i += 2; + continue; + } + if (dollarQuote && statement.startsWith(dollarQuote, i)) { + out += " ".repeat(dollarQuote.length); + i += dollarQuote.length; + dollarQuote = null; + continue; + } + if (singleQuoted && char === "'" && next === "'") { + out += " "; + i += 2; + continue; + } + if (singleQuoted && char === "'") { + singleQuoted = false; + } + out += char === "\n" ? 
"\n" : " "; + i += 1; + continue; + } + + if (doubleQuoted || backtickQuoted || bracketQuoted) { + out += char; + if (doubleQuoted && char === '"' && next === '"') { + out += next; + i += 2; + continue; + } + if (doubleQuoted && char === '"') doubleQuoted = false; + if (backtickQuoted && char === "`") backtickQuoted = false; + if (bracketQuoted && char === "]") bracketQuoted = false; + i += 1; + continue; + } + + if (char === "-" && next === "-") { + lineComment = true; + out += " "; + i += 2; + continue; + } + if (char === "/" && next === "*") { + blockComment = true; + out += " "; + i += 2; + continue; + } + if (char === "'") { + singleQuoted = true; + out += " "; + i += 1; + continue; + } + if (char === '"') doubleQuoted = true; + if (char === "`") backtickQuoted = true; + if (char === "[") bracketQuoted = true; + if (char === "$") { + const tagMatch = statement.slice(i).match(/^\$[A-Za-z_][\w$]*\$|^\$\$/); + if (tagMatch?.[0]) { + dollarQuote = tagMatch[0]; + out += " ".repeat(dollarQuote.length); + i += dollarQuote.length; + continue; + } + } + out += char; + i += 1; + } + return out; +} diff --git a/src/sql/navigation.ts b/src/sql/navigation.ts index f753fd3f..3ebf5a79 100644 --- a/src/sql/navigation.ts +++ b/src/sql/navigation.ts @@ -2,15 +2,25 @@ import fsp from "node:fs/promises"; import path from "node:path"; import { createNavigationProvenance, okGoToResult } from "../indexer/navigation-provenance.js"; -import type { FindReferencesResult, GoToRequest, GoToResult, ProjectIndex, Reference, SymbolDef } from "../indexer/types.js"; +import type { + FindReferencesResult, + GoToRequest, + GoToResult, + ProjectIndex, + Reference, + SymbolDef, +} from "../indexer/types.js"; import type { Range } from "../types.js"; import { normalizePath } from "../util/paths.js"; +import { extractSqlFactsFromSource } from "./extractFacts.js"; import { - extractSqlFactsFromSource, maskSqlStringsAndComments, normalizeSqlObjectName, + SQL_IDENTIFIER_PART_PATTERN, + 
splitTopLevelCommaSeparated, sqlObjectBaseName, -} from "./extractFacts.js"; + sqlParenDepthAt, +} from "./lex.js"; import type { SqlStatementFact } from "./types.js"; type SqlStatementNavigationSlice = { @@ -44,7 +54,9 @@ function rangeForToken(line: number, column: number): Range { } function sqlFiles(index: ProjectIndex): string[] { - return Array.from(index.byFile.keys()).filter(isSqlFile).sort((left, right) => left.localeCompare(right)); + return Array.from(index.byFile.keys()) + .filter(isSqlFile) + .sort((left, right) => left.localeCompare(right)); } function pushDefinition(lookup: Map, key: string, definition: SymbolDef): void { @@ -84,7 +96,10 @@ function preferredSqlDefinition(definitions: SymbolDef[], currentFile: string): return null; } -function sqlDefinitionMatches(lookup: SqlDefinitionLookup, objectName: string): { exact: SymbolDef[]; basename: SymbolDef[] } { +function sqlDefinitionMatches( + lookup: SqlDefinitionLookup, + objectName: string, +): { exact: SymbolDef[]; basename: SymbolDef[] } { const normalizedName = objectName.toLowerCase(); const basenameKey = sqlObjectBaseName(objectName).toLowerCase(); const exact = lookup.exact.get(normalizedName) ?? 
[]; @@ -92,7 +107,7 @@ function sqlDefinitionMatches(lookup: SqlDefinitionLookup, objectName: string): return { exact, basename }; } -const SQL_IDENTIFIER = String.raw`(?:"(?:""|[^"])+"|` + "`[^`]+`" + String.raw`|\[[^\]]+\]|[A-Za-z_][A-Za-z0-9_$]*)`; +const SQL_IDENTIFIER = SQL_IDENTIFIER_PART_PATTERN; const SQL_DOTTED_TOKEN = String.raw`${SQL_IDENTIFIER}(?:\s*\.\s*${SQL_IDENTIFIER})*`; const SQL_DOTTED_TOKEN_RE = new RegExp(SQL_DOTTED_TOKEN, "g"); const SQL_SOURCE_KEYWORDS = new Set([ @@ -129,7 +144,10 @@ function wordAtPosition(source: string, line: number, column: number): string | } function sqlObjectNameParts(name: string): string[] { - return name.split(".").map((part) => part.trim()).filter(Boolean); + return name + .split(".") + .map((part) => part.trim()) + .filter(Boolean); } function sqlStatementSlices(facts: readonly SqlStatementFact[]): SqlStatementNavigationSlice[] { @@ -150,34 +168,9 @@ function sqlStatementSlices(facts: readonly SqlStatementFact[]): SqlStatementNav } function sqlStatementAtLine(facts: readonly SqlStatementFact[], line: number): SqlStatementNavigationSlice | null { - return sqlStatementSlices(facts).find((statement) => line >= statement.startLine && line <= statement.endLine) ?? 
null; -} - -function parenDepthAt(text: string, index: number): number { - let depth = 0; - for (let cursor = 0; cursor < index; cursor += 1) { - const char = text[cursor]; - if (char === "(") depth += 1; - if (char === ")") depth = Math.max(0, depth - 1); - } - return depth; -} - -function splitTopLevelCommaSeparated(text: string): string[] { - const parts: string[] = []; - let start = 0; - let depth = 0; - for (let index = 0; index < text.length; index += 1) { - const char = text[index]; - if (char === "(") depth += 1; - if (char === ")") depth = Math.max(0, depth - 1); - if (char === "," && depth === 0) { - parts.push(text.slice(start, index)); - start = index + 1; - } - } - parts.push(text.slice(start)); - return parts; + return ( + sqlStatementSlices(facts).find((statement) => line >= statement.startLine && line <= statement.endLine) ?? null + ); } function sourceClauseEndIndex(text: string, start: number): number { @@ -192,7 +185,7 @@ function cteNamesForStatement(text: string): Set { "gi", ); for (const match of text.matchAll(cteRe)) { - if (parenDepthAt(text, match.index ?? 0) > 0) continue; + if (sqlParenDepthAt(text, match.index ?? 0) > 0) continue; const name = normalizeSqlObjectName(match[1]); if (!name) continue; cteNames.add(name.toLowerCase()); @@ -238,7 +231,7 @@ function sqlAliasMapForStatement(statementText: string): Map { const cteNames = cteNamesForStatement(statementText); const clauseRe = /\b(?:from|using|join|inner\s+join|left\s+join|right\s+join|full\s+join|cross\s+join)\s+/gi; for (const match of statementText.matchAll(clauseRe)) { - if (parenDepthAt(statementText, match.index ?? 0) > 0) continue; + if (sqlParenDepthAt(statementText, match.index ?? 0) > 0) continue; const start = (match.index ?? 
0) + match[0].length; const clause = statementText.slice(start, sourceClauseEndIndex(statementText, start)); for (const part of splitTopLevelCommaSeparated(clause)) { @@ -261,7 +254,11 @@ function unambiguousSqlPrefixDefinitionName(lookup: SqlDefinitionLookup, objectN return null; } -function resolveQualifiedSqlName(lookup: SqlDefinitionLookup, name: string, statementText: string | null): string | null { +function resolveQualifiedSqlName( + lookup: SqlDefinitionLookup, + name: string, + statementText: string | null, +): string | null { if (sqlDefinitionsFromLookup(lookup, name).length) return name; const parts = sqlObjectNameParts(name); if (parts.length < 2) return name; @@ -300,7 +297,11 @@ function matchesSqlDefinitionName(name: string, targetNames: ReadonlySet return targetNames.has(normalized) || targetNames.has(baseName); } -function prefixMatchesSqlDefinition(lookup: SqlDefinitionLookup, prefix: string, targetNames: ReadonlySet): boolean { +function prefixMatchesSqlDefinition( + lookup: SqlDefinitionLookup, + prefix: string, + targetNames: ReadonlySet, +): boolean { const matches = sqlDefinitionMatches(lookup, prefix); if (matches.exact.length) { return matches.exact.some((definition) => matchesSqlDefinitionName(definition.localName, targetNames)); diff --git a/src/sqlite.ts b/src/sqlite.ts index b1b1a732..1e961498 100644 --- a/src/sqlite.ts +++ b/src/sqlite.ts @@ -1,920 +1,8 @@ -import fs from "node:fs/promises"; -import path from "node:path"; -import type { Graph } from "./types.js"; -import type { SymbolGraph, SymbolNode } from "./graphs.js"; -import { parseGraphQuery } from "./query.js"; -import { isReadOnlySqliteError, SqliteDatabase, type SqliteStatement } from "./sqlite-driver.js"; - -export type SqliteGraphOptions = { - fileGraph: Graph; - symbolGraph: SymbolGraph; - outputPath: string; -}; - -export type SqliteGraphUpdateOptions = { - fileGraph: Graph; - symbolGraph: SymbolGraph; - outputPath: string; - changedFiles: string[]; - deletedFiles?: 
string[]; - /** - * When true, reconcile DB rows against the provided full graph for changed/deleted files. - * Use this with full project graphs for accurate incremental CI patching. - */ - fullGraphSync?: boolean; -}; - -export type GraphQueryResult = - | { - kind: "mostCalledMethods"; - results: Array<{ name: string; file: string; count: number }>; - } - | { kind: "dependencyChain"; results: string[] } - | { - kind: "controllersMostEndpoints"; - results: Array<{ name: string; file: string; count: number }>; - } - | { - kind: "classesImplementing"; - results: Array<{ name: string; file: string }>; - } - | { - kind: "affectedFunctionsForModule"; - results: Array<{ name: string; file: string }>; - } - | { - kind: "highestComplexityClasses"; - results: Array<{ name: string; file: string; complexity: number }>; - } - | { - kind: "highestComplexityFunctions"; - results: Array<{ name: string; file: string; complexity: number }>; - }; - -export type RawSqlResult = { - columns: string[]; - rows: Array>; - rowLimit?: number; - byteLimit?: number; - bytes?: number; - truncated?: boolean; -}; - -const SQLITE_SCHEMA_VERSION = 2; - -const toSqliteText = (value: unknown): string => { - if (typeof value === "string") return value; - if (typeof value === "number" || typeof value === "boolean") { - return String(value); - } - return ""; -}; - -const hasColumn = (db: SqliteDatabase, table: string, column: string): boolean => { - const rows = db.prepare(`PRAGMA table_info(${table});`).raw().all(); - for (const row of rows) { - if (!Array.isArray(row)) continue; - const name = row[1] ? 
String(row[1]) : ""; - if (name === column) return true; - } - return false; -}; - -const ensureSymbolsVisibilityColumn = (db: SqliteDatabase) => { - if (hasColumn(db, "symbols", "visibility")) return; - db.exec("ALTER TABLE symbols ADD COLUMN visibility TEXT;"); -}; - -const ensureSchema = (db: SqliteDatabase) => { - db.pragma("journal_mode = WAL"); - db.pragma("synchronous = NORMAL"); - db.pragma("temp_store = MEMORY"); - db.pragma("foreign_keys = ON"); - - db.exec(` - CREATE TABLE IF NOT EXISTS files ( - path TEXT PRIMARY KEY, - is_external INTEGER NOT NULL DEFAULT 0 - ); - CREATE TABLE IF NOT EXISTS symbols ( - id TEXT PRIMARY KEY, - file TEXT NOT NULL, - name TEXT NOT NULL, - kind TEXT, - docstring TEXT, - line_span INTEGER, - complexity INTEGER, - visibility TEXT, - FOREIGN KEY(file) REFERENCES files(path) - ); - CREATE TABLE IF NOT EXISTS file_edges ( - from_path TEXT NOT NULL, - to_path TEXT NOT NULL, - to_type TEXT NOT NULL, - raw TEXT, - type_only INTEGER, - FOREIGN KEY(from_path) REFERENCES files(path), - FOREIGN KEY(to_path) REFERENCES files(path) - ); - CREATE TABLE IF NOT EXISTS symbol_edges ( - from_id TEXT NOT NULL, - to_id TEXT NOT NULL, - label TEXT, - FOREIGN KEY(from_id) REFERENCES symbols(id), - FOREIGN KEY(to_id) REFERENCES symbols(id) - ); - CREATE TABLE IF NOT EXISTS graph_metadata ( - key TEXT PRIMARY KEY, - value TEXT NOT NULL - ); - CREATE TABLE IF NOT EXISTS graph_snapshots ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - created_at INTEGER NOT NULL, - mode TEXT NOT NULL, - changed_files INTEGER NOT NULL, - deleted_files INTEGER NOT NULL, - file_nodes INTEGER NOT NULL, - file_edges INTEGER NOT NULL, - symbol_nodes INTEGER NOT NULL, - symbol_edges INTEGER NOT NULL - ); - CREATE TABLE IF NOT EXISTS graph_snapshot_files ( - snapshot_id INTEGER NOT NULL, - file_path TEXT NOT NULL, - change_kind TEXT NOT NULL, - FOREIGN KEY(snapshot_id) REFERENCES graph_snapshots(id) - ); - `); - - ensureSymbolsVisibilityColumn(db); - db.prepare("INSERT OR REPLACE 
INTO graph_metadata (key, value) VALUES (?, ?);").run([ - "schema_version", - String(SQLITE_SCHEMA_VERSION), - ]); - - const indexSpecs: Array<{ name: string; sql: string }> = [ - { - name: "idx_files_external", - sql: "CREATE INDEX idx_files_external ON files(is_external);", - }, - { - name: "idx_symbols_file", - sql: "CREATE INDEX idx_symbols_file ON symbols(file);", - }, - { - name: "idx_symbols_name", - sql: "CREATE INDEX idx_symbols_name ON symbols(name);", - }, - { - name: "idx_symbols_lower_name", - sql: "CREATE INDEX idx_symbols_lower_name ON symbols(lower(name));", - }, - { - name: "idx_symbols_kind", - sql: "CREATE INDEX idx_symbols_kind ON symbols(kind);", - }, - { - name: "idx_symbols_name_kind", - sql: "CREATE INDEX idx_symbols_name_kind ON symbols(name, kind);", - }, - { - name: "idx_symbols_file_kind", - sql: "CREATE INDEX idx_symbols_file_kind ON symbols(file, kind);", - }, - { - name: "idx_symbols_kind_file", - sql: "CREATE INDEX idx_symbols_kind_file ON symbols(kind, file);", - }, - { - name: "idx_symbols_kind_id", - sql: "CREATE INDEX idx_symbols_kind_id ON symbols(kind, id);", - }, - { - name: "idx_symbols_kind_complexity", - sql: "CREATE INDEX idx_symbols_kind_complexity ON symbols(kind, complexity DESC);", - }, - { - name: "idx_file_edges_from", - sql: "CREATE INDEX idx_file_edges_from ON file_edges(from_path);", - }, - { - name: "idx_file_edges_to", - sql: "CREATE INDEX idx_file_edges_to ON file_edges(to_path);", - }, - { - name: "idx_file_edges_type", - sql: "CREATE INDEX idx_file_edges_type ON file_edges(to_type);", - }, - { - name: "idx_file_edges_from_file", - sql: "CREATE INDEX idx_file_edges_from_file ON file_edges(from_path, to_path) WHERE to_type = 'file';", - }, - { - name: "idx_file_edges_to_file", - sql: "CREATE INDEX idx_file_edges_to_file ON file_edges(to_path, from_path) WHERE to_type = 'file';", - }, - { - name: "idx_symbol_edges_from", - sql: "CREATE INDEX idx_symbol_edges_from ON symbol_edges(from_id);", - }, - { - name: 
"idx_symbol_edges_to", - sql: "CREATE INDEX idx_symbol_edges_to ON symbol_edges(to_id);", - }, - { - name: "idx_symbol_edges_label", - sql: "CREATE INDEX idx_symbol_edges_label ON symbol_edges(label);", - }, - { - name: "idx_symbol_edges_label_to", - sql: "CREATE INDEX idx_symbol_edges_label_to ON symbol_edges(label, to_id);", - }, - { - name: "idx_symbol_edges_label_from", - sql: "CREATE INDEX idx_symbol_edges_label_from ON symbol_edges(label, from_id);", - }, - { - name: "idx_symbol_edges_label_from_to", - sql: "CREATE INDEX idx_symbol_edges_label_from_to ON symbol_edges(label, from_id, to_id);", - }, - { - name: "idx_graph_snapshots_created_at", - sql: "CREATE INDEX idx_graph_snapshots_created_at ON graph_snapshots(created_at DESC);", - }, - { - name: "idx_graph_snapshot_files_snapshot", - sql: "CREATE INDEX idx_graph_snapshot_files_snapshot ON graph_snapshot_files(snapshot_id);", - }, - { - name: "idx_graph_snapshot_files_path", - sql: "CREATE INDEX idx_graph_snapshot_files_path ON graph_snapshot_files(file_path);", - }, - ]; - - const indexRows = db - .prepare("SELECT name FROM sqlite_master WHERE type = 'index' AND name NOT LIKE 'sqlite_%';") - .raw() - .all() as Array>; - const existingIndexes = new Set(); - for (const row of indexRows) { - if (!Array.isArray(row)) continue; - const name = toSqliteText(row[0]); - if (name) existingIndexes.add(name); - } - - let createdIndex = false; - for (const spec of indexSpecs) { - if (existingIndexes.has(spec.name)) continue; - db.exec(spec.sql); - createdIndex = true; - } - - if (createdIndex) { - db.exec("ANALYZE;"); - } -}; - -const execRows = (db: SqliteDatabase, sql: string): Array> => { - const rows = db.prepare(sql).raw().all(); - const normalized: Array> = []; - for (const row of rows) { - if (!Array.isArray(row)) { - throw new Error("Expected sqlite raw() results to be row arrays."); - } - normalized.push(row); - } - return normalized; -}; - -const execRowsParams = ( - db: SqliteDatabase, - sql: string, - 
params: Array, -): Array> => { - const rows = db.prepare(sql).raw().all(params); - const normalized: Array> = []; - for (const row of rows) { - if (!Array.isArray(row)) { - throw new Error("Expected sqlite raw() results to be row arrays."); - } - normalized.push(row); - } - return normalized; -}; - -const loadDirectFileDependencies = (db: SqliteDatabase, fromPath: string): string[] => - execRowsParams( - db, - ` - SELECT to_path - FROM file_edges - WHERE to_type = ? AND from_path = ? - ORDER BY rowid; - `, - ["file", fromPath], - ) - .map((row) => toSqliteText(row[0])) - .filter(Boolean); - -const loadDirectFileDependents = (db: SqliteDatabase, toPath: string): string[] => - execRowsParams( - db, - ` - SELECT from_path - FROM file_edges - WHERE to_type = ? AND to_path = ? - ORDER BY rowid; - `, - ["file", toPath], - ) - .map((row) => toSqliteText(row[0])) - .filter(Boolean); - -const bfsFileTraversal = (start: string, loadNeighbors: (file: string) => string[]): string[] => { - const visited = new Set(); - const queue: string[] = [start]; - let head = 0; - visited.add(start); - const result: string[] = []; - while (head < queue.length) { - const current = queue[head]; - head += 1; - if (!current) continue; - for (const next of loadNeighbors(current)) { - if (visited.has(next)) continue; - visited.add(next); - result.push(next); - queue.push(next); - } - } - return result; -}; - -const collectSymbolIdsForFiles = (symbolGraph: SymbolGraph, changedSet: Set): Set => { - const ids = new Set(); - for (const [id, node] of symbolGraph.nodes.entries()) { - if (changedSet.has(node.file)) ids.add(id); - } - return ids; -}; - -const symbolGraphEdgesForFiles = (symbolGraph: SymbolGraph, changedSet: Set) => { - const edgeList = []; - for (const edge of symbolGraph.edges) { - const fromNode = symbolGraph.nodes.get(edge.from); - const toNode = symbolGraph.nodes.get(edge.to); - if (!fromNode || !toNode) continue; - if (changedSet.has(fromNode.file) || changedSet.has(toNode.file)) { 
- edgeList.push(edge); - } - } - return edgeList; -}; - -const fileGraphEdgesForFiles = (fileGraph: Graph, changedSet: Set) => - fileGraph.edges.filter((edge) => changedSet.has(edge.from)); - -const symbolGraphEdgesForSymbolIds = (symbolGraph: SymbolGraph, symbolIds: Set) => { - const edgeList = []; - for (const edge of symbolGraph.edges) { - if (symbolIds.has(edge.from) || symbolIds.has(edge.to)) { - edgeList.push(edge); - } - } - return edgeList; -}; - -const insertFiles = (db: SqliteDatabase, files: Array<{ path: string; isExternal: boolean }>) => { - const stmt = db.prepare("INSERT OR REPLACE INTO files (path, is_external) VALUES (?, ?);"); - for (const file of files) { - stmt.run([file.path, file.isExternal ? 1 : 0]); - } -}; - -const dedupeFileEntries = ( - entries: Array<{ path: string; isExternal: boolean }>, -): Array<{ path: string; isExternal: boolean }> => { - const unique = new Map(); - for (const entry of entries) { - const existing = unique.get(entry.path); - if (existing === undefined) { - unique.set(entry.path, entry.isExternal); - } else if (entry.isExternal) { - unique.set(entry.path, true); - } - } - return [...unique.entries()].map(([path, isExternal]) => ({ - path, - isExternal, - })); -}; - -const insertSymbols = (db: SqliteDatabase, nodes: SymbolNode[]) => { - const stmt = db.prepare( - "INSERT OR REPLACE INTO symbols (id, file, name, kind, docstring, line_span, complexity, visibility) VALUES (?, ?, ?, ?, ?, ?, ?, ?);", - ); - for (const node of nodes) { - stmt.run([ - node.id, - node.file, - node.name, - node.kind, - node.docstring ?? null, - node.lineSpan ?? null, - node.complexity ?? null, - node.visibility ?? null, - ]); - } -}; - -const insertFileEdges = (db: SqliteDatabase, edges: Graph["edges"]) => { - const stmt = db.prepare( - "INSERT INTO file_edges (from_path, to_path, to_type, raw, type_only) VALUES (?, ?, ?, ?, ?);", - ); - for (const edge of edges) { - const toPath = edge.to.type === "file" ? 
edge.to.path : edge.to.name; - stmt.run([edge.from, toPath, edge.to.type, edge.raw, edge.typeOnly ? 1 : 0]); - } -}; - -const insertSymbolEdges = (db: SqliteDatabase, edges: SymbolGraph["edges"]) => { - const stmt = db.prepare("INSERT INTO symbol_edges (from_id, to_id, label) VALUES (?, ?, ?);"); - for (const edge of edges) { - stmt.run([edge.from, edge.to, edge.label ?? null]); - } -}; - -const clearCurrentGraphState = (db: SqliteDatabase) => { - db.exec(` - DELETE FROM symbol_edges; - DELETE FROM file_edges; - DELETE FROM symbols; - DELETE FROM files; - `); -}; - -const readSymbolIdsForFiles = (db: SqliteDatabase, files: string[]): string[] => { - if (!files.length) return []; - const placeholders = files.map(() => "?").join(", "); - const sql = `SELECT id FROM symbols WHERE file IN (${placeholders});`; - const values = execRowsParams(db, sql, files); - return values - .map((row) => { - const id = toSqliteText(row[0]); - return id || null; - }) - .filter((id): id is string => !!id); -}; - -const deleteBySymbolIds = (db: SqliteDatabase, ids: string[]) => { - if (!ids.length) return; - const placeholders = ids.map(() => "?").join(", "); - db.prepare(`DELETE FROM symbol_edges WHERE from_id IN (${placeholders});`).run(ids); - db.prepare(`DELETE FROM symbol_edges WHERE to_id IN (${placeholders});`).run(ids); - db.prepare(`DELETE FROM symbols WHERE id IN (${placeholders});`).run(ids); -}; - -const deleteFileEdgesForFiles = (db: SqliteDatabase, files: string[]) => { - if (!files.length) return; - const placeholders = files.map(() => "?").join(", "); - db.prepare(`DELETE FROM file_edges WHERE from_path IN (${placeholders});`).run(files); -}; - -const deleteFileEdgesToFiles = (db: SqliteDatabase, files: string[]) => { - if (!files.length) return; - const placeholders = files.map(() => "?").join(", "); - db.prepare(`DELETE FROM file_edges WHERE to_type = 'file' AND to_path IN (${placeholders});`).run(files); -}; - -const deleteFilesByPath = (db: SqliteDatabase, files: 
string[]) => { - if (!files.length) return; - const placeholders = files.map(() => "?").join(", "); - db.prepare(`DELETE FROM files WHERE path IN (${placeholders});`).run(files); -}; - -const recordGraphSnapshot = ( - db: SqliteDatabase, - options: { - mode: "full" | "incremental"; - changedFiles: string[]; - deletedFiles: string[]; - fileNodes: number; - fileEdges: number; - symbolNodes: number; - symbolEdges: number; - }, -) => { - const snapshotStmt = db.prepare(`INSERT INTO graph_snapshots ( - created_at, - mode, - changed_files, - deleted_files, - file_nodes, - file_edges, - symbol_nodes, - symbol_edges - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?);`); - const result = snapshotStmt.run([ - Date.now(), - options.mode, - options.changedFiles.length, - options.deletedFiles.length, - options.fileNodes, - options.fileEdges, - options.symbolNodes, - options.symbolEdges, - ]); - const snapshotId = Number(result.lastInsertRowid); - const fileRows = [ - ...options.changedFiles.map((file) => ({ - file, - kind: "changed" as const, - })), - ...options.deletedFiles.map((file) => ({ - file, - kind: "deleted" as const, - })), - ]; - if (!fileRows.length) return; - const fileStmt = db.prepare( - "INSERT INTO graph_snapshot_files (snapshot_id, file_path, change_kind) VALUES (?, ?, ?);", - ); - for (const row of fileRows) { - fileStmt.run([snapshotId, row.file, row.kind]); - } -}; - -const readOrCreateDb = async (outputPath: string, options?: { readonly?: boolean }) => { - const readonly = options?.readonly ?? 
false; - const dir = path.dirname(outputPath); - if (dir && !readonly) { - await fs.mkdir(dir, { recursive: true }); - } - const db = new SqliteDatabase(outputPath, { - readonly, - }); - return { db }; -}; - -async function withSqliteDatabase( - outputPath: string, - callback: (db: SqliteDatabase) => T | Promise, -): Promise { - const { db } = await readOrCreateDb(outputPath); - try { - ensureSchema(db); - return await callback(db); - } finally { - db.close(); - } -} - -async function withReadOnlySqliteDatabase( - outputPath: string, - callback: (db: SqliteDatabase) => T | Promise, -): Promise { - const { db } = await readOrCreateDb(outputPath, { readonly: true }); - try { - return await callback(db); - } finally { - db.close(); - } -} - -function assertReadOnlyQueryStatement(stmt: SqliteStatement): void { - if (stmt.columns().length) return; - throw new Error("Raw SQLite queries must be read-only result-producing statements such as SELECT or PRAGMA."); -} - -const deleteUnreferencedExternalFiles = (db: SqliteDatabase) => { - db.exec(` - DELETE FROM files - WHERE is_external = 1 - AND path NOT IN ( - SELECT DISTINCT to_path - FROM file_edges - WHERE to_type = 'external' - ); - `); -}; - -const dedupePreservingOrder = (values: string[]): string[] => { - const seen = new Set(); - const deduped: string[] = []; - for (const value of values) { - if (seen.has(value)) continue; - seen.add(value); - deduped.push(value); - } - return deduped; -}; - -export async function writeGraphSqlite(options: SqliteGraphOptions): Promise { - await withSqliteDatabase(options.outputPath, (db) => { - const runInsert = db.transaction(() => { - clearCurrentGraphState(db); - const fileEntries: Array<{ path: string; isExternal: boolean }> = []; - for (const file of options.fileGraph.nodes) { - fileEntries.push({ path: file, isExternal: false }); - } - for (const edge of options.fileGraph.edges) { - if (edge.to.type === "external") { - fileEntries.push({ path: edge.to.name, isExternal: true }); 
- } else { - fileEntries.push({ path: edge.to.path, isExternal: false }); - } - } - insertFiles(db, dedupeFileEntries(fileEntries)); - insertFileEdges(db, options.fileGraph.edges); - insertSymbols(db, [...options.symbolGraph.nodes.values()]); - insertSymbolEdges(db, options.symbolGraph.edges); - recordGraphSnapshot(db, { - mode: "full", - changedFiles: [], - deletedFiles: [], - fileNodes: options.fileGraph.nodes.size, - fileEdges: options.fileGraph.edges.length, - symbolNodes: options.symbolGraph.nodes.size, - symbolEdges: options.symbolGraph.edges.length, - }); - }); - runInsert(); - db.exec("ANALYZE;"); - }); -} - -export async function updateGraphSqlite(options: SqliteGraphUpdateOptions): Promise { - await withSqliteDatabase(options.outputPath, (db) => { - const runUpdate = db.transaction(() => { - const changedSet = new Set(options.changedFiles); - const deletedSet = new Set(options.deletedFiles ?? []); - const touchedSet = new Set([...changedSet, ...deletedSet]); - const touchedFiles = [...touchedSet]; - - const removedSymbolIds = readSymbolIdsForFiles(db, touchedFiles); - deleteBySymbolIds(db, removedSymbolIds); - deleteFileEdgesForFiles(db, touchedFiles); - deleteFileEdgesToFiles(db, [...deletedSet]); - deleteFilesByPath(db, [...deletedSet]); - - const fileEntries: Array<{ path: string; isExternal: boolean }> = []; - for (const file of changedSet) { - fileEntries.push({ path: file, isExternal: false }); - } - - for (const edge of options.fileGraph.edges) { - if (!changedSet.has(edge.from)) continue; - if (edge.to.type === "external") { - fileEntries.push({ path: edge.to.name, isExternal: true }); - } else { - fileEntries.push({ path: edge.to.path, isExternal: false }); - } - } - - if (fileEntries.length) { - insertFiles(db, dedupeFileEntries(fileEntries)); - } - - const changedSymbolIds = collectSymbolIdsForFiles(options.symbolGraph, changedSet); - const changedSymbolNodes = [...changedSymbolIds] - .map((id) => options.symbolGraph.nodes.get(id)) - 
.filter((node): node is SymbolNode => !!node); - if (changedSymbolNodes.length) { - insertSymbols(db, changedSymbolNodes); - } - - const fileEdges = fileGraphEdgesForFiles(options.fileGraph, changedSet); - if (fileEdges.length) { - insertFileEdges(db, fileEdges); - } - - const symbolEdges = options.fullGraphSync - ? symbolGraphEdgesForSymbolIds(options.symbolGraph, changedSymbolIds) - : symbolGraphEdgesForFiles(options.symbolGraph, changedSet); - if (symbolEdges.length) { - insertSymbolEdges(db, symbolEdges); - } - - deleteUnreferencedExternalFiles(db); - - recordGraphSnapshot(db, { - mode: "incremental", - changedFiles: [...changedSet], - deletedFiles: [...deletedSet], - fileNodes: changedSet.size, - fileEdges: fileEdges.length, - symbolNodes: changedSymbolNodes.length, - symbolEdges: symbolEdges.length, - }); - }); - runUpdate(); - db.exec("ANALYZE;"); - }); -} - -export async function queryGraphSqlite(outputPath: string, queryText: string): Promise { - const parsed = parseGraphQuery(queryText); - if (!parsed) { - throw new Error("Unsupported query text."); - } - return await withReadOnlySqliteDatabase(outputPath, (db) => { - switch (parsed.kind) { - case "mostCalledMethods": { - const rows = execRowsParams( - db, - ` - SELECT s.name, s.file, COUNT(*) as cnt - FROM symbol_edges e - JOIN symbols s ON s.id = e.to_id - WHERE e.label = ? AND s.kind = ? - GROUP BY s.id - ORDER BY cnt DESC - LIMIT ?; - `, - ["calls", "function", parsed.limit], - ); - return { - kind: parsed.kind, - results: rows.map((row) => ({ - name: String(row[0]), - file: String(row[1]), - count: Number(row[2]), - })), - }; - } - case "dependencyChain": { - const rows = execRowsParams(db, `SELECT file FROM symbols WHERE name = ? AND kind = ? 
ORDER BY file;`, [ - parsed.className, - "class", - ]); - const startFiles = rows.map((row) => toSqliteText(row[0])).filter(Boolean); - if (!startFiles.length) { - return { kind: parsed.kind, results: [] }; - } - const chain = dedupePreservingOrder( - startFiles.flatMap((startFile) => bfsFileTraversal(startFile, (file) => loadDirectFileDependencies(db, file))), - ); - return { kind: parsed.kind, results: chain }; - } - case "controllersMostEndpoints": { - const rows = execRowsParams( - db, - ` - SELECT c.name, c.file, COUNT(f.id) as cnt - FROM symbols c - LEFT JOIN symbols f - ON f.file = c.file - AND f.kind = ? - AND ( - lower(f.name) LIKE ? OR - lower(f.name) LIKE ? OR - lower(f.name) LIKE ? OR - lower(f.name) LIKE ? OR - lower(f.name) LIKE ? - ) - WHERE c.kind = ? AND c.name LIKE ? - GROUP BY c.id - ORDER BY cnt DESC - LIMIT ?; - `, - ["function", "get%", "post%", "put%", "delete%", "patch%", "class", "%Controller", parsed.limit], - ); - return { - kind: parsed.kind, - results: rows.map((row) => ({ - name: String(row[0]), - file: String(row[1]), - count: Number(row[2]), - })), - }; - } - case "classesImplementing": { - const rows = execRowsParams( - db, - ` - SELECT DISTINCT s.name, s.file - FROM symbol_edges e - JOIN symbols s ON s.id = e.from_id - JOIN symbols t ON t.id = e.to_id - WHERE e.label = ? AND t.name = ?; - `, - ["implements", parsed.interfaceName], - ); - return { - kind: parsed.kind, - results: rows.map((row) => ({ - name: String(row[0]), - file: String(row[1]), - })), - }; - } - case "affectedFunctionsForModule": { - const reverseDeps = bfsFileTraversal(parsed.modulePath, (file) => loadDirectFileDependents(db, file)); - const impactedFiles = [parsed.modulePath, ...reverseDeps]; - if (!impactedFiles.length) { - return { kind: parsed.kind, results: [] }; - } - const placeholders = impactedFiles.map(() => "?").join(", "); - const rows = execRowsParams( - db, - ` - SELECT name, file - FROM symbols - WHERE kind = ? 
- AND file IN (${placeholders}); - `, - ["function", ...impactedFiles], - ); - return { - kind: parsed.kind, - results: rows.map((row) => ({ - name: String(row[0]), - file: String(row[1]), - })), - }; - } - case "highestComplexityClasses": { - const rows = execRowsParams( - db, - ` - SELECT name, file, COALESCE(complexity, 0) as score - FROM symbols - WHERE kind = ? - ORDER BY score DESC - LIMIT ?; - `, - ["class", parsed.limit], - ); - return { - kind: parsed.kind, - results: rows.map((row) => ({ - name: String(row[0]), - file: String(row[1]), - complexity: Number(row[2]), - })), - }; - } - case "highestComplexityFunctions": { - const rows = execRowsParams( - db, - ` - SELECT name, file, COALESCE(complexity, 0) as score - FROM symbols - WHERE kind = ? - ORDER BY score DESC - LIMIT ?; - `, - ["function", parsed.limit], - ); - return { - kind: parsed.kind, - results: rows.map((row) => ({ - name: String(row[0]), - file: String(row[1]), - complexity: Number(row[2]), - })), - }; - } - } - }); -} - -export async function queryGraphSqliteRaw( - outputPath: string, - sql: string, - params: Array = [], - options?: { maxRows?: number | undefined }, -): Promise { - return await withReadOnlySqliteDatabase(outputPath, (db) => { - try { - const stmt = db.prepare(sql); - assertReadOnlyQueryStatement(stmt); - const columns = stmt.columns().map((col) => col.name); - const rowLimit = options?.maxRows; - if (rowLimit !== undefined) { - const rows: Array> = []; - let truncated = false; - for (const row of stmt.raw().iterate(params) as Iterable>) { - if (rows.length >= rowLimit) { - truncated = true; - break; - } - rows.push(row); - } - return { - columns, - rows, - rowLimit, - truncated, - }; - } - const rows = stmt.raw().all(params) as Array>; - return { columns, rows }; - } catch (error) { - if (isReadOnlySqliteError(error)) { - throw new Error("Raw SQLite queries must be read-only result-producing statements such as SELECT or PRAGMA."); - } - throw error; - } - }); -} +export type 
{ + GraphQueryResult, + RawSqlResult, + SqliteGraphOptions, + SqliteGraphUpdateOptions, +} from "./sqlite/types.js"; +export { writeGraphSqlite, updateGraphSqlite } from "./sqlite/write.js"; +export { queryGraphSqlite, queryGraphSqliteRaw } from "./sqlite/query.js"; diff --git a/src/sqlite/canned-query.ts b/src/sqlite/canned-query.ts new file mode 100644 index 00000000..a8a6f3d7 --- /dev/null +++ b/src/sqlite/canned-query.ts @@ -0,0 +1,220 @@ +import { parseGraphQuery } from "../query.js"; +import type { SqliteDatabase } from "../sqlite-driver.js"; +import type { GraphQueryResult } from "./types.js"; +import { dedupePreservingOrder, execRowsParams, toSqliteText } from "./common.js"; +import { withReadOnlySqliteDatabase } from "./database.js"; + +const loadDirectFileDependencies = (db: SqliteDatabase, fromPath: string): string[] => + execRowsParams( + db, + ` + SELECT to_path + FROM file_edges + WHERE to_type = ? AND from_path = ? + ORDER BY rowid; + `, + ["file", fromPath], + ) + .map((row) => toSqliteText(row[0])) + .filter(Boolean); + +const loadDirectFileDependents = (db: SqliteDatabase, toPath: string): string[] => + execRowsParams( + db, + ` + SELECT from_path + FROM file_edges + WHERE to_type = ? AND to_path = ? 
+ ORDER BY rowid; + `, + ["file", toPath], + ) + .map((row) => toSqliteText(row[0])) + .filter(Boolean); + +const bfsFileTraversal = (start: string, loadNeighbors: (file: string) => string[]): string[] => { + const visited = new Set(); + const queue: string[] = [start]; + let head = 0; + visited.add(start); + const result: string[] = []; + while (head < queue.length) { + const current = queue[head]; + head += 1; + if (!current) continue; + for (const next of loadNeighbors(current)) { + if (visited.has(next)) continue; + visited.add(next); + result.push(next); + queue.push(next); + } + } + return result; +}; + +export async function queryGraphSqlite(outputPath: string, queryText: string): Promise { + const parsed = parseGraphQuery(queryText); + if (!parsed) { + throw new Error("Unsupported query text."); + } + return await withReadOnlySqliteDatabase(outputPath, (db) => { + switch (parsed.kind) { + case "mostCalledMethods": { + const rows = execRowsParams( + db, + ` + SELECT s.name, s.file, COUNT(*) as cnt + FROM symbol_edges e + JOIN symbols s ON s.id = e.to_id + WHERE e.label = ? AND s.kind = ? + GROUP BY s.id + ORDER BY cnt DESC + LIMIT ?; + `, + ["calls", "function", parsed.limit], + ); + return { + kind: parsed.kind, + results: rows.map((row) => ({ + name: String(row[0]), + file: String(row[1]), + count: Number(row[2]), + })), + }; + } + case "dependencyChain": { + const rows = execRowsParams(db, `SELECT file FROM symbols WHERE name = ? AND kind = ? 
ORDER BY file;`, [ + parsed.className, + "class", + ]); + const startFiles = rows.map((row) => toSqliteText(row[0])).filter(Boolean); + if (!startFiles.length) { + return { kind: parsed.kind, results: [] }; + } + const chain = dedupePreservingOrder( + startFiles.flatMap((startFile) => bfsFileTraversal(startFile, (file) => loadDirectFileDependencies(db, file))), + ); + return { kind: parsed.kind, results: chain }; + } + case "controllersMostEndpoints": { + const rows = execRowsParams( + db, + ` + SELECT c.name, c.file, COUNT(f.id) as cnt + FROM symbols c + LEFT JOIN symbols f + ON f.file = c.file + AND f.kind = ? + AND ( + lower(f.name) LIKE ? OR + lower(f.name) LIKE ? OR + lower(f.name) LIKE ? OR + lower(f.name) LIKE ? OR + lower(f.name) LIKE ? + ) + WHERE c.kind = ? AND c.name LIKE ? + GROUP BY c.id + ORDER BY cnt DESC + LIMIT ?; + `, + ["function", "get%", "post%", "put%", "delete%", "patch%", "class", "%Controller", parsed.limit], + ); + return { + kind: parsed.kind, + results: rows.map((row) => ({ + name: String(row[0]), + file: String(row[1]), + count: Number(row[2]), + })), + }; + } + case "classesImplementing": { + const rows = execRowsParams( + db, + ` + SELECT DISTINCT s.name, s.file + FROM symbol_edges e + JOIN symbols s ON s.id = e.from_id + JOIN symbols t ON t.id = e.to_id + WHERE e.label = ? AND t.name = ?; + `, + ["implements", parsed.interfaceName], + ); + return { + kind: parsed.kind, + results: rows.map((row) => ({ + name: String(row[0]), + file: String(row[1]), + })), + }; + } + case "affectedFunctionsForModule": { + const reverseDeps = bfsFileTraversal(parsed.modulePath, (file) => loadDirectFileDependents(db, file)); + const impactedFiles = [parsed.modulePath, ...reverseDeps]; + if (!impactedFiles.length) { + return { kind: parsed.kind, results: [] }; + } + const placeholders = impactedFiles.map(() => "?").join(", "); + const rows = execRowsParams( + db, + ` + SELECT name, file + FROM symbols + WHERE kind = ? 
+ AND file IN (${placeholders}); + `, + ["function", ...impactedFiles], + ); + return { + kind: parsed.kind, + results: rows.map((row) => ({ + name: String(row[0]), + file: String(row[1]), + })), + }; + } + case "highestComplexityClasses": { + const rows = execRowsParams( + db, + ` + SELECT name, file, COALESCE(complexity, 0) as score + FROM symbols + WHERE kind = ? + ORDER BY score DESC + LIMIT ?; + `, + ["class", parsed.limit], + ); + return { + kind: parsed.kind, + results: rows.map((row) => ({ + name: String(row[0]), + file: String(row[1]), + complexity: Number(row[2]), + })), + }; + } + case "highestComplexityFunctions": { + const rows = execRowsParams( + db, + ` + SELECT name, file, COALESCE(complexity, 0) as score + FROM symbols + WHERE kind = ? + ORDER BY score DESC + LIMIT ?; + `, + ["function", parsed.limit], + ); + return { + kind: parsed.kind, + results: rows.map((row) => ({ + name: String(row[0]), + file: String(row[1]), + complexity: Number(row[2]), + })), + }; + } + } + }); +} diff --git a/src/sqlite/common.ts b/src/sqlite/common.ts new file mode 100644 index 00000000..c30461b1 --- /dev/null +++ b/src/sqlite/common.ts @@ -0,0 +1,48 @@ +import type { SqliteDatabase } from "../sqlite-driver.js"; + +export const toSqliteText = (value: unknown): string => { + if (typeof value === "string") return value; + if (typeof value === "number" || typeof value === "boolean") { + return String(value); + } + return ""; +}; + +export const execRows = (db: SqliteDatabase, sql: string): Array> => { + const rows = db.prepare(sql).raw().all(); + const normalized: Array> = []; + for (const row of rows) { + if (!Array.isArray(row)) { + throw new Error("Expected sqlite raw() results to be row arrays."); + } + normalized.push(row); + } + return normalized; +}; + +export const execRowsParams = ( + db: SqliteDatabase, + sql: string, + params: Array, +): Array> => { + const rows = db.prepare(sql).raw().all(params); + const normalized: Array> = []; + for (const row of rows) { + 
if (!Array.isArray(row)) { + throw new Error("Expected sqlite raw() results to be row arrays."); + } + normalized.push(row); + } + return normalized; +}; + +export const dedupePreservingOrder = (values: string[]): string[] => { + const seen = new Set(); + const deduped: string[] = []; + for (const value of values) { + if (seen.has(value)) continue; + seen.add(value); + deduped.push(value); + } + return deduped; +}; diff --git a/src/sqlite/database.ts b/src/sqlite/database.ts new file mode 100644 index 00000000..faa24951 --- /dev/null +++ b/src/sqlite/database.ts @@ -0,0 +1,46 @@ +import fs from "node:fs/promises"; +import path from "node:path"; +import { SqliteDatabase, type SqliteStatement } from "../sqlite-driver.js"; +import { ensureSchema } from "./schema.js"; + +const readOrCreateDb = async (outputPath: string, options?: { readonly?: boolean }) => { + const readonly = options?.readonly ?? false; + const dir = path.dirname(outputPath); + if (dir && !readonly) { + await fs.mkdir(dir, { recursive: true }); + } + const db = new SqliteDatabase(outputPath, { + readonly, + }); + return { db }; +}; + +export async function withSqliteDatabase( + outputPath: string, + callback: (db: SqliteDatabase) => T | Promise, +): Promise { + const { db } = await readOrCreateDb(outputPath); + try { + ensureSchema(db); + return await callback(db); + } finally { + db.close(); + } +} + +export async function withReadOnlySqliteDatabase( + outputPath: string, + callback: (db: SqliteDatabase) => T | Promise, +): Promise { + const { db } = await readOrCreateDb(outputPath, { readonly: true }); + try { + return await callback(db); + } finally { + db.close(); + } +} + +export function assertReadOnlyQueryStatement(stmt: SqliteStatement): void { + if (stmt.columns().length) return; + throw new Error("Raw SQLite queries must be read-only result-producing statements such as SELECT or PRAGMA."); +} diff --git a/src/sqlite/query.ts b/src/sqlite/query.ts new file mode 100644 index 00000000..157c3622 
--- /dev/null +++ b/src/sqlite/query.ts @@ -0,0 +1,45 @@ +import { isReadOnlySqliteError } from "../sqlite-driver.js"; +import type { RawSqlResult } from "./types.js"; +import { assertReadOnlyQueryStatement, withReadOnlySqliteDatabase } from "./database.js"; + +export { queryGraphSqlite } from "./canned-query.js"; + +export async function queryGraphSqliteRaw( + outputPath: string, + sql: string, + params: Array = [], + options?: { maxRows?: number | undefined }, +): Promise { + return await withReadOnlySqliteDatabase(outputPath, (db) => { + try { + const stmt = db.prepare(sql); + assertReadOnlyQueryStatement(stmt); + const columns = stmt.columns().map((col) => col.name); + const rowLimit = options?.maxRows; + if (rowLimit !== undefined) { + const rows: Array> = []; + let truncated = false; + for (const row of stmt.raw().iterate(params) as Iterable>) { + if (rows.length >= rowLimit) { + truncated = true; + break; + } + rows.push(row); + } + return { + columns, + rows, + rowLimit, + truncated, + }; + } + const rows = stmt.raw().all(params) as Array>; + return { columns, rows }; + } catch (error) { + if (isReadOnlySqliteError(error)) { + throw new Error("Raw SQLite queries must be read-only result-producing statements such as SELECT or PRAGMA."); + } + throw error; + } + }); +} diff --git a/src/sqlite/schema.ts b/src/sqlite/schema.ts new file mode 100644 index 00000000..b337f7e5 --- /dev/null +++ b/src/sqlite/schema.ts @@ -0,0 +1,208 @@ +import type { SqliteDatabase } from "../sqlite-driver.js"; +import { toSqliteText } from "./common.js"; + +export const SQLITE_SCHEMA_VERSION = 2; + +const hasColumn = (db: SqliteDatabase, table: string, column: string): boolean => { + const rows = db.prepare(`PRAGMA table_info(${table});`).raw().all(); + for (const row of rows) { + if (!Array.isArray(row)) continue; + const name = row[1] ? 
String(row[1]) : ""; + if (name === column) return true; + } + return false; +}; + +const ensureSymbolsVisibilityColumn = (db: SqliteDatabase) => { + if (hasColumn(db, "symbols", "visibility")) return; + db.exec("ALTER TABLE symbols ADD COLUMN visibility TEXT;"); +}; + +export const ensureSchema = (db: SqliteDatabase) => { + db.pragma("journal_mode = WAL"); + db.pragma("synchronous = NORMAL"); + db.pragma("temp_store = MEMORY"); + db.pragma("foreign_keys = ON"); + + db.exec(` + CREATE TABLE IF NOT EXISTS files ( + path TEXT PRIMARY KEY, + is_external INTEGER NOT NULL DEFAULT 0 + ); + CREATE TABLE IF NOT EXISTS symbols ( + id TEXT PRIMARY KEY, + file TEXT NOT NULL, + name TEXT NOT NULL, + kind TEXT, + docstring TEXT, + line_span INTEGER, + complexity INTEGER, + visibility TEXT, + FOREIGN KEY(file) REFERENCES files(path) + ); + CREATE TABLE IF NOT EXISTS file_edges ( + from_path TEXT NOT NULL, + to_path TEXT NOT NULL, + to_type TEXT NOT NULL, + raw TEXT, + type_only INTEGER, + FOREIGN KEY(from_path) REFERENCES files(path), + FOREIGN KEY(to_path) REFERENCES files(path) + ); + CREATE TABLE IF NOT EXISTS symbol_edges ( + from_id TEXT NOT NULL, + to_id TEXT NOT NULL, + label TEXT, + FOREIGN KEY(from_id) REFERENCES symbols(id), + FOREIGN KEY(to_id) REFERENCES symbols(id) + ); + CREATE TABLE IF NOT EXISTS graph_metadata ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL + ); + CREATE TABLE IF NOT EXISTS graph_snapshots ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + created_at INTEGER NOT NULL, + mode TEXT NOT NULL, + changed_files INTEGER NOT NULL, + deleted_files INTEGER NOT NULL, + file_nodes INTEGER NOT NULL, + file_edges INTEGER NOT NULL, + symbol_nodes INTEGER NOT NULL, + symbol_edges INTEGER NOT NULL + ); + CREATE TABLE IF NOT EXISTS graph_snapshot_files ( + snapshot_id INTEGER NOT NULL, + file_path TEXT NOT NULL, + change_kind TEXT NOT NULL, + FOREIGN KEY(snapshot_id) REFERENCES graph_snapshots(id) + ); + `); + + ensureSymbolsVisibilityColumn(db); + db.prepare("INSERT OR 
REPLACE INTO graph_metadata (key, value) VALUES (?, ?);").run([ + "schema_version", + String(SQLITE_SCHEMA_VERSION), + ]); + + const indexSpecs: Array<{ name: string; sql: string }> = [ + { + name: "idx_files_external", + sql: "CREATE INDEX idx_files_external ON files(is_external);", + }, + { + name: "idx_symbols_file", + sql: "CREATE INDEX idx_symbols_file ON symbols(file);", + }, + { + name: "idx_symbols_name", + sql: "CREATE INDEX idx_symbols_name ON symbols(name);", + }, + { + name: "idx_symbols_lower_name", + sql: "CREATE INDEX idx_symbols_lower_name ON symbols(lower(name));", + }, + { + name: "idx_symbols_kind", + sql: "CREATE INDEX idx_symbols_kind ON symbols(kind);", + }, + { + name: "idx_symbols_name_kind", + sql: "CREATE INDEX idx_symbols_name_kind ON symbols(name, kind);", + }, + { + name: "idx_symbols_file_kind", + sql: "CREATE INDEX idx_symbols_file_kind ON symbols(file, kind);", + }, + { + name: "idx_symbols_kind_file", + sql: "CREATE INDEX idx_symbols_kind_file ON symbols(kind, file);", + }, + { + name: "idx_symbols_kind_id", + sql: "CREATE INDEX idx_symbols_kind_id ON symbols(kind, id);", + }, + { + name: "idx_symbols_kind_complexity", + sql: "CREATE INDEX idx_symbols_kind_complexity ON symbols(kind, complexity DESC);", + }, + { + name: "idx_file_edges_from", + sql: "CREATE INDEX idx_file_edges_from ON file_edges(from_path);", + }, + { + name: "idx_file_edges_to", + sql: "CREATE INDEX idx_file_edges_to ON file_edges(to_path);", + }, + { + name: "idx_file_edges_type", + sql: "CREATE INDEX idx_file_edges_type ON file_edges(to_type);", + }, + { + name: "idx_file_edges_from_file", + sql: "CREATE INDEX idx_file_edges_from_file ON file_edges(from_path, to_path) WHERE to_type = 'file';", + }, + { + name: "idx_file_edges_to_file", + sql: "CREATE INDEX idx_file_edges_to_file ON file_edges(to_path, from_path) WHERE to_type = 'file';", + }, + { + name: "idx_symbol_edges_from", + sql: "CREATE INDEX idx_symbol_edges_from ON symbol_edges(from_id);", + }, + { + 
name: "idx_symbol_edges_to", + sql: "CREATE INDEX idx_symbol_edges_to ON symbol_edges(to_id);", + }, + { + name: "idx_symbol_edges_label", + sql: "CREATE INDEX idx_symbol_edges_label ON symbol_edges(label);", + }, + { + name: "idx_symbol_edges_label_to", + sql: "CREATE INDEX idx_symbol_edges_label_to ON symbol_edges(label, to_id);", + }, + { + name: "idx_symbol_edges_label_from", + sql: "CREATE INDEX idx_symbol_edges_label_from ON symbol_edges(label, from_id);", + }, + { + name: "idx_symbol_edges_label_from_to", + sql: "CREATE INDEX idx_symbol_edges_label_from_to ON symbol_edges(label, from_id, to_id);", + }, + { + name: "idx_graph_snapshots_created_at", + sql: "CREATE INDEX idx_graph_snapshots_created_at ON graph_snapshots(created_at DESC);", + }, + { + name: "idx_graph_snapshot_files_snapshot", + sql: "CREATE INDEX idx_graph_snapshot_files_snapshot ON graph_snapshot_files(snapshot_id);", + }, + { + name: "idx_graph_snapshot_files_path", + sql: "CREATE INDEX idx_graph_snapshot_files_path ON graph_snapshot_files(file_path);", + }, + ]; + + const indexRows = db + .prepare("SELECT name FROM sqlite_master WHERE type = 'index' AND name NOT LIKE 'sqlite_%';") + .raw() + .all() as Array>; + const existingIndexes = new Set(); + for (const row of indexRows) { + if (!Array.isArray(row)) continue; + const name = toSqliteText(row[0]); + if (name) existingIndexes.add(name); + } + + let createdIndex = false; + for (const spec of indexSpecs) { + if (existingIndexes.has(spec.name)) continue; + db.exec(spec.sql); + createdIndex = true; + } + + if (createdIndex) { + db.exec("ANALYZE;"); + } +}; diff --git a/src/sqlite/types.ts b/src/sqlite/types.ts new file mode 100644 index 00000000..f62741ce --- /dev/null +++ b/src/sqlite/types.ts @@ -0,0 +1,57 @@ +import type { Graph } from "../types.js"; +import type { SymbolGraph } from "../graphs.js"; + +export type SqliteGraphOptions = { + fileGraph: Graph; + symbolGraph: SymbolGraph; + outputPath: string; +}; + +export type 
SqliteGraphUpdateOptions = { + fileGraph: Graph; + symbolGraph: SymbolGraph; + outputPath: string; + changedFiles: string[]; + deletedFiles?: string[]; + /** + * When true, reconcile DB rows against the provided full graph for changed/deleted files. + * Use this with full project graphs for accurate incremental CI patching. + */ + fullGraphSync?: boolean; +}; + +export type GraphQueryResult = + | { + kind: "mostCalledMethods"; + results: Array<{ name: string; file: string; count: number }>; + } + | { kind: "dependencyChain"; results: string[] } + | { + kind: "controllersMostEndpoints"; + results: Array<{ name: string; file: string; count: number }>; + } + | { + kind: "classesImplementing"; + results: Array<{ name: string; file: string }>; + } + | { + kind: "affectedFunctionsForModule"; + results: Array<{ name: string; file: string }>; + } + | { + kind: "highestComplexityClasses"; + results: Array<{ name: string; file: string; complexity: number }>; + } + | { + kind: "highestComplexityFunctions"; + results: Array<{ name: string; file: string; complexity: number }>; + }; + +export type RawSqlResult = { + columns: string[]; + rows: Array>; + rowLimit?: number; + byteLimit?: number; + bytes?: number; + truncated?: boolean; +}; diff --git a/src/sqlite/write.ts b/src/sqlite/write.ts new file mode 100644 index 00000000..3387b146 --- /dev/null +++ b/src/sqlite/write.ts @@ -0,0 +1,315 @@ +import type { SymbolGraph, SymbolNode } from "../graphs.js"; +import type { Graph } from "../types.js"; +import type { SqliteDatabase } from "../sqlite-driver.js"; +import type { SqliteGraphOptions, SqliteGraphUpdateOptions } from "./types.js"; +import { execRowsParams } from "./common.js"; +import { withSqliteDatabase } from "./database.js"; + +const collectSymbolIdsForFiles = (symbolGraph: SymbolGraph, changedSet: Set): Set => { + const ids = new Set(); + for (const [id, node] of symbolGraph.nodes.entries()) { + if (changedSet.has(node.file)) ids.add(id); + } + return ids; +}; + +const 
symbolGraphEdgesForFiles = (symbolGraph: SymbolGraph, changedSet: Set) => { + const edgeList = []; + for (const edge of symbolGraph.edges) { + const fromNode = symbolGraph.nodes.get(edge.from); + const toNode = symbolGraph.nodes.get(edge.to); + if (!fromNode || !toNode) continue; + if (changedSet.has(fromNode.file) || changedSet.has(toNode.file)) { + edgeList.push(edge); + } + } + return edgeList; +}; + +const fileGraphEdgesForFiles = (fileGraph: Graph, changedSet: Set) => + fileGraph.edges.filter((edge) => changedSet.has(edge.from)); + +const symbolGraphEdgesForSymbolIds = (symbolGraph: SymbolGraph, symbolIds: Set) => { + const edgeList = []; + for (const edge of symbolGraph.edges) { + if (symbolIds.has(edge.from) || symbolIds.has(edge.to)) { + edgeList.push(edge); + } + } + return edgeList; +}; + +const insertFiles = (db: SqliteDatabase, files: Array<{ path: string; isExternal: boolean }>) => { + const stmt = db.prepare("INSERT OR REPLACE INTO files (path, is_external) VALUES (?, ?);"); + for (const file of files) { + stmt.run([file.path, file.isExternal ? 1 : 0]); + } +}; + +const dedupeFileEntries = ( + entries: Array<{ path: string; isExternal: boolean }>, +): Array<{ path: string; isExternal: boolean }> => { + const unique = new Map(); + for (const entry of entries) { + const existing = unique.get(entry.path); + if (existing === undefined) { + unique.set(entry.path, entry.isExternal); + } else if (entry.isExternal) { + unique.set(entry.path, true); + } + } + return [...unique.entries()].map(([path, isExternal]) => ({ + path, + isExternal, + })); +}; + +const insertSymbols = (db: SqliteDatabase, nodes: SymbolNode[]) => { + const stmt = db.prepare( + "INSERT OR REPLACE INTO symbols (id, file, name, kind, docstring, line_span, complexity, visibility) VALUES (?, ?, ?, ?, ?, ?, ?, ?);", + ); + for (const node of nodes) { + stmt.run([ + node.id, + node.file, + node.name, + node.kind, + node.docstring ?? null, + node.lineSpan ?? null, + node.complexity ?? 
null, + node.visibility ?? null, + ]); + } +}; + +const insertFileEdges = (db: SqliteDatabase, edges: Graph["edges"]) => { + const stmt = db.prepare( + "INSERT INTO file_edges (from_path, to_path, to_type, raw, type_only) VALUES (?, ?, ?, ?, ?);", + ); + for (const edge of edges) { + const toPath = edge.to.type === "file" ? edge.to.path : edge.to.name; + stmt.run([edge.from, toPath, edge.to.type, edge.raw, edge.typeOnly ? 1 : 0]); + } +}; + +const insertSymbolEdges = (db: SqliteDatabase, edges: SymbolGraph["edges"]) => { + const stmt = db.prepare("INSERT INTO symbol_edges (from_id, to_id, label) VALUES (?, ?, ?);"); + for (const edge of edges) { + stmt.run([edge.from, edge.to, edge.label ?? null]); + } +}; + +const clearCurrentGraphState = (db: SqliteDatabase) => { + db.exec(` + DELETE FROM symbol_edges; + DELETE FROM file_edges; + DELETE FROM symbols; + DELETE FROM files; + `); +}; + +const readSymbolIdsForFiles = (db: SqliteDatabase, files: string[]): string[] => { + if (!files.length) return []; + const placeholders = files.map(() => "?").join(", "); + const sql = `SELECT id FROM symbols WHERE file IN (${placeholders});`; + const values = execRowsParams(db, sql, files); + return values + .map((row) => { + const id = typeof row[0] === "string" ? 
row[0] : ""; + return id || null; + }) + .filter((id): id is string => !!id); +}; + +const deleteBySymbolIds = (db: SqliteDatabase, ids: string[]) => { + if (!ids.length) return; + const placeholders = ids.map(() => "?").join(", "); + db.prepare(`DELETE FROM symbol_edges WHERE from_id IN (${placeholders});`).run(ids); + db.prepare(`DELETE FROM symbol_edges WHERE to_id IN (${placeholders});`).run(ids); + db.prepare(`DELETE FROM symbols WHERE id IN (${placeholders});`).run(ids); +}; + +const deleteFileEdgesForFiles = (db: SqliteDatabase, files: string[]) => { + if (!files.length) return; + const placeholders = files.map(() => "?").join(", "); + db.prepare(`DELETE FROM file_edges WHERE from_path IN (${placeholders});`).run(files); +}; + +const deleteFileEdgesToFiles = (db: SqliteDatabase, files: string[]) => { + if (!files.length) return; + const placeholders = files.map(() => "?").join(", "); + db.prepare(`DELETE FROM file_edges WHERE to_type = 'file' AND to_path IN (${placeholders});`).run(files); +}; + +const deleteFilesByPath = (db: SqliteDatabase, files: string[]) => { + if (!files.length) return; + const placeholders = files.map(() => "?").join(", "); + db.prepare(`DELETE FROM files WHERE path IN (${placeholders});`).run(files); +}; + +const recordGraphSnapshot = ( + db: SqliteDatabase, + options: { + mode: "full" | "incremental"; + changedFiles: string[]; + deletedFiles: string[]; + fileNodes: number; + fileEdges: number; + symbolNodes: number; + symbolEdges: number; + }, +) => { + const snapshotStmt = db.prepare(`INSERT INTO graph_snapshots ( + created_at, + mode, + changed_files, + deleted_files, + file_nodes, + file_edges, + symbol_nodes, + symbol_edges + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?);`); + const result = snapshotStmt.run([ + Date.now(), + options.mode, + options.changedFiles.length, + options.deletedFiles.length, + options.fileNodes, + options.fileEdges, + options.symbolNodes, + options.symbolEdges, + ]); + const snapshotId = 
Number(result.lastInsertRowid); + const fileRows = [ + ...options.changedFiles.map((file) => ({ + file, + kind: "changed" as const, + })), + ...options.deletedFiles.map((file) => ({ + file, + kind: "deleted" as const, + })), + ]; + if (!fileRows.length) return; + const fileStmt = db.prepare( + "INSERT INTO graph_snapshot_files (snapshot_id, file_path, change_kind) VALUES (?, ?, ?);", + ); + for (const row of fileRows) { + fileStmt.run([snapshotId, row.file, row.kind]); + } +}; + +const deleteUnreferencedExternalFiles = (db: SqliteDatabase) => { + db.exec(` + DELETE FROM files + WHERE is_external = 1 + AND path NOT IN ( + SELECT DISTINCT to_path + FROM file_edges + WHERE to_type = 'external' + ); + `); +}; + +export async function writeGraphSqlite(options: SqliteGraphOptions): Promise { + await withSqliteDatabase(options.outputPath, (db) => { + const runInsert = db.transaction(() => { + clearCurrentGraphState(db); + const fileEntries: Array<{ path: string; isExternal: boolean }> = []; + for (const file of options.fileGraph.nodes) { + fileEntries.push({ path: file, isExternal: false }); + } + for (const edge of options.fileGraph.edges) { + if (edge.to.type === "external") { + fileEntries.push({ path: edge.to.name, isExternal: true }); + } else { + fileEntries.push({ path: edge.to.path, isExternal: false }); + } + } + insertFiles(db, dedupeFileEntries(fileEntries)); + insertFileEdges(db, options.fileGraph.edges); + insertSymbols(db, [...options.symbolGraph.nodes.values()]); + insertSymbolEdges(db, options.symbolGraph.edges); + recordGraphSnapshot(db, { + mode: "full", + changedFiles: [], + deletedFiles: [], + fileNodes: options.fileGraph.nodes.size, + fileEdges: options.fileGraph.edges.length, + symbolNodes: options.symbolGraph.nodes.size, + symbolEdges: options.symbolGraph.edges.length, + }); + }); + runInsert(); + db.exec("ANALYZE;"); + }); +} + +export async function updateGraphSqlite(options: SqliteGraphUpdateOptions): Promise { + await 
withSqliteDatabase(options.outputPath, (db) => { + const runUpdate = db.transaction(() => { + const changedSet = new Set(options.changedFiles); + const deletedSet = new Set(options.deletedFiles ?? []); + const touchedSet = new Set([...changedSet, ...deletedSet]); + const touchedFiles = [...touchedSet]; + + const removedSymbolIds = readSymbolIdsForFiles(db, touchedFiles); + deleteBySymbolIds(db, removedSymbolIds); + deleteFileEdgesForFiles(db, touchedFiles); + deleteFileEdgesToFiles(db, [...deletedSet]); + deleteFilesByPath(db, [...deletedSet]); + + const fileEntries: Array<{ path: string; isExternal: boolean }> = []; + for (const file of changedSet) { + fileEntries.push({ path: file, isExternal: false }); + } + + for (const edge of options.fileGraph.edges) { + if (!changedSet.has(edge.from)) continue; + if (edge.to.type === "external") { + fileEntries.push({ path: edge.to.name, isExternal: true }); + } else { + fileEntries.push({ path: edge.to.path, isExternal: false }); + } + } + + if (fileEntries.length) { + insertFiles(db, dedupeFileEntries(fileEntries)); + } + + const changedSymbolIds = collectSymbolIdsForFiles(options.symbolGraph, changedSet); + const changedSymbolNodes = [...changedSymbolIds] + .map((id) => options.symbolGraph.nodes.get(id)) + .filter((node): node is SymbolNode => !!node); + if (changedSymbolNodes.length) { + insertSymbols(db, changedSymbolNodes); + } + + const fileEdges = fileGraphEdgesForFiles(options.fileGraph, changedSet); + if (fileEdges.length) { + insertFileEdges(db, fileEdges); + } + + const symbolEdges = options.fullGraphSync + ? 
symbolGraphEdgesForSymbolIds(options.symbolGraph, changedSymbolIds) + : symbolGraphEdgesForFiles(options.symbolGraph, changedSet); + if (symbolEdges.length) { + insertSymbolEdges(db, symbolEdges); + } + + deleteUnreferencedExternalFiles(db); + + recordGraphSnapshot(db, { + mode: "incremental", + changedFiles: [...changedSet], + deletedFiles: [...deletedSet], + fileNodes: changedSet.size, + fileEdges: fileEdges.length, + symbolNodes: changedSymbolNodes.length, + symbolEdges: symbolEdges.length, + }); + }); + runUpdate(); + db.exec("ANALYZE;"); + }); +} diff --git a/src/util/memberAccess.ts b/src/util/memberAccess.ts new file mode 100644 index 00000000..3dbdbb65 --- /dev/null +++ b/src/util/memberAccess.ts @@ -0,0 +1,195 @@ +import type { LanguageSupport } from "../languages.js"; +import type { SyntaxNodeLike } from "../languages/types.js"; +import { sliceText, unquote } from "../util.js"; + +export type MemberAccessParts = { + object: SyntaxNodeLike | null; + property: SyntaxNodeLike | null; +}; + +export type MemberAccessChain = { + base: SyntaxNodeLike; + names: string[]; +}; + +export function memberExpressionTypeFor(sup: LanguageSupport): string { + if (sup.nodeTypes.memberExpression) return sup.nodeTypes.memberExpression; + if (sup.id === "python") return "attribute"; + if (sup.id === "ruby") return "call"; + return "member_expression"; +} + +export function memberPropertyIdentifierTypes(sup: LanguageSupport): string[] { + return [...(sup.nodeTypes.propertyIdentifier ?? 
["property_identifier"])]; +} + +export function memberReferencePropertyIdentifierTypes(sup: LanguageSupport): string[] { + return [...memberPropertyIdentifierTypes(sup), "field_identifier", "type_identifier", "identifier", "constant"]; +} + +export function memberAccessTraversalTypes(sup: LanguageSupport): Set { + const types = new Set([ + memberExpressionTypeFor(sup), + "optional_member_expression", + "subscript_expression", + "optional_chain", + ]); + if (sup.id === "go") types.add("qualified_type"); + if (sup.id === "python") types.add("attribute"); + if (sup.id === "kotlin" || sup.id === "swift") types.add("navigation_expression"); + return types; +} + +export function isMemberAccessNode(sup: LanguageSupport, node: SyntaxNodeLike): boolean { + const memberExpressionType = memberExpressionTypeFor(sup); + return ( + node.type === memberExpressionType || + (sup.id === "go" && node.type === "qualified_type") || + node.type === "member_access_expression" || + node.type === "qualified_name" || + node.type === "field_access" || + node.type === "method_invocation" || + node.type === "scoped_identifier" || + node.type === "scoped_type_identifier" || + node.type === "call" || + node.type === "scope_resolution" || + node.type === "field_expression" || + node.type === "attribute" || + node.type === "navigation_expression" + ); +} + +export function isMemberObjectIdentifier(nodeType: string): boolean { + return ( + nodeType === "identifier" || + nodeType === "type_identifier" || + nodeType === "package_identifier" || + nodeType === "constant" || + nodeType === "namespace_identifier" + ); +} + +export function isMemberReferencePropertyIdentifier(sup: LanguageSupport, nodeType: string): boolean { + return memberReferencePropertyIdentifierTypes(sup).includes(nodeType); +} + +export function getNavigationExpressionProperty(expr: SyntaxNodeLike): SyntaxNodeLike | null { + const suffix = expr.namedChildren.find((child) => child.type === "navigation_suffix") ?? 
expr.child(1); + if (!suffix) return null; + return ( + suffix.childForFieldName("suffix") ?? suffix.childForFieldName("name") ?? suffix.namedChildren[0] ?? suffix.child(0) + ); +} + +export function getMemberAccessParts(sup: LanguageSupport, memberNode: SyntaxNodeLike): MemberAccessParts { + if (sup.id === "python") { + return { + object: memberNode.childForFieldName("object") ?? memberNode.child(0), + property: memberNode.childForFieldName("attribute") ?? memberNode.child(2), + }; + } + if (sup.id === "csharp") { + return { + object: memberNode.child(0), + property: memberNode.child(2), + }; + } + if (sup.id === "java") { + if (memberNode.type === "method_invocation") { + return { + object: memberNode.childForFieldName("object") ?? memberNode.child(0), + property: memberNode.childForFieldName("name") ?? memberNode.child(2), + }; + } + if (memberNode.type === "scoped_identifier" || memberNode.type === "scoped_type_identifier") { + return { + object: memberNode.childForFieldName("scope") ?? memberNode.child(0), + property: memberNode.childForFieldName("name") ?? memberNode.child(2), + }; + } + } + if (sup.id === "ruby") { + if (memberNode.type === "scope_resolution") { + return { + object: memberNode.childForFieldName("scope") ?? memberNode.child(0), + property: memberNode.childForFieldName("name") ?? memberNode.child(2), + }; + } + return { + object: memberNode.childForFieldName("receiver") ?? memberNode.child(0), + property: memberNode.childForFieldName("method") ?? memberNode.child(2), + }; + } + if (sup.id === "rust" && memberNode.type === "scoped_identifier") { + return { + object: memberNode.childForFieldName("path") ?? memberNode.child(0), + property: memberNode.childForFieldName("name") ?? memberNode.child(2), + }; + } + if (sup.id === "go" && memberNode.type === "qualified_type") { + return { + object: memberNode.namedChildren[0] ?? memberNode.child(0), + property: memberNode.namedChildren[1] ?? 
memberNode.child(1), + }; + } + if ((sup.id === "kotlin" || sup.id === "swift") && memberNode.type === "navigation_expression") { + return { + object: memberNode.namedChildren[0] ?? memberNode.child(0), + property: getNavigationExpressionProperty(memberNode), + }; + } + return { + object: memberNode.childForFieldName("object") ?? memberNode.child(0), + property: + memberNode.childForFieldName("property") ?? memberNode.childForFieldName("attribute") ?? memberNode.child(2), + }; +} + +export function collectMemberAccessChain(args: { + sup: LanguageSupport; + source: string; + chainNode: SyntaxNodeLike; + constStringOf?: Map; +}): MemberAccessChain | null { + const names: string[] = []; + let current: SyntaxNodeLike | null = args.chainNode; + let base: SyntaxNodeLike | null = null; + const traversalTypes = memberAccessTraversalTypes(args.sup); + const propertyTypes = memberPropertyIdentifierTypes(args.sup); + + const pushPropertyName = (propNode: SyntaxNodeLike | null): void => { + if (!propNode) return; + if (propertyTypes.includes(propNode.type)) { + names.push(sliceText(propNode, args.source)); + return; + } + if (propNode.type === "string") { + names.push(unquote(sliceText(propNode, args.source))); + return; + } + if (propNode.type !== "identifier") return; + const keyName = sliceText(propNode, args.source); + const value = args.constStringOf?.get(keyName); + if (typeof value === "string") names.push(value); + }; + + while (current && traversalTypes.has(current.type)) { + if (current.type === "subscript_expression") { + base = current.child(0) ?? base; + pushPropertyName(current.child(2)); + current = base; + continue; + } + if (current.type === "optional_chain") { + current = current.child(0); + continue; + } + const parts = getMemberAccessParts(args.sup, current); + base = parts.object ?? 
base; + pushPropertyName(parts.property); + current = base; + } + + if (!current || !names.length) return null; + return { base: current, names }; +} diff --git a/src/util/projectFiles.ts b/src/util/projectFiles.ts index e215e993..628722fd 100644 --- a/src/util/projectFiles.ts +++ b/src/util/projectFiles.ts @@ -6,8 +6,22 @@ import picomatch from "picomatch"; import { logWithLevel, type LogLevel } from "../logging.js"; import { stringifyUnknown } from "./ast.js"; import { isFilePathWithinRoot, normalizePath } from "./paths.js"; +import { + PROJECT_FILE_DEFINITIONS, + type ProjectFileDefinition, + type ProjectFileInfo, +} from "./projectFiles/definitions.js"; +import { trimToNull } from "./projectFiles/parsers.js"; import { mapLimitSemaphore } from "./semaphore.js"; +export type { + ProjectFileDefinition, + ProjectFileInfo, + ProjectFileKind, + ProjectFileRole, + ProjectFileType, +} from "./projectFiles/definitions.js"; + export const DEFAULT_PROJECT_FILE_IGNORES = [ "**/node_modules/**", "**/.git/**", @@ -448,465 +462,6 @@ async function filterRealPathsWithinRoot(paths: string[], realRoot: string): Pro return entries.map((entry) => entry.path); } -export type ProjectFileKind = "file" | "dir"; -export type ProjectFileRole = "manifest" | "lockfile" | "config" | "solution" | "ide"; -export type ProjectFileType = - | "node" - | "typescript" - | "python" - | "rust" - | "go" - | "maven" - | "gradle" - | "dotnet" - | "ruby" - | "php" - | "swift" - | "native" - | "ide"; - -export type ProjectFileInfo = { - path: string; - kind: ProjectFileKind; - type: ProjectFileType; - role: ProjectFileRole; - projectRoot: string; - name?: string; -}; - -type ProjectFileDefinition = { - type: ProjectFileType; - role: ProjectFileRole; - kind: ProjectFileKind; - patterns: string[]; - parseName?: (contents: string, filePath: string) => string | null; - nameFromPath?: "file" | "dir"; -}; - -function trimToNull(value: string | null | undefined): string | null { - const trimmed = value?.trim(); 
- return trimmed ? trimmed : null; -} - -function isPlainRecord(value: unknown): value is Record { - return typeof value === "object" && value !== null && !Array.isArray(value); -} - -function parseJsonName(raw: string): string | null { - try { - const data: unknown = JSON.parse(raw); - if (!isPlainRecord(data)) return null; - const name = data.name; - if (typeof name !== "string") return null; - return trimToNull(name); - } catch { - return null; - } -} - -function stripTomlInlineComment(line: string): string { - let quote: "'" | '"' | null = null; - for (let i = 0; i < line.length; i += 1) { - const ch = line[i]; - if (quote) { - if (ch === quote) quote = null; - continue; - } - if (ch === "'" || ch === '"') { - quote = ch; - continue; - } - if (ch === "#") return line.slice(0, i); - } - return line; -} - -function parseTomlName(raw: string, sections: string[]): string | null { - const lines = raw.split(/\r?\n/); - let currentSection = ""; - for (const rawLine of lines) { - const line = stripTomlInlineComment(rawLine).trim(); - if (!line) continue; - const sectionMatch = line.match(/^\[([^\]]+)\]\s*$/); - if (sectionMatch) { - currentSection = (sectionMatch[1] ?? "").trim(); - continue; - } - if (!sections.includes(currentSection)) continue; - const nameMatch = line.match(/^name\s*=\s*("([^"]*)"|'([^']*)')/); - if (!nameMatch) continue; - return trimToNull(nameMatch[2] ?? nameMatch[3] ?? ""); - } - return null; -} - -function parseIniName(raw: string, section: string, key: string): string | null { - const lines = raw.split(/\r?\n/); - let currentSection = ""; - const targetSection = section.toLowerCase(); - const targetKey = key.toLowerCase(); - for (const rawLine of lines) { - const trimmed = rawLine.trim(); - if (!trimmed || trimmed.startsWith("#") || trimmed.startsWith(";")) continue; - const sectionMatch = trimmed.match(/^\[([^\]]+)\]\s*$/); - if (sectionMatch) { - currentSection = (sectionMatch[1] ?? 
"").trim().toLowerCase(); - continue; - } - if (currentSection !== targetSection) continue; - const keyMatch = trimmed.match(/^([^=]+)=(.+)$/); - if (!keyMatch) continue; - const foundKey = (keyMatch[1] ?? "").trim().toLowerCase(); - if (foundKey !== targetKey) continue; - const value = (keyMatch[2] ?? "").trim(); - return trimToNull(value.replace(/^['"]|['"]$/g, "")); - } - return null; -} - -function parseSetupPyName(raw: string): string | null { - const match = raw.match(/\bname\s*=\s*["']([^"']+)["']/); - return trimToNull(match?.[1]); -} - -function parsePomName(raw: string): string | null { - const withoutParent = raw.replace(/[\s\S]*?<\/parent>/gi, ""); - const nameMatch = withoutParent.match(/\s*([^<]+)\s*<\/name>/i); - if (nameMatch) return trimToNull(nameMatch[1]); - const artifactMatch = withoutParent.match(/\s*([^<]+)\s*<\/artifactId>/i); - if (artifactMatch) return trimToNull(artifactMatch[1]); - return null; -} - -function parseGradleName(raw: string): string | null { - const match = raw.match(/\brootProject\.name\s*=\s*["']([^"']+)["']/); - return trimToNull(match?.[1]); -} - -function parseGradlePropertiesName(raw: string): string | null { - const match = raw.match(/^\s*rootProject\.name\s*=\s*["']([^"']+)["']/m); - return trimToNull(match?.[1]); -} - -function parseDotnetName(raw: string): string | null { - const tags = ["AssemblyName", "PackageId", "RootNamespace"]; - for (const tag of tags) { - const match = raw.match(new RegExp(`<${tag}>\\s*([^<]+)\\s*`, "i")); - if (match) return trimToNull(match[1]); - } - return null; -} - -function stripInlineComment(line: string): string { - let quote: "'" | '"' | null = null; - for (let i = 0; i < line.length; i += 1) { - const ch = line[i]; - if (quote) { - if (ch === quote) quote = null; - continue; - } - if (ch === "'" || ch === '"') { - quote = ch; - continue; - } - if (ch === "#") return line.slice(0, i).trim(); - } - return line.trim(); -} - -function parseGoModuleName(raw: string): string | null { - 
const lines = raw.split(/\r?\n/); - for (const rawLine of lines) { - const line = stripInlineComment(rawLine); - if (!line) continue; - const match = line.match(/^module\s+(.+)$/); - if (match) return trimToNull(match[1]); - } - return null; -} - -function parseGemspecName(raw: string): string | null { - const match = raw.match(/\bname\s*=\s*["']([^"']+)["']/); - return trimToNull(match?.[1]); -} - -function parseSwiftPackageName(raw: string): string | null { - const match = raw.match(/\bname\s*:\s*["']([^"']+)["']/); - return trimToNull(match?.[1]); -} - -const PROJECT_FILE_DEFINITIONS: ProjectFileDefinition[] = [ - { - type: "node", - role: "manifest", - kind: "file", - patterns: ["package.json"], - parseName: parseJsonName, - nameFromPath: "dir", - }, - { - type: "node", - role: "lockfile", - kind: "file", - patterns: ["package-lock.json", "pnpm-lock.yaml", "yarn.lock", "bun.lockb"], - }, - { - type: "node", - role: "config", - kind: "file", - patterns: ["pnpm-workspace.yaml"], - nameFromPath: "dir", - }, - { - type: "node", - role: "config", - kind: "file", - patterns: ["lerna.json", "nx.json", "turbo.json"], - parseName: parseJsonName, - nameFromPath: "dir", - }, - { - type: "typescript", - role: "config", - kind: "file", - patterns: ["tsconfig.json", "jsconfig.json"], - }, - { - type: "python", - role: "manifest", - kind: "file", - patterns: ["pyproject.toml"], - parseName: (raw) => parseTomlName(raw, ["project", "tool.poetry"]), - nameFromPath: "dir", - }, - { - type: "python", - role: "manifest", - kind: "file", - patterns: ["setup.cfg"], - parseName: (raw) => parseIniName(raw, "metadata", "name"), - nameFromPath: "dir", - }, - { - type: "python", - role: "manifest", - kind: "file", - patterns: ["setup.py"], - parseName: parseSetupPyName, - nameFromPath: "dir", - }, - { - type: "python", - role: "manifest", - kind: "file", - patterns: ["requirements.txt", "requirements.in", "Pipfile"], - nameFromPath: "dir", - }, - { - type: "python", - role: "lockfile", - 
kind: "file", - patterns: ["Pipfile.lock", "poetry.lock"], - }, - { - type: "rust", - role: "manifest", - kind: "file", - patterns: ["Cargo.toml"], - parseName: (raw) => parseTomlName(raw, ["package"]), - nameFromPath: "dir", - }, - { - type: "rust", - role: "lockfile", - kind: "file", - patterns: ["Cargo.lock"], - }, - { - type: "rust", - role: "config", - kind: "file", - patterns: ["rust-toolchain", "rust-toolchain.toml"], - nameFromPath: "dir", - }, - { - type: "go", - role: "manifest", - kind: "file", - patterns: ["go.mod"], - parseName: parseGoModuleName, - nameFromPath: "dir", - }, - { - type: "go", - role: "lockfile", - kind: "file", - patterns: ["go.sum"], - }, - { - type: "go", - role: "config", - kind: "file", - patterns: ["go.work"], - nameFromPath: "dir", - }, - { - type: "ruby", - role: "manifest", - kind: "file", - patterns: ["Gemfile"], - nameFromPath: "dir", - }, - { - type: "ruby", - role: "lockfile", - kind: "file", - patterns: ["Gemfile.lock"], - }, - { - type: "ruby", - role: "manifest", - kind: "file", - patterns: ["*.gemspec"], - parseName: parseGemspecName, - nameFromPath: "file", - }, - { - type: "maven", - role: "manifest", - kind: "file", - patterns: ["pom.xml"], - parseName: parsePomName, - nameFromPath: "dir", - }, - { - type: "maven", - role: "config", - kind: "file", - patterns: ["mvnw"], - nameFromPath: "dir", - }, - { - type: "gradle", - role: "manifest", - kind: "file", - patterns: ["build.gradle", "build.gradle.kts", "settings.gradle", "settings.gradle.kts"], - parseName: parseGradleName, - nameFromPath: "dir", - }, - { - type: "gradle", - role: "config", - kind: "file", - patterns: ["gradle.properties"], - parseName: parseGradlePropertiesName, - nameFromPath: "dir", - }, - { - type: "gradle", - role: "config", - kind: "file", - patterns: ["gradlew"], - nameFromPath: "dir", - }, - { - type: "dotnet", - role: "manifest", - kind: "file", - patterns: ["*.csproj", "*.fsproj", "*.vbproj"], - parseName: parseDotnetName, - nameFromPath: 
"file", - }, - { - type: "dotnet", - role: "solution", - kind: "file", - patterns: ["*.sln"], - nameFromPath: "file", - }, - { - type: "dotnet", - role: "config", - kind: "file", - patterns: ["Directory.Build.props", "Directory.Build.targets", "global.json"], - nameFromPath: "dir", - }, - { - type: "php", - role: "manifest", - kind: "file", - patterns: ["composer.json"], - parseName: parseJsonName, - nameFromPath: "dir", - }, - { - type: "php", - role: "lockfile", - kind: "file", - patterns: ["composer.lock"], - }, - { - type: "native", - role: "manifest", - kind: "file", - patterns: [ - "CMakeLists.txt", - "Makefile", - "makefile", - "GNUmakefile", - "configure.ac", - "configure.in", - "meson.build", - "conanfile.txt", - "conanfile.py", - ], - nameFromPath: "dir", - }, - { - type: "native", - role: "config", - kind: "file", - patterns: ["CMakePresets.json", "CMakeUserPresets.json", "meson_options.txt"], - nameFromPath: "dir", - }, - { - type: "native", - role: "manifest", - kind: "file", - patterns: ["vcpkg.json"], - parseName: parseJsonName, - nameFromPath: "dir", - }, - { - type: "swift", - role: "manifest", - kind: "file", - patterns: ["Package.swift"], - parseName: parseSwiftPackageName, - nameFromPath: "dir", - }, - { - type: "swift", - role: "lockfile", - kind: "file", - patterns: ["Package.resolved"], - }, - { - type: "swift", - role: "config", - kind: "dir", - patterns: ["*.xcodeproj", "*.xcworkspace"], - nameFromPath: "file", - }, - { - type: "ide", - role: "ide", - kind: "dir", - patterns: [".idea"], - nameFromPath: "dir", - }, -]; - function toProjectGlob(pattern: string): string { return pattern.startsWith("**/") ? 
pattern : `**/${pattern}`; } diff --git a/src/util/projectFiles/definitions.ts b/src/util/projectFiles/definitions.ts new file mode 100644 index 00000000..dda01148 --- /dev/null +++ b/src/util/projectFiles/definitions.ts @@ -0,0 +1,320 @@ +import { + parseDotnetName, + parseGemspecName, + parseGoModuleName, + parseGradleName, + parseGradlePropertiesName, + parseIniName, + parseJsonName, + parsePomName, + parseSetupPyName, + parseSwiftPackageName, + parseTomlName, +} from "./parsers.js"; + +export type ProjectFileKind = "file" | "dir"; +export type ProjectFileRole = "manifest" | "lockfile" | "config" | "solution" | "ide"; +export type ProjectFileType = + | "node" + | "typescript" + | "python" + | "rust" + | "go" + | "maven" + | "gradle" + | "dotnet" + | "ruby" + | "php" + | "swift" + | "native" + | "ide"; + +export type ProjectFileInfo = { + path: string; + kind: ProjectFileKind; + type: ProjectFileType; + role: ProjectFileRole; + projectRoot: string; + name?: string; +}; + +export type ProjectFileDefinition = { + type: ProjectFileType; + role: ProjectFileRole; + kind: ProjectFileKind; + patterns: string[]; + parseName?: (contents: string, filePath: string) => string | null; + nameFromPath?: "file" | "dir"; +}; + +export const PROJECT_FILE_DEFINITIONS: ProjectFileDefinition[] = [ + { + type: "node", + role: "manifest", + kind: "file", + patterns: ["package.json"], + parseName: parseJsonName, + nameFromPath: "dir", + }, + { + type: "node", + role: "lockfile", + kind: "file", + patterns: ["package-lock.json", "pnpm-lock.yaml", "yarn.lock", "bun.lockb"], + }, + { + type: "node", + role: "config", + kind: "file", + patterns: ["pnpm-workspace.yaml"], + nameFromPath: "dir", + }, + { + type: "node", + role: "config", + kind: "file", + patterns: ["lerna.json", "nx.json", "turbo.json"], + parseName: parseJsonName, + nameFromPath: "dir", + }, + { + type: "typescript", + role: "config", + kind: "file", + patterns: ["tsconfig.json", "jsconfig.json"], + }, + { + type: "python", 
+ role: "manifest", + kind: "file", + patterns: ["pyproject.toml"], + parseName: (raw) => parseTomlName(raw, ["project", "tool.poetry"]), + nameFromPath: "dir", + }, + { + type: "python", + role: "manifest", + kind: "file", + patterns: ["setup.cfg"], + parseName: (raw) => parseIniName(raw, "metadata", "name"), + nameFromPath: "dir", + }, + { + type: "python", + role: "manifest", + kind: "file", + patterns: ["setup.py"], + parseName: parseSetupPyName, + nameFromPath: "dir", + }, + { + type: "python", + role: "manifest", + kind: "file", + patterns: ["requirements.txt", "requirements.in", "Pipfile"], + nameFromPath: "dir", + }, + { + type: "python", + role: "lockfile", + kind: "file", + patterns: ["Pipfile.lock", "poetry.lock"], + }, + { + type: "rust", + role: "manifest", + kind: "file", + patterns: ["Cargo.toml"], + parseName: (raw) => parseTomlName(raw, ["package"]), + nameFromPath: "dir", + }, + { + type: "rust", + role: "lockfile", + kind: "file", + patterns: ["Cargo.lock"], + }, + { + type: "rust", + role: "config", + kind: "file", + patterns: ["rust-toolchain", "rust-toolchain.toml"], + nameFromPath: "dir", + }, + { + type: "go", + role: "manifest", + kind: "file", + patterns: ["go.mod"], + parseName: parseGoModuleName, + nameFromPath: "dir", + }, + { + type: "go", + role: "lockfile", + kind: "file", + patterns: ["go.sum"], + }, + { + type: "go", + role: "config", + kind: "file", + patterns: ["go.work"], + nameFromPath: "dir", + }, + { + type: "ruby", + role: "manifest", + kind: "file", + patterns: ["Gemfile"], + nameFromPath: "dir", + }, + { + type: "ruby", + role: "lockfile", + kind: "file", + patterns: ["Gemfile.lock"], + }, + { + type: "ruby", + role: "manifest", + kind: "file", + patterns: ["*.gemspec"], + parseName: parseGemspecName, + nameFromPath: "file", + }, + { + type: "maven", + role: "manifest", + kind: "file", + patterns: ["pom.xml"], + parseName: parsePomName, + nameFromPath: "dir", + }, + { + type: "maven", + role: "config", + kind: "file", + 
patterns: ["mvnw"], + nameFromPath: "dir", + }, + { + type: "gradle", + role: "manifest", + kind: "file", + patterns: ["build.gradle", "build.gradle.kts", "settings.gradle", "settings.gradle.kts"], + parseName: parseGradleName, + nameFromPath: "dir", + }, + { + type: "gradle", + role: "config", + kind: "file", + patterns: ["gradle.properties"], + parseName: parseGradlePropertiesName, + nameFromPath: "dir", + }, + { + type: "gradle", + role: "config", + kind: "file", + patterns: ["gradlew"], + nameFromPath: "dir", + }, + { + type: "dotnet", + role: "manifest", + kind: "file", + patterns: ["*.csproj", "*.fsproj", "*.vbproj"], + parseName: parseDotnetName, + nameFromPath: "file", + }, + { + type: "dotnet", + role: "solution", + kind: "file", + patterns: ["*.sln"], + nameFromPath: "file", + }, + { + type: "dotnet", + role: "config", + kind: "file", + patterns: ["Directory.Build.props", "Directory.Build.targets", "global.json"], + nameFromPath: "dir", + }, + { + type: "php", + role: "manifest", + kind: "file", + patterns: ["composer.json"], + parseName: parseJsonName, + nameFromPath: "dir", + }, + { + type: "php", + role: "lockfile", + kind: "file", + patterns: ["composer.lock"], + }, + { + type: "native", + role: "manifest", + kind: "file", + patterns: [ + "CMakeLists.txt", + "Makefile", + "makefile", + "GNUmakefile", + "configure.ac", + "configure.in", + "meson.build", + "conanfile.txt", + "conanfile.py", + ], + nameFromPath: "dir", + }, + { + type: "native", + role: "config", + kind: "file", + patterns: ["CMakePresets.json", "CMakeUserPresets.json", "meson_options.txt"], + nameFromPath: "dir", + }, + { + type: "native", + role: "manifest", + kind: "file", + patterns: ["vcpkg.json"], + parseName: parseJsonName, + nameFromPath: "dir", + }, + { + type: "swift", + role: "manifest", + kind: "file", + patterns: ["Package.swift"], + parseName: parseSwiftPackageName, + nameFromPath: "dir", + }, + { + type: "swift", + role: "lockfile", + kind: "file", + patterns: 
["Package.resolved"], + }, + { + type: "swift", + role: "config", + kind: "dir", + patterns: ["*.xcodeproj", "*.xcworkspace"], + nameFromPath: "file", + }, + { + type: "ide", + role: "ide", + kind: "dir", + patterns: [".idea"], + nameFromPath: "dir", + }, +];
diff --git a/src/util/projectFiles/parsers.ts b/src/util/projectFiles/parsers.ts
new file mode 100644
index 00000000..bf254263
--- /dev/null
+++ b/src/util/projectFiles/parsers.ts
@@ -0,0 +1,215 @@
+/** Trims `value`; returns `null` when it is nullish or blank after trimming. */
+export function trimToNull(value: string | null | undefined): string | null {
+  const trimmed = value?.trim();
+  return trimmed ? trimmed : null;
+}
+
+/** Type guard: `value` is a non-null object that is not an array. */
+function isPlainRecord(value: unknown): value is Record<string, unknown> {
+  return typeof value === "object" && value !== null && !Array.isArray(value);
+}
+
+/**
+ * Reads the top-level `name` string from a JSON document.
+ *
+ * @param raw JSON text.
+ * @returns The trimmed name, or `null` when the text is not valid JSON, the root is not an
+ *   object, or `name` is missing, not a string, or blank.
+ */
+export function parseJsonName(raw: string): string | null {
+  try {
+    const data: unknown = JSON.parse(raw);
+    if (!isPlainRecord(data)) return null;
+    const name = data.name;
+    if (typeof name !== "string") return null;
+    return trimToNull(name);
+  } catch {
+    // Malformed JSON is reported as "no name" rather than as an error.
+    return null;
+  }
+}
+
+/** Cuts a TOML line at the first `#` that is not inside a single- or double-quoted string. */
+function stripTomlInlineComment(line: string): string {
+  let quote: "'" | '"' | null = null;
+  for (let i = 0; i < line.length; i += 1) {
+    const ch = line[i];
+    if (quote) {
+      if (ch === quote) quote = null;
+      continue;
+    }
+    if (ch === "'" || ch === '"') {
+      quote = ch;
+      continue;
+    }
+    if (ch === "#") return line.slice(0, i);
+  }
+  return line;
+}
+
+/**
+ * Finds the first quoted `name = "..."` assignment inside one of the listed TOML tables.
+ *
+ * This is a line-by-line scan rather than a TOML parser: only `[table]` headers and
+ * single-line quoted values are recognised.
+ *
+ * @param raw TOML text.
+ * @param sections Table names without brackets, e.g. `["project", "tool.poetry"]`.
+ * @returns The trimmed value of the first match, or `null` when there is none or it is blank.
+ */
+export function parseTomlName(raw: string, sections: string[]): string | null {
+  const lines = raw.split(/\r?\n/);
+  let currentSection = "";
+  for (const rawLine of lines) {
+    const line = stripTomlInlineComment(rawLine).trim();
+    if (!line) continue;
+    const sectionMatch = line.match(/^\[([^\]]+)\]\s*$/);
+    if (sectionMatch) {
+      currentSection = (sectionMatch[1] ?? "").trim();
+      continue;
+    }
+    if (!sections.includes(currentSection)) continue;
+    const nameMatch = line.match(/^name\s*=\s*("([^"]*)"|'([^']*)')/);
+    if (!nameMatch) continue;
+    return trimToNull(nameMatch[2] ?? nameMatch[3] ?? "");
+  }
+  return null;
+}
+
+/**
+ * Looks up `key` inside `[section]` of an INI-style file.
+ *
+ * Section and key names are compared case-insensitively; lines starting with `#` or `;`
+ * are skipped, and a leading and a trailing quote character are removed from the value.
+ *
+ * @returns The trimmed value of the first matching key, or `null` when absent or blank.
+ */
+export function parseIniName(raw: string, section: string, key: string): string | null {
+  const lines = raw.split(/\r?\n/);
+  let currentSection = "";
+  const targetSection = section.toLowerCase();
+  const targetKey = key.toLowerCase();
+  for (const rawLine of lines) {
+    const trimmed = rawLine.trim();
+    if (!trimmed || trimmed.startsWith("#") || trimmed.startsWith(";")) continue;
+    const sectionMatch = trimmed.match(/^\[([^\]]+)\]\s*$/);
+    if (sectionMatch) {
+      currentSection = (sectionMatch[1] ?? "").trim().toLowerCase();
+      continue;
+    }
+    if (currentSection !== targetSection) continue;
+    const keyMatch = trimmed.match(/^([^=]+)=(.+)$/);
+    if (!keyMatch) continue;
+    const foundKey = (keyMatch[1] ?? "").trim().toLowerCase();
+    if (foundKey !== targetKey) continue;
+    const value = (keyMatch[2] ?? "").trim();
+    return trimToNull(value.replace(/^['"]|['"]$/g, ""));
+  }
+  return null;
+}
+
+/** Extracts the first quoted `name = "..."` assignment found in `setup.py` source text. */
+export function parseSetupPyName(raw: string): string | null {
+  const match = raw.match(/\bname\s*=\s*["']([^"']+)["']/);
+  return trimToNull(match?.[1]);
+}
+
+/**
+ * Extracts the project name from a Maven `pom.xml`, preferring `<name>` over `<artifactId>`.
+ *
+ * @returns The trimmed text of the first match outside `<parent>`, or `null` when neither
+ *   element is present.
+ */
+export function parsePomName(raw: string): string | null {
+  // Remove only the <parent>...</parent> element, so the parent's coordinates are never
+  // reported as the project's own and declarations that precede it are still seen.
+  const withoutParent = raw.replace(/<parent>[\s\S]*?<\/parent>/gi, "");
+  const nameMatch = withoutParent.match(/<name>\s*([^<]+)\s*<\/name>/i);
+  if (nameMatch) return trimToNull(nameMatch[1]);
+  const artifactMatch = withoutParent.match(/<artifactId>\s*([^<]+)\s*<\/artifactId>/i);
+  if (artifactMatch) return trimToNull(artifactMatch[1]);
+  return null;
+}
+
+/** Extracts the quoted value assigned to `rootProject.name` in a Gradle script. */
+export function parseGradleName(raw: string): string | null {
+  const match = raw.match(/\brootProject\.name\s*=\s*["']([^"']+)["']/);
+  return trimToNull(match?.[1]);
+}
+
+/** Extracts a quoted `rootProject.name = "..."` value that starts a line of the given text. */
+export function parseGradlePropertiesName(raw: string): string | null {
+  const match = raw.match(/^\s*rootProject\.name\s*=\s*["']([^"']+)["']/m);
+  return trimToNull(match?.[1]);
+}
+
+/**
+ * Extracts a project name from an MSBuild project file.
+ *
+ * Tries `<AssemblyName>`, `<PackageId>`, then `<RootNamespace>` and returns the trimmed text
+ * of the first element found, or `null` when none is present.
+ */
+export function parseDotnetName(raw: string): string | null {
+  const tags = ["AssemblyName", "PackageId", "RootNamespace"];
+  for (const tag of tags) {
+    const match = raw.match(new RegExp(`<${tag}>\\s*([^<]+)\\s*</${tag}>`, "i"));
+    if (match) return trimToNull(match[1]);
+  }
+  return null;
+}
+
+/**
+ * Removes a trailing `//` comment from a go.mod line and trims the remainder.
+ *
+ * go.mod comments start with `//`; `#` has no special meaning there. A `//` inside a
+ * double-quoted or back-quoted string is left alone.
+ */
+function stripGoLineComment(line: string): string {
+  let quote: '"' | "`" | null = null;
+  for (let i = 0; i < line.length; i += 1) {
+    const ch = line[i];
+    if (quote) {
+      if (ch === quote) quote = null;
+      continue;
+    }
+    if (ch === '"' || ch === "`") {
+      quote = ch;
+      continue;
+    }
+    if (ch === "/" && line[i + 1] === "/") return line.slice(0, i).trim();
+  }
+  return line.trim();
+}
+
+/**
+ * Reads the module path from the `module` directive of a go.mod file.
+ *
+ * @param raw go.mod text.
+ * @returns The module path with trailing comments and surrounding quotes removed, or `null`
+ *   when there is no single-line `module` directive.
+ */
+export function parseGoModuleName(raw: string): string | null {
+  const lines = raw.split(/\r?\n/);
+  for (const rawLine of lines) {
+    const line = stripGoLineComment(rawLine);
+    if (!line) continue;
+    const match = line.match(/^module\s+(.+)$/);
+    if (!match) continue;
+    // The path may be written as a quoted Go string; report it without the quotes.
+    const modulePath = (match[1] ?? "").trim().replace(/^(["`])(.*)\1$/, "$2");
+    return trimToNull(modulePath);
+  }
+  return null;
+}
+
+/** Extracts the first quoted `name = "..."` assignment from gemspec source text. */
+export function parseGemspecName(raw: string): string | null {
+  const match = raw.match(/\bname\s*=\s*["']([^"']+)["']/);
+  return trimToNull(match?.[1]);
+}
+
+/** Extracts the first quoted `name: "..."` argument from `Package.swift` source text. */
+export function parseSwiftPackageName(raw: string): string | null {
+  const match = raw.match(/\bname\s*:\s*["']([^"']+)["']/);
+  return trimToNull(match?.[1]);
+}
diff --git a/src/util/resolution.ts b/src/util/resolution.ts index 316417be..d443370f 100644 --- a/src/util/resolution.ts +++ b/src/util/resolution.ts @@ -3,7 +3,7 @@ import fsp from "node:fs/promises"; import path from "node:path"; import { createMatchPath } from "tsconfig-paths"; import { logWithLevel, type LogLevel } from "../logging.js"; -import { stringifyUnknown, unquote } from "./ast.js"; +import { stringifyUnknown } from "./ast.js"; import { parseJsonc } from "./comments.js"; import { normalizePath, normalizeResolutionHints } from "./paths.js"; import { listProjectFiles } from "./projectFiles.js"; @@ -19,6 +19,22 @@ import { type MinimalPackageJson, type WorkspaceConfig, } from "./workspace.js"; +import { + clearJvmResolutionCaches, + resolveJavaImportPath, + resolveKotlinImportPath, +} from "./resolution/jvm.js"; +import {
resolveGoImportPath } from "./resolution/go.js"; +import { findNearestFile, isDirectory } from "./resolution/files.js"; +export { resolveGoImportPath } from "./resolution/go.js"; +export { resolveJvmPackageImportPaths } from "./resolution/jvm.js"; +import { + addProjectSymbolFile, + getOrCreateProjectSymbolIndex, + listProjectLanguageFiles, + sortProjectSymbolIndex, + type LanguageProjectSymbolIndex, +} from "./resolution/projectSymbols.js"; export { listResolutionCandidates } from "./resolutionCandidates.js"; @@ -234,22 +250,6 @@ export async function resolvePathLikeModule( return null; } -type GoModuleInfo = { - modulePath: string; - moduleRoot: string; - replacements: Map; -}; - -type KotlinSymbolIndexEntry = { - packageName: string | null; - symbols: Set; -}; - -type JavaSymbolIndexEntry = { - packageName: string | null; - symbols: Set; -}; - type PhpSymbolKind = "class" | "function" | "const"; type PhpPackageSymbolIndexEntry = { @@ -273,144 +273,12 @@ type PhpComposerConfig = { files: string[]; }; -type LanguageProjectSymbolIndex = { - files: string[]; - filesByPackage: Map; - filesByPackageSymbol: Map>; -}; - -const kotlinImportResolutionCache = new Map(); -const kotlinSymbolIndexCache = new Map(); -const kotlinProjectSymbolIndexCache = new Map>(); -const javaImportResolutionCache = new Map(); -const javaSymbolIndexCache = new Map(); -const javaProjectSymbolIndexCache = new Map>(); const phpImportResolutionCache = new Map(); const phpSymbolIndexCache = new Map(); const phpProjectSymbolIndexCache = new Map>(); const phpComposerConfigCache = new Map>(); const phpComposerAutoloadFileCache = new Map>>(); -async function listProjectLanguageFiles(projectRoot: string, patterns: string[]): Promise { - return await listProjectFiles(projectRoot, patterns); -} - -function addProjectSymbolFile( - index: LanguageProjectSymbolIndex, - packageName: string, - filePath: string, - symbols: Set, -): void { - const packageFiles = index.filesByPackage.get(packageName) ?? 
[]; - packageFiles.push(filePath); - index.filesByPackage.set(packageName, packageFiles); - - let symbolFiles = index.filesByPackageSymbol.get(packageName); - if (!symbolFiles) { - symbolFiles = new Map(); - index.filesByPackageSymbol.set(packageName, symbolFiles); - } - for (const symbolName of symbols) { - const files = symbolFiles.get(symbolName) ?? []; - files.push(filePath); - symbolFiles.set(symbolName, files); - } -} - -function sortProjectSymbolIndex(index: LanguageProjectSymbolIndex): void { - for (const [packageName, files] of index.filesByPackage) { - files.sort((left, right) => normalizePath(left).localeCompare(normalizePath(right))); - index.filesByPackage.set(packageName, files); - } - for (const symbolFiles of index.filesByPackageSymbol.values()) { - for (const [symbolName, files] of symbolFiles) { - files.sort((left, right) => normalizePath(left).localeCompare(normalizePath(right))); - symbolFiles.set(symbolName, files); - } - } -} - -async function buildProjectSymbolIndex }>( - projectRoot: string, - patterns: string[], - readIndexEntry: (filePath: string) => Promise, -): Promise { - const files = await listProjectLanguageFiles(projectRoot, patterns); - const index: LanguageProjectSymbolIndex = { - files, - filesByPackage: new Map(), - filesByPackageSymbol: new Map>(), - }; - - const indexEntries = await mapLimit(files, 8, async (filePath) => { - try { - const entry = await readIndexEntry(filePath); - return { filePath, entry }; - } catch { - // Ignore unreadable files and keep indexing the project. 
- return null; - } - }); - - for (const indexEntry of indexEntries) { - if (!indexEntry || indexEntry.entry.packageName === null) continue; - addProjectSymbolFile(index, indexEntry.entry.packageName, indexEntry.filePath, indexEntry.entry.symbols); - } - - sortProjectSymbolIndex(index); - return index; -} - -function getOrCreateProjectSymbolIndex( - cache: Map>, - projectRoot: string, - buildIndex: () => Promise, -): Promise { - const cached = cache.get(projectRoot); - if (cached) return cached; - const pending = buildIndex().catch((error) => { - cache.delete(projectRoot); - throw error; - }); - cache.set(projectRoot, pending); - return pending; -} - -async function getKotlinProjectSymbolIndex(projectRoot: string): Promise { - return await getOrCreateProjectSymbolIndex( - kotlinProjectSymbolIndexCache, - projectRoot, - async () => await buildProjectSymbolIndex(projectRoot, ["**/*.kt", "**/*.kts"], readKotlinSymbolIndex), - ); -} - -async function getJavaProjectSymbolIndex(projectRoot: string): Promise { - return await getOrCreateProjectSymbolIndex( - javaProjectSymbolIndexCache, - projectRoot, - async () => await buildProjectSymbolIndex(projectRoot, ["**/*.java"], readJavaSymbolIndex), - ); -} - -async function getJvmProjectSymbolIndex( - projectRoot: string, - languageId: "java" | "kotlin", -): Promise { - return languageId === "kotlin" - ? await getKotlinProjectSymbolIndex(projectRoot) - : await getJavaProjectSymbolIndex(projectRoot); -} - -export async function resolveJvmPackageImportPaths( - projectRoot: string, - spec: string, - languageId: "java" | "kotlin", -): Promise { - const projectIndex = await getJvmProjectSymbolIndex(projectRoot, languageId); - const packageCandidates = projectIndex.filesByPackage.get(spec) ?? 
[]; - return packageCandidates.map((candidate) => path.resolve(candidate)); -} - async function getPhpProjectSymbolIndex(projectRoot: string): Promise { return await getOrCreateProjectSymbolIndex(phpProjectSymbolIndexCache, projectRoot, async () => { const files = await listProjectLanguageFiles(projectRoot, ["**/*.php"]); @@ -441,246 +309,6 @@ async function getPhpProjectSymbolIndex(projectRoot: string): Promise { - let dir = path.resolve(startDir); - const stop = path.resolve(stopDir); - while (true) { - const candidate = path.join(dir, fileName); - if (await fileExists(candidate)) return candidate; - if (dir === stop) break; - const parent = path.dirname(dir); - if (parent === dir) break; - dir = parent; - } - return null; -} - -async function parseGoMod(moduleRoot: string): Promise { - const modPath = path.join(moduleRoot, "go.mod"); - if (!(await fileExists(modPath))) return null; - const raw = await fsp.readFile(modPath, "utf8"); - const lines = raw.split(/\r?\n/); - let modulePath: string | null = null; - const replacements = new Map(); - for (const rawLine of lines) { - const line = stripInlineComment(rawLine); - if (!line) continue; - if (!modulePath) { - const moduleMatch = line.match(/^module\s+(.+)$/); - if (moduleMatch) { - modulePath = unquote(moduleMatch[1]?.trim() ?? ""); - continue; - } - } - const replaceMatch = line.match(/^replace\s+(\S+)(?:\s+v[^\s]+)?\s+=>\s+(\S+)/); - if (replaceMatch) { - const from = unquote(replaceMatch[1] ?? ""); - const toRaw = unquote(replaceMatch[2] ?? 
""); - if (!from || !toRaw) continue; - if (path.isAbsolute(toRaw) || toRaw.startsWith(".")) { - const toPath = path.resolve(moduleRoot, toRaw); - replacements.set(from, toPath); - } - } - } - if (!modulePath) return null; - return { - modulePath, - moduleRoot, - replacements, - }; -} - -async function parseGoWork(goWorkPath: string): Promise { - const content = await fsp.readFile(goWorkPath, "utf8"); - const lines = content.split(/\r?\n/); - const modules: string[] = []; - let inUseBlock = false; - for (const rawLine of lines) { - const line = stripInlineComment(rawLine); - if (!line) continue; - if (line.startsWith("use (")) { - inUseBlock = true; - continue; - } - if (inUseBlock) { - if (line.startsWith(")")) { - inUseBlock = false; - continue; - } - modules.push(unquote(line)); - continue; - } - const match = line.match(/^use\s+(.+)$/); - if (match) { - modules.push(unquote(match[1] ?? "")); - } - } - return modules.filter(Boolean); -} - -async function findGoPackageEntry(dirPath: string): Promise { - try { - const stat = await fsp.stat(dirPath); - if (!stat.isDirectory()) return null; - } catch { - return null; - } - let entries: fs.Dirent[] = []; - try { - entries = await fsp.readdir(dirPath, { withFileTypes: true }); - } catch { - return null; - } - const goFiles = entries - .filter((entry) => entry.isFile() && entry.name.endsWith(".go") && !entry.name.endsWith("_test.go")) - .map((entry) => entry.name) - .sort((a, b) => a.localeCompare(b)); - if (!goFiles.length) return null; - return path.join(dirPath, goFiles[0] ?? ""); -} - -function isGoStdLib(spec: string): boolean { - const base = spec.split("/")[0] ?? ""; - return !!base.length && !base.includes("."); -} - -async function resolveGoModuleImport(moduleInfo: GoModuleInfo, spec: string): Promise { - const { modulePath, moduleRoot, replacements } = moduleInfo; - if (spec === modulePath || spec.startsWith(`${modulePath}/`)) { - const subPath = spec === modulePath ? 
"" : spec.slice(modulePath.length + 1); - const targetDir = path.join(moduleRoot, subPath); - const entry = await findGoPackageEntry(targetDir); - if (entry) return entry; - } - for (const [from, toPath] of replacements.entries()) { - if (spec === from || spec.startsWith(`${from}/`)) { - const subPath = spec === from ? "" : spec.slice(from.length + 1); - const targetDir = path.join(toPath, subPath); - const entry = await findGoPackageEntry(targetDir); - if (entry) return entry; - } - } - const vendorDir = path.join(moduleRoot, "vendor", spec); - const vendored = await findGoPackageEntry(vendorDir); - if (vendored) return vendored; - return null; -} - -export async function resolveGoImportPath(projectRoot: string, fromFile: string, spec: string): Promise { - const startDir = path.dirname(fromFile); - const goWorkPath = await findNearestFile(startDir, projectRoot, "go.work"); - const moduleInfos: GoModuleInfo[] = []; - - if (goWorkPath) { - const workDir = path.dirname(goWorkPath); - const useDirs = await parseGoWork(goWorkPath); - for (const useDir of useDirs) { - if (!useDir) continue; - const moduleRoot = path.resolve(workDir, useDir); - const modInfo = await parseGoMod(moduleRoot); - if (modInfo) moduleInfos.push(modInfo); - } - } - - if (!moduleInfos.length) { - const goModPath = await findNearestFile(startDir, projectRoot, "go.mod"); - if (goModPath) { - const moduleRoot = path.dirname(goModPath); - const modInfo = await parseGoMod(moduleRoot); - if (modInfo) moduleInfos.push(modInfo); - } - } - - for (const moduleInfo of moduleInfos) { - const resolved = await resolveGoModuleImport(moduleInfo, spec); - if (resolved) return resolved; - } - - if (isGoStdLib(spec)) { - const goRoot = process.env.GOROOT; - if (goRoot) { - const stdlibDir = path.join(goRoot, "src", spec); - const entry = await findGoPackageEntry(stdlibDir); - if (entry) return entry; - } - } - - return null; -} - -async function readKotlinSymbolIndex(filePath: string): Promise { - const cached = 
kotlinSymbolIndexCache.get(filePath); - if (cached) return cached; - - const source = await fsp.readFile(filePath, "utf8"); - const packageName = source.match(/^\s*package\s+([A-Za-z_][\w.]*)/m)?.[1] ?? null; - const symbols = new Set(); - const declarationPattern = /\b(?:class|object|fun|typealias|interface)\s+([A-Za-z_][\w]*)\b/g; - for (const match of source.matchAll(declarationPattern)) { - const symbolName = match[1]; - if (symbolName) symbols.add(symbolName); - } - - const entry = { packageName, symbols }; - kotlinSymbolIndexCache.set(filePath, entry); - return entry; -} - -async function resolveKotlinImportPath(projectRoot: string, spec: string): Promise { - const cacheKey = `${projectRoot}::${spec}`; - const cached = kotlinImportResolutionCache.get(cacheKey); - if (cached !== undefined) return cached; - - const parts = spec.split(".").filter(Boolean); - const projectIndex = await getKotlinProjectSymbolIndex(projectRoot); - if (parts.length < 2) { - const packageCandidates = projectIndex.filesByPackage.get(spec) ?? []; - const resolved = packageCandidates[0] ? path.resolve(packageCandidates[0]) : null; - kotlinImportResolutionCache.set(cacheKey, resolved); - return resolved; - } - - const importedName = parts[parts.length - 1]!; - const packageName = importedName === "*" ? parts.slice(0, -1).join(".") : parts.slice(0, -1).join("."); - const packageCandidates = projectIndex.filesByPackage.get(packageName) ?? []; - - if (importedName === "*") { - const resolved = packageCandidates[0] ? path.resolve(packageCandidates[0]) : null; - kotlinImportResolutionCache.set(cacheKey, resolved); - return resolved; - } - - const symbolFiles = projectIndex.filesByPackageSymbol.get(packageName)?.get(importedName) ?? []; - const resolvedCandidate = symbolFiles[0] ?? packageCandidates[0] ?? null; - const resolved = resolvedCandidate ? 
path.resolve(resolvedCandidate) : null; - kotlinImportResolutionCache.set(cacheKey, resolved); - return resolved; -} - -async function readJavaSymbolIndex(filePath: string): Promise { - const cached = javaSymbolIndexCache.get(filePath); - if (cached) return cached; - - const source = await fsp.readFile(filePath, "utf8"); - const packageName = source.match(/^\s*package\s+([A-Za-z_][\w.]*)\s*;/m)?.[1] ?? null; - const symbols = new Set(); - const declarationPattern = /\b(?:class|interface|enum)\s+([A-Za-z_][\w]*)\b/g; - for (const match of source.matchAll(declarationPattern)) { - const symbolName = match[1]; - if (symbolName) symbols.add(symbolName); - } - - const entry = { packageName, symbols }; - javaSymbolIndexCache.set(filePath, entry); - return entry; -} - async function readPhpSymbolIndex(filePath: string): Promise { const cached = phpSymbolIndexCache.get(filePath); if (cached) return cached; @@ -1384,42 +1012,6 @@ async function resolvePhpImportPath( return pathLikeResolved; } -async function resolveJavaImportPath(projectRoot: string, spec: string): Promise { - const cacheKey = `${projectRoot}::${spec}`; - const cached = javaImportResolutionCache.get(cacheKey); - if (cached !== undefined) return cached; - - const parts = spec.split(".").filter(Boolean); - if (parts.length < 2) { - javaImportResolutionCache.set(cacheKey, null); - return null; - } - - const projectIndex = await getJavaProjectSymbolIndex(projectRoot); - const exactPackageFiles = projectIndex.filesByPackage.get(spec) ?? []; - if (exactPackageFiles[0]) { - const resolved = path.resolve(exactPackageFiles[0]); - javaImportResolutionCache.set(cacheKey, resolved); - return resolved; - } - - const importedName = parts[parts.length - 1]!; - const packageName = importedName === "*" ? parts.slice(0, -1).join(".") : parts.slice(0, -1).join("."); - - const packageCandidates = projectIndex.filesByPackage.get(packageName) ?? []; - if (importedName === "*") { - const resolved = packageCandidates[0] ? 
path.resolve(packageCandidates[0]) : null; - javaImportResolutionCache.set(cacheKey, resolved); - return resolved; - } - - const symbolFiles = projectIndex.filesByPackageSymbol.get(packageName)?.get(importedName) ?? []; - const resolvedCandidate = symbolFiles[0] ?? packageCandidates[0] ?? null; - const resolved = resolvedCandidate ? path.resolve(resolvedCandidate) : null; - javaImportResolutionCache.set(cacheKey, resolved); - return resolved; -} - export async function resolveImportSpecifier( projectRoot: string, fromFile: string, @@ -1679,15 +1271,6 @@ async function findPythonPackageAnchor(startDir: string): Promise { return topWithInit; } -async function isDirectory(p: string): Promise { - try { - const st = await fsp.stat(p); - return st.isDirectory(); - } catch { - return false; - } -} - export async function resolvePythonModule( projectRoot: string, fromFile: string, @@ -1803,12 +1386,7 @@ export function clearImportResolutionCaches(): void { resolveSpecifierCache.clear(); resolvePythonModuleCache.clear(); clearFileExistsCache(); - kotlinImportResolutionCache.clear(); - kotlinSymbolIndexCache.clear(); - kotlinProjectSymbolIndexCache.clear(); - javaImportResolutionCache.clear(); - javaSymbolIndexCache.clear(); - javaProjectSymbolIndexCache.clear(); + clearJvmResolutionCaches(); phpImportResolutionCache.clear(); phpSymbolIndexCache.clear(); phpProjectSymbolIndexCache.clear(); diff --git a/src/util/resolution/files.ts b/src/util/resolution/files.ts new file mode 100644 index 00000000..d51599f9 --- /dev/null +++ b/src/util/resolution/files.ts @@ -0,0 +1,26 @@ +import fsp from "node:fs/promises"; +import path from "node:path"; +import { fileExists } from "../workspace.js"; + +export async function findNearestFile(startDir: string, stopDir: string, fileName: string): Promise { + let dir = path.resolve(startDir); + const stop = path.resolve(stopDir); + while (true) { + const candidate = path.join(dir, fileName); + if (await fileExists(candidate)) return candidate; + 
if (dir === stop) break; + const parent = path.dirname(dir); + if (parent === dir) break; + dir = parent; + } + return null; +} + +export async function isDirectory(p: string): Promise { + try { + const st = await fsp.stat(p); + return st.isDirectory(); + } catch { + return false; + } +}
diff --git a/src/util/resolution/go.ts b/src/util/resolution/go.ts
new file mode 100644
index 00000000..939b2c10
--- /dev/null
+++ b/src/util/resolution/go.ts
@@ -0,0 +1,241 @@
+import fs from "node:fs";
+import fsp from "node:fs/promises";
+import path from "node:path";
+import { unquote } from "../ast.js";
+import { fileExists } from "../workspace.js";
+import { findNearestFile } from "./files.js";
+
+/** Facts read from a single go.mod file. */
+type GoModuleInfo = {
+  /** Path declared by the `module` directive. */
+  modulePath: string;
+  /** Directory that contains the go.mod file. */
+  moduleRoot: string;
+  /** Replaced module path mapped to the absolute local directory it points at. */
+  replacements: Map<string, string>;
+};
+
+/** Matches `old [version] => new`, i.e. a replace directive without its `replace` keyword. */
+const GO_REPLACE_SPEC = /^(\S+)(?:\s+v\S+)?\s+=>\s+(\S+)/;
+
+/** Drops everything from the first `//` onwards and trims the remainder. */
+function stripInlineComment(line: string): string {
+  const idx = line.indexOf("//");
+  return idx === -1 ? line.trim() : line.slice(0, idx).trim();
+}
+
+/**
+ * Records one replacement in `replacements` when its target is a filesystem path.
+ *
+ * Only absolute targets and targets starting with `.` are kept; they are resolved against
+ * `moduleRoot`. Any other target is ignored.
+ */
+function addGoReplacement(replacements: Map<string, string>, moduleRoot: string, spec: string): void {
+  const match = spec.match(GO_REPLACE_SPEC);
+  if (!match) return;
+  const from = unquote(match[1] ?? "");
+  const toRaw = unquote(match[2] ?? "");
+  if (!from || !toRaw) return;
+  if (path.isAbsolute(toRaw) || toRaw.startsWith(".")) {
+    replacements.set(from, path.resolve(moduleRoot, toRaw));
+  }
+}
+
+/**
+ * Parses the go.mod file in `moduleRoot`.
+ *
+ * Collects the `module` path and every `replace` directive with a local target, whether
+ * written on one line or inside a `replace ( ... )` block.
+ *
+ * @returns The module info, or `null` when go.mod does not exist or declares no module path.
+ */
+async function parseGoMod(moduleRoot: string): Promise<GoModuleInfo | null> {
+  const modPath = path.join(moduleRoot, "go.mod");
+  if (!(await fileExists(modPath))) return null;
+  const raw = await fsp.readFile(modPath, "utf8");
+  const lines = raw.split(/\r?\n/);
+  let modulePath: string | null = null;
+  const replacements = new Map<string, string>();
+  let inReplaceBlock = false;
+  for (const rawLine of lines) {
+    const line = stripInlineComment(rawLine);
+    if (!line) continue;
+    if (inReplaceBlock) {
+      // Inside `replace ( ... )` every line is a bare `old => new` spec until the `)`.
+      if (line.startsWith(")")) inReplaceBlock = false;
+      else addGoReplacement(replacements, moduleRoot, line);
+      continue;
+    }
+    if (/^replace\s*\($/.test(line)) {
+      inReplaceBlock = true;
+      continue;
+    }
+    if (!modulePath) {
+      const moduleMatch = line.match(/^module\s+(.+)$/);
+      if (moduleMatch) {
+        modulePath = unquote(moduleMatch[1]?.trim() ?? "");
+        continue;
+      }
+    }
+    const replaceMatch = line.match(/^replace\s+(.+)$/);
+    if (replaceMatch) addGoReplacement(replacements, moduleRoot, replaceMatch[1] ?? "");
+  }
+  if (!modulePath) return null;
+  return {
+    modulePath,
+    moduleRoot,
+    replacements,
+  };
+}
+
+/**
+ * Reads the `use` directives of a go.work file, both single-line and `use ( ... )` blocks.
+ *
+ * @returns The listed module directories as written in the file, with quotes removed.
+ */
+async function parseGoWork(goWorkPath: string): Promise<string[]> {
+  const content = await fsp.readFile(goWorkPath, "utf8");
+  const lines = content.split(/\r?\n/);
+  const modules: string[] = [];
+  let inUseBlock = false;
+  for (const rawLine of lines) {
+    const line = stripInlineComment(rawLine);
+    if (!line) continue;
+    if (line.startsWith("use (")) {
+      inUseBlock = true;
+      continue;
+    }
+    if (inUseBlock) {
+      if (line.startsWith(")")) {
+        inUseBlock = false;
+        continue;
+      }
+      modules.push(unquote(line));
+      continue;
+    }
+    const match = line.match(/^use\s+(.+)$/);
+    if (match) {
+      modules.push(unquote(match[1] ?? ""));
+    }
+  }
+  return modules.filter(Boolean);
+}
+
+/**
+ * Picks the file that stands for the Go package in `dirPath`: the first `.go` file, in
+ * `localeCompare` order, that is not a `_test.go` file.
+ *
+ * @returns Its path, or `null` when `dirPath` is not a readable directory or holds no such file.
+ */
+async function findGoPackageEntry(dirPath: string): Promise<string | null> {
+  try {
+    const stat = await fsp.stat(dirPath);
+    if (!stat.isDirectory()) return null;
+  } catch {
+    return null;
+  }
+  let entries: fs.Dirent[] = [];
+  try {
+    entries = await fsp.readdir(dirPath, { withFileTypes: true });
+  } catch {
+    return null;
+  }
+  const goFiles = entries
+    .filter((entry) => entry.isFile() && entry.name.endsWith(".go") && !entry.name.endsWith("_test.go"))
+    .map((entry) => entry.name)
+    .sort((a, b) => a.localeCompare(b));
+  if (!goFiles.length) return null;
+  return path.join(dirPath, goFiles[0] ?? "");
+}
+
+/** Heuristic: an import path whose first segment is non-empty and has no dot is standard library. */
+function isGoStdLib(spec: string): boolean {
+  const base = spec.split("/")[0] ?? "";
+  return !!base.length && !base.includes(".");
+}
+
+/**
+ * Resolves `spec` against one module, trying in order: the module's own tree, its local
+ * `replace` targets, and its `vendor` directory.
+ *
+ * @returns The entry file of the first package directory found, or `null`.
+ */
+async function resolveGoModuleImport(moduleInfo: GoModuleInfo, spec: string): Promise<string | null> {
+  const { modulePath, moduleRoot, replacements } = moduleInfo;
+  if (spec === modulePath || spec.startsWith(`${modulePath}/`)) {
+    const subPath = spec === modulePath ? "" : spec.slice(modulePath.length + 1);
+    const targetDir = path.join(moduleRoot, subPath);
+    const entry = await findGoPackageEntry(targetDir);
+    if (entry) return entry;
+  }
+  for (const [from, toPath] of replacements.entries()) {
+    if (spec === from || spec.startsWith(`${from}/`)) {
+      const subPath = spec === from ? "" : spec.slice(from.length + 1);
+      const targetDir = path.join(toPath, subPath);
+      const entry = await findGoPackageEntry(targetDir);
+      if (entry) return entry;
+    }
+  }
+  const vendorDir = path.join(moduleRoot, "vendor", spec);
+  const vendored = await findGoPackageEntry(vendorDir);
+  if (vendored) return vendored;
+  return null;
+}
+
+/**
+ * Resolves a Go import path to a source file inside the project.
+ *
+ * Modules come from the nearest go.work between `fromFile` and `projectRoot` (its `use`
+ * entries); when that yields none, from the nearest go.mod. If no module resolves `spec`
+ * and it looks like a standard-library path, `$GOROOT/src` is tried when GOROOT is set.
+ *
+ * @param projectRoot Directory at which the upward search for go.work / go.mod stops.
+ * @param fromFile File containing the import.
+ * @param spec Import path as written in the source.
+ * @returns The package's entry file, or `null` when nothing matches.
+ */
+export async function resolveGoImportPath(
+  projectRoot: string,
+  fromFile: string,
+  spec: string,
+): Promise<string | null> {
+  const startDir = path.dirname(fromFile);
+  const goWorkPath = await findNearestFile(startDir, projectRoot, "go.work");
+  const moduleInfos: GoModuleInfo[] = [];
+
+  if (goWorkPath) {
+    const workDir = path.dirname(goWorkPath);
+    const useDirs = await parseGoWork(goWorkPath);
+    for (const useDir of useDirs) {
+      if (!useDir) continue;
+      const moduleRoot = path.resolve(workDir, useDir);
+      const modInfo = await parseGoMod(moduleRoot);
+      if (modInfo) moduleInfos.push(modInfo);
+    }
+  }
+
+  if (!moduleInfos.length) {
+    const goModPath = await findNearestFile(startDir, projectRoot, "go.mod");
+    if (goModPath) {
+      const moduleRoot = path.dirname(goModPath);
+      const modInfo = await parseGoMod(moduleRoot);
+      if (modInfo) moduleInfos.push(modInfo);
+    }
+  }
+
+  for (const moduleInfo of moduleInfos) {
+    const resolved = await resolveGoModuleImport(moduleInfo, spec);
+    if (resolved) return resolved;
+  }
+
+  if (isGoStdLib(spec)) {
+    const goRoot = process.env.GOROOT;
+    if (goRoot) {
+      const stdlibDir = path.join(goRoot, "src", spec);
+      const entry = await findGoPackageEntry(stdlibDir);
+      if (entry) return entry;
+    }
+  }
+
+  return null;
+}
diff --git a/src/util/resolution/jvm.ts b/src/util/resolution/jvm.ts new file
mode 100644 index 00000000..a28999af --- /dev/null +++ b/src/util/resolution/jvm.ts @@ -0,0 +1,172 @@ +import fsp from "node:fs/promises"; +import path from "node:path"; +import { + buildProjectSymbolIndex, + getOrCreateProjectSymbolIndex, + type LanguageProjectSymbolIndex, +} from "./projectSymbols.js"; + +/** Per-file scan result for a Kotlin source: its declared package (null when no package line was found) and the declaration names collected from it. */ type KotlinSymbolIndexEntry = { + packageName: string | null; + symbols: Set; +}; + +/** Per-file scan result for a Java source: its declared package (null when no package statement was found) and the declaration names collected from it. */ type JavaSymbolIndexEntry = { + packageName: string | null; + symbols: Set; +}; + +/* Module-level memoization. The import resolution caches are keyed by projectRoot::spec, the symbol index caches by file path, and the project index caches by project root. All six are emptied by clearJvmResolutionCaches. */ const kotlinImportResolutionCache = new Map(); +const kotlinSymbolIndexCache = new Map(); +const kotlinProjectSymbolIndexCache = new Map>(); +const javaImportResolutionCache = new Map(); +const javaSymbolIndexCache = new Map(); +const javaProjectSymbolIndexCache = new Map>(); + +/** Scans one Kotlin file and returns its package name plus the identifiers that follow the keywords class, object, fun, typealias and interface. The result is memoized per file path in kotlinSymbolIndexCache. This is a regular-expression scan over the raw text, not a parse: it matches at any nesting depth, does not skip comments or string literals, and for an extension function written as fun Receiver.name it captures Receiver. packageName is null when no package line matches. Rejects if the file cannot be read. */ async function readKotlinSymbolIndex(filePath: string): Promise { + const cached = kotlinSymbolIndexCache.get(filePath); + if (cached) return cached; + + const source = await fsp.readFile(filePath, "utf8"); + const packageName = source.match(/^\s*package\s+([A-Za-z_][\w.]*)/m)?.[1] ?? null; + const symbols = new Set(); + const declarationPattern = /\b(?:class|object|fun|typealias|interface)\s+([A-Za-z_][\w]*)\b/g; + for (const match of source.matchAll(declarationPattern)) { + const symbolName = match[1]; + if (symbolName) symbols.add(symbolName); + } + + const entry = { packageName, symbols }; + kotlinSymbolIndexCache.set(filePath, entry); + return entry; +} + +/** Java counterpart of readKotlinSymbolIndex: returns the package name (the package statement must end with a semicolon to match) and the identifiers that follow class, interface or enum, memoized per file path in javaSymbolIndexCache. Being a text scan, it includes nested types and does not skip comments or strings. NOTE(review): record declarations are not covered by the pattern; confirm whether that is intended. */ async function readJavaSymbolIndex(filePath: string): Promise { + const cached = javaSymbolIndexCache.get(filePath); + if (cached) return cached; + + const source = await fsp.readFile(filePath, "utf8"); + const packageName = source.match(/^\s*package\s+([A-Za-z_][\w.]*)\s*;/m)?.[1] ?? 
null; + const symbols = new Set(); + const declarationPattern = /\b(?:class|interface|enum)\s+([A-Za-z_][\w]*)\b/g; + for (const match of source.matchAll(declarationPattern)) { + const symbolName = match[1]; + if (symbolName) symbols.add(symbolName); + } + + const entry = { packageName, symbols }; + javaSymbolIndexCache.set(filePath, entry); + return entry; +} + +/** Returns the project-wide Kotlin symbol index for projectRoot, reusing the cached build for that root when one exists. A new build uses the glob patterns for kt and kts files and scans each file with readKotlinSymbolIndex. */ async function getKotlinProjectSymbolIndex(projectRoot: string): Promise { + return await getOrCreateProjectSymbolIndex( + kotlinProjectSymbolIndexCache, + projectRoot, + async () => await buildProjectSymbolIndex(projectRoot, ["**/*.kt", "**/*.kts"], readKotlinSymbolIndex), + ); +} + +/** Returns the project-wide Java symbol index for projectRoot, reusing the cached build for that root when one exists. A new build uses the glob pattern for java files and scans each file with readJavaSymbolIndex. */ async function getJavaProjectSymbolIndex(projectRoot: string): Promise { + return await getOrCreateProjectSymbolIndex( + javaProjectSymbolIndexCache, + projectRoot, + async () => await buildProjectSymbolIndex(projectRoot, ["**/*.java"], readJavaSymbolIndex), + ); +} + +/** Dispatches to the Kotlin index when languageId is kotlin and to the Java index otherwise. */ async function getJvmProjectSymbolIndex( + projectRoot: string, + languageId: "java" | "kotlin", +): Promise { + if (languageId === "kotlin") { + return await getKotlinProjectSymbolIndex(projectRoot); + } + return await getJavaProjectSymbolIndex(projectRoot); +} + +/** Treats spec as a package name and returns every indexed file of that package for the given language, each passed through path.resolve. Returns an empty array when the package is not in the index. */ export async function resolveJvmPackageImportPaths( + projectRoot: string, + spec: string, + languageId: "java" | "kotlin", +): Promise { + const projectIndex = await getJvmProjectSymbolIndex(projectRoot, languageId); + const packageCandidates = projectIndex.filesByPackage.get(spec) ?? 
[]; + return packageCandidates.map((candidate) => path.resolve(candidate)); +} + +/** Resolves a Kotlin import spec to a single file path (through path.resolve) or null. Results, including null, are memoized per projectRoot and spec. A spec with fewer than two dot-separated segments is looked up as a package name and yields the first file of that package. Otherwise the last segment is the imported name and the remaining segments form the package: a star import yields the first file of the package, and a named import yields the first file recorded for that symbol in the package, falling back to the first file of the package. */ export async function resolveKotlinImportPath(projectRoot: string, spec: string): Promise { + const cacheKey = `${projectRoot}::${spec}`; + const cached = kotlinImportResolutionCache.get(cacheKey); + if (cached !== undefined) return cached; + + const parts = spec.split(".").filter(Boolean); + const projectIndex = await getKotlinProjectSymbolIndex(projectRoot); + if (parts.length < 2) { + const packageCandidates = projectIndex.filesByPackage.get(spec) ?? []; + const resolved = packageCandidates[0] ? path.resolve(packageCandidates[0]) : null; + kotlinImportResolutionCache.set(cacheKey, resolved); + return resolved; + } + + const importedName = parts[parts.length - 1]!; + const packageName = parts.slice(0, -1).join("."); + const packageCandidates = projectIndex.filesByPackage.get(packageName) ?? []; + + if (importedName === "*") { + const resolved = packageCandidates[0] ? path.resolve(packageCandidates[0]) : null; + kotlinImportResolutionCache.set(cacheKey, resolved); + return resolved; + } + + const symbolFiles = projectIndex.filesByPackageSymbol.get(packageName)?.get(importedName) ?? []; + const resolvedCandidate = symbolFiles[0] ?? packageCandidates[0] ?? null; + const resolved = resolvedCandidate ? path.resolve(resolvedCandidate) : null; + kotlinImportResolutionCache.set(cacheKey, resolved); + return resolved; +} + +/** Resolves a Java import spec to a single file path (through path.resolve) or null, memoized per projectRoot and spec (null results are cached too). Specs with fewer than two dot-separated segments resolve to null without consulting the index. If the whole spec is itself an indexed package, its first file is returned. Otherwise the last segment is the imported name and the rest is the package: a star import gives the first file of the package, and a named import gives the first file recorded for that symbol, falling back to the first file of the package. */ export async function resolveJavaImportPath(projectRoot: string, spec: string): Promise { + const cacheKey = `${projectRoot}::${spec}`; + const cached = javaImportResolutionCache.get(cacheKey); + if (cached !== undefined) return cached; + + const parts = spec.split(".").filter(Boolean); + if (parts.length < 2) { + javaImportResolutionCache.set(cacheKey, null); + return null; + } + + const projectIndex = await getJavaProjectSymbolIndex(projectRoot); + const exactPackageFiles = projectIndex.filesByPackage.get(spec) ?? 
[]; + /* The full spec matched a package name: answer with the first file of that package. */ if (exactPackageFiles[0]) { + const resolved = path.resolve(exactPackageFiles[0]); + javaImportResolutionCache.set(cacheKey, resolved); + return resolved; + } + + const importedName = parts[parts.length - 1]!; + const packageName = parts.slice(0, -1).join("."); + + const packageCandidates = projectIndex.filesByPackage.get(packageName) ?? []; + if (importedName === "*") { + const resolved = packageCandidates[0] ? path.resolve(packageCandidates[0]) : null; + javaImportResolutionCache.set(cacheKey, resolved); + return resolved; + } + + const symbolFiles = projectIndex.filesByPackageSymbol.get(packageName)?.get(importedName) ?? []; + const resolvedCandidate = symbolFiles[0] ?? packageCandidates[0] ?? null; + const resolved = resolvedCandidate ? path.resolve(resolvedCandidate) : null; + javaImportResolutionCache.set(cacheKey, resolved); + return resolved; +} + +/** Empties all six module-level caches: import resolutions, per-file symbol scans and project indexes, for both Kotlin and Java. */ export function clearJvmResolutionCaches(): void { + kotlinImportResolutionCache.clear(); + kotlinSymbolIndexCache.clear(); + kotlinProjectSymbolIndexCache.clear(); + javaImportResolutionCache.clear(); + javaSymbolIndexCache.clear(); + javaProjectSymbolIndexCache.clear(); +} diff --git a/src/util/resolution/projectSymbols.ts b/src/util/resolution/projectSymbols.ts new file mode 100644 index 00000000..6b007b45 --- /dev/null +++ b/src/util/resolution/projectSymbols.ts @@ -0,0 +1,94 @@ +import { normalizePath } from "../paths.js"; +import { listProjectFiles } from "../projectFiles.js"; +import { mapLimitSemaphore } from "../semaphore.js"; + +/** Project-wide lookup tables for one language. files is the list returned by file discovery; filesByPackage holds the file paths recorded under each package name; filesByPackageSymbol holds, per package name, the file paths recorded under each declared symbol name. */ export type LanguageProjectSymbolIndex = { + files: string[]; + filesByPackage: Map; + filesByPackageSymbol: Map>; +}; + +/** Thin wrapper that forwards projectRoot and patterns to listProjectFiles and returns its result. */ export async function listProjectLanguageFiles(projectRoot: string, patterns: string[]): Promise { + return await listProjectFiles(projectRoot, patterns); +} + +/** Records filePath under packageName in filesByPackage and under every name in symbols in filesByPackageSymbol, creating the list or inner map on first use. Mutates index in place; paths are appended without a duplicate check. */ export function addProjectSymbolFile( + index: LanguageProjectSymbolIndex, + packageName: string, + filePath: string, + symbols: Set, +): void { + const packageFiles = 
index.filesByPackage.get(packageName) ?? []; + packageFiles.push(filePath); + index.filesByPackage.set(packageName, packageFiles); + + let symbolFiles = index.filesByPackageSymbol.get(packageName); + if (!symbolFiles) { + symbolFiles = new Map(); + index.filesByPackageSymbol.set(packageName, symbolFiles); + } + for (const symbolName of symbols) { + const files = symbolFiles.get(symbolName) ?? []; + files.push(filePath); + symbolFiles.set(symbolName, files); + } +} + +/** Sorts every file list in the index in place, comparing paths after normalizePath with localeCompare. The set calls store the same array objects back under their existing keys. NOTE(review): localeCompare without an explicit locale follows the runtime default locale; confirm this gives the ordering stability callers expect. */ export function sortProjectSymbolIndex(index: LanguageProjectSymbolIndex): void { + for (const [packageName, files] of index.filesByPackage) { + files.sort((left, right) => normalizePath(left).localeCompare(normalizePath(right))); + index.filesByPackage.set(packageName, files); + } + for (const symbolFiles of index.filesByPackageSymbol.values()) { + for (const [symbolName, files] of symbolFiles) { + files.sort((left, right) => normalizePath(left).localeCompare(normalizePath(right))); + symbolFiles.set(symbolName, files); + } + } +} + +/** Builds a LanguageProjectSymbolIndex for the project files that match patterns. Each file is read through readIndexEntry via mapLimitSemaphore (the 8 is presumably a concurrency limit; confirm against that helper). A file whose read throws is skipped, and a file whose entry has a null packageName stays in files but is left out of both maps. The file lists are sorted before the index is returned. */ export async function buildProjectSymbolIndex }>( + projectRoot: string, + patterns: string[], + readIndexEntry: (filePath: string) => Promise, +): Promise { + const files = await listProjectLanguageFiles(projectRoot, patterns); + const index: LanguageProjectSymbolIndex = { + files, + filesByPackage: new Map(), + filesByPackageSymbol: new Map>(), + }; + + const indexEntries = await mapLimitSemaphore(files, 8, async (filePath) => { + try { + const entry = await readIndexEntry(filePath); + return { filePath, entry }; + } catch { + // Ignore unreadable files and keep indexing the project. 
+ return null; + } + }); + + /* A null marks a file whose read failed; an entry with a null package name has no key to file it under. Both are skipped. */ for (const indexEntry of indexEntries) { + if (!indexEntry || indexEntry.entry.packageName === null) continue; + addProjectSymbolFile(index, indexEntry.entry.packageName, indexEntry.filePath, indexEntry.entry.symbols); + } + + sortProjectSymbolIndex(index); + return index; +} + +/** Returns the cached index promise for projectRoot, or starts buildIndex and caches the pending promise immediately so that callers arriving before it settles share the same build. If the build rejects, the cache entry is removed before the error is rethrown, so a later call starts a fresh build. */ export function getOrCreateProjectSymbolIndex( + cache: Map>, + projectRoot: string, + buildIndex: () => Promise, +): Promise { + const cached = cache.get(projectRoot); + if (cached) return cached; + const pending = buildIndex().catch((error) => { + cache.delete(projectRoot); + throw error; + }); + cache.set(projectRoot, pending); + return pending; +} diff --git a/src/worker/nativeExtractWorker.ts b/src/worker/nativeExtractWorker.ts index 945f0f7e..470b4643 100644 --- a/src/worker/nativeExtractWorker.ts +++ b/src/worker/nativeExtractWorker.ts @@ -3,7 +3,12 @@ import { createRequire } from "node:module"; import path from "node:path"; import { fileURLToPath } from "node:url"; -import type { NativeQueryResults, CompactQueryResults } from "../native/treeSitterNative.js"; +import type { + CompactQueryResults, + NativeBinding, + NativeFallbackReason, + NativeQueryResults, +} from "../native/contracts.js"; import { loadNativeBinding } from "../native/bindingLoader.js"; import type { NativeBindingLoadResult } from "../native/bindingLoader.js"; @@ -24,23 +29,10 @@ export type NativeExtractResult = { source: string; nativeResults: NativeQueryResults | null; compactResults: CompactQueryResults | null; - fallbackReason?: "unavailable" | "unsupportedLanguage" | "queryFailure"; + fallbackReason?: NativeFallbackReason; error?: string; }; -type NativeBinding = { - runLanguageQueries: ( - source: string, - languageId: string, - importsQuery: string, - exportsQuery: string, - localsQuery: string, - importBindingsQuery: string, - ) => NativeQueryResults; - runImportsQueryCompact?: (source: string, languageId: string, importsQuery: string) => CompactQueryResults; - 
supportedLanguageIds: () => string[]; -}; - const require = createRequire(import.meta.url); const localNativePackageRoot = path.resolve( path.dirname(fileURLToPath(import.meta.url)), diff --git a/tests/mcp-server.test.ts b/tests/mcp-server.test.ts index 153ddffc..a9abc1dd 100644 --- a/tests/mcp-server.test.ts +++ b/tests/mcp-server.test.ts @@ -407,6 +407,9 @@ describe("codegraph MCP handlers", () => { await expect(handlers.query_sqlite({ query: "SELECT 'randomblob(300000)' AS text;" })).resolves.toEqual( expect.objectContaining({ rows: [["randomblob(300000)"]] }), ); + /* Here the blob function names occur only inside an SQL block comment and an SQL line comment; the query is expected to run and return the single row 1. NOTE(review): presumably this guards a blob-size check that inspects the query text; confirm against the query_sqlite handler. */ await expect( + handlers.query_sqlite({ query: "SELECT 1 AS ok /* zeroblob(300000) */ -- randomblob(300000)" }), + ).resolves.toEqual(expect.objectContaining({ rows: [[1]] })); }); it("disables artifact builds by default and in explicit read-only mode", async () => { diff --git a/tests/project-file-discovery.test.ts b/tests/project-file-discovery.test.ts index 62956be7..a6399786 100644 --- a/tests/project-file-discovery.test.ts +++ b/tests/project-file-discovery.test.ts @@ -5,6 +5,7 @@ import fs from "node:fs/promises"; import { buildProjectIndex, listProjectFiles, discoverProjectFiles } from "../src/index.js"; import { DEFAULT_PROJECT_MANIFESTS } from "../src/util.js"; import { isRelativePathInside, translateGlobRootIgnoreGlobsForScanRoot } from "../src/util/projectFiles.js"; +import { parseDotnetName, parseGoModuleName, parsePomName, parseTomlName } from "../src/util/projectFiles/parsers.js"; const normalize = (value: string) => value.replace(/\\/g, "/"); @@ -21,6 +22,18 @@ async function createFile(filePath: string, contents: string) { } describe("project file discovery", () => { + /* Calls the manifest name parsers directly on literal strings, so no project files are created or scanned by this test. */ it("parses manifest names without full project traversal", () => { + expect(parseTomlName('[project]\nname = "py-app" # comment\n', ["project"])).toBe("py-app"); + expect(parseTomlName("[package]\nname = 'rust-app'\n", ["package"])).toBe("rust-app"); + expect(parseGoModuleName('module example.com/app # 
"commented"\n')).toBe("example.com/app"); + expect(parsePomName("Parentchild")).toBe( + "child", + ); + expect(parseDotnetName("DotNet.App")).toBe( + "DotNet.App", + ); + }); + it("fails explicitly when the project root is invalid", async () => { const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "codegraph-project-missing-")); const missingRoot = path.join(tempDir, "missing-root");