diff --git a/crates/codegraph-core/src/lib.rs b/crates/codegraph-core/src/lib.rs
index a40b9d77..a4e7a3a2 100644
--- a/crates/codegraph-core/src/lib.rs
+++ b/crates/codegraph-core/src/lib.rs
@@ -187,3 +187,28 @@ pub fn extract_dataflow_analysis(
 ) -> Option<DataflowResult> {
     ast_analysis::engine::extract_dataflow_standalone(&source, &file_path, lang_id.as_deref())
 }
+
+/// Batch counterpart to `extract_dataflow_analysis`: read and analyse many files
+/// in parallel in a single NAPI call.
+///
+/// The native orchestrator's P6 vertex pass needs a `DataflowResult` for every
+/// dataflow-bearing file on a full build. Calling `extract_dataflow_analysis`
+/// once per file serialised hundreds of parses on the JS event loop and dominated
+/// the native full-build benchmark. This reads each path from disk and runs the
+/// dataflow extractor across the rayon thread pool, returning results positionally
+/// (`None` where the file could not be read or the language has no dataflow rules),
+/// so the caller maps them straight back onto its input list. Each `parse_source`
+/// builds its own tree-sitter `Parser`, so the work is embarrassingly parallel.
+#[napi]
+pub fn extract_dataflow_analysis_batch(
+    file_paths: Vec<String>,
+) -> Vec<Option<DataflowResult>> {
+    use rayon::prelude::*;
+    file_paths
+        .par_iter()
+        .map(|file_path| {
+            let source = std::fs::read_to_string(file_path).ok()?;
+            ast_analysis::engine::extract_dataflow_standalone(&source, file_path, None)
+        })
+        .collect()
+}
diff --git a/src/domain/graph/builder/stages/native-orchestrator.ts b/src/domain/graph/builder/stages/native-orchestrator.ts
index 10ec9f65..f6721c87 100644
--- a/src/domain/graph/builder/stages/native-orchestrator.ts
+++ b/src/domain/graph/builder/stages/native-orchestrator.ts
@@ -370,22 +370,52 @@ async function runDataflowVertexPass(
   const nativeDataflow = new Map();
   const wasmStubs = new Map();
 
-  for (const relPath of filesToProcess) {
-    const absPath = path.join(ctx.rootDir, relPath);
-    const source = readFileSafe(absPath);
-    if (!source) continue;
-    let result: DataflowResult | null = null;
+  const absPaths = filesToProcess.map((relPath) => path.join(ctx.rootDir, relPath));
+
+  // Batch the per-file dataflow extraction into one NAPI call so the parses run
+  // across the rayon thread pool instead of serially on the event loop — this is
+  // the dominant cost of a native full build (#perf). Older addons predate the
+  // batch export, so fall back to the per-file path when it is unavailable.
+  let batchResults: (DataflowResult | null)[] | null = null;
+  if (typeof native.extractDataflowAnalysisBatch === 'function') {
     try {
-      result = native.extractDataflowAnalysis(source, absPath);
+      batchResults = native.extractDataflowAnalysisBatch(absPaths);
     } catch {
-      // Language-specific parse failure — fall through to WASM.
+      batchResults = null; // fall through to per-file extraction below
+    }
+  }
+
+  for (let i = 0; i < filesToProcess.length; i++) {
+    const relPath = filesToProcess[i]!;
+    let result: DataflowResult | null = null;
+    if (batchResults) {
+      result = batchResults[i] ?? null;
+    } else {
+      let source: string;
+      try {
+        source = readFileSafe(absPaths[i]!);
+      } catch {
+        // Unreadable file — mirror batch-path behaviour and route to WASM.
+        wasmStubs.set(relPath, { definitions: [], _langId: null, _tree: null });
+        continue;
+      }
+      if (!source) {
+        // Empty file — same treatment as batch returning null.
+        wasmStubs.set(relPath, { definitions: [], _langId: null, _tree: null });
+        continue;
+      }
+      try {
+        result = native.extractDataflowAnalysis(source, absPaths[i]!);
+      } catch {
+        // Language-specific parse failure — fall through to WASM.
+      }
     }
     if (result) {
       // Normalise the native DataflowResult: Rust emits `bindingType: string | null`
       // (flat) while the TS dataflow layer expects `binding: { type, index? }` (object).
       // patchNativeResult handles this via patchDataflow for the full parse path;
-      // extractDataflowAnalysis is a vertex-only fast path that bypasses patchNativeResult,
-      // so we apply the same normalisation here.
+      // extractDataflowAnalysis(Batch) is a vertex-only fast path that bypasses
+      // patchNativeResult, so we apply the same normalisation here.
       patchDataflowResult(result);
       nativeDataflow.set(relPath, result);
     } else {
diff --git a/src/features/dataflow.ts b/src/features/dataflow.ts
index e0489479..369265be 100644
--- a/src/features/dataflow.ts
+++ b/src/features/dataflow.ts
@@ -741,11 +741,24 @@ function makeNodeResolver(
   stmts: ReturnType,
   relPath: string,
 ): (funcName: string) => { id: number } | null {
+  // Memoise per (relPath, funcName). buildDataflowVerticesAndEdges resolves the
+  // same handful of function names many times per file — once per param, return,
+  // assignment, argFlow, summary row, and capture — and each miss costs one or
+  // two `nodes` table queries. The nodes table is never mutated during the P6
+  // vertex pass (only dataflow* tables are written), so the lookup is stable for
+  // the lifetime of the resolver; caching collapses tens of thousands of
+  // redundant queries on a full build into one per distinct name (#perf).
+  const cache = new Map<string, { id: number } | null>();
   return (funcName: string): { id: number } | null => {
+    const cached = cache.get(funcName);
+    if (cached !== undefined) return cached;
     const local = stmts.getNodeByNameAndFile.all(funcName, relPath) as { id: number }[];
-    if (local.length > 0) return local[0]!;
-    const global = stmts.getNodeByName.all(funcName) as { id: number }[];
-    return global.length > 0 ? global[0]! : null;
+    const resolved =
+      local.length > 0
+        ? local[0]!
+        : ((stmts.getNodeByName.all(funcName) as { id: number }[])[0] ?? null);
+    cache.set(funcName, resolved);
+    return resolved;
   };
 }
 
diff --git a/src/types.ts b/src/types.ts
index c85f36f2..40bcb1b4 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -2286,6 +2286,14 @@ export interface NativeAddon {
     filePath: string,
     langId?: string | null,
   ): DataflowResult | null;
+  /**
+   * Batch counterpart to {@link extractDataflowAnalysis}: read and analyse many
+   * files in parallel (rayon) in a single NAPI call. Results are positional —
+   * `null` where the file could not be read or has no dataflow rules. Optional:
+   * older published addons predate this export, so callers must feature-detect
+   * and fall back to per-file `extractDataflowAnalysis`.
+   */
+  extractDataflowAnalysisBatch?(filePaths: string[]): (DataflowResult | null)[];
   ParseTreeCache: new () => NativeParseTreeCache;
   NativeDatabase: {
     openReadWrite(dbPath: string): NativeDatabase;