Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions crates/codegraph-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -187,3 +187,28 @@ pub fn extract_dataflow_analysis(
) -> Option<types::DataflowResult> {
ast_analysis::engine::extract_dataflow_standalone(&source, &file_path, lang_id.as_deref())
}

/// Run dataflow extraction over a whole list of files in one NAPI call.
///
/// This is the many-files sibling of `extract_dataflow_analysis`. The native
/// orchestrator's P6 vertex pass wants a `DataflowResult` for every
/// dataflow-bearing file on a full build; issuing one `extract_dataflow_analysis`
/// call per file serialised hundreds of parses on the JS event loop and
/// dominated the native full-build benchmark. Here every path is read from disk
/// and handed to the dataflow extractor on the rayon thread pool instead. Each
/// `parse_source` builds its own tree-sitter `Parser`, so the per-file work is
/// embarrassingly parallel.
///
/// # Arguments
///
/// * `file_paths` - paths of the files to read and analyse. No language id is
///   forwarded (`None` is passed), so the extractor has to determine the
///   language on its own — presumably from the path; confirm against
///   `extract_dataflow_standalone`.
///
/// # Returns
///
/// One entry per input path, in input order, so the caller can map results
/// straight back onto its own list. An entry is `None` when the file could not
/// be read as UTF-8 text or when the extractor produced no result for it (for
/// example, a language without dataflow rules).
#[napi]
pub fn extract_dataflow_analysis_batch(
    file_paths: Vec<String>,
) -> Vec<Option<types::DataflowResult>> {
    use rayon::prelude::*;

    file_paths
        .par_iter()
        .map(|path| match std::fs::read_to_string(path) {
            // Readable file: hand its text to the standalone extractor.
            Ok(text) => ast_analysis::engine::extract_dataflow_standalone(&text, path, None),
            // Missing, unreadable, or non-UTF-8 file: keep the slot, report `None`.
            Err(_) => None,
        })
        .collect()
}
48 changes: 39 additions & 9 deletions src/domain/graph/builder/stages/native-orchestrator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -370,22 +370,52 @@ async function runDataflowVertexPass(
const nativeDataflow = new Map<string, DataflowResult>();
const wasmStubs = new Map<string, { definitions: []; _langId: null; _tree: null }>();

for (const relPath of filesToProcess) {
const absPath = path.join(ctx.rootDir, relPath);
const source = readFileSafe(absPath);
if (!source) continue;
let result: DataflowResult | null = null;
const absPaths = filesToProcess.map((relPath) => path.join(ctx.rootDir, relPath));

// Batch the per-file dataflow extraction into one NAPI call so the parses run
// across the rayon thread pool instead of serially on the event loop — this is
// the dominant cost of a native full build (#perf). Older addons predate the
// batch export, so fall back to the per-file path when it is unavailable.
let batchResults: (DataflowResult | null)[] | null = null;
if (typeof native.extractDataflowAnalysisBatch === 'function') {
try {
result = native.extractDataflowAnalysis(source, absPath);
batchResults = native.extractDataflowAnalysisBatch(absPaths);
} catch {
// Language-specific parse failure — fall through to WASM.
batchResults = null; // fall through to per-file extraction below
}
}

for (let i = 0; i < filesToProcess.length; i++) {
const relPath = filesToProcess[i]!;
let result: DataflowResult | null = null;
if (batchResults) {
result = batchResults[i] ?? null;
} else {
let source: string;
try {
source = readFileSafe(absPaths[i]!);
} catch {
// Unreadable file — mirror batch-path behaviour and route to WASM.
wasmStubs.set(relPath, { definitions: [], _langId: null, _tree: null });
continue;
}
if (!source) {
// Empty file — same treatment as batch returning null.
wasmStubs.set(relPath, { definitions: [], _langId: null, _tree: null });
continue;
}
try {
result = native.extractDataflowAnalysis(source, absPaths[i]!);
} catch {
// Language-specific parse failure — fall through to WASM.
}
}
if (result) {
// Normalise the native DataflowResult: Rust emits `bindingType: string | null`
// (flat) while the TS dataflow layer expects `binding: { type, index? }` (object).
// patchNativeResult handles this via patchDataflow for the full parse path;
// extractDataflowAnalysis is a vertex-only fast path that bypasses patchNativeResult,
// so we apply the same normalisation here.
// extractDataflowAnalysis(Batch) is a vertex-only fast path that bypasses
// patchNativeResult, so we apply the same normalisation here.
patchDataflowResult(result);
nativeDataflow.set(relPath, result);
} else {
Expand Down
19 changes: 16 additions & 3 deletions src/features/dataflow.ts
Original file line number Diff line number Diff line change
Expand Up @@ -741,11 +741,24 @@ function makeNodeResolver(
stmts: ReturnType<typeof prepareNodeResolvers>,
relPath: string,
): (funcName: string) => { id: number } | null {
// Memoise per (relPath, funcName). buildDataflowVerticesAndEdges resolves the
// same handful of function names many times per file — once per param, return,
// assignment, argFlow, summary row, and capture — and each miss costs one or
// two `nodes` table queries. The nodes table is never mutated during the P6
// vertex pass (only dataflow* tables are written), so the lookup is stable for
// the lifetime of the resolver; caching collapses tens of thousands of
// redundant queries on a full build into one per distinct name (#perf).
const cache = new Map<string, { id: number } | null>();
return (funcName: string): { id: number } | null => {
const cached = cache.get(funcName);
if (cached !== undefined) return cached;
const local = stmts.getNodeByNameAndFile.all(funcName, relPath) as { id: number }[];
if (local.length > 0) return local[0]!;
const global = stmts.getNodeByName.all(funcName) as { id: number }[];
return global.length > 0 ? global[0]! : null;
const resolved =
local.length > 0
? local[0]!
: ((stmts.getNodeByName.all(funcName) as { id: number }[])[0] ?? null);
cache.set(funcName, resolved);
return resolved;
};
}

Expand Down
8 changes: 8 additions & 0 deletions src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2286,6 +2286,14 @@ export interface NativeAddon {
filePath: string,
langId?: string | null,
): DataflowResult | null;
/**
* Batch counterpart to {@link extractDataflowAnalysis}: read and analyse many
* files in parallel (rayon) in a single NAPI call. Results are positional —
* `null` where the file could not be read or has no dataflow rules. Optional:
* older published addons predate this export, so callers must feature-detect
* and fall back to per-file `extractDataflowAnalysis`.
*/
extractDataflowAnalysisBatch?(filePaths: string[]): (DataflowResult | null)[];
ParseTreeCache: new () => NativeParseTreeCache;
NativeDatabase: {
openReadWrite(dbPath: string): NativeDatabase;
Expand Down
Loading