From c7f505754307410211d04f6658b6e6bbe1c6bce4 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Sat, 20 Jun 2026 19:47:57 -0600 Subject: [PATCH 1/4] feat: add ignoreAdditionalDirs config to let repos extend IGNORE_DIRS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add an `ignoreAdditionalDirs` key to `.codegraphrc.json` (array of strings) that is merged with the global IGNORE_DIRS set at file-collection time. This lets each repo declare its own carve-outs without baking them into the hardcoded global default. - Add `buildIgnoreSet(additionalDirs?)` helper to `shared/constants.ts` that merges IGNORE_DIRS with any extra dirs without mutating the original set - Add `ignoreAdditionalDirs: string[]` to `CodegraphConfig` in `types.ts` and `DEFAULTS` in `infrastructure/config.ts`; include it in `BUILD_HASH_KEYS` so config changes trigger a full rebuild - Update `collectFiles` in `builder/helpers.ts` to merge both `ignoreDirs` and `ignoreAdditionalDirs` into the walk's ignore set via `buildIgnoreSet` - Remove `crates` from the global IGNORE_DIRS default — it was added to handle NAPI-RS artifacts in this repo's Rust workspace, but silently excluded `crates/` in every other codebase; add `"ignoreAdditionalDirs": ["crates"]` to this repo's `.codegraphrc.json` instead Closes #1649 Impact: 6 functions changed, 2 affected --- .codegraphrc.json | 3 ++- src/domain/graph/builder/helpers.ts | 21 +++++++++++-------- src/infrastructure/config.ts | 2 ++ src/shared/constants.ts | 9 +++++++++ src/types.ts | 2 ++ tests/unit/builder.test.ts | 16 +++++++++++++++ tests/unit/constants.test.ts | 31 +++++++++++++++++++++++++++++ 7 files changed, 75 insertions(+), 9 deletions(-) diff --git a/.codegraphrc.json b/.codegraphrc.json index ce8446627..815885b8d 100644 --- a/.codegraphrc.json +++ b/.codegraphrc.json @@ -1,4 +1,5 @@ { "embeddings": { "model": "bge-large" }, - "exclude": ["crates/**"] + "exclude": ["crates/**"], + "ignoreAdditionalDirs": 
["crates"] } diff --git a/src/domain/graph/builder/helpers.ts b/src/domain/graph/builder/helpers.ts index c2f8ab0c4..c8dae2983 100644 --- a/src/domain/graph/builder/helpers.ts +++ b/src/domain/graph/builder/helpers.ts @@ -8,7 +8,7 @@ import fs from 'node:fs'; import path from 'node:path'; import { purgeFilesData } from '../../../db/index.js'; import { debug, warn } from '../../../infrastructure/logger.js'; -import { EXTENSIONS, IGNORE_DIRS, normalizePath } from '../../../shared/constants.js'; +import { buildIgnoreSet, EXTENSIONS, normalizePath } from '../../../shared/constants.js'; import { compileGlobs, globToRegex, matchesAny } from '../../../shared/globs.js'; import type { BetterSqlite3Database, @@ -56,13 +56,12 @@ export const CHA_DISPATCH_PENALTY = 0.1; export const CHA_TYPED_DISPATCH_CONFIDENCE = 0.8; /** Check if a directory entry should be skipped (ignored dirs, dotfiles). */ -function shouldSkipEntry(entry: fs.Dirent, extraIgnore: Set | null): boolean { +function shouldSkipEntry(entry: fs.Dirent, ignoreSet: Set): boolean { if (entry.name.startsWith('.') && entry.name !== '.') { - if (IGNORE_DIRS.has(entry.name)) return true; + if (ignoreSet.has(entry.name)) return true; if (entry.isDirectory()) return true; } - if (IGNORE_DIRS.has(entry.name)) return true; - if (extraIgnore?.has(entry.name)) return true; + if (ignoreSet.has(entry.name)) return true; return false; } @@ -140,7 +139,8 @@ interface CollectContext { readonly excludeRegexes: readonly RegExp[]; readonly gitignoreRegexes: readonly RegExp[]; readonly hasGlobFilters: boolean; - readonly extraIgnore: Set | null; + /** Merged set of IGNORE_DIRS + config.ignoreDirs + config.ignoreAdditionalDirs. 
*/ + readonly ignoreSet: Set; readonly visited: Set; } @@ -193,7 +193,7 @@ function walkCollect( let hasFiles = false; for (const entry of entries) { - if (shouldSkipEntry(entry, ctx.extraIgnore)) continue; + if (shouldSkipEntry(entry, ctx.ignoreSet)) continue; const full = path.join(dir, entry.name); if (entry.isDirectory()) { @@ -237,13 +237,18 @@ export function collectFiles( const includeRegexes = compileGlobs(config.include); const excludeRegexes = compileGlobs(config.exclude); const gitignoreRegexes = readGitignorePatterns(dir); + // Build the merged ignore set: + // - config.ignoreDirs are appended to IGNORE_DIRS (existing behaviour: per-repo overrides) + // - config.ignoreAdditionalDirs are also merged in on top of IGNORE_DIRS (new feature) + const extraDirs = [...(config.ignoreDirs ?? []), ...(config.ignoreAdditionalDirs ?? [])]; + const ignoreSet = buildIgnoreSet(extraDirs.length ? extraDirs : undefined); const ctx: CollectContext = { rootDir: dir, includeRegexes, excludeRegexes, gitignoreRegexes, hasGlobFilters: includeRegexes.length > 0 || excludeRegexes.length > 0, - extraIgnore: config.ignoreDirs ? 
new Set(config.ignoreDirs) : null, + ignoreSet, visited: new Set(), }; diff --git a/src/infrastructure/config.ts b/src/infrastructure/config.ts index 519f17570..1e5492ee9 100644 --- a/src/infrastructure/config.ts +++ b/src/infrastructure/config.ts @@ -21,6 +21,7 @@ export const DEFAULTS = { include: [] as string[], exclude: [] as string[], ignoreDirs: [] as string[], + ignoreAdditionalDirs: [] as string[], extensions: [] as string[], aliases: {} as Record, build: { @@ -452,6 +453,7 @@ const BUILD_HASH_KEYS: ReadonlyArray = [ 'include', 'exclude', 'ignoreDirs', + 'ignoreAdditionalDirs', 'extensions', 'aliases', 'build', diff --git a/src/shared/constants.ts b/src/shared/constants.ts index af702f2d9..f2b1a3c2e 100644 --- a/src/shared/constants.ts +++ b/src/shared/constants.ts @@ -34,6 +34,15 @@ export const IGNORE_DIRS: ArrayCompatSet = withArrayCompat( ]), ); +/** + * Merge the global IGNORE_DIRS set with a per-repo additional list from config. + * Returns a new Set — does not mutate IGNORE_DIRS. + */ +export function buildIgnoreSet(additionalDirs?: string[]): Set { + if (!additionalDirs || additionalDirs.length === 0) return IGNORE_DIRS; + return new Set([...IGNORE_DIRS, ...additionalDirs]); +} + export const EXTENSIONS: ArrayCompatSet = withArrayCompat(new Set(SUPPORTED_EXTENSIONS)); /** diff --git a/src/types.ts b/src/types.ts index fa2890b6b..08910e9cc 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1351,6 +1351,8 @@ export interface CodegraphConfig { include: string[]; exclude: string[]; ignoreDirs: string[]; + /** Additional directory names to ignore on top of the built-in IGNORE_DIRS set. 
*/ + ignoreAdditionalDirs: string[]; extensions: string[]; aliases: Record; diff --git a/tests/unit/builder.test.ts b/tests/unit/builder.test.ts index a7140e2ae..a619e969e 100644 --- a/tests/unit/builder.test.ts +++ b/tests/unit/builder.test.ts @@ -105,6 +105,22 @@ describe('collectFiles', () => { expect(basenames).toContain('app.js'); }); + it('respects config.ignoreAdditionalDirs', () => { + const files = collectFiles(tmpDir, [], { ignoreAdditionalDirs: ['lib'] }); + const basenames = files.map((f) => path.basename(f)); + expect(basenames).not.toContain('helper.py'); + // src files still present + expect(basenames).toContain('app.js'); + }); + + it('merges ignoreAdditionalDirs with ignoreDirs when both are set', () => { + // ignoreDirs excludes 'lib', ignoreAdditionalDirs excludes 'src' + const files = collectFiles(tmpDir, [], { ignoreDirs: ['lib'], ignoreAdditionalDirs: ['src'] }); + const basenames = files.map((f) => path.basename(f)); + expect(basenames).not.toContain('helper.py'); // lib excluded + expect(basenames).not.toContain('app.js'); // src excluded + }); + it('returns empty array for non-existent directory (graceful)', () => { const files = collectFiles(path.join(tmpDir, 'does-not-exist')); expect(files).toEqual([]); diff --git a/tests/unit/constants.test.ts b/tests/unit/constants.test.ts index 81ed4dbaa..fcce3bc84 100644 --- a/tests/unit/constants.test.ts +++ b/tests/unit/constants.test.ts @@ -5,6 +5,7 @@ import path from 'node:path'; import { describe, expect, it } from 'vitest'; import { + buildIgnoreSet, EXTENSIONS, IGNORE_DIRS, isSupportedFile, @@ -49,6 +50,36 @@ describe('IGNORE_DIRS', () => { expect(IGNORE_DIRS.has(dir)).toBe(true); } }); + + it('does not contain crates (repo-specific dirs belong in ignoreAdditionalDirs config)', () => { + expect(IGNORE_DIRS.has('crates')).toBe(false); + }); +}); + +describe('buildIgnoreSet', () => { + it('returns IGNORE_DIRS when no additional dirs provided', () => { + const result = buildIgnoreSet(); + 
expect(result).toBe(IGNORE_DIRS); + }); + + it('returns IGNORE_DIRS when empty array provided', () => { + const result = buildIgnoreSet([]); + expect(result).toBe(IGNORE_DIRS); + }); + + it('merges additional dirs on top of IGNORE_DIRS', () => { + const result = buildIgnoreSet(['crates', 'generated']); + expect(result.has('node_modules')).toBe(true); // from IGNORE_DIRS + expect(result.has('crates')).toBe(true); // additional + expect(result.has('generated')).toBe(true); // additional + }); + + it('does not mutate IGNORE_DIRS', () => { + const before = new Set(IGNORE_DIRS); + buildIgnoreSet(['crates']); + expect(IGNORE_DIRS.size).toBe(before.size); + expect(IGNORE_DIRS.has('crates')).toBe(false); + }); }); describe('shouldIgnore', () => { From 6a6c7ef5db06b1f7c501441479444454a67d8ce7 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Sat, 20 Jun 2026 21:41:50 -0600 Subject: [PATCH 2/4] fix: tighten buildIgnoreSet return type to ReadonlySet (#1666) Prevents accidental mutation of the shared IGNORE_DIRS global when the caller receives a direct reference (the no-extras fast path). Update shouldSkipEntry and CollectContext.ignoreSet to accept ReadonlySet consistently. Impact: 3 functions changed, 2 affected --- src/domain/graph/builder/helpers.ts | 4 ++-- src/shared/constants.ts | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/domain/graph/builder/helpers.ts b/src/domain/graph/builder/helpers.ts index c8dae2983..1c6859667 100644 --- a/src/domain/graph/builder/helpers.ts +++ b/src/domain/graph/builder/helpers.ts @@ -56,7 +56,7 @@ export const CHA_DISPATCH_PENALTY = 0.1; export const CHA_TYPED_DISPATCH_CONFIDENCE = 0.8; /** Check if a directory entry should be skipped (ignored dirs, dotfiles). 
*/ -function shouldSkipEntry(entry: fs.Dirent, ignoreSet: Set): boolean { +function shouldSkipEntry(entry: fs.Dirent, ignoreSet: ReadonlySet): boolean { if (entry.name.startsWith('.') && entry.name !== '.') { if (ignoreSet.has(entry.name)) return true; if (entry.isDirectory()) return true; @@ -140,7 +140,7 @@ interface CollectContext { readonly gitignoreRegexes: readonly RegExp[]; readonly hasGlobFilters: boolean; /** Merged set of IGNORE_DIRS + config.ignoreDirs + config.ignoreAdditionalDirs. */ - readonly ignoreSet: Set; + readonly ignoreSet: ReadonlySet; readonly visited: Set; } diff --git a/src/shared/constants.ts b/src/shared/constants.ts index f2b1a3c2e..9dc88056e 100644 --- a/src/shared/constants.ts +++ b/src/shared/constants.ts @@ -38,7 +38,7 @@ export const IGNORE_DIRS: ArrayCompatSet = withArrayCompat( * Merge the global IGNORE_DIRS set with a per-repo additional list from config. * Returns a new Set — does not mutate IGNORE_DIRS. */ -export function buildIgnoreSet(additionalDirs?: string[]): Set { +export function buildIgnoreSet(additionalDirs?: string[]): ReadonlySet { if (!additionalDirs || additionalDirs.length === 0) return IGNORE_DIRS; return new Set([...IGNORE_DIRS, ...additionalDirs]); } From 1bbbc3d033a2a9e0510ae44d6b31a2b0c4c0e19b Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Sat, 20 Jun 2026 21:42:00 -0600 Subject: [PATCH 3/4] fix: re-enable PRAGMA foreign_keys after native buildGraph() (#1666) FK enforcement is disabled before buildGraph() as a workaround for old-binary purge failures (< v3.14). Re-enable it immediately after buildGraph() returns so JS post-passes (CHA, dataflow, structure) run with full FK enforcement rather than inheriting the workaround. 
Impact: 1 function changed, 5 affected --- src/domain/graph/builder/stages/native-orchestrator.ts | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/domain/graph/builder/stages/native-orchestrator.ts b/src/domain/graph/builder/stages/native-orchestrator.ts index d47358f99..aeb0da38f 100644 --- a/src/domain/graph/builder/stages/native-orchestrator.ts +++ b/src/domain/graph/builder/stages/native-orchestrator.ts @@ -2105,8 +2105,8 @@ export async function tryNativeOrchestrator( // nodes/edges during incremental builds, so FK enforcement causes the purge // statements to fail silently — leaving stale nodes and edges that then get // duplicated when the barrel-candidate re-parse re-inserts them (issue #1644). - // Disabling FK before buildGraph() lets the purge succeed. FK enforcement is - // restored automatically when this connection is closed after the build. + // Disabling FK before buildGraph() lets the purge succeed; re-enable afterwards + // so JS post-passes run with full FK enforcement. try { ctx.nativeDb.exec('PRAGMA foreign_keys = OFF'); } catch { @@ -2119,6 +2119,12 @@ export async function tryNativeOrchestrator( JSON.stringify(ctx.aliases), JSON.stringify(ctx.opts), ); + // Restore FK enforcement for JS post-passes (CHA, dataflow, structure). 
+ try { + ctx.nativeDb.exec('PRAGMA foreign_keys = ON'); + } catch { + // exec may not exist on very old addon versions — safe to ignore + } const result = JSON.parse(resultJson) as NativeOrchestratorResult; if (result.earlyExit) { From 80752f2b88edd072bc551c3e4e6b0635b028a721 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Sat, 20 Jun 2026 22:28:17 -0600 Subject: [PATCH 4/4] fix: apply ignoreAdditionalDirs and ignoreDirs in watch mode (#1666) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The watcher previously used the global IGNORE_DIRS directly via shouldIgnore(), so removing 'crates' from IGNORE_DIRS (the main change in this PR) caused the watcher to traverse and trigger rebuilds for files under crates/ — directly contradicting the ignoreAdditionalDirs exclusion that works correctly in batch builds via collectFiles. Fix: load .codegraphrc.json in setupWatcher, build the merged ignore set with buildIgnoreSet(ignoreDirs + ignoreAdditionalDirs), store it in WatcherContext, and thread it through collectTrackedFiles (polling mode) and shouldIgnorePath (native OS watcher mode). Both watcher paths now respect the same exclusion set as the batch build path. 
Impact: 6 functions changed, 4 affected --- src/domain/graph/watcher.ts | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/src/domain/graph/watcher.ts b/src/domain/graph/watcher.ts index f0ef8028c..d922307e6 100644 --- a/src/domain/graph/watcher.ts +++ b/src/domain/graph/watcher.ts @@ -1,17 +1,18 @@ import fs from 'node:fs'; import path from 'node:path'; import { closeDb, getNodeId as getNodeIdQuery, initSchema, openDb } from '../../db/index.js'; +import { loadConfig } from '../../infrastructure/config.js'; import { debug, info, warn } from '../../infrastructure/logger.js'; -import { isSupportedFile, normalizePath, shouldIgnore } from '../../shared/constants.js'; +import { buildIgnoreSet, isSupportedFile, normalizePath } from '../../shared/constants.js'; import { DbError } from '../../shared/errors.js'; import { createParseTreeCache, getActiveEngine } from '../parser.js'; import { type IncrementalStmts, rebuildFile } from './builder/incremental.js'; import { appendChangeEvents, buildChangeEvent, diffSymbols } from './change-journal.js'; import { appendJournalEntriesAndStampHeader } from './journal.js'; -function shouldIgnorePath(filePath: string): boolean { +function shouldIgnorePath(filePath: string, ignoreSet: ReadonlySet): boolean { const parts = filePath.split(path.sep); - return parts.some((p) => shouldIgnore(p)); + return parts.some((p) => ignoreSet.has(p) || p.startsWith('.')); } /** Prepare all SQL statements needed by the watcher's incremental rebuild. */ @@ -139,7 +140,7 @@ function logRebuildResults(updates: RebuildResult[]): void { } /** Recursively collect tracked source files for stat-based polling. 
*/ -function collectTrackedFiles(dir: string, result: string[]): void { +function collectTrackedFiles(dir: string, result: string[], ignoreSet: ReadonlySet): void { let entries: fs.Dirent[]; try { entries = fs.readdirSync(dir, { withFileTypes: true }); @@ -148,10 +149,10 @@ function collectTrackedFiles(dir: string, result: string[]): void { return; } for (const entry of entries) { - if (shouldIgnore(entry.name)) continue; + if (ignoreSet.has(entry.name) || entry.name.startsWith('.')) continue; const full = path.join(dir, entry.name); if (entry.isDirectory()) { - collectTrackedFiles(full, result); + collectTrackedFiles(full, result, ignoreSet); } else if (isSupportedFile(entry.name)) { result.push(full); } @@ -168,6 +169,8 @@ interface WatcherContext { pending: Set; timer: ReturnType | null; debounceMs: number; + /** Merged ignore set from IGNORE_DIRS + config.ignoreDirs + config.ignoreAdditionalDirs. */ + ignoreSet: ReadonlySet; } /** Initialize DB, engine, cache, and statements for watch mode. */ @@ -177,6 +180,12 @@ function setupWatcher(rootDir: string, opts: { engine?: string; dbPath?: string throw new DbError('No graph.db found. Run `codegraph build` first.', { file: dbPath }); } + // Load repo config so ignoreDirs and ignoreAdditionalDirs are respected by + // the watcher the same way they are by collectFiles in the batch build path. + const config = loadConfig(rootDir); + const extraDirs = [...(config.ignoreDirs ?? []), ...(config.ignoreAdditionalDirs ?? [])]; + const ignoreSet = buildIgnoreSet(extraDirs.length ? 
extraDirs : undefined); + const db = openDb(dbPath); initSchema(db); const engineOpts: import('../../types.js').EngineOpts = { @@ -205,6 +214,7 @@ function setupWatcher(rootDir: string, opts: { engine?: string; dbPath?: string pending: new Set(), timer: null, debounceMs: 300, + ignoreSet, }; } @@ -223,7 +233,7 @@ function startPollingWatcher(ctx: WatcherContext, pollIntervalMs: number): () => const mtimeMap = new Map(); const initial: string[] = []; - collectTrackedFiles(ctx.rootDir, initial); + collectTrackedFiles(ctx.rootDir, initial, ctx.ignoreSet); for (const f of initial) { try { mtimeMap.set(f, fs.statSync(f).mtimeMs); @@ -235,7 +245,7 @@ function startPollingWatcher(ctx: WatcherContext, pollIntervalMs: number): () => const pollTimer = setInterval(() => { const current: string[] = []; - collectTrackedFiles(ctx.rootDir, current); + collectTrackedFiles(ctx.rootDir, current, ctx.ignoreSet); const currentSet = new Set(current); for (const f of current) { @@ -270,7 +280,7 @@ function startPollingWatcher(ctx: WatcherContext, pollIntervalMs: number): () => function startNativeWatcher(ctx: WatcherContext): () => void { const watcher = fs.watch(ctx.rootDir, { recursive: true }, (_eventType, filename) => { if (!filename) return; - if (shouldIgnorePath(filename)) return; + if (shouldIgnorePath(filename, ctx.ignoreSet)) return; if (!isSupportedFile(filename)) return; ctx.pending.add(path.join(ctx.rootDir, filename));