diff --git a/.codegraphrc.json b/.codegraphrc.json index ce8446627..815885b8d 100644 --- a/.codegraphrc.json +++ b/.codegraphrc.json @@ -1,4 +1,5 @@ { "embeddings": { "model": "bge-large" }, - "exclude": ["crates/**"] + "exclude": ["crates/**"], + "ignoreAdditionalDirs": ["crates"] } diff --git a/crates/codegraph-core/src/domain/graph/builder/stages/collect_files.rs b/crates/codegraph-core/src/domain/graph/builder/stages/collect_files.rs index 59a9e561d..cf127d92d 100644 --- a/crates/codegraph-core/src/domain/graph/builder/stages/collect_files.rs +++ b/crates/codegraph-core/src/domain/graph/builder/stages/collect_files.rs @@ -28,9 +28,6 @@ const DEFAULT_IGNORE_DIRS: &[&str] = &[ "venv", "env", ".env", - // Rust workspace convention — contains only Rust source and NAPI-RS generated - // binding artifacts (index.js / index.d.ts) that produce false complexity readings. - "crates", ]; /// All supported file extensions (mirrors the JS `EXTENSIONS` set). diff --git a/src/domain/graph/builder/helpers.ts b/src/domain/graph/builder/helpers.ts index c2f8ab0c4..1c6859667 100644 --- a/src/domain/graph/builder/helpers.ts +++ b/src/domain/graph/builder/helpers.ts @@ -8,7 +8,7 @@ import fs from 'node:fs'; import path from 'node:path'; import { purgeFilesData } from '../../../db/index.js'; import { debug, warn } from '../../../infrastructure/logger.js'; -import { EXTENSIONS, IGNORE_DIRS, normalizePath } from '../../../shared/constants.js'; +import { buildIgnoreSet, EXTENSIONS, normalizePath } from '../../../shared/constants.js'; import { compileGlobs, globToRegex, matchesAny } from '../../../shared/globs.js'; import type { BetterSqlite3Database, @@ -56,13 +56,12 @@ export const CHA_DISPATCH_PENALTY = 0.1; export const CHA_TYPED_DISPATCH_CONFIDENCE = 0.8; /** Check if a directory entry should be skipped (ignored dirs, dotfiles). 
*/ -function shouldSkipEntry(entry: fs.Dirent, extraIgnore: Set | null): boolean { +function shouldSkipEntry(entry: fs.Dirent, ignoreSet: ReadonlySet): boolean { if (entry.name.startsWith('.') && entry.name !== '.') { - if (IGNORE_DIRS.has(entry.name)) return true; + if (ignoreSet.has(entry.name)) return true; if (entry.isDirectory()) return true; } - if (IGNORE_DIRS.has(entry.name)) return true; - if (extraIgnore?.has(entry.name)) return true; + if (ignoreSet.has(entry.name)) return true; return false; } @@ -140,7 +139,8 @@ interface CollectContext { readonly excludeRegexes: readonly RegExp[]; readonly gitignoreRegexes: readonly RegExp[]; readonly hasGlobFilters: boolean; - readonly extraIgnore: Set | null; + /** Merged set of IGNORE_DIRS + config.ignoreDirs + config.ignoreAdditionalDirs. */ + readonly ignoreSet: ReadonlySet; readonly visited: Set; } @@ -193,7 +193,7 @@ function walkCollect( let hasFiles = false; for (const entry of entries) { - if (shouldSkipEntry(entry, ctx.extraIgnore)) continue; + if (shouldSkipEntry(entry, ctx.ignoreSet)) continue; const full = path.join(dir, entry.name); if (entry.isDirectory()) { @@ -237,13 +237,18 @@ export function collectFiles( const includeRegexes = compileGlobs(config.include); const excludeRegexes = compileGlobs(config.exclude); const gitignoreRegexes = readGitignorePatterns(dir); + // Build the merged ignore set: + // - config.ignoreDirs are appended to IGNORE_DIRS (existing behaviour: per-repo overrides) + // - config.ignoreAdditionalDirs are also merged in on top of IGNORE_DIRS (new feature) + const extraDirs = [...(config.ignoreDirs ?? []), ...(config.ignoreAdditionalDirs ?? [])]; + const ignoreSet = buildIgnoreSet(extraDirs.length ? extraDirs : undefined); const ctx: CollectContext = { rootDir: dir, includeRegexes, excludeRegexes, gitignoreRegexes, hasGlobFilters: includeRegexes.length > 0 || excludeRegexes.length > 0, - extraIgnore: config.ignoreDirs ? 
new Set(config.ignoreDirs) : null, + ignoreSet, visited: new Set(), }; diff --git a/src/domain/graph/builder/stages/native-orchestrator.ts b/src/domain/graph/builder/stages/native-orchestrator.ts index 5b04ba39b..db92277ef 100644 --- a/src/domain/graph/builder/stages/native-orchestrator.ts +++ b/src/domain/graph/builder/stages/native-orchestrator.ts @@ -2105,11 +2105,9 @@ export async function tryNativeOrchestrator( // nodes/edges during incremental builds, so FK enforcement causes the purge // statements to fail silently — leaving stale nodes and edges that then get // duplicated when the barrel-candidate re-parse re-inserts them (issue #1644). - // Disabling FK before buildGraph() lets the purge succeed. FK stays OFF for - // the entire connection lifetime (through backfillNativeDroppedFiles and - // buildDataflowP4ForNative) and is restored when the connection is closed. - // This is intentional: call_edge_id values written by P4 are always looked up - // live from the edges table, so no phantom FK reference is ever produced. + // Disabling FK before buildGraph() lets the purge succeed; FK is restored in + // a finally block so post-passes (gap-repair, structure patch) retain FK protection + // even if buildGraph() throws. 
try { ctx.nativeDb.exec('PRAGMA foreign_keys = OFF'); } catch { diff --git a/src/domain/graph/watcher.ts b/src/domain/graph/watcher.ts index f0ef8028c..d922307e6 100644 --- a/src/domain/graph/watcher.ts +++ b/src/domain/graph/watcher.ts @@ -1,17 +1,18 @@ import fs from 'node:fs'; import path from 'node:path'; import { closeDb, getNodeId as getNodeIdQuery, initSchema, openDb } from '../../db/index.js'; +import { loadConfig } from '../../infrastructure/config.js'; import { debug, info, warn } from '../../infrastructure/logger.js'; -import { isSupportedFile, normalizePath, shouldIgnore } from '../../shared/constants.js'; +import { buildIgnoreSet, isSupportedFile, normalizePath } from '../../shared/constants.js'; import { DbError } from '../../shared/errors.js'; import { createParseTreeCache, getActiveEngine } from '../parser.js'; import { type IncrementalStmts, rebuildFile } from './builder/incremental.js'; import { appendChangeEvents, buildChangeEvent, diffSymbols } from './change-journal.js'; import { appendJournalEntriesAndStampHeader } from './journal.js'; -function shouldIgnorePath(filePath: string): boolean { +function shouldIgnorePath(filePath: string, ignoreSet: ReadonlySet): boolean { const parts = filePath.split(path.sep); - return parts.some((p) => shouldIgnore(p)); + return parts.some((p) => ignoreSet.has(p) || p.startsWith('.')); } /** Prepare all SQL statements needed by the watcher's incremental rebuild. */ @@ -139,7 +140,7 @@ function logRebuildResults(updates: RebuildResult[]): void { } /** Recursively collect tracked source files for stat-based polling. 
*/ -function collectTrackedFiles(dir: string, result: string[]): void { +function collectTrackedFiles(dir: string, result: string[], ignoreSet: ReadonlySet): void { let entries: fs.Dirent[]; try { entries = fs.readdirSync(dir, { withFileTypes: true }); @@ -148,10 +149,10 @@ function collectTrackedFiles(dir: string, result: string[]): void { return; } for (const entry of entries) { - if (shouldIgnore(entry.name)) continue; + if (ignoreSet.has(entry.name) || entry.name.startsWith('.')) continue; const full = path.join(dir, entry.name); if (entry.isDirectory()) { - collectTrackedFiles(full, result); + collectTrackedFiles(full, result, ignoreSet); } else if (isSupportedFile(entry.name)) { result.push(full); } @@ -168,6 +169,8 @@ interface WatcherContext { pending: Set; timer: ReturnType | null; debounceMs: number; + /** Merged ignore set from IGNORE_DIRS + config.ignoreDirs + config.ignoreAdditionalDirs. */ + ignoreSet: ReadonlySet; } /** Initialize DB, engine, cache, and statements for watch mode. */ @@ -177,6 +180,12 @@ function setupWatcher(rootDir: string, opts: { engine?: string; dbPath?: string throw new DbError('No graph.db found. Run `codegraph build` first.', { file: dbPath }); } + // Load repo config so ignoreDirs and ignoreAdditionalDirs are respected by + // the watcher the same way they are by collectFiles in the batch build path. + const config = loadConfig(rootDir); + const extraDirs = [...(config.ignoreDirs ?? []), ...(config.ignoreAdditionalDirs ?? [])]; + const ignoreSet = buildIgnoreSet(extraDirs.length ? 
extraDirs : undefined); + const db = openDb(dbPath); initSchema(db); const engineOpts: import('../../types.js').EngineOpts = { @@ -205,6 +214,7 @@ function setupWatcher(rootDir: string, opts: { engine?: string; dbPath?: string pending: new Set(), timer: null, debounceMs: 300, + ignoreSet, }; } @@ -223,7 +233,7 @@ function startPollingWatcher(ctx: WatcherContext, pollIntervalMs: number): () => const mtimeMap = new Map(); const initial: string[] = []; - collectTrackedFiles(ctx.rootDir, initial); + collectTrackedFiles(ctx.rootDir, initial, ctx.ignoreSet); for (const f of initial) { try { mtimeMap.set(f, fs.statSync(f).mtimeMs); @@ -235,7 +245,7 @@ function startPollingWatcher(ctx: WatcherContext, pollIntervalMs: number): () => const pollTimer = setInterval(() => { const current: string[] = []; - collectTrackedFiles(ctx.rootDir, current); + collectTrackedFiles(ctx.rootDir, current, ctx.ignoreSet); const currentSet = new Set(current); for (const f of current) { @@ -270,7 +280,7 @@ function startPollingWatcher(ctx: WatcherContext, pollIntervalMs: number): () => function startNativeWatcher(ctx: WatcherContext): () => void { const watcher = fs.watch(ctx.rootDir, { recursive: true }, (_eventType, filename) => { if (!filename) return; - if (shouldIgnorePath(filename)) return; + if (shouldIgnorePath(filename, ctx.ignoreSet)) return; if (!isSupportedFile(filename)) return; ctx.pending.add(path.join(ctx.rootDir, filename)); diff --git a/src/infrastructure/config.ts b/src/infrastructure/config.ts index 519f17570..1e5492ee9 100644 --- a/src/infrastructure/config.ts +++ b/src/infrastructure/config.ts @@ -21,6 +21,7 @@ export const DEFAULTS = { include: [] as string[], exclude: [] as string[], ignoreDirs: [] as string[], + ignoreAdditionalDirs: [] as string[], extensions: [] as string[], aliases: {} as Record, build: { @@ -452,6 +453,7 @@ const BUILD_HASH_KEYS: ReadonlyArray = [ 'include', 'exclude', 'ignoreDirs', + 'ignoreAdditionalDirs', 'extensions', 'aliases', 'build', diff 
--git a/src/shared/constants.ts b/src/shared/constants.ts index af702f2d9..9dc88056e 100644 --- a/src/shared/constants.ts +++ b/src/shared/constants.ts @@ -34,6 +34,15 @@ export const IGNORE_DIRS: ArrayCompatSet = withArrayCompat( ]), ); +/** + * Merge the global IGNORE_DIRS set with a per-repo additional list from config. + * Returns IGNORE_DIRS itself when there are no additional dirs, otherwise a new Set — never mutates IGNORE_DIRS. + */ +export function buildIgnoreSet(additionalDirs?: string[]): ReadonlySet { + if (!additionalDirs || additionalDirs.length === 0) return IGNORE_DIRS; + return new Set([...IGNORE_DIRS, ...additionalDirs]); +} + export const EXTENSIONS: ArrayCompatSet = withArrayCompat(new Set(SUPPORTED_EXTENSIONS)); /** diff --git a/src/types.ts b/src/types.ts index fa2890b6b..08910e9cc 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1351,6 +1351,8 @@ export interface CodegraphConfig { include: string[]; exclude: string[]; ignoreDirs: string[]; + /** Additional directory names to ignore on top of the built-in IGNORE_DIRS set. */ + ignoreAdditionalDirs: string[]; extensions: string[]; aliases: Record; diff --git a/tests/unit/builder.test.ts b/tests/unit/builder.test.ts index a7140e2ae..a619e969e 100644 --- a/tests/unit/builder.test.ts +++ b/tests/unit/builder.test.ts @@ -105,6 +105,22 @@ describe('collectFiles', () => { expect(basenames).toContain('app.js'); }); + it('respects config.ignoreAdditionalDirs', () => { + const files = collectFiles(tmpDir, [], { ignoreAdditionalDirs: ['lib'] }); + const basenames = files.map((f) => path.basename(f)); + expect(basenames).not.toContain('helper.py'); + // src files still present + expect(basenames).toContain('app.js'); + }); + + it('merges ignoreAdditionalDirs with ignoreDirs when both are set', () => { + // ignoreDirs excludes 'lib', ignoreAdditionalDirs excludes 'src' + const files = collectFiles(tmpDir, [], { ignoreDirs: ['lib'], ignoreAdditionalDirs: ['src'] }); + const basenames = files.map((f) => path.basename(f)); + expect(basenames).not.toContain('helper.py'); 
// lib excluded + expect(basenames).not.toContain('app.js'); // src excluded + }); + it('returns empty array for non-existent directory (graceful)', () => { const files = collectFiles(path.join(tmpDir, 'does-not-exist')); expect(files).toEqual([]); diff --git a/tests/unit/constants.test.ts b/tests/unit/constants.test.ts index 81ed4dbaa..fcce3bc84 100644 --- a/tests/unit/constants.test.ts +++ b/tests/unit/constants.test.ts @@ -5,6 +5,7 @@ import path from 'node:path'; import { describe, expect, it } from 'vitest'; import { + buildIgnoreSet, EXTENSIONS, IGNORE_DIRS, isSupportedFile, @@ -49,6 +50,36 @@ describe('IGNORE_DIRS', () => { expect(IGNORE_DIRS.has(dir)).toBe(true); } }); + + it('does not contain crates (repo-specific dirs belong in ignoreAdditionalDirs config)', () => { + expect(IGNORE_DIRS.has('crates')).toBe(false); + }); +}); + +describe('buildIgnoreSet', () => { + it('returns IGNORE_DIRS when no additional dirs provided', () => { + const result = buildIgnoreSet(); + expect(result).toBe(IGNORE_DIRS); + }); + + it('returns IGNORE_DIRS when empty array provided', () => { + const result = buildIgnoreSet([]); + expect(result).toBe(IGNORE_DIRS); + }); + + it('merges additional dirs on top of IGNORE_DIRS', () => { + const result = buildIgnoreSet(['crates', 'generated']); + expect(result.has('node_modules')).toBe(true); // from IGNORE_DIRS + expect(result.has('crates')).toBe(true); // additional + expect(result.has('generated')).toBe(true); // additional + }); + + it('does not mutate IGNORE_DIRS', () => { + const before = new Set(IGNORE_DIRS); + buildIgnoreSet(['crates']); + expect(IGNORE_DIRS.size).toBe(before.size); + expect(IGNORE_DIRS.has('crates')).toBe(false); + }); }); describe('shouldIgnore', () => {