Skip to content
Merged
3 changes: 2 additions & 1 deletion .codegraphrc.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"embeddings": { "model": "bge-large" },
"exclude": ["crates/**"]
"exclude": ["crates/**"],
"ignoreAdditionalDirs": ["crates"]
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,6 @@ const DEFAULT_IGNORE_DIRS: &[&str] = &[
"venv",
"env",
".env",
// Rust workspace convention — contains only Rust source and NAPI-RS generated
// binding artifacts (index.js / index.d.ts) that produce false complexity readings.
"crates",
];

/// All supported file extensions (mirrors the JS `EXTENSIONS` set).
Expand Down
21 changes: 13 additions & 8 deletions src/domain/graph/builder/helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import fs from 'node:fs';
import path from 'node:path';
import { purgeFilesData } from '../../../db/index.js';
import { debug, warn } from '../../../infrastructure/logger.js';
import { EXTENSIONS, IGNORE_DIRS, normalizePath } from '../../../shared/constants.js';
import { buildIgnoreSet, EXTENSIONS, normalizePath } from '../../../shared/constants.js';
import { compileGlobs, globToRegex, matchesAny } from '../../../shared/globs.js';
import type {
BetterSqlite3Database,
Expand Down Expand Up @@ -56,13 +56,12 @@ export const CHA_DISPATCH_PENALTY = 0.1;
export const CHA_TYPED_DISPATCH_CONFIDENCE = 0.8;

/**
 * Decide whether a directory entry should be skipped while walking the tree.
 *
 * An entry is skipped when its name is listed in `ignoreSet`, or when it is a
 * dot-prefixed directory. Dot-prefixed files that are not listed are kept.
 *
 * @param entry - Directory entry under consideration.
 * @param ignoreSet - Names to skip; compared against `entry.name` exactly.
 * @returns `true` when the entry must not be visited.
 */
function shouldSkipEntry(entry: fs.Dirent, ignoreSet: ReadonlySet<string>): boolean {
  // An exact-name match wins regardless of whether the entry is a file or a directory.
  if (ignoreSet.has(entry.name)) return true;
  // Unlisted dot-prefixed entries are skipped only when they are directories;
  // the literal '.' name is exempt from the hidden-entry rule.
  const isHidden = entry.name.startsWith('.') && entry.name !== '.';
  return isHidden && entry.isDirectory();
}

Expand Down Expand Up @@ -140,7 +139,8 @@ interface CollectContext {
readonly excludeRegexes: readonly RegExp[];
readonly gitignoreRegexes: readonly RegExp[];
readonly hasGlobFilters: boolean;
readonly extraIgnore: Set<string> | null;
/** Merged set of IGNORE_DIRS + config.ignoreDirs + config.ignoreAdditionalDirs. */
readonly ignoreSet: ReadonlySet<string>;
readonly visited: Set<string>;
}

Expand Down Expand Up @@ -193,7 +193,7 @@ function walkCollect(

let hasFiles = false;
for (const entry of entries) {
if (shouldSkipEntry(entry, ctx.extraIgnore)) continue;
if (shouldSkipEntry(entry, ctx.ignoreSet)) continue;

const full = path.join(dir, entry.name);
if (entry.isDirectory()) {
Expand Down Expand Up @@ -237,13 +237,18 @@ export function collectFiles(
const includeRegexes = compileGlobs(config.include);
const excludeRegexes = compileGlobs(config.exclude);
const gitignoreRegexes = readGitignorePatterns(dir);
// Build the merged ignore set:
// - config.ignoreDirs are appended to IGNORE_DIRS (existing behaviour: per-repo overrides)
// - config.ignoreAdditionalDirs are also merged in on top of IGNORE_DIRS (new feature)
const extraDirs = [...(config.ignoreDirs ?? []), ...(config.ignoreAdditionalDirs ?? [])];
const ignoreSet = buildIgnoreSet(extraDirs.length ? extraDirs : undefined);
const ctx: CollectContext = {
rootDir: dir,
includeRegexes,
excludeRegexes,
gitignoreRegexes,
hasGlobFilters: includeRegexes.length > 0 || excludeRegexes.length > 0,
extraIgnore: config.ignoreDirs ? new Set(config.ignoreDirs) : null,
ignoreSet,
visited: new Set(),
};

Expand Down
8 changes: 3 additions & 5 deletions src/domain/graph/builder/stages/native-orchestrator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2105,11 +2105,9 @@ export async function tryNativeOrchestrator(
// nodes/edges during incremental builds, so FK enforcement causes the purge
// statements to fail silently — leaving stale nodes and edges that then get
// duplicated when the barrel-candidate re-parse re-inserts them (issue #1644).
// Disabling FK before buildGraph() lets the purge succeed. FK stays OFF for
// the entire connection lifetime (through backfillNativeDroppedFiles and
// buildDataflowP4ForNative) and is restored when the connection is closed.
// This is intentional: call_edge_id values written by P4 are always looked up
// live from the edges table, so no phantom FK reference is ever produced.
// Disabling FK before buildGraph() lets the purge succeed; FK is restored in
// a finally block so post-passes (gap-repair, structure patch) retain FK protection
// even if buildGraph() throws.
try {
ctx.nativeDb.exec('PRAGMA foreign_keys = OFF');
} catch {
Expand Down
28 changes: 19 additions & 9 deletions src/domain/graph/watcher.ts
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
import fs from 'node:fs';
import path from 'node:path';
import { closeDb, getNodeId as getNodeIdQuery, initSchema, openDb } from '../../db/index.js';
import { loadConfig } from '../../infrastructure/config.js';
import { debug, info, warn } from '../../infrastructure/logger.js';
import { isSupportedFile, normalizePath, shouldIgnore } from '../../shared/constants.js';
import { buildIgnoreSet, isSupportedFile, normalizePath } from '../../shared/constants.js';
import { DbError } from '../../shared/errors.js';
import { createParseTreeCache, getActiveEngine } from '../parser.js';
import { type IncrementalStmts, rebuildFile } from './builder/incremental.js';
import { appendChangeEvents, buildChangeEvent, diffSymbols } from './change-journal.js';
import { appendJournalEntriesAndStampHeader } from './journal.js';

/**
 * Report whether a path should be ignored by the watcher.
 *
 * The path is split into segments and rejected when any segment is listed in
 * `ignoreSet` or starts with a dot. The dot rule applies to every segment,
 * so dot-prefixed files are ignored as well as dot-prefixed directories.
 *
 * @param filePath - Path to test, e.g. the filename reported by `fs.watch`.
 * @param ignoreSet - Directory names to ignore; compared per segment.
 * @returns `true` when at least one segment is ignored.
 */
function shouldIgnorePath(filePath: string, ignoreSet: ReadonlySet<string>): boolean {
  // When the platform separator is '\\', '/' is also a valid separator, so
  // split on both; otherwise split on '/' only, exactly as path.sep would.
  const separator = path.sep === '/' ? '/' : /[\\/]/;
  return filePath
    .split(separator)
    .some((segment) => ignoreSet.has(segment) || segment.startsWith('.'));
}

/** Prepare all SQL statements needed by the watcher's incremental rebuild. */
Expand Down Expand Up @@ -139,7 +140,7 @@ function logRebuildResults(updates: RebuildResult[]): void {
}

/** Recursively collect tracked source files for stat-based polling. */
function collectTrackedFiles(dir: string, result: string[]): void {
function collectTrackedFiles(dir: string, result: string[], ignoreSet: ReadonlySet<string>): void {
let entries: fs.Dirent[];
try {
entries = fs.readdirSync(dir, { withFileTypes: true });
Expand All @@ -148,10 +149,10 @@ function collectTrackedFiles(dir: string, result: string[]): void {
return;
}
for (const entry of entries) {
if (shouldIgnore(entry.name)) continue;
if (ignoreSet.has(entry.name) || entry.name.startsWith('.')) continue;
const full = path.join(dir, entry.name);
if (entry.isDirectory()) {
collectTrackedFiles(full, result);
collectTrackedFiles(full, result, ignoreSet);
} else if (isSupportedFile(entry.name)) {
result.push(full);
}
Expand All @@ -168,6 +169,8 @@ interface WatcherContext {
pending: Set<string>;
timer: ReturnType<typeof setTimeout> | null;
debounceMs: number;
/** Merged ignore set from IGNORE_DIRS + config.ignoreDirs + config.ignoreAdditionalDirs. */
ignoreSet: ReadonlySet<string>;
}

/** Initialize DB, engine, cache, and statements for watch mode. */
Expand All @@ -177,6 +180,12 @@ function setupWatcher(rootDir: string, opts: { engine?: string; dbPath?: string
throw new DbError('No graph.db found. Run `codegraph build` first.', { file: dbPath });
}

// Load repo config so ignoreDirs and ignoreAdditionalDirs are respected by
// the watcher the same way they are by collectFiles in the batch build path.
const config = loadConfig(rootDir);
const extraDirs = [...(config.ignoreDirs ?? []), ...(config.ignoreAdditionalDirs ?? [])];
const ignoreSet = buildIgnoreSet(extraDirs.length ? extraDirs : undefined);

const db = openDb(dbPath);
initSchema(db);
const engineOpts: import('../../types.js').EngineOpts = {
Expand Down Expand Up @@ -205,6 +214,7 @@ function setupWatcher(rootDir: string, opts: { engine?: string; dbPath?: string
pending: new Set<string>(),
timer: null,
debounceMs: 300,
ignoreSet,
};
}

Expand All @@ -223,7 +233,7 @@ function startPollingWatcher(ctx: WatcherContext, pollIntervalMs: number): () =>
const mtimeMap = new Map<string, number>();

const initial: string[] = [];
collectTrackedFiles(ctx.rootDir, initial);
collectTrackedFiles(ctx.rootDir, initial, ctx.ignoreSet);
for (const f of initial) {
try {
mtimeMap.set(f, fs.statSync(f).mtimeMs);
Expand All @@ -235,7 +245,7 @@ function startPollingWatcher(ctx: WatcherContext, pollIntervalMs: number): () =>

const pollTimer = setInterval(() => {
const current: string[] = [];
collectTrackedFiles(ctx.rootDir, current);
collectTrackedFiles(ctx.rootDir, current, ctx.ignoreSet);
const currentSet = new Set(current);

for (const f of current) {
Expand Down Expand Up @@ -270,7 +280,7 @@ function startPollingWatcher(ctx: WatcherContext, pollIntervalMs: number): () =>
function startNativeWatcher(ctx: WatcherContext): () => void {
const watcher = fs.watch(ctx.rootDir, { recursive: true }, (_eventType, filename) => {
if (!filename) return;
if (shouldIgnorePath(filename)) return;
if (shouldIgnorePath(filename, ctx.ignoreSet)) return;
if (!isSupportedFile(filename)) return;

ctx.pending.add(path.join(ctx.rootDir, filename));
Expand Down
2 changes: 2 additions & 0 deletions src/infrastructure/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ export const DEFAULTS = {
include: [] as string[],
exclude: [] as string[],
ignoreDirs: [] as string[],
ignoreAdditionalDirs: [] as string[],
extensions: [] as string[],
aliases: {} as Record<string, string>,
build: {
Expand Down Expand Up @@ -452,6 +453,7 @@ const BUILD_HASH_KEYS: ReadonlyArray<keyof CodegraphConfig> = [
'include',
'exclude',
'ignoreDirs',
'ignoreAdditionalDirs',
'extensions',
'aliases',
'build',
Expand Down
9 changes: 9 additions & 0 deletions src/shared/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,15 @@ export const IGNORE_DIRS: ArrayCompatSet<string> = withArrayCompat(
]),
);

/**
 * Build the effective set of directory names to ignore for a repository.
 *
 * Combines the global IGNORE_DIRS set with a per-repo list from config.
 * IGNORE_DIRS is never mutated. When there is nothing to add, IGNORE_DIRS
 * itself is returned (no copy is made), so the result must be treated as
 * read-only.
 *
 * @param additionalDirs - Extra directory names to ignore; may be omitted or empty.
 * @returns IGNORE_DIRS when no names are supplied, otherwise a new Set holding
 *   every IGNORE_DIRS entry plus `additionalDirs`.
 */
export function buildIgnoreSet(additionalDirs?: readonly string[]): ReadonlySet<string> {
  // Covers both `undefined` and an empty list.
  if (!additionalDirs?.length) return IGNORE_DIRS;
  return new Set<string>([...IGNORE_DIRS, ...additionalDirs]);
}

export const EXTENSIONS: ArrayCompatSet<string> = withArrayCompat(new Set(SUPPORTED_EXTENSIONS));

/**
Expand Down
2 changes: 2 additions & 0 deletions src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1351,6 +1351,8 @@ export interface CodegraphConfig {
include: string[];
exclude: string[];
ignoreDirs: string[];
/** Additional directory names to ignore on top of the built-in IGNORE_DIRS set. */
ignoreAdditionalDirs: string[];
extensions: string[];
aliases: Record<string, unknown>;

Expand Down
16 changes: 16 additions & 0 deletions tests/unit/builder.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,22 @@ describe('collectFiles', () => {
expect(basenames).toContain('app.js');
});

it('respects config.ignoreAdditionalDirs', () => {
  const collected = collectFiles(tmpDir, [], { ignoreAdditionalDirs: ['lib'] });
  const names = Array.from(collected, (file) => path.basename(file));
  // Files under the ignored 'lib' directory must be gone...
  expect(names).not.toContain('helper.py');
  // ...while src files are still collected.
  expect(names).toContain('app.js');
});

it('merges ignoreAdditionalDirs with ignoreDirs when both are set', () => {
  // 'lib' is excluded through ignoreDirs, 'src' through ignoreAdditionalDirs.
  const options = { ignoreDirs: ['lib'], ignoreAdditionalDirs: ['src'] };
  const collected = collectFiles(tmpDir, [], options);
  const names = Array.from(collected, (file) => path.basename(file));
  expect(names).not.toContain('helper.py'); // lib excluded
  expect(names).not.toContain('app.js'); // src excluded
});

it('returns empty array for non-existent directory (graceful)', () => {
const files = collectFiles(path.join(tmpDir, 'does-not-exist'));
expect(files).toEqual([]);
Expand Down
31 changes: 31 additions & 0 deletions tests/unit/constants.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import path from 'node:path';
import { describe, expect, it } from 'vitest';
import {
buildIgnoreSet,
EXTENSIONS,
IGNORE_DIRS,
isSupportedFile,
Expand Down Expand Up @@ -49,6 +50,36 @@ describe('IGNORE_DIRS', () => {
expect(IGNORE_DIRS.has(dir)).toBe(true);
}
});

it('does not contain crates (repo-specific dirs belong in ignoreAdditionalDirs config)', () => {
  // 'crates' must not be part of the built-in set.
  const listed = IGNORE_DIRS.has('crates');
  expect(listed).toBe(false);
});
});

describe('buildIgnoreSet', () => {
  it('returns IGNORE_DIRS when no additional dirs provided', () => {
    // toBe checks identity: the shared set is handed back, not a copy.
    expect(buildIgnoreSet()).toBe(IGNORE_DIRS);
  });

  it('returns IGNORE_DIRS when empty array provided', () => {
    expect(buildIgnoreSet([])).toBe(IGNORE_DIRS);
  });

  it('merges additional dirs on top of IGNORE_DIRS', () => {
    const merged = buildIgnoreSet(['crates', 'generated']);
    // 'node_modules' comes from IGNORE_DIRS; the other two are the additions.
    for (const name of ['node_modules', 'crates', 'generated']) {
      expect(merged.has(name)).toBe(true);
    }
  });

  it('does not mutate IGNORE_DIRS', () => {
    const sizeBefore = IGNORE_DIRS.size;
    buildIgnoreSet(['crates']);
    expect(IGNORE_DIRS.size).toBe(sizeBefore);
    expect(IGNORE_DIRS.has('crates')).toBe(false);
  });
});

describe('shouldIgnore', () => {
Expand Down
Loading