Skip to content

Commit cae06df

Browse files
committed
Refactor: Extract embedding batch settings and improve schema formatting
- Extract symbol embedding batch configuration into a reusable method
- Add environment variable support for batch size and concurrency
- Improve schema.surql formatting with consistent field alignment
- Clean up schema comments for better readability
1 parent 156367e commit cae06df

File tree

2 files changed

+251
-162
lines changed

2 files changed

+251
-162
lines changed

crates/codegraph-mcp/src/indexer.rs

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,25 @@ pub struct ProjectIndexer {
114114
}
115115

116116
impl ProjectIndexer {
117+
#[cfg(feature = "ai-enhanced")]
118+
fn symbol_embedding_batch_settings(&self) -> (usize, usize) {
119+
let batch_size = std::env::var("CODEGRAPH_SYMBOL_BATCH_SIZE")
120+
.ok()
121+
.and_then(|v| v.parse::<usize>().ok())
122+
.unwrap_or_else(|| self.config.batch_size)
123+
.max(1)
124+
.min(2048);
125+
126+
let max_concurrent = std::env::var("CODEGRAPH_SYMBOL_MAX_CONCURRENT")
127+
.ok()
128+
.and_then(|v| v.parse::<usize>().ok())
129+
.unwrap_or_else(|| self.config.max_concurrent)
130+
.max(1)
131+
.min(16);
132+
133+
(batch_size, max_concurrent)
134+
}
135+
117136
pub async fn new(
118137
config: IndexerConfig,
119138
global_config: &codegraph_core::config_manager::CodeGraphConfig,
@@ -1267,9 +1286,6 @@ impl ProjectIndexer {
12671286
&self,
12681287
symbol_map: &std::collections::HashMap<String, NodeId>,
12691288
) -> std::collections::HashMap<String, Vec<f32>> {
1270-
use codegraph_vector::EmbeddingGenerator;
1271-
use futures::future::join_all;
1272-
12731289
info!("🧠 Pre-computing symbol embeddings for M4 Max AI optimization");
12741290
info!(
12751291
"🔧 DEBUG: precompute_symbol_embeddings called with {} symbols",
@@ -1295,16 +1311,21 @@ impl ProjectIndexer {
12951311
info!("🤖 Using preserved ONNX embedder for AI semantic matching");
12961312
let embedder = &self.embedder;
12971313
info!("✅ Using working ONNX embedder session (guaranteed real embeddings)");
1298-
let batch_size = 50; // Optimal for embedding generation
1299-
info!("⚡ Embedding batch size: {} symbols per batch", batch_size);
1314+
let (batch_size, max_concurrent) = self.symbol_embedding_batch_settings();
1315+
let total_batches = (top_symbols.len() + batch_size - 1) / batch_size;
1316+
info!(
1317+
"⚡ Embedding batch size: {} symbols ({} batches, max {} concurrent)",
1318+
batch_size,
1319+
total_batches.max(1),
1320+
max_concurrent
1321+
);
13001322

13011323
let batches: Vec<Vec<String>> = top_symbols
13021324
.chunks(batch_size)
13031325
.map(|chunk| chunk.iter().cloned().collect())
13041326
.collect();
13051327

1306-
let max_concurrent = 4; // Parallel batch processing
1307-
let mut processed = 0; // Track progress
1328+
let mut processed = 0usize;
13081329

13091330
let mut batch_stream = stream::iter(batches.into_iter().map(|batch| {
13101331
let embedder = embedder;
@@ -1400,20 +1421,21 @@ impl ProjectIndexer {
14001421

14011422
let symbols_vec: Vec<_> = unresolved_symbols.iter().cloned().collect();
14021423
let embedder = &self.embedder;
1403-
let batch_size = 50; // Professional batch size for unresolved symbols
1424+
let (batch_size, max_concurrent) = self.symbol_embedding_batch_settings();
14041425

1426+
let total_batches = (symbols_vec.len() + batch_size - 1) / batch_size;
14051427
info!(
1406-
"⚡ Unresolved embedding batch size: {} symbols per batch",
1407-
batch_size
1428+
"⚡ Unresolved embedding batch size: {} symbols ({} batches, max {} concurrent)",
1429+
batch_size,
1430+
total_batches.max(1),
1431+
max_concurrent
14081432
);
14091433

14101434
let batches: Vec<Vec<String>> = symbols_vec
14111435
.chunks(batch_size)
14121436
.map(|chunk| chunk.iter().cloned().collect())
14131437
.collect();
14141438

1415-
let max_concurrent = 4; // Parallel batch processing
1416-
14171439
let mut batch_stream = stream::iter(batches.into_iter().map(|batch| {
14181440
let embedder = embedder;
14191441
async move {

0 commit comments

Comments
 (0)