@@ -114,6 +114,25 @@ pub struct ProjectIndexer {
114114}
115115
116116impl ProjectIndexer {
/// Resolves the batching parameters used when embedding symbols.
///
/// Returns `(batch_size, max_concurrent)`:
///
/// * `batch_size` is read from the `CODEGRAPH_SYMBOL_BATCH_SIZE` environment
///   variable, falling back to `self.config.batch_size`, and is clamped to
///   `1..=2048`.
/// * `max_concurrent` is read from `CODEGRAPH_SYMBOL_MAX_CONCURRENT`, falling
///   back to `self.config.max_concurrent`, and is clamped to `1..=16`.
///
/// An environment value that is unset, not valid Unicode, or not parseable as
/// a `usize` is ignored and the configured fallback is used instead.
#[cfg(feature = "ai-enhanced")]
fn symbol_embedding_batch_settings(&self) -> (usize, usize) {
    /// Reads `var` as a `usize`, substituting `fallback` when it is missing or
    /// malformed, and bounds the result to `lo..=hi`.
    fn env_usize_or(var: &str, fallback: usize, lo: usize, hi: usize) -> usize {
        std::env::var(var)
            .ok()
            // `str::parse::<usize>` rejects surrounding whitespace, so trim first;
            // otherwise a value such as "64 " would be silently discarded.
            .and_then(|raw| raw.trim().parse::<usize>().ok())
            .unwrap_or(fallback)
            // The lower bound of 1 keeps callers from chunking with a zero size.
            .clamp(lo, hi)
    }

    let batch_size = env_usize_or(
        "CODEGRAPH_SYMBOL_BATCH_SIZE",
        self.config.batch_size,
        1,
        2048,
    );
    let max_concurrent = env_usize_or(
        "CODEGRAPH_SYMBOL_MAX_CONCURRENT",
        self.config.max_concurrent,
        1,
        16,
    );

    (batch_size, max_concurrent)
}
135+
117136 pub async fn new (
118137 config : IndexerConfig ,
119138 global_config : & codegraph_core:: config_manager:: CodeGraphConfig ,
@@ -1267,9 +1286,6 @@ impl ProjectIndexer {
12671286 & self ,
12681287 symbol_map : & std:: collections:: HashMap < String , NodeId > ,
12691288 ) -> std:: collections:: HashMap < String , Vec < f32 > > {
1270- use codegraph_vector:: EmbeddingGenerator ;
1271- use futures:: future:: join_all;
1272-
12731289 info ! ( "🧠 Pre-computing symbol embeddings for M4 Max AI optimization" ) ;
12741290 info ! (
12751291 "🔧 DEBUG: precompute_symbol_embeddings called with {} symbols" ,
@@ -1295,16 +1311,21 @@ impl ProjectIndexer {
12951311 info ! ( "🤖 Using preserved ONNX embedder for AI semantic matching" ) ;
12961312 let embedder = & self . embedder ;
12971313 info ! ( "✅ Using working ONNX embedder session (guaranteed real embeddings)" ) ;
1298- let batch_size = 50 ; // Optimal for embedding generation
1299- info ! ( "⚡ Embedding batch size: {} symbols per batch" , batch_size) ;
1314+ let ( batch_size, max_concurrent) = self . symbol_embedding_batch_settings ( ) ;
1315+ let total_batches = ( top_symbols. len ( ) + batch_size - 1 ) / batch_size;
1316+ info ! (
1317+ "⚡ Embedding batch size: {} symbols ({} batches, max {} concurrent)" ,
1318+ batch_size,
1319+ total_batches. max( 1 ) ,
1320+ max_concurrent
1321+ ) ;
13001322
13011323 let batches: Vec < Vec < String > > = top_symbols
13021324 . chunks ( batch_size)
13031325 . map ( |chunk| chunk. iter ( ) . cloned ( ) . collect ( ) )
13041326 . collect ( ) ;
13051327
1306- let max_concurrent = 4 ; // Parallel batch processing
1307- let mut processed = 0 ; // Track progress
1328+ let mut processed = 0usize ;
13081329
13091330 let mut batch_stream = stream:: iter ( batches. into_iter ( ) . map ( |batch| {
13101331 let embedder = embedder;
@@ -1400,20 +1421,21 @@ impl ProjectIndexer {
14001421
14011422 let symbols_vec: Vec < _ > = unresolved_symbols. iter ( ) . cloned ( ) . collect ( ) ;
14021423 let embedder = & self . embedder ;
1403- let batch_size = 50 ; // Professional batch size for unresolved symbols
1424+ let ( batch_size, max_concurrent ) = self . symbol_embedding_batch_settings ( ) ;
14041425
1426+ let total_batches = ( symbols_vec. len ( ) + batch_size - 1 ) / batch_size;
14051427 info ! (
1406- "⚡ Unresolved embedding batch size: {} symbols per batch" ,
1407- batch_size
1428+ "⚡ Unresolved embedding batch size: {} symbols ({} batches, max {} concurrent)" ,
1429+ batch_size,
1430+ total_batches. max( 1 ) ,
1431+ max_concurrent
14081432 ) ;
14091433
14101434 let batches: Vec < Vec < String > > = symbols_vec
14111435 . chunks ( batch_size)
14121436 . map ( |chunk| chunk. iter ( ) . cloned ( ) . collect ( ) )
14131437 . collect ( ) ;
14141438
1415- let max_concurrent = 4 ; // Parallel batch processing
1416-
14171439 let mut batch_stream = stream:: iter ( batches. into_iter ( ) . map ( |batch| {
14181440 let embedder = embedder;
14191441 async move {
0 commit comments