Skip to content

Commit abd082b

Browse files
committed
feat(indexer): implement dynamic batching for symbol embeddings and add related tests
1 parent 4cd53f6 commit abd082b

File tree

1 file changed

+40
-3
lines changed

1 file changed

+40
-3
lines changed

crates/codegraph-mcp/src/indexer.rs

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ use std::sync::Arc;
3232

3333
use std::collections::HashMap;
3434

35+
/// Fallback chunk size for symbol-embedding writes.
///
/// Despite the `LIMIT` suffix this is the *default*, not the upper bound:
/// `symbol_embedding_db_batch_size` returns it when
/// `CODEGRAPH_SYMBOL_DB_BATCH_SIZE` is unset or cannot be parsed as a `usize`.
const SYMBOL_EMBEDDING_DB_BATCH_LIMIT: usize = 256;
36+
3537
#[derive(Clone, Debug)]
3638
pub struct IndexerConfig {
3739
pub languages: Vec<String>,
@@ -2009,9 +2011,12 @@ impl ProjectIndexer {
20092011
if records.is_empty() {
20102012
return Ok(());
20112013
}
2012-
self.surreal_writer_handle()?
2013-
.enqueue_symbol_embeddings(records)
2014-
.await
2014+
let batch_size = symbol_embedding_db_batch_size();
2015+
let handle = self.surreal_writer_handle()?;
2016+
for chunk in records.chunks(batch_size) {
2017+
handle.enqueue_symbol_embeddings(chunk.to_vec()).await?;
2018+
}
2019+
Ok(())
20152020
}
20162021

20172022
fn surreal_writer_handle(&self) -> Result<&SurrealWriterHandle> {
@@ -2255,6 +2260,38 @@ impl ProjectIndexer {
22552260
}
22562261
}
22572262

2263+
fn symbol_embedding_db_batch_size() -> usize {
2264+
const MAX: usize = 512;
2265+
std::env::var("CODEGRAPH_SYMBOL_DB_BATCH_SIZE")
2266+
.ok()
2267+
.and_then(|value| value.parse::<usize>().ok())
2268+
.map(|parsed| parsed.clamp(1, MAX))
2269+
.unwrap_or(SYMBOL_EMBEDDING_DB_BATCH_LIMIT)
2270+
}
2271+
2272+
#[cfg(test)]
2273+
mod tests {
2274+
use super::*;
2275+
2276+
#[test]
2277+
fn symbol_embedding_batch_size_defaults() {
2278+
std::env::remove_var("CODEGRAPH_SYMBOL_DB_BATCH_SIZE");
2279+
assert_eq!(
2280+
symbol_embedding_db_batch_size(),
2281+
SYMBOL_EMBEDDING_DB_BATCH_LIMIT
2282+
);
2283+
}
2284+
2285+
#[test]
2286+
fn symbol_embedding_batch_size_respects_env_and_clamps() {
2287+
std::env::set_var("CODEGRAPH_SYMBOL_DB_BATCH_SIZE", "1024");
2288+
assert_eq!(symbol_embedding_db_batch_size(), 512);
2289+
std::env::set_var("CODEGRAPH_SYMBOL_DB_BATCH_SIZE", "0");
2290+
assert_eq!(symbol_embedding_db_batch_size(), 1);
2291+
std::env::remove_var("CODEGRAPH_SYMBOL_DB_BATCH_SIZE");
2292+
}
2293+
}
2294+
22582295
pub fn prepare_node_text(node: &CodeNode) -> String {
22592296
let lang = node
22602297
.language

0 commit comments

Comments
 (0)