Skip to content

Commit 4cd53f6

Browse files
committed
feat: Add repository estimation and embedding time calculation utilities
- Introduced `estimation.rs` for repository counting and embedding time estimation.
- Implemented `RepositoryEstimator` to analyze repositories and estimate indexing costs.
- Added CLI command for estimating indexing costs without writing to the database.
- Enhanced symbol indexing with `build_symbol_index` and `extend_symbol_index` functions.
- Refactored file parsing logic to use shared utilities for unified extraction.
- Added tests for embedding time estimation and symbol indexing.
- Updated dependencies in `Cargo.toml` to include `serde_yaml`.
- Cleaned up imports and organized code structure for better readability.
1 parent 6e495be commit 4cd53f6

32 files changed

+968
-267
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/codegraph-api/src/auth.rs

Lines changed: 4 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -83,11 +83,12 @@ pub async fn auth_middleware(
8383
mut req: Request,
8484
next: Next,
8585
) -> Result<Response, StatusCode> {
86-
let api_key = req.headers()
86+
let api_key = req
87+
.headers()
8788
.get("X-API-KEY")
8889
.and_then(|v| v.to_str().ok())
8990
.map(|s| s.to_string());
90-
91+
9192
if let Some(api_key) = api_key {
9293
return api_key_auth(&api_key, req, next).await;
9394
}
@@ -137,11 +138,7 @@ pub async fn auth_middleware(
137138
Ok(next.run(req).await)
138139
}
139140

140-
async fn api_key_auth(
141-
api_key: &str,
142-
mut req: Request,
143-
next: Next,
144-
) -> Result<Response, StatusCode> {
141+
async fn api_key_auth(api_key: &str, mut req: Request, next: Next) -> Result<Response, StatusCode> {
145142
// In a real application, you would look up the API key in a database.
146143
// For this example, we'll use a hardcoded key.
147144
if api_key == "test-api-key" {

crates/codegraph-api/src/enhanced_health.rs

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
use crate::{ApiError, ApiResult, AppState};
21
use crate::parser_ext::TreeSitterParserExt;
32
use crate::semantic_search_ext::SemanticSearchExt;
3+
use crate::{ApiError, ApiResult, AppState};
44
use axum::{extract::State, Json};
55
use serde::{Deserialize, Serialize};
66
use std::collections::HashMap;
@@ -150,7 +150,10 @@ async fn check_database_health_enhanced(state: &AppState) -> ComponentStatus {
150150
let mut details = HashMap::new();
151151
details.insert("total_nodes".to_string(), stats.total_nodes.to_string());
152152
details.insert("total_edges".to_string(), stats.total_edges.to_string());
153-
details.insert("connection_test".to_string(), connection_ok.unwrap_or(false).to_string());
153+
details.insert(
154+
"connection_test".to_string(),
155+
connection_ok.unwrap_or(false).to_string(),
156+
);
154157

155158
// Calculate health score based on response time and stats
156159
let health_score = calculate_health_score(response_time, Some(&details));
@@ -212,7 +215,10 @@ async fn check_vector_search_health_enhanced(state: &AppState) -> ComponentStatu
212215
);
213216
details.insert("index_type".to_string(), format!("{:?}", stats.index_type));
214217
details.insert("dimension".to_string(), stats.dimension.to_string());
215-
details.insert("search_test".to_string(), test_ok.unwrap_or(false).to_string());
218+
details.insert(
219+
"search_test".to_string(),
220+
test_ok.unwrap_or(false).to_string(),
221+
);
216222

217223
let health_score = calculate_health_score(response_time, Some(&details));
218224

crates/codegraph-api/src/error.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ pub enum ApiError {
2323

2424
#[error("Bad request: {0}")]
2525
BadRequest(String),
26-
26+
2727
#[error("Service unavailable: {0}")]
2828
ServiceUnavailable(String),
2929
}

crates/codegraph-api/src/graph_stub.rs

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
// Temporary stub types for codegraph-graph until it's fixed
22
// These are placeholder implementations to allow compilation
33

4+
use chrono::{DateTime, Utc};
45
use codegraph_core::{CodeNode, NodeId, Result};
56
use std::collections::HashMap;
67
use std::sync::Arc;
78
use tokio::sync::RwLock;
89
use uuid::Uuid;
9-
use chrono::{DateTime, Utc};
1010

1111
/// Temporary stub for CodeGraph from codegraph-graph crate
1212
#[derive(Debug, Clone, Default)]
@@ -38,8 +38,13 @@ impl CodeGraph {
3838
pub fn node_count(&self) -> usize {
3939
self.nodes.len()
4040
}
41-
42-
pub async fn astar_shortest_path<F>(&self, from: NodeId, to: NodeId, _heuristic: F) -> Result<Vec<NodeId>>
41+
42+
pub async fn astar_shortest_path<F>(
43+
&self,
44+
from: NodeId,
45+
to: NodeId,
46+
_heuristic: F,
47+
) -> Result<Vec<NodeId>>
4348
where
4449
F: Fn(&NodeId) -> f64,
4550
{
@@ -50,7 +55,7 @@ impl CodeGraph {
5055
Ok(vec![])
5156
}
5257
}
53-
58+
5459
pub async fn shortest_path(&self, from: NodeId, to: NodeId) -> Result<Vec<NodeId>> {
5560
// Simple stub implementation - just return direct path
5661
if self.nodes.contains_key(&from) && self.nodes.contains_key(&to) {
@@ -82,8 +87,8 @@ impl TransactionalGraph {
8287
/// Create a new TransactionalGraph with real storage-backed managers
8388
pub async fn with_storage(storage_path: &str) -> Result<Self> {
8489
use codegraph_graph::VersionedRocksDbStorage;
85-
use tokio::sync::RwLock as TokioRwLock;
8690
use std::sync::Arc;
91+
use tokio::sync::RwLock as TokioRwLock;
8792

8893
// Initialize storage
8994
let storage = VersionedRocksDbStorage::new(storage_path).await?;
@@ -266,13 +271,18 @@ impl ConcurrentTransactionManager {
266271
Self { storage: None }
267272
}
268273

269-
pub fn with_storage(storage: Arc<tokio::sync::RwLock<codegraph_graph::VersionedRocksDbStorage>>) -> Self {
274+
pub fn with_storage(
275+
storage: Arc<tokio::sync::RwLock<codegraph_graph::VersionedRocksDbStorage>>,
276+
) -> Self {
270277
Self {
271278
storage: Some(storage),
272279
}
273280
}
274281

275-
pub async fn begin_transaction(&self, _isolation_level: IsolationLevel) -> Result<TransactionId> {
282+
pub async fn begin_transaction(
283+
&self,
284+
_isolation_level: IsolationLevel,
285+
) -> Result<TransactionId> {
276286
// Stub implementation - just generate a transaction ID
277287
Ok(Uuid::new_v4())
278288
}
@@ -311,7 +321,13 @@ impl GitLikeVersionManager {
311321
}
312322
}
313323

314-
pub async fn create_version(&self, _name: String, _description: String, _author: String, _parent_versions: Vec<VersionId>) -> Result<VersionId> {
324+
pub async fn create_version(
325+
&self,
326+
_name: String,
327+
_description: String,
328+
_author: String,
329+
_parent_versions: Vec<VersionId>,
330+
) -> Result<VersionId> {
315331
// Stub implementation - just generate a version ID
316332
Ok(Uuid::new_v4())
317333
}
@@ -415,4 +431,4 @@ impl RecoveryManager {
415431
pub async fn restore_from_backup(&self, _path: String) -> Result<()> {
416432
Ok(())
417433
}
418-
}
434+
}

crates/codegraph-api/src/graphql/loaders.rs

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,10 @@ impl Loader<NodeId> for NodeLoader {
2323
type Error = CodeGraphError;
2424

2525
#[instrument(skip(self, keys), fields(batch_size = keys.len()))]
26-
async fn load(&self, keys: &[NodeId]) -> std::result::Result<HashMap<NodeId, GraphQLCodeNode>, CodeGraphError> {
26+
async fn load(
27+
&self,
28+
keys: &[NodeId],
29+
) -> std::result::Result<HashMap<NodeId, GraphQLCodeNode>, CodeGraphError> {
2730
let start_time = Instant::now();
2831
debug!("Loading batch of {} nodes", keys.len());
2932

@@ -111,7 +114,10 @@ impl Loader<NodeId> for EdgesBySourceLoader {
111114
type Error = CodeGraphError;
112115

113116
#[instrument(skip(self, keys), fields(batch_size = keys.len()))]
114-
async fn load(&self, keys: &[NodeId]) -> std::result::Result<HashMap<NodeId, Vec<GraphQLEdge>>, CodeGraphError> {
117+
async fn load(
118+
&self,
119+
keys: &[NodeId],
120+
) -> std::result::Result<HashMap<NodeId, Vec<GraphQLEdge>>, CodeGraphError> {
115121
let start_time = Instant::now();
116122
debug!("Loading edges for {} source nodes", keys.len());
117123

@@ -177,7 +183,10 @@ impl Loader<String> for SemanticSearchLoader {
177183
type Error = CodeGraphError;
178184

179185
#[instrument(skip(self, keys), fields(batch_size = keys.len()))]
180-
async fn load(&self, keys: &[String]) -> std::result::Result<HashMap<String, Vec<RetrievalResult>>, CodeGraphError> {
186+
async fn load(
187+
&self,
188+
keys: &[String],
189+
) -> std::result::Result<HashMap<String, Vec<RetrievalResult>>, CodeGraphError> {
181190
let start_time = Instant::now();
182191
debug!("Semantic search batch for {} queries", keys.len());
183192

@@ -209,7 +218,8 @@ impl Loader<String> for SemanticSearchLoader {
209218
node_id: res.node_id,
210219
node: res.node,
211220
relevance_score: res.score,
212-
retrieval_method: codegraph_vector::rag::RetrievalMethod::SemanticSimilarity,
221+
retrieval_method:
222+
codegraph_vector::rag::RetrievalMethod::SemanticSimilarity,
213223
context_snippet: snippet,
214224
}
215225
})

crates/codegraph-api/src/graphql/resolvers.rs

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use async_graphql::{dataloader::DataLoader, Context, Object, Result, ID};
22
use async_trait::async_trait;
3-
use codegraph_core::{CodeGraphError, NodeId, GraphStore};
3+
use codegraph_core::{CodeGraphError, GraphStore, NodeId};
44
use std::collections::HashMap;
55
use std::str::FromStr;
66
use std::time::Instant;
@@ -14,13 +14,13 @@ use crate::graphql::loaders::{
1414
TraversalKey,
1515
};
1616
use crate::graphql::types::GraphQLEdge;
17+
use crate::graphql::types::UpdateNodeInput;
1718
use crate::graphql::types::{
1819
CodeSearchInput, CodeSearchResult, GraphQLCodeNode, GraphTraversalInput, GraphTraversalResult,
1920
PageInfo, ScoredNode, SearchMetadata, SearchSortBy, SemanticSearchInput,
2021
SemanticSearchMetadata, SemanticSearchResult, SubgraphExtractionInput, SubgraphMetadata,
2122
SubgraphResult, TraversalMetadata,
2223
};
23-
use crate::graphql::types::UpdateNodeInput;
2424
use crate::state::AppState;
2525

2626
pub struct QueryRoot;
@@ -68,26 +68,29 @@ impl QueryRoot {
6868
if let Some(ref language_filters) = input.language_filter {
6969
nodes.retain(|node| {
7070
node.language.as_ref().map_or(false, |lang| {
71-
language_filters.iter().any(|filter_lang| lang == filter_lang)
71+
language_filters
72+
.iter()
73+
.any(|filter_lang| lang == filter_lang)
7274
})
7375
});
7476
}
7577
if let Some(ref node_type_filters) = input.node_type_filter {
7678
nodes.retain(|node| {
77-
node.node_type.as_ref().map_or(false, |nt| {
78-
node_type_filters.iter().any(|f| nt == f)
79-
})
79+
node.node_type
80+
.as_ref()
81+
.map_or(false, |nt| node_type_filters.iter().any(|f| nt == f))
8082
});
8183
}
8284
if let Some(ref file_pattern) = input.file_path_pattern {
8385
nodes.retain(|node| node.location.file_path.contains(file_pattern));
8486
}
8587
if let Some(ref content_filter) = input.content_filter {
8688
let needle = content_filter.to_lowercase();
87-
nodes.retain(|node| node
88-
.content
89-
.as_ref()
90-
.map_or(false, |c| c.to_lowercase().contains(&needle)));
89+
nodes.retain(|node| {
90+
node.content
91+
.as_ref()
92+
.map_or(false, |c| c.to_lowercase().contains(&needle))
93+
});
9194
}
9295

9396
// Sorting

crates/codegraph-api/src/graphql/tests.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
use async_graphql::{value, Request, Variables};
2+
use codegraph_core::ConfigManager;
23
use serde_json::json;
34
use std::sync::Arc;
45
use tokio_test;
56
use uuid::Uuid;
6-
use codegraph_core::ConfigManager;
77

88
use crate::graphql::types::*;
99
use crate::schema::create_schema;
@@ -12,7 +12,9 @@ use crate::state::AppState;
1212
/// Helper function to create test app state
1313
async fn create_test_state() -> AppState {
1414
let config = Arc::new(ConfigManager::new().expect("Failed to create test config"));
15-
AppState::new(config).await.expect("Failed to create test state")
15+
AppState::new(config)
16+
.await
17+
.expect("Failed to create test state")
1618
}
1719

1820
/// Helper function to create test schema with state

crates/codegraph-api/src/health.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
use crate::{ApiError, ApiResult, AppState};
2-
use crate::semantic_search_ext::SemanticSearchExt;
31
use crate::parser_ext::TreeSitterParserExt;
2+
use crate::semantic_search_ext::SemanticSearchExt;
3+
use crate::{ApiError, ApiResult, AppState};
44
use axum::{extract::State, Json};
55
use serde::{Deserialize, Serialize};
66
use std::collections::HashMap;

crates/codegraph-api/src/lib.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ pub mod metrics;
2020
// pub mod middleware;
2121
// Legacy mutations module is disabled to avoid conflicts; mutations live in GraphQL resolvers
2222
// pub mod mutations;
23+
pub mod parser_ext;
2324
pub mod performance;
2425
#[cfg(feature = "graphql")]
2526
pub mod queries;
@@ -28,16 +29,15 @@ pub mod rest;
2829
pub mod routes;
2930
#[cfg(feature = "graphql")]
3031
pub mod schema;
31-
pub mod server;
32-
pub mod parser_ext;
3332
pub mod semantic_search_ext;
34-
pub mod vector_store_ext;
33+
pub mod server;
3534
pub mod service_registry;
3635
pub mod state;
3736
pub mod streaming_handlers;
3837
#[cfg(feature = "graphql")]
3938
pub mod subscriptions;
4039
pub mod vector_handlers;
40+
pub mod vector_store_ext;
4141
pub mod versioning_handlers;
4242

4343
#[cfg(test)]

0 commit comments

Comments
 (0)