Skip to content

Commit 4f038e3

Browse files
authored
update tantivy (#5990)
1 parent 21497d2 commit 4f038e3

File tree

7 files changed

+94
-25
lines changed

7 files changed

+94
-25
lines changed

LICENSE-3rdparty.csv

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,7 @@ enum-iterator,https://github.com/stephaneyfx/enum-iterator,MIT,Stephane Raux <st
155155
enum-iterator-derive,https://github.com/stephaneyfx/enum-iterator,0BSD,Stephane Raux <stephaneyfx@gmail.com>
156156
env_logger,https://github.com/rust-cli/env_logger,MIT OR Apache-2.0,The env_logger Authors
157157
equivalent,https://github.com/indexmap-rs/equivalent,Apache-2.0 OR MIT,The equivalent Authors
158+
erased-serde,https://github.com/dtolnay/erased-serde,MIT OR Apache-2.0,David Tolnay <dtolnay@gmail.com>
158159
errno,https://github.com/lambda-fairy/rust-errno,MIT OR Apache-2.0,"Chris Wong <lambda.fairy@gmail.com>, Dan Gohman <dev@sunfishcode.online>"
159160
fail,https://github.com/tikv/fail-rs,Apache-2.0,The TiKV Project Developers
160161
fastdivide,https://github.com/fulmicoton/fastdivide,zlib-acknowledgement OR MIT,Paul Masurel <paul.masurel@gmail.com>
@@ -229,6 +230,7 @@ indexmap,https://github.com/indexmap-rs/indexmap,Apache-2.0 OR MIT,The indexmap
229230
indicatif,https://github.com/console-rs/indicatif,MIT,The indicatif Authors
230231
inout,https://github.com/RustCrypto/utils,MIT OR Apache-2.0,RustCrypto Developers
231232
instant,https://github.com/sebcrozet/instant,BSD-3-Clause,sebcrozet <developer@crozet.re>
233+
inventory,https://github.com/dtolnay/inventory,MIT OR Apache-2.0,David Tolnay <dtolnay@gmail.com>
232234
io-uring,https://github.com/tokio-rs/io-uring,MIT OR Apache-2.0,quininer <quininer@live.com>
233235
ipnet,https://github.com/krisprice/ipnet,MIT OR Apache-2.0,Kris Price <kris@krisprice.nz>
234236
ipnetwork,https://github.com/achanda/ipnetwork,MIT OR Apache-2.0,"Abhishek Chanda <abhishek.becs@gmail.com>, Linus Färnstrand <faern@faern.net>"
@@ -512,7 +514,10 @@ tracing-serde,https://github.com/tokio-rs/tracing,MIT,Tokio Contributors <team@t
512514
tracing-subscriber,https://github.com/tokio-rs/tracing,MIT,"Eliza Weisman <eliza@buoyant.io>, David Barsky <me@davidbarsky.com>, Tokio Contributors <team@tokio.rs>"
513515
try-lock,https://github.com/seanmonstar/try-lock,MIT,Sean McArthur <sean@seanmonstar.com>
514516
ttl_cache,https://github.com/stusmall/ttl_cache,MIT OR Apache-2.0,Stu Small <stuart.alan.small@gmail.com>
517+
typeid,https://github.com/dtolnay/typeid,MIT OR Apache-2.0,David Tolnay <dtolnay@gmail.com>
515518
typenum,https://github.com/paholg/typenum,MIT OR Apache-2.0,"Paho Lurie-Gregg <paho@paholg.com>, Andre Bogus <bogusandre@gmail.com>"
519+
typetag,https://github.com/dtolnay/typetag,MIT OR Apache-2.0,David Tolnay <dtolnay@gmail.com>
520+
typetag-impl,https://github.com/dtolnay/typetag,MIT OR Apache-2.0,David Tolnay <dtolnay@gmail.com>
516521
ulid,https://github.com/dylanhart/ulid-rs,MIT,dylanhart <dylan96hart@gmail.com>
517522
unarray,https://github.com/cameron1024/unarray,MIT OR Apache-2.0,The unarray Authors
518523
unicase,https://github.com/seanmonstar/unicase,MIT OR Apache-2.0,Sean McArthur <sean@seanmonstar.com>

quickwit/Cargo.lock

Lines changed: 59 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

quickwit/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -346,7 +346,7 @@ quickwit-serve = { path = "quickwit-serve" }
346346
quickwit-storage = { path = "quickwit-storage" }
347347
quickwit-telemetry = { path = "quickwit-telemetry" }
348348

349-
tantivy = { git = "https://github.com/quickwit-oss/tantivy/", rev = "dabcaa5", default-features = false, features = [
349+
tantivy = { git = "https://github.com/quickwit-oss/tantivy/", rev = "70e591e", default-features = false, features = [
350350
"lz4-compression",
351351
"mmap",
352352
"quickwit",

quickwit/quickwit-query/src/aggregations.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,9 @@ impl From<TantivyBucketResult> for BucketResult {
189189
sum_other_doc_count,
190190
doc_count_error_upper_bound,
191191
},
192+
TantivyBucketResult::Filter(_filter_bucket_result) => {
193+
unimplemented!("filter aggregation is not yet supported in quickwit")
194+
}
192195
}
193196
}
194197
}

quickwit/quickwit-search/src/collector.rs

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,11 @@ use quickwit_proto::types::SplitId;
2727
use serde::Deserialize;
2828
use tantivy::aggregation::agg_req::{Aggregations, get_fast_field_names};
2929
use tantivy::aggregation::intermediate_agg_result::IntermediateAggregationResults;
30-
use tantivy::aggregation::{AggregationLimitsGuard, AggregationSegmentCollector};
30+
use tantivy::aggregation::{AggContextParams, AggregationLimitsGuard, AggregationSegmentCollector};
3131
use tantivy::collector::{Collector, SegmentCollector};
3232
use tantivy::columnar::{ColumnType, MonotonicallyMappableToU64};
3333
use tantivy::fastfield::Column;
34+
use tantivy::tokenizer::TokenizerManager;
3435
use tantivy::{DateTime, DocId, Score, SegmentOrdinal, SegmentReader, TantivyError};
3536

3637
use crate::find_trace_ids_collector::{FindTraceIdsCollector, FindTraceIdsSegmentCollector, Span};
@@ -715,7 +716,7 @@ pub(crate) struct QuickwitCollector {
715716
pub max_hits: usize,
716717
pub sort_by: SortByPair,
717718
pub aggregation: Option<QuickwitAggregations>,
718-
pub aggregation_limits: AggregationLimitsGuard,
719+
pub agg_context_params: AggContextParams,
719720
search_after: Option<PartialHit>,
720721
}
721722

@@ -785,7 +786,7 @@ impl Collector for QuickwitCollector {
785786
aggs,
786787
segment_reader,
787788
segment_ord,
788-
&self.aggregation_limits,
789+
&self.agg_context_params,
789790
)?,
790791
),
791792
),
@@ -1033,7 +1034,7 @@ pub(crate) fn sort_by_from_request(search_request: &SearchRequest) -> SortByPair
10331034
pub(crate) fn make_collector_for_split(
10341035
split_id: SplitId,
10351036
search_request: &SearchRequest,
1036-
aggregation_limits: AggregationLimitsGuard,
1037+
agg_context_params: AggContextParams,
10371038
) -> crate::Result<QuickwitCollector> {
10381039
let aggregation = match &search_request.aggregation_request {
10391040
Some(aggregation) => Some(serde_json::from_str(aggregation)?),
@@ -1046,16 +1047,23 @@ pub(crate) fn make_collector_for_split(
10461047
max_hits: search_request.max_hits as usize,
10471048
sort_by,
10481049
aggregation,
1049-
aggregation_limits,
1050+
agg_context_params,
10501051
search_after: search_request.search_after.clone(),
10511052
})
10521053
}
10531054

10541055
/// Builds a QuickwitCollector that's only useful for merging fruits.
10551056
pub(crate) fn make_merge_collector(
10561057
search_request: &SearchRequest,
1057-
aggregation_limits: &AggregationLimitsGuard,
1058+
agg_limits: AggregationLimitsGuard,
10581059
) -> crate::Result<QuickwitCollector> {
1060+
// Note: at this point, aggregations (filter query) no longer use the tokenizer manager,
1061+
// so we can create an empty one. If it were ever used here, it would panic.
1062+
let agg_context_params = AggContextParams {
1063+
limits: agg_limits,
1064+
tokenizers: TokenizerManager::new(),
1065+
};
1066+
10591067
let aggregation = match &search_request.aggregation_request {
10601068
Some(aggregation) => Some(serde_json::from_str(aggregation)?),
10611069
None => None,
@@ -1067,7 +1075,7 @@ pub(crate) fn make_merge_collector(
10671075
max_hits: search_request.max_hits as usize,
10681076
sort_by,
10691077
aggregation,
1070-
aggregation_limits: aggregation_limits.clone(),
1078+
agg_context_params,
10711079
search_after: search_request.search_after.clone(),
10721080
})
10731081
}
@@ -1748,7 +1756,7 @@ mod tests {
17481756
request: &SearchRequest,
17491757
results: Vec<LeafSearchResponse>,
17501758
) -> LeafSearchResponse {
1751-
let collector = make_merge_collector(request, &Default::default()).unwrap();
1759+
let collector = make_merge_collector(request, Default::default()).unwrap();
17521760
let mut incremental_collector = IncrementalCollector::new(collector.clone());
17531761

17541762
let result = collector

quickwit/quickwit-search/src/leaf.rs

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ use quickwit_storage::{
3535
BundleStorage, ByteRangeCache, MemorySizedCache, OwnedBytes, SplitCache, Storage,
3636
StorageResolver, TimeoutAndRetryStorage, wrap_storage_with_cache,
3737
};
38-
use tantivy::aggregation::AggregationLimitsGuard;
3938
use tantivy::aggregation::agg_req::{AggregationVariants, Aggregations};
39+
use tantivy::aggregation::{AggContextParams, AggregationLimitsGuard};
4040
use tantivy::directory::FileSlice;
4141
use tantivy::fastfield::FastFieldReaders;
4242
use tantivy::schema::Field;
@@ -489,8 +489,12 @@ async fn leaf_search_single_split(
489489
.try_into()?;
490490
let searcher = reader.searcher();
491491

492+
let agg_context_params = AggContextParams {
493+
limits: aggregations_limits,
494+
tokenizers: doc_mapper.tokenizer_manager().tantivy_manager().clone(),
495+
};
492496
let mut collector =
493-
make_collector_for_split(split_id.clone(), &search_request, aggregations_limits)?;
497+
make_collector_for_split(split_id.clone(), &search_request, agg_context_params)?;
494498

495499
let split_schema = index.schema();
496500
let (query, mut warmup_info) = doc_mapper.query(split_schema.clone(), &query_ast, false)?;
@@ -1226,7 +1230,7 @@ pub async fn multi_index_leaf_search(
12261230
try_join_all(leaf_request_tasks),
12271231
)
12281232
.await??;
1229-
let merge_collector = make_merge_collector(&search_request, &aggregation_limits)?;
1233+
let merge_collector = make_merge_collector(&search_request, aggregation_limits)?;
12301234
let mut incremental_merge_collector = IncrementalCollector::new(merge_collector);
12311235
for result in leaf_responses {
12321236
match result {
@@ -1310,7 +1314,7 @@ pub async fn single_doc_mapping_leaf_search(
13101314
let mut leaf_search_single_split_join_handles: Vec<(String, tokio::task::JoinHandle<()>)> =
13111315
Vec::with_capacity(split_with_req.len());
13121316

1313-
let merge_collector = make_merge_collector(&request, &aggregations_limits)?;
1317+
let merge_collector = make_merge_collector(&request, aggregations_limits.clone())?;
13141318
let incremental_merge_collector = IncrementalCollector::new(merge_collector);
13151319
let incremental_merge_collector = Arc::new(Mutex::new(incremental_merge_collector));
13161320

quickwit/quickwit-search/src/root.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -751,9 +751,8 @@ pub(crate) async fn search_partial_hits_phase(
751751
try_join_all(leaf_request_tasks).await?
752752
};
753753

754-
// Creates a collector which merges responses into one
755754
let merge_collector =
756-
make_merge_collector(search_request, &searcher_context.get_aggregation_limits())?;
755+
make_merge_collector(search_request, searcher_context.get_aggregation_limits())?;
757756

758757
// Merging is a cpu-bound task.
759758
// It should be executed by Tokio's blocking threads.
@@ -1287,7 +1286,7 @@ pub async fn search_plan(
12871286
&request_metadata.query_ast_resolved,
12881287
true,
12891288
)?;
1290-
let merge_collector = make_merge_collector(&search_request, &Default::default())?;
1289+
let merge_collector = make_merge_collector(&search_request, Default::default())?;
12911290
warmup_info.merge(merge_collector.warmup_info());
12921291
warmup_info.simplify();
12931292

0 commit comments

Comments
 (0)