-
Notifications
You must be signed in to change notification settings - Fork 2.2k
[Query]: Adds ability to choose global vs local/focused statistics for FullTextScore #48431
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -14,11 +14,13 @@ | |
| import com.azure.cosmos.models.CosmosFullTextIndex; | ||
| import com.azure.cosmos.models.CosmosFullTextPath; | ||
| import com.azure.cosmos.models.CosmosFullTextPolicy; | ||
| import com.azure.cosmos.models.CosmosFullTextScoreScope; | ||
| import com.azure.cosmos.models.CosmosQueryRequestOptions; | ||
| import com.azure.cosmos.models.IncludedPath; | ||
| import com.azure.cosmos.models.IndexingMode; | ||
| import com.azure.cosmos.models.IndexingPolicy; | ||
| import com.azure.cosmos.models.PartitionKeyDefinition; | ||
| import com.azure.cosmos.models.PartitionKey; | ||
| import com.azure.cosmos.models.SqlParameter; | ||
| import com.azure.cosmos.models.SqlQuerySpec; | ||
| import com.azure.cosmos.models.ThroughputProperties; | ||
|
|
@@ -49,7 +51,7 @@ | |
| import static org.assertj.core.api.Assertions.assertThat; | ||
| import static org.assertj.core.api.Assertions.fail; | ||
|
|
||
| @Ignore("TODO: Ignore these test cases until the public emulator is released.") | ||
| //@Ignore("TODO: Ignore these test cases until the public emulator is released.") | ||
| public class HybridSearchQueryTest { | ||
| protected static final int TIMEOUT = 30000; | ||
| protected static final int SETUP_TIMEOUT = 80000; | ||
|
|
@@ -74,7 +76,7 @@ public void before_HybridSearchQueryTest() { | |
|
|
||
| PartitionKeyDefinition partitionKeyDef = new PartitionKeyDefinition(); | ||
| ArrayList<String> paths = new ArrayList<String>(); | ||
| paths.add("/id"); | ||
| paths.add("/pk"); | ||
| partitionKeyDef.setPaths(paths); | ||
|
|
||
| CosmosContainerProperties containerProperties = new CosmosContainerProperties(containerId, partitionKeyDef); | ||
|
|
@@ -83,8 +85,11 @@ public void before_HybridSearchQueryTest() { | |
| database.createContainer(containerProperties, ThroughputProperties.createManualThroughput(10000)).block(); | ||
| container = database.getContainer(containerId); | ||
|
|
||
| // Insert documents with pk field based on (index % 2) + 1, so even ids → pk="1", odd ids → pk="2" | ||
| List<Document> documents = loadProductsFromJson(); | ||
| for (Document doc : documents) { | ||
| int index = Integer.parseInt(doc.getId()); | ||
| doc.pk = String.valueOf((index % 2) + 1); | ||
| container.createItem(doc).block(); | ||
| } | ||
| } | ||
|
|
@@ -131,7 +136,7 @@ public void hybridQueryTest() { | |
| .flatMap(feedResponse -> Flux.fromIterable(feedResponse.getResults())) | ||
| .collectList().block(); | ||
| assertThat(resultDocs).hasSize(15); | ||
| validateResults(Arrays.asList("51", "49", "24", "61", "54", "22", "2", "25", "75", "77", "57", "76", "66", "80", "85"), resultDocs); | ||
| validateResults(Arrays.asList("61", "54", "51", "49", "24", "2", "57", "22", "75", "25", "77", "76", "66", "80", "85"), resultDocs); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🟡 Recommendation · Maintainability: Unverifiable Change Expected result orderings changed without explanation Seven existing test assertions had their expected result orderings changed. The partition key was changed from Suggestion: Add a comment explaining why the expected ordering changed (e.g., partition key change redistributes documents, altering BM25 statistics). |
||
|
|
||
| query = "SELECT c.id, c.title FROM c WHERE FullTextContains(c.title, @term1) " + | ||
| "OR FullTextContains(c.text, @term1) OR FullTextContains(c.text, @term2) ORDER BY " + | ||
|
|
@@ -144,7 +149,7 @@ public void hybridQueryTest() { | |
| .flatMap(feedResponse -> Flux.fromIterable(feedResponse.getResults())) | ||
| .collectList().block(); | ||
| assertThat(resultDocs).hasSize(10); | ||
| validateResults(Arrays.asList("22", "2", "25", "75", "77", "57", "76", "66", "80", "85"), resultDocs); | ||
| validateResults(Arrays.asList("2", "57", "22", "75", "25", "77", "76", "66", "80", "85"), resultDocs); | ||
|
|
||
| List<Float> vector = getQueryVector(); | ||
| query = "SELECT TOP 10 c.id, c.text, c.title FROM c " + | ||
|
|
@@ -159,7 +164,7 @@ public void hybridQueryTest() { | |
| .flatMap(feedResponse -> Flux.fromIterable(feedResponse.getResults())) | ||
| .collectList().block(); | ||
| assertThat(resultDocs).hasSize(10); | ||
| validateResults(Arrays.asList("4", "24", "6", "9", "2", "3", "21", "5", "49", "13"), resultDocs); | ||
| validateResults(Arrays.asList("55", "61", "57", "24", "2", "54", "63", "9", "62", "75"), resultDocs); | ||
| } | ||
|
|
||
| @Test(groups = {"query", "split"}, timeOut = TIMEOUT) | ||
|
|
@@ -179,7 +184,7 @@ public void hybridQueryWeightedRRFTest(){ | |
| assertThat(results).isNotNull(); | ||
|
|
||
| validateResults( | ||
| Arrays.asList("51", "49", "24", "61", "54", "22", "2", "25", "75", "77", "57", "76", "66", "80", "85"), | ||
| Arrays.asList("61", "54", "51", "49", "24", "2", "57", "22", "75", "25", "77", "76", "66", "80", "85"), | ||
| results | ||
| ); | ||
|
|
||
|
|
@@ -198,7 +203,7 @@ public void hybridQueryWeightedRRFTest(){ | |
| assertThat(results).hasSize(15); | ||
| assertThat(results).isNotNull(); | ||
| validateResults( | ||
| Arrays.asList("51", "49", "24", "61", "54", "22", "2", "25", "75", "77", "57", "76", "66", "80", "85"), | ||
| Arrays.asList("61", "54", "51", "49", "24", "2", "57", "22", "75", "25", "77", "76", "66", "80", "85"), | ||
| results | ||
| ); | ||
|
|
||
|
|
@@ -216,7 +221,7 @@ public void hybridQueryWeightedRRFTest(){ | |
| assertThat(results).hasSize(10); | ||
| assertThat(results).isNotNull(); | ||
| validateResults( | ||
| Arrays.asList("51", "49", "24", "61", "54", "22", "2", "25", "75", "77"), | ||
| Arrays.asList("61", "54", "51", "49", "24", "2", "57", "22", "75", "25"), | ||
| results | ||
| ); | ||
|
|
||
|
|
@@ -234,7 +239,7 @@ public void hybridQueryWeightedRRFTest(){ | |
| assertThat(results).hasSize(10); | ||
| assertThat(results).isNotNull(); | ||
| validateResults( | ||
| Arrays.asList("22", "57", "25", "24", "66", "2", "85", "49", "51", "54"), | ||
| Arrays.asList("57", "22", "25", "54", "66", "24", "2", "85", "61", "76"), | ||
| results | ||
| ); | ||
|
|
||
|
|
@@ -254,11 +259,106 @@ public void hybridQueryWeightedRRFTest(){ | |
| assertThat(results).hasSize(10); | ||
| assertThat(results).isNotNull(); | ||
| validateResults( | ||
| Arrays.asList("75", "24", "49", "61", "21", "9", "26", "4", "6", "37"), | ||
| Arrays.asList("75", "24", "49", "55", "61", "21", "9", "26", "37", "57"), | ||
| results | ||
| ); | ||
| } | ||
|
|
||
| @Test(groups = {"query", "split"}, timeOut = TIMEOUT) | ||
| public void hybridQueryWithLocalStatisticsTest() { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🟢 Suggestion · Testing: Weak Assertion LOCAL scope tests do not assert result ordering The LOCAL scope tests assert result count and partition key values, but not the order of results. For a scoring/ranking feature, ordering is the primary output. Different score computation (local vs global statistics) should produce different orderings. Suggestion: Add
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🟢 Suggestion · Cross-SDK: Test Coverage Gap Test coverage gap relative to .NET SDK The .NET SDK's equivalent implementation includes tests that validate score values differ between LOCAL and GLOBAL scope (not just result sets). The Java tests only verify result counts and partition key membership. Suggestion: Consider adding a test that verifies LOCAL and GLOBAL produce different score rankings for the same query when data distribution differs across partitions. |
||
| // Documents with 'John': id=2 (pk="1"), id=57 (pk="2"), id=85 (pk="2") | ||
|
|
||
| String query = "SELECT TOP 10 c.id, c.title, c.pk FROM c WHERE FullTextContains(c.title, @term1) OR " + | ||
| "FullTextContains(c.text, @term1) ORDER BY RANK FullTextScore(c.title, @term1)"; | ||
| SqlQuerySpec querySpec = new SqlQuerySpec(query, Arrays.asList( | ||
| new SqlParameter("@term1", "John") | ||
| )); | ||
|
|
||
| // Test 1: GLOBAL scope (default) cross-partition — should return all 3 'John' matches | ||
| List<Document> globalResultDocs = container.queryItems(querySpec, new CosmosQueryRequestOptions(), Document.class).byPage() | ||
| .flatMap(feedResponse -> Flux.fromIterable(feedResponse.getResults())) | ||
| .collectList().block(); | ||
| assertThat(globalResultDocs).isNotNull(); | ||
| assertThat(globalResultDocs).hasSize(3); | ||
|
|
||
| // Test 2: Explicit GLOBAL scope cross-partition — same as default | ||
| CosmosQueryRequestOptions globalScopeOptions = new CosmosQueryRequestOptions(); | ||
| globalScopeOptions.setFullTextScoreScope(CosmosFullTextScoreScope.GLOBAL); | ||
|
|
||
| List<Document> explicitGlobalResultDocs = container.queryItems(querySpec, globalScopeOptions, Document.class).byPage() | ||
| .flatMap(feedResponse -> Flux.fromIterable(feedResponse.getResults())) | ||
| .collectList().block(); | ||
| assertThat(explicitGlobalResultDocs).isNotNull(); | ||
| assertThat(explicitGlobalResultDocs).hasSize(3); | ||
|
|
||
| List<String> defaultIds = globalResultDocs.stream().map(Document::getId).collect(Collectors.toList()); | ||
| List<String> explicitGlobalIds = explicitGlobalResultDocs.stream().map(Document::getId).collect(Collectors.toList()); | ||
| assertThat(explicitGlobalIds).isEqualTo(defaultIds); | ||
|
|
||
| // Test 3: LOCAL scope with pk="2" — only id=57 and id=85 have 'John' in pk="2" | ||
| // Stats are computed only over pk="2" partition | ||
| CosmosQueryRequestOptions localScopeOptionsPk2 = new CosmosQueryRequestOptions(); | ||
| localScopeOptionsPk2.setFullTextScoreScope(CosmosFullTextScoreScope.LOCAL); | ||
| localScopeOptionsPk2.setPartitionKey(new PartitionKey("2")); | ||
|
|
||
| List<Document> localPk2ResultDocs = container.queryItems(querySpec, localScopeOptionsPk2, Document.class).byPage() | ||
| .flatMap(feedResponse -> Flux.fromIterable(feedResponse.getResults())) | ||
| .collectList().block(); | ||
| assertThat(localPk2ResultDocs).isNotNull(); | ||
| assertThat(localPk2ResultDocs).hasSize(2); | ||
| for (Document doc : localPk2ResultDocs) { | ||
| assertThat(doc.getPk()).isEqualTo("2"); | ||
| } | ||
|
|
||
| // Test 4: LOCAL scope with pk="1" — only id=2 has 'John' in pk="1" | ||
| // Stats are computed only over pk="1" partition | ||
| CosmosQueryRequestOptions localScopeOptionsPk1 = new CosmosQueryRequestOptions(); | ||
| localScopeOptionsPk1.setFullTextScoreScope(CosmosFullTextScoreScope.LOCAL); | ||
| localScopeOptionsPk1.setPartitionKey(new PartitionKey("1")); | ||
|
|
||
| List<Document> localPk1ResultDocs = container.queryItems(querySpec, localScopeOptionsPk1, Document.class).byPage() | ||
| .flatMap(feedResponse -> Flux.fromIterable(feedResponse.getResults())) | ||
| .collectList().block(); | ||
| assertThat(localPk1ResultDocs).isNotNull(); | ||
| assertThat(localPk1ResultDocs).hasSize(1); | ||
| assertThat(localPk1ResultDocs.get(0).getId()).isEqualTo("2"); | ||
| assertThat(localPk1ResultDocs.get(0).getPk()).isEqualTo("1"); | ||
| } | ||
|
|
||
| @Test(groups = {"query", "split"}, timeOut = TIMEOUT) | ||
| public void hybridQueryRRFWithLocalStatisticsTest() { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🟡 Recommendation · Testing: Missing Coverage No test for LOCAL scope without partition key Tests cover LOCAL + partition key, but not the edge case where LOCAL is set without a partition key. Given the silent degeneration to GLOBAL behavior (finding above), this is the most likely user error scenario and should be tested to verify it degenerates gracefully. |
||
| // Test LOCAL scope with RRF (multiple component queries) | ||
| String query = "SELECT TOP 10 c.id, c.title, c.pk FROM c WHERE " + | ||
| "FullTextContains(c.title, @term1) OR FullTextContains(c.text, @term1) OR " + | ||
| "FullTextContains(c.text, @term2) " + | ||
| "ORDER BY RANK RRF(FullTextScore(c.title, @term1), FullTextScore(c.text, @term2))"; | ||
| SqlQuerySpec querySpec = new SqlQuerySpec(query, Arrays.asList( | ||
| new SqlParameter("@term1", "John"), | ||
| new SqlParameter("@term2", "United") | ||
| )); | ||
|
|
||
| // GLOBAL scope (default) cross-partition | ||
| List<Document> globalResultDocs = container.queryItems(querySpec, new CosmosQueryRequestOptions(), Document.class).byPage() | ||
| .flatMap(feedResponse -> Flux.fromIterable(feedResponse.getResults())) | ||
| .collectList().block(); | ||
| assertThat(globalResultDocs).isNotNull(); | ||
| assertThat(globalResultDocs).isNotEmpty(); | ||
|
|
||
| // LOCAL scope with pk="2" | ||
| CosmosQueryRequestOptions localScopeOptions = new CosmosQueryRequestOptions(); | ||
| localScopeOptions.setFullTextScoreScope(CosmosFullTextScoreScope.LOCAL); | ||
| localScopeOptions.setPartitionKey(new PartitionKey("2")); | ||
|
|
||
| List<Document> localResultDocs = container.queryItems(querySpec, localScopeOptions, Document.class).byPage() | ||
| .flatMap(feedResponse -> Flux.fromIterable(feedResponse.getResults())) | ||
| .collectList().block(); | ||
| assertThat(localResultDocs).isNotNull(); | ||
| assertThat(localResultDocs).isNotEmpty(); | ||
| for (Document doc : localResultDocs) { | ||
| assertThat(doc.getPk()).isEqualTo("2"); | ||
| } | ||
| } | ||
|
|
||
| @Test(groups = {"query", "split"}, timeOut = TIMEOUT) | ||
| public void wrongHybridQueryTest() { | ||
| String query = ""; | ||
|
|
@@ -410,6 +510,8 @@ public static List<Float> getQueryVector() { | |
| static class Document { | ||
| @JsonProperty("id") | ||
| String id; | ||
| @JsonProperty("pk") | ||
| String pk; | ||
| @JsonProperty("text") | ||
| String text; | ||
| @JsonProperty("title") | ||
|
|
@@ -422,8 +524,9 @@ static class Document { | |
| public Document() { | ||
| } | ||
|
|
||
| public Document(String id, String text, String title, double[] vector, double score) { | ||
| public Document(String id, String pk, String text, String title, double[] vector, double score) { | ||
| this.id = id; | ||
| this.pk = pk; | ||
| this.text = text; | ||
| this.title = title; | ||
| this.vector = vector; | ||
|
|
@@ -433,5 +536,9 @@ public Document(String id, String text, String title, double[] vector, double sc | |
| public String getId() { | ||
| return id; | ||
| } | ||
|
|
||
| public String getPk() { | ||
| return pk; | ||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,6 +3,7 @@ | |
| package com.azure.cosmos.implementation; | ||
|
|
||
| import com.azure.cosmos.CosmosDiagnostics; | ||
| import com.azure.cosmos.models.CosmosFullTextScoreScope; | ||
| import com.azure.cosmos.models.CosmosRequestOptions; | ||
| import com.azure.cosmos.models.FeedRange; | ||
| import com.azure.cosmos.models.PartitionKey; | ||
|
|
@@ -32,6 +33,7 @@ public final class CosmosQueryRequestOptionsImpl extends CosmosQueryRequestOptio | |
| private Integer maxItemCountForHybridSearch; | ||
| private List<CosmosDiagnostics> cancelledRequestDiagnosticsTracker = new ArrayList<>(); | ||
| private String collectionRid; | ||
| private CosmosFullTextScoreScope fullTextScoreScope; | ||
|
|
||
| /** | ||
| * Instantiates a new query request options. | ||
|
|
@@ -72,6 +74,7 @@ public CosmosQueryRequestOptionsImpl(CosmosQueryRequestOptionsImpl options) { | |
| this.maxItemCountForVectorSearch = options.maxItemCountForVectorSearch; | ||
| this.maxItemCountForHybridSearch = options.maxItemCountForHybridSearch; | ||
| this.collectionRid = options.collectionRid; | ||
| this.fullTextScoreScope = options.fullTextScoreScope; | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -408,4 +411,24 @@ public String getCollectionRid() { | |
| public void setCollectionRid(String collectionRid) { | ||
| this.collectionRid = collectionRid; | ||
| } | ||
|
|
||
| /** | ||
| * Gets the scope for computing BM25 statistics used by FullTextScore in hybrid search queries. | ||
| * | ||
| * @return the full text score scope, or null if not set (defaults to GLOBAL behavior). | ||
| */ | ||
| public CosmosFullTextScoreScope getFullTextScoreScope() { | ||
| return this.fullTextScoreScope; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🟡 Recommendation · Reliability: Inconsistent Contract Inconsistent null-default between public and Impl getters The public Suggestion: Make the Impl getter consistent: ---
<sub>⚠️ AI-generated review — may be incorrect. Agree? → resolve the conversation. Disagree? → reply with your reasoning.</sub> |
||
| } | ||
|
|
||
| /** | ||
| * Sets the scope for computing BM25 statistics used by FullTextScore in hybrid search queries. | ||
| * | ||
| * @param fullTextScoreScope the full text score scope. | ||
| * @return the CosmosQueryRequestOptionsImpl. | ||
| */ | ||
| public CosmosQueryRequestOptionsImpl setFullTextScoreScope(CosmosFullTextScoreScope fullTextScoreScope) { | ||
| this.fullTextScoreScope = fullTextScoreScope; | ||
| return this; | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -262,7 +262,8 @@ synchronized private static void tryCacheQueryPlan( | |
| SqlQuerySpec query, | ||
| PartitionedQueryExecutionInfo partitionedQueryExecutionInfo, | ||
| Map<String, PartitionedQueryExecutionInfo> queryPlanCache) { | ||
| if (canCacheQuery(partitionedQueryExecutionInfo.getQueryInfo()) && !queryPlanCache.containsKey(query.getQueryText())) { | ||
| QueryInfo queryInfo = partitionedQueryExecutionInfo.getQueryInfo(); | ||
| if (queryInfo != null && canCacheQuery(queryInfo) && !queryPlanCache.containsKey(query.getQueryText())) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🟢 Suggestion · Maintainability: Debugging Aid NPE fix could benefit from trace logging The null guard silently skips caching for hybrid search queries (which use Suggestion: ---
<sub>⚠️ AI-generated review — may be incorrect. Agree? → resolve the conversation. Disagree? → reply with your reasoning.</sub> |
||
| if (queryPlanCache.size() >= Constants.QUERYPLAN_CACHE_SIZE) { | ||
| logger.warn("Clearing query plan cache as it has reached the maximum size : {}", queryPlanCache.size()); | ||
| queryPlanCache.clear(); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -24,6 +24,7 @@ | |
| import com.azure.cosmos.implementation.query.hybridsearch.FullTextQueryStatistics; | ||
| import com.azure.cosmos.implementation.query.hybridsearch.GlobalFullTextSearchQueryStatistics; | ||
| import com.azure.cosmos.implementation.query.hybridsearch.HybridSearchQueryInfo; | ||
| import com.azure.cosmos.models.CosmosFullTextScoreScope; | ||
| import com.azure.cosmos.models.CosmosQueryRequestOptions; | ||
| import com.azure.cosmos.models.FeedResponse; | ||
| import com.azure.cosmos.models.ModelBridgeInternal; | ||
|
|
@@ -141,8 +142,15 @@ private void initialize( | |
| this.hybridSearchSchedulingStopwatch.start(); | ||
|
|
||
| if (hybridSearchQueryInfo.getRequiresGlobalStatistics()) { | ||
| // When FullTextScoreScope is GLOBAL (default), use allFeedRanges for statistics. | ||
| // When LOCAL, use only targetFeedRanges for scoped statistics. | ||
| List<FeedRangeEpkImpl> statisticsTargetRanges = | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. just a QQ:
|
||
| this.cosmosQueryRequestOptions.getFullTextScoreScope() == CosmosFullTextScoreScope.LOCAL | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🟡 Recommendation · Reliability: Silent Misconfiguration LOCAL scope without partition key silently degenerates to GLOBAL When Suggestion: Add a warning log when LOCAL scope is set but targetFeedRanges equals allFeedRanges: ---
<sub>⚠️ AI-generated review — may be incorrect. Agree? → resolve the conversation. Disagree? → reply with your reasoning.</sub> |
||
| ? targetFeedRanges | ||
| : allFeedRanges; | ||
|
|
||
| Map<FeedRangeEpkImpl, String> partitionKeyRangeToContinuationToken = new HashMap<>(); | ||
| for (FeedRangeEpkImpl feedRangeEpk : allFeedRanges) { | ||
| for (FeedRangeEpkImpl feedRangeEpk : statisticsTargetRanges) { | ||
| partitionKeyRangeToContinuationToken.put(feedRangeEpk, null); | ||
| } | ||
| super.initialize(collection, | ||
|
|
@@ -389,7 +397,10 @@ private static Mono<List<List<ScoreTuple>>> retrieveComponentScores(Mono<List<Hy | |
| } | ||
|
|
||
| private Flux<Document> getComponentQueryResults(List<FeedRangeEpkImpl> targetFeedRanges, int initialPageSize, DocumentCollection collection, Flux<QueryInfo> rewrittenQueryInfos) { | ||
| return rewrittenQueryInfos.flatMap(queryInfo -> { | ||
| // Use concatMap to serialize component query initialization. The parent class has shared mutable | ||
| // state (documentProducers, metrics trackers) that is not thread-safe for concurrent access. | ||
| // Each component query still executes its partition queries in parallel via the inner flatMap. | ||
| return rewrittenQueryInfos.concatMap(queryInfo -> { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🟡 Recommendation · Architecture: Technical Debt concatMap is a band-aid over non-thread-safe shared state The Suggestion: Add a TODO noting the architectural debt. Consider passing the producer list as a local variable rather than relying on the mutable field.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 💬 Observation · Performance: Trade-off concatMap serializes previously-parallel component query initialization For queries with multiple FullTextScore components (e.g., |
||
| Map<FeedRangeEpkImpl, String> partitionKeyRangeToContinuationToken = new HashMap<>(); | ||
| for (FeedRangeEpkImpl feedRangeEpk : targetFeedRanges) { | ||
| partitionKeyRangeToContinuationToken.put(feedRangeEpk, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,23 @@ | ||
| // Copyright (c) Microsoft Corporation. All rights reserved. | ||
| // Licensed under the MIT License. | ||
|
|
||
| package com.azure.cosmos.models; | ||
|
|
||
| /** | ||
| * Specifies the scope for computing BM25 statistics used by FullTextScore in hybrid search queries. | ||
| */ | ||
| public enum CosmosFullTextScoreScope { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. enum here is fixed? no changes in future?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 💬 Observation · Design: Enum Pattern Enum pattern is consistent with codebase
|
||
| /** | ||
| * Compute BM25 statistics (term frequency, inverse document frequency, and document length) | ||
| * across all documents in the container, including all physical and logical partitions. | ||
| * This is the default behavior. | ||
| */ | ||
| GLOBAL, | ||
|
|
||
| /** | ||
| * Compute BM25 statistics only over the subset of documents within the partition key values | ||
| * specified in the query. This is useful for multi-tenant scenarios where scoring should | ||
| * reflect statistics that are accurate for a specific tenant's dataset. | ||
| */ | ||
| LOCAL | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🟡 Recommendation · Maintainability: Ambiguous Change
@ignore annotation commented out instead of removed
The
@Ignorehad a TODO about waiting for the public emulator. Commenting it out (//@Ignore(...)) rather than removing it leaves ambiguity — is the emulator now available? Will CI break in environments without it?Suggestion: If the emulator is now available, remove the annotation entirely. If not, keep it and explain the CI strategy.