Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,13 @@
import com.azure.cosmos.models.CosmosFullTextIndex;
import com.azure.cosmos.models.CosmosFullTextPath;
import com.azure.cosmos.models.CosmosFullTextPolicy;
import com.azure.cosmos.models.CosmosFullTextScoreScope;
import com.azure.cosmos.models.CosmosQueryRequestOptions;
import com.azure.cosmos.models.IncludedPath;
import com.azure.cosmos.models.IndexingMode;
import com.azure.cosmos.models.IndexingPolicy;
import com.azure.cosmos.models.PartitionKeyDefinition;
import com.azure.cosmos.models.PartitionKey;
import com.azure.cosmos.models.SqlParameter;
import com.azure.cosmos.models.SqlQuerySpec;
import com.azure.cosmos.models.ThroughputProperties;
Expand Down Expand Up @@ -49,7 +51,7 @@
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.fail;

@Ignore("TODO: Ignore these test cases until the public emulator is released.")
//@Ignore("TODO: Ignore these test cases until the public emulator is released.")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 Recommendation · Maintainability: Ambiguous Change

@ignore annotation commented out instead of removed

The @Ignore had a TODO about waiting for the public emulator. Commenting it out (//@Ignore(...)) rather than removing it leaves ambiguity — is the emulator now available? Will CI break in environments without it?

Suggestion: If the emulator is now available, remove the annotation entirely. If not, keep it and explain the CI strategy.


⚠️ AI-generated review — may be incorrect. Agree? → resolve the conversation. Disagree? → reply with your reasoning.

public class HybridSearchQueryTest {
protected static final int TIMEOUT = 30000;
protected static final int SETUP_TIMEOUT = 80000;
Expand All @@ -74,7 +76,7 @@ public void before_HybridSearchQueryTest() {

PartitionKeyDefinition partitionKeyDef = new PartitionKeyDefinition();
ArrayList<String> paths = new ArrayList<String>();
paths.add("/id");
paths.add("/pk");
partitionKeyDef.setPaths(paths);

CosmosContainerProperties containerProperties = new CosmosContainerProperties(containerId, partitionKeyDef);
Expand All @@ -83,8 +85,11 @@ public void before_HybridSearchQueryTest() {
database.createContainer(containerProperties, ThroughputProperties.createManualThroughput(10000)).block();
container = database.getContainer(containerId);

// Insert documents with pk field based on (index % 2) + 1, so even ids → pk="1", odd ids → pk="2"
List<Document> documents = loadProductsFromJson();
for (Document doc : documents) {
int index = Integer.parseInt(doc.getId());
doc.pk = String.valueOf((index % 2) + 1);
container.createItem(doc).block();
}
}
Expand Down Expand Up @@ -131,7 +136,7 @@ public void hybridQueryTest() {
.flatMap(feedResponse -> Flux.fromIterable(feedResponse.getResults()))
.collectList().block();
assertThat(resultDocs).hasSize(15);
validateResults(Arrays.asList("51", "49", "24", "61", "54", "22", "2", "25", "75", "77", "57", "76", "66", "80", "85"), resultDocs);
validateResults(Arrays.asList("61", "54", "51", "49", "24", "2", "57", "22", "75", "25", "77", "76", "66", "80", "85"), resultDocs);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 Recommendation · Maintainability: Unverifiable Change

Expected result orderings changed without explanation

Seven existing test assertions had their expected result orderings changed. The partition key was changed from /id to /pk, which changes the physical partition layout and consequently BM25 statistics and scoring order. Without explanation, reviewers cannot verify the new expected values are correct.

Suggestion: Add a comment explaining why the expected ordering changed (e.g., partition key change redistributes documents, altering BM25 statistics).


⚠️ AI-generated review — may be incorrect. Agree? → resolve the conversation. Disagree? → reply with your reasoning.


query = "SELECT c.id, c.title FROM c WHERE FullTextContains(c.title, @term1) " +
"OR FullTextContains(c.text, @term1) OR FullTextContains(c.text, @term2) ORDER BY " +
Expand All @@ -144,7 +149,7 @@ public void hybridQueryTest() {
.flatMap(feedResponse -> Flux.fromIterable(feedResponse.getResults()))
.collectList().block();
assertThat(resultDocs).hasSize(10);
validateResults(Arrays.asList("22", "2", "25", "75", "77", "57", "76", "66", "80", "85"), resultDocs);
validateResults(Arrays.asList("2", "57", "22", "75", "25", "77", "76", "66", "80", "85"), resultDocs);

List<Float> vector = getQueryVector();
query = "SELECT TOP 10 c.id, c.text, c.title FROM c " +
Expand All @@ -159,7 +164,7 @@ public void hybridQueryTest() {
.flatMap(feedResponse -> Flux.fromIterable(feedResponse.getResults()))
.collectList().block();
assertThat(resultDocs).hasSize(10);
validateResults(Arrays.asList("4", "24", "6", "9", "2", "3", "21", "5", "49", "13"), resultDocs);
validateResults(Arrays.asList("55", "61", "57", "24", "2", "54", "63", "9", "62", "75"), resultDocs);
}

@Test(groups = {"query", "split"}, timeOut = TIMEOUT)
Expand All @@ -179,7 +184,7 @@ public void hybridQueryWeightedRRFTest(){
assertThat(results).isNotNull();

validateResults(
Arrays.asList("51", "49", "24", "61", "54", "22", "2", "25", "75", "77", "57", "76", "66", "80", "85"),
Arrays.asList("61", "54", "51", "49", "24", "2", "57", "22", "75", "25", "77", "76", "66", "80", "85"),
results
);

Expand All @@ -198,7 +203,7 @@ public void hybridQueryWeightedRRFTest(){
assertThat(results).hasSize(15);
assertThat(results).isNotNull();
validateResults(
Arrays.asList("51", "49", "24", "61", "54", "22", "2", "25", "75", "77", "57", "76", "66", "80", "85"),
Arrays.asList("61", "54", "51", "49", "24", "2", "57", "22", "75", "25", "77", "76", "66", "80", "85"),
results
);

Expand All @@ -216,7 +221,7 @@ public void hybridQueryWeightedRRFTest(){
assertThat(results).hasSize(10);
assertThat(results).isNotNull();
validateResults(
Arrays.asList("51", "49", "24", "61", "54", "22", "2", "25", "75", "77"),
Arrays.asList("61", "54", "51", "49", "24", "2", "57", "22", "75", "25"),
results
);

Expand All @@ -234,7 +239,7 @@ public void hybridQueryWeightedRRFTest(){
assertThat(results).hasSize(10);
assertThat(results).isNotNull();
validateResults(
Arrays.asList("22", "57", "25", "24", "66", "2", "85", "49", "51", "54"),
Arrays.asList("57", "22", "25", "54", "66", "24", "2", "85", "61", "76"),
results
);

Expand All @@ -254,11 +259,106 @@ public void hybridQueryWeightedRRFTest(){
assertThat(results).hasSize(10);
assertThat(results).isNotNull();
validateResults(
Arrays.asList("75", "24", "49", "61", "21", "9", "26", "4", "6", "37"),
Arrays.asList("75", "24", "49", "55", "61", "21", "9", "26", "37", "57"),
results
);
}

@Test(groups = {"query", "split"}, timeOut = TIMEOUT)
public void hybridQueryWithLocalStatisticsTest() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟢 Suggestion · Testing: Weak Assertion

LOCAL scope tests do not assert result ordering

The LOCAL scope tests assert result count and partition key values, but not the order of results. For a scoring/ranking feature, ordering is the primary output. Different score computation (local vs global statistics) should produce different orderings.

Suggestion: Add validateResults(Arrays.asList(...), results) assertions for LOCAL scope, similar to the existing GLOBAL tests.


⚠️ AI-generated review — may be incorrect. Agree? → resolve the conversation. Disagree? → reply with your reasoning.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟢 Suggestion · Cross-SDK: Test Coverage Gap

Test coverage gap relative to .NET SDK

The .NET SDK's equivalent implementation includes tests that validate score values differ between LOCAL and GLOBAL scope (not just result sets). The Java tests only verify result counts and partition key membership.

Suggestion: Consider adding a test that verifies LOCAL and GLOBAL produce different score rankings for the same query when data distribution differs across partitions.


⚠️ AI-generated review — may be incorrect. Agree? → resolve the conversation. Disagree? → reply with your reasoning.

// Documents with 'John': id=2 (pk="1"), id=57 (pk="2"), id=85 (pk="2")

String query = "SELECT TOP 10 c.id, c.title, c.pk FROM c WHERE FullTextContains(c.title, @term1) OR " +
"FullTextContains(c.text, @term1) ORDER BY RANK FullTextScore(c.title, @term1)";
SqlQuerySpec querySpec = new SqlQuerySpec(query, Arrays.asList(
new SqlParameter("@term1", "John")
));

// Test 1: GLOBAL scope (default) cross-partition — should return all 3 'John' matches
List<Document> globalResultDocs = container.queryItems(querySpec, new CosmosQueryRequestOptions(), Document.class).byPage()
.flatMap(feedResponse -> Flux.fromIterable(feedResponse.getResults()))
.collectList().block();
assertThat(globalResultDocs).isNotNull();
assertThat(globalResultDocs).hasSize(3);

// Test 2: Explicit GLOBAL scope cross-partition — same as default
CosmosQueryRequestOptions globalScopeOptions = new CosmosQueryRequestOptions();
globalScopeOptions.setFullTextScoreScope(CosmosFullTextScoreScope.GLOBAL);

List<Document> explicitGlobalResultDocs = container.queryItems(querySpec, globalScopeOptions, Document.class).byPage()
.flatMap(feedResponse -> Flux.fromIterable(feedResponse.getResults()))
.collectList().block();
assertThat(explicitGlobalResultDocs).isNotNull();
assertThat(explicitGlobalResultDocs).hasSize(3);

List<String> defaultIds = globalResultDocs.stream().map(Document::getId).collect(Collectors.toList());
List<String> explicitGlobalIds = explicitGlobalResultDocs.stream().map(Document::getId).collect(Collectors.toList());
assertThat(explicitGlobalIds).isEqualTo(defaultIds);

// Test 3: LOCAL scope with pk="2" — only id=57 and id=85 have 'John' in pk="2"
// Stats are computed only over pk="2" partition
CosmosQueryRequestOptions localScopeOptionsPk2 = new CosmosQueryRequestOptions();
localScopeOptionsPk2.setFullTextScoreScope(CosmosFullTextScoreScope.LOCAL);
localScopeOptionsPk2.setPartitionKey(new PartitionKey("2"));

List<Document> localPk2ResultDocs = container.queryItems(querySpec, localScopeOptionsPk2, Document.class).byPage()
.flatMap(feedResponse -> Flux.fromIterable(feedResponse.getResults()))
.collectList().block();
assertThat(localPk2ResultDocs).isNotNull();
assertThat(localPk2ResultDocs).hasSize(2);
for (Document doc : localPk2ResultDocs) {
assertThat(doc.getPk()).isEqualTo("2");
}

// Test 4: LOCAL scope with pk="1" — only id=2 has 'John' in pk="1"
// Stats are computed only over pk="1" partition
CosmosQueryRequestOptions localScopeOptionsPk1 = new CosmosQueryRequestOptions();
localScopeOptionsPk1.setFullTextScoreScope(CosmosFullTextScoreScope.LOCAL);
localScopeOptionsPk1.setPartitionKey(new PartitionKey("1"));

List<Document> localPk1ResultDocs = container.queryItems(querySpec, localScopeOptionsPk1, Document.class).byPage()
.flatMap(feedResponse -> Flux.fromIterable(feedResponse.getResults()))
.collectList().block();
assertThat(localPk1ResultDocs).isNotNull();
assertThat(localPk1ResultDocs).hasSize(1);
assertThat(localPk1ResultDocs.get(0).getId()).isEqualTo("2");
assertThat(localPk1ResultDocs.get(0).getPk()).isEqualTo("1");
}

@Test(groups = {"query", "split"}, timeOut = TIMEOUT)
public void hybridQueryRRFWithLocalStatisticsTest() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 Recommendation · Testing: Missing Coverage

No test for LOCAL scope without partition key

Tests cover LOCAL + partition key, but not the edge case where LOCAL is set without a partition key. Given the silent degeneration to GLOBAL behavior (finding above), this is the most likely user error scenario and should be tested to verify it degenerates gracefully.


⚠️ AI-generated review — may be incorrect. Agree? → resolve the conversation. Disagree? → reply with your reasoning.

// Test LOCAL scope with RRF (multiple component queries)
String query = "SELECT TOP 10 c.id, c.title, c.pk FROM c WHERE " +
"FullTextContains(c.title, @term1) OR FullTextContains(c.text, @term1) OR " +
"FullTextContains(c.text, @term2) " +
"ORDER BY RANK RRF(FullTextScore(c.title, @term1), FullTextScore(c.text, @term2))";
SqlQuerySpec querySpec = new SqlQuerySpec(query, Arrays.asList(
new SqlParameter("@term1", "John"),
new SqlParameter("@term2", "United")
));

// GLOBAL scope (default) cross-partition
List<Document> globalResultDocs = container.queryItems(querySpec, new CosmosQueryRequestOptions(), Document.class).byPage()
.flatMap(feedResponse -> Flux.fromIterable(feedResponse.getResults()))
.collectList().block();
assertThat(globalResultDocs).isNotNull();
assertThat(globalResultDocs).isNotEmpty();

// LOCAL scope with pk="2"
CosmosQueryRequestOptions localScopeOptions = new CosmosQueryRequestOptions();
localScopeOptions.setFullTextScoreScope(CosmosFullTextScoreScope.LOCAL);
localScopeOptions.setPartitionKey(new PartitionKey("2"));

List<Document> localResultDocs = container.queryItems(querySpec, localScopeOptions, Document.class).byPage()
.flatMap(feedResponse -> Flux.fromIterable(feedResponse.getResults()))
.collectList().block();
assertThat(localResultDocs).isNotNull();
assertThat(localResultDocs).isNotEmpty();
for (Document doc : localResultDocs) {
assertThat(doc.getPk()).isEqualTo("2");
}
}

@Test(groups = {"query", "split"}, timeOut = TIMEOUT)
public void wrongHybridQueryTest() {
String query = "";
Expand Down Expand Up @@ -410,6 +510,8 @@ public static List<Float> getQueryVector() {
static class Document {
@JsonProperty("id")
String id;
@JsonProperty("pk")
String pk;
@JsonProperty("text")
String text;
@JsonProperty("title")
Expand All @@ -422,8 +524,9 @@ static class Document {
public Document() {
}

public Document(String id, String text, String title, double[] vector, double score) {
public Document(String id, String pk, String text, String title, double[] vector, double score) {
this.id = id;
this.pk = pk;
this.text = text;
this.title = title;
this.vector = vector;
Expand All @@ -433,5 +536,9 @@ public Document(String id, String text, String title, double[] vector, double sc
public String getId() {
return id;
}

public String getPk() {
return pk;
}
}
}
3 changes: 3 additions & 0 deletions sdk/cosmos/azure-cosmos/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,14 @@
#### Features Added
* Added support for N-Region synchronous commit feature - See [PR 47757](https://github.com/Azure/azure-sdk-for-java/pull/47757)
* Added support for Query Advisor feature - See [48160](https://github.com/Azure/azure-sdk-for-java/pull/48160)
* Added `CosmosFullTextScoreScope` enum and `setFullTextScoreScope()` on `CosmosQueryRequestOptions` for controlling BM25 statistics scope in hybrid search queries. Supports `LOCAL` (scoped to target partitions) and `GLOBAL` (default, all partitions) scopes. See [PR 48431](https://github.com/Azure/azure-sdk-for-java/pull/48431)

#### Breaking Changes

#### Bugs Fixed
* Fixed Remote Code Execution (RCE) vulnerability (CWE-502) by replacing Java deserialization with JSON-based serialization in `CosmosClientMetadataCachesSnapshot`, `AsyncCache`, and `DocumentCollection`. The metadata cache snapshot now uses Jackson for serialization/deserialization, eliminating the entire class of Java deserialization attacks. - [PR 47971](https://github.com/Azure/azure-sdk-for-java/pull/47971)
* Fixed `NullPointerException` in `DocumentQueryExecutionContextFactory.tryCacheQueryPlan` when executing hybrid search queries with a partition key filter. See [PR 48431](https://github.com/Azure/azure-sdk-for-java/pull/48431)
* Fixed `ConcurrentModificationException` in hybrid search component query execution caused by concurrent access to shared mutable state. See [PR 48431](https://github.com/Azure/azure-sdk-for-java/pull/48431)

#### Other Changes
* Added aggressive HTTP timeout policies for document operations routed to Gateway V2. - [PR 47879](https://github.com/Azure/azure-sdk-for-java/pull/47879)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
package com.azure.cosmos.implementation;

import com.azure.cosmos.CosmosDiagnostics;
import com.azure.cosmos.models.CosmosFullTextScoreScope;
import com.azure.cosmos.models.CosmosRequestOptions;
import com.azure.cosmos.models.FeedRange;
import com.azure.cosmos.models.PartitionKey;
Expand Down Expand Up @@ -32,6 +33,7 @@ public final class CosmosQueryRequestOptionsImpl extends CosmosQueryRequestOptio
private Integer maxItemCountForHybridSearch;
private List<CosmosDiagnostics> cancelledRequestDiagnosticsTracker = new ArrayList<>();
private String collectionRid;
private CosmosFullTextScoreScope fullTextScoreScope;

/**
* Instantiates a new query request options.
Expand Down Expand Up @@ -72,6 +74,7 @@ public CosmosQueryRequestOptionsImpl(CosmosQueryRequestOptionsImpl options) {
this.maxItemCountForVectorSearch = options.maxItemCountForVectorSearch;
this.maxItemCountForHybridSearch = options.maxItemCountForHybridSearch;
this.collectionRid = options.collectionRid;
this.fullTextScoreScope = options.fullTextScoreScope;
}

/**
Expand Down Expand Up @@ -408,4 +411,24 @@ public String getCollectionRid() {
public void setCollectionRid(String collectionRid) {
this.collectionRid = collectionRid;
}

/**
* Gets the scope for computing BM25 statistics used by FullTextScore in hybrid search queries.
*
* @return the full text score scope, or null if not set (defaults to GLOBAL behavior).
*/
public CosmosFullTextScoreScope getFullTextScoreScope() {
return this.fullTextScoreScope;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 Recommendation · Reliability: Inconsistent Contract

Inconsistent null-default between public and Impl getters

The public CosmosQueryRequestOptions.getFullTextScoreScope() maps null to GLOBAL. This Impl getter returns raw null. The current usage site calls the public getter (safe), but any future internal code checking getFullTextScoreScope() == GLOBAL via the Impl class would get false for the default case — a latent correctness trap.

Suggestion: Make the Impl getter consistent:
``java
return this.fullTextScoreScope != null ? this.fullTextScoreScope : CosmosFullTextScoreScope.GLOBAL;

---
<sub>⚠️ AI-generated review — may be incorrect. Agree? → resolve the conversation. Disagree? → reply with your reasoning.</sub>

}

/**
* Sets the scope for computing BM25 statistics used by FullTextScore in hybrid search queries.
*
* @param fullTextScoreScope the full text score scope.
* @return the CosmosQueryRequestOptionsImpl.
*/
public CosmosQueryRequestOptionsImpl setFullTextScoreScope(CosmosFullTextScoreScope fullTextScoreScope) {
this.fullTextScoreScope = fullTextScoreScope;
return this;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,8 @@ synchronized private static void tryCacheQueryPlan(
SqlQuerySpec query,
PartitionedQueryExecutionInfo partitionedQueryExecutionInfo,
Map<String, PartitionedQueryExecutionInfo> queryPlanCache) {
if (canCacheQuery(partitionedQueryExecutionInfo.getQueryInfo()) && !queryPlanCache.containsKey(query.getQueryText())) {
QueryInfo queryInfo = partitionedQueryExecutionInfo.getQueryInfo();
if (queryInfo != null && canCacheQuery(queryInfo) && !queryPlanCache.containsKey(query.getQueryText())) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟢 Suggestion · Maintainability: Debugging Aid

NPE fix could benefit from trace logging

The null guard silently skips caching for hybrid search queries (which use hybridSearchQueryInfo instead of queryInfo). A trace log would help diagnose query plan cache miss rates.

Suggestion:
``java
if (queryInfo == null) {
logger.trace("Skipping query plan caching: queryInfo is null (likely a hybrid search query)");
return;
}

---
<sub>⚠️ AI-generated review — may be incorrect. Agree? → resolve the conversation. Disagree? → reply with your reasoning.</sub>

if (queryPlanCache.size() >= Constants.QUERYPLAN_CACHE_SIZE) {
logger.warn("Clearing query plan cache as it has reached the maximum size : {}", queryPlanCache.size());
queryPlanCache.clear();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import com.azure.cosmos.implementation.query.hybridsearch.FullTextQueryStatistics;
import com.azure.cosmos.implementation.query.hybridsearch.GlobalFullTextSearchQueryStatistics;
import com.azure.cosmos.implementation.query.hybridsearch.HybridSearchQueryInfo;
import com.azure.cosmos.models.CosmosFullTextScoreScope;
import com.azure.cosmos.models.CosmosQueryRequestOptions;
import com.azure.cosmos.models.FeedResponse;
import com.azure.cosmos.models.ModelBridgeInternal;
Expand Down Expand Up @@ -141,8 +142,15 @@ private void initialize(
this.hybridSearchSchedulingStopwatch.start();

if (hybridSearchQueryInfo.getRequiresGlobalStatistics()) {
// When FullTextScoreScope is GLOBAL (default), use allFeedRanges for statistics.
// When LOCAL, use only targetFeedRanges for scoped statistics.
List<FeedRangeEpkImpl> statisticsTargetRanges =
Copy link
Member

@xinlian12 xinlian12 Mar 17, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just a QQ:

  • does LOCAL require user to define a partitionKey? do we need any validation?
  • instead of defining the partitionKey in the request options, what if the partitionKey is defined in the sql query? do we have tests to verify that?
  • do we have tests to verify HPK cases if this is a valid case?
  • do we have tests to verify query + feedRange if this is a valid case?

this.cosmosQueryRequestOptions.getFullTextScoreScope() == CosmosFullTextScoreScope.LOCAL
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 Recommendation · Reliability: Silent Misconfiguration

LOCAL scope without partition key silently degenerates to GLOBAL

When CosmosFullTextScoreScope.LOCAL is set but no partition key is specified, targetFeedRanges equals allFeedRanges (all partitions are targeted). The statistics query silently covers all partitions — identical to GLOBAL. In multi-tenant scenarios (the primary use case for LOCAL), accidentally omitting the partition key defeats the entire purpose of the feature.

Suggestion: Add a warning log when LOCAL scope is set but targetFeedRanges equals allFeedRanges:
``java
if (scope == LOCAL && targetFeedRanges.size() == allFeedRanges.size()) {
logger.warn("LOCAL fullTextScoreScope set but no partition key filter specified; statistics computed across all partitions");
}

---
<sub>⚠️ AI-generated review — may be incorrect. Agree? → resolve the conversation. Disagree? → reply with your reasoning.</sub>

? targetFeedRanges
: allFeedRanges;

Map<FeedRangeEpkImpl, String> partitionKeyRangeToContinuationToken = new HashMap<>();
for (FeedRangeEpkImpl feedRangeEpk : allFeedRanges) {
for (FeedRangeEpkImpl feedRangeEpk : statisticsTargetRanges) {
partitionKeyRangeToContinuationToken.put(feedRangeEpk, null);
}
super.initialize(collection,
Expand Down Expand Up @@ -389,7 +397,10 @@ private static Mono<List<List<ScoreTuple>>> retrieveComponentScores(Mono<List<Hy
}

private Flux<Document> getComponentQueryResults(List<FeedRangeEpkImpl> targetFeedRanges, int initialPageSize, DocumentCollection collection, Flux<QueryInfo> rewrittenQueryInfos) {
return rewrittenQueryInfos.flatMap(queryInfo -> {
// Use concatMap to serialize component query initialization. The parent class has shared mutable
// state (documentProducers, metrics trackers) that is not thread-safe for concurrent access.
// Each component query still executes its partition queries in parallel via the inner flatMap.
return rewrittenQueryInfos.concatMap(queryInfo -> {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 Recommendation · Architecture: Technical Debt

concatMap is a band-aid over non-thread-safe shared state

The flatMap to concatMap fix correctly prevents ConcurrentModificationException by serializing component query initialization. However, the root cause — ParallelDocumentQueryExecutionContextBase.documentProducers is a shared mutable field reassigned and repopulated across logical operations — remains unaddressed. Any future change re-introducing concurrency will re-expose the race.

Suggestion: Add a TODO noting the architectural debt. Consider passing the producer list as a local variable rather than relying on the mutable field.


⚠️ AI-generated review — may be incorrect. Agree? → resolve the conversation. Disagree? → reply with your reasoning.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💬 Observation · Performance: Trade-off

concatMap serializes previously-parallel component query initialization

For queries with multiple FullTextScore components (e.g., RRF(FullTextScore(...), FullTextScore(...))), component query initialization is now sequential. Each component query still executes its per-partition queries in parallel internally. The latency impact depends on the number of components — typically 2-3, so the impact should be minor.


⚠️ AI-generated review — may be incorrect. Agree? → resolve the conversation. Disagree? → reply with your reasoning.

Map<FeedRangeEpkImpl, String> partitionKeyRangeToContinuationToken = new HashMap<>();
for (FeedRangeEpkImpl feedRangeEpk : targetFeedRanges) {
partitionKeyRangeToContinuationToken.put(feedRangeEpk,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

package com.azure.cosmos.models;

/**
* Specifies the scope for computing BM25 statistics used by FullTextScore in hybrid search queries.
*/
public enum CosmosFullTextScoreScope {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

enum here is fixed? no changes in future?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💬 Observation · Design: Enum Pattern

Enum pattern is consistent with codebase

CosmosFullTextScoreScope uses a plain Java enum, consistent with CosmosVectorDataType, IndexingMode, and 15 other enums in the Cosmos data plane models package (zero uses of ExpandableStringEnum in the data plane). Since this enum is client-side only (never serialized over the wire), omitting @JsonValue/toString() is acceptable. Per Azure SDK guidelines, plain enums are allowed for input-only types (exception 2).


⚠️ AI-generated review — may be incorrect. Agree? → resolve the conversation. Disagree? → reply with your reasoning.

/**
* Compute BM25 statistics (term frequency, inverse document frequency, and document length)
* across all documents in the container, including all physical and logical partitions.
* This is the default behavior.
*/
GLOBAL,

/**
* Compute BM25 statistics only over the subset of documents within the partition key values
* specified in the query. This is useful for multi-tenant scenarios where scoring should
* reflect statistics that are accurate for a specific tenant's dataset.
*/
LOCAL
}
Loading
Loading