Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
12ada5e
[GATEWAY V2]: Fix NPE in ThinClientStoreModel.wrapInHttpRequest for h…
jeet1995 Mar 16, 2026
ba81686
Fix ConsistencyLevel.valueOf for wire-format values like BoundedStale…
jeet1995 Mar 17, 2026
733cc2a
Skip DIRECT-only query FI tests under thin client mode
jeet1995 Mar 17, 2026
5ce602a
Skip only baseline DIRECT tests under thin client; keep availability …
jeet1995 Mar 17, 2026
ad302b9
Revert FI test skip logic — availability strategy works for all conne…
jeet1995 Mar 17, 2026
ec9e42e
Skip DIRECT mode FI tests under thin client + HTTP/2 in base execute()
jeet1995 Mar 17, 2026
69be65a
Fix: drop HTTP/2 requirement from thin client DIRECT skip
jeet1995 Mar 17, 2026
ea5c56d
Enable HTTP/2 for ThinClient_MultiMaster pipeline job; restore full s…
jeet1995 Mar 17, 2026
5937fa6
Add explanatory comment for DIRECT skip; enable HTTP/2 for ThinClient…
jeet1995 Mar 17, 2026
90b2f92
Fix RegionalRoutingContext map key issues breaking cross-region routi…
jeet1995 Mar 17, 2026
38871f0
Revert LocationCache changes; add thin client test coverage for circu…
jeet1995 Mar 17, 2026
3cd349e
Fix RegionalRoutingContext.toString() to use only gatewayRegionalEndp…
jeet1995 Mar 17, 2026
1ec8d1d
Skip DIRECT mode tests in PPAF and circuit breaker when thin client e…
jeet1995 Mar 17, 2026
1383303
Fix PPAF GATEWAY tests for thin client: replace thinProxy with gatewa…
jeet1995 Mar 18, 2026
8cad50d
Revert pom.xml sys properties; add assertThinClientEndpointUsed
jeet1995 Mar 18, 2026
d99ecbe
Fix assertThinClientEndpointUsed: skip for exception responses
jeet1995 Mar 18, 2026
d80e61b
Fix assertThinClientEndpointUsed in recovery flow too
jeet1995 Mar 18, 2026
187b76c
Increase e2e timeout by 500ms for thin client FI tests
jeet1995 Mar 18, 2026
4a7c60d
Merge remote-tracking branch 'upstream/main' into AzCosmos_FixCloneMi…
jeet1995 Mar 18, 2026
6055d00
Address PR review comments
jeet1995 Mar 18, 2026
20b9783
Fix ChangeFeedProcessor isStarted race from CI optimization
jeet1995 Mar 18, 2026
b6dd784
Fix broken benchmark WorkflowTest CLI tests
jeet1995 Mar 18, 2026
a4c7c6b
Merge upstream/main and resolve conflicts
jeet1995 Mar 18, 2026
ba7a200
Fix ChangeFeed processor flaky test regression
jeet1995 Mar 19, 2026
a0a0dae
Address review: CHANGELOG format, RegionalRoutingContext comment, Tes…
jeet1995 Mar 19, 2026
b0bbe4b
Restore ChangeFeed processor test fix
jeet1995 Mar 19, 2026
ae1ee98
Remove unused imports and constants after assertThinClientEndpointUse…
jeet1995 Mar 19, 2026
423acef
Address review: unit test for RRC identity, richer toString, null col…
jeet1995 Mar 19, 2026
da8ea88
Add DR drill timechart images for PR #48432 failover regression test
jeet1995 Mar 19, 2026
fda428c
Revert "Add DR drill timechart images for PR #48432 failover regressi…
jeet1995 Mar 19, 2026
43ae2a4
Add DR drill timechart images for PR #48432
jeet1995 Mar 19, 2026
12aac15
Update DR drill charts: scope to Document ResourceType, annotate writ…
jeet1995 Mar 19, 2026
a2a27e9
Add success timechart for DR drill report
jeet1995 Mar 19, 2026
1abccf4
Update DR drill charts with skill-test results
jeet1995 Mar 20, 2026
e34dbae
Update DR drill charts with v2 skill-test results (full completion)
jeet1995 Mar 20, 2026
46f68b8
Add success timechart for v2 DR drill
jeet1995 Mar 20, 2026
584907a
Add separate write and read success timecharts for v2 DR drill
jeet1995 Mar 20, 2026
461303c
Regenerate success charts with verified Kusto data
jeet1995 Mar 20, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
import reactor.core.publisher.Flux;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.UUID;

Expand All @@ -37,7 +36,6 @@ public class CosmosNotFoundTests extends FaultInjectionTestBase {

private static final Logger logger = LoggerFactory.getLogger(CosmosNotFoundTests.class);

private static final String thinClientEndpointIndicator = ":10250/";
private static final ImplementationBridgeHelpers.CosmosAsyncClientHelper.CosmosAsyncClientAccessor accessor =
ImplementationBridgeHelpers.CosmosAsyncClientHelper.getCosmosAsyncClientAccessor();

Expand Down Expand Up @@ -602,30 +600,4 @@ public void performBulkOnDeletedContainerWithGatewayV2() throws InterruptedExcep
// System.clearProperty("COSMOS.THINCLIENT_ENABLED");
}
}

private static void assertThinClientEndpointUsed(CosmosDiagnostics diagnostics) {
AssertionsForClassTypes.assertThat(diagnostics).isNotNull();

CosmosDiagnosticsContext ctx = diagnostics.getDiagnosticsContext();
AssertionsForClassTypes.assertThat(ctx).isNotNull();

Collection<CosmosDiagnosticsRequestInfo> requests = ctx.getRequestInfo();
AssertionsForClassTypes.assertThat(requests).isNotNull();
AssertionsForClassTypes.assertThat(requests.size()).isPositive();

for (CosmosDiagnosticsRequestInfo requestInfo : requests) {
logger.info(
"Endpoint: {}, RequestType: {}, Partition: {}/{}, ActivityId: {}",
requestInfo.getEndpoint(),
requestInfo.getRequestType(),
requestInfo.getPartitionId(),
requestInfo.getPartitionKeyRangeId(),
requestInfo.getActivityId());
if (requestInfo.getEndpoint().contains(thinClientEndpointIndicator)) {
return;
}
}

fail("No request targeting thin client proxy endpoint.");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
public class FITests_queryAfterCreation
extends FaultInjectionWithAvailabilityStrategyTestsBase {

@Test(groups = {"fi-multi-master"}, dataProvider = "testConfigs_queryAfterCreation", retryAnalyzer = SuperFlakyTestRetryAnalyzer.class)
@Test(groups = {"fi-multi-master", "fi-thinclient-multi-master"}, dataProvider = "testConfigs_queryAfterCreation", retryAnalyzer = SuperFlakyTestRetryAnalyzer.class)
public void queryAfterCreation(
String testCaseId,
Duration endToEndTimeout,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
public class FITests_readAfterCreation
extends FaultInjectionWithAvailabilityStrategyTestsBase {

@Test(groups = {"fi-multi-master"}, dataProvider = "testConfigs_readAfterCreation", retryAnalyzer = SuperFlakyTestRetryAnalyzer.class)
@Test(groups = {"fi-multi-master", "fi-thinclient-multi-master"}, dataProvider = "testConfigs_readAfterCreation", retryAnalyzer = SuperFlakyTestRetryAnalyzer.class)
public void readAfterCreation(
String testCaseId,
Duration endToEndTimeout,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import java.util.function.Function;

public class FITests_readAllAfterCreation extends FaultInjectionWithAvailabilityStrategyTestsBase {
@Test(groups = {"fi-multi-master"}, dataProvider = "testConfigs_readAllAfterCreation", retryAnalyzer = FlakyTestRetryAnalyzer.class)
@Test(groups = {"fi-multi-master", "fi-thinclient-multi-master"}, dataProvider = "testConfigs_readAllAfterCreation", retryAnalyzer = FlakyTestRetryAnalyzer.class)
public void readAllAfterCreation(
String testCaseId,
Duration endToEndTimeout,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
public class FITests_readManyAfterCreation
extends FaultInjectionWithAvailabilityStrategyTestsBase {

@Test(groups = {"fi-multi-master"}, dataProvider = "testConfigs_readManyAfterCreation", retryAnalyzer = FlakyTestRetryAnalyzer.class)
@Test(groups = {"fi-multi-master", "fi-thinclient-multi-master"}, dataProvider = "testConfigs_readManyAfterCreation", retryAnalyzer = FlakyTestRetryAnalyzer.class)
public void readManyAfterCreation(
String testCaseId,
Duration endToEndTimeout,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
public class FITests_writeAfterCreate
extends FaultInjectionWithAvailabilityStrategyTestsBase {

@Test(groups = {"fi-multi-master"}, dataProvider = "testConfigs_writeAfterCreation", retryAnalyzer = FlakyTestRetryAnalyzer.class)
@Test(groups = {"fi-multi-master", "fi-thinclient-multi-master"}, dataProvider = "testConfigs_writeAfterCreation", retryAnalyzer = FlakyTestRetryAnalyzer.class)
public void writeAfterCreation(
String testCaseId,
Duration endToEndTimeout,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import com.azure.cosmos.implementation.apachecommons.lang.tuple.ImmutablePair;
import com.azure.cosmos.implementation.apachecommons.lang.tuple.Pair;
import com.azure.cosmos.implementation.directconnectivity.ReflectionUtils;
import org.testng.SkipException;
import com.azure.cosmos.models.CosmosClientTelemetryConfig;
import com.azure.cosmos.models.CosmosContainerProperties;
import com.azure.cosmos.models.CosmosItemIdentity;
Expand Down Expand Up @@ -204,7 +205,7 @@ public String resolveTestNameSuffix(Object[] row) {
return (String)row[0];
}

@BeforeClass(groups = { "fi-multi-master" })
@BeforeClass(groups = { "fi-multi-master", "fi-thinclient-multi-master" })
public void beforeClass() {
CosmosClientBuilder clientBuilder = new CosmosClientBuilder()
.endpoint(TestConfigurations.HOST)
Expand Down Expand Up @@ -337,7 +338,7 @@ public void beforeClass() {
safeClose(dummyClient);
}
}
@AfterClass(groups = { "fi-multi-master" })
@AfterClass(groups = { "fi-multi-master", "fi-thinclient-multi-master" })
public void afterClass() {
CosmosClientBuilder clientBuilder = new CosmosClientBuilder()
.endpoint(TestConfigurations.HOST)
Expand Down Expand Up @@ -4882,14 +4883,29 @@ protected void execute(
ConnectionMode connectionMode,
boolean shouldInjectPreferredRegionsInClient) {

// When thin client + HTTP/2 are enabled, all requests route through the thin client
// gateway proxy — DIRECT mode is not exercised. Skip DIRECT mode tests.
if (Configs.isThinClientEnabled() && Configs.isHttp2Enabled() && connectionMode == ConnectionMode.DIRECT) {
throw new SkipException(
"Skipping DIRECT mode test '" + testCaseId + "' — thin client forces GATEWAY mode");
}

// Test two cases here:
// - the endToEndOperationLatencyPolicyConfig is being configured on the client only
// - the endToEndOperationLatencyPolicyConfig is being configured on the request options only
for (boolean e2eTimeoutPolicyOnClient : Arrays.asList(Boolean.TRUE, Boolean.FALSE)) {
logger.info("START {}, e2eTimeoutPolicyOnClient {}", testCaseId, e2eTimeoutPolicyOnClient);

// Thin client adds ~500ms overhead for container + partition key range cache lookups
// through the RNTBD-encoded thin client proxy path. Increase e2e timeout to avoid
// spurious 408 (OperationCancelled) failures with tight timeouts.
Duration effectiveEndToEndTimeout = endToEndTimeout;
if (Configs.isThinClientEnabled() && Configs.isHttp2Enabled() && endToEndTimeout != null) {
effectiveEndToEndTimeout = endToEndTimeout.plusMillis(500);
}

CosmosEndToEndOperationLatencyPolicyConfigBuilder e2ePolicyBuilder =
new CosmosEndToEndOperationLatencyPolicyConfigBuilder(endToEndTimeout)
new CosmosEndToEndOperationLatencyPolicyConfigBuilder(effectiveEndToEndTimeout)
.enable(true);
CosmosEndToEndOperationLatencyPolicyConfig endToEndOperationLatencyPolicyConfig =
availabilityStrategy != null
Expand Down Expand Up @@ -5017,6 +5033,14 @@ protected void execute(
ctxValidation.accept(currentCtx);
}
}

// When thin client + HTTP/2 are enabled (fi-thinclient-multi-master / fi-thinclient-multi-region)
// and connection mode is GATEWAY, validate that requests targeted the thin client proxy endpoint
if (Configs.isThinClientEnabled() && Configs.isHttp2Enabled() && connectionMode == ConnectionMode.GATEWAY) {
for (CosmosDiagnosticsContext diagnosticsContext : diagnosticsContexts) {
assertThinClientEndpointUsed(diagnosticsContext);
}
}
} catch (Exception e) {
if (e instanceof CosmosException) {
CosmosException cosmosException = Utils.as(e, CosmosException.class);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,7 @@ public Object[][] ppafDynamicEnablement503Only() {
};
}

@BeforeClass(groups = {"multi-region"})
@BeforeClass(groups = {"multi-region", "fi-thinclient-multi-region"})
public void beforeClass() {
this.sharedClient = getClientBuilder().buildAsyncClient();

Expand All @@ -484,7 +484,7 @@ public void beforeClass() {
this.accountLevelLocationReadableLocationContext = getAccountLevelLocationContext(databaseAccountSnapshot, false);
}

@AfterClass(groups = {"multi-region"})
@AfterClass(groups = {"multi-region", "fi-thinclient-multi-region"})
public void afterClass() throws InterruptedException {
safeClose(this.sharedClient);
System.gc();
Expand Down Expand Up @@ -1258,7 +1258,7 @@ public Object[][] ppafTestConfigsWithWriteOps() {
* <li>Success vs failure based on phase and configuration.</li>
* </ul>
*/
@Test(groups = {"multi-region"}, dataProvider = "ppafTestConfigsWithWriteOps")
@Test(groups = {"multi-region", "fi-thinclient-multi-region"}, dataProvider = "ppafTestConfigsWithWriteOps")
public void testPpafWithWriteFailoverWithEligibleErrorStatusCodes(
String testType,
OperationType operationType,
Expand All @@ -1279,6 +1279,11 @@ public void testPpafWithWriteFailoverWithEligibleErrorStatusCodes(
throw new SkipException(String.format("Test with type : %s not eligible for specified connection mode %s.", testType, connectionMode));
}

// Thin client only supports GATEWAY mode - skip DIRECT mode tests
if (connectionMode == ConnectionMode.DIRECT && Configs.isThinClientEnabled() && Configs.isHttp2Enabled()) {
throw new SkipException("DIRECT connection mode is not supported with thin client - skipping.");
}

if (connectionMode == ConnectionMode.DIRECT) {

TransportClient transportClientMock = Mockito.mock(TransportClient.class);
Expand Down Expand Up @@ -1439,10 +1444,20 @@ public void testPpafWithWriteFailoverWithEligibleErrorStatusCodes(
assertThat(preferredRegions.size()).isGreaterThanOrEqualTo(1);

String regionWithIssues = preferredRegions.get(0);
URI locationEndpointWithIssues = new URI(readableRegionNameToEndpoint.get(regionWithIssues) + "dbs/" + this.sharedDatabase.getId() + "/colls/" + this.sharedSinglePartitionContainer.getId() + "/docs");
String baseEndpoint = readableRegionNameToEndpoint.get(regionWithIssues);

URI locationEndpointWithIssues = new URI(baseEndpoint + "dbs/" + this.sharedDatabase.getId() + "/colls/" + this.sharedSinglePartitionContainer.getId() + "/docs");

ReflectionUtils.setGatewayHttpClient(rxStoreModel, mockedHttpClient);

// When thin client is enabled, data requests route through thinProxy (ThinClientStoreModel)
// which uses RNTBD binary encoding — incompatible with standard HTTP mock responses.
// Replace thinProxy with gatewayProxy so data requests use the same mocked HttpClient
// with standard HTTP encoding. PPAF retry/failover logic is transport-agnostic.
if (Configs.isThinClientEnabled() && Configs.isHttp2Enabled()) {
ReflectionUtils.setThinProxy(rxDocumentClient, rxStoreModel);
}

setupHttpClientToReturnSuccessResponse(mockedHttpClient, operationType, databaseAccount, successStatusCode);

CosmosException cosmosException = createCosmosException(
Expand Down Expand Up @@ -1521,7 +1536,7 @@ public void testPpafWithWriteFailoverWithEligibleErrorStatusCodes(
* <p>Expectations are provided by the data provider: when disabled, the request should not succeed;
* when enabled, it should succeed. Works for both DIRECT and GATEWAY connection modes.</p>
*/
@Test(groups = {"multi-region"}, dataProvider = "ppafDynamicEnablement503Only")
@Test(groups = {"multi-region", "fi-thinclient-multi-region"}, dataProvider = "ppafDynamicEnablement503Only")
public void testPpafWithWriteFailoverWithEligibleErrorStatusCodesWithPpafDynamicEnablement(
String testType,
OperationType operationType,
Expand All @@ -1542,6 +1557,11 @@ public void testPpafWithWriteFailoverWithEligibleErrorStatusCodesWithPpafDynamic
throw new SkipException(String.format("Test with type : %s not eligible for specified connection mode %s.", testType, connectionMode));
}

// Thin client only supports GATEWAY mode - skip DIRECT mode tests
if (connectionMode == ConnectionMode.DIRECT && Configs.isThinClientEnabled() && Configs.isHttp2Enabled()) {
throw new SkipException("DIRECT connection mode is not supported with thin client - skipping.");
}

// DIRECT flow: swap transport client, inject error for primary region/PK range, and verify phase-by-phase
if (connectionMode == ConnectionMode.DIRECT) {
TransportClient transportClientMock = Mockito.mock(TransportClient.class);
Expand Down Expand Up @@ -1690,11 +1710,21 @@ public void testPpafWithWriteFailoverWithEligibleErrorStatusCodesWithPpafDynamic
assertThat(preferredRegions.size()).isGreaterThanOrEqualTo(1);

String regionWithIssues = preferredRegions.get(0);
URI locationEndpointWithIssues = new URI(readableRegionNameToEndpoint.get(regionWithIssues) + "dbs/" + this.sharedDatabase.getId() + "/colls/" + this.sharedSinglePartitionContainer.getId() + "/docs");
String baseEndpoint = readableRegionNameToEndpoint.get(regionWithIssues);

URI locationEndpointWithIssues = new URI(baseEndpoint + "dbs/" + this.sharedDatabase.getId() + "/colls/" + this.sharedSinglePartitionContainer.getId() + "/docs");

// Redirect gateway calls through our mocked HttpClient
ReflectionUtils.setGatewayHttpClient(rxStoreModel, mockedHttpClient);

// When thin client is enabled, data requests route through thinProxy (ThinClientStoreModel)
// which uses RNTBD binary encoding — incompatible with standard HTTP mock responses.
// Replace thinProxy with gatewayProxy so data requests use the same mocked HttpClient
// with standard HTTP encoding. PPAF retry/failover logic is transport-agnostic.
if (Configs.isThinClientEnabled() && Configs.isHttp2Enabled()) {
ReflectionUtils.setThinProxy(rxDocumentClient, rxStoreModel);
}

setupHttpClientToReturnSuccessResponse(mockedHttpClient, operationType, databaseAccountForResponses, successStatusCode);

CosmosException cosmosException = createCosmosException(
Expand Down Expand Up @@ -1788,7 +1818,7 @@ public void testPpafWithWriteFailoverWithEligibleErrorStatusCodesWithPpafDynamic
* <p>Dynamic enablement is achieved by overriding GlobalEndpointManager's owner to
* inject the PPAF flag into DatabaseAccount snapshots.</p>
*/
@Test(groups = {"multi-region"}, dataProvider = "ppafNonWriteDynamicEnablementScenarios")
@Test(groups = {"multi-region", "fi-thinclient-multi-region"}, dataProvider = "ppafNonWriteDynamicEnablementScenarios")
public void testFailoverBehaviorForNonWriteOperationsWithPpafDynamicEnablement(
String testType,
OperationType operationType,
Expand All @@ -1804,6 +1834,11 @@ public void testFailoverBehaviorForNonWriteOperationsWithPpafDynamicEnablement(
throw new SkipException(String.format("Test with type : %s not eligible for specified connection mode %s.", testType, connectionMode));
}

// Thin client only supports GATEWAY mode - skip DIRECT mode tests
if (connectionMode == ConnectionMode.DIRECT && Configs.isThinClientEnabled() && Configs.isHttp2Enabled()) {
throw new SkipException("DIRECT connection mode is not supported with thin client - skipping.");
}

final int consecutiveFaults = 10;

// ===================== DIRECT MODE PATH =====================
Expand Down Expand Up @@ -2060,6 +2095,15 @@ public void testFailoverBehaviorForNonWriteOperationsWithPpafDynamicEnablement(
expectedDuringWindow,
expectedAfterWindow);

// Validate thin client endpoint was used when thin client is enabled
if (Configs.isThinClientEnabled() && Configs.isHttp2Enabled()) {
ResponseWrapper<?> probeResponse = dataPlaneOperation.apply(params);
CosmosDiagnostics diag = extractDiagnostics(probeResponse);
if (diag != null) {
assertThinClientEndpointUsed(diag.getDiagnosticsContext());
}
}

} catch (Exception e) {
Assertions.fail("The test ran into an exception {}", e);
} finally {
Expand Down Expand Up @@ -2089,6 +2133,19 @@ private void runHedgingPhasesForNonWrite(
this.validateExpectedResponseCharacteristics.accept(postWindow, expectedAfterWindow);
}

private static CosmosDiagnostics extractDiagnostics(ResponseWrapper<?> response) {
if (response.cosmosItemResponse != null) {
return response.cosmosItemResponse.getDiagnostics();
} else if (response.feedResponse != null) {
return response.feedResponse.getCosmosDiagnostics();
} else if (response.cosmosException != null) {
return response.cosmosException.getDiagnostics();
} else if (response.batchResponse != null) {
return response.batchResponse.getDiagnostics();
}
return null;
}

private static class DelegatingDatabaseAccountManagerInternal implements DatabaseAccountManagerInternal {
private final DatabaseAccountManagerInternal delegate;
private final AtomicReference<Boolean> ppafEnabledRef;
Expand Down
Loading