Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
5a4b74d
Cosmos Java: share PartitionKeyRangeCache across CosmosClients target…
xinlian12 Jun 18, 2026
f3fa638
Cosmos Java: use URI (not String) as registry key for case-insensitiv…
xinlian12 Jun 19, 2026
75f93d5
Cosmos Java: keep registry key as serviceEndpoint URI (not _rid) for …
xinlian12 Jun 19, 2026
05f6780
Cosmos Java: add PhantomReference-based leak safety net for unclosed …
xinlian12 Jun 19, 2026
cbd47a8
Cosmos Java: use azure-core ReferenceManager for leaked-client safety…
xinlian12 Jun 19, 2026
9b43616
remove kafka test output
xinlian12 Jun 19, 2026
892a7d7
Cosmos Java: trim comments to core logic; drop cross-SDK references
xinlian12 Jun 19, 2026
1a5e92d
Cosmos Java: address PR review feedback
xinlian12 Jun 19, 2026
9356fbc
Retrigger CI
xinlian12 Jun 19, 2026
3afed67
Retrigger CI
xinlian12 Jun 19, 2026
39913c6
Cosmos Java: clarify 2-arg RxPartitionKeyRangeCache ctor behavior
xinlian12 Jun 19, 2026
022836d
Retrigger CI
xinlian12 Jun 19, 2026
c3eaba8
Retrigger CI
xinlian12 Jun 19, 2026
d585e36
Retrigger CI
xinlian12 Jun 20, 2026
7285d1c
Remove 2-arg RxPartitionKeyRangeCache ctor; require explicit endpoint
xinlian12 Jun 22, 2026
d6494d8
Key shared PartitionKeyRangeCache registry by database account id
xinlian12 Jun 22, 2026
7a693fa
Add e2e tests for shared PartitionKeyRangeCache + refresh Configs com…
xinlian12 Jun 22, 2026
4fad3a9
Address PR review on e2e test: data provider, TestObject, public acce…
xinlian12 Jun 22, 2026
a806ee6
Fix two regressions caught by CI live tests
xinlian12 Jun 23, 2026
5ffb1c0
Revert registry key to service endpoint URI (drop fragile account-id …
xinlian12 Jun 23, 2026
9162bb2
Fix CI: remove unworkable e2e negative test; revert PKR_LOOK_UP diagn…
xinlian12 Jun 23, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.azure.cosmos;

import com.azure.cosmos.implementation.RxDocumentClientImpl;
import com.azure.cosmos.implementation.caches.AsyncCacheNonBlocking;
import com.azure.cosmos.implementation.caches.RxPartitionKeyRangeCache;
import com.azure.cosmos.implementation.caches.SharedPartitionKeyRangeCacheRegistry;
import com.azure.cosmos.implementation.directconnectivity.ReflectionUtils;
import com.azure.cosmos.implementation.routing.CollectionRoutingMap;
import com.azure.cosmos.models.CosmosContainerProperties;
import com.azure.cosmos.models.CosmosItemRequestOptions;
import com.azure.cosmos.models.CosmosItemResponse;
import com.azure.cosmos.models.PartitionKey;
import com.azure.cosmos.models.ThroughputProperties;
import com.azure.cosmos.rx.TestSuiteBase;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Factory;
import org.testng.annotations.Test;

import java.lang.reflect.Method;
import java.net.URI;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;

import static org.assertj.core.api.Assertions.assertThat;

/**
 * End-to-end tests for {@link SharedPartitionKeyRangeCacheRegistry}: spin up real
 * {@link CosmosAsyncClient} instances, perform partition-key-routed operations to
 * populate the routing-map cache, and verify the registry's sharing semantics.
 *
 * <p>Sharing is keyed by the service endpoint {@link URI} configured on
 * {@link CosmosClientBuilder}. Two clients configured with the same endpoint
 * URI share the cache; clients configured with different endpoint URIs (e.g.
 * the global endpoint vs a regional endpoint of the same logical account) do
 * <b>not</b> share — see {@link SharedPartitionKeyRangeCacheRegistry} javadoc
 * for the rationale.</p>
 */
public class SharedPartitionKeyRangeCacheE2ETest extends TestSuiteBase {
    private static final Logger logger = LoggerFactory.getLogger(SharedPartitionKeyRangeCacheE2ETest.class);

    // Values passed to the TestNG timeOut attribute of the test / setup / teardown methods below.
    private static final int TIMEOUT = 90_000;
    private static final int SETUP_TIMEOUT = 60_000;
    private static final int SHUTDOWN_TIMEOUT = 30_000;

    // Long-lived client owned by the fixture; it is created in before() and closed in after().
    private CosmosAsyncClient setupClient;
    private CosmosAsyncDatabase database;
    private CosmosAsyncContainer container;
    // Endpoint of setupClient; used as the key when querying the registry refcount.
    private URI serviceEndpoint;

    /**
     * Factory constructor; instances are created from the {@code simpleGatewayClient} data provider.
     *
     * @param clientBuilder builder handed to {@link TestSuiteBase}; every client in this class is built from it
     */
    @Factory(dataProvider = "simpleGatewayClient")
    public SharedPartitionKeyRangeCacheE2ETest(CosmosClientBuilder clientBuilder) {
        super(clientBuilder);
    }

    /**
     * Builds the setup client, creates a uniquely named container partitioned on {@code /mypk}
     * in the shared test database, and records the setup client's service endpoint.
     */
    @BeforeClass(groups = {"emulator", "fast"}, timeOut = SETUP_TIMEOUT)
    public void before() {
        this.setupClient = getClientBuilder().buildAsyncClient();
        this.database = getSharedCosmosDatabase(this.setupClient);

        // Random suffix keeps the container name unique per run.
        String containerId = "pkr-share-e2e-" + UUID.randomUUID();
        CosmosContainerProperties properties =
            new CosmosContainerProperties(containerId, "/mypk");
        this.database
            .createContainer(properties, ThroughputProperties.createManualThroughput(400))
            .block();
        this.container = this.database.getContainer(containerId);

        this.serviceEndpoint = serviceEndpointOf(this.setupClient);
        assertThat(this.serviceEndpoint)
            .as("service endpoint must be available after client init")
            .isNotNull();
    }

    /**
     * Best-effort teardown: deletes the test container (a failure is logged, not rethrown)
     * and closes the setup client. Runs even when setup or tests failed ({@code alwaysRun}).
     */
    @AfterClass(groups = {"emulator", "fast"}, timeOut = SHUTDOWN_TIMEOUT, alwaysRun = true)
    public void after() {
        if (this.container != null) {
            try {
                this.container.delete().block();
            } catch (Exception e) {
                // Deliberately not rethrown so the client below is still closed.
                logger.warn("Failed to delete e2e container", e);
            }
        }
        safeClose(this.setupClient);
    }

    /**
     * Two {@link CosmosAsyncClient} instances configured with the same service
     * endpoint must share the underlying {@link AsyncCacheNonBlocking} routing-map
     * storage, and the registry refcount must reflect both holders. Closing each
     * client must then drop the refcount by exactly one.
     */
    @Test(groups = {"emulator", "fast"}, timeOut = TIMEOUT)
    public void twoClientsOnSameEndpointShareRoutingMapStorage() {
        CosmosAsyncClient clientA = null;
        CosmosAsyncClient clientB = null;
        try {
            clientA = getClientBuilder().buildAsyncClient();
            clientB = getClientBuilder().buildAsyncClient();

            // Trigger PK-routed operations on both clients so the routing-map cache populates.
            TestObject seed = TestObject.create();
            createItem(clientA, seed);
            readItemSilently(clientA, seed.getMypk());
            readItemSilently(clientB, seed.getMypk());

            AsyncCacheNonBlocking<String, CollectionRoutingMap> storageA = routingMapStorageOf(clientA);
            AsyncCacheNonBlocking<String, CollectionRoutingMap> storageB = routingMapStorageOf(clientB);

            assertThat(storageA)
                .as("Two CosmosAsyncClients on the same endpoint must share the routing-map AsyncCacheNonBlocking instance")
                .isSameAs(storageB);

            int refCount = registryReferenceCount(this.serviceEndpoint);
            assertThat(refCount)
                .as("Registry refcount for endpoint [%s] must include both clients", this.serviceEndpoint)
                .isGreaterThanOrEqualTo(2);

            ConcurrentHashMap<String, ?> values =
                ReflectionUtils.getValueMapNonBlockingCache(storageA);
            assertThat(values)
                .as("Routing-map cache must contain at least one entry after PK-routed reads")
                .isNotEmpty();

            // The refcount-drop checks live in the try body, not in finally: an assertion
            // failing inside finally would replace the original failure and would skip the
            // remaining cleanup, leaving the second client (and its registry reference) open.
            int refCountBeforeClose = registryReferenceCount(this.serviceEndpoint);

            safeClose(clientA);
            clientA = null; // already closed; keeps the finally block from closing it again
            int refCountAfterFirstClose = registryReferenceCount(this.serviceEndpoint);
            assertThat(refCountAfterFirstClose)
                .as("Closing one client must drop the registry refcount by exactly one")
                .isEqualTo(refCountBeforeClose - 1);

            safeClose(clientB);
            clientB = null; // already closed; keeps the finally block from closing it again
            int refCountAfterSecondClose = registryReferenceCount(this.serviceEndpoint);
            assertThat(refCountAfterSecondClose)
                .as("Closing both test clients must drop refcount by two (setup client may still hold a reference)")
                .isEqualTo(refCountBeforeClose - 2);
        } finally {
            // Cleanup only: closes whichever clients are still open after a failure above.
            safeClose(clientA);
            safeClose(clientB);
        }
    }

    // --- helpers ----------------------------------------------------------------

    /**
     * Creates {@code item} in the test container through the given client, blocking until done.
     *
     * @param client client whose request pipeline should perform the write
     * @param item document to create; its {@code mypk} value is used as the partition key
     */
    private void createItem(CosmosAsyncClient client, TestObject item) {
        CosmosAsyncContainer c = client
            .getDatabase(this.database.getId())
            .getContainer(this.container.getId());
        c.createItem(item, new PartitionKey(item.getMypk()), new CosmosItemRequestOptions()).block();
    }

    /**
     * Issues a point read for a random id in partition {@code pk} through the given client,
     * swallowing a 404 response. Any other {@link CosmosException} is rethrown.
     *
     * @param client client whose routing-map cache should be exercised
     * @param pk partition key value to route the read with
     */
    private void readItemSilently(CosmosAsyncClient client, String pk) {
        // The cache is populated by the resolve step regardless of whether the doc exists;
        // we issue a random-id read and tolerate 404.
        CosmosAsyncContainer c = client
            .getDatabase(this.database.getId())
            .getContainer(this.container.getId());
        try {
            CosmosItemResponse<TestObject> resp = c.readItem(
                UUID.randomUUID().toString(),
                new PartitionKey(pk),
                new CosmosItemRequestOptions(),
                TestObject.class).block();
            assertThat(resp).isNotNull();
        } catch (CosmosException ex) {
            if (ex.getStatusCode() != 404) {
                throw ex;
            }
        }
    }

    /**
     * Extracts the routing-map cache instance backing the client's {@link RxPartitionKeyRangeCache}.
     *
     * @param client client to inspect
     * @return the {@link AsyncCacheNonBlocking} obtained from the client's partition key range cache
     */
    private static AsyncCacheNonBlocking<String, CollectionRoutingMap> routingMapStorageOf(CosmosAsyncClient client) {
        RxDocumentClientImpl rxDocumentClient =
            (RxDocumentClientImpl) CosmosBridgeInternal.getAsyncDocumentClient(client);
        RxPartitionKeyRangeCache partitionKeyRangeCache = rxDocumentClient.getPartitionKeyRangeCache();
        return ReflectionUtils.getRoutingMapAsyncCacheNonBlocking(partitionKeyRangeCache);
    }

    /**
     * Returns the service endpoint reported by the client's underlying {@link RxDocumentClientImpl}.
     *
     * @param client client to inspect
     * @return the endpoint {@link URI} of the underlying document client
     */
    private static URI serviceEndpointOf(CosmosAsyncClient client) {
        RxDocumentClientImpl rxDocumentClient =
            (RxDocumentClientImpl) CosmosBridgeInternal.getAsyncDocumentClient(client);
        return rxDocumentClient.getServiceEndpoint();
    }

    /**
     * Reads the registry's reference count for {@code endpoint}.
     *
     * <p>The registry's {@code referenceCount} accessor is package-private (test-only),
     * so it is invoked reflectively; widening visibility for a test-only check
     * would pollute the implementation class's surface.</p>
     *
     * @param endpoint service endpoint whose refcount is requested
     * @return the value returned by {@code referenceCount(URI)}
     * @throws RuntimeException if the accessor cannot be found or invoked reflectively
     */
    private static int registryReferenceCount(URI endpoint) {
        try {
            SharedPartitionKeyRangeCacheRegistry registry = SharedPartitionKeyRangeCacheRegistry.getInstance();
            Method m = SharedPartitionKeyRangeCacheRegistry.class.getDeclaredMethod("referenceCount", URI.class);
            m.setAccessible(true);
            return (Integer) m.invoke(registry, endpoint);
        } catch (ReflectiveOperationException e) {
            throw new RuntimeException("Failed to reflect SharedPartitionKeyRangeCacheRegistry.referenceCount", e);
        }
    }
}
Loading
Loading