Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions sdk/cosmos/azure-cosmos/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#### Bugs Fixed
* Fixed bug where the SDK hit a timeout caused by infinite recursion while handling 410 (Gone) errors. See [PR 44770](https://github.com/Azure/azure-sdk-for-python/pull/44770)
* Fixed crash in sync and async clients when `force_refresh_on_startup` was set to `None`, which could surface as `AttributeError: 'NoneType' object has no attribute '_WritableLocations'` during region discovery when `database_account` was `None`. See [PR 44987](https://github.com/Azure/azure-sdk-for-python/pull/44987)
* Fixed bug where unavailable regional endpoints were dropped from the routing list instead of being kept as fallback options. See [PR 45200](https://github.com/Azure/azure-sdk-for-python/pull/45200)

#### Other Changes
* Added tests for multi-language support for full text search. See [PR 44254](https://github.com/Azure/azure-sdk-for-python/pull/44254)
Expand Down
7 changes: 5 additions & 2 deletions sdk/cosmos/azure-cosmos/azure/cosmos/_location_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -524,8 +524,11 @@ def get_preferred_regional_routing_contexts(
else:
regional_endpoints.append(regional_endpoint)

# If all preferred locations are unavailable, honor the preferred list by trying them anyway.
if not regional_endpoints and unavailable_endpoints:
# Always append unavailable endpoints to the end of the list so they can be
# used as a last resort. This ensures that when all healthy endpoints are filtered
# out (e.g., by excluded_locations), the SDK can still fall back to unavailable
# regional endpoints rather than the global endpoint.
if unavailable_endpoints:
regional_endpoints.extend(unavailable_endpoints)
Comment on lines +527 to 532
Copy link

Copilot AI Feb 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This bug fix should be documented in the CHANGELOG.md file. Consider adding an entry under a new unreleased version section (e.g., "### 4.15.1 (Unreleased)") with a "#### Bugs Fixed" subsection describing how unavailable endpoints are now properly retained in the routing list as fallback options instead of being dropped entirely.

Copilot uses AI. Check for mistakes.

# If there are no preferred locations or none of the preferred locations are in the account,
Expand Down
83 changes: 83 additions & 0 deletions sdk/cosmos/azure-cosmos/tests/test_location_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -472,5 +472,88 @@ def test_write_fallback_to_global_after_regional_retries_exhausted(self):
final_endpoint = lc.resolve_service_endpoint(write_request)
assert final_endpoint == location1_endpoint

def test_unavailable_endpoints_not_dropped_from_routing_list(self):
    """
    Unavailable endpoints must be moved to the end of the routing list, not dropped.

    Scenario:
    - Customer has preferred_locations = ["East US", "West US 2"]
      (represented here by location1 and location2)
    - location1 is marked unavailable for writes
    - Customer makes a request with excluded_locations = [location2]
    - Expected: location1 is still resolved as the fallback, because it is
      unavailable but remains in the routing list
    """
    # Setup: two preferred locations on a multi-write account.
    preferred_locations = [location1_name, location2_name]
    lc = refresh_location_cache(preferred_locations, use_multiple_write_locations=True)
    db_acc = create_database_account(enable_multiple_writable_locations=True)
    lc.perform_on_database_account_read(db_acc)

    # Initial state: both locations are routable, in preferred order.
    write_contexts = lc.get_write_regional_routing_contexts()
    assert len(write_contexts) == 2
    assert write_contexts[0].get_primary() == location1_endpoint
    assert write_contexts[1].get_primary() == location2_endpoint

    # Mark location1 as unavailable for writes.
    lc.mark_endpoint_unavailable_for_write(location1_endpoint, refresh_cache=True, context="test")

    # The routing list must still hold both endpoints:
    # healthy ones first, unavailable ones at the end.
    write_contexts_after = lc.get_write_regional_routing_contexts()
    assert len(write_contexts_after) == 2, (
        f"Expected 2 endpoints in routing list, got {len(write_contexts_after)}. "
        "Unavailable endpoint was incorrectly dropped!"
    )
    # location2 (healthy) comes first ...
    assert write_contexts_after[0].get_primary() == location2_endpoint
    # ... and location1 (unavailable) is kept last as a fallback.
    assert write_contexts_after[1].get_primary() == location1_endpoint

    # Simulate a customer write request that excludes the only healthy region.
    write_request = RequestObject(ResourceType.Document, _OperationType.Create, None)
    write_request.excluded_locations = [location2_name]

    # With location2 excluded, the only remaining regional option is the
    # unavailable location1; resolution must pick it rather than falling
    # back to the global endpoint.
    resolved_endpoint = lc.resolve_service_endpoint(write_request)
    assert resolved_endpoint == location1_endpoint, (
        f"Expected {location1_endpoint} but got {resolved_endpoint}. "
        "Bug: Unavailable endpoint was dropped and SDK fell back to global endpoint!"
    )

def test_unavailable_endpoints_ordering_in_routing_list(self):
    """
    Healthy endpoints must precede unavailable ones in the write routing list.

    The SDK should try healthy regions first while still keeping unavailable
    regions at the tail of the list as a fallback.
    """
    # Three preferred locations on a multi-write account.
    lc = refresh_location_cache(
        [location1_name, location2_name, location3_name],
        use_multiple_write_locations=True,
    )
    account = create_database_account(enable_multiple_writable_locations=True)
    lc.perform_on_database_account_read(account)

    def current_write_primaries():
        # Snapshot the primary endpoint of every write routing context, in order.
        return [ctx.get_primary() for ctx in lc.get_write_regional_routing_contexts()]

    # Step 1: take location1 out of rotation.
    lc.mark_endpoint_unavailable_for_write(location1_endpoint, refresh_cache=True, context="test")

    # Healthy location2 and location3 lead; unavailable location1 trails.
    assert current_write_primaries() == [
        location2_endpoint,  # first healthy
        location3_endpoint,  # second healthy
        location1_endpoint,  # unavailable, kept at the end
    ]

    # Step 2: take location2 out of rotation as well.
    lc.mark_endpoint_unavailable_for_write(location2_endpoint, refresh_cache=True, context="test")

    # Only location3 is healthy; the unavailable ones follow in their
    # original preferred order.
    assert current_write_primaries() == [
        location3_endpoint,  # only healthy
        location1_endpoint,
        location2_endpoint,
    ]

# Run the tests through unittest when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
Loading