From b1e5ce513b659ce2fbc9b92fa6682b6a6aeb934c Mon Sep 17 00:00:00 2001 From: Anshum Gupta Date: Wed, 11 Feb 2026 01:58:38 -0800 Subject: [PATCH] SOLR-18086: Fix LBSolrClient behavior in case of zombie nodes --- changelog/unreleased/SOLR-18086.yml | 9 +++ .../solr/client/solrj/impl/LBSolrClient.java | 1 + .../client/solrj/impl/LBSolrClientTest.java | 55 +++++++++++++++---- 3 files changed, 54 insertions(+), 11 deletions(-) create mode 100644 changelog/unreleased/SOLR-18086.yml diff --git a/changelog/unreleased/SOLR-18086.yml b/changelog/unreleased/SOLR-18086.yml new file mode 100644 index 000000000000..b03e85dfd94f --- /dev/null +++ b/changelog/unreleased/SOLR-18086.yml @@ -0,0 +1,9 @@ +# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc +title: Stale EndpointIterator state causes unexpected order when there are zombie servers in LBSolrClient +type: fixed # added, changed, fixed, deprecated, removed, dependency_update, security, other +authors: + - name: Chris Hostetter + - name: Anshum Gupta +links: + - name: SOLR-18086 + url: https://issues.apache.org/jira/browse/SOLR-18086 diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBSolrClient.java b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBSolrClient.java index 74e55e9629d6..8b39d0691882 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBSolrClient.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBSolrClient.java @@ -383,6 +383,7 @@ private void fetchNext() { skipped.add(wrapper.getEndpoint()); } } + endpoint = null; continue; } diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/LBSolrClientTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/LBSolrClientTest.java index 60caf7059dfd..1c6ec34f0945 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/LBSolrClientTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/LBSolrClientTest.java @@ -96,20 +96,53 @@ public void testServerIterator() throws SolrServerException { public void testServerIteratorWithZombieServers() throws SolrServerException { HashMap zombieServers = new HashMap<>(); LBSolrClient.Req req = new LBSolrClient.Req(new QueryRequest(), SOLR_ENDPOINTS); - LBSolrClient.EndpointIterator endpointIterator = + + // pick a random number of endpoints to mark as zombies + final int numToShuffle = random().nextInt(SOLR_ENDPOINTS.size()); + // yes, this might pick the same endpoint multiple times; fine for our purposes + for (int i = 0; i < numToShuffle; i++) { + final LBSolrClient.Endpoint z = SOLR_ENDPOINTS.get(random().nextInt(SOLR_ENDPOINTS.size())); + zombieServers.put(z.getUrl(), new LBSolrClient.EndpointWrapper(z)); + } + // Try those on the Zombie list after all other possibilities are exhausted. + final List expectedOrder = new ArrayList<>(SOLR_ENDPOINTS); + for (LBSolrClient.Endpoint e : SOLR_ENDPOINTS) { + if (zombieServers.containsKey(e.getUrl())) { + expectedOrder.remove(e); + expectedOrder.add(e); + } + } + + final LBSolrClient.EndpointIterator endpointIterator = new LBSolrClient.EndpointIterator(req, zombieServers); - zombieServers.put("2", new LBSolrClient.EndpointWrapper(new LBSolrClient.Endpoint("2"))); + final List actualOrder = new ArrayList<>(); + while (endpointIterator.hasNext()) { + actualOrder.add(endpointIterator.nextOrError()); + } + assertFalse(endpointIterator.hasNext()); // sanity check double call + assertEquals("randomZombies(" + zombieServers.keySet() + ")", expectedOrder, actualOrder); + LuceneTestCase.expectThrows(SolrServerException.class, endpointIterator::nextOrError); + } - assertTrue(endpointIterator.hasNext()); - assertEquals(new LBSolrClient.Endpoint("1"), endpointIterator.nextOrError()); - assertTrue(endpointIterator.hasNext()); - assertEquals(new LBSolrClient.Endpoint("3"), endpointIterator.nextOrError()); - assertTrue(endpointIterator.hasNext()); - assertEquals(new LBSolrClient.Endpoint("4"), endpointIterator.nextOrError()); + @Test + public void testServerIteratorWithAllZombies() throws SolrServerException { + HashMap zombieServers = new HashMap<>(); + LBSolrClient.Req req = new LBSolrClient.Req(new QueryRequest(), SOLR_ENDPOINTS); + for (LBSolrClient.Endpoint e : SOLR_ENDPOINTS) { + zombieServers.put(e.getBaseUrl(), new LBSolrClient.EndpointWrapper(e)); + } - // Try those on the Zombie list after all other possibilities are exhausted. - assertTrue(endpointIterator.hasNext()); - assertEquals(new LBSolrClient.Endpoint("2"), endpointIterator.nextOrError()); + final LBSolrClient.EndpointIterator endpointIterator = + new LBSolrClient.EndpointIterator(req, zombieServers); + // if everyone is a zombie, then the original sorted preference order of + // endpoints should still be respected + final List actualOrder = new ArrayList<>(); + while (endpointIterator.hasNext()) { + actualOrder.add(endpointIterator.nextOrError()); + } + assertFalse(endpointIterator.hasNext()); // sanity check double call + assertEquals(SOLR_ENDPOINTS, actualOrder); + LuceneTestCase.expectThrows(SolrServerException.class, endpointIterator::nextOrError); } @Test