From 784ba1e86e74e731a752eb9feaad764742e09960 Mon Sep 17 00:00:00 2001
From: ZhenyuLi <893652269@qq.com>
Date: Wed, 25 Mar 2026 15:28:15 -0400
Subject: [PATCH] ZOOKEEPER-5033: Quorum SASL authentication fails permanently
 after Login TGT refresh thread exits

Add forceReLogin() to Login, which logs in again immediately without the
minimum-time check, and call it from SaslQuorumAuthLearner and
SaslQuorumAuthServer when authentication fails. This ensures that the next
authentication attempt uses fresh credentials after the TGT has expired.
---
.../main/java/org/apache/zookeeper/Login.java | 38 +++++
.../quorum/auth/SaslQuorumAuthLearner.java | 21 +++
.../quorum/auth/SaslQuorumAuthServer.java | 10 ++
.../auth/SaslQuorumAuthReLoginTest.java | 161 ++++++++++++++++++
4 files changed, 230 insertions(+)
create mode 100644 zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/auth/SaslQuorumAuthReLoginTest.java
diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/Login.java b/zookeeper-server/src/main/java/org/apache/zookeeper/Login.java
index 2c483a5f74b..ac6762b16a6 100644
--- a/zookeeper-server/src/main/java/org/apache/zookeeper/Login.java
+++ b/zookeeper-server/src/main/java/org/apache/zookeeper/Login.java
@@ -422,6 +422,44 @@ public long getLastLogin() {
return lastLogin;
}
+ /**
+ * Force a re-login, bypassing the minimum time check and the Kerberos ticket check.
+ * This is used when authentication fails and fresh credentials are needed immediately,
+ * regardless of the authentication mechanism (Kerberos or DIGEST-MD5).
+ *
+ * <p>Unlike {@link #reLogin()}, this method:
+ * <ul>
+ * <li>Does not check {@code isKrbTicket} — works for all SASL mechanisms</li>
+ * <li>Does not check {@code hasSufficientTimeElapsed} — allows immediate retry</li>
+ * <li>Clears stale credentials from the Subject before re-login</li>
+ * </ul>
+ *
+ * @throws javax.security.auth.login.LoginException if no login has been done yet, or if the new login fails
+ */
+ public synchronized void forceReLogin() throws LoginException {
+ LoginContext lc = getLogin();
+ if (lc == null) {
+ throw new LoginException("login must be done first");
+ }
+ LOG.info("Forcing re-login for {}", getUserName());
+ synchronized (Login.class) {
+ // First try the regular logout to clean up mechanism-specific state
+ logout();
+ // Clear any remaining credentials that logout may not have handled.
+ // For DIGEST-MD5, DigestLoginModule.logout() is a no-op and does not
+ // remove credentials from the Subject. We must clear them explicitly
+ // to avoid stale credentials being picked up on the next authentication
+ // attempt (SecurityUtils.createSaslClient uses toArray()[0]).
+ subject.getPrivateCredentials().clear();
+ subject.getPublicCredentials().clear();
+ // Re-login to get fresh credentials. If this throws, the credentials cleared above are not restored.
+ lc = new LoginContext(loginContextName, subject, newCallbackHandler());
+ lc.login();
+ setLogin(lc);
+ }
+ setLastLogin(Time.currentElapsedTime());
+ }
+
/**
* Re-login a principal. This method assumes that {@link #login(String)} has happened already.
* @throws javax.security.auth.login.LoginException on a failure
diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/auth/SaslQuorumAuthLearner.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/auth/SaslQuorumAuthLearner.java
index 3151a57567d..9d522d9ad7c 100644
--- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/auth/SaslQuorumAuthLearner.java
+++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/auth/SaslQuorumAuthLearner.java
@@ -133,6 +133,22 @@ public void authenticate(Socket sock, String hostName) throws IOException {
// Validate status code at the end of authentication exchange.
checkAuthStatus(sock, qpStatus);
+ } catch (SaslException e) {
+ // Authentication failed. Try to re-login so that the next
+ // authentication attempt (after the caller reconnects) will
+ // use fresh credentials. This handles the case where the
+ // Kerberos TGT has expired and the Login refresh thread has
+ // exited, or credentials have otherwise become stale.
+ LOG.warn(
+ "SASL authentication failed against server addr: {}, attempting re-login for next retry",
+ sock.getRemoteSocketAddress(), e);
+ try {
+ learnerLogin.forceReLogin();
+ LOG.info("Successfully re-logged in after SASL authentication failure");
+ } catch (LoginException le) {
+ LOG.error("Failed to re-login after SASL authentication failure", le);
+ }
+ throw e;
} finally {
if (sc != null) {
try {
@@ -172,6 +188,11 @@ private void send(DataOutputStream dout, byte[] response) throws IOException {
bufferedOutput.flush();
}
+ // Visible for testing: package-private accessor that returns the learnerLogin instance.
+ Login getLogin() {
+ return learnerLogin;
+ }
+
// TODO: need to consolidate the #createSaslToken() implementation between ZooKeeperSaslClient#createSaslToken().
private byte[] createSaslToken(
final byte[] saslToken,
diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/auth/SaslQuorumAuthServer.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/auth/SaslQuorumAuthServer.java
index a1425833b7f..21435c3fd57 100644
--- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/auth/SaslQuorumAuthServer.java
+++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/auth/SaslQuorumAuthServer.java
@@ -123,6 +123,16 @@ public void authenticate(Socket sock, DataInputStream din) throws SaslException
} catch (IOException ioe) {
LOG.warn("Exception while sending failed status", ioe);
}
+ // Try to re-login so that the next authentication attempt
+ // will use fresh credentials. This handles the case where the
+ // Kerberos TGT has expired and the Login refresh thread has
+ // exited, or credentials have otherwise become stale.
+ try {
+ serverLogin.forceReLogin();
+ LOG.info("Successfully re-logged in after server SASL authentication failure");
+ } catch (LoginException le) {
+ LOG.error("Failed to re-login after server SASL authentication failure", le);
+ }
// If sasl is not required, when a server initializes a
// connection it will try to log in, but it will also
// accept connections that do not start with a sasl
diff --git a/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/auth/SaslQuorumAuthReLoginTest.java b/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/auth/SaslQuorumAuthReLoginTest.java
new file mode 100644
index 00000000000..aa4a3687f8b
--- /dev/null
+++ b/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/auth/SaslQuorumAuthReLoginTest.java
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.zookeeper.server.quorum.auth;
+
+import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import java.io.BufferedInputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.net.ServerSocket;
+import java.net.Socket;
+import java.util.HashSet;
+import java.util.concurrent.atomic.AtomicReference;
+import javax.security.auth.Subject;
+import javax.security.auth.login.Configuration;
+import javax.security.sasl.SaslException;
+
+import org.apache.zookeeper.Login;
+import org.apache.zookeeper.common.X509Util;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.Timeout;
+
+/**
+ * Tests that SaslQuorumAuthLearner recovers from SASL authentication
+ * failures by re-logging in to refresh stale credentials.
+ *
+ * <p>This addresses the scenario where the Login TGT refresh thread
+ * has silently exited (due to clock skew, KDC unavailability, etc.)
+ * and the cached credentials in the Subject have become stale.
+ * Without the re-login logic, the learner would fail to authenticate
+ * indefinitely until the process is restarted.
+ */
+public class SaslQuorumAuthReLoginTest extends QuorumAuthTestBase {
+
+    private static final String JAAS_ENTRIES =
+        "QuorumServer {\n"
+        + " org.apache.zookeeper.server.auth.DigestLoginModule required\n"
+        + " user_test=\"mypassword\";\n"
+        + "};\n"
+        + "QuorumLearner {\n"
+        + " org.apache.zookeeper.server.auth.DigestLoginModule required\n"
+        + " username=\"test\"\n"
+        + " password=\"mypassword\";\n"
+        + "};\n";
+
+    private SaslQuorumAuthServer authServer;
+    private SaslQuorumAuthLearner authLearner;
+
+    @BeforeAll
+    public static void setUpClass() {
+        // DIGEST-MD5 is not FIPS-compliant
+        System.setProperty(X509Util.FIPS_MODE_PROPERTY, "false");
+        setupJaasConfig(JAAS_ENTRIES);
+    }
+
+    @AfterAll
+    public static void tearDownClass() {
+        System.clearProperty(X509Util.FIPS_MODE_PROPERTY);
+        cleanupJaasConfig();
+    }
+
+    @BeforeEach
+    public void setUp() throws Exception {
+        Configuration.getConfiguration().refresh();
+        authServer = new SaslQuorumAuthServer(
+            true, "QuorumServer", new HashSet<>());
+        authLearner = new SaslQuorumAuthLearner(
+            true, "zkquorum/localhost", "QuorumLearner");
+    }
+
+    @AfterEach
+    @Override
+    public void tearDown() throws Exception {
+        super.tearDown();
+    }
+
+    /**
+     * Test that after credential corruption and authentication failure,
+     * the re-login mechanism restores valid credentials so that the
+     * next authentication attempt succeeds.
+     *
+     * <p>Without the fix (forceReLogin on auth failure), the second
+     * authentication attempt would also fail because the corrupted
+     * credentials remain in the Subject.
+     */
+    @Test
+    @Timeout(value = 30)
+    public void testReLoginOnSaslAuthFailure() throws Exception {
+        // Baseline: normal authentication should succeed
+        runAuthentication();
+
+        // Simulate stale/corrupted credentials by replacing the
+        // password in the learner's Subject
+        Login learnerLogin = authLearner.getLogin();
+        Subject subject = learnerLogin.getSubject();
+        subject.getPrivateCredentials().clear();
+        subject.getPrivateCredentials().add("wrongpassword");
+
+        // Authentication should fail with corrupted credentials.
+        // With the fix, forceReLogin() is called inside authenticate(),
+        // which restores the correct credentials from JAAS config.
+        assertThrows(IOException.class, this::runAuthentication);
+
+        // The next authentication attempt should succeed because
+        // forceReLogin() restored the correct credentials.
+        // Without the fix, this would fail because the corrupted
+        // credentials are still in the Subject.
+        assertDoesNotThrow(this::runAuthentication);
+    }
+
+    /**
+     * Run a single SASL authentication exchange between the learner
+     * and server over connected sockets.
+     *
+     * <p>The server side runs on a helper thread. That thread is always
+     * joined (for up to five seconds) before this method returns, even
+     * when the learner side throws, so that server-side work from one
+     * exchange is given time to finish before the next exchange starts.
+     *
+     * @throws Exception the learner-side failure if the learner could not
+     *         authenticate; otherwise the exception captured on the server
+     *         thread, if any
+     */
+    private void runAuthentication() throws Exception {
+        try (ServerSocket ss = new ServerSocket(0)) {
+            int port = ss.getLocalPort();
+            AtomicReference<Exception> serverError = new AtomicReference<>();
+
+            Thread serverThread = new Thread(() -> {
+                try (Socket serverSock = ss.accept()) {
+                    DataInputStream din = new DataInputStream(
+                        new BufferedInputStream(serverSock.getInputStream()));
+                    authServer.authenticate(serverSock, din);
+                } catch (Exception e) {
+                    serverError.set(e);
+                }
+            });
+            serverThread.setDaemon(true);
+            serverThread.start();
+
+            try (Socket clientSock = new Socket("localhost", port)) {
+                authLearner.authenticate(clientSock, "localhost");
+            } finally {
+                // Join even on learner failure: the server thread may still be
+                // handling its own failure path, and must not overlap with the
+                // next exchange that reuses the same authServer.
+                serverThread.join(5000);
+            }
+
+            // The learner succeeded; surface a server-side failure instead of
+            // silently dropping it.
+            Exception failure = serverError.get();
+            if (failure != null) {
+                throw failure;
+            }
+        }
+    }
+
+}
\ No newline at end of file