From 784ba1e86e74e731a752eb9feaad764742e09960 Mon Sep 17 00:00:00 2001
From: ZhenyuLi <893652269@qq.com>
Date: Wed, 25 Mar 2026 15:28:15 -0400
Subject: [PATCH] ZOOKEEPER-5033: Quorum SASL authentication fails permanently
 after Login TGT refresh thread exits

Add forceReLogin() to Login that re-logins immediately without the
minimum time check, and call it from SaslQuorumAuthLearner and
SaslQuorumAuthServer when authentication fails. This ensures the next
authentication attempt uses fresh credentials after TGT expiration.
---
 .../main/java/org/apache/zookeeper/Login.java |  38 +++++
 .../quorum/auth/SaslQuorumAuthLearner.java    |  21 +++
 .../quorum/auth/SaslQuorumAuthServer.java     |  10 ++
 .../auth/SaslQuorumAuthReLoginTest.java       | 160 ++++++++++++++++++
 4 files changed, 229 insertions(+)
 create mode 100644 zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/auth/SaslQuorumAuthReLoginTest.java

diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/Login.java b/zookeeper-server/src/main/java/org/apache/zookeeper/Login.java
index 2c483a5f74b..ac6762b16a6 100644
--- a/zookeeper-server/src/main/java/org/apache/zookeeper/Login.java
+++ b/zookeeper-server/src/main/java/org/apache/zookeeper/Login.java
@@ -422,6 +422,44 @@ public long getLastLogin() {
         return lastLogin;
     }
 
+    /**
+     * Force a re-login, bypassing the minimum time check and the Kerberos ticket check.
+     * This is used when authentication fails and fresh credentials are needed immediately,
+     * regardless of the authentication mechanism (Kerberos or DIGEST-MD5).
+     *
+     * <p>Unlike {@link #reLogin()}, this method:
+     * <ul>
+     *   <li>does not perform the Kerberos ticket check, and</li>
+     *   <li>does not enforce the minimum time between logins; it always
+     *       logs out and logs in again.</li>
+     * </ul>
+     *
+     * @throws javax.security.auth.login.LoginException on a failure
+     */
+    public synchronized void forceReLogin() throws LoginException {
+        LoginContext lc = getLogin();
+        if (lc == null) {
+            throw new LoginException("login must be done first");
+        }
+        LOG.info("Forcing re-login for {}", getUserName());
+        synchronized (Login.class) {
+            // First try the regular logout to clean up mechanism-specific state
+            logout();
+            // Clear any remaining credentials that logout may not have handled.
+            // For DIGEST-MD5, DigestLoginModule.logout() is a no-op and does not
+            // remove credentials from the Subject. We must clear them explicitly
+            // to avoid stale credentials being picked up on the next authentication
+            // attempt (SecurityUtils.createSaslClient uses toArray()[0]).
+            subject.getPrivateCredentials().clear();
+            subject.getPublicCredentials().clear();
+            // Re-login to get fresh credentials
+            lc = new LoginContext(loginContextName, subject, newCallbackHandler());
+            lc.login();
+            setLogin(lc);
+        }
+        setLastLogin(Time.currentElapsedTime());
+    }
+
     /**
      * Re-login a principal. This method assumes that {@link #login(String)} has happened already.
      * @throws javax.security.auth.login.LoginException on a failure
diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/auth/SaslQuorumAuthLearner.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/auth/SaslQuorumAuthLearner.java
index 3151a57567d..9d522d9ad7c 100644
--- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/auth/SaslQuorumAuthLearner.java
+++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/auth/SaslQuorumAuthLearner.java
@@ -133,6 +133,22 @@ public void authenticate(Socket sock, String hostName) throws IOException {
 
             // Validate status code at the end of authentication exchange.
             checkAuthStatus(sock, qpStatus);
+        } catch (SaslException e) {
+            // Authentication failed. Try to re-login so that the next
+            // authentication attempt (after the caller reconnects) will
+            // use fresh credentials. This handles the case where the
+            // Kerberos TGT has expired and the Login refresh thread has
+            // exited, or credentials have otherwise become stale.
+            LOG.warn(
+                "SASL authentication failed against server addr: {}, attempting re-login for next retry",
+                sock.getRemoteSocketAddress(), e);
+            try {
+                learnerLogin.forceReLogin();
+                LOG.info("Successfully re-logged in after SASL authentication failure");
+            } catch (LoginException le) {
+                LOG.error("Failed to re-login after SASL authentication failure", le);
+            }
+            throw e;
         } finally {
             if (sc != null) {
                 try {
@@ -172,6 +188,11 @@ private void send(DataOutputStream dout, byte[] response) throws IOException {
         bufferedOutput.flush();
     }
 
+    // Visible for testing
+    Login getLogin() {
+        return learnerLogin;
+    }
+
     // TODO: need to consolidate the #createSaslToken() implementation between ZooKeeperSaslClient#createSaslToken().
     private byte[] createSaslToken(
         final byte[] saslToken,
diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/auth/SaslQuorumAuthServer.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/auth/SaslQuorumAuthServer.java
index a1425833b7f..21435c3fd57 100644
--- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/auth/SaslQuorumAuthServer.java
+++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/auth/SaslQuorumAuthServer.java
@@ -123,6 +123,16 @@ public void authenticate(Socket sock, DataInputStream din) throws SaslException
             } catch (IOException ioe) {
                 LOG.warn("Exception while sending failed status", ioe);
             }
+            // Try to re-login so that the next authentication attempt
+            // will use fresh credentials. This handles the case where the
+            // Kerberos TGT has expired and the Login refresh thread has
+            // exited, or credentials have otherwise become stale.
+            try {
+                serverLogin.forceReLogin();
+                LOG.info("Successfully re-logged in after server SASL authentication failure");
+            } catch (LoginException le) {
+                LOG.error("Failed to re-login after server SASL authentication failure", le);
+            }
             // If sasl is not required, when a server initializes a
             // connection it will try to log in, but it will also
             // accept connections that do not start with a sasl
diff --git a/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/auth/SaslQuorumAuthReLoginTest.java b/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/auth/SaslQuorumAuthReLoginTest.java
new file mode 100644
index 00000000000..aa4a3687f8b
--- /dev/null
+++ b/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/auth/SaslQuorumAuthReLoginTest.java
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.zookeeper.server.quorum.auth;
+
+import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import java.io.BufferedInputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.net.ServerSocket;
+import java.net.Socket;
+import java.util.HashSet;
+import java.util.concurrent.atomic.AtomicReference;
+import javax.security.auth.Subject;
+import javax.security.auth.login.Configuration;
+
+import org.apache.zookeeper.Login;
+import org.apache.zookeeper.common.X509Util;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.Timeout;
+
+/**
+ * Tests that SaslQuorumAuthLearner recovers from SASL authentication
+ * failures by re-logging in to refresh stale credentials.
+ *
+ * <p>This addresses the scenario where the Login TGT refresh thread
+ * has silently exited (due to clock skew, KDC unavailability, etc.)
+ * and the cached credentials in the Subject have become stale.
+ * Without the re-login logic, the learner would fail to authenticate
+ * indefinitely until the process is restarted.
+ */
+public class SaslQuorumAuthReLoginTest extends QuorumAuthTestBase {
+
+    private static final String JAAS_ENTRIES =
+        "QuorumServer {\n"
+        + " org.apache.zookeeper.server.auth.DigestLoginModule required\n"
+        + " user_test=\"mypassword\";\n"
+        + "};\n"
+        + "QuorumLearner {\n"
+        + " org.apache.zookeeper.server.auth.DigestLoginModule required\n"
+        + " username=\"test\"\n"
+        + " password=\"mypassword\";\n"
+        + "};\n";
+
+    private SaslQuorumAuthServer authServer;
+    private SaslQuorumAuthLearner authLearner;
+
+    @BeforeAll
+    public static void setUpClass() {
+        // DIGEST-MD5 is not FIPS-compliant
+        System.setProperty(X509Util.FIPS_MODE_PROPERTY, "false");
+        setupJaasConfig(JAAS_ENTRIES);
+    }
+
+    @AfterAll
+    public static void tearDownClass() {
+        System.clearProperty(X509Util.FIPS_MODE_PROPERTY);
+        cleanupJaasConfig();
+    }
+
+    @BeforeEach
+    public void setUp() throws Exception {
+        Configuration.getConfiguration().refresh();
+        authServer = new SaslQuorumAuthServer(
+            true, "QuorumServer", new HashSet<>());
+        authLearner = new SaslQuorumAuthLearner(
+            true, "zkquorum/localhost", "QuorumLearner");
+    }
+
+    @AfterEach
+    @Override
+    public void tearDown() throws Exception {
+        super.tearDown();
+    }
+
+    /**
+     * Test that after credential corruption and authentication failure,
+     * the re-login mechanism restores valid credentials so that the
+     * next authentication attempt succeeds.
+     *
+     * <p>Without the fix (forceReLogin on auth failure), the second
+     * authentication attempt would also fail because the corrupted
+     * credentials remain in the Subject.
+     */
+    @Test
+    @Timeout(value = 30)
+    public void testReLoginOnSaslAuthFailure() throws Exception {
+        // Baseline: normal authentication should succeed
+        runAuthentication();
+
+        // Simulate stale/corrupted credentials by replacing the
+        // password in the learner's Subject
+        Login learnerLogin = authLearner.getLogin();
+        Subject subject = learnerLogin.getSubject();
+        subject.getPrivateCredentials().clear();
+        subject.getPrivateCredentials().add("wrongpassword");
+
+        // Authentication should fail with corrupted credentials.
+        // With the fix, forceReLogin() is called inside authenticate(),
+        // which restores the correct credentials from JAAS config.
+        assertThrows(IOException.class, this::runAuthentication);
+
+        // The next authentication attempt should succeed because
+        // forceReLogin() restored the correct credentials.
+        // Without the fix, this would fail because the corrupted
+        // credentials are still in the Subject.
+        assertDoesNotThrow(this::runAuthentication);
+    }
+
+    /**
+     * Run a single SASL authentication exchange between the learner
+     * and server over connected sockets.
+     */
+    private void runAuthentication() throws Exception {
+        try (ServerSocket ss = new ServerSocket(0)) {
+            int port = ss.getLocalPort();
+            AtomicReference<Exception> serverError = new AtomicReference<>();
+
+            Thread serverThread = new Thread(() -> {
+                try (Socket serverSock = ss.accept()) {
+                    DataInputStream din = new DataInputStream(
+                        new BufferedInputStream(serverSock.getInputStream()));
+                    authServer.authenticate(serverSock, din);
+                } catch (Exception e) {
+                    serverError.set(e);
+                }
+            });
+            serverThread.setDaemon(true);
+            serverThread.start();
+
+            try (Socket clientSock = new Socket("localhost", port)) {
+                authLearner.authenticate(clientSock, "localhost");
+            }
+
+            serverThread.join(5000);
+        }
+    }
+
+}