From df1fc59463e5e01429a2bfe84856fae054b0a6d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Tue, 28 Apr 2026 19:24:43 +0200 Subject: [PATCH 1/3] policies.py: Remove max reconnect attempts This was kept this way to preserve legacy behavior, but this behavior is absurd and hurtful. We need to get rid of it now. --- cassandra/policies.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/cassandra/policies.py b/cassandra/policies.py index ceb5ebdc45..2cb2625baa 100644 --- a/cassandra/policies.py +++ b/cassandra/policies.py @@ -773,7 +773,7 @@ class ConstantReconnectionPolicy(ReconnectionPolicy): in-between each reconnection attempt. """ - def __init__(self, delay, max_attempts=64): + def __init__(self, delay, max_attempts=None): """ `delay` should be a floating point number of seconds to wait in-between each attempt. @@ -807,10 +807,7 @@ class ExponentialReconnectionPolicy(ReconnectionPolicy): trying to reconnect at exactly the same time. """ - # TODO: max_attempts is 64 to preserve legacy default behavior - # consider changing to None in major release to prevent the policy - # giving up forever - def __init__(self, base_delay, max_delay, max_attempts=64): + def __init__(self, base_delay, max_delay, max_attempts=None): """ `base_delay` and `max_delay` should be in floating point units of seconds. From f0f13eeafa156d8762ed753529ce511a46b9d0ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Tue, 28 Apr 2026 19:58:38 +0200 Subject: [PATCH 2/3] connection.py: Rename timeout to timeout_left This better conveys what this is: not a timeout duration from config, but how much of this timeout is left right now. 
--- cassandra/connection.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cassandra/connection.py b/cassandra/connection.py index 08501d0a2b..e45a696f41 100644 --- a/cassandra/connection.py +++ b/cassandra/connection.py @@ -1888,13 +1888,13 @@ def run(self): self._raise_if_stopped() # Wait max `self._timeout` seconds for all HeartbeatFutures to complete - timeout = self._timeout + timeout_left = self._timeout start_time = time.time() for f in futures: self._raise_if_stopped() connection = f.connection try: - f.wait(timeout) + f.wait(timeout_left) # TODO: move this, along with connection locks in pool, down into Connection with connection.lock: connection.in_flight -= 1 @@ -1904,7 +1904,7 @@ def run(self): id(connection), connection.endpoint) failed_connections.append((f.connection, f.owner, e)) - timeout = self._timeout - (time.time() - start_time) + timeout_left = self._timeout - (time.time() - start_time) for connection, owner, exc in failed_connections: self._raise_if_stopped() From 3465ec8e168fa667413c87f630c4fc07eb95555b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Tue, 28 Apr 2026 20:00:46 +0200 Subject: [PATCH 3/3] HeartbeatFuture: Use correct timeout in error message The timeout argument in `wait` tells how much we need to wait taking into consideration that we already waited for some other futures. This created confusing heartbeat messages that could even show negative wait times. 
--- cassandra/connection.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cassandra/connection.py b/cassandra/connection.py index e45a696f41..f7eac53c46 100644 --- a/cassandra/connection.py +++ b/cassandra/connection.py @@ -1821,15 +1821,15 @@ def __init__(self, connection, owner): self._exception = Exception("Failed to send heartbeat because connection 'in_flight' exceeds threshold") self._event.set() - def wait(self, timeout): + def wait(self, timeout, original_timeout): self._event.wait(timeout) if self._event.is_set(): if self._exception: raise self._exception else: - raise OperationTimedOut("Connection heartbeat timeout after %s seconds" % (timeout,), + raise OperationTimedOut("Connection heartbeat timeout after %s seconds" % (original_timeout,), self.connection.endpoint, - timeout=timeout, + timeout=original_timeout, in_flight=self.connection.in_flight) def _options_callback(self, response): @@ -1894,7 +1894,7 @@ def run(self): self._raise_if_stopped() connection = f.connection try: - f.wait(timeout_left) + f.wait(timeout_left, self._timeout) # TODO: move this, along with connection locks in pool, down into Connection with connection.lock: connection.in_flight -= 1