From c43d05ec7849fbd25867b9cfcc75d32d9f140342 Mon Sep 17 00:00:00 2001
From: jirka <6035284+Borda@users.noreply.github.com>
Date: Tue, 2 Jun 2026 23:40:42 -0600
Subject: [PATCH 1/4] feat: add --max-suite-retries option to cap total reruns
 across suite

- New CLI option `--max-suite-retries` (int, default None = no limit); once
  the suite-wide rerun count hits the cap, failing tests are logged as final
  failures without further retry
- `StatusDB` gains thread-safe `increment_suite_reruns()` / `get_suite_reruns()`
  via `threading.Lock` for the single-process path
- `ServerStatusDB` overrides use the existing `rerunfailures_db` dict with a
  `"__suite__"` key and an atomic socket `inc` operation in `run_connection`
- `ClientStatusDB` overrides route increment through the socket to the master
- Five new tests covering: cap enforcement, pass-through when under the cap,
  a cap of zero disabling all reruns, passing tests not consuming the budget,
  and `--max-suite-retries` without `--reruns` being a no-op
- CHANGES.rst and README.rst updated

Fixes #298

---
Co-authored-by: Claude Code <noreply@anthropic.com>
---
 CHANGES.rst                        |  8 +++-
 README.rst                         | 15 +++++++
 src/pytest_rerunfailures.py        | 54 ++++++++++++++++++++++
 tests/test_pytest_rerunfailures.py | 72 ++++++++++++++++++++++++++++++
 4 files changed, 148 insertions(+), 1 deletion(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 7960238..4128c23 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -4,7 +4,13 @@ Changelog
 16.4 (unreleased)
 -----------------
 
-- Nothing changed yet.
+Features
+++++++++
+
+- Add ``--max-suite-retries`` option to cap the total number of reruns across
+  the entire test suite. Once the limit is reached, no further reruns occur
+  regardless of per-test ``--reruns`` or ``@pytest.mark.flaky`` settings.
+  Fixes `#298 <https://github.com/pytest-dev/pytest-rerunfailures/issues/298>`_.
 
 
 16.3 (2026-05-22)
diff --git a/README.rst b/README.rst
index 30d2669..d8d9a6a 100644
--- a/README.rst
+++ b/README.rst
@@ -220,6 +220,21 @@ setting. To make them additive instead, pass ``--reruns-mode=append``. With
 
    $ pytest --reruns 4 --reruns-mode append
 
+Limit total reruns across the suite
+------------------------------------
+
+To cap the total number of reruns across the entire test suite regardless of
+how many individual tests fail, pass ``--max-suite-retries``. Once the limit
+is reached, no further reruns occur even if individual tests have remaining
+retries:
+
+.. code-block:: bash
+
+   $ pytest --reruns 3 --max-suite-retries 10
+
+This is useful in large test suites to bound resource usage when many tests
+are flaky at the same time.
+
 Show tracebacks for retried failures
 ------------------------------------
 
diff --git a/src/pytest_rerunfailures.py b/src/pytest_rerunfailures.py
index 127cdc9..366f6bd 100644
--- a/src/pytest_rerunfailures.py
+++ b/src/pytest_rerunfailures.py
@@ -120,6 +120,15 @@ def pytest_addoption(parser):
         "'rerun test summary info' section, which is emitted automatically "
         "when this flag is set.",
     )
+    group._addoption(
+        "--max-suite-retries",
+        action="store",
+        dest="max_suite_retries",
+        type=int,
+        default=None,
+        help="Maximum total number of reruns across the entire test suite. "
+        "Once this limit is reached, no further reruns will occur.",
+    )
 
     arg_type = "string"
     parser.addini("reruns", RERUNS_DESC, type=arg_type)
@@ -430,6 +439,18 @@ class StatusDB:
     def __init__(self):
         self.delim = b"\n"
         self.hmap = {}
+        self._suite_rerun_count = 0
+        self._suite_lock = threading.Lock()
+
+    def increment_suite_reruns(self) -> int:
+        """Atomically increment the suite-wide rerun counter; return new total."""
+        with self._suite_lock:
+            self._suite_rerun_count += 1
+            return self._suite_rerun_count
+
+    def get_suite_reruns(self) -> int:
+        """Return the current suite-wide rerun count."""
+        return self._suite_rerun_count
 
     def _hash(self, crashitem: str) -> str:
         if crashitem not in self.hmap:
@@ -514,6 +535,11 @@ def run_connection(self, conn):
                     self._set(i, k, int(v))
                 elif op == "get":
                     self._sock_send(conn, str(self._get(i, k)))
+                elif op == "inc":
+                    with self._suite_lock:
+                        new_v = self._get(i, k) + 1
+                        self._set(i, k, new_v)
+                    self._sock_send(conn, str(new_v))
 
     def _set(self, i: str, k: str, v: int):
         if i not in self.rerunfailures_db:
@@ -526,6 +552,17 @@ def _get(self, i: str, k: str) -> int:
         except KeyError:
             return 0
 
+    def increment_suite_reruns(self) -> int:
+        """Atomically increment the suite-wide rerun counter; return new total."""
+        with self._suite_lock:
+            new_v = self._get("__suite__", "r") + 1
+            self._set("__suite__", "r", new_v)
+            return new_v
+
+    def get_suite_reruns(self) -> int:
+        """Return the current suite-wide rerun count."""
+        return self._get("__suite__", "r")
+
 
 class ClientStatusDB(SocketDB):
     def __init__(self, sock_port):
@@ -539,6 +576,15 @@ def _get(self, i: str, k: str) -> int:
         self._sock_send(self.sock, "|".join(("get", i, k, "")))
         return int(self._sock_recv(self.sock))
 
+    def increment_suite_reruns(self) -> int:
+        """Atomically increment the suite-wide rerun counter; return new total."""
+        self._sock_send(self.sock, "|".join(("inc", "__suite__", "r", "")))
+        return int(self._sock_recv(self.sock))
+
+    def get_suite_reruns(self) -> int:
+        """Return the current suite-wide rerun count."""
+        return self._get("__suite__", "r")
+
 
 suspended_finalizers = {}
 
@@ -638,6 +684,14 @@ def pytest_runtest_protocol(item, nextitem):
                 item.ihook.pytest_runtest_logreport(report=report)
             else:
                 # failure detected and reruns not exhausted, since i < reruns
+                max_suite_reruns = item.session.config.option.max_suite_retries
+                if max_suite_reruns is not None:
+                    suite_count = db.increment_suite_reruns()
+                    if suite_count > max_suite_reruns:
+                        # suite-wide limit exhausted — log as final failure
+                        item.ihook.pytest_runtest_logreport(report=report)
+                        continue
+
                 report.outcome = "rerun"
                 time.sleep(delay)
 
diff --git a/tests/test_pytest_rerunfailures.py b/tests/test_pytest_rerunfailures.py
index b60716c..6f3f398 100644
--- a/tests/test_pytest_rerunfailures.py
+++ b/tests/test_pytest_rerunfailures.py
@@ -1526,3 +1526,75 @@ def test_pass():
 
     result = testdir.runpytest("--reruns-mode", "bogus")
     assert result.ret != 0
+
+
+def test_max_suite_retries_caps_total_reruns(testdir):
+    """Suite limit stops reruns once the total across all tests is reached."""
+    testdir.makepyfile(
+        """
+        def test_fail_1():
+            assert False
+
+        def test_fail_2():
+            assert False
+
+        def test_fail_3():
+            assert False
+    """
+    )
+    # 3 tests each allowed up to 3 reruns, but suite cap is 4 total
+    result = testdir.runpytest("--reruns", "3", "--max-suite-retries", "4")
+    outcomes = result.parseoutcomes()
+    assert outcomes.get("rerun", 0) == 4
+    assert outcomes.get("failed", 0) == 3
+
+
+def test_max_suite_retries_does_not_limit_when_sufficient(testdir):
+    """Suite limit has no effect when total reruns stay below the cap."""
+    testdir.makepyfile(
+        """
+        def test_fail():
+            assert False
+    """
+    )
+    result = testdir.runpytest("--reruns", "2", "--max-suite-retries", "10")
+    assert_outcomes(result, passed=0, failed=1, rerun=2)
+
+
+def test_max_suite_retries_zero_disables_all_reruns(testdir):
+    """Suite limit of 0 prevents any reruns from occurring."""
+    testdir.makepyfile(
+        """
+        def test_fail():
+            assert False
+    """
+    )
+    result = testdir.runpytest("--reruns", "3", "--max-suite-retries", "0")
+    assert_outcomes(result, passed=0, failed=1, rerun=0)
+
+
+def test_max_suite_retries_works_with_passing_tests(testdir):
+    """Suite limit only counts actual reruns, not passing test runs."""
+    testdir.makepyfile(
+        """
+        def test_pass():
+            assert True
+
+        def test_fail():
+            assert False
+    """
+    )
+    result = testdir.runpytest("--reruns", "3", "--max-suite-retries", "2")
+    assert_outcomes(result, passed=1, failed=1, rerun=2)
+
+
+def test_max_suite_retries_without_reruns_has_no_effect(testdir):
+    """--max-suite-retries alone (without --reruns) does not break anything."""
+    testdir.makepyfile(
+        """
+        def test_fail():
+            assert False
+    """
+    )
+    result = testdir.runpytest("--max-suite-retries", "5")
+    assert_outcomes(result, passed=0, failed=1, rerun=0)

From 8b3e3304ccd5f3a0ed745d515e42a286911bfb8c Mon Sep 17 00:00:00 2001
From: jirka <6035284+Borda@users.noreply.github.com>
Date: Thu, 4 Jun 2026 07:13:17 -0600
Subject: [PATCH 2/4] fix: use atomic try-increment-if-below-cap for suite
 rerun counter

[resolve #3] Review by @Copilot (PR #332):
"This increments the suite-wide counter even when the cap is already
exhausted, which can permanently overshoot the configured limit..."
Challenge: evidence=VALID suggestion=VALID resolution=as-suggested

- Add `try_increment_suite_reruns(max_cap)` to StatusDB, ServerStatusDB, ClientStatusDB
- ServerStatusDB adds `try_inc` socket protocol (atomic check-then-increment)
- Caller uses bool return instead of post-increment comparison
- Counter now reflects reruns actually performed, not attempts

---
Co-authored-by: Claude Code <noreply@anthropic.com>
Co-authored-by: OpenAI Codex <codex@openai.com>
---
 src/pytest_rerunfailures.py | 32 ++++++++++++++++++++++++++++++--
 1 file changed, 30 insertions(+), 2 deletions(-)

diff --git a/src/pytest_rerunfailures.py b/src/pytest_rerunfailures.py
index 366f6bd..8dbf5bb 100644
--- a/src/pytest_rerunfailures.py
+++ b/src/pytest_rerunfailures.py
@@ -448,6 +448,13 @@ def increment_suite_reruns(self) -> int:
             self._suite_rerun_count += 1
             return self._suite_rerun_count
 
+    def try_increment_suite_reruns(self, max_cap: int) -> bool:
+        with self._suite_lock:
+            if self._suite_rerun_count < max_cap:
+                self._suite_rerun_count += 1
+                return True
+            return False
+
     def get_suite_reruns(self) -> int:
         """Return the current suite-wide rerun count."""
         return self._suite_rerun_count
@@ -540,6 +547,14 @@ def run_connection(self, conn):
                         new_v = self._get(i, k) + 1
                         self._set(i, k, new_v)
                     self._sock_send(conn, str(new_v))
+                elif op == "try_inc":
+                    with self._suite_lock:
+                        current = self._get(i, k)
+                        if current < int(v):
+                            self._set(i, k, current + 1)
+                            self._sock_send(conn, "1")
+                        else:
+                            self._sock_send(conn, "0")
 
     def _set(self, i: str, k: str, v: int):
         if i not in self.rerunfailures_db:
@@ -559,6 +574,14 @@ def increment_suite_reruns(self) -> int:
             self._set("__suite__", "r", new_v)
             return new_v
 
+    def try_increment_suite_reruns(self, max_cap: int) -> bool:
+        with self._suite_lock:
+            current = self._get("__suite__", "r")
+            if current < max_cap:
+                self._set("__suite__", "r", current + 1)
+                return True
+            return False
+
     def get_suite_reruns(self) -> int:
         """Return the current suite-wide rerun count."""
         return self._get("__suite__", "r")
@@ -581,6 +604,12 @@ def increment_suite_reruns(self) -> int:
         self._sock_send(self.sock, "|".join(("inc", "__suite__", "r", "")))
         return int(self._sock_recv(self.sock))
 
+    def try_increment_suite_reruns(self, max_cap: int) -> bool:
+        self._sock_send(
+            self.sock, "|".join(("try_inc", "__suite__", "r", str(max_cap)))
+        )
+        return self._sock_recv(self.sock) == "1"
+
     def get_suite_reruns(self) -> int:
         """Return the current suite-wide rerun count."""
         return self._get("__suite__", "r")
@@ -686,8 +715,7 @@ def pytest_runtest_protocol(item, nextitem):
                 # failure detected and reruns not exhausted, since i < reruns
                 max_suite_reruns = item.session.config.option.max_suite_retries
                 if max_suite_reruns is not None:
-                    suite_count = db.increment_suite_reruns()
-                    if suite_count > max_suite_reruns:
+                    if not db.try_increment_suite_reruns(max_suite_reruns):
                         # suite-wide limit exhausted — log as final failure
                         item.ihook.pytest_runtest_logreport(report=report)
                         continue

From 0aeec5540195cbe24dfab9461d02a0196b079e07 Mon Sep 17 00:00:00 2001
From: jirka <6035284+Borda@users.noreply.github.com>
Date: Thu, 4 Jun 2026 07:14:10 -0600
Subject: [PATCH 3/4] fix: use public group.addoption for --max-suite-retries

[resolve #1] Review by @Copilot (PR #332):
"group._addoption is a private pytest API and can break across pytest versions."
Challenge: evidence=VALID suggestion=VALID resolution=as-suggested

---
Co-authored-by: Claude Code <noreply@anthropic.com>
Co-authored-by: OpenAI Codex <codex@openai.com>
---
 src/pytest_rerunfailures.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/pytest_rerunfailures.py b/src/pytest_rerunfailures.py
index 8dbf5bb..d611b38 100644
--- a/src/pytest_rerunfailures.py
+++ b/src/pytest_rerunfailures.py
@@ -120,7 +120,7 @@ def pytest_addoption(parser):
         "'rerun test summary info' section, which is emitted automatically "
         "when this flag is set.",
     )
-    group._addoption(
+    group.addoption(
         "--max-suite-retries",
         action="store",
         dest="max_suite_retries",

From c48a1986f856a7c04c351ee64df0f9a6b263ad74 Mon Sep 17 00:00:00 2001
From: jirka <6035284+Borda@users.noreply.github.com>
Date: Thu, 4 Jun 2026 07:22:48 -0600
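Subject: [PATCH 4/4] lint: auto-fix violations after resolve cycle

- Annotate module-level `suspended_finalizers` as `dict[Any, Any]` and add
  the required `from typing import Any` import
- Add a `dev` dependency group with `mypy>=2.1.0` to pyproject.toml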

---
Co-authored-by: Claude Code <noreply@anthropic.com>
---
 pyproject.toml              | 5 +++++
 src/pytest_rerunfailures.py | 3 ++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 4d6aa3d..a5afddc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -72,3 +72,8 @@ lint.pydocstyle.convention = "google"
 
 [tool.check-manifest]
 ignore = [ ".pre-commit-config.yaml" ]
+
+[dependency-groups]
+dev = [
+    "mypy>=2.1.0",
+]
diff --git a/src/pytest_rerunfailures.py b/src/pytest_rerunfailures.py
index d611b38..a074976 100644
--- a/src/pytest_rerunfailures.py
+++ b/src/pytest_rerunfailures.py
@@ -10,6 +10,7 @@
 import traceback
 import warnings
 from contextlib import suppress
+from typing import Any
 
 import pytest
 from _pytest.outcomes import fail
@@ -615,7 +616,7 @@ def get_suite_reruns(self) -> int:
         return self._get("__suite__", "r")
 
 
-suspended_finalizers = {}
+suspended_finalizers: dict[Any, Any] = {}
 
 
 def pytest_runtest_teardown(item, nextitem):