Skip to content

Commit 27953e2

Browse files
committed
feat: add multiple testing methods (Holm, Sidak, Benjamini-Yekutieli)
feat: add multiple testing methods (Holm, Sidak, Benjamini-Yekutieli) Added Holm, Sidak-Holm and Benjamini-Yekutieli methods
1 parent 2939868 commit 27953e2

File tree

7 files changed

+2056
-9
lines changed

7 files changed

+2056
-9
lines changed

poetry.lock

Lines changed: 1782 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
from .fwer import Holm
2+
from .fdr import BenjaminiYekutieli
Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,37 @@
11
from abc import ABC, abstractmethod
2-
3-
42
class AbstractMultipleTesting(ABC):
    """Common interface for multiple-testing correction procedures.

    Concrete subclasses implement :meth:`adjust`; :meth:`test` then turns the
    adjusted p-values into reject/keep decisions at a given significance level.
    """

    @staticmethod
    def _validate_p_values(p_values: list[float]) -> None:
        """
        Check that every p-value lies inside the closed interval [0, 1].

        :param p_values: List of p-values for hypothesis testing
        :raises ValueError: if any value falls outside [0, 1]
        """
        # De Morgan form of the original `not all(0 <= x <= 1)` check.
        if any(p < 0 or p > 1 for p in p_values):
            raise ValueError("All p-values must be in range [0,1].")

    @classmethod
    def test(cls, p_values: list[float], threshold: float = 0.05) -> tuple[list[bool], list[float]]:
        """
        Perform multiple testing correction and return both rejection decisions
        and adjusted p-values.

        :param p_values: List of raw p-values for hypothesis testing
        :param threshold: Significance level for controlling FWER (Family-Wise Error Rate)
            or FDR (False Discovery Rate) (default is 0.05)
        :return: Tuple containing:
            - Boolean list indicating rejected hypotheses (True where rejected)
            - List of adjusted p-values after multiple testing correction
        """
        cls._validate_p_values(p_values)
        corrected = cls.adjust(p_values)
        # A hypothesis is rejected when its adjusted p-value is strictly
        # below the chosen significance level.
        decisions = [adjusted_p < threshold for adjusted_p in corrected]
        return decisions, corrected

    @classmethod
    @abstractmethod
    def adjust(cls, p_values: list[float]) -> list[float]:
        """
        Compute adjusted p-values for multiple testing correction.

        :param p_values: List of raw p-values for hypothesis testing
        :return: List of adjusted p-values after multiple testing correction
        """
        raise NotImplementedError("Method is not implemented")
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
from .abstract_multiple_testing import AbstractMultipleTesting
2+
import math
3+
4+
class BenjaminiYekutieli(AbstractMultipleTesting):
    """
    Benjamini-Yekutieli correction for controlling the false discovery rate (FDR).

    Valid under arbitrary dependence between the hypotheses; the price for that
    robustness is the harmonic-series penalty factor c(n) = sum(1/i for i=1..n).
    """

    @classmethod
    def adjust(cls, p_values: list[float]) -> list[float]:
        """
        Compute Benjamini-Yekutieli adjusted p-values.

        Steps:
        1. Compute the harmonic number c = sum(1/i) for i = 1..n
        2. For the p-value of rank r (1-based, ascending): p_adj = p * n * c / r, capped at 1
        3. Enforce monotonicity with the step-up pass (cumulative minimum
           walking from the largest rank down)

        :param p_values: List of raw p-values, each in [0, 1]
        :return: Adjusted p-values in the original input order
        """
        n = len(p_values)
        if n == 0:
            return []

        # Exact harmonic number H(n). The previous code switched to the
        # approximation log(n) + Euler-Mascheroni for n > 10000, which drops
        # the 1/(2n) correction term and yields slightly wrong adjusted
        # p-values for large inputs; the exact O(n) sum is cheap even there.
        c = sum(1.0 / i for i in range(1, n + 1))

        sorted_indices = sorted(range(n), key=lambda i: p_values[i])
        adjusted = [0.0] * n

        for i, idx in enumerate(sorted_indices):
            rank = i + 1
            adjusted_value = p_values[idx] * n * c / rank
            adjusted[idx] = min(1.0, adjusted_value)

        # Step-up monotonicity: no adjusted value may exceed the adjusted
        # value at the next higher rank.
        current_min = adjusted[sorted_indices[-1]]
        for i in range(n - 2, -1, -1):
            idx = sorted_indices[i]
            if adjusted[idx] > current_min:
                adjusted[idx] = current_min
            else:
                current_min = adjusted[idx]

        return adjusted
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
from .abstract_multiple_testing import AbstractMultipleTesting
2+
3+
class Holm(AbstractMultipleTesting):
    """
    Holm-Bonferroni step-down correction controlling the family-wise error rate.

    More powerful than plain Bonferroni while still controlling FWER under
    arbitrary dependence.
    """

    @classmethod
    def adjust(cls, p_values: list[float]) -> list[float]:
        """
        Compute Holm-adjusted p-values.

        Steps:
        1. Sort p-values ascending, remembering original positions
        2. Scale the i-th smallest (0-based i) by (n - i)
        3. Take the running MAXIMUM over increasing ranks (step-down
           monotonicity), capping each value at 1

        :param p_values: List of raw p-values, each in [0, 1]
        :return: Adjusted p-values in the original input order
        """
        n = len(p_values)
        if n == 0:
            return []

        sorted_indices = sorted(range(n), key=lambda i: p_values[i])
        adjusted = [0.0] * n

        # BUGFIX: Holm is a step-down procedure — each adjusted p-value is the
        # cumulative maximum of p(i) * (n - i) over all smaller ranks. The
        # previous code propagated a cumulative minimum downward from the
        # largest rank (the BH/BY step-up rule), which understates the adjusted
        # p-values and inflates the family-wise error rate.
        running_max = 0.0
        for i, idx in enumerate(sorted_indices):
            running_max = max(running_max, p_values[idx] * (n - i))
            adjusted[idx] = min(1.0, running_max)

        return adjusted
44+
45+
46+
class SidakHolm(AbstractMultipleTesting):
    """
    Šidák correction for multiple hypothesis testing.

    NOTE(review): despite the class name, this implements the single-step
    Šidák adjustment p_adj = 1 - (1 - p)^n, not the Šidák-Holm step-down
    procedure — confirm whether the step-down variant was intended.
    """

    @classmethod
    def adjust(cls, p_values: list[float]) -> list[float]:
        """
        Adjust p-values using the Šidák correction for multiple hypothesis testing.

        Parameters
        ----------
        p_values : list[float]
            List of raw p-values for hypothesis testing. Must be in range [0, 1].

        Returns
        -------
        list[float]
            List of adjusted p-values, each in range [0, 1].
        """
        count = len(p_values)
        if count == 0:
            return []
        corrected = []
        for p in p_values:
            # 1 - (1 - p)^n, clamped so floating-point overshoot never exceeds 1.
            corrected.append(min(1.0, 1 - (1 - p) ** count))
        return corrected

tests/multiple_testing/test_fdr.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
import pytest
2+
from pysatl_criterion.multiple_testing.fdr import BenjaminiYekutieli
3+
4+
def test_benjamini_yekutieli_complex():
    # Ten raw p-values; only the two smallest survive the BY penalty at 5%.
    raw = [0.001, 0.002, 0.01, 0.03, 0.04, 0.1, 0.15, 0.2, 0.25, 0.5]

    decisions, _ = BenjaminiYekutieli.test(raw, 0.05)

    assert decisions == [True, True] + [False] * 8
11+
12+
13+
def test_benjamini_yekutieli_adjust_complex():
    p_values = [0.001, 0.005, 0.05, 0.1, 0.15]
    adjusted = BenjaminiYekutieli.adjust(p_values)

    # Recompute the expected values independently of the implementation:
    # p_adj(rank r) = p * n * H(n) / r, then step-up monotonicity.
    n = len(p_values)
    harmonic = sum(1.0 / k for k in range(1, n + 1))
    expected = [
        min(1.0, p * n * harmonic / rank)
        for rank, p in enumerate(sorted(p_values), start=1)
    ]
    for i in range(n - 2, -1, -1):
        expected[i] = min(expected[i], expected[i + 1])

    assert adjusted == pytest.approx(expected, abs=1e-4)
33+
34+
35+
def test_benjamini_yekutieli_edge_cases():
    # Extreme p-values on both ends of the range.
    decisions, _ = BenjaminiYekutieli.test([0.00001, 0.99999], 0.05)
    assert decisions == [True, False]
39+
40+
41+
def test_benjamini_yekutieli_large_input():
42+
import numpy as np
43+
np.random.seed(42)
44+
p_values = np.random.rand(1000).tolist()
45+
46+
rejected, adjusted = BenjaminiYekutieli.test(p_values, 0.05)
47+
48+
assert len(rejected) == 1000
49+
assert len(adjusted) == 1000
50+
assert all(0 <= p <= 1 for p in adjusted)
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
import pytest
2+
from pysatl_criterion.multiple_testing.fwer import Holm, SidakHolm
3+
4+
def test_holm_correction():
    raw = [0.04, 0.001, 0.7, 0.02]

    decisions, adjusted = Holm.test(raw, 0.05)

    # Sorted scaling: 0.001*4, 0.02*3, 0.04*2, 0.7*1 — already monotone,
    # so the adjusted values map straight back to the input order.
    assert decisions == [False, True, False, False]
    assert adjusted == pytest.approx([0.08, 0.004, 0.7, 0.06], abs=1e-3)
14+
15+
16+
def test_holm_adjust():
    p_values = [0.01, 0.04, 0.03]

    # Holm step-down: sorted scaled values are (0.01*3, 0.03*2, 0.04*1) =
    # (0.03, 0.06, 0.04); the running maximum makes them monotone ->
    # (0.03, 0.06, 0.06), mapped back to the input order. The previous
    # expectation [0.03, 0.04, 0.04] encoded the step-up (BH-style) rule,
    # which is not the Holm adjustment.
    expected = [0.03, 0.06, 0.06]

    adjusted = Holm.adjust(p_values)
    assert adjusted == pytest.approx(expected, abs=1e-3)
23+
24+
25+
def test_holm_with_early_stop():
    p_values = [0.1, 0.2, 0.3]
    alpha = 0.05

    rejected, adjusted = Holm.test(p_values, alpha)

    # Step-down: scaled values (0.1*3, 0.2*2, 0.3*1) = (0.3, 0.4, 0.3);
    # running maximum -> (0.3, 0.4, 0.4). The previous expectation
    # [0.3, 0.3, 0.3] came from the incorrect step-up monotonicity pass.
    expected_adjusted = [0.3, 0.4, 0.4]

    assert rejected == [False, False, False]
    assert adjusted == pytest.approx(expected_adjusted, abs=1e-3)
35+
36+
37+
def test_holm_all_rejected():
    p_values = [0.001, 0.002, 0.003]
    alpha = 0.05

    rejected, adjusted = Holm.test(p_values, alpha)

    # Step-down: scaled values (0.001*3, 0.002*2, 0.003*1) = (0.003, 0.004,
    # 0.003); running maximum -> (0.003, 0.004, 0.004). All remain below
    # alpha, so every hypothesis is still rejected.
    expected_adjusted = [0.003, 0.004, 0.004]

    assert rejected == [True, True, True]
    assert adjusted == pytest.approx(expected_adjusted, abs=1e-3)
47+
48+
49+
def test_sidak_correction():
    raw = [0.01, 0.02, 0.1]

    decisions, adjusted = SidakHolm.test(raw, 0.05)

    assert decisions == [True, False, False]

    # Single-step Šidák: p_adj = 1 - (1 - p)^n with n = 3.
    expected = [1 - (1 - p) ** 3 for p in raw]
    assert adjusted == pytest.approx(expected, abs=1e-10)
63+
64+
65+
def test_sidak_edge_cases():
    # All-zero p-values stay zero and are all rejected.
    rejected, adjusted = SidakHolm.test([0.0, 0.0, 0.0], 0.05001)
    assert adjusted == [0.0, 0.0, 0.0]
    assert all(rejected)

    # All-one p-values stay one and none are rejected.
    rejected, adjusted = SidakHolm.test([1.0, 1.0, 1.0], 0.05001)
    assert adjusted == [1.0, 1.0, 1.0]
    assert not any(rejected)

    # A single hypothesis is left unchanged (n = 1).
    rejected, adjusted = SidakHolm.test([0.05], 0.05001)
    assert adjusted == pytest.approx([0.05], abs=1e-10)
    assert rejected == [True]

    # General case against the closed-form Šidák formula.
    raw = [0.05, 0.1, 0.15]
    expected = [1 - (1 - p) ** len(raw) for p in raw]
    rejected, adjusted = SidakHolm.test(raw, 0.05001)
    assert adjusted == pytest.approx(expected, abs=1e-10)

0 commit comments

Comments
 (0)