@@ -1590,3 +1590,63 @@ def connection_failure_mock(*args, **kwargs):
15901590 # Verify no CRITICAL check sent
15911591 service_checks = aggregator .service_checks (SPARK_DRIVER_SERVICE_CHECK )
15921592 assert len (service_checks ) == 0
1593+
1594+
1595+ @pytest .mark .unit
1596+ @pytest .mark .parametrize (
1597+ "pod_phase" ,
1598+ ["Failed" , "Succeeded" , "Unknown" ],
1599+ )
1600+ def test_debounce_connection_failure_all_terminal_phases (aggregator , dd_run_check , caplog , pod_phase ):
1601+ """Test that all terminal pod phases suppress connection errors."""
1602+
1603+ def connection_failure_mock (* args , ** kwargs ):
1604+ raise ConnectionError ("Connection refused" )
1605+
1606+ instance = DRIVER_CONFIG .copy ()
1607+ instance ['tags' ] = list (instance .get ('tags' , [])) + ['pod_phase:{}' .format (pod_phase )]
1608+
1609+ with mock .patch ('requests.Session.get' , side_effect = connection_failure_mock ):
1610+ c = SparkCheck ('spark' , {}, [instance ])
1611+
1612+ with caplog .at_level (logging .DEBUG ):
1613+ dd_run_check (c )
1614+
1615+ assert "Pod phase is terminal, suppressing request error" in caplog .text
1616+
1617+ service_checks = aggregator .service_checks (SPARK_DRIVER_SERVICE_CHECK )
1618+ assert len (service_checks ) == 0
1619+
1620+
1621+ @pytest .mark .unit
1622+ def test_debounce_no_route_to_host (aggregator , dd_run_check , caplog ):
1623+ """Test that 'No route to host' errors are also debounced."""
1624+
1625+ def connection_failure_mock (* args , ** kwargs ):
1626+ raise ConnectionError ("No route to host" )
1627+
1628+ instance = DRIVER_CONFIG .copy ()
1629+ instance ['tags' ] = list (instance .get ('tags' , [])) + ['pod_phase:Running' ]
1630+
1631+ with mock .patch ('requests.Session.get' , side_effect = connection_failure_mock ):
1632+ c = SparkCheck ('spark' , {}, [instance ])
1633+
1634+ # First run: expect warning, no CRITICAL check
1635+ with caplog .at_level (logging .WARNING ):
1636+ dd_run_check (c )
1637+
1638+ assert "Connection failed. Suppressing error once to ensure driver is running" in caplog .text
1639+
1640+ service_checks = aggregator .service_checks (SPARK_DRIVER_SERVICE_CHECK )
1641+ assert len (service_checks ) == 0
1642+
1643+
1644+ @pytest .mark .unit
1645+ def test_get_pod_phase ():
1646+ """Test _get_pod_phase static method."""
1647+ assert SparkCheck ._get_pod_phase (['pod_phase:Running' ]) == 'running'
1648+ assert SparkCheck ._get_pod_phase (['pod_phase:Failed' ]) == 'failed'
1649+ assert SparkCheck ._get_pod_phase (['other:tag' , 'pod_phase:Succeeded' ]) == 'succeeded'
1650+ assert SparkCheck ._get_pod_phase (['other:tag' ]) is None
1651+ assert SparkCheck ._get_pod_phase (None ) is None
1652+ assert SparkCheck ._get_pod_phase ([]) is None
0 commit comments