Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Fixed
- **`LinearRegression.get_se()` / `get_inference()` no longer return a `NaN` standard error from a tiny-negative variance artifact.** A high-leverage / degenerate coefficient (e.g. an absorbed-FE dummy near-collinear with the treatment, whose Bell-McCaffrey Satterthwaite DOF already hits the noise-floor guard) can have a CR2/HC variance of ~0 (≈1e-32) whose vcov diagonal lands just-below-zero under BLAS-dependent float rounding; `np.sqrt` of the negative then produced a `NaN` SE **nondeterministically** — passing single-threaded but failing under the parallel pure-Python full-suite run (`tests/test_methodology_wls_cr2.py::TestLinearRegressionFENanGuardEndToEnd::test_did_absorbed_fe_lr_inference_nan_for_guarded_coefs`). Both SE sites now clamp the vcov diagonal at 0, so the SE is finite (0 for a genuinely-zero variance), deterministic, and BLAS-independent. **No change for any positive variance** (the clamp is a no-op there); only the previously-`NaN` degenerate case is affected.

## [3.5.2] - 2026-06-08

### Added
Expand Down
11 changes: 9 additions & 2 deletions diff_diff/linalg.py
Original file line number Diff line number Diff line change
Expand Up @@ -3865,7 +3865,13 @@ def get_se(self, index: int) -> float:
"""
self._check_fitted()
assert self.vcov_ is not None
return float(np.sqrt(self.vcov_[index, index]))
# Clamp a tiny-negative variance artifact at 0 before sqrt. A high-leverage
# / degenerate coefficient (e.g. an absorbed-FE dummy near-collinear with the
# treatment) can have a CR2/HC variance of ~0 that lands just below zero under
# BLAS-dependent float rounding; without the clamp `np.sqrt` returns NaN
# nondeterministically (passes single-threaded, fails under parallel test
# load). The SE is then finite — 0 for a genuinely-zero variance.
return float(np.sqrt(max(float(self.vcov_[index, index]), 0.0)))

def get_inference(
self,
Expand Down Expand Up @@ -3908,7 +3914,8 @@ def get_inference(
assert self.vcov_ is not None

coef = float(self.coefficients_[index])
se = float(np.sqrt(self.vcov_[index, index]))
# See get_se: clamp a tiny-negative variance artifact at 0 so SE is finite, not NaN.
se = float(np.sqrt(max(float(self.vcov_[index, index]), 0.0)))

# Use instance alpha if not provided
effective_alpha = alpha if alpha is not None else self.alpha
Expand Down
7 changes: 7 additions & 0 deletions docs/methodology/REGISTRY.md
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,13 @@ where V is the VCV sub-matrix for post-treatment δ_e coefficients.
DiD panel sizes (n ≤ few thousand); tracked in `TODO.md` under Performance for
a follow-up that plumbs the contrast DOF through the existing CR2 vcov path or
shares precomputes.
- **Note:** `LinearRegression.get_se()` / `get_inference()` clamp the vcov diagonal at 0
before `sqrt`. A high-leverage / degenerate coefficient (an absorbed-FE dummy
near-collinear with the treatment, whose Satterthwaite DOF already hits the noise-floor
guard) has a CR2/HC variance of ~0 (≈1e-32) that can land just below zero under
BLAS-dependent rounding; the clamp keeps the SE finite (0 for a genuinely-zero variance)
and deterministic across BLAS implementations, instead of the `NaN` that `sqrt` of a
negative value produces. A vcov diagonal that is itself `NaN` is not altered by the clamp
and still yields a `NaN` SE. No effect on any positive
variance. Regression: `tests/test_methodology_wls_cr2.py::TestLinearRegressionFENanGuardEndToEnd`.
- Optional: Wild cluster bootstrap (complex for multi-coefficient testing;
requires joint bootstrap distribution)
- Degrees of freedom adjusted for absorbed fixed effects
Expand Down
38 changes: 37 additions & 1 deletion tests/test_methodology_wls_cr2.py
Original file line number Diff line number Diff line change
Expand Up @@ -775,7 +775,13 @@ def test_did_absorbed_fe_lr_inference_nan_for_guarded_coefs(self, goldens):
np.isnan(b) for b in inf_i.conf_int
), f"NaN guard must produce NaN conf_int at index {i}; got {inf_i.conf_int}"
# SE and coefficient remain valid (vcov matches at machine precision).
assert np.isfinite(inf_i.se) and inf_i.se > 0
# These guarded high-leverage FE dummies have a genuinely-~0 CR2
# variance (~1e-32); the SE is therefore ~0 and may clamp to exactly 0
# when the tiny diagonal lands just-negative under BLAS-dependent
# rounding (get_se/get_inference clamp at 0 so the SE is finite, never
# NaN — previously this produced a nondeterministic NaN that failed
# only under the parallel pure-Python full-suite run).
assert np.isfinite(inf_i.se) and inf_i.se >= 0
assert np.isfinite(inf_i.coefficient)

# Non-guarded coefficients still emit finite inference.
Expand All @@ -789,6 +795,36 @@ def test_did_absorbed_fe_lr_inference_nan_for_guarded_coefs(self, goldens):
assert np.isfinite(inf_i.p_value)
assert all(np.isfinite(b) for b in inf_i.conf_int)

def test_negative_variance_artifact_yields_finite_se_not_nan(self):
    """Check that a slightly negative vcov diagonal gives an SE of exactly 0, not NaN.

    Fits a small two-column regression, confirms the slope SE is finite and
    positive, then overwrites the slope's vcov diagonal with ``-1e-30`` to
    imitate a rounding artifact of a ~0 variance. Both ``get_se`` and
    ``get_inference`` must then report a finite standard error equal to 0.0.
    """
    from diff_diff.linalg import LinearRegression

    n_obs = 40
    generator = np.random.default_rng(7)
    # Draw order matters for reproducibility: regressor first, then the noise.
    regressor = generator.normal(size=n_obs)
    design = np.column_stack([np.ones(n_obs), regressor])
    noise = generator.normal(size=n_obs)
    y = design @ np.array([1.0, 2.0]) + noise

    # The design already carries its own constant column.
    model = LinearRegression(include_intercept=False)
    model.fit(design, y)
    assert model.vcov_ is not None

    # Baseline: before any perturbation the slope SE is finite and positive.
    baseline_se = model.get_se(1)
    assert np.isfinite(baseline_se) and baseline_se > 0

    # Overwrite the slope variance with a tiny negative value in place.
    model.vcov_[1, 1] = -1e-30

    se = model.get_se(1)
    assert np.isfinite(se) and se == 0.0, f"expected finite 0 SE, got {se}"

    inf = model.get_inference(1)
    assert np.isfinite(inf.se) and inf.se == 0.0, f"expected finite 0 SE, got {inf.se}"


class TestUnweightedRegressionStillBitEqual:
"""Regression safety: existing unweighted goldens must still match bit-equal."""
Expand Down
Loading