igerber · igerber · Jan 25, 2026 · Jan 24, 2026 · Jan 25, 2026 · Jan 25, 2026
diff --git a/diff_diff/visualization.py b/diff_diff/visualization.py
@@ -73,8 +73,10 @@ def plot_event_study(
     periods : list, optional
         List of periods to plot. If None, uses all periods from results.
     reference_period : any, optional
-        The reference period (normalized to effect=0). Will be shown as a
-        hollow marker. If None, tries to infer from results.
+        The reference period to highlight. When explicitly provided, effects
+        are normalized (ref effect subtracted) and ref SE is set to NaN.
+        If None, tries to infer it from results; an auto-inferred reference
+        period only gets hollow marker styling (no normalization).
     pre_periods : list, optional
         List of pre-treatment periods. Used for shading.
     post_periods : list, optional
@@ -151,8 +153,9 @@ def plot_event_study(
        trends holds. Large pre-treatment effects suggest the assumption may
        be violated.
 
-    2. **Reference period**: Usually the last pre-treatment period (t=-1),
-       normalized to zero. This is the omitted category.
+    2. **Reference period**: Usually the last pre-treatment period (t=-1).
+       When explicitly specified via ``reference_period``, effects are normalized
+       to zero at this period. When auto-inferred, shown with hollow marker only.
 
     3. **Post-treatment periods**: The treatment effects of interest. These
        show how the outcome evolved after treatment.
@@ -170,10 +173,18 @@ def plot_event_study(
 
     from scipy import stats as scipy_stats
 
+    # Track if reference_period was explicitly provided by user
+    reference_period_explicit = reference_period is not None
+
     # Extract data from results if provided
     if results is not None:
-        effects, se, periods, pre_periods, post_periods, reference_period = \
-            _extract_plot_data(results, periods, pre_periods, post_periods, reference_period)
+        extracted = _extract_plot_data(
+            results, periods, pre_periods, post_periods, reference_period
+        )
+        effects, se, periods, pre_periods, post_periods, reference_period, reference_inferred = extracted
+        # If reference was inferred from results, it was NOT explicitly provided
+        if reference_inferred:
+            reference_period_explicit = False
     elif effects is None or se is None:
         raise ValueError(
             "Must provide either 'results' or both 'effects' and 'se'"
@@ -192,6 +203,19 @@ def plot_event_study(
     # Compute confidence intervals
     critical_value = scipy_stats.norm.ppf(1 - alpha / 2)
 
+    # Normalize effects to reference period ONLY if explicitly specified by user
+    # Auto-inferred reference periods (from CallawaySantAnna) just get hollow marker styling,
+    # NO normalization. This prevents unintended normalization when the reference period
+    # isn't a true identifying constraint (e.g., CallawaySantAnna with base_period="varying").
+    if (reference_period is not None and reference_period in effects and
+            reference_period_explicit):
+        ref_effect = effects[reference_period]
+        if np.isfinite(ref_effect):
+            effects = {p: e - ref_effect for p, e in effects.items()}
+            # Set reference SE to NaN (it's now a constraint, not an estimate)
+            # This follows fixest convention where the omitted category has no SE/CI
+            se = {p: (np.nan if p == reference_period else s) for p, s in se.items()}
+
     plot_data = []
     for period in periods:
         effect = effects.get(period, np.nan)
@@ -304,14 +328,17 @@ def _extract_plot_data(
     pre_periods: Optional[List[Any]],
     post_periods: Optional[List[Any]],
     reference_period: Optional[Any],
-) -> Tuple[Dict, Dict, List, List, List, Any]:
+) -> Tuple[Dict, Dict, List, List, List, Any, bool]:
     """
     Extract plotting data from various result types.
 
     Returns
     -------
     tuple
-        (effects, se, periods, pre_periods, post_periods, reference_period)
+        (effects, se, periods, pre_periods, post_periods, reference_period, reference_inferred)
+
+        reference_inferred is True if reference_period was auto-detected from results
+        rather than explicitly provided by the user.
     """
     # Handle DataFrame input
     if isinstance(results, pd.DataFrame):
@@ -328,7 +355,8 @@ def _extract_plot_data(
         if periods is None:
             periods = list(results['period'])
 
-        return effects, se, periods, pre_periods, post_periods, reference_period
+        # DataFrame input: reference_period was already set by caller, never inferred here
+        return effects, se, periods, pre_periods, post_periods, reference_period, False
 
     # Handle MultiPeriodDiDResults
     if hasattr(results, 'period_effects'):
@@ -348,7 +376,8 @@ def _extract_plot_data(
         if periods is None:
             periods = post_periods
 
-        return effects, se, periods, pre_periods, post_periods, reference_period
+        # MultiPeriodDiDResults: reference_period was already set by caller, never inferred here
+        return effects, se, periods, pre_periods, post_periods, reference_period, False
 
     # Handle CallawaySantAnnaResults (event study aggregation)
     if hasattr(results, 'event_study_effects') and results.event_study_effects is not None:
@@ -362,8 +391,12 @@ def _extract_plot_data(
         if periods is None:
             periods = sorted(effects.keys())
 
+        # Track if reference_period was explicitly provided vs auto-inferred
+        reference_inferred = False
+
         # Reference period is typically -1 for event study
         if reference_period is None:
+            reference_inferred = True  # We're about to infer it
             # Detect reference period from n_groups=0 marker (normalization constraint)
             # This handles anticipation > 0 where reference is at e = -1 - anticipation
             for period, effect_data in results.event_study_effects.items():
@@ -380,7 +413,7 @@ def _extract_plot_data(
         if post_periods is None:
             post_periods = [p for p in periods if p >= 0]
 
-        return effects, se, periods, pre_periods, post_periods, reference_period
+        return effects, se, periods, pre_periods, post_periods, reference_period, reference_inferred
 
     raise TypeError(
         f"Cannot extract plot data from {type(results).__name__}. "

diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
@@ -728,6 +728,52 @@ n = 2(t_{α/2} + t_{1-κ})² σ² / MDE²
 
 ---
 
+# Visualization
+
+## Event Study Plotting (`plot_event_study`)
+
+**Reference Period Normalization**
+
+Normalization only occurs when `reference_period` is **explicitly specified** by the user:
+
+- **Explicit `reference_period=X`**: Normalizes effects (subtracts ref effect), sets ref SE to NaN
+  - Point estimates: `effect_normalized = effect - effect_ref`
+  - Reference period SE → NaN (it's now a constraint, not an estimate)
+  - Other periods' SEs unchanged (uncertainty relative to the constraint)
+  - CIs recomputed from normalized effects and original SEs
+
+- **Auto-inferred reference** (from CallawaySantAnna results): Hollow marker styling only, no normalization
+  - Original effects are plotted unchanged
+  - Reference period shown with hollow marker for visual indication
+  - All periods retain their original SEs and error bars
+
+This design prevents unintended normalization when the reference period isn't a true
+identifying constraint (e.g., CallawaySantAnna with `base_period="varying"` where different
+cohorts use different comparison periods).
+
+The explicit-only normalization follows the `fixest` (R) convention where the omitted/reference
+category is an identifying constraint with no associated uncertainty. Auto-inferred references
+follow the `did` (R) package convention which does not normalize and reports full inference.
+
+**Rationale**: When normalizing to a reference period, we're treating that period as an
+identifying constraint (effect ≡ 0 by definition). The variance of a constant is zero,
+but since it's a constraint rather than an estimated quantity, we report NaN rather than 0.
+Auto-inferred references may not represent true identifying constraints, so normalization
+should be a deliberate user choice.
+
+**Edge Cases:**
+- If `reference_period` is not in the data: No normalization applied
+- If the reference effect is non-finite (NaN or ±inf): No normalization applied
+- Reference period CI becomes (NaN, NaN) after normalization (explicit only)
+- Reference period is plotted with hollow marker (both explicit and auto-inferred)
+- Reference period error bars: removed for explicit, retained for auto-inferred
+
+**Reference implementation(s):**
+- R: `fixest::coefplot()` with reference category shown at 0 with no CI
+- R: `did::ggdid()` does not normalize; shows full inference for all periods
+
+---
+
 # Cross-Reference: Standard Errors Summary
 
 | Estimator | Default SE | Alternatives |