Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion doc/_quarto-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@ website:
- plm/plr.qmd
- plm/plr_gate.qmd
- plm/plr_cate.qmd
- plm/pliv.qmd
- plm/lplr.qmd
- plm/plpr.qmd
- plm/pliv.qmd
# DID
- did/did_pa.qmd
- did/did_cs.qmd
Expand Down
1 change: 1 addition & 0 deletions doc/_website.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ website:
- plm/plr_gate.qmd
- plm/plr_cate.qmd
- plm/lplr.qmd
- plm/plpr.qmd
- plm/pliv.qmd
- text: "DID"
menu:
Expand Down
184 changes: 184 additions & 0 deletions doc/plm/plpr.qmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
---
title: "PLPR Models"

jupyter: python3
---


```{python}
#| echo: false

import numpy as np
import pandas as pd
from itables import init_notebook_mode
import os
import sys

# Make the doc/ directory importable so the shared table-styling helper
# (doc/utils/style_tables.py) can be imported from this sub-directory.
doc_dir = os.path.abspath(os.path.join(os.getcwd(), ".."))
if doc_dir not in sys.path:
    sys.path.append(doc_dir)

from utils.style_tables import generate_and_show_styled_table

# Render all pandas DataFrames as interactive itables in the Quarto output.
init_notebook_mode(all_interactive=True)
```

## Coverage

The simulations are based on the [make_plpr_CP2025](https://docs.doubleml.org/stable/api/datasets.html#dataset-generators)-DGP with $1000$ units and $10$ time periods. The following DGPs are considered:

- DGP 1: Linear in the nuisance parameters
- DGP 2: Non-linear and smooth in the nuisance parameters
- DGP 3: Non-linear and discontinuous in the nuisance parameters


::: {.callout-note title="Metadata" collapse="true"}

```{python}
#| echo: false
# Load the simulation metadata and print it as a transposed key/value listing.
metadata_file = '../../results/plm/plpr_ate_metadata.csv'
metadata_df = pd.read_csv(metadata_file)
print(metadata_df.T.to_string(header=False))
```

:::

```{python}
#| echo: false

# set up data: load the coverage results and derive the repetition count
df_coverage = pd.read_csv("../../results/plm/plpr_ate_coverage.csv", index_col=None)

# The repetition count may be stored under either "repetition" or "n_rep";
# use whichever column is present with a single unique value, otherwise "N/A".
n_rep_coverage = "N/A"  # Fallback if the repetition count cannot be determined
for _rep_col in ("repetition", "n_rep"):
    if _rep_col in df_coverage.columns and df_coverage[_rep_col].nunique() == 1:
        n_rep_coverage = df_coverage[_rep_col].unique()[0]
        break

display_columns_coverage = ["Learner g", "Learner m", "DGP", "Approach", "Bias", "CI Length", "Coverage", "Loss g", "Loss m"]
```

### Partialling out

```{python}
# | echo: false

# Coverage table: partialling-out score, 95% level. "Learner g"/"Loss g" are
# displayed as "Learner l"/"Loss l" for this score.
generate_and_show_styled_table(
    main_df=df_coverage,
    filters={"level": 0.95, "Score": "partialling out"},
    display_cols=display_columns_coverage,
    n_rep=n_rep_coverage,
    level_col="level",
    rename_map={"Learner g": "Learner l", "Loss g": "Loss l"},
    coverage_highlight_cols=["Coverage"]
)
```

```{python}
#| echo: false

# Coverage table: partialling-out score, 90% level. "Learner g"/"Loss g" are
# displayed as "Learner l"/"Loss l" for this score.
generate_and_show_styled_table(
    main_df=df_coverage,
    filters={"level": 0.9, "Score": "partialling out"},
    display_cols=display_columns_coverage,
    n_rep=n_rep_coverage,
    level_col="level",
    rename_map={"Learner g": "Learner l", "Loss g": "Loss l"},
    coverage_highlight_cols=["Coverage"]
)
```

### IV-type

For the IV-type score, the learners `ml_l` and `ml_g` are both set to the same type of learner (here **Learner g**).

```{python}
#| echo: false

# Coverage table: IV-type score, 95% level (ml_l and ml_g share "Learner g").
generate_and_show_styled_table(
    main_df=df_coverage,
    filters={"level": 0.95, "Score": "IV-type"},
    display_cols=display_columns_coverage,
    n_rep=n_rep_coverage,
    level_col="level",
    coverage_highlight_cols=["Coverage"]
)
```

```{python}
#| echo: false

# Coverage table: IV-type score, 90% level (ml_l and ml_g share "Learner g").
generate_and_show_styled_table(
    main_df=df_coverage,
    filters={"level": 0.9, "Score": "IV-type"},
    display_cols=display_columns_coverage,
    n_rep=n_rep_coverage,
    level_col="level",
    coverage_highlight_cols=["Coverage"]
)
```


## Tuning

The simulations are based on the [make_plpr_CP2025](https://docs.doubleml.org/stable/api/datasets.html#dataset-generators)-DGP with $1000$ units and $10$ time periods. The following DGPs are considered:

- DGP 1: Linear in the nuisance parameters
- DGP 3: Non-linear and discontinuous in the nuisance parameters

This is only an example as the untuned version just relies on the default configuration.

::: {.callout-note title="Metadata" collapse="true"}

```{python}
#| echo: false
# Load the tuning-simulation metadata and print it as a transposed key/value listing.
metadata_file = '../../results/plm/plpr_ate_tune_metadata.csv'
metadata_df = pd.read_csv(metadata_file)
print(metadata_df.T.to_string(header=False))
```

:::

```{python}
#| echo: false

# set up data: load tuning-coverage results and derive the repetition count
df_tune_cov = pd.read_csv("../../results/plm/plpr_ate_tune_coverage.csv", index_col=None)

# All rows are expected to share a single repetition count; fail loudly otherwise.
assert df_tune_cov["repetition"].nunique() == 1
n_rep_tune_cov = df_tune_cov["repetition"].unique()[0]

display_columns_tune_cov = ["Learner g", "Learner m", "Tuned", "DGP", "Approach", "Bias", "CI Length", "Coverage", "Loss g", "Loss m"]
```


### Partialling out

```{python}
# | echo: false

# Tuning-coverage table: partialling-out score, 95% level. "Learner g"/"Loss g"
# are displayed as "Learner l"/"Loss l" for this score.
generate_and_show_styled_table(
    main_df=df_tune_cov,
    filters={"level": 0.95, "Score": "partialling out"},
    display_cols=display_columns_tune_cov,
    n_rep=n_rep_tune_cov,
    level_col="level",
    rename_map={"Learner g": "Learner l", "Loss g": "Loss l"},
    coverage_highlight_cols=["Coverage"]
)
```

```{python}
#| echo: false

# Tuning-coverage table: partialling-out score, 90% level. "Learner g"/"Loss g"
# are displayed as "Learner l"/"Loss l" for this score.
generate_and_show_styled_table(
    main_df=df_tune_cov,
    filters={"level": 0.9, "Score": "partialling out"},
    display_cols=display_columns_tune_cov,
    n_rep=n_rep_tune_cov,
    level_col="level",
    rename_map={"Learner g": "Learner l", "Loss g": "Loss l"},
    coverage_highlight_cols=["Coverage"]
)
```
6 changes: 3 additions & 3 deletions doc/plm/plr.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ elif "n_rep" in df_coverage.columns and df_coverage["n_rep"].nunique() == 1:
else:
n_rep_coverage = "N/A" # Fallback if n_rep cannot be determined

display_columns_coverage = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage"]
display_columns_coverage = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage", "Loss g", "Loss m"]
```

### Partialling out
Expand All @@ -64,7 +64,7 @@ generate_and_show_styled_table(
display_cols=display_columns_coverage,
n_rep=n_rep_coverage,
level_col="level",
rename_map={"Learner g": "Learner l"},
rename_map={"Learner g": "Learner l", "Loss g": "Loss l"},
coverage_highlight_cols=["Coverage"]
)
```
Expand All @@ -78,7 +78,7 @@ generate_and_show_styled_table(
display_cols=display_columns_coverage,
n_rep=n_rep_coverage,
level_col="level",
rename_map={"Learner g": "Learner l"},
rename_map={"Learner g": "Learner l", "Loss g": "Loss l"},
coverage_highlight_cols=["Coverage"]
)
```
Expand Down
5 changes: 5 additions & 0 deletions monte-cover/src/montecover/plm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
from montecover.plm.lplr_ate import LPLRATECoverageSimulation
from montecover.plm.lplr_ate_tune import LPLRATETuningCoverageSimulation
from montecover.plm.pliv_late import PLIVLATECoverageSimulation
from montecover.plm.plpr_ate import PLPRATECoverageSimulation
from montecover.plm.plpr_ate_tune import PLPRATETuningCoverageSimulation
from montecover.plm.plr_ate import PLRATECoverageSimulation
from montecover.plm.plr_ate_sensitivity import PLRATESensitivityCoverageSimulation
from montecover.plm.plr_ate_tune import PLRATETuningCoverageSimulation
Expand All @@ -16,6 +18,9 @@
"PLRCATECoverageSimulation",
"PLRATESensitivityCoverageSimulation",
"PLRATETuningCoverageSimulation",
"PLPRATECoverageSimulation",
"PLPRATETuningCoverageSimulation",
"LPLRATECoverageSimulation",
"LPLRATETuningCoverageSimulation",
"PLPRATECoverageSimulation",
]
142 changes: 142 additions & 0 deletions monte-cover/src/montecover/plm/plpr_ate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
from typing import Any, Dict, Optional

import doubleml as dml
from doubleml.plm.datasets import make_plpr_CP2025

from montecover.base import BaseSimulation
from montecover.utils import create_learner_from_config


class PLPRATECoverageSimulation(BaseSimulation):
    """Simulation class for coverage properties of DoubleMLPLPR for ATE estimation.

    Repeatedly generates panel data from the ``make_plpr_CP2025`` DGP, fits a
    DoubleMLPLPR model, and records coverage of the oracle treatment effect
    together with CI length, bias and nuisance losses.
    """

    def __init__(
        self,
        config_file: str,
        suppress_warnings: bool = True,
        log_level: str = "INFO",
        log_file: Optional[str] = None,
    ):
        """Initialize the simulation from a YAML/JSON config and precompute oracle values."""
        super().__init__(
            config_file=config_file,
            suppress_warnings=suppress_warnings,
            log_level=log_level,
            log_file=log_file,
        )

        # Calculate oracle values
        self._calculate_oracle_values()

    def _process_config_parameters(self):
        """Process simulation-specific parameters from config.

        Validates that a ``learners`` section exists and that every learner
        entry specifies both ``ml_g`` and ``ml_m``.
        """
        # Process ML models in parameter grid
        assert (
            "learners" in self.dml_parameters
        ), "No learners specified in the config file"

        required_learners = ["ml_g", "ml_m"]
        for learner in self.dml_parameters["learners"]:
            for ml in required_learners:
                assert ml in learner, f"No {ml} specified in the config file"

    def _calculate_oracle_values(self):
        """Calculate oracle values for the simulation.

        The true effect is taken directly from the DGP parameter ``theta``,
        so no Monte Carlo approximation of the oracle is needed.
        """
        self.logger.info("Calculating oracle values")

        self.oracle_values = dict()
        self.oracle_values["theta"] = self.dgp_parameters["theta"]

    def run_single_rep(self, dml_data, dml_params) -> Dict[str, Any]:
        """Run a single repetition with the given parameters.

        Fits a DoubleMLPLPR model on ``dml_data`` with the learners, score and
        approach from ``dml_params`` and computes coverage of the oracle theta
        at every configured confidence level.

        Returns:
            Dict with a "coverage" key mapping to a list of per-level result
            dicts (coverage metrics plus the parameter combination used).
        """
        # Extract parameters
        learner_config = dml_params["learners"]
        learner_g_name, ml_g = create_learner_from_config(learner_config["ml_g"])
        learner_m_name, ml_m = create_learner_from_config(learner_config["ml_m"])
        score = dml_params["score"]
        approach = dml_params["approach"]

        # Model: ml_l reuses the "ml_g" learner config; ml_g itself is only
        # passed for the IV-type score (both then use the same learner type).
        dml_model = dml.DoubleMLPLPR(
            obj_dml_data=dml_data,
            ml_l=ml_g,
            ml_m=ml_m,
            ml_g=ml_g if score == "IV-type" else None,
            score=score,
            approach=approach,
        )
        dml_model.fit()
        nuisance_loss = dml_model.nuisance_loss

        result = {
            "coverage": [],
        }
        for level in self.confidence_parameters["level"]:
            level_result = dict()
            level_result["coverage"] = self._compute_coverage(
                thetas=dml_model.coef,
                oracle_thetas=self.oracle_values["theta"],
                confint=dml_model.confint(level=level),
                joint_confint=None,
            )

            # add parameters to the result
            for res in level_result.values():
                res.update(
                    {
                        "Learner g": learner_g_name,
                        "Learner m": learner_m_name,
                        "Score": score,
                        "Approach": approach,
                        "level": level,
                        # With the partialling-out score only ml_l is fitted, so its
                        # loss is reported under "Loss g"; otherwise use ml_g's loss.
                        "Loss g": nuisance_loss["ml_l"].mean() if score == "partialling out" else nuisance_loss["ml_g"].mean(),
                        "Loss m": nuisance_loss["ml_m"].mean(),
                    }
                )
            for key, res in level_result.items():
                result[key].append(res)

        return result

    def summarize_results(self):
        """Summarize the simulation results.

        Averages coverage, CI length, bias and nuisance losses over
        repetitions for each parameter combination; "repetition" is counted
        to report the number of repetitions per cell.
        """
        self.logger.info("Summarizing simulation results")

        # Group by parameter combinations
        groupby_cols = ["Learner g", "Learner m", "Score", "Approach", "DGP", "level"]
        aggregation_dict = {
            "Coverage": "mean",
            "CI Length": "mean",
            "Bias": "mean",
            "Loss g": "mean",
            "Loss m": "mean",
            "repetition": "count",
        }

        # Aggregate results (possibly multiple result dfs)
        result_summary = dict()
        for result_name, result_df in self.results.items():
            result_summary[result_name] = (
                result_df.groupby(groupby_cols).agg(aggregation_dict).reset_index()
            )
            self.logger.debug(f"Summarized {result_name} results")

        return result_summary

    def _generate_dml_data(self, dgp_params) -> dml.DoubleMLData:
        """Generate static panel data for one repetition from the CP2025 DGP.

        NOTE(review): annotated return type is DoubleMLData but a
        DoubleMLPanelData object is returned — presumably a subclass; confirm.
        """
        data = make_plpr_CP2025(
            num_id=dgp_params["num_id"],
            num_t=dgp_params["num_t"],
            dim_x=dgp_params["dim_x"],
            theta=dgp_params["theta"],
            dgp_type=dgp_params["DGP"],
        )
        dml_data = dml.DoubleMLPanelData(
            data,
            y_col="y",
            d_cols="d",
            t_col="time",
            id_col="id",
            static_panel=True,
        )
        return dml_data
Loading