diff --git a/doc/api/datasets.rst b/doc/api/datasets.rst index a1ead85a..29b58c96 100644 --- a/doc/api/datasets.rst +++ b/doc/api/datasets.rst @@ -32,6 +32,7 @@ Dataset Generators plm.datasets.make_plr_CCDDHNR2018 plm.datasets.make_plr_turrell2018 plm.datasets.make_lplr_LZZ2020 + plm.datasets.make_plpr_CP2025 plm.datasets.make_pliv_CHS2015 plm.datasets.make_pliv_multiway_cluster_CKMS2021 plm.datasets.make_confounded_plr_data diff --git a/doc/api/dml_models.rst b/doc/api/dml_models.rst index 77e1104a..450eded7 100644 --- a/doc/api/dml_models.rst +++ b/doc/api/dml_models.rst @@ -17,6 +17,7 @@ doubleml.plm DoubleMLPLR DoubleMLLPLR + DoubleMLPLPR DoubleMLPLIV diff --git a/doc/conf.py b/doc/conf.py index b31f4ea0..4699fa26 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -275,7 +275,11 @@ # Valid DOI; Causes 403 Client Error: Forbidden for url:... "https://doi.org/10.3982/ECTA15732", # Valid DOI; Causes 403 Client Error: Forbidden for url:... - "https://doi.org/10.1093/ectj/utab019" + "https://doi.org/10.1093/ectj/utab019", + # Valid DOI; Causes 403 Client Error: Forbidden for url:... + "https://doi.org/10.1093/ectj/utaf011", + # Valid DOI; Causes 403 Client Error: Forbidden for url:... + "https://doi.org/10.2307/1913646" ] # To execute R code via jupyter-execute one needs to install the R kernel for jupyter diff --git a/doc/examples/index.rst b/doc/examples/index.rst index 03f4e713..394134d3 100644 --- a/doc/examples/index.rst +++ b/doc/examples/index.rst @@ -23,6 +23,7 @@ General Examples py_double_ml_apo.ipynb py_double_ml_irm_vs_apo.ipynb py_double_ml_lplr.ipynb + py_double_ml_plpr.ipynb py_double_ml_ssm.ipynb learners/py_optuna.ipynb learners/py_learner.ipynb diff --git a/doc/examples/py_double_ml_plpr.ipynb b/doc/examples/py_double_ml_plpr.ipynb new file mode 100644 index 00000000..17616ee2 --- /dev/null +++ b/doc/examples/py_double_ml_plpr.ipynb @@ -0,0 +1,3342 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Python: Static Panel Models with Fixed Effects" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this example, we illustrate how the [DoubleML](https://docs.doubleml.org/stable/index.html) package can be used to estimate treatment effects for static panel models with fixed effects in a partially linear panel regression [DoubleMLPLPR](https://docs.doubleml.org/stable/guide/models.html#partially-linear-models-plm) model. The model is based on [Clarke and Polselli (2025)](https://doi.org/10.1093/ectj/utaf011)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import optuna\n", + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "from sklearn.base import clone\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.preprocessing import PolynomialFeatures\n", + "from sklearn.compose import ColumnTransformer\n", + "from sklearn.pipeline import make_pipeline\n", + "from sklearn.base import BaseEstimator, TransformerMixin\n", + "from sklearn.linear_model import LassoCV\n", + "from lightgbm import LGBMRegressor\n", + "\n", + "from doubleml.data import DoubleMLPanelData\n", + "from doubleml.plm.datasets import make_plpr_CP2025\n", + "from doubleml import DoubleMLPLPR\n", + "\n", + "import warnings\n", + "warnings.filterwarnings(\"ignore\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data\n", + "\n", + "We will use the implemented data generating process [make_plpr_CP2025](https://docs.doubleml.org/stable/api/datasets.html#dataset-generators) to generate data similar to the simulation in [Clarke and Polselli (2025)](https://doi.org/10.1093/ectj/utaf011). For exposition, we use the simple linear `dgp_type=\"dgp1\"`, with 150 units, 10 time periods per unit, and a true treatment effect of `theta=0.5`.\n", + "\n", + "We set `time_type=\"int\"` such that the time variable values will be integers. It's also possible to use `\"float\"` or `\"datetime\"` time variables with [DoubleMLPLPR](https://docs.doubleml.org/stable/api/dml_models.html#doubleml-plm)." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
| \n", + " | id | \n", + "time | \n", + "y | \n", + "d | \n", + "x1 | \n", + "x2 | \n", + "x3 | \n", + "x4 | \n", + "x5 | \n", + "x6 | \n", + "... | \n", + "x21 | \n", + "x22 | \n", + "x23 | \n", + "x24 | \n", + "x25 | \n", + "x26 | \n", + "x27 | \n", + "x28 | \n", + "x29 | \n", + "x30 | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "1 | \n", + "1 | \n", + "-1.290479 | \n", + "0.908307 | \n", + "1.710715 | \n", + "-1.853675 | \n", + "-1.473907 | \n", + "1.366514 | \n", + "-0.322024 | \n", + "2.944020 | \n", + "... | \n", + "-1.828362 | \n", + "-3.010547 | \n", + "-0.840202 | \n", + "-3.085159 | \n", + "1.169952 | \n", + "-0.954107 | \n", + "-3.925198 | \n", + "-0.779510 | \n", + "-0.430700 | \n", + "1.004298 | \n", + "
| 1 | \n", + "1 | \n", + "2 | \n", + "-2.850646 | \n", + "-1.316777 | \n", + "-0.325043 | \n", + "4.178599 | \n", + "-1.159857 | \n", + "-0.139527 | \n", + "-0.230115 | \n", + "-0.631976 | \n", + "... | \n", + "-0.724172 | \n", + "-0.421045 | \n", + "-2.012480 | \n", + "-2.081784 | \n", + "-2.734123 | \n", + "-0.879470 | \n", + "-2.141218 | \n", + "4.598401 | \n", + "-4.222797 | \n", + "-2.523024 | \n", + "
| 2 | \n", + "1 | \n", + "3 | \n", + "-4.338502 | \n", + "-1.756120 | \n", + "-0.897590 | \n", + "1.505972 | \n", + "-0.925189 | \n", + "1.511500 | \n", + "-2.206561 | \n", + "0.132579 | \n", + "... | \n", + "1.766109 | \n", + "-2.252858 | \n", + "-2.919826 | \n", + "-1.974066 | \n", + "-0.773881 | \n", + "0.244633 | \n", + "-1.727550 | \n", + "1.665467 | \n", + "0.562291 | \n", + "-1.553616 | \n", + "
| 3 | \n", + "1 | \n", + "4 | \n", + "-2.713236 | \n", + "0.934866 | \n", + "1.987849 | \n", + "2.596228 | \n", + "-0.220666 | \n", + "-0.480717 | \n", + "-3.966273 | \n", + "-0.911226 | \n", + "... | \n", + "0.856124 | \n", + "0.727759 | \n", + "-0.501579 | \n", + "1.077504 | \n", + "2.268052 | \n", + "-3.821422 | \n", + "1.629055 | \n", + "-0.220834 | \n", + "-1.185091 | \n", + "-5.462884 | \n", + "
| 4 | \n", + "1 | \n", + "5 | \n", + "-5.782997 | \n", + "-4.357881 | \n", + "-3.086559 | \n", + "3.796975 | \n", + "-1.539641 | \n", + "-2.425617 | \n", + "-1.020599 | \n", + "-1.666200 | \n", + "... | \n", + "2.617215 | \n", + "-1.231835 | \n", + "-0.891350 | \n", + "0.246981 | \n", + "2.489642 | \n", + "0.319735 | \n", + "-2.810366 | \n", + "0.585826 | \n", + "3.643749 | \n", + "0.147147 | \n", + "
5 rows × 34 columns
\n", + "| \n", + " | id | \n", + "time | \n", + "y | \n", + "d | \n", + "x1 | \n", + "x2 | \n", + "x3 | \n", + "x4 | \n", + "x5 | \n", + "x6 | \n", + "... | \n", + "x21_mean | \n", + "x22_mean | \n", + "x23_mean | \n", + "x24_mean | \n", + "x25_mean | \n", + "x26_mean | \n", + "x27_mean | \n", + "x28_mean | \n", + "x29_mean | \n", + "x30_mean | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "1 | \n", + "1 | \n", + "-1.290479 | \n", + "0.908307 | \n", + "1.710715 | \n", + "-1.853675 | \n", + "-1.473907 | \n", + "1.366514 | \n", + "-0.322024 | \n", + "2.944020 | \n", + "... | \n", + "1.24018 | \n", + "-0.52821 | \n", + "-0.734145 | \n", + "0.227494 | \n", + "1.164763 | \n", + "0.412979 | \n", + "-1.272608 | \n", + "0.459816 | \n", + "-0.829863 | \n", + "-1.145189 | \n", + "
| 1 | \n", + "1 | \n", + "2 | \n", + "-2.850646 | \n", + "-1.316777 | \n", + "-0.325043 | \n", + "4.178599 | \n", + "-1.159857 | \n", + "-0.139527 | \n", + "-0.230115 | \n", + "-0.631976 | \n", + "... | \n", + "1.24018 | \n", + "-0.52821 | \n", + "-0.734145 | \n", + "0.227494 | \n", + "1.164763 | \n", + "0.412979 | \n", + "-1.272608 | \n", + "0.459816 | \n", + "-0.829863 | \n", + "-1.145189 | \n", + "
| 2 | \n", + "1 | \n", + "3 | \n", + "-4.338502 | \n", + "-1.756120 | \n", + "-0.897590 | \n", + "1.505972 | \n", + "-0.925189 | \n", + "1.511500 | \n", + "-2.206561 | \n", + "0.132579 | \n", + "... | \n", + "1.24018 | \n", + "-0.52821 | \n", + "-0.734145 | \n", + "0.227494 | \n", + "1.164763 | \n", + "0.412979 | \n", + "-1.272608 | \n", + "0.459816 | \n", + "-0.829863 | \n", + "-1.145189 | \n", + "
| 3 | \n", + "1 | \n", + "4 | \n", + "-2.713236 | \n", + "0.934866 | \n", + "1.987849 | \n", + "2.596228 | \n", + "-0.220666 | \n", + "-0.480717 | \n", + "-3.966273 | \n", + "-0.911226 | \n", + "... | \n", + "1.24018 | \n", + "-0.52821 | \n", + "-0.734145 | \n", + "0.227494 | \n", + "1.164763 | \n", + "0.412979 | \n", + "-1.272608 | \n", + "0.459816 | \n", + "-0.829863 | \n", + "-1.145189 | \n", + "
| 4 | \n", + "1 | \n", + "5 | \n", + "-5.782997 | \n", + "-4.357881 | \n", + "-3.086559 | \n", + "3.796975 | \n", + "-1.539641 | \n", + "-2.425617 | \n", + "-1.020599 | \n", + "-1.666200 | \n", + "... | \n", + "1.24018 | \n", + "-0.52821 | \n", + "-0.734145 | \n", + "0.227494 | \n", + "1.164763 | \n", + "0.412979 | \n", + "-1.272608 | \n", + "0.459816 | \n", + "-0.829863 | \n", + "-1.145189 | \n", + "
5 rows × 64 columns
\n", + "| \n", + " | id | \n", + "time | \n", + "y_diff | \n", + "d_diff | \n", + "x1 | \n", + "x2 | \n", + "x3 | \n", + "x4 | \n", + "x5 | \n", + "x6 | \n", + "... | \n", + "x21_lag | \n", + "x22_lag | \n", + "x23_lag | \n", + "x24_lag | \n", + "x25_lag | \n", + "x26_lag | \n", + "x27_lag | \n", + "x28_lag | \n", + "x29_lag | \n", + "x30_lag | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "1 | \n", + "2 | \n", + "-1.560167 | \n", + "-2.225084 | \n", + "-0.325043 | \n", + "4.178599 | \n", + "-1.159857 | \n", + "-0.139527 | \n", + "-0.230115 | \n", + "-0.631976 | \n", + "... | \n", + "-1.828362 | \n", + "-3.010547 | \n", + "-0.840202 | \n", + "-3.085159 | \n", + "1.169952 | \n", + "-0.954107 | \n", + "-3.925198 | \n", + "-0.779510 | \n", + "-0.430700 | \n", + "1.004298 | \n", + "
| 1 | \n", + "1 | \n", + "3 | \n", + "-1.487856 | \n", + "-0.439343 | \n", + "-0.897590 | \n", + "1.505972 | \n", + "-0.925189 | \n", + "1.511500 | \n", + "-2.206561 | \n", + "0.132579 | \n", + "... | \n", + "-0.724172 | \n", + "-0.421045 | \n", + "-2.012480 | \n", + "-2.081784 | \n", + "-2.734123 | \n", + "-0.879470 | \n", + "-2.141218 | \n", + "4.598401 | \n", + "-4.222797 | \n", + "-2.523024 | \n", + "
| 2 | \n", + "1 | \n", + "4 | \n", + "1.625266 | \n", + "2.690986 | \n", + "1.987849 | \n", + "2.596228 | \n", + "-0.220666 | \n", + "-0.480717 | \n", + "-3.966273 | \n", + "-0.911226 | \n", + "... | \n", + "1.766109 | \n", + "-2.252858 | \n", + "-2.919826 | \n", + "-1.974066 | \n", + "-0.773881 | \n", + "0.244633 | \n", + "-1.727550 | \n", + "1.665467 | \n", + "0.562291 | \n", + "-1.553616 | \n", + "
| 3 | \n", + "1 | \n", + "5 | \n", + "-3.069761 | \n", + "-5.292747 | \n", + "-3.086559 | \n", + "3.796975 | \n", + "-1.539641 | \n", + "-2.425617 | \n", + "-1.020599 | \n", + "-1.666200 | \n", + "... | \n", + "0.856124 | \n", + "0.727759 | \n", + "-0.501579 | \n", + "1.077504 | \n", + "2.268052 | \n", + "-3.821422 | \n", + "1.629055 | \n", + "-0.220834 | \n", + "-1.185091 | \n", + "-5.462884 | \n", + "
| 4 | \n", + "1 | \n", + "6 | \n", + "-1.094799 | \n", + "0.551051 | \n", + "0.289315 | \n", + "-2.823134 | \n", + "-3.137179 | \n", + "-1.425923 | \n", + "-0.730116 | \n", + "0.232687 | \n", + "... | \n", + "2.617215 | \n", + "-1.231835 | \n", + "-0.891350 | \n", + "0.246981 | \n", + "2.489642 | \n", + "0.319735 | \n", + "-2.810366 | \n", + "0.585826 | \n", + "3.643749 | \n", + "0.147147 | \n", + "
5 rows × 64 columns
\n", + "| \n", + " | id | \n", + "time | \n", + "y_demean | \n", + "d_demean | \n", + "x1_demean | \n", + "x2_demean | \n", + "x3_demean | \n", + "x4_demean | \n", + "x5_demean | \n", + "x6_demean | \n", + "... | \n", + "x21_demean | \n", + "x22_demean | \n", + "x23_demean | \n", + "x24_demean | \n", + "x25_demean | \n", + "x26_demean | \n", + "x27_demean | \n", + "x28_demean | \n", + "x29_demean | \n", + "x30_demean | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "1 | \n", + "1 | \n", + "1.543571 | \n", + "1.760660 | \n", + "2.207607 | \n", + "-2.039516 | \n", + "-0.642847 | \n", + "1.014204 | \n", + "0.384166 | \n", + "1.826013 | \n", + "... | \n", + "-3.162933 | \n", + "-2.475942 | \n", + "0.000728 | \n", + "-3.344219 | \n", + "0.082829 | \n", + "-1.351303 | \n", + "-2.670511 | \n", + "-1.275344 | \n", + "0.407596 | \n", + "2.187878 | \n", + "
| 1 | \n", + "1 | \n", + "2 | \n", + "-0.016596 | \n", + "-0.464424 | \n", + "0.171849 | \n", + "3.992759 | \n", + "-0.328797 | \n", + "-0.491837 | \n", + "0.476074 | \n", + "-1.749982 | \n", + "... | \n", + "-2.058743 | \n", + "0.113560 | \n", + "-1.171550 | \n", + "-2.340844 | \n", + "-3.821246 | \n", + "-1.276666 | \n", + "-0.886531 | \n", + "4.102567 | \n", + "-3.384501 | \n", + "-1.339444 | \n", + "
| 2 | \n", + "1 | \n", + "3 | \n", + "-1.504452 | \n", + "-0.903767 | \n", + "-0.400698 | \n", + "1.320131 | \n", + "-0.094129 | \n", + "1.159190 | \n", + "-1.500371 | \n", + "-0.985427 | \n", + "... | \n", + "0.431538 | \n", + "-1.718253 | \n", + "-2.078895 | \n", + "-2.233126 | \n", + "-1.861004 | \n", + "-0.152563 | \n", + "-0.472863 | \n", + "1.169632 | \n", + "1.400587 | \n", + "-0.370036 | \n", + "
| 3 | \n", + "1 | \n", + "4 | \n", + "0.120814 | \n", + "1.787219 | \n", + "2.484741 | \n", + "2.410387 | \n", + "0.610394 | \n", + "-0.833027 | \n", + "-3.260084 | \n", + "-2.029232 | \n", + "... | \n", + "-0.478447 | \n", + "1.262364 | \n", + "0.339352 | \n", + "0.818443 | \n", + "1.180930 | \n", + "-4.218618 | \n", + "2.883741 | \n", + "-0.716668 | \n", + "-0.346795 | \n", + "-4.279304 | \n", + "
| 4 | \n", + "1 | \n", + "5 | \n", + "-2.948947 | \n", + "-3.505528 | \n", + "-2.589667 | \n", + "3.611134 | \n", + "-0.708582 | \n", + "-2.777927 | \n", + "-0.314410 | \n", + "-2.784206 | \n", + "... | \n", + "1.282645 | \n", + "-0.697230 | \n", + "-0.050420 | \n", + "-0.012080 | \n", + "1.402519 | \n", + "-0.077461 | \n", + "-1.555679 | \n", + "0.089991 | \n", + "4.482045 | \n", + "1.330727 | \n", + "
5 rows × 34 columns
\n", + "| \n", + " | id | \n", + "time | \n", + "y | \n", + "d | \n", + "x1 | \n", + "x2 | \n", + "x3 | \n", + "x4 | \n", + "x5 | \n", + "x6 | \n", + "... | \n", + "x21 | \n", + "x22 | \n", + "x23 | \n", + "x24 | \n", + "x25 | \n", + "x26 | \n", + "x27 | \n", + "x28 | \n", + "x29 | \n", + "x30 | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "1 | \n", + "1 | \n", + "-1.290479 | \n", + "0.908307 | \n", + "1.710715 | \n", + "-1.853675 | \n", + "-1.473907 | \n", + "1.366514 | \n", + "-0.322024 | \n", + "2.944020 | \n", + "... | \n", + "-1.828362 | \n", + "-3.010547 | \n", + "-0.840202 | \n", + "-3.085159 | \n", + "1.169952 | \n", + "-0.954107 | \n", + "-3.925198 | \n", + "-0.779510 | \n", + "-0.430700 | \n", + "1.004298 | \n", + "
| 1 | \n", + "1 | \n", + "2 | \n", + "-2.850646 | \n", + "-1.316777 | \n", + "-0.325043 | \n", + "4.178599 | \n", + "-1.159857 | \n", + "-0.139527 | \n", + "-0.230115 | \n", + "-0.631976 | \n", + "... | \n", + "-0.724172 | \n", + "-0.421045 | \n", + "-2.012480 | \n", + "-2.081784 | \n", + "-2.734123 | \n", + "-0.879470 | \n", + "-2.141218 | \n", + "4.598401 | \n", + "-4.222797 | \n", + "-2.523024 | \n", + "
| 2 | \n", + "1 | \n", + "3 | \n", + "-4.338502 | \n", + "-1.756120 | \n", + "-0.897590 | \n", + "1.505972 | \n", + "-0.925189 | \n", + "1.511500 | \n", + "-2.206561 | \n", + "0.132579 | \n", + "... | \n", + "1.766109 | \n", + "-2.252858 | \n", + "-2.919826 | \n", + "-1.974066 | \n", + "-0.773881 | \n", + "0.244633 | \n", + "-1.727550 | \n", + "1.665467 | \n", + "0.562291 | \n", + "-1.553616 | \n", + "
| 3 | \n", + "1 | \n", + "4 | \n", + "-2.713236 | \n", + "0.934866 | \n", + "1.987849 | \n", + "2.596228 | \n", + "-0.220666 | \n", + "-0.480717 | \n", + "-3.966273 | \n", + "-0.911226 | \n", + "... | \n", + "0.856124 | \n", + "0.727759 | \n", + "-0.501579 | \n", + "1.077504 | \n", + "2.268052 | \n", + "-3.821422 | \n", + "1.629055 | \n", + "-0.220834 | \n", + "-1.185091 | \n", + "-5.462884 | \n", + "
| 4 | \n", + "1 | \n", + "5 | \n", + "-5.782997 | \n", + "-4.357881 | \n", + "-3.086559 | \n", + "3.796975 | \n", + "-1.539641 | \n", + "-2.425617 | \n", + "-1.020599 | \n", + "-1.666200 | \n", + "... | \n", + "2.617215 | \n", + "-1.231835 | \n", + "-0.891350 | \n", + "0.246981 | \n", + "2.489642 | \n", + "0.319735 | \n", + "-2.810366 | \n", + "0.585826 | \n", + "3.643749 | \n", + "0.147147 | \n", + "
5 rows × 34 columns
\n", + "Pipeline(steps=[('columntransformer',\n",
+ " ColumnTransformer(remainder='passthrough',\n",
+ " transformers=[('poly_x', PolyPlus(),\n",
+ " [0, 1, 2, 3, 4, 5, 6, 7, 8, 9,\n",
+ " 10, 11, 12, 13, 14, 15, 16,\n",
+ " 17, 18, 19, 20, 21, 22, 23,\n",
+ " 24, 25, 26, 27, 28, 29]),\n",
+ " ('poly_x_tr', PolyPlus(),\n",
+ " [30, 31, 32, 33, 34, 35, 36,\n",
+ " 37, 38, 39, 40, 41, 42, 43,\n",
+ " 44, 45, 46, 47, 48, 49, 50,\n",
+ " 51, 52, 53, 54, 55, 56, 57,\n",
+ " 58, 59])])),\n",
+ " ('standardscaler', StandardScaler()),\n",
+ " ('lassocv', LassoCV(cv=2, n_jobs=5))])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. Pipeline(steps=[('columntransformer',\n",
+ " ColumnTransformer(remainder='passthrough',\n",
+ " transformers=[('poly_x', PolyPlus(),\n",
+ " [0, 1, 2, 3, 4, 5, 6, 7, 8, 9,\n",
+ " 10, 11, 12, 13, 14, 15, 16,\n",
+ " 17, 18, 19, 20, 21, 22, 23,\n",
+ " 24, 25, 26, 27, 28, 29]),\n",
+ " ('poly_x_tr', PolyPlus(),\n",
+ " [30, 31, 32, 33, 34, 35, 36,\n",
+ " 37, 38, 39, 40, 41, 42, 43,\n",
+ " 44, 45, 46, 47, 48, 49, 50,\n",
+ " 51, 52, 53, 54, 55, 56, 57,\n",
+ " 58, 59])])),\n",
+ " ('standardscaler', StandardScaler()),\n",
+ " ('lassocv', LassoCV(cv=2, n_jobs=5))])ColumnTransformer(remainder='passthrough',\n",
+ " transformers=[('poly_x', PolyPlus(),\n",
+ " [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,\n",
+ " 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,\n",
+ " 25, 26, 27, 28, 29]),\n",
+ " ('poly_x_tr', PolyPlus(),\n",
+ " [30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,\n",
+ " 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,\n",
+ " 52, 53, 54, 55, 56, 57, 58, 59])])[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]
PolyPlus()
[30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59]
PolyPlus()
passthrough
StandardScaler()
LassoCV(cv=2, n_jobs=5)
ColumnTransformer(remainder='passthrough',\n",
+ " transformers=[('poly_x', PolyPlus(), [2, 5, 21])])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. ColumnTransformer(remainder='passthrough',\n",
+ " transformers=[('poly_x', PolyPlus(), [2, 5, 21])])[2, 5, 21]
PolyPlus()
[0, 1, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 24, 25, 26, 27, 28, 29]
passthrough
| \n", + " | coef | \n", + "std err | \n", + "t | \n", + "P>|t| | \n", + "2.5 % | \n", + "97.5 % | \n", + "
|---|---|---|---|---|---|---|
| d | \n", + "0.495091 | \n", + "0.007491 | \n", + "66.088483 | \n", + "0.0 | \n", + "0.480408 | \n", + "0.509774 | \n", + "
| \n", + " | coef | \n", + "std err | \n", + "t | \n", + "P>|t| | \n", + "2.5 % | \n", + "97.5 % | \n", + "
|---|---|---|---|---|---|---|
| d | \n", + "0.489759 | \n", + "0.010002 | \n", + "48.968271 | \n", + "0.0 | \n", + "0.470156 | \n", + "0.509362 | \n", + "
| \n", + " | coef | \n", + "std err | \n", + "t | \n", + "P>|t| | \n", + "2.5 % | \n", + "97.5 % | \n", + "
|---|---|---|---|---|---|---|
| d_diff | \n", + "0.549295 | \n", + "0.008736 | \n", + "62.878843 | \n", + "0.0 | \n", + "0.532174 | \n", + "0.566417 | \n", + "
| \n", + " | coef | \n", + "std err | \n", + "t | \n", + "P>|t| | \n", + "2.5 % | \n", + "97.5 % | \n", + "
|---|---|---|---|---|---|---|
| d_demean | \n", + "1.133962 | \n", + "0.005109 | \n", + "221.970892 | \n", + "0.0 | \n", + "1.123949 | \n", + "1.143975 | \n", + "
| \n", + " | y | \n", + "d | \n", + "score | \n", + "x0 | \n", + "x1 | \n", + "x2 | \n", + "
|---|---|---|---|---|---|---|
| 0 | \n", + "1.563067 | \n", + "1.0 | \n", + "0.471435 | \n", + "-0.198503 | \n", + "-0.193849 | \n", + "-0.493426 | \n", + "
| 1 | \n", + "1.348622 | \n", + "0.0 | \n", + "-1.190976 | \n", + "0.013677 | \n", + "-0.630880 | \n", + "-0.885832 | \n", + "
| 2 | \n", + "2.265929 | \n", + "1.0 | \n", + "1.432707 | \n", + "-0.266147 | \n", + "0.439675 | \n", + "-0.051651 | \n", + "
| 3 | \n", + "7.477357 | \n", + "0.0 | \n", + "-0.312652 | \n", + "0.845241 | \n", + "-0.659755 | \n", + "0.436764 | \n", + "
| 4 | \n", + "13.185130 | \n", + "0.0 | \n", + "-0.720589 | \n", + "0.739595 | \n", + "-0.741380 | \n", + "0.948112 | \n", + "
| \n", + " | y | \n", + "d | \n", + "score | \n", + "x0 | \n", + "x1 | \n", + "x2 | \n", + "
|---|---|---|---|---|---|---|
| 0 | \n", + "-0.183553 | \n", + "0.0 | \n", + "0.471435 | \n", + "-0.198503 | \n", + "-0.193849 | \n", + "-0.493426 | \n", + "
| 1 | \n", + "1.348622 | \n", + "0.0 | \n", + "-1.190976 | \n", + "0.013677 | \n", + "-0.630880 | \n", + "-0.885832 | \n", + "
| 2 | \n", + "2.265929 | \n", + "1.0 | \n", + "1.432707 | \n", + "-0.266147 | \n", + "0.439675 | \n", + "-0.051651 | \n", + "
| 3 | \n", + "9.694561 | \n", + "1.0 | \n", + "-0.312652 | \n", + "0.845241 | \n", + "-0.659755 | \n", + "0.436764 | \n", + "
| 4 | \n", + "15.001403 | \n", + "1.0 | \n", + "-0.720589 | \n", + "0.739595 | \n", + "-0.741380 | \n", + "0.948112 | \n", + "