|
8 | 8 | "outputs": [], |
9 | 9 | "source": [ |
10 | 10 | "import numpy as np\n", |
11 | | - "import pandas as pd\n", |
12 | 11 | "from sklearn.datasets import fetch_openml\n", |
13 | | - "from sklearn.model_selection import train_test_split\n", |
14 | | - "from sklearn.metrics import accuracy_score, log_loss" |
| 12 | + "from sklearn.metrics import accuracy_score, log_loss\n", |
| 13 | + "from sklearn.model_selection import train_test_split" |
15 | 14 | ] |
16 | 15 | }, |
17 | 16 | { |
|
55 | 54 | "metadata": {}, |
56 | 55 | "outputs": [], |
57 | 56 | "source": [ |
58 | | - "cat_cols = ['job', 'marital', 'education', 'default', 'housing',\n", |
59 | | - " 'loan', 'contact', 'day', 'month', 'campaign',\n", |
60 | | - " 'previous', 'poutcome']\n", |
| 57 | + "cat_cols = [\n", |
| 58 | + " \"job\",\n", |
| 59 | + " \"marital\",\n", |
| 60 | + " \"education\",\n", |
| 61 | + " \"default\",\n", |
| 62 | + " \"housing\",\n", |
| 63 | + " \"loan\",\n", |
| 64 | + " \"contact\",\n", |
| 65 | + " \"day\",\n", |
| 66 | + " \"month\",\n", |
| 67 | + " \"campaign\",\n", |
| 68 | + " \"previous\",\n", |
| 69 | + " \"poutcome\",\n", |
| 70 | + "]\n", |
61 | 71 | "\n", |
62 | | - "num_cols = ['age', 'balance', 'duration', 'pdays']\n", |
63 | | - "target=[\"y\"]" |
| 72 | + "num_cols = [\"age\", \"balance\", \"duration\", \"pdays\"]\n", |
| 73 | + "target = [\"y\"]" |
64 | 74 | ] |
65 | 75 | }, |
66 | 76 | { |
|
96 | 106 | "test_enc = test.copy()\n", |
97 | 107 | "for col in cat_cols:\n", |
98 | 108 | " enc = OrdinalEncoder(handle_unknown=\"use_encoded_value\", encoded_missing_value=np.nan, unknown_value=np.nan)\n", |
99 | | - " train_enc[col] = enc.fit_transform(train_enc[col].values.reshape(-1,1))\n", |
100 | | - " test_enc[col] = enc.transform(test_enc[col].values.reshape(-1,1))" |
| 109 | + " train_enc[col] = enc.fit_transform(train_enc[col].values.reshape(-1, 1))\n", |
| 110 | + " test_enc[col] = enc.transform(test_enc[col].values.reshape(-1, 1))" |
101 | 111 | ] |
102 | 112 | }, |
103 | 113 | { |
|
153 | 163 | "outputs": [], |
154 | 164 | "source": [ |
155 | 165 | "from pytorch_tabular import TabularModel\n", |
| 166 | + "from pytorch_tabular.config import DataConfig, OptimizerConfig, TrainerConfig\n", |
156 | 167 | "from pytorch_tabular.models import (\n", |
157 | | - " CategoryEmbeddingModelConfig, \n", |
158 | | - " FTTransformerConfig, \n", |
159 | | - " TabNetModelConfig, \n", |
160 | | - " GatedAdditiveTreeEnsembleConfig, \n", |
161 | | - " TabTransformerConfig, \n", |
162 | | - " AutoIntConfig\n", |
| 168 | + " AutoIntConfig,\n", |
| 169 | + " CategoryEmbeddingModelConfig,\n", |
| 170 | + " FTTransformerConfig,\n", |
| 171 | + " GatedAdditiveTreeEnsembleConfig,\n", |
| 172 | + " TabNetModelConfig,\n", |
| 173 | + " TabTransformerConfig,\n", |
163 | 174 | ")\n", |
164 | | - "from pytorch_tabular.config import DataConfig, OptimizerConfig, TrainerConfig, ExperimentConfig\n", |
165 | 175 | "from pytorch_tabular.models.common.heads import LinearHeadConfig" |
166 | 176 | ] |
167 | 177 | }, |
|
183 | 193 | "outputs": [], |
184 | 194 | "source": [ |
185 | 195 | "data_config = DataConfig(\n", |
186 | | - " target=target, #target should always be a list.\n", |
| 196 | + " target=target, # target should always be a list.\n", |
187 | 197 | " continuous_cols=num_cols,\n", |
188 | 198 | " categorical_cols=cat_cols,\n", |
189 | 199 | ")\n", |
190 | 200 | "\n", |
191 | 201 | "trainer_config = TrainerConfig(\n", |
192 | | - "# auto_lr_find=True, # Runs the LRFinder to automatically derive a learning rate\n", |
| 202 | + " # auto_lr_find=True, # Runs the LRFinder to automatically derive a learning rate\n", |
193 | 203 | " batch_size=256,\n", |
194 | 204 | " max_epochs=500,\n", |
195 | | - " early_stopping=\"valid_loss\", # Monitor valid_loss for early stopping\n", |
196 | | - " early_stopping_mode = \"min\", # Set the mode as min because for val_loss, lower is better\n", |
197 | | - " early_stopping_patience=5, # No. of epochs of degradation training will wait before terminating\n", |
198 | | - " checkpoints=\"valid_loss\", # Save best checkpoint monitoring val_loss\n", |
199 | | - " load_best=True, # After training, load the best checkpoint\n", |
| 205 | + " early_stopping=\"valid_loss\", # Monitor valid_loss for early stopping\n", |
| 206 | + " early_stopping_mode=\"min\", # Set the mode as min because for val_loss, lower is better\n", |
| 207 | + " early_stopping_patience=5, # No. of epochs of degradation training will wait before terminating\n", |
| 208 | + " checkpoints=\"valid_loss\", # Save best checkpoint monitoring val_loss\n", |
| 209 | + " load_best=True, # After training, load the best checkpoint\n", |
200 | 210 | ")\n", |
201 | 211 | "\n", |
202 | 212 | "optimizer_config = OptimizerConfig()\n", |
203 | 213 | "\n", |
204 | 214 | "head_config = LinearHeadConfig(\n", |
205 | | - " layers=\"\", # No additional layer in head, just a mapping layer to output_dim\n", |
| 215 | + " layers=\"\", # No additional layer in head, just a mapping layer to output_dim\n", |
206 | 216 | " dropout=0.1,\n", |
207 | | - " initialization=\"kaiming\"\n", |
208 | | - ").__dict__ # Convert to dict to pass to the model config (OmegaConf doesn't accept objects)" |
| 217 | + " initialization=\"kaiming\",\n", |
| 218 | + ").__dict__ # Convert to dict to pass to the model config (OmegaConf doesn't accept objects)" |
209 | 219 | ] |
210 | 220 | }, |
211 | 221 | { |
|
442 | 452 | "model_config = CategoryEmbeddingModelConfig(\n", |
443 | 453 | " task=\"classification\",\n", |
444 | 454 | " layers=\"64-32\", # Number of nodes in each layer\n", |
445 | | - " activation=\"ReLU\", # Activation between each layers\n", |
446 | | - " learning_rate = 1e-3,\n", |
447 | | - " head = \"LinearHead\", #Linear Head\n", |
448 | | - " head_config = head_config, # Linear Head Config\n", |
| 455 | + " activation=\"ReLU\", # Activation between each layers\n", |
| 456 | + " learning_rate=1e-3,\n", |
| 457 | + " head=\"LinearHead\", # Linear Head\n", |
| 458 | + " head_config=head_config, # Linear Head Config\n", |
449 | 459 | ")\n", |
450 | 460 | "\n", |
451 | 461 | "tabular_model = TabularModel(\n", |
|
455 | 465 | " trainer_config=trainer_config,\n", |
456 | 466 | ")\n", |
457 | 467 | "tabular_model.fit(train=train)\n", |
458 | | - "tabular_model.evaluate(test)\n" |
| 468 | + "tabular_model.evaluate(test)" |
459 | 469 | ] |
460 | 470 | }, |
461 | 471 | { |
|
709 | 719 | "source": [ |
710 | 720 | "model_config = GatedAdditiveTreeEnsembleConfig(\n", |
711 | 721 | " task=\"classification\",\n", |
712 | | - " learning_rate = 1e-3,\n", |
713 | | - " head = \"LinearHead\", #Linear Head\n", |
714 | | - " head_config = head_config, # Linear Head Config\n", |
| 722 | + " learning_rate=1e-3,\n", |
| 723 | + " head=\"LinearHead\", # Linear Head\n", |
| 724 | + " head_config=head_config, # Linear Head Config\n", |
715 | 725 | ")\n", |
716 | 726 | "\n", |
717 | 727 | "tabular_model = TabularModel(\n", |
|
983 | 993 | "source": [ |
984 | 994 | "model_config = GatedAdditiveTreeEnsembleConfig(\n", |
985 | 995 | " task=\"classification\",\n", |
986 | | - " learning_rate = 1e-3,\n", |
987 | | - " head = \"LinearHead\", #Linear Head\n", |
988 | | - " head_config = head_config, # Linear Head Config\n", |
| 996 | + " learning_rate=1e-3,\n", |
| 997 | + " head=\"LinearHead\", # Linear Head\n", |
| 998 | + " head_config=head_config, # Linear Head Config\n", |
989 | 999 | " gflu_stages=4,\n", |
990 | 1000 | " num_trees=30,\n", |
991 | 1001 | " tree_depth=5,\n", |
992 | | - " chain_trees=False\n", |
| 1002 | + " chain_trees=False,\n", |
993 | 1003 | ")\n", |
994 | 1004 | "\n", |
995 | 1005 | "tabular_model = TabularModel(\n", |
|
1265 | 1275 | "source": [ |
1266 | 1276 | "model_config = FTTransformerConfig(\n", |
1267 | 1277 | " task=\"classification\",\n", |
1268 | | - " learning_rate = 1e-3,\n", |
1269 | | - " head = \"LinearHead\", #Linear Head\n", |
1270 | | - " head_config = head_config, # Linear Head Config\n", |
| 1278 | + " learning_rate=1e-3,\n", |
| 1279 | + " head=\"LinearHead\", # Linear Head\n", |
| 1280 | + " head_config=head_config, # Linear Head Config\n", |
1271 | 1281 | ")\n", |
1272 | 1282 | "\n", |
1273 | 1283 | "tabular_model = TabularModel(\n", |
|
1543 | 1553 | "source": [ |
1544 | 1554 | "model_config = TabTransformerConfig(\n", |
1545 | 1555 | " task=\"classification\",\n", |
1546 | | - " learning_rate = 1e-3,\n", |
1547 | | - " head = \"LinearHead\", #Linear Head\n", |
1548 | | - " head_config = head_config, # Linear Head Config\n", |
| 1556 | + " learning_rate=1e-3,\n", |
| 1557 | + " head=\"LinearHead\", # Linear Head\n", |
| 1558 | + " head_config=head_config, # Linear Head Config\n", |
1549 | 1559 | ")\n", |
1550 | 1560 | "\n", |
1551 | 1561 | "tabular_model = TabularModel(\n", |
|
1819 | 1829 | "source": [ |
1820 | 1830 | "model_config = AutoIntConfig(\n", |
1821 | 1831 | " task=\"classification\",\n", |
1822 | | - " learning_rate = 1e-3,\n", |
1823 | | - " head = \"LinearHead\", #Linear Head\n", |
1824 | | - " head_config = head_config, # Linear Head Config\n", |
| 1832 | + " learning_rate=1e-3,\n", |
| 1833 | + " head=\"LinearHead\", # Linear Head\n", |
| 1834 | + " head_config=head_config, # Linear Head Config\n", |
1825 | 1835 | ")\n", |
1826 | 1836 | "\n", |
1827 | 1837 | "tabular_model = TabularModel(\n", |
|
2095 | 2105 | "source": [ |
2096 | 2106 | "model_config = TabNetModelConfig(\n", |
2097 | 2107 | " task=\"classification\",\n", |
2098 | | - " learning_rate = 1e-3,\n", |
2099 | | - " head = \"LinearHead\", #Linear Head\n", |
2100 | | - " head_config = head_config, # Linear Head Config\n", |
| 2108 | + " learning_rate=1e-3,\n", |
| 2109 | + " head=\"LinearHead\", # Linear Head\n", |
| 2110 | + " head_config=head_config, # Linear Head Config\n", |
2101 | 2111 | ")\n", |
2102 | 2112 | "\n", |
2103 | 2113 | "tabular_model = TabularModel(\n", |
|
0 commit comments