Skip to content

Commit 75b21c4

Browse files
[pre-commit.ci] pre-commit suggestions (#417)
* [pre-commit.ci] pre-commit suggestions

  updates:
  - [github.com/pre-commit/pre-commit-hooks: v4.5.0 → v4.6.0](pre-commit/pre-commit-hooks@v4.5.0...v4.6.0)
  - [github.com/astral-sh/ruff-pre-commit: v0.2.2 → v0.6.4](astral-sh/ruff-pre-commit@v0.2.2...v0.6.4)

* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci

* fixed

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com>
Co-authored-by: Jirka B <j.borovec+github@gmail.com>
1 parent 918ecaa commit 75b21c4

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+145
-76
lines changed

.pre-commit-config.yaml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@ ci:
88

99
repos:
1010
- repo: https://github.com/pre-commit/pre-commit-hooks
11-
rev: v4.5.0
11+
rev: v4.6.0
1212
hooks:
1313
- id: end-of-file-fixer
1414
exclude: "setup.cfg"
@@ -48,15 +48,15 @@ repos:
4848
)
4949
5050
- repo: https://github.com/astral-sh/ruff-pre-commit
51-
rev: v0.2.2
51+
rev: v0.6.4
5252
hooks:
5353
- id: ruff
5454
args: ["--fix"]
5555
- id: ruff-format
5656
- id: ruff
5757

5858
- repo: https://github.com/pre-commit/mirrors-prettier
59-
rev: v4.0.0-alpha.8
59+
rev: v3.1.0
6060
hooks:
6161
- id: prettier
6262
files: \.(json|yml|yaml|toml)

examples/PyTorch Tabular with Bank Marketing Dataset.ipynb

Lines changed: 61 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -8,10 +8,9 @@
88
"outputs": [],
99
"source": [
1010
"import numpy as np\n",
11-
"import pandas as pd\n",
1211
"from sklearn.datasets import fetch_openml\n",
13-
"from sklearn.model_selection import train_test_split\n",
14-
"from sklearn.metrics import accuracy_score, log_loss"
12+
"from sklearn.metrics import accuracy_score, log_loss\n",
13+
"from sklearn.model_selection import train_test_split"
1514
]
1615
},
1716
{
@@ -55,12 +54,23 @@
5554
"metadata": {},
5655
"outputs": [],
5756
"source": [
58-
"cat_cols = ['job', 'marital', 'education', 'default', 'housing',\n",
59-
" 'loan', 'contact', 'day', 'month', 'campaign',\n",
60-
" 'previous', 'poutcome']\n",
57+
"cat_cols = [\n",
58+
" \"job\",\n",
59+
" \"marital\",\n",
60+
" \"education\",\n",
61+
" \"default\",\n",
62+
" \"housing\",\n",
63+
" \"loan\",\n",
64+
" \"contact\",\n",
65+
" \"day\",\n",
66+
" \"month\",\n",
67+
" \"campaign\",\n",
68+
" \"previous\",\n",
69+
" \"poutcome\",\n",
70+
"]\n",
6171
"\n",
62-
"num_cols = ['age', 'balance', 'duration', 'pdays']\n",
63-
"target=[\"y\"]"
72+
"num_cols = [\"age\", \"balance\", \"duration\", \"pdays\"]\n",
73+
"target = [\"y\"]"
6474
]
6575
},
6676
{
@@ -96,8 +106,8 @@
96106
"test_enc = test.copy()\n",
97107
"for col in cat_cols:\n",
98108
" enc = OrdinalEncoder(handle_unknown=\"use_encoded_value\", encoded_missing_value=np.nan, unknown_value=np.nan)\n",
99-
" train_enc[col] = enc.fit_transform(train_enc[col].values.reshape(-1,1))\n",
100-
" test_enc[col] = enc.transform(test_enc[col].values.reshape(-1,1))"
109+
" train_enc[col] = enc.fit_transform(train_enc[col].values.reshape(-1, 1))\n",
110+
" test_enc[col] = enc.transform(test_enc[col].values.reshape(-1, 1))"
101111
]
102112
},
103113
{
@@ -153,15 +163,15 @@
153163
"outputs": [],
154164
"source": [
155165
"from pytorch_tabular import TabularModel\n",
166+
"from pytorch_tabular.config import DataConfig, OptimizerConfig, TrainerConfig\n",
156167
"from pytorch_tabular.models import (\n",
157-
" CategoryEmbeddingModelConfig, \n",
158-
" FTTransformerConfig, \n",
159-
" TabNetModelConfig, \n",
160-
" GatedAdditiveTreeEnsembleConfig, \n",
161-
" TabTransformerConfig, \n",
162-
" AutoIntConfig\n",
168+
" AutoIntConfig,\n",
169+
" CategoryEmbeddingModelConfig,\n",
170+
" FTTransformerConfig,\n",
171+
" GatedAdditiveTreeEnsembleConfig,\n",
172+
" TabNetModelConfig,\n",
173+
" TabTransformerConfig,\n",
163174
")\n",
164-
"from pytorch_tabular.config import DataConfig, OptimizerConfig, TrainerConfig, ExperimentConfig\n",
165175
"from pytorch_tabular.models.common.heads import LinearHeadConfig"
166176
]
167177
},
@@ -183,29 +193,29 @@
183193
"outputs": [],
184194
"source": [
185195
"data_config = DataConfig(\n",
186-
" target=target, #target should always be a list.\n",
196+
" target=target, # target should always be a list.\n",
187197
" continuous_cols=num_cols,\n",
188198
" categorical_cols=cat_cols,\n",
189199
")\n",
190200
"\n",
191201
"trainer_config = TrainerConfig(\n",
192-
"# auto_lr_find=True, # Runs the LRFinder to automatically derive a learning rate\n",
202+
" # auto_lr_find=True, # Runs the LRFinder to automatically derive a learning rate\n",
193203
" batch_size=256,\n",
194204
" max_epochs=500,\n",
195-
" early_stopping=\"valid_loss\", # Monitor valid_loss for early stopping\n",
196-
" early_stopping_mode = \"min\", # Set the mode as min because for val_loss, lower is better\n",
197-
" early_stopping_patience=5, # No. of epochs of degradation training will wait before terminating\n",
198-
" checkpoints=\"valid_loss\", # Save best checkpoint monitoring val_loss\n",
199-
" load_best=True, # After training, load the best checkpoint\n",
205+
" early_stopping=\"valid_loss\", # Monitor valid_loss for early stopping\n",
206+
" early_stopping_mode=\"min\", # Set the mode as min because for val_loss, lower is better\n",
207+
" early_stopping_patience=5, # No. of epochs of degradation training will wait before terminating\n",
208+
" checkpoints=\"valid_loss\", # Save best checkpoint monitoring val_loss\n",
209+
" load_best=True, # After training, load the best checkpoint\n",
200210
")\n",
201211
"\n",
202212
"optimizer_config = OptimizerConfig()\n",
203213
"\n",
204214
"head_config = LinearHeadConfig(\n",
205-
" layers=\"\", # No additional layer in head, just a mapping layer to output_dim\n",
215+
" layers=\"\", # No additional layer in head, just a mapping layer to output_dim\n",
206216
" dropout=0.1,\n",
207-
" initialization=\"kaiming\"\n",
208-
").__dict__ # Convert to dict to pass to the model config (OmegaConf doesn't accept objects)"
217+
" initialization=\"kaiming\",\n",
218+
").__dict__ # Convert to dict to pass to the model config (OmegaConf doesn't accept objects)"
209219
]
210220
},
211221
{
@@ -442,10 +452,10 @@
442452
"model_config = CategoryEmbeddingModelConfig(\n",
443453
" task=\"classification\",\n",
444454
" layers=\"64-32\", # Number of nodes in each layer\n",
445-
" activation=\"ReLU\", # Activation between each layers\n",
446-
" learning_rate = 1e-3,\n",
447-
" head = \"LinearHead\", #Linear Head\n",
448-
" head_config = head_config, # Linear Head Config\n",
455+
" activation=\"ReLU\", # Activation between each layers\n",
456+
" learning_rate=1e-3,\n",
457+
" head=\"LinearHead\", # Linear Head\n",
458+
" head_config=head_config, # Linear Head Config\n",
449459
")\n",
450460
"\n",
451461
"tabular_model = TabularModel(\n",
@@ -455,7 +465,7 @@
455465
" trainer_config=trainer_config,\n",
456466
")\n",
457467
"tabular_model.fit(train=train)\n",
458-
"tabular_model.evaluate(test)\n"
468+
"tabular_model.evaluate(test)"
459469
]
460470
},
461471
{
@@ -709,9 +719,9 @@
709719
"source": [
710720
"model_config = GatedAdditiveTreeEnsembleConfig(\n",
711721
" task=\"classification\",\n",
712-
" learning_rate = 1e-3,\n",
713-
" head = \"LinearHead\", #Linear Head\n",
714-
" head_config = head_config, # Linear Head Config\n",
722+
" learning_rate=1e-3,\n",
723+
" head=\"LinearHead\", # Linear Head\n",
724+
" head_config=head_config, # Linear Head Config\n",
715725
")\n",
716726
"\n",
717727
"tabular_model = TabularModel(\n",
@@ -983,13 +993,13 @@
983993
"source": [
984994
"model_config = GatedAdditiveTreeEnsembleConfig(\n",
985995
" task=\"classification\",\n",
986-
" learning_rate = 1e-3,\n",
987-
" head = \"LinearHead\", #Linear Head\n",
988-
" head_config = head_config, # Linear Head Config\n",
996+
" learning_rate=1e-3,\n",
997+
" head=\"LinearHead\", # Linear Head\n",
998+
" head_config=head_config, # Linear Head Config\n",
989999
" gflu_stages=4,\n",
9901000
" num_trees=30,\n",
9911001
" tree_depth=5,\n",
992-
" chain_trees=False\n",
1002+
" chain_trees=False,\n",
9931003
")\n",
9941004
"\n",
9951005
"tabular_model = TabularModel(\n",
@@ -1265,9 +1275,9 @@
12651275
"source": [
12661276
"model_config = FTTransformerConfig(\n",
12671277
" task=\"classification\",\n",
1268-
" learning_rate = 1e-3,\n",
1269-
" head = \"LinearHead\", #Linear Head\n",
1270-
" head_config = head_config, # Linear Head Config\n",
1278+
" learning_rate=1e-3,\n",
1279+
" head=\"LinearHead\", # Linear Head\n",
1280+
" head_config=head_config, # Linear Head Config\n",
12711281
")\n",
12721282
"\n",
12731283
"tabular_model = TabularModel(\n",
@@ -1543,9 +1553,9 @@
15431553
"source": [
15441554
"model_config = TabTransformerConfig(\n",
15451555
" task=\"classification\",\n",
1546-
" learning_rate = 1e-3,\n",
1547-
" head = \"LinearHead\", #Linear Head\n",
1548-
" head_config = head_config, # Linear Head Config\n",
1556+
" learning_rate=1e-3,\n",
1557+
" head=\"LinearHead\", # Linear Head\n",
1558+
" head_config=head_config, # Linear Head Config\n",
15491559
")\n",
15501560
"\n",
15511561
"tabular_model = TabularModel(\n",
@@ -1819,9 +1829,9 @@
18191829
"source": [
18201830
"model_config = AutoIntConfig(\n",
18211831
" task=\"classification\",\n",
1822-
" learning_rate = 1e-3,\n",
1823-
" head = \"LinearHead\", #Linear Head\n",
1824-
" head_config = head_config, # Linear Head Config\n",
1832+
" learning_rate=1e-3,\n",
1833+
" head=\"LinearHead\", # Linear Head\n",
1834+
" head_config=head_config, # Linear Head Config\n",
18251835
")\n",
18261836
"\n",
18271837
"tabular_model = TabularModel(\n",
@@ -2095,9 +2105,9 @@
20952105
"source": [
20962106
"model_config = TabNetModelConfig(\n",
20972107
" task=\"classification\",\n",
2098-
" learning_rate = 1e-3,\n",
2099-
" head = \"LinearHead\", #Linear Head\n",
2100-
" head_config = head_config, # Linear Head Config\n",
2108+
" learning_rate=1e-3,\n",
2109+
" head=\"LinearHead\", # Linear Head\n",
2110+
" head_config=head_config, # Linear Head Config\n",
21012111
")\n",
21022112
"\n",
21032113
"tabular_model = TabularModel(\n",

examples/__only_for_dev__/to_test_classification.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
from pathlib import Path
22

33
import pandas as pd
4+
from sklearn.model_selection import train_test_split
45

56
# from torch.utils import data
67
from pytorch_tabular.config import DataConfig, ExperimentConfig, OptimizerConfig, TrainerConfig
@@ -9,7 +10,6 @@
910

1011
# import wget
1112
from pytorch_tabular.utils import get_class_weighted_cross_entropy
12-
from sklearn.model_selection import train_test_split
1313

1414
# torch.manual_seed(0)
1515
# np.random.seed(0)

examples/__only_for_dev__/to_test_node.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,10 +3,11 @@
33

44
import numpy as np
55
import pandas as pd
6+
from sklearn.datasets import fetch_california_housing, fetch_covtype
7+
68
from pytorch_tabular.config import DataConfig, OptimizerConfig, TrainerConfig
79
from pytorch_tabular.models.node import NodeConfig
810
from pytorch_tabular.tabular_model import TabularModel
9-
from sklearn.datasets import fetch_california_housing, fetch_covtype
1011

1112

1213
def regression_data():

examples/__only_for_dev__/to_test_regression.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,10 @@
11
import pandas as pd
22
import torch
3+
from sklearn.datasets import fetch_california_housing
4+
35
from pytorch_tabular.config import DataConfig, OptimizerConfig, TrainerConfig
46
from pytorch_tabular.models.category_embedding.config import CategoryEmbeddingModelConfig
57
from pytorch_tabular.tabular_model import TabularModel
6-
from sklearn.datasets import fetch_california_housing
78

89
# from pytorch_tabular.models.mixture_density import (
910
# CategoryEmbeddingMDNConfig,

examples/__only_for_dev__/to_test_regression_custom_models.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,8 @@
55
import torch
66
import torch.nn as nn
77
from omegaconf import DictConfig
8+
from sklearn.datasets import fetch_california_housing
9+
810
from pytorch_tabular.config import DataConfig, ModelConfig, OptimizerConfig, TrainerConfig
911

1012
# from pytorch_tabular.models.deep_gmm import (
@@ -14,7 +16,6 @@
1416

1517
# from pytorch_tabular.models.node import utils as utils
1618
from pytorch_tabular.tabular_model import TabularModel
17-
from sklearn.datasets import fetch_california_housing
1819

1920

2021
@dataclass

examples/covertype_classification.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,11 +2,12 @@
22

33
import pandas as pd
44
import wget
5+
from sklearn.model_selection import train_test_split
6+
57
from pytorch_tabular.config import DataConfig, OptimizerConfig, TrainerConfig
68
from pytorch_tabular.models import CategoryEmbeddingModelConfig
79
from pytorch_tabular.models.common.heads import LinearHeadConfig
810
from pytorch_tabular.tabular_model import TabularModel
9-
from sklearn.model_selection import train_test_split
1011

1112
BASE_DIR = Path.home().joinpath("data")
1213
datafile = BASE_DIR.joinpath("covtype.data.gz")

examples/covertype_classification_using_yaml.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,9 +2,10 @@
22

33
import pandas as pd
44
import wget
5-
from pytorch_tabular.tabular_model import TabularModel
65
from sklearn.model_selection import train_test_split
76

7+
from pytorch_tabular.tabular_model import TabularModel
8+
89
BASE_DIR = Path.home().joinpath("data")
910
datafile = BASE_DIR.joinpath("covtype.data.gz")
1011
datafile.parent.mkdir(parents=True, exist_ok=True)

setup.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
#!/usr/bin/env python
22

33
"""The setup script."""
4+
45
import os
56

67
from setuptools import find_packages, setup

src/pytorch_tabular/categorical_encoders.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
# For license information, see LICENSE.TXT
44
# Modified https://github.com/tcassou/mlencoders/blob/master/mlencoders/base_encoder.py to suit NN encoding
55
"""Category Encoders."""
6+
67
from pandas import DataFrame, Series, unique
78

89
try:

0 commit comments

Comments
 (0)