1+ """module preprocess_utils. Specifies utilities for
2+ preprocessing a data set.
3+
4+ """
15import csv
26import pandas as pd
37from pathlib import Path
48from typing import List
59
610
7- def read_csv (filename : Path , features_drop_names : List [str ], names : List [str ], delimiter = ',' ):
8- """
9- Read the csv file specified at the given filename
10- :param filename: The path to the file to read
11- :param features_drop_names: Features to drop
12- :param names: Names of columns
13- :return:
11+ def read_csv (filename : Path , features_drop_names : List [str ], names : List [str ], delimiter = ',' ) -> pd .DataFrame :
12+
13+ """Read the csv file specified at the given filename
14+
15+ Parameters
16+ ----------
17+ filename: Filename to read
18+ features_drop_names: Which columns to drop
19+ names: Column names
20+ delimiter: file delimiter
21+
22+ Returns
23+ -------
24+
25+ A pandas DataFrame
1426 """
1527
28+
1629 df = pd .read_csv (filepath_or_buffer = filename , sep = delimiter , header = 0 , names = names )
1730
1831 if len (features_drop_names ) != 0 :
@@ -25,14 +38,20 @@ def read_csv(filename: Path, features_drop_names: List[str], names: List[str], d
2538
2639
2740def replace (ds : pd .DataFrame , options : dict ) -> pd .DataFrame :
28- """
29- Replace the values in the given data set according to the passed
41+ """Replace the values in the given data set according to the passed
3042 options. The options should specify for each column the values
3143 to be changed and the corresponding values to set
32- :param ds: The dataframe to replace
33- :param options:
34- :return: None
44+
45+ Parameters
46+ ----------
47+ ds
48+ options
49+
50+ Returns
51+ -------
52+
3553 """
54+
3655 for col in options :
3756
3857 # get the values to change for each column
@@ -49,5 +68,16 @@ def replace(ds: pd.DataFrame, options: dict) -> pd.DataFrame:
4968
5069
def change_column_types(ds: pd.DataFrame, column_types) -> pd.DataFrame:
    """Cast the columns of the given data set to the requested types.

    Parameters
    ----------
    ds: The data set whose columns are cast
    column_types: Target type specification; it is forwarded unchanged
        as the ``dtype`` argument of ``ds.astype``

    Returns
    -------

    The result of ``ds.astype(dtype=column_types)``
    """
    # The cast result is handed straight back; the caller's own reference
    # to ``ds`` is never rebound by this function.
    return ds.astype(dtype=column_types)
0 commit comments