Skip to content

Commit 53f2d10

Browse files
committed
#32 Add hierarchy creation for ARX
1 parent cf104c5 commit 53f2d10

File tree

1 file changed

+66
-0
lines changed

1 file changed

+66
-0
lines changed
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
"""
2+
This example shows how to create hierarchies suitable to
3+
be loaded in the ARX tool
4+
"""
5+
import csv
6+
from src.datasets.datasets_loaders import MockSubjectsLoader
7+
8+
9+
def get_ethnicity_hierarchy():
    """Return the generalization hierarchy for ethnicity values.

    Each key is a raw ethnicity label. The mapped value is a two-element
    list holding the generalizations of that label, in the order they are
    written after the raw value in a hierarchy row.

    Returns:
        A new dict (built on every call, so callers may mutate it freely)
        mapping each ethnicity label to its list of two generalizations.
    """
    return {
        "Mixed White/Asian": ["White/Asian", "Mixed"],
        "Chinese": ["Asian", "Asian"],
        "Indian": ["Asian", "Asian"],
        "Mixed White/Black African": ["White/Black", "Mixed"],
        "Black African": ["Black", "African"],
        "Asian other": ["Asian", "Other"],
        "Black other": ["Black", "Other"],
        "Mixed White/Black Caribbean": ["White/Black", "Mixed"],
        # The second level was previously the truncated "Mixe", which
        # produced a stray category distinct from "Mixed".
        "Mixed other": ["Mixed", "Mixed"],
        "Arab": ["Asian", "Asian"],
        "White Irish": ["Irish", "European"],
        "Not stated": ["Not stated", "Not stated"],
        "White Gypsy/Traveller": ["White", "White"],
        "White British": ["British", "European"],
        "Bangladeshi": ["Asian", "Asian"],
        "White other": ["White", "White"],
        "Black Caribbean": ["Black", "Caribbean"],
        "Pakistani": ["Asian", "Asian"],
    }
33+
34+
35+
if __name__ == '__main__':

    # Extend the loader's class-level drop list with the extra columns
    # this example does not need, then write it back on the class so the
    # instance created below picks it up.
    drop_columns = MockSubjectsLoader.FEATURES_DROP_NAMES + ["preventative_treatment", "gender",
                                                             "education", "mutation_status"]
    MockSubjectsLoader.FEATURES_DROP_NAMES = drop_columns

    # Mark the salary column for normalization.
    MockSubjectsLoader.NORMALIZED_COLUMNS = ["salary"]

    # Specify the columns to use and their types.
    MockSubjectsLoader.COLUMNS_TYPES = {"ethnicity": str, "salary": float, "diagnosis": int}
    ds = MockSubjectsLoader()

    ethnicity_map = get_ethnicity_hierarchy()

    # NOTE(review): absolute, machine-specific output path -- adjust it
    # before running this example on another machine.
    filename = "/home/alex/qi3/drl_anonymity/data/hierarchies/ethnicity_hierarchy.csv"

    # newline='' is what the csv module requires for files handed to
    # csv.writer; without it, platforms that translate line endings end up
    # with an extra blank line after every row.
    with open(filename, 'w', newline='') as fh:
        writer = csv.writer(fh, delimiter=",")

        # Walk the ethnicity column and emit one hierarchy row per value:
        # the raw value followed by its generalizations.
        ethnicity_column = ds.get_column(col_name="ethnicity").values

        for val in ethnicity_column:

            if val not in ethnicity_map:
                # Interpolate the offending value; the message previously
                # contained an unformatted "{0}" placeholder.
                raise ValueError("Value {0} not in ethnicity map".format(val))

            writer.writerow([val, *ethnicity_map[val]])
66+

0 commit comments

Comments
 (0)