import random
import numpy as np
from pathlib import Path

from src.algorithms.sarsa_semi_gradient import SARSAnConfig, SARSAn
from src.algorithms.trainer import Trainer
from src.datasets.datasets_loaders import MockSubjectsLoader
from src.spaces.action_space import ActionSpace
from src.spaces.actions import ActionIdentity, ActionStringGeneralize, ActionNumericBinGeneralize
from src.utils.reward_manager import RewardManager
from src.utils.serial_hierarchy import SerialHierarchy
from src.policies.epsilon_greedy_policy import EpsilonGreedyPolicy, EpsilonDecayOption
from src.policies.softmax_policy import SoftMaxPolicy
from src.utils.numeric_distance_type import NumericDistanceType
from src.utils.string_distance_calculator import StringDistanceType
from src.utils.distortion_calculator import DistortionCalculationType, DistortionCalculator
from src.spaces.discrete_state_environment import DiscreteStateEnvironment, DiscreteEnvConfig
from src.spaces.tiled_environment import TiledEnv
from src.utils.iteration_control import IterationControl
from src.utils.plot_utils import plot_running_avg
from src.utils import INFO

# configuration params
EPS = 1.0
EPSILON_DECAY_OPTION = EpsilonDecayOption.CONSTANT_RATE  # .INVERSE_STEP
EPSILON_DECAY_FACTOR = 0.01
GAMMA = 0.99
ALPHA = 0.1
N_EPISODES = 1001
N_ITRS_PER_EPISODE = 30
N_STATES = 10
REWARD_FACTOR = 0.95
PUNISH_FACTOR = 2.0


# fix the rewards. We assume that any average distortion in
# [0.3, 0.7] suits us
MAX_DISTORTION = 0.7
MIN_DISTORTION = 0.3
OUT_OF_MAX_BOUND_REWARD = -1.0
OUT_OF_MIN_BOUND_REWARD = -1.0
IN_BOUNDS_REWARD = 5.0
OUTPUT_MSG_FREQUENCY = 100
N_ROUNDS_BELOW_MIN_DISTORTION = 10
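# Reward scheme (a sketch of the intent; the exact shaping lives in
# RewardManager and the environment configured below): the agent collects
# IN_BOUNDS_REWARD whenever the average dataset distortion lies in
# [MIN_DISTORTION, MAX_DISTORTION] and the negative out-of-bound rewards
# otherwise; REWARD_FACTOR and PUNISH_FACTOR are assumed to scale these signals.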
SAVE_DISTORTED_SETS_DIR = "/home/alex/qi3/drl_anonymity/src/examples/nstep_semi_grad_sarsa_learn_distorted_sets/distorted_set"


def get_ethnicity_hierarchy():
    ethnicity_hierarchy = SerialHierarchy(values={})

    ethnicity_hierarchy["Mixed White/Asian"] = "White/Asian"
    ethnicity_hierarchy["White/Asian"] = "Mixed"

    ethnicity_hierarchy["Chinese"] = "Asian"
    ethnicity_hierarchy["Indian"] = "Asian"
    ethnicity_hierarchy["Mixed White/Black African"] = "White/Black"
    ethnicity_hierarchy["White/Black"] = "Mixed"

    ethnicity_hierarchy["Black African"] = "African"
    ethnicity_hierarchy["African"] = "Black"
    ethnicity_hierarchy["Asian other"] = "Asian"
    ethnicity_hierarchy["Black other"] = "Black"
    ethnicity_hierarchy["Mixed White/Black Caribbean"] = "White/Black"

    ethnicity_hierarchy["Mixed other"] = "Mixed"
    ethnicity_hierarchy["Arab"] = "Asian"
    ethnicity_hierarchy["White Irish"] = "Irish"
    ethnicity_hierarchy["Irish"] = "European"
    ethnicity_hierarchy["Not stated"] = "Not stated"
    ethnicity_hierarchy["White Gypsy/Traveller"] = "White"
    ethnicity_hierarchy["White British"] = "British"
    ethnicity_hierarchy["British"] = "European"
    ethnicity_hierarchy["Bangladeshi"] = "Asian"
    ethnicity_hierarchy["White other"] = "White"
    ethnicity_hierarchy["Black Caribbean"] = "Caribbean"
    ethnicity_hierarchy["Caribbean"] = "Black"
    ethnicity_hierarchy["Pakistani"] = "Asian"

    ethnicity_hierarchy["European"] = "European"
    ethnicity_hierarchy["Mixed"] = "Mixed"
    ethnicity_hierarchy["Asian"] = "Asian"
    ethnicity_hierarchy["Black"] = "Black"
    ethnicity_hierarchy["White"] = "White"
    return ethnicity_hierarchy
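# A minimal sketch of how the hierarchy above is consumed, assuming
# SerialHierarchy acts as a value -> parent mapping: each application of
# ActionStringGeneralize replaces a cell value with its parent, e.g.
# "Mixed White/Asian" -> "White/Asian" -> "Mixed" and
# "Black Caribbean" -> "Caribbean" -> "Black"; root values such as "Mixed",
# "Asian" or "White" map to themselves, so further generalization is a no-op.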


def load_dataset() -> MockSubjectsLoader:
    # specify the columns to drop
    drop_columns = MockSubjectsLoader.FEATURES_DROP_NAMES + ["preventative_treatment", "gender",
                                                             "education", "mutation_status"]
    MockSubjectsLoader.FEATURES_DROP_NAMES = drop_columns

    # normalize the salary column so that we work with
    # salaries in [0, 1]; this is needed because we will
    # be using normalized distances
    MockSubjectsLoader.NORMALIZED_COLUMNS = ["salary"]

    # specify the columns to use
    MockSubjectsLoader.COLUMNS_TYPES = {"ethnicity": str, "salary": float, "diagnosis": int}
    ds = MockSubjectsLoader()

    assert ds.n_columns == 3, "Invalid number of columns {0}; expected 3".format(ds.n_columns)

    return ds


if __name__ == '__main__':

    # set the seed for the random engine
    random.seed(42)

    ds = load_dataset()

    # create bins for the salary generalization
    unique_salary = ds.get_column_unique_values(col_name="salary")
    unique_salary.sort()

    # modify slightly the max value because otherwise
    # we get out of bounds for the maximum salary
    bins = np.linspace(unique_salary[0], unique_salary[-1] + 1, N_STATES)
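    # With N_STATES = 10 the array above holds the bin edges that
    # ActionNumericBinGeneralize works from: a (normalized) salary is assumed
    # to be generalized to the bin that contains it, so the continuous salary
    # column collapses to at most N_STATES distinct values.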

    # establish the action space. For the ethnicity and salary columns we use
    # an identity and a generalization action; for the ```diagnosis``` column
    # we only use the identity action since we do not alter it
    action_space = ActionSpace(n=5)
    action_space.add_many(ActionIdentity(column_name="ethnicity"),
                          ActionStringGeneralize(column_name="ethnicity",
                                                 generalization_table=get_ethnicity_hierarchy()),
                          ActionIdentity(column_name="salary"),
                          ActionNumericBinGeneralize(column_name="salary", generalization_table=bins),
                          ActionIdentity(column_name="diagnosis"))

    action_space.shuffle()

    env_config = DiscreteEnvConfig()

    env_config.action_space = action_space
    env_config.reward_manager = RewardManager(bounds=(MIN_DISTORTION, MAX_DISTORTION),
                                              out_of_max_bound_reward=OUT_OF_MAX_BOUND_REWARD,
                                              out_of_min_bound_reward=OUT_OF_MIN_BOUND_REWARD,
                                              in_bounds_reward=IN_BOUNDS_REWARD)
    env_config.data_set = ds
    env_config.gamma = GAMMA
    env_config.max_distortion = MAX_DISTORTION
    env_config.min_distortion = MIN_DISTORTION
    env_config.n_states = N_STATES
    env_config.n_rounds_below_min_distortion = N_ROUNDS_BELOW_MIN_DISTORTION
    env_config.distorted_set_path = Path(SAVE_DISTORTED_SETS_DIR)
    env_config.distortion_calculator = DistortionCalculator(
        numeric_column_distortion_metric_type=NumericDistanceType.L2_AVG,
        string_column_distortion_metric_type=StringDistanceType.COSINE_NORMALIZE,
        dataset_distortion_type=DistortionCalculationType.SUM)
    env_config.reward_factor = REWARD_FACTOR
    env_config.punish_factor = PUNISH_FACTOR
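    # Distortion bookkeeping (an assumption about DistortionCalculator's
    # semantics, based on the enum names): the numeric salary column is
    # measured with an averaged L2 distance, the string ethnicity column with
    # a normalized cosine distance, and the dataset distortion that
    # MIN/MAX_DISTORTION refer to is the SUM of the per-column distortions.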

    # create the environment
    env = DiscreteStateEnvironment(env_config=env_config)

    # a tiled environment is instantiated below, but in this example the
    # trainer operates directly on the discrete environment
    tiled_env = TiledEnv()
    env.reset()

    # save the data before distortion so that we can
    # later load it on ARX
    env.save_current_dataset(episode_index=-1, save_index=False)

    # configuration for the n-step semi-gradient SARSA learner
    algo_config = SARSAnConfig()
    algo_config.n_itrs_per_episode = N_ITRS_PER_EPISODE
    algo_config.gamma = GAMMA
    algo_config.alpha = ALPHA
    # algo_config.policy = SoftMaxPolicy(n_actions=len(action_space), tau=1.2)
    algo_config.policy = EpsilonGreedyPolicy(eps=EPS, env=env, decay_op=EPSILON_DECAY_OPTION,
                                             epsilon_decay_factor=EPSILON_DECAY_FACTOR)
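    # With EpsilonDecayOption.CONSTANT_RATE the exploration probability is
    # assumed to drop by EPSILON_DECAY_FACTOR after each episode, so starting
    # from EPS = 1.0 the policy becomes essentially greedy after roughly
    # 1.0 / 0.01 = 100 episodes; the commented-out INVERSE_STEP option would
    # presumably decay it proportionally to the inverse of the step count.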

    # the learner we want to train
    agent = SARSAn(algo_config=algo_config)

    configuration = {"n_episodes": N_EPISODES, "output_msg_frequency": OUTPUT_MSG_FREQUENCY}

    # create a trainer to train the SARSA agent
    trainer = Trainer(env=env, agent=agent, configuration=configuration)
    trainer.train()

    # avg_rewards = trainer.avg_rewards()
    avg_rewards = trainer.total_rewards
    plot_running_avg(avg_rewards, steps=100,
                     xlabel="Episodes", ylabel="Reward",
                     title="Running reward average over 100 episodes")

    avg_episode_dist = np.array(trainer.total_distortions)
    print("{0} Max/Min distortion {1}/{2}".format(INFO, np.max(avg_episode_dist), np.min(avg_episode_dist)))

    plot_running_avg(avg_episode_dist, steps=100,
                     xlabel="Episodes", ylabel="Distortion",
                     title="Running distortion average over 100 episodes")

    print("=============================================")
    print("{0} Generating distorted dataset".format(INFO))
    # Let's play
    env.reset()

    stop_criterion = IterationControl(n_itrs=10, min_dist=MIN_DISTORTION, max_dist=MAX_DISTORTION)
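    # The trained agent now distorts the dataset greedily: IterationControl is
    # assumed to stop the run after at most 10 iterations, or earlier once the
    # distortion settles inside [MIN_DISTORTION, MAX_DISTORTION].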
    agent.play(env=env, stop_criterion=stop_criterion)
    env.save_current_dataset(episode_index=-2, save_index=False)
    print("{0} Done....".format(INFO))
    print("=============================================")