
Commit 55c989a

Merge pull request #57 from pockerman/fix_documentation
Fix documentation
2 parents fc616ea + 3f5a77f commit 55c989a

14 files changed (+798 / −101 lines)

build_sphinx_doc.sh

Lines changed: 2 additions & 2 deletions
@@ -1,4 +1,4 @@
 #sphinx-quickstart docs
 
-sphinx-apidoc -f -o docs/source docs/projectdir
-#sphinx-build -b html docs/source/ docs/build/html
+#sphinx-apidoc -f -o docs/source docs/source/API
+sphinx-build -b html docs/source/ docs/build/html

docs/requirements.txt

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
numpy
gym
torch
Lines changed: 230 additions & 0 deletions
@@ -0,0 +1,230 @@
import random
import numpy as np
from pathlib import Path

from src.algorithms.sarsa_semi_gradient import SARSAnConfig, SARSAn
from src.algorithms.q_estimator import QEstimator
from src.algorithms.trainer import Trainer
from src.datasets.datasets_loaders import MockSubjectsLoader
from src.spaces.action_space import ActionSpace
from src.spaces.actions import ActionIdentity, ActionStringGeneralize, ActionNumericBinGeneralize
from src.utils.reward_manager import RewardManager
from src.utils.serial_hierarchy import SerialHierarchy
from src.policies.epsilon_greedy_policy import EpsilonGreedyPolicy, EpsilonDecayOption
from src.policies.softmax_policy import SoftMaxPolicy
from src.utils.numeric_distance_type import NumericDistanceType
from src.utils.string_distance_calculator import StringDistanceType
from src.utils.distortion_calculator import DistortionCalculationType, DistortionCalculator
from src.spaces.discrete_state_environment import DiscreteStateEnvironment, DiscreteEnvConfig
from src.spaces.tiled_environment import TiledEnv
from src.utils.iteration_control import IterationControl
from src.utils.plot_utils import plot_running_avg
from src.utils import INFO

# configuration params
EPS = 1.0
EPSILON_DECAY_OPTION = EpsilonDecayOption.CONSTANT_RATE  # .INVERSE_STEP
EPSILON_DECAY_FACTOR = 0.01
GAMMA = 0.99
ALPHA = 0.1
N_EPISODES = 1001
N_ITRS_PER_EPISODE = 30
N_STATES = 10
REWARD_FACTOR = 0.95
PUNISH_FACTOR = 2.0


# fix the rewards. Assume that any average distortion in
# (0.3, 0.7) suits us
MAX_DISTORTION = 0.7
MIN_DISTORTION = 0.3
OUT_OF_MAX_BOUND_REWARD = -1.0
OUT_OF_MIN_BOUND_REWARD = -1.0
IN_BOUNDS_REWARD = 5.0
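# i.e. an action that keeps the average distortion inside
# [MIN_DISTORTION, MAX_DISTORTION] earns +5.0, while falling below or
# shooting above the bounds is penalised with -1.0 (see the RewardManager
# configuration further below)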
OUTPUT_MSG_FREQUENCY = 100
N_ROUNDS_BELOW_MIN_DISTORTION = 10
SAVE_DISTORTED_SETS_DIR = "/home/alex/qi3/drl_anonymity/src/examples/nstep_semi_grad_sarsa_learn_distorted_sets/distorted_set"
NUM_TILINGS = 5
TILING_DIM = 8
BOOTSTRAP_N = 6

# Step size is interpreted as the fraction of the way we want
# to move towards the target. To compute the learning rate alpha,
# scale by number of tilings.
STEP_SIZE = 0.5
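# e.g. with STEP_SIZE = 0.5 and NUM_TILINGS = 5 the QEstimator created below
# uses a per-tiling learning rate of STEP_SIZE / NUM_TILINGS = 0.1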

MAX_SIZE = 4096


def get_ethinicity_hierarchy():
    ethnicity_hierarchy = SerialHierarchy(values={})

    ethnicity_hierarchy["Mixed White/Asian"] = "White/Asian"
    ethnicity_hierarchy["White/Asian"] = "Mixed"

    ethnicity_hierarchy["Chinese"] = "Asian"
    ethnicity_hierarchy["Indian"] = "Asian"
    ethnicity_hierarchy["Mixed White/Black African"] = "White/Black"
    ethnicity_hierarchy["White/Black"] = "Mixed"

    ethnicity_hierarchy["Black African"] = "African"
    ethnicity_hierarchy["African"] = "Black"
    ethnicity_hierarchy["Asian other"] = "Asian"
    ethnicity_hierarchy["Black other"] = "Black"
    ethnicity_hierarchy["Mixed White/Black Caribbean"] = "White/Black"
    ethnicity_hierarchy["White/Black"] = "Mixed"

    ethnicity_hierarchy["Mixed other"] = "Mixed"
    ethnicity_hierarchy["Arab"] = "Asian"
    ethnicity_hierarchy["White Irish"] = "Irish"
    ethnicity_hierarchy["Irish"] = "European"
    ethnicity_hierarchy["Not stated"] = "Not stated"
    ethnicity_hierarchy["White Gypsy/Traveller"] = "White"
    ethnicity_hierarchy["White British"] = "British"
    ethnicity_hierarchy["British"] = "European"
    ethnicity_hierarchy["Bangladeshi"] = "Asian"
    ethnicity_hierarchy["White other"] = "White"
    ethnicity_hierarchy["Black Caribbean"] = "Caribbean"
    ethnicity_hierarchy["Caribbean"] = "Black"
    ethnicity_hierarchy["Pakistani"] = "Asian"

    ethnicity_hierarchy["European"] = "European"
    ethnicity_hierarchy["Mixed"] = "Mixed"
    ethnicity_hierarchy["Asian"] = "Asian"
    ethnicity_hierarchy["Black"] = "Black"
    ethnicity_hierarchy["White"] = "White"
    return ethnicity_hierarchy
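# The chained entries above form per-value generalisation paths, e.g.
# "Black African" -> "African" -> "Black", so each application of the
# ethnicity generalisation action is expected to coarsen the column by one level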


def load_dataset() -> MockSubjectsLoader:
    # specify the columns to drop
    drop_columns = MockSubjectsLoader.FEATURES_DROP_NAMES + ["preventative_treatment", "gender",
                                                             "education", "mutation_status"]
    MockSubjectsLoader.FEATURES_DROP_NAMES = drop_columns

    # do a salary normalization so that we work with
    # salaries in [0, 1]; this is needed as we will
    # be using normalized distances
    MockSubjectsLoader.NORMALIZED_COLUMNS = ["salary"]

    # specify the columns to use
    MockSubjectsLoader.COLUMNS_TYPES = {"ethnicity": str, "salary": float, "diagnosis": int}
    dataset = MockSubjectsLoader()

    assert dataset.n_columns == 3, "Invalid number of columns {0} not equal to 3".format(dataset.n_columns)
    return dataset
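# After the drops and the salary normalisation the working dataset holds
# exactly three columns: "ethnicity" (str), "salary" (float in [0, 1]) and
# "diagnosis" (int), which is what the assert above verifies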


if __name__ == '__main__':

    # set the seed for random engine
    random.seed(42)

    # load data set
    ds = load_dataset()

    # create bins for the salary generalization
    unique_salary = ds.get_column_unique_values(col_name="salary")
    unique_salary.sort()

    # slightly increase the max value so that the maximum
    # salary does not fall out of the last bin
    bins = np.linspace(unique_salary[0], unique_salary[-1] + 1, N_STATES)
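    # with N_STATES = 10, np.linspace produces 10 equally spaced bin edges
    # spanning [min_salary, max_salary + 1]; these edges are handed to
    # ActionNumericBinGeneralize below as its generalization table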

    # establish the action space. For the ethnicity and salary columns we
    # assume two actions each (identity and generalization); the ```diagnosis```
    # column we do not alter, so it only receives the identity action
    action_space = ActionSpace(n=5)
    action_space.add_many(ActionIdentity(column_name="ethnicity"),
                          ActionStringGeneralize(column_name="ethnicity",
                                                 generalization_table=get_ethinicity_hierarchy()),
                          ActionIdentity(column_name="salary"),
                          ActionNumericBinGeneralize(column_name="salary", generalization_table=bins),
                          ActionIdentity(column_name="diagnosis"))

    action_space.shuffle()

    # create environment configuration
    env_config = DiscreteEnvConfig()
    env_config.action_space = action_space
    env_config.reward_manager = RewardManager(bounds=(MIN_DISTORTION, MAX_DISTORTION),
                                              out_of_max_bound_reward=OUT_OF_MAX_BOUND_REWARD,
                                              out_of_min_bound_reward=OUT_OF_MIN_BOUND_REWARD,
                                              in_bounds_reward=IN_BOUNDS_REWARD)
    env_config.data_set = ds
    env_config.gamma = GAMMA
    env_config.max_distortion = MAX_DISTORTION
    env_config.min_distortion = MIN_DISTORTION
    env_config.n_states = N_STATES
    env_config.n_rounds_below_min_distortion = N_ROUNDS_BELOW_MIN_DISTORTION
    env_config.distorted_set_path = Path(SAVE_DISTORTED_SETS_DIR)
    env_config.distortion_calculator = DistortionCalculator(
        numeric_column_distortion_metric_type=NumericDistanceType.L2_AVG,
        string_column_distortion_metric_type=StringDistanceType.COSINE_NORMALIZE,
        dataset_distortion_type=DistortionCalculationType.SUM)
    env_config.reward_factor = REWARD_FACTOR
    env_config.punish_factor = PUNISH_FACTOR

    # create the environment
    env = DiscreteStateEnvironment(env_config=env_config)

    # we will use a tiled environment in this example
    tiled_env = TiledEnv(env=env, max_size=MAX_SIZE,
                         num_tilings=NUM_TILINGS,
                         tiling_dim=TILING_DIM)
    tiled_env.reset()
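    # TiledEnv presumably applies Sutton-style tile coding on top of the
    # discrete environment: NUM_TILINGS = 5 overlapping tilings of
    # TILING_DIM x TILING_DIM = 8 x 8 tiles, hashed into at most MAX_SIZE = 4096 features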

    # save the data before distortion so that we can
    # later load it into ARX
    tiled_env.save_current_dataset(episode_index=-1, save_index=False)

    # configuration for the n-step semi-gradient SARSA learner
    algo_config = SARSAnConfig()
    algo_config.n_itrs_per_episode = N_ITRS_PER_EPISODE
    algo_config.gamma = GAMMA
    algo_config.alpha = ALPHA
    # algo_config.policy = SoftMaxPolicy(n_actions=len(action_space), tau=1.2)
    algo_config.policy = EpsilonGreedyPolicy(eps=EPS, n_actions=tiled_env.n_actions,
                                             decay_op=EPSILON_DECAY_OPTION,
                                             epsilon_decay_factor=EPSILON_DECAY_FACTOR)
    # level of bootstrapping
    algo_config.n = BOOTSTRAP_N
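    # with n = BOOTSTRAP_N = 6 the update target is the usual n-step return,
    #   G_{t:t+n} = R_{t+1} + gamma * R_{t+2} + ... + gamma^(n-1) * R_{t+n}
    #               + gamma^n * q_hat(S_{t+n}, A_{t+n}, w),
    # i.e. six rewards are accumulated before bootstrapping from the Q estimator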
    algo_config.estimator = QEstimator(env=tiled_env,
                                       max_size=MAX_SIZE,
                                       alpha=STEP_SIZE / NUM_TILINGS)

    # the learner we want to train
    agent = SARSAn(sarsa_config=algo_config)

    configuration = {"n_episodes": N_EPISODES, "output_msg_frequency": OUTPUT_MSG_FREQUENCY}

    # create a trainer to train the SARSAn agent
    trainer = Trainer(env=tiled_env, agent=agent, configuration=configuration)
    trainer.train()

    # avg_rewards = trainer.avg_rewards()
    avg_rewards = trainer.total_rewards
    plot_running_avg(avg_rewards, steps=100,
                     xlabel="Episodes", ylabel="Reward",
                     title="Running reward average over 100 episodes")

    avg_episode_dist = np.array(trainer.total_distortions)
    print("{0} Max/Min distortion {1}/{2}".format(INFO, np.max(avg_episode_dist), np.min(avg_episode_dist)))

    plot_running_avg(avg_episode_dist, steps=100,
                     xlabel="Episodes", ylabel="Distortion",
                     title="Running distortion average over 100 episodes")

    '''
    print("=============================================")
    print("{0} Generating distorted dataset".format(INFO))
    # Let's play
    env.reset()

    stop_criterion = IterationControl(n_itrs=10, min_dist=MIN_DISTORTION, max_dist=MAX_DISTORTION)
    agent.play(env=env, stop_criterion=stop_criterion)
    env.save_current_dataset(episode_index=-2, save_index=False)
    print("{0} Done....".format(INFO))
    print("=============================================")
    '''
