
Commit 20035cf

Merge pull request #15 from pockerman/add_q_learning_algorithm
Add q learning algorithm
2 parents 61f884b + bc4044a commit 20035cf

26 files changed: +1029 additions, -111 deletions

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -5,3 +5,4 @@ src/tests/.pytest_cache/
 src/spaces/__pycache__/
 src/__pycache__/
 src/algorithms/__pycache__/
+src/policies/__pycache__/

README.md

Lines changed: 5 additions & 1 deletion
@@ -16,5 +16,9 @@ to use the reinforcement learning paradigm in order to train agents to perform t
 places this into a persepctive


-![RL anonymity paradigm](images/general_concept.png "Reinforcement learning anonymity schematics")
+![RL anonymity paradigm](images/general_concept.png "Reinforcement learning anonymity schematics")
+
+## Dependencies
+
+## Documentation


doc/env_concept.md

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+# Environment concept

src/algorithms/q_learning.py

Lines changed: 108 additions & 0 deletions
@@ -0,0 +1,108 @@
+"""
+Simple Q-learning algorithm
+"""
+
+import numpy as np
+from typing import TypeVar
+
+from src.exceptions.exceptions import InvalidParamValue
+from src.utils.mixins import WithMaxActionMixin
+
+Env = TypeVar('Env')
+Policy = TypeVar('Policy')
+
+class QLearnConfig(object):
+
+    def __init__(self):
+        self.gamma: float = 1.0
+        self.alpha: float = 0.1
+        self.n_itrs_per_episode: int = 100
+        self.policy: Policy = None
+
+
+class QLearning(WithMaxActionMixin):
+
+    def __init__(self, algo_config: QLearnConfig):
+        super(QLearning, self).__init__()
+        self.q_table = {}
+        self.config = algo_config
+
+        # monitor performance
+        self.total_rewards: np.array = None
+        self.iterations_per_episode = []
+
+    @property
+    def name(self) -> str:
+        return "QLearn"
+
+    def actions_before_training(self, env: Env, **options):
+
+        if self.config.policy is None:
+            raise InvalidParamValue(param_name="policy", param_value="None")
+
+        for state in range(env.observation_space.n):
+            for action in range(env.action_space.n):
+                self.q_table[state, action] = 0.0
+
+    def actions_after_episode_ends(self, **options):
+        """
+        Execute any actions the algorithm needs after
+        an episode ends, e.g. update the behaviour policy
+        :param options:
+        :return:
+        """
+
+        self.config.policy.actions_after_episode(options['episode_idx'])
+
+    def train(self, env: Env, **options) -> tuple:
+
+        # episode score
+        episode_score = 0  # initialize score
+        counter = 0
+
+        time_step = env.reset()
+        state = time_step.observation
+
+        for itr in range(self.config.n_itrs_per_episode):
+
+            # epsilon-greedy action selection
+            action_idx = self.config.policy(q_func=self.q_table, state=state)
+
+            action = env.get_action(action_idx)
+
+            # take action A, observe R, S'
+            next_time_step = env.step(action)
+            next_state = next_time_step.observation
+            reward = next_time_step.reward
+
+            next_state_id = next_state.state_id if next_state is not None else None
+
+            # add reward to agent's score
+            episode_score += next_time_step.reward
+            self._update_Q_table(state=state.state_id, action=action_idx, reward=reward,
+                                 next_state=next_state_id, n_actions=env.action_space.n)
+            state = next_state  # S <- S'
+            counter += 1
+
+            if next_time_step.last():
+                break
+
+        return episode_score, counter
+
+    def _update_Q_table(self, state: int, action: int, n_actions: int, reward: float, next_state: int = None) -> None:
+        """
+        Update the Q-value for the given state-action pair
+        """
+
+        # estimate in Q-table (for current state, action pair)
+        q_s = self.q_table[state, action]
+
+        # value of next state; zero if the next state is terminal
+        Qsa_next = \
+            self.q_table[next_state, self.max_action(next_state, n_actions=n_actions)] if next_state is not None else 0
+        # construct TD target
+        target = reward + (self.config.gamma * Qsa_next)
+
+        # get updated value
+        new_value = q_s + (self.config.alpha * (target - q_s))
+        self.q_table[state, action] = new_value
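
For reference, the _update_Q_table method above implements the standard tabular Q-learning update, with the bootstrap term taken to be zero when next_state is None (i.e. the episode has terminated):

$$Q(s, a) \leftarrow Q(s, a) + \alpha \left[ r + \gamma \max_{a'} Q(s', a') - Q(s, a) \right]$$

where $\alpha$ is QLearnConfig.alpha and $\gamma$ is QLearnConfig.gamma.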

src/algorithms/trainer.py

Lines changed: 26 additions & 7 deletions
@@ -2,8 +2,9 @@
 Trainer
 """
 
-from src.utils import INFO
+import numpy as np
 from typing import TypeVar
+from src.utils import INFO
 
 Env = TypeVar("Env")
 Agent = TypeVar("Agent")
@@ -15,22 +16,40 @@ def __init__(self, env: Env, agent: Agent, configuration: dir) -> None:
         self.env = env
         self.agent = agent
         self.configuration = configuration
+        # monitor performance
+        self.total_rewards: np.array = None
+        self.iterations_per_episode = []
+
+    def actions_before_training(self):
+        self.total_rewards: np.array = np.zeros(self.configuration['n_episodes'])
+        self.iterations_per_episode = []
+
+        self.agent.actions_before_training(self.env)
+
+    def actions_after_episode_ends(self, **options):
+        self.agent.actions_after_episode_ends(**options)
 
     def train(self):
 
         print("{0} Training agent {1}".format(INFO, self.agent.name))
+        self.actions_before_training()
 
-        for episode in range(1, self.configuration["max_n_episodes"] + 1):
-            print("INFO: Episode {0}/{1}".format(episode, self.configuration["max_n_episodes"]))
+        for episode in range(0, self.configuration["n_episodes"]):
+            print("INFO: Episode {0}/{1}".format(episode, self.configuration["n_episodes"]))
 
             # reset the environment
             ignore = self.env.reset()
 
             # train for a number of iterations
-            self.agent.train(self.env)
+            episode_score, n_itrs = self.agent.train(self.env)
+
+            if episode % self.configuration['output_msg_frequency'] == 0:
+                print("{0}: On episode {1} training finished with "
+                      "{2} iterations. Total reward={3}".format(INFO, episode, n_itrs, episode_score))
+
+            self.iterations_per_episode.append(n_itrs)
+            self.total_rewards[episode] = episode_score
 
-            # is it time to update the model?
-            if self.configuration["update_frequency"] % episode == 0:
-                self.agent.update()
+            self.actions_after_episode_ends(**{"episode_idx": episode})
 
         print("{0} Training finished for agent {1}".format(INFO, self.agent.name))

src/apps/__init__.py

Whitespace-only changes.

src/apps/qlearning_on_mock.py

Lines changed: 85 additions & 0 deletions
@@ -0,0 +1,85 @@
+from src.algorithms.q_learning import QLearning, QLearnConfig
+from src.algorithms.trainer import Trainer
+from src.utils.string_distance_calculator import DistanceType
+from src.spaces.actions import ActionSuppress, ActionIdentity, ActionGeneralize, ActionTransform
+from src.spaces.environment import Environment, EnvConfig
+from src.spaces.action_space import ActionSpace
+from src.datasets.datasets_loaders import MockSubjectsLoader
+from src.utils.reward_manager import RewardManager
+from src.policies.epsilon_greedy_policy import EpsilonGreedyPolicy, EpsilonDecreaseOption
+from src.utils.serial_hierarchy import SerialHierarchy
+from src.utils.numeric_distance_type import NumericDistanceType
+
+
+if __name__ == '__main__':
+
+    EPS = 1.0
+    GAMMA = 0.99
+    ALPHA = 0.1
+
+    # load the dataset
+    ds = MockSubjectsLoader()
+
+    # specify the action space. We need to establish how these actions
+    # are performed
+    action_space = ActionSpace(n=4)
+
+    generalization_table = {"Mixed White/Asian": SerialHierarchy(values=["Mixed", ]),
+                            "Chinese": SerialHierarchy(values=["Asian", ]),
+                            "Indian": SerialHierarchy(values=["Asian", ]),
+                            "Mixed White/Black African": SerialHierarchy(values=["Mixed", ]),
+                            "Black African": SerialHierarchy(values=["Black", ]),
+                            "Asian other": SerialHierarchy(values=["Asian", ]),
+                            "Black other": SerialHierarchy(values=["Black", ]),
+                            "Mixed White/Black Caribbean": SerialHierarchy(values=["Mixed", ]),
+                            "Mixed other": SerialHierarchy(values=["Mixed", ]),
+                            "Arab": SerialHierarchy(values=["Asian", ]),
+                            "White Irish": SerialHierarchy(values=["White", ]),
+                            "Not stated": SerialHierarchy(values=["Not stated"]),
+                            "White Gypsy/Traveller": SerialHierarchy(values=["White", ]),
+                            "White British": SerialHierarchy(values=["White", ]),
+                            "Bangladeshi": SerialHierarchy(values=["Asian", ]),
+                            "White other": SerialHierarchy(values=["White", ]),
+                            "Black Caribbean": SerialHierarchy(values=["Black", ]),
+                            "Pakistani": SerialHierarchy(values=["Asian", ])}
+
+    action_space.add_many(ActionSuppress(column_name="gender", suppress_table={"F": SerialHierarchy(values=['*', ]),
+                                                                               'M': SerialHierarchy(values=['*', ])}),
+                          ActionIdentity(column_name="salary"), ActionIdentity(column_name="education"),
+                          ActionGeneralize(column_name="ethnicity", generalization_table=generalization_table))
+
+    average_distortion_constraint = {"salary": [0.0, 0.0, 0.0], "education": [0.0, 0.0, 0.0],
+                                     "ethnicity": [3.0, 1.0, -1.0], "gender": [4.0, 1.0, -1.0]}
+
+    # specify the reward manager to use
+    reward_manager = RewardManager(average_distortion_constraint=average_distortion_constraint)
+
+    env_config = EnvConfig()
+    env_config.start_column = "gender"
+    env_config.action_space = action_space
+    env_config.reward_manager = reward_manager
+    env_config.data_set = ds
+    env_config.gamma = 0.99
+    env_config.numeric_column_distortion_metric_type = NumericDistanceType.L2
+
+    # create the environment
+    env = Environment(env_config=env_config)
+
+    # initialize text distances
+    env.initialize_text_distances(distance_type=DistanceType.COSINE)
+
+    algo_config = QLearnConfig()
+    algo_config.n_itrs_per_episode = 1000
+    algo_config.gamma = 0.99
+    algo_config.alpha = 0.1
+    algo_config.policy = EpsilonGreedyPolicy(eps=EPS, env=env,
+                                             decay_op=EpsilonDecreaseOption.INVERSE_STEP)
+
+    agent = QLearning(algo_config=algo_config)
+
+    configuration = {"n_episodes": 10, "output_msg_frequency": 100}
+
+    # create a trainer to train the Q-learning agent
+    trainer = Trainer(env=env, agent=agent, configuration=configuration)
+
+    trainer.train()
Lines changed: 65 additions & 1 deletion
@@ -1,4 +1,68 @@
 """
 Utilities for calculating the information leakage
 for a dataset
-"""
+"""
+import numpy as np
+from typing import TypeVar
+from src.exceptions.exceptions import InvalidSchemaException, Error
+from src.datasets.dataset_distances import lp_distance
+from src.utils import numeric_distance_type
+
+DataSet = TypeVar("DataSet")
+State = TypeVar("State")
+
+
+def state_leakage(state1: State, state2: State, dist_type: numeric_distance_type.NumericDistanceType) -> float:
+
+    if dist_type == numeric_distance_type.NumericDistanceType.L2:
+        return _l2_state_leakage(state1=state1, state2=state2)
+    elif dist_type == numeric_distance_type.NumericDistanceType.L1:
+        return _l1_state_leakage(state1=state1, state2=state2)
+
+    raise Error("Invalid distance type {0}".format(dist_type.name))
+
+
+def info_leakage(ds1: DataSet, ds2: DataSet, column_distances: dict = None, p=None) -> tuple:
+    """
+    Returns the information leakage between the two data sets
+    :param ds1:
+    :param ds2:
+    :param column_distances: A dictionary that holds numeric distances to use if a column
+    is of type string
+    :return:
+    """
+
+    if ds1.schema != ds2.schema:
+        raise InvalidSchemaException(message="Invalid schema for datasets")
+
+    if column_distances is None:
+        return lp_distance(ds1=ds1, ds2=ds2, p=p)
+
+    distances = {}
+    cols = ds1.get_columns_names()
+    for col in cols:
+
+        if col in column_distances:
+            # get the total distortion of the column
+            distances[col] = column_distances[col]
+        else:
+
+            val1 = ds1.get_column(col_name=col)
+            val2 = ds2.get_column(col_name=col)
+            distances[col] = np.linalg.norm(val1 - val2, ord=p)
+
+    sum_distances = sum(distances.values())
+    return distances, sum_distances
+
+
+def _l2_state_leakage(state1: State, state2: State) -> float:
+    return np.linalg.norm(state1 - state2, ord=None)
+
+def _l1_state_leakage(state1: State, state2: State) -> float:
+    return np.linalg.norm(state1 - state2, ord=1)
+
+
+
+
+
+
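
In the per-column branch above, info_leakage reports, for each column not covered by the column_distances overrides, the l_p distance between the two versions of that column, together with the sum over all columns:

$$d_c = \lVert v_c^{(1)} - v_c^{(2)} \rVert_p, \qquad L(D_1, D_2) = \sum_c d_c$$

where $v_c^{(i)}$ denotes column $c$ of dataset $D_i$. Columns that do appear in column_distances contribute their pre-computed distortion instead, and when no overrides are supplied at all the function falls back to lp_distance over the whole datasets.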

src/datasets/dataset_wrapper.py

Lines changed: 13 additions & 0 deletions
@@ -80,7 +80,20 @@ def read(self, filename: Path, **options) -> None:
         # try to cast to the data types
         self.ds = change_column_types(ds=self.ds, column_types=self.columns)
 
+    def sample_column_name(self) -> str:
+        """
+        Samples a name from the columns
+        :return: a column name
+        """
+        names = self.get_columns_names()
+        return np.random.choice(names)
+
     def set_columns_to_type(self, col_name_types) -> None:
+        """
+        Set the types of the columns
+        :param col_name_types:
+        :return:
+        """
         self.ds.astype(dtype=col_name_types)
 
     def attach_column_hierarchy(self, col_name: str, hierarchy: HierarchyBase):

src/exceptions/exceptions.py

Lines changed: 20 additions & 0 deletions
@@ -15,3 +15,23 @@ def __str__(self):
         return self.message
 
 
+class InvalidParamValue(Exception):
+    def __init__(self, param_name: str, param_value: str):
+        self.message = "Parameter {0} has invalid value {1}".format(param_name, param_value)
+
+    def __str__(self):
+        return self.message
+
+
+class InvalidSchemaException(Exception):
+    def __init__(self, message: str) -> None:
+        self.message = message
+
+    def __str__(self):
+        return self.message
+
+
+
+
+
+
