pockerman
diff --git a/‎src/algorithms/q_learning.py‎
Lines changed: 7 additions & 5 deletions b/‎src/algorithms/q_learning.py‎
Lines changed: 7 additions & 5 deletions
diff --git a/‎src/algorithms/trainer.py‎
Lines changed: 1 addition & 1 deletion b/‎src/algorithms/trainer.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/apps/qlearning_on_mock.py‎
Lines changed: 77 additions & 29 deletions b/‎src/apps/qlearning_on_mock.py‎
Lines changed: 77 additions & 29 deletions
diff --git a/‎src/datasets/dataset_wrapper.py‎
Lines changed: 3 additions & 1 deletion b/‎src/datasets/dataset_wrapper.py‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎src/exceptions/exceptions.py‎
Lines changed: 7 additions & 0 deletions b/‎src/exceptions/exceptions.py‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎src/spaces/action_space.py‎
Lines changed: 8 additions & 0 deletions b/‎src/spaces/action_space.py‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎src/spaces/actions.py‎
Lines changed: 49 additions & 0 deletions b/‎src/spaces/actions.py‎
Lines changed: 49 additions & 0 deletions
@@ -11,8 +11,11 @@
 Env = TypeVar('Env')
 Policy = TypeVar('Policy')
 
-class QLearnConfig(object):
 
+class QLearnConfig(object):
+    """
+    Configuration for Q-learning
+    """
     def __init__(self):
         self.gamma: float = 1.0
         self.alpha: float = 0.1
@@ -21,16 +24,15 @@ def __init__(self):
 
 
 class QLearning(WithMaxActionMixin):
+    """
+    Q-learning algorithm implementation
+    """
 
     def __init__(self, algo_config: QLearnConfig):
         super(QLearning, self).__init__()
         self.q_table = {}
         self.config = algo_config
 
-        # monitor performance
-        self.total_rewards: np.array = None
-        self.iterations_per_episode = []
-
     @property
     def name(self) -> str:
         return "QLearn"
 
@@ -17,7 +17,7 @@ def __init__(self, env: Env,  agent: Agent, configuration: dir) -> None:
         self.agent = agent
         self.configuration = configuration
         # monitor performance
-        self.total_rewards: np.array = None
+        self.total_rewards: np.array = np.zeros(configuration['n_episodes'])
         self.iterations_per_episode = []
 
     def actions_before_training(self):
 
@@ -1,6 +1,9 @@
+import matplotlib.pyplot as plt
+import numpy as np
+
 from src.algorithms.q_learning import QLearning, QLearnConfig
 from src.algorithms.trainer import Trainer
-from src.utils.string_distance_calculator import DistanceType
+from src.utils.string_distance_calculator import StringDistanceType
 from src.spaces.actions import ActionSuppress, ActionIdentity, ActionGeneralize, ActionTransform
 from src.spaces.environment import Environment, EnvConfig
 from src.spaces.action_space import ActionSpace
@@ -11,45 +14,74 @@
 from src.utils.numeric_distance_type import NumericDistanceType
 
 
+def plot_running_avg(avg_rewards):
+    """Plot the running mean of ``avg_rewards`` using a trailing window of up to 101 entries."""
+    running_avg = np.empty(avg_rewards.shape[0])  # assumes avg_rewards is a 1-D numpy array -- TODO confirm
+    for t in range(avg_rewards.shape[0]):
+        running_avg[t] = np.mean(avg_rewards[max(0, t-100) : (t+1)])  # mean of entries max(0, t-100)..t inclusive
+    plt.plot(running_avg)
+    plt.xlabel("Number of episodes")
+    plt.ylabel("Reward")
+    plt.title("Running average")
+    plt.show()
+
+def get_ethinicity_hierarchies():
+    """Return a SerialHierarchy in which every ethnicity value is added with values [group, "*"]."""
+    ethnicity_hierarchy = SerialHierarchy()
+    ethnicity_hierarchy.add("Mixed White/Asian", values=["Mixed", '*'])
+    ethnicity_hierarchy.add("Chinese", values=["Asian", '*'])
+    ethnicity_hierarchy.add("Indian", values=["Asian", '*'])
+    ethnicity_hierarchy.add("Mixed White/Black African", values=["Mixed", '*'])
+    ethnicity_hierarchy.add("Black African", values=["Black", '*'])
+    ethnicity_hierarchy.add("Asian other", values=["Asian", "*"])
+    ethnicity_hierarchy.add("Black other", values=["Black", "*"])
+    ethnicity_hierarchy.add("Mixed White/Black Caribbean", values=["Mixed", "*"])
+    ethnicity_hierarchy.add("Mixed other", values=["Mixed", "*"])
+    ethnicity_hierarchy.add("Arab", values=["Asian", "*"])
+    ethnicity_hierarchy.add("White Irish", values=["White", "*"])
+    ethnicity_hierarchy.add("Not stated", values=["Not stated", "*"])
+    ethnicity_hierarchy.add("White Gypsy/Traveller", values=["White", "*"])
+    ethnicity_hierarchy.add("White British", values=["White", "*"])
+    ethnicity_hierarchy.add("Bangladeshi", values=["Asian", "*"])
+    ethnicity_hierarchy.add("White other", values=["White", "*"])
+    ethnicity_hierarchy.add("Black Caribbean", values=["Black", "*"])
+    ethnicity_hierarchy.add("Pakistani", values=["Asian", "*"])
+
+    return ethnicity_hierarchy
+
+
 if __name__ == '__main__':
 
     EPS = 1.0
     GAMMA = 0.99
     ALPHA = 0.1
+    N_EPISODES = 100
 
     # load the dataset
     ds = MockSubjectsLoader()
 
+    # generalization table for the ethnicity column
+    ethinicity_table = get_ethinicity_hierarchies()
+
     # specify the action space. We need to establish how these actions
     # are performed
-    action_space = ActionSpace(n=4)
-
-    generalization_table = {"Mixed White/Asian": SerialHierarchy(values=["Mixed", ]),
-                            "Chinese": SerialHierarchy(values=["Asian", ]),
-                            "Indian": SerialHierarchy(values=["Asian", ]),
-                            "Mixed White/Black African": SerialHierarchy(values=["Mixed", ]),
-                            "Black African": SerialHierarchy(values=["Black", ]),
-                            "Asian other": SerialHierarchy(values=["Asian", ]),
-                            "Black other": SerialHierarchy(values=["Black", ]),
-                            "Mixed White/Black Caribbean": SerialHierarchy(values=["Mixed", ]),
-                            "Mixed other": SerialHierarchy(values=["Mixed", ]),
-                            "Arab": SerialHierarchy(values=["Asian", ]),
-                            "White Irish": SerialHierarchy(values=["White", ]),
-                            "Not stated": SerialHierarchy(values=["Not stated"]),
-                            "White Gypsy/Traveller": SerialHierarchy(values=["White", ]),
-                            "White British": SerialHierarchy(values=["White", ]),
-                            "Bangladeshi": SerialHierarchy(values=["Asian", ]),
-                            "White other": SerialHierarchy(values=["White", ]),
-                            "Black Caribbean": SerialHierarchy(values=["Black", ]),
-                            "Pakistani": SerialHierarchy(values=["Asian", ])}
-
+    action_space = ActionSpace(n=5)
     action_space.add_many(ActionSuppress(column_name="gender", suppress_table={"F": SerialHierarchy(values=['*', ]),
                                                                                'M': SerialHierarchy(values=['*', ])}),
-                          ActionIdentity(column_name="salary"), ActionIdentity(column_name="education"),
-                          ActionGeneralize(column_name="ethnicity", generalization_table=generalization_table))
-
+                          ActionIdentity(column_name="salary"),
+                          ActionIdentity(column_name="education"),
+                          ActionGeneralize(column_name="ethnicity", generalization_table=ethinicity_table),
+                          ActionSuppress(column_name="preventative_treatment",
+                                         suppress_table={"No":  SerialHierarchy(values=['Maybe', '*']),
+                                                         'Yes': SerialHierarchy(values=['Maybe', '*']),
+                                                         "NA":  SerialHierarchy(values=['Maybe', '*']),
+                                                         "Maybe": SerialHierarchy(values=['*', '*'])
+                                                         }))
+
+    # average distortion constraint for each column
     average_distortion_constraint = {"salary": [0.0, 0.0, 0.0], "education": [0.0, 0.0, 0.0],
-                                     "ethnicity": [3.0, 1.0, -1.0], "gender": [4.0, 1.0, -1.0]}
+                                     "ethnicity": [3.0, 1.0, -1.0], "gender": [4.0, 1.0, -1.0],
+                                     "preventative_treatment": [4.0, 1.0, -1.0]}
 
     # specify the reward manager to use
     reward_manager = RewardManager(average_distortion_constraint=average_distortion_constraint)
@@ -66,20 +98,36 @@
     env = Environment(env_config=env_config)
 
     # initialize text distances
-    env.initialize_text_distances(distance_type=DistanceType.COSINE)
+    env.initialize_text_distances(distance_type=StringDistanceType.COSINE)
 
     algo_config = QLearnConfig()
-    algo_config.n_itrs_per_episode = 1000
+    algo_config.n_itrs_per_episode = 10
     algo_config.gamma = 0.99
     algo_config.alpha = 0.1
     algo_config.policy = EpsilonGreedyPolicy(eps=EPS, env=env,
                                              decay_op=EpsilonDecreaseOption.INVERSE_STEP)
 
     agent = QLearning(algo_config=algo_config)
 
-    configuration = {"n_episodes": 10, "output_msg_frequency": 100}
+    configuration = {"n_episodes": N_EPISODES, "output_msg_frequency": 10}
 
     # create a trainer to train the A2C agent
     trainer = Trainer(env=env, agent=agent, configuration=configuration)
 
     trainer.train()
+
+    # get the state space
+    state_space = env.state_space
+
+    for state in state_space:
+        print("Column {0} history {1}".format(state, state_space[state].history))
+
+    # plot the rewards stored on the trainer against the episode index
+    total_reward = trainer.total_rewards
+    episodes = list(range(N_EPISODES))
+    plt.plot(episodes, total_reward)
+    plt.xlabel("Episodes")
+    plt.ylabel("Reward")
+    plt.show()
+
+
@@ -74,7 +74,9 @@ def read(self, filename: Path,  **options) -> None:
                            features_drop_names=options["features_drop_names"],
                            names=options["names"])
 
-        if "change_col_vals" in options:
+        if "change_col_vals" in options and \
+                options["change_col_vals"] is not None and \
+                len(options["change_col_vals"]) != 0:
             self.ds = replace(ds=self.ds, options=options["change_col_vals"])
 
         # try to cast to the data types
 
@@ -31,6 +31,13 @@ def __str__(self):
         return self.message
 
 
+class InvalidStateException(Exception):
+    """Exception whose message states that a given type is not in a given state."""
+    def __init__(self, type_name: str, state_type: str) -> None:
+        self.message = "Type= {0} is not in state= {1}".format(type_name, state_type)
+
+    def __str__(self) -> str:
+        return self.message
 
 
 
 
@@ -4,6 +4,7 @@
 """
 
 import numpy as np
+import random
 from gym.spaces.discrete import Discrete
 from src.spaces.actions import ActionBase
 
@@ -37,6 +38,13 @@ def __setitem__(self, key: int, value: ActionBase) -> None:
         """
         self.actions[key] = value
 
+    def shuffle(self) -> None:
+        """
+        Randomly shuffle, in place, the actions held in self.actions
+        :return: None
+        """
+        random.shuffle(self.actions)  # NOTE(review): requires self.actions to be a mutable sequence -- confirm
+
     def get_action_by_column_name(self, column_name: str) -> ActionBase:
         """
         Get the action that corresponds to the column with
 
@@ -22,10 +22,15 @@ class ActionType(enum.IntEnum):
     Defines the status of an Action
     """
 
+    INVALID_TYPE = -1
     TRANSFORM = 0
     SUPPRESS = 1
     GENERALIZE = 2
     IDENTITY = 3
+    RESTORE = 4
+
+    def invalid(self) -> bool:  # True only for the INVALID_TYPE sentinel member
+        return self is ActionType.INVALID_TYPE
 
     def transform(self) -> bool:
         return self is ActionType.TRANSFORM
@@ -39,6 +44,9 @@ def generalize(self) -> bool:
     def identity(self) -> bool:
         return self is ActionType.IDENTITY
 
+    def restore(self) -> bool:  # True only for the RESTORE member
+        return self is ActionType.RESTORE
+
 
 class ActionBase(metaclass=abc.ABCMeta):
     """
@@ -122,6 +130,46 @@ def reinitialize(self) -> None:
         self.called = False
 
 
+class ActionRestore(ActionBase, WithHierarchyTable):
+    """
+    Implements the restore action. Currently a stub: act() does nothing and the other methods raise NotImplementedError
+    """
+
+    def __init__(self, column_name: str, restore_table):
+        super(ActionRestore, self).__init__(column_name=column_name, action_type=ActionType.RESTORE)
+        self.table = restore_table  # NOTE(review): presumably the table attribute expected by WithHierarchyTable -- confirm
+
+    def act(self, **ops):
+        """
+        Perform the action. Not implemented yet: the keyword arguments are ignored and nothing is done
+        :return: None
+        """
+        pass
+
+    def get_maximum_number_of_transforms(self):
+        """
+        Returns the maximum number of transforms that the action applies
+        :raises NotImplementedError: always, the method is not implemented yet
+        """
+        raise NotImplementedError("Method not implemented")
+
+    def is_exhausted(self) -> bool:
+        """
+        Returns true if the action has exhausted all its
+        transforms
+        :raises NotImplementedError: always, the method is not implemented yet
+        """
+        raise NotImplementedError("Method not implemented")
+
+    def reinitialize(self) -> None:
+        """
+        Reinitialize the action to the state when the
+        constructor is called
+        :raises NotImplementedError: always, the method is not implemented yet
+        """
+        raise NotImplementedError("Method not implemented")
+
+
 class ActionTransform(ActionBase):
     """
     Implements the transform action
@@ -183,6 +231,7 @@ def act(self, **ops) -> None:
 
         # generalize the data given
         for i, item in enumerate(ops["data"]):
+
             value = self.table[item].value
             col_vals[i] = value