pockerman
diff --git a/‎src/algorithms/anonymity_a2c_ray.py‎
Lines changed: 1 addition & 1 deletion b/‎src/algorithms/anonymity_a2c_ray.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/algorithms/q_learning.py‎
Lines changed: 14 additions & 9 deletions b/‎src/algorithms/q_learning.py‎
Lines changed: 14 additions & 9 deletions
diff --git a/‎src/algorithms/trainer.py‎
Lines changed: 27 additions & 6 deletions b/‎src/algorithms/trainer.py‎
Lines changed: 27 additions & 6 deletions
diff --git a/‎src/apps/qlearning_on_mock.py‎
Lines changed: 1 addition & 1 deletion b/‎src/apps/qlearning_on_mock.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/datasets/dataset_wrapper.py‎
Lines changed: 43 additions & 10 deletions b/‎src/datasets/dataset_wrapper.py‎
Lines changed: 43 additions & 10 deletions
diff --git a/‎src/exceptions/exceptions.py‎
Lines changed: 16 additions & 0 deletions b/‎src/exceptions/exceptions.py‎
Lines changed: 16 additions & 0 deletions
diff --git a/‎src/spaces/action_space.py‎
Lines changed: 3 additions & 40 deletions b/‎src/spaces/action_space.py‎
Lines changed: 3 additions & 40 deletions
@@ -6,7 +6,7 @@
 import ray.rllib.agents.a3c as a3c
 from ray.tune.logger import pretty_print
 from ray.rllib.env.env_context import EnvContext
-from src.spaces.environment import TimeStep, StepType
+from src.spaces.discrete_state_environment import TimeStep, StepType
 from src.spaces.observation_space import ObsSpace
 
 
 
@@ -42,8 +42,8 @@ def actions_before_training(self, env: Env, **options):
         if self.config.policy is None:
             raise InvalidParamValue(param_name="policy", param_value="None")
 
-        for state in range(env.observation_space.n):
-            for action in range(env.action_space.n):
+        for state in range(1, env.n_states):
+            for action in range(env.n_actions):
                 self.q_table[state, action] = 0.0
 
     def actions_after_episode_ends(self, **options):
@@ -59,8 +59,9 @@ def actions_after_episode_ends(self, **options):
     def train(self, env: Env, **options) -> tuple:
 
         # episode score
-        episode_score = 0  # initialize score
+        episode_score = 0
         counter = 0
+        total_distortion = 0
 
         time_step = env.reset()
         state = time_step.observation
@@ -72,24 +73,28 @@ def train(self, env: Env, **options) -> tuple:
 
             action = env.get_action(action_idx)
 
+            if action.action_type.name == "GENERALIZE" and action.column_name == "salary":
+                print("Attempt to generalize salary")
+            else:
+                print(action.action_type.name, " on ", action.column_name)
+
             # take action A, observe R, S'
             next_time_step = env.step(action)
             next_state = next_time_step.observation
             reward = next_time_step.reward
 
-            next_state_id = next_state.state_id if next_state is not None else None
-
             # add reward to agent's score
-            episode_score += next_time_step.reward
-            self._update_Q_table(state=state.state_id, action=action_idx, reward=reward,
-                                 next_state=next_state_id, n_actions=env.action_space.n)
+            episode_score += reward
+            self._update_Q_table(state=state, action=action_idx, reward=reward,
+                                 next_state=next_state, n_actions=env.n_actions)
             state = next_state  # S <- S'
             counter += 1
+            total_distortion += next_time_step.info["total_distortion"]
 
             if next_time_step.last():
                 break
 
-        return episode_score, counter
+        return episode_score, total_distortion, counter
 
     def _update_Q_table(self, state: int, action: int, n_actions: int, reward: float, next_state: int = None) -> None:
         """
 
@@ -19,8 +19,24 @@ def __init__(self, env: Env,  agent: Agent, configuration: dir) -> None:
         # monitor performance
         self.total_rewards: np.array = np.zeros(configuration['n_episodes'])
         self.iterations_per_episode = []
+        self.total_distortions = []
+
+    def avg_rewards(self) -> np.array:
+        """
+        Returns, for every episode, the total episode reward divided by
+        the number of iterations recorded for that episode
+        :return: array with configuration['n_episodes'] entries
+        """
+        averages = np.zeros(self.configuration['n_episodes'])
+        for episode_idx, episode_reward in enumerate(self.total_rewards):
+            averages[episode_idx] = episode_reward / self.iterations_per_episode[episode_idx]
+        return averages
 
     def actions_before_training(self):
+        """
+        Any actions to perform before training begins
+        :return:
+        """
         self.total_rewards: np.array = np.zeros(self.configuration['n_episodes'])
         self.iterations_per_episode = []
 
@@ -29,27 +45,32 @@ def actions_before_training(self):
     def actions_after_episode_ends(self, **options):
         self.agent.actions_after_episode_ends(**options)
 
+        if options["episode_idx"] % self.configuration['output_msg_frequency'] == 0:  # only on episodes whose index is a multiple of output_msg_frequency
+            if self.env.config.distorted_set_path is not None:  # nothing is saved when no path is configured
+                self.env.save_current_dataset(options["episode_idx"])  # the episode index is forwarded to the environment
+
     def train(self):
 
         print("{0} Training agent {1}".format(INFO, self.agent.name))
         self.actions_before_training()
 
         for episode in range(0, self.configuration["n_episodes"]):
-            print("INFO: Episode {0}/{1}".format(episode, self.configuration["n_episodes"]))
+            print("{0} On episode {1}/{2}".format(INFO, episode, self.configuration["n_episodes"]))
 
             # reset the environment
             ignore = self.env.reset()
 
             # train for a number of iterations
-            episode_score, n_itrs = self.agent.train(self.env)
+            episode_score, total_distortion, n_itrs = self.agent.train(self.env)
 
-            if episode % self.configuration['output_msg_frequency'] == 0:
-                print("{0}: On episode {1} training finished with  "
-                      "{2} iterations. Total reward={3}".format(INFO, episode, n_itrs, episode_score))
+            print("{0} Episode score={1}, episode total distortion {2}".format(INFO, episode_score, total_distortion / n_itrs))
+
+            #if episode % self.configuration['output_msg_frequency'] == 0:
+            print("{0} Episode finished after {1} iterations".format(INFO, n_itrs))
 
             self.iterations_per_episode.append(n_itrs)
             self.total_rewards[episode] = episode_score
-
+            self.total_distortions.append(total_distortion)
             self.actions_after_episode_ends(**{"episode_idx": episode})
 
         print("{0} Training finished for agent {1}".format(INFO, self.agent.name))
@@ -5,7 +5,7 @@
 from src.algorithms.trainer import Trainer
 from src.utils.string_distance_calculator import StringDistanceType
 from src.spaces.actions import ActionSuppress, ActionIdentity, ActionStringGeneralize, ActionTransform
-from src.spaces.environment import Environment, EnvConfig
+from src.spaces.discrete_state_environment import Environment, EnvConfig
 from src.spaces.action_space import ActionSpace
 from src.datasets.datasets_loaders import MockSubjectsLoader
 from src.utils.reward_manager import RewardManager
 
@@ -5,6 +5,7 @@
 import numpy as np
 
 from src.preprocessor.cleanup_utils import read_csv, replace, change_column_types
+from src.exceptions.exceptions import InvalidDataTypeException
 
 DS = TypeVar("DS")
 HierarchyBase = TypeVar('HierarchyBase')
@@ -41,7 +42,7 @@ def __init__(self, columns: dir) -> None:
 
         # map that holds the hierarchy to be applied
         # on each column in the dataset
-        self.column_hierarchy = {}
+        #self.column_hierarchy = {}
 
     @property
     def n_rows(self) -> int:
@@ -63,6 +64,14 @@ def n_columns(self) -> int:
     def schema(self) -> dict:
         return pd.io.json.build_table_schema(self.ds)
 
+    def save_to_csv(self, filename: Path) -> None:
+        """
+        Save the underlying dataset in a csv format
+        :param filename: destination handed unchanged to self.ds.to_csv
+        :return: None
+        """
+        self.ds.to_csv(filename)
+
     def read(self, filename: Path,  **options) -> None:
         """
         Load a data set from a file
@@ -82,6 +91,25 @@ def read(self, filename: Path,  **options) -> None:
         # try to cast to the data types
         self.ds = change_column_types(ds=self.ds, column_types=self.columns)
 
+    def normalize_column(self, column_name) -> None:
+        r"""
+        Normalizes the column with the given name using the following
+        transformation:
+
+        z_i = \frac{x_i - min(x)}{max(x) - min(x)}
+
+        Not implemented yet: raises NotImplementedError for int/float columns
+        and InvalidDataTypeException for columns of any other declared type
+        :param column_name: column name, looked up in self.columns
+        :return: None
+        """
+
+        data_type = self.columns[column_name]
+        if data_type is not int and data_type is not float:  # reject only when the type is neither int nor float
+            raise InvalidDataTypeException(param_name=column_name, param_types="[int, float]")
+
+        raise NotImplementedError("Function is not implemented")
+
     def sample_column_name(self) -> str:
         """
         Samples a name from the columns
@@ -98,18 +126,23 @@ def set_columns_to_type(self, col_name_types) -> None:
         """
         self.ds.astype(dtype=col_name_types)
 
-    def attach_column_hierarchy(self, col_name: str, hierarchy: HierarchyBase):
-        self.column_hierarchy[col_name] = hierarchy
-
     def get_column(self, col_name: str):
+        """
+        Returns the column with the given name
+        :param col_name: column label used to index self.ds
+        :return: all rows of that column, i.e. self.ds.loc[:, col_name]
+        """
         return self.ds.loc[:, col_name]
 
     def get_column_unique_values(self, col_name: str):
-        # what are the unique values?
-
-        col = self.get_column(col_name=col_name)
-        vals = col.values.ravel()
-        return pd.unique(vals)
+        """
+        Returns the distinct values found in the given column
+        :param col_name: name of the column to inspect
+        :return: result of pd.unique applied to the flattened column values
+        """
+        column_values = self.get_column(col_name=col_name).values
+        flattened = column_values.ravel()
+        return pd.unique(flattened)
 
     def get_columns_types(self):
         return list(self.ds.dtypes)
@@ -136,7 +169,7 @@ def apply_column_transform(self, column_name: str, transform: Transform) -> None
 
         # get the column
         column = self.get_column(col_name=column_name)
-        column = transform.act(**{"data": column})
+        column = transform.act(**{"data": column.values})
         self.ds[transform.column_name] = column
 
 
 
@@ -23,6 +23,14 @@ def __str__(self):
         return self.message
 
 
+class InvalidDataTypeException(Exception):  # reports a parameter whose type is not among the accepted ones
+    def __init__(self, param_name: str, param_types: str):  # param_types: textual list of accepted types, embedded in the message
+        self.message = "Parameter {0} has invalid type. Type not in {1}".format(param_name, param_types)
+
+    def __str__(self):  # the formatted message is the string form of the exception
+        return self.message
+
+
 class InvalidSchemaException(Exception):
     def __init__(self, message: str) -> None:
         self.message = message
@@ -39,6 +47,14 @@ def __str__(self):
         return self.message
 
 
+class IncompatibleVectorSizesException(Exception):  # reports two sizes that do not match
+    def __init__(self, size1: int, size2: int) -> None:  # must be __init__ so that self.message exists for __str__
+        self.message = "Size {0} does not match size {1} ".format(size1, size2)
+
+    def __str__(self):  # the formatted message is the string form of the exception
+        return self.message
+
+
 
 
 
@@ -38,6 +38,9 @@ def __setitem__(self, key: int, value: ActionBase) -> None:
         """
         self.actions[key] = value
 
+    def __len__(self) -> int:
+        return len(self.actions)  # number of entries currently held in self.actions
+
     def shuffle(self) -> None:
         """
         Randomly shuffle the actions in the space
@@ -91,44 +94,4 @@ def sample_and_get(self) -> ActionBase:
         action_idx = self.sample()
         return self.actions[action_idx]
 
-    def get_non_exhausted_actions(self) -> list:
-        """
-        Returns a list of actions that have not exhausted the
-        transformations that apply on a column.
-        :return: list of actions. List may be empty. Client code should handle this
-        """
-        actions_ = []
-        for action in self.actions:
-            if not action.is_exhausted():
-                actions_.append(action)
-
-        return actions_
-
-    def sample_and_get_non_exhausted(self) -> ActionBase:
-        """
-        Sample an action from the non exhausted actions
-        :return: A non-exhausted action
-        """
-        actions = self.get_non_exhausted_actions()
-        return np.random.choice(actions)
-
-    def is_exhausted(self) -> bool:
-        """
-        Returns true if all the actions in the space are exhausted
-        :return:
-        """
-        finished = True
-        for action in self.actions:
-            if not action.is_exhausted():
-                return False
-
-        return finished
-
-    def reset(self) -> None:
-        """
-        Reset every action in the action space
-        :return:
-        """
-        for action in self.actions:
-            action.reinitialize()