Merge pull request #25 from pockerman/investigate_q_learning

pockerman · web-flow · commit 24d6bfa8d541 · 2022-01-26T14:15:31.000Z
Investigate q learning
diff --git a/src/algorithms/q_learning.py b/src/algorithms/q_learning.py
@@ -56,6 +56,21 @@ def actions_after_episode_ends(self, **options):
 
         self.config.policy.actions_after_episode(options['episode_idx'])
 
+    def play(self, env: Env) -> None:
+        """
+        Play the game on the environment. This should produce
+        a distorted dataset
+        :param env:
+        :return:
+        """
+
+        # loop over the columns and for the
+        # column get the action that corresponds to
+        # the max payout.
+        # TODO: This will no work as the distortion is calculated
+        # by summing over the columns.
+        raise NotImplementedError("Function not implemented")
+
     def train(self, env: Env, **options) -> tuple:
 
         # episode score
@@ -73,10 +88,10 @@ def train(self, env: Env, **options) -> tuple:
 
             action = env.get_action(action_idx)
 
-            if action.action_type.name == "GENERALIZE" and action.column_name == "salary":
-                print("Attempt to generalize salary")
-            else:
-                print(action.action_type.name, " on ", action.column_name)
+            #if action.action_type.name == "GENERALIZE" and action.column_name == "salary":
+             #   print("Attempt to generalize salary")
+            #else:
+             #   print(action.action_type.name, " on ", action.column_name)
 
             # take action A, observe R, S'
             next_time_step = env.step(action)
diff --git a/src/datasets/dataset_wrapper.py b/src/datasets/dataset_wrapper.py
@@ -106,16 +106,17 @@ def normalize_column(self, column_name) -> None:
         """
 
         data_type = self.columns[column_name]
-        if data_type is not int or data_type is not float:
-            raise InvalidDataTypeException(param_name=column_name, param_types="[int, float]")
+
+        if data_type is not type(1) and data_type is not type(1.0):
+            raise InvalidDataTypeException(param_name=column_name, param_type=data_type, param_types="[int, float]")
 
         col_vals = self.get_column(col_name=column_name).values
 
         min_val = np.min(col_vals)
         max_val = np.max(col_vals)
 
         for i in range(len(col_vals)):
-            col_vals[i] = (col_vals[i] - min_val) / (max_val - min_val)
+            col_vals[i] = float((col_vals[i] - min_val)) / float((max_val - min_val))
 
         self.ds[column_name] = col_vals
 
diff --git a/src/exceptions/exceptions.py b/src/exceptions/exceptions.py
@@ -1,3 +1,5 @@
+from typing import Any
+
 
 class Error(Exception):
     """
@@ -24,8 +26,8 @@ def __str__(self):
 
 
 class InvalidDataTypeException(Exception):
-    def __init__(self, param_name: str, param_types: str):
-        self.message = "Parameter {0} has invalid type. Type not in {1}".format(param_name, param_types)
+    def __init__(self, param_name: str, param_type: Any, param_types: str):
+        self.message = "Parameter {0} has invalid type. Type {1} not in {2}".format(param_name, str(Any), param_types)
 
     def __str__(self):
         return self.message
@@ -48,7 +50,7 @@ def __str__(self):
 
 
 class IncompatibleVectorSizesException(Exception):
-    def __iter__(self, size1: int, size2: int) -> None:
+    def __init__(self, size1: int, size2: int) -> None:
         self.message = "Size {0} does not match size {1} ".format(size1, size2)
 
     def __str__(self):
diff --git a/src/spaces/action_space.py b/src/spaces/action_space.py
@@ -6,7 +6,7 @@
 import numpy as np
 import random
 from gym.spaces.discrete import Discrete
-from src.spaces.actions import ActionBase
+from src.spaces.actions import ActionBase, ActionType
 
 
 class ActionSpace(Discrete):
@@ -48,7 +48,7 @@ def shuffle(self) -> None:
         """
         random.shuffle(self.actions)
 
-    def get_action_by_column_name(self, column_name: str) -> ActionBase:
+    def get_action_by_name_and_type(self, column_name: str, action_type: ActionType) -> ActionBase:
         """
         Get the action that corresponds to the column with
         the given name. Raises ValueError if such an action does not
@@ -58,10 +58,11 @@ def get_action_by_column_name(self, column_name: str) -> ActionBase:
         """
 
         for action in self.actions:
-            if action.column_name == column_name:
+            if action.column_name == column_name and \
+                    action.action_type == action_type:
                 return action
 
-        raise ValueError("No action exists for column={0}".format(column_name))
+        raise ValueError("No action exists for column={0} with type {1}".format(column_name, action_type.name))
 
     def add(self, action: ActionBase) -> None:
         """
diff --git a/src/spaces/discrete_state_environment.py b/src/spaces/discrete_state_environment.py
@@ -17,10 +17,10 @@
 from src.utils.numeric_distance_type import NumericDistanceType
 from src.utils.numeric_distance_calculator import NumericDistanceCalculator
 
-
 DataSet = TypeVar("DataSet")
 RewardManager = TypeVar("RewardManager")
 ActionSpace = TypeVar("ActionSpace")
+DistortionCalculator = TypeVar('DistortionCalculator')
 
 _Reward = TypeVar('_Reward')
 _Discount = TypeVar('_Discount')
@@ -72,33 +72,36 @@ class DiscreteEnvConfig(object):
     """
     Configuration for discrete environment
     """
+
     def __init__(self) -> None:
         self.data_set: DataSet = None
         self.action_space: ActionSpace = None
         self.reward_manager: RewardManager = None
         self.average_distortion_constraint: float = 0.0
         self.gamma: float = 0.99
-        self.string_column_distortion_type: StringDistanceType = StringDistanceType.INVALID
-        self.numeric_column_distortion_metric_type: NumericDistanceType = NumericDistanceType.INVALID
+        # self.string_column_distortion_type: StringDistanceType = StringDistanceType.INVALID
+        # self.numeric_column_distortion_metric_type: NumericDistanceType = NumericDistanceType.INVALID
         self.n_states: int = 10
         self.min_distortion: float = 0.4
         self.max_distortion: float = 0.7
         self.n_rounds_below_min_distortion: int = 10
         self.distorted_set_path: Path = None
+        self.distortion_calculator: DistortionCalculator = None
 
 
 class DiscreteStateEnvironment(object):
     """
     The DiscreteStateEnvironment class. Uses state aggregation in order
     to create bins where the average total distortion of the dataset falls in
     """
+
     def __init__(self, env_config: DiscreteEnvConfig) -> None:
         self.config = env_config
         self.n_rounds_below_min_distortion = 0
         self.state_bins: List[float] = []
         self.distorted_data_set = copy.deepcopy(self.config.data_set)
         self.current_time_step: TimeStep = None
-        self.string_distance_calculator: TextDistanceCalculator = None
+        # self.string_distance_calculator: TextDistanceCalculator = None
 
         # dictionary that holds the distortion for every column
         # in the dataset
@@ -126,7 +129,8 @@ def get_action(self, aidx: int) -> ActionBase:
         return self.config.action_space[aidx]
 
     def save_current_dataset(self, episode_index: int) -> None:
-        self.distorted_data_set.save_to_csv(filename=Path(str(self.config.distorted_set_path) + "_" + str(episode_index)))
+        self.distorted_data_set.save_to_csv(
+            filename=Path(str(self.config.distorted_set_path) + "_" + str(episode_index)))
 
     def create_bins(self) -> None:
         """
@@ -167,7 +171,7 @@ def initialize_distances(self) -> None:
         normalized distance to 0.0 meaning that no distortion is assumed initially
         :return: None
         """
-        self.string_distance_calculator = TextDistanceCalculator(dist_type=self.config.string_column_distortion_type)
+        # self.string_distance_calculator = TextDistanceCalculator(dist_type=self.config.string_column_distortion_type)
         col_names = self.config.data_set.get_columns_names()
         for col in col_names:
             self.column_distances[col] = 0.0
@@ -194,14 +198,21 @@ def apply_action(self, action: ActionBase):
         current_column = self.distorted_data_set.get_column(col_name=action.column_name)
         start_column = self.config.data_set.get_column(col_name=action.column_name)
 
+        datatype = 'float'
         # calculate column distortion
         if self.distorted_data_set.columns[action.column_name] == str:
+            current_column = "".join(current_column.values)
+            start_column = "".join(start_column.values)
+            datatype = 'str'
+
             # join the column to calculate the distance
-            distance = self.string_distance_calculator.calculate(txt1="".join(current_column.values),
-                                                                 txt2="".join(start_column.values))
-        else:
-            distance = NumericDistanceCalculator(dist_type=self.config.numeric_column_distortion_metric_type)\
-                .calculate(state1=current_column, state2=start_column)
+            # distance = self.string_distance_calculator.calculate(txt1="".join(current_column.values),
+            #                                                     txt2="".join(start_column.values))
+        # else:
+        #    distance = NumericDistanceCalculator(dist_type=self.config.numeric_column_distortion_metric_type)\
+        #       .calculate(state1=current_column, state2=start_column)
+
+        distance = self.config.distortion_calculator.calculate(current_column, start_column, datatype)
 
         self.column_distances[action.column_name] = distance
 
@@ -212,7 +223,8 @@ def total_average_current_distortion(self) -> float:
         :return:
         """
 
-        return float(np.mean(list(self.column_distances.values())))
+        return self.config.distortion_calculator.total_distortion(
+            list(self.column_distances.values()))  # float(np.mean(list(self.column_distances.values())))
 
     def reset(self, **options) -> TimeStep:
         """
@@ -294,16 +306,43 @@ def step(self, action: ActionBase) -> TimeStep:
         step_type = StepType.MID
         next_state = self.get_aggregated_state(state_val=current_distortion)
 
+        # get the bin for the min distortion
+        min_dist_bin = self.get_aggregated_state(state_val=self.config.min_distortion)
+        max_dist_bin = self.get_aggregated_state(state_val=self.config.max_distortion)
+
+        # TODO: these modifications will cause the agent to always
+        # move close to transition points
+        if next_state < min_dist_bin <= self.current_time_step.observation:
+            # the agent chose to step into the chaos again
+            # we punish him with double the reward
+            reward = 2.0 * self.config.reward_manager.out_of_min_bound_reward
+        elif next_state > max_dist_bin >= self.current_time_step.observation:
+            # the agent is going to chaos from above
+            # punish him
+            reward = 2.0 * self.config.reward_manager.out_of_max_bound_reward
+
+        elif next_state >= min_dist_bin > self.current_time_step.observation:
+            # the agent goes towards the transition of min point so give a higher reward
+            # for this
+            reward = 0.95 * self.config.reward_manager.in_bounds_reward
+
+        elif next_state <= max_dist_bin < self.current_time_step.observation:
+            # the agent goes towards the transition of max point so give a higher reward
+            # for this
+            reward = 0.95 * self.config.reward_manager.in_bounds_reward
+
         if next_state >= self.n_states:
             done = True
 
         if done:
             step_type = StepType.LAST
             next_state = None
 
-        return TimeStep(step_type=step_type, reward=reward,
-                        observation=next_state,
-                        discount=self.config.gamma, info={"total_distortion": current_distortion})
+        self.current_time_step = TimeStep(step_type=step_type, reward=reward,
+                                          observation=next_state,
+                                          discount=self.config.gamma, info={"total_distortion": current_distortion})
+
+        return self.current_time_step
 
 
 class MultiprocessEnv(object):
diff --git a/src/utils/distortion_calculator.py b/src/utils/distortion_calculator.py
@@ -0,0 +1,51 @@
+"""
+Utilities for dataset distortion calculation
+"""
+import enum
+from typing import TypeVar
+from src.utils.numeric_distance_type import NumericDistanceType
+from src.utils.numeric_distance_calculator import NumericDistanceCalculator
+from src.utils.string_distance_calculator import StringDistanceType, TextDistanceCalculator
+from src.exceptions.exceptions import InvalidParamValue
+
+Vector = TypeVar('Vector')
+
+
+class DistortionCalculationType(enum.IntEnum):
+    """
+
+    """
+
+    INVALID = -1
+    SUM = 0
+    AVG = 1
+
+
+class DistortionCalculator(object):
+
+    def __init__(self, numeric_column_distortion_metric_type: NumericDistanceType,
+                 string_column_distortion_metric_type: StringDistanceType,
+                 dataset_distortion_type: DistortionCalculationType):
+        self.numeric_column_distortion_metric_type = numeric_column_distortion_metric_type
+        self.string_column_distortion_metric_type = string_column_distortion_metric_type
+        self.dataset_distortion_type = dataset_distortion_type
+
+    def calculate(self, vec1: Vector, vec2: Vector, datatype: str) -> float:
+
+        if datatype == 'str':
+            return TextDistanceCalculator(dist_type=self.string_column_distortion_metric_type).calculate(txt1=vec1,
+                                                                                                         txt2=vec2)
+        elif datatype == 'float' or datatype == 'int':
+            return NumericDistanceCalculator(dist_type=self.numeric_column_distortion_metric_type).calculate(state1=vec1,
+                                                                                                             state2=vec2)
+        raise InvalidParamValue(param_name='datatype', param_value=datatype)
+
+    def total_distortion(self, distortions: Vector) -> float:
+
+        if self.dataset_distortion_type == DistortionCalculationType.SUM:
+            return float(sum(distortions))
+        elif self.dataset_distortion_type == DistortionCalculationType.AVG:
+            return float(sum(distortions) / len(distortions))
+
+        raise InvalidParamValue(param_name='dataset_distortion_type', param_value=self.dataset_distortion_type.name)
+
diff --git a/src/utils/mixins.py b/src/utils/mixins.py
@@ -77,6 +77,7 @@ def max_action(self, state: Any, n_actions: int) -> int:
         :param n_actions: Total number of actions allowed
         :return: The action that corresponds to the maximum value
         """
-        values = np.array(self.q_table[state, a] for a in range(n_actions))
+        values = [self.q_table[state, a] for a in range(n_actions)]
+        values = np.array(values)
         action = np.argmax(values)
         return int(action)
diff --git a/src/utils/numeric_distance_calculator.py b/src/utils/numeric_distance_calculator.py
@@ -25,15 +25,27 @@ def _numeric_distance_calculator(state1: Vector, state2: Vector, dist_type: Nume
         raise IncompatibleVectorSizesException(size1=len(state1), size2=len(state2))
 
     if dist_type == NumericDistanceType.L1:
-        return _l1_state_leakage(state1=state1, state2=state2)
+        return np.linalg.norm(state1 - state2, ord=1)
     elif dist_type == NumericDistanceType.L2:
-        return _l1_state_leakage(state1=state1, state2=state2)
+        return np.linalg.norm(state1 - state2, ord=None)
     elif dist_type == NumericDistanceType.L2_NORMALIZED:
         return _normalized_l2_distance(state1=state1, state2=state2)
+    elif dist_type == NumericDistanceType.L2_AVG:
+        return _avg_l2_distance(state1=state1, state2=state2)
 
     raise InvalidParamValue(param_name="dist_type", param_value=dist_type.name)
 
 
+def _avg_l2_distance(state1: Vector, state2: Vector) -> float:
+
+    size = len(state1)
+    dist = 0.0
+    for item1, item2 in zip(state1, state2):
+        dist += ((item1 - item2) ** 2)
+
+    return np.sqrt(dist / float(size))
+
+
 def _normalized_l2_distance(state1: Vector, state2: Vector) -> float:
     """
     Returns the normalized L2 norm between the two vectors
@@ -49,8 +61,11 @@ def _normalized_l2_distance(state1: Vector, state2: Vector) -> float:
 
     return np.sqrt(dist)
 
+
+"""
 def _l2_state_leakage(state1: Vector, state2: Vector) -> float:
     return np.linalg.norm(state1 - state2, ord=None)
 
 def _l1_state_leakage(state1: Vector, state2: Vector) -> float:
     return np.linalg.norm(state1 - state2, ord=1)
+"""
diff --git a/src/utils/numeric_distance_type.py b/src/utils/numeric_distance_type.py
@@ -16,3 +16,4 @@ class NumericDistanceType(enum.IntEnum):
     L2 = 1
     L2_NORMALIZED = 2
     L1_NORMALIZED = 3
+    L2_AVG = 4