Commit e315d3a
#32 Update API
1 parent 10cba59 commit e315d3a

4 files changed: +24 -28 lines

4 files changed

+24
-28
lines changed

src/algorithms/q_learning.py
2 additions & 2 deletions

@@ -74,7 +74,7 @@ def play(self, env: Env, stop_criterion: Criterion) -> None:

         # set the q_table for the policy
         self.config.policy.q_table = self.q_table
-        total_dist = env.total_average_current_distortion()
+        total_dist = env.total_current_distortion()
         while stop_criterion.continue_itr(total_dist):

             if stop_criterion.iteration_counter == 12:
@@ -87,7 +87,7 @@ def play(self, env: Env, stop_criterion: Criterion) -> None:
             print("{0} At state={1} with distortion={2} select action={3}".format("INFO: ", state_idx, total_dist,
                                                                                   action.column_name + "-" + action.action_type.name))
             env.step(action=action)
-            total_dist = env.total_average_current_distortion()
+            total_dist = env.total_current_distortion()

     def train(self, env: Env, **options) -> tuple:
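
Note: the rename changes how the agent polls the environment between steps. Below is a minimal, self-contained sketch of that polling pattern; the Env and Criterion classes here are hypothetical stand-ins for the repo's real ones, and the distortion increments are made up.

class Env:
    def __init__(self) -> None:
        self._distortion = 0.0

    def total_current_distortion(self) -> float:
        # renamed from total_average_current_distortion in this commit
        return self._distortion

    def step(self, action) -> None:
        self._distortion += 0.1  # pretend each action distorts the data a bit


class Criterion:
    def __init__(self, n_itrs: int, max_dist: float) -> None:
        self.iteration_counter = 0
        self.n_itrs = n_itrs
        self.max_dist = max_dist

    def continue_itr(self, dist: float) -> bool:
        self.iteration_counter += 1
        return self.iteration_counter <= self.n_itrs and dist < self.max_dist


env = Env()
criterion = Criterion(n_itrs=10, max_dist=0.5)
total_dist = env.total_current_distortion()
while criterion.continue_itr(total_dist):
    env.step(action=None)
    total_dist = env.total_current_distortion()
print(total_dist)  # ~0.5 after five steps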

src/datasets/dataset_wrapper.py
7 additions & 13 deletions

@@ -29,7 +29,6 @@ def read(self, filename: Path, **options) -> None:


 class PandasDSWrapper(DSWrapper[pd.DataFrame]):
-
     """
     Simple wrapper to a pandas DataFrame object.
     Facilitates various actions on the original dataset
@@ -60,15 +59,15 @@ def n_columns(self) -> int:
     def schema(self) -> dict:
         return pd.io.json.build_table_schema(self.ds)

-    def save_to_csv(self, filename: Path) -> None:
+    def save_to_csv(self, filename: Path, save_index: bool) -> None:
         """
         Save the underlying dataset in a csv format
         :param filename:
         :return:
         """
-        self.ds.to_csv(filename)
+        self.ds.to_csv(filename, index=save_index)

-    def read(self, filename: Path, **options) -> None:
+    def read(self, filename: Path, **options) -> None:
         """
         Load a data set from a file
         :param filename:
@@ -145,14 +144,14 @@ def get_column(self, col_name: str):
         return self.ds.loc[:, col_name]

     def get_column_unique_values(self, col_name: str):
-        """
+        """
         Returns the unique values for the column
         :param col_name:
         :return:
         """
-        col = self.get_column(col_name=col_name)
-        vals = col.values.ravel()
-        return pd.unique(vals)
+        col = self.get_column(col_name=col_name)
+        vals = col.values.ravel()
+        return pd.unique(vals)

     def get_columns_types(self):
         return list(self.ds.dtypes)
@@ -181,8 +180,3 @@ def apply_column_transform(self, column_name: str, transform: Transform) -> None
         column = self.get_column(col_name=column_name)
         column = transform.act(**{"data": column.values})
         self.ds[transform.column_name] = column
-
-
-
-
-
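
Note: the new save_index flag forwards straight to pandas' DataFrame.to_csv(index=...). A quick sketch of the forwarding with plain pandas; the DataFrame contents are made up and the free function stands in for the wrapper method.

from pathlib import Path

import pandas as pd

ds = pd.DataFrame({"gender": ["M", "F"], "salary": [100, 120]})

def save_to_csv(ds: pd.DataFrame, filename: Path, save_index: bool) -> None:
    # index=False keeps pandas from writing its row index
    # as a first, unnamed CSV column
    ds.to_csv(filename, index=save_index)

save_to_csv(ds, Path("distorted_set_-1"), save_index=False)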

src/examples/qlearning_three_columns.py
6 additions & 6 deletions

@@ -151,11 +151,10 @@ def get_ethinicity_hierarchy():
     # create the environment
     env = DiscreteStateEnvironment(env_config=env_config)
     env.reset()
-    env.save_current_dataset(episode_index=-1)

-    # save the original dataset for comparison
-    env.save_current_dataset(episode_index=-1)
-    env.reset()
+    # save the data before distortion so that we can
+    # later load it on ARX
+    env.save_current_dataset(episode_index=-1, save_index=False)

     # configuration for the Q-learner
     algo_config = QLearnConfig()
@@ -195,7 +194,8 @@ def get_ethinicity_hierarchy():

     stop_criterion = IterationControl(n_itrs=10, min_dist=MIN_DISTORTION, max_dist=MAX_DISTORTION)
     agent.play(env=env, stop_criterion=stop_criterion)
-    env.save_current_dataset(episode_index=-2)
-
+    env.save_current_dataset(episode_index=-2, save_index=False)
+    print("{0} Done....".format(INFO))
+    print("=============================================")

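
Note: save_index=False matters here because with pandas' default index=True the CSV gets an unnamed leading index column, which ARX would import as an extra attribute. A small demonstration (column values are made up):

import pandas as pd

df = pd.DataFrame({"gender": ["M", "F"]})

print(df.to_csv())             # ,gender   <- stray unnamed index column
                               # 0,M
                               # 1,F
print(df.to_csv(index=False))  # gender    <- what ARX should load
                               # M
                               # F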

src/spaces/discrete_state_environment.py
9 additions & 7 deletions

@@ -128,14 +128,16 @@ def n_states(self) -> int:
     def get_action(self, aidx: int) -> ActionBase:
         return self.config.action_space[aidx]

-    def save_current_dataset(self, episode_index: int) -> None:
+    def save_current_dataset(self, episode_index: int, save_index: bool = False) -> None:
         """
         Save the current distorted datase for the given episode index
         :param episode_index:
+        :param save_index:
         :return:
         """
         self.distorted_data_set.save_to_csv(
-            filename=Path(str(self.config.distorted_set_path) + "_" + str(episode_index)))
+            filename=Path(str(self.config.distorted_set_path) + "_" + str(episode_index)),
+            save_index=save_index)

     def create_bins(self) -> None:
         """
@@ -216,15 +218,14 @@ def apply_action(self, action: ActionBase):

         self.column_distances[action.column_name] = distance

-    def total_average_current_distortion(self) -> float:
+    def total_current_distortion(self) -> float:
         """
-        Calculates the average total distortion of the dataset
-        by summing over the current computed distances for each column
+        Calculates the current total distortion of the dataset.
         :return:
         """

         return self.config.distortion_calculator.total_distortion(
-            list(self.column_distances.values()))  # float(np.mean(list(self.column_distances.values())))
+            list(self.column_distances.values()))

     def reset(self, **options) -> TimeStep:
         """
@@ -270,7 +271,7 @@ def step(self, action: ActionBase) -> TimeStep:
         self.apply_action(action=action)

         # calculate the distortion of the dataset
-        current_distortion = self.total_average_current_distortion()
+        current_distortion = self.total_current_distortion()

         # get the reward for the current distortion
         reward = self.config.reward_manager.get_reward_for_state(state=current_distortion, **{"action": action})
@@ -312,6 +313,7 @@ def step(self, action: ActionBase) -> TimeStep:

         # TODO: these modifications will cause the agent to always
         # move close to transition points
+        # TODO: Remove the magic constants
         if next_state is not None and self.current_time_step.observation is not None:
             if next_state < min_dist_bin <= self.current_time_step.observation:
                 # the agent chose to step into the chaos again
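
Note: the rename matches what the method now does: it hands the raw per-column distances to the configured calculator and returns a total, not an average (the commented-out np.mean fallback is gone). A minimal sketch of that contract, assuming a calculator that simply sums; the repo's real distortion_calculator may combine distances differently, and the distances below are made up.

class SumDistortionCalculator:
    # hypothetical stand-in for config.distortion_calculator
    def total_distortion(self, distances: list) -> float:
        # total, not average: sum the current per-column distances
        return float(sum(distances))

column_distances = {"ethnicity": 0.35, "gender": 0.20, "salary": 0.10}
calculator = SumDistortionCalculator()
print(calculator.total_distortion(list(column_distances.values())))  # ~0.65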
