Update docstrings and API

pockerman · pockerman · commit 714fbcb4a1b2 · 2022-02-25T16:22:46.000Z
diff --git a/docs/source/API/actions.rst b/docs/source/API/actions.rst
@@ -3,16 +3,8 @@
 
 .. automodule:: actions
 
-   
-   
-   
 
-   
-   
-   
-
-   
-   
+      
    .. rubric:: Classes
 
    .. autosummary::
diff --git a/docs/source/modules.rst b/docs/source/modules.rst
@@ -5,14 +5,14 @@ API
    :maxdepth: 4
    
    API/actions
+   API/state
    generated/action_space
    generated/q_estimator
    generated/q_learning
    generated/trainer
    generated/sarsa_semi_gradient
    generated/exceptions
    generated/action_space
-   generated/actions
    generated/column_type
    generated/discrete_state_environment
    generated/observation_space
diff --git a/src/algorithms/q_learning.py b/src/algorithms/q_learning.py
@@ -77,9 +77,6 @@ def play(self, env: Env, stop_criterion: Criterion) -> None:
         total_dist = env.total_current_distortion()
         while stop_criterion.continue_itr(total_dist):
 
-            if stop_criterion.iteration_counter == 12:
-                print("Break...")
-
             # use the policy to select an action
             state_idx = env.get_aggregated_state(total_dist)
             action_idx = self.config.policy.on_state(state_idx)
diff --git a/src/algorithms/sarsa_semi_gradient.py b/src/algorithms/sarsa_semi_gradient.py
@@ -39,13 +39,45 @@ class SARSAn(WithMaxActionMixin):
     """
 
     def __init__(self, sarsa_config: SARSAnConfig):
-        super(SARSAn, self).__init__()
+        super(SARSAn, self).__init__(table={})
         self.name = "SARSAn"
         self.config = sarsa_config
-        self.q_table = {}
 
     def play(self, env: Env, stop_criterion: Criterion) -> None:
-        pass
+        """
+        Apply the trained agent on the given environment.
+
+        Parameters
+        ----------
+        env: The environment to apply the agent on
+        stop_criterion: Criteria that specify when play should stop
+
+        Returns
+        -------
+
+        None
+
+        """
+        # loop over the columns and for the
+        # column get the action that corresponds to
+        # the max payout.
+        # TODO: This will not work as the distortion is calculated
+        # by summing over the columns.
+
+        # set the q_table for the policy
+        # this is the table we should be using to
+        # make decisions
+        self.config.policy.q_table = self.q_table
+        total_dist = env.total_current_distortion()
+        while stop_criterion.continue_itr(total_dist):
+            # use the policy to select an action
+            state_idx = env.get_aggregated_state(total_dist)
+            action_idx = self.config.policy.on_state(state_idx)
+            action = env.get_action(action_idx)
+            print("{0} At state={1} with distortion={2} select action={3}".format("INFO: ", state_idx, total_dist,
+                                                                                  action.column_name + "-" + action.action_type.name))
+            env.step(action=action)
+            total_dist = env.total_current_distortion()
 
     def actions_before_training(self, env: Env) -> None:
         """
diff --git a/src/algorithms/trainer.py b/src/algorithms/trainer.py
@@ -1,5 +1,6 @@
-"""
-Trainer
+"""Module trainer. Specifies a utility class
+for training serial reinforcement learning algorithms.
+
 """
 
 import numpy as np
diff --git a/src/examples/nstep_semi_grad_sarsa_three_columns.py b/src/examples/nstep_semi_grad_sarsa_three_columns.py
@@ -218,7 +218,7 @@ def load_dataset() -> MockSubjectsLoader:
                      title="Running distortion average over 100 episodes")
 
 
-    '''
+
     print("=============================================")
     print("{0} Generating distorted dataset".format(INFO))
     # Let's play
@@ -229,4 +229,3 @@ def load_dataset() -> MockSubjectsLoader:
     env.save_current_dataset(episode_index=-2, save_index=False)
     print("{0} Done....".format(INFO))
     print("=============================================")
-    '''
diff --git a/src/policies/epsilon_greedy_policy.py b/src/policies/epsilon_greedy_policy.py
@@ -32,7 +32,7 @@ def __init__(self, eps: float, n_actions: int,
                  max_eps: float = 1.0, min_eps: float = 0.001,
                  epsilon_decay_factor: float = 0.01,
                  user_defined_decrease_method: UserDefinedDecreaseMethod = None) -> None:
-        super(WithMaxActionMixin, self).__init__()
+        super(WithMaxActionMixin, self).__init__(table={})
         self._eps = eps
         self._n_actions = n_actions
         self._decay_op = decay_op
diff --git a/src/spaces/state.py b/src/spaces/state.py
@@ -1,5 +1,7 @@
-"""
-Discretized state space
+"""The state module. Specifies a wrapper
+to a state such that it exposes column distortions
+and the bin index of the overall distortion.
+
 """
 
 from typing import TypeVar, List, Any
@@ -54,17 +56,30 @@ def __len__(self):
 
 
 class State(object):
-    """
-    Helper to represent a State
+    """Helper to represent a State
     """
     def __init__(self):
         self.idx = -1
         self.bin_idx = -1
         self.total_distortion: float = 0.0
         self.column_distortions = {}
 
-    def __contains__(self, item) -> bool:
-        return item in self.column_distortions.keys()
+    def __contains__(self, column_name: str) -> bool:
+        """
+        Returns true if column_name is in the column_distortions
+        keys
+
+        Parameters
+        ----------
+        column_name: The column name to query
+
+        Returns
+        -------
+
+        A boolean indicating if column_name is in the column_distortions
+        keys or not.
+        """
+        return column_name in self.column_distortions.keys()
 
     def __iter__(self):
         return StateIterator(list(self.column_distortions.keys()))
diff --git a/src/utils/mixins.py b/src/utils/mixins.py
@@ -52,24 +52,29 @@ def finished(self) -> bool:
 
 
 class WithQTableMixinBase(metaclass=abc.ABCMeta):
-    """
-    Base class to impose the concept of Q-table
+    """Base class to impose the concept of Q-table
     """
 
-    def __init__(self):
+    def __init__(self, table: QTable = None):
         # the table representing the q function
         # client code should choose the type of
         # the table
-        self.q_table: QTable = None
+        self.q_table: QTable = table
 
 
 class WithQTableMixin(WithQTableMixinBase):
+    """Helper class to associate a q_table with an algorithm
     """
-    Helper class to associate a q_table with an algorithm
-     if this is needed.
-    """
-    def __init__(self):
-        super(WithQTableMixin, self).__init__()
+    def __init__(self, table: QTable = None):
+        """
+        Constructor
+
+        Parameters
+        ----------
+        table: The Q-table representing the Q-function
+
+        """
+        super(WithQTableMixin, self).__init__(table)
 
     def state_action_values(self, state: Any, n_actions: int):
 
@@ -81,12 +86,11 @@ def state_action_values(self, state: Any, n_actions: int):
 
 
 class WithMaxActionMixin(WithQTableMixin):
-    """
-    The class WithMaxActionMixin.
+    """The class WithMaxActionMixin.
     """
 
-    def __init__(self):
-        super(WithMaxActionMixin, self).__init__()
+    def __init__(self, table: QTable = None):
+        super(WithMaxActionMixin, self).__init__(table)
 
     def max_action(self, state: Any, n_actions: int) -> int:
         """