Skip to content

Commit 11ef082

Browse files
committed
API update
1 parent 367c873 commit 11ef082

File tree

3 files changed

+12
-5
lines changed

3 files changed

+12
-5
lines changed

docs/source/API/epsilon_greedy_q_estimator.rst

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,5 @@ epsilon\_greedy\_q\_estimator
55

66
.. autoclass:: EpsilonGreedyQEstimatorConfig
77

8-
98
.. autoclass:: EpsilonGreedyQEstimator
109
:members: __init__, q_hat_value, update_weights, on_state

docs/source/modules.rst

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ API
1111
generated/q_estimator
1212
generated/q_learning
1313
generated/trainer
14-
generated/sarsa_semi_gradient
1514
generated/exceptions
1615
generated/action_space
1716
generated/column_type

src/algorithms/epsilon_greedy_q_estimator.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
"""Module epsilon_greedy_q_estimator
1+
"""Module epsilon_greedy_q_estimator. Implements
2+
a q-estimator by assuming linear function approximation
23
34
"""
45
from typing import TypeVar
@@ -31,6 +32,7 @@ def __init__(self, config: EpsilonGreedyQEstimatorConfig):
3132
3233
Parameters
3334
----------
35+
3436
config: The instance configuration
3537
3638
"""
@@ -46,9 +48,11 @@ def q_hat_value(self, state_action_vec: StateActionVec) -> float:
4648
:math: \hat{q}
4749
4850
approximate value for the given state-action vector
51+
4952
Parameters
5053
----------
51-
state_action_vec
54+
55+
state_action_vec: The state-action tiled vector
5256
5357
Returns
5458
-------
@@ -62,8 +66,10 @@ def update_weights(self, total_reward: float, state_action: Action,
6266
state_action_: Action, t: float) -> None:
6367
"""
6468
Update the weights
69+
6570
Parameters
6671
----------
72+
6773
total_reward: The reward observed
6874
state_action: The action that led to the reward
6975
state_action_:
@@ -81,13 +87,16 @@ def update_weights(self, total_reward: float, state_action: Action,
8187

8288
def on_state(self, state: State) -> Action:
8389
"""Returns the action on the given state
90+
8491
Parameters
8592
----------
86-
state
93+
94+
state: The state observed
8795
8896
Returns
8997
-------
9098
99+
An environment specific Action type
91100
"""
92101

93102
# compute the state values related to

0 commit comments

Comments (0)