Skip to content

Commit 11ef082

Browse files
committed
API update
1 parent 367c873 commit 11ef082

File tree

3 files changed

+12
-5
lines changed

3 files changed

+12
-5
lines changed

docs/source/API/epsilon_greedy_q_estimator.rst

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,5 @@ epsilon\_greedy\_q\_estimator
55

66
.. autoclass:: EpsilonGreedyQEstimatorConfig
77

8-
98
.. autoclass:: EpsilonGreedyQEstimator
109
:members: __init__, q_hat_value, update_weights, on_state

docs/source/modules.rst

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ API
1111
generated/q_estimator
1212
generated/q_learning
1313
generated/trainer
14-
generated/sarsa_semi_gradient
1514
generated/exceptions
1615
generated/action_space
1716
generated/column_type

src/algorithms/epsilon_greedy_q_estimator.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
"""Module epsilon_greedy_q_estimator
1+
"""Module epsilon_greedy_q_estimator. Implements
2+
a q-estimator by assuming linear function approximation
23
34
"""
45
from typing import TypeVar
@@ -31,6 +32,7 @@ def __init__(self, config: EpsilonGreedyQEstimatorConfig):
3132
3233
Parameters
3334
----------
35+
3436
config: The instance configuration
3537
3638
"""
@@ -46,9 +48,11 @@ def q_hat_value(self, state_action_vec: StateActionVec) -> float:
4648
:math: \hat{q}
4749
4850
approximate value for the given state-action vector
51+
4952
Parameters
5053
----------
51-
state_action_vec
54+
55+
state_action_vec: The state-action tiled vector
5256
5357
Returns
5458
-------
@@ -62,8 +66,10 @@ def update_weights(self, total_reward: float, state_action: Action,
6266
state_action_: Action, t: float) -> None:
6367
"""
6468
Update the weights
69+
6570
Parameters
6671
----------
72+
6773
total_reward: The reward observed
6874
state_action: The action that led to the reward
6975
state_action_:
@@ -81,13 +87,16 @@ def update_weights(self, total_reward: float, state_action: Action,
8187

8288
def on_state(self, state: State) -> Action:
8389
"""Returns the action on the given state
90+
8491
Parameters
8592
----------
86-
state
93+
94+
state: The state observed
8795
8896
Returns
8997
-------
9098
99+
An environment specific Action type
91100
"""
92101

93102
# compute the state values related to

0 commit comments

Comments (0)