pockerman
diff --git a/docs/source/API/a2c.rst b/docs/source/API/a2c.rst
Lines changed: 0 additions & 13 deletions
diff --git a/docs/source/API/algorithms/a2c.rst b/docs/source/API/algorithms/a2c.rst
Lines changed: 11 additions & 0 deletions
diff --git a/docs/source/API/q_learning.rst b/docs/source/API/algorithms/q_learning.rst
rename from docs/source/API/q_learning.rst
rename to docs/source/API/algorithms/q_learning.rst
diff --git a/docs/source/API/epsilon_greedy_q_estimator.rst b/docs/source/API/epsilon_greedy_q_estimator.rst
Lines changed: 0 additions & 9 deletions
diff --git a/docs/source/API/epsilon_greedy_policy.rst b/docs/source/API/policies/epsilon_greedy_policy.rst
rename from docs/source/API/epsilon_greedy_policy.rst
rename to docs/source/API/policies/epsilon_greedy_policy.rst
diff --git a/docs/source/API/trainers/pytorch_trainer.rst b/docs/source/API/trainers/pytorch_trainer.rst
Lines changed: 10 additions & 0 deletions
diff --git a/docs/source/API/trainer.rst b/docs/source/API/trainers/trainer.rst
rename from docs/source/API/trainer.rst
rename to docs/source/API/trainers/trainer.rst
diff --git a/docs/source/conf.py b/docs/source/conf.py
Lines changed: 1 addition & 0 deletions
diff --git a/docs/source/modules.rst b/docs/source/modules.rst
Lines changed: 7 additions & 5 deletions
@@ -0,0 +1,11 @@
+a2c
+===
+.. automodule:: a2c
+   :members: create_discounts_array, calculate_discounted_returns
+
+.. autoclass:: A2CConfig
+
+.. autoclass:: _ActResult
+  
+.. autoclass:: A2C
+   :members: __init__, share_memory, parameters, on_episode, default_action_sampler, from_path, _do_train
@@ -0,0 +1,10 @@
+pytorch\_trainer
+================
+
+.. automodule:: pytorch_trainer
+   :members: worker
+   
+.. autoclass:: PyTorchTrainerConfig
+
+.. autoclass:: PyTorchTrainer
+   :members: __init__, avg_rewards, avg_distortion, actions_before_training, actions_before_episode_begins, actions_after_episode_ends, train, actions_after_training
@@ -22,6 +22,7 @@
 sys.path.append(os.path.abspath("../../src/utils/"))
 sys.path.append(os.path.abspath("../../src/datasets/"))
 sys.path.append(os.path.abspath("../../src/networks/"))
+sys.path.append(os.path.abspath("../../src/trainers/"))
 print(sys.path)
 
 
 
@@ -4,23 +4,25 @@ API
 .. toctree::
    :maxdepth: 4
 
-   API/epsilon_greedy_policy
-   API/epsilon_greedy_q_estimator
-   API/q_learning
-   API/trainer
+   API/algorithms/epsilon_greedy_q_estimator
+   API/algorithms/a2c
+   API/algorithms/q_learning
+   API/trainers/trainer
+   API/trainers/pytorch_trainer
    API/datasets/column_type
    API/exceptions/exceptions
    API/maths/optimizer_type
    API/maths/pytorch_optimizer_builder
    API/networks/a2c_networks
+   API/policies/epsilon_greedy_policy
    API/spaces/actions
    API/spaces/action_space
    API/spaces/state
    API/spaces/discrete_state_environment
    API/spaces/tiled_environment
    API/spaces/time_step
    API/replay_buffer
-   API/a2c
+