JuliaReinforcementLearning
diff --git a/‎Manifest.toml‎
Lines changed: 394 additions & 132 deletions b/‎Manifest.toml‎
Lines changed: 394 additions & 132 deletions
diff --git a/‎Project.toml‎
Lines changed: 2 additions & 4 deletions b/‎Project.toml‎
Lines changed: 2 additions & 4 deletions
diff --git a/‎notebooks/Chapter01_Tic_Tac_Toe.ipynb‎
Lines changed: 52 additions & 39 deletions b/‎notebooks/Chapter01_Tic_Tac_Toe.ipynb‎
Lines changed: 52 additions & 39 deletions
@@ -19,14 +19,12 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd"
 
 [compat]
-julia = "1.3"
 DataStructures = "0.17"
 Distributions = "0.22"
 Flux = "0.10"
 MacroTools = "0.5"
 Plots = "0.29"
 Reexport = "0.2"
-ReinforcementLearningBase = "0.6"
-ReinforcementLearningCore = "0.2"
 StatsBase = "0.32"
-StatsPlots = "0.12"
+StatsPlots = "0.12"
+julia = "1.4"
@@ -11,11 +11,13 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\u001b[32m\u001b[1m    Status\u001b[22m\u001b[39m `/mnt/E4E0A9C0E0A998F6/github/ReinforcementLearningAnIntroduction.jl/notebooks/Project.toml`\n",
-      " \u001b[90m [31c24e10]\u001b[39m\u001b[37m Distributions v0.22.4\u001b[39m\n",
-      " \u001b[90m [91a5bcdd]\u001b[39m\u001b[37m Plots v0.29.1\u001b[39m\n",
+      "\u001b[32m\u001b[1mStatus\u001b[22m\u001b[39m `/mnt/E4E0A9C0E0A998F6/github/ReinforcementLearningAnIntroduction.jl/notebooks/Project.toml`\n",
+      " \u001b[90m [31c24e10]\u001b[39m\u001b[37m Distributions v0.22.6\u001b[39m\n",
+      " \u001b[90m [91a5bcdd]\u001b[39m\u001b[37m Plots v0.29.9\u001b[39m\n",
       " \u001b[90m [02c1da58]\u001b[39m\u001b[37m ReinforcementLearningAnIntroduction v0.2.0 [`..`]\u001b[39m\n",
-      " \u001b[90m [2913bbd2]\u001b[39m\u001b[37m StatsBase v0.32.1\u001b[39m\n",
+      " \u001b[90m [e575027e]\u001b[39m\u001b[37m ReinforcementLearningBase v0.7.1\u001b[39m\n",
+      " \u001b[90m [de1b191a]\u001b[39m\u001b[37m ReinforcementLearningCore v0.3.0\u001b[39m\n",
+      " \u001b[90m [2913bbd2]\u001b[39m\u001b[37m StatsBase v0.32.2\u001b[39m\n",
       " \u001b[90m [f3b207a7]\u001b[39m\u001b[37m StatsPlots v0.12.0\u001b[39m\n",
       " \u001b[90m [2f01184e]\u001b[39m\u001b[37m SparseArrays \u001b[39m\n"
      ]
@@ -35,7 +37,9 @@
      "output_type": "stream",
      "text": [
       "┌ Info: Precompiling ReinforcementLearningAnIntroduction [02c1da58-b9a1-11e8-0212-f9611b8fe936]\n",
-      "└ @ Base loading.jl:1273\n"
+      "└ @ Base loading.jl:1260\n",
+      "┌ Info: Skipping precompilation since __precompile__(false). Importing ReinforcementLearningAnIntroduction [02c1da58-b9a1-11e8-0212-f9611b8fe936].\n",
+      "└ @ Base loading.jl:1033\n"
      ]
     },
     {
@@ -114,7 +118,7 @@
     {
      "data": {
       "text/plain": [
-       "(reward = 0.0, terminal = false, state = 4151, legal_actions_mask = Bool[1, 1, 1, 1, 1, 1, 1, 1, 1, 0])"
+       "(reward = 0.0, terminal = false, state = 4244, legal_actions_mask = Bool[1, 1, 1, 1, 1, 1, 1, 1, 1, 0])"
       ]
      },
      "execution_count": 5,
@@ -210,7 +214,7 @@
     {
      "data": {
       "text/plain": [
-       "MonteCarloLearner{ReinforcementLearningAnIntroduction.EveryVisit,TabularApproximator{1,Array{Float64,1}},CachedSampleAvg{Float64},ReinforcementLearningAnIntroduction.NoSampling}(TabularApproximator{1,Array{Float64,1}}([0.5, 0.5, 0.0, 0.5, 0.5, 0.5, 0.0, 0.5, 0.5, 0.5  …  0.5, 0.5, 0.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]), 1.0, 0.1, CachedSampleAvg{Float64}(Dict{Float64,SampleAvg}()))"
+       "MonteCarloLearner{ReinforcementLearningAnIntroduction.EveryVisit,TabularApproximator{1,Array{Float64,1}},CachedSampleAvg{Float64},ReinforcementLearningAnIntroduction.NoSampling}(TabularApproximator{1,Array{Float64,1}}([0.5, 0.5, 0.5, 0.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.0  …  0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.0, 0.5, 0.5, 0.5]), 1.0, 0.1, CachedSampleAvg{Float64}(Dict{Float64,SampleAvg}()))"
       ]
      },
      "execution_count": 8,
@@ -271,13 +275,13 @@
     "π_1 = VBasedPolicy(\n",
     "    learner = learner_1,\n",
     "    mapping = create_mapping(TicTacToe.offensive),\n",
-    "    explorer = EpsilonGreedyExplorer(ϵ),\n",
+    "    explorer = EpsilonGreedyExplorer(ϵ;seed=1),\n",
     "    )\n",
     "\n",
     "π_2 = VBasedPolicy(\n",
     "    learner = learner_2,\n",
     "    mapping = create_mapping(TicTacToe.defensive),\n",
-    "    explorer = EpsilonGreedyExplorer(ϵ),\n",
+    "    explorer = EpsilonGreedyExplorer(ϵ;seed=2),\n",
     "    );\n",
     "\n",
     "agent_1 = Agent(\n",
@@ -306,7 +310,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\u001b[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:42\u001b[39m8:41\u001b[39m\n"
+      "\u001b[32mProgress: 100%|█████████████████████████████████████████| Time: 0:07:51\u001b[39m39m\n"
      ]
     },
     {
@@ -323,7 +327,7 @@
     }
    ],
    "source": [
-    "run((agent_1, agent_2), env, StopAfterEpisode(1000000))"
+    "run((agent_1, agent_2), env, StopAfterEpisode(1_000_000))  # try adjusting the number of episodes to see the performance difference"
    ]
   },
   {
@@ -347,6 +351,15 @@
     "agent_2.policy.explorer.ϵ_stable = 0.0"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "reset!(env)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -356,7 +369,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [
     {
@@ -365,7 +378,7 @@
        "play (generic function with 1 method)"
       ]
      },
-     "execution_count": 13,
+     "execution_count": 14,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -419,7 +432,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 15,
    "metadata": {},
    "outputs": [
     {
@@ -438,45 +451,45 @@
       "\n",
       "___\n",
       "_X_\n",
-      "O__\n",
+      "__O\n",
       "isdone = [false], winner = [nothing]\n",
       "\n",
-      "Your input:stdin> 6\n",
+      "Your input:stdin> 2\n",
       "___\n",
-      "_X_\n",
-      "OX_\n",
+      "XX_\n",
+      "__O\n",
       "isdone = [false], winner = [nothing]\n",
       "\n",
-      "_O_\n",
-      "_X_\n",
-      "OX_\n",
+      "___\n",
+      "XXO\n",
+      "__O\n",
       "isdone = [false], winner = [nothing]\n",
       "\n",
-      "Your input:stdin> 8\n",
-      "_O_\n",
-      "_XX\n",
-      "OX_\n",
+      "Your input:stdin> 7\n",
+      "__X\n",
+      "XXO\n",
+      "__O\n",
       "isdone = [false], winner = [nothing]\n",
       "\n",
-      "_O_\n",
-      "OXX\n",
-      "OX_\n",
+      "__X\n",
+      "XXO\n",
+      "O_O\n",
       "isdone = [false], winner = [nothing]\n",
       "\n",
-      "Your input:stdin> 1\n",
-      "XO_\n",
-      "OXX\n",
-      "OX_\n",
+      "Your input:stdin> 6\n",
+      "__X\n",
+      "XXO\n",
+      "OXO\n",
       "isdone = [false], winner = [nothing]\n",
       "\n",
-      "XO_\n",
-      "OXX\n",
+      "_OX\n",
+      "XXO\n",
       "OXO\n",
       "isdone = [false], winner = [nothing]\n",
       "\n",
-      "Your input:stdin> 7\n",
+      "Your input:stdin> 1\n",
       "XOX\n",
-      "OXX\n",
+      "XXO\n",
       "OXO\n",
       "isdone = [true], winner = [nothing]\n",
       "\n",
@@ -495,15 +508,15 @@
    "lastKernelId": null
   },
   "kernelspec": {
-   "display_name": "Julia 1.3.0",
+   "display_name": "Julia 1.4.1",
    "language": "julia",
-   "name": "julia-1.3"
+   "name": "julia-1.4"
   },
   "language_info": {
    "file_extension": ".jl",
    "mimetype": "application/julia",
    "name": "julia",
-   "version": "1.3.0"
+   "version": "1.4.1"
   }
  },
  "nbformat": 4,