JuliaReinforcementLearning
diff --git a/‎Manifest.toml‎
Lines changed: 16 additions & 16 deletions b/‎Manifest.toml‎
Lines changed: 16 additions & 16 deletions
diff --git a/‎Project.toml‎
Lines changed: 2 additions & 1 deletion b/‎Project.toml‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎notebooks/Chapter01_Tic_Tac_Toe.ipynb‎
Lines changed: 50 additions & 47 deletions b/‎notebooks/Chapter01_Tic_Tac_Toe.ipynb‎
Lines changed: 50 additions & 47 deletions
@@ -154,9 +154,9 @@ version = "1.0.2"
 
 [[DiffRules]]
 deps = ["NaNMath", "Random", "SpecialFunctions"]
-git-tree-sha1 = "10dca52cf6d4a62d82528262921daf63b99704a2"
+git-tree-sha1 = "eb0c34204c8410888844ada5359ac8b96292cfd1"
 uuid = "b552c78f-8df3-52c6-915a-8e097449b14b"
-version = "1.0.0"
+version = "1.0.1"
 
 [[Distances]]
 deps = ["LinearAlgebra", "Statistics"]
@@ -170,9 +170,9 @@ uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
 
 [[Distributions]]
 deps = ["FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SpecialFunctions", "Statistics", "StatsBase", "StatsFuns"]
-git-tree-sha1 = "71a3f1ae1fca9ed876edfbc2079d7b7c27e2e3d5"
+git-tree-sha1 = "6b19601c0e98de3a8964ed33ad73e130c7165b1d"
 uuid = "31c24e10-a181-5473-b8eb-7969acd0382f"
-version = "0.22.3"
+version = "0.22.4"
 
 [[FFMPEG]]
 deps = ["BinaryProvider", "Libdl"]
@@ -229,9 +229,9 @@ version = "2.0.1"
 
 [[GR]]
 deps = ["Base64", "DelimitedFiles", "LinearAlgebra", "Printf", "Random", "Serialization", "Sockets", "Test"]
-git-tree-sha1 = "c690c2ab22ac9ee323d9966deae61a089362b25c"
+git-tree-sha1 = "10633436bc2fc836347bda5073b7b6f06dcdc5e6"
 uuid = "28b8d3ca-fb5f-59d9-8090-bfdbd6d07a71"
-version = "0.44.0"
+version = "0.46.0"
 
 [[GeometryTypes]]
 deps = ["ColorTypes", "FixedPointNumbers", "LinearAlgebra", "StaticArrays"]
@@ -439,9 +439,9 @@ version = "0.6.3"
 
 [[Plots]]
 deps = ["Base64", "Contour", "Dates", "FFMPEG", "FixedPointNumbers", "GR", "GeometryTypes", "JSON", "LinearAlgebra", "Measures", "NaNMath", "Pkg", "PlotThemes", "PlotUtils", "Printf", "REPL", "Random", "RecipesBase", "Reexport", "Requires", "Showoff", "SparseArrays", "Statistics", "StatsBase", "UUIDs"]
-git-tree-sha1 = "efbe466a790d7e8a5c4b5ee1601c0c8edc99780b"
+git-tree-sha1 = "fd11ab7aec59103217ecc5b5ccc34ce60e61b9ba"
 uuid = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
-version = "0.28.4"
+version = "0.29.1"
 
 [[PositiveFactorizations]]
 deps = ["LinearAlgebra", "Test"]
@@ -483,9 +483,9 @@ uuid = "c84ed2f1-dad5-54f0-aa8e-dbefe2724439"
 version = "0.4.0"
 
 [[RecipesBase]]
-git-tree-sha1 = "7bdce29bc9b2f5660a6e5e64d64d91ec941f6aa2"
+git-tree-sha1 = "b4ed4a7f988ea2340017916f7c9e5d7560b52cae"
 uuid = "3cdcf5f2-1ef4-517c-9805-6587b60abb01"
-version = "0.7.0"
+version = "0.8.0"
 
 [[Reexport]]
 deps = ["Pkg"]
@@ -494,13 +494,13 @@ uuid = "189a3867-3050-52da-a836-e630ba90ab69"
 version = "0.2.0"
 
 [[ReinforcementLearningBase]]
-deps = ["Distributions", "Random"]
+deps = ["CUDAapi", "CuArrays", "Distributions", "MacroTools", "Random"]
 path = "/home/tj/workspace/github/ReinforcementLearningBase.jl/"
 uuid = "e575027e-6cd6-5018-9292-cdc6200d2b44"
 version = "0.5.0"
 
 [[ReinforcementLearningCore]]
-deps = ["CuArrays", "Distributions", "Flux", "MacroTools", "ProgressMeter", "Random", "Reexport", "ReinforcementLearningBase", "StatsBase"]
+deps = ["Distributions", "MacroTools", "ProgressMeter", "Random", "ReinforcementLearningBase", "StatsBase"]
 path = "/home/tj/workspace/github/ReinforcementLearningCore/"
 uuid = "de1b191a-4ae0-4afa-a27b-92d07f46b2d6"
 version = "0.1.0"
@@ -548,9 +548,9 @@ uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 
 [[SpecialFunctions]]
 deps = ["OpenSpecFun_jll"]
-git-tree-sha1 = "268052ee908b2c086cc0011f528694f02f3e2408"
+git-tree-sha1 = "e19b98acb182567bcb7b75bb5d9eedf3a3b5ec6c"
 uuid = "276daf66-3868-5448-9aa4-cd146d93841b"
-version = "0.9.0"
+version = "0.10.0"
 
 [[StaticArrays]]
 deps = ["LinearAlgebra", "Random", "Statistics"]
@@ -570,9 +570,9 @@ version = "0.32.0"
 
 [[StatsFuns]]
 deps = ["Rmath", "SpecialFunctions"]
-git-tree-sha1 = "79982835d2ff3970685cb704500909c94189bde9"
+git-tree-sha1 = "f290ddd5fdedeadd10e961eb3f4d3340f09d030a"
 uuid = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
-version = "0.9.3"
+version = "0.9.4"
 
 [[StatsPlots]]
 deps = ["Clustering", "DataStructures", "DataValues", "Distributions", "Interpolations", "KernelDensity", "Observables", "Plots", "RecipesBase", "Reexport", "StatsBase", "Tables", "Widgets"]
 
@@ -1,4 +1,4 @@
-name = "RLIntro"
+name = "ReinforcementLearningAnIntroduction"
 uuid = "02c1da58-b9a1-11e8-0212-f9611b8fe936"
 authors = ["TianJun <tianjun.cpp@gmail.com>"]
 version = "0.2.0"
@@ -11,6 +11,7 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 ReinforcementLearningBase = "e575027e-6cd6-5018-9292-cdc6200d2b44"
 ReinforcementLearningCore = "de1b191a-4ae0-4afa-a27b-92d07f46b2d6"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 
@@ -13,10 +13,10 @@
      "text": [
       "\u001b[32m\u001b[1m    Status\u001b[22m\u001b[39m `/mnt/E4E0A9C0E0A998F6/github/ReinforcementLearningAnIntroduction.jl/notebooks/Project.toml`\n",
       " \u001b[90m [31c24e10]\u001b[39m\u001b[37m Distributions v0.22.4\u001b[39m\n",
-      " \u001b[90m [91a5bcdd]\u001b[39m\u001b[37m Plots v0.28.4\u001b[39m\n",
-      " \u001b[90m [02c1da58]\u001b[39m\u001b[37m RLIntro v0.2.0 [`..`]\u001b[39m\n",
-      " \u001b[90m [e575027e]\u001b[39m\u001b[37m ReinforcementLearningBase v0.5.0 [`~/workspace/github/ReinforcementLearningBase.jl`]\u001b[39m\n",
-      " \u001b[90m [de1b191a]\u001b[39m\u001b[37m ReinforcementLearningCore v0.1.0 [`~/workspace/github/ReinforcementLearningCore`]\u001b[39m\n",
+      " \u001b[90m [91a5bcdd]\u001b[39m\u001b[37m Plots v0.29.1\u001b[39m\n",
+      " \u001b[90m [02c1da58]\u001b[39m\u001b[37m ReinforcementLearningAnIntroduction v0.2.0 [`..`]\u001b[39m\n",
+      " \u001b[90m [e575027e]\u001b[39m\u001b[37m ReinforcementLearningBase v0.5.0 [`../../ReinforcementLearningBase.jl`]\u001b[39m\n",
+      " \u001b[90m [de1b191a]\u001b[39m\u001b[37m ReinforcementLearningCore v0.1.0 [`../../ReinforcementLearningCore`]\u001b[39m\n",
       " \u001b[90m [2913bbd2]\u001b[39m\u001b[37m StatsBase v0.32.0\u001b[39m\n",
       " \u001b[90m [f3b207a7]\u001b[39m\u001b[37m StatsPlots v0.12.0\u001b[39m\n",
       " \u001b[90m [2f01184e]\u001b[39m\u001b[37m SparseArrays \u001b[39m\n"
@@ -36,16 +36,8 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "┌ Info: Precompiling ReinforcementLearningCore [de1b191a-4ae0-4afa-a27b-92d07f46b2d6]\n",
-      "└ @ Base loading.jl:1273\n",
-      "┌ Info: Precompiling RLIntro [02c1da58-b9a1-11e8-0212-f9611b8fe936]\n",
-      "└ @ Base loading.jl:1273\n",
-      "┌ Warning: Package RLIntro does not have Flux in its dependencies:\n",
-      "│ - If you have RLIntro checked out for development and have\n",
-      "│   added Flux as a dependency but haven't updated your primary\n",
-      "│   environment's manifest file, try `Pkg.resolve()`.\n",
-      "│ - Otherwise you may need to report an issue with RLIntro\n",
-      "└ Loading Flux into RLIntro from project dependency, future warnings for RLIntro are suppressed.\n"
+      "┌ Info: Precompiling ReinforcementLearningAnIntroduction [02c1da58-b9a1-11e8-0212-f9611b8fe936]\n",
+      "└ @ Base loading.jl:1273\n"
      ]
     },
     {
@@ -63,8 +55,7 @@
     }
    ],
    "source": [
-    "using ReinforcementLearningCore, RLIntro\n",
-    "using RLIntro.TicTacToe\n",
+    "using ReinforcementLearningAnIntroduction\n",
     "\n",
     "env = TicTacToeEnv()"
    ]
@@ -125,7 +116,7 @@
     {
      "data": {
       "text/plain": [
-       "(reward = 0.0, terminal = false, state = 4193, legal_actions_mask = Bool[1, 1, 1, 1, 1, 1, 1, 1, 1, 0])"
+       "(reward = 0.0, terminal = false, state = 4151, legal_actions_mask = Bool[1, 1, 1, 1, 1, 1, 1, 1, 1, 0])"
       ]
      },
      "execution_count": 5,
@@ -221,7 +212,7 @@
     {
      "data": {
       "text/plain": [
-       "MonteCarloLearner{RLIntro.EveryVisit,TabularApproximator{1,Array{Float64,1}},CachedSampleAvg{Float64},RLIntro.NoSampling}(TabularApproximator{1,Array{Float64,1}}([0.5, 1.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5  …  0.5, 0.5, 0.5, 0.5, 0.0, 0.5, 0.5, 0.5, 0.5, 0.5]), 1.0, 0.1, CachedSampleAvg{Float64}(Dict{Float64,SampleAvg}()))"
+       "MonteCarloLearner{ReinforcementLearningAnIntroduction.EveryVisit,TabularApproximator{1,Array{Float64,1}},CachedSampleAvg{Float64},ReinforcementLearningAnIntroduction.NoSampling}(TabularApproximator{1,Array{Float64,1}}([0.5, 0.5, 0.0, 0.5, 0.5, 0.5, 0.0, 0.5, 0.5, 0.5  …  0.5, 0.5, 0.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]), 1.0, 0.1, CachedSampleAvg{Float64}(Dict{Float64,SampleAvg}()))"
       ]
      },
      "execution_count": 8,
@@ -261,10 +252,10 @@
    ],
    "source": [
     "function create_mapping(role)\n",
-    "    (obs, value_learner) -> begin\n",
+    "    (obs, learner) -> begin\n",
     "        mask = get_legal_actions_mask(obs)\n",
     "        [\n",
-    "            mask[a] ? value_learner(StateOverriddenObs(obs=obs, state=TicTacToe.get_next_state_id(get_state(obs), role, a))) : 0.  # a dummy value     \n",
+    "            mask[a] ? learner(StateOverriddenObs(obs=obs, state=TicTacToe.get_next_state_id(get_state(obs), role, a))) : 0.  # a dummy value     \n",
     "            for a in action_space\n",
     "        ]\n",
     "    end\n",
@@ -273,20 +264,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [],
    "source": [
     "ϵ = 0.01\n",
     "\n",
     "π_1 = VBasedPolicy(\n",
-    "    value_learner = learner_1,\n",
+    "    learner = learner_1,\n",
     "    mapping = create_mapping(TicTacToe.offensive),\n",
     "    explorer = EpsilonGreedyExplorer(ϵ),\n",
     "    )\n",
     "\n",
     "π_2 = VBasedPolicy(\n",
-    "    value_learner = learner_2,\n",
+    "    learner = learner_2,\n",
     "    mapping = create_mapping(TicTacToe.defensive),\n",
     "    explorer = EpsilonGreedyExplorer(ϵ),\n",
     "    );\n",
@@ -310,15 +301,27 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\u001b[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:51\u001b[39mm46\u001b[39m\n"
+      "\u001b[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:42\u001b[39m8:41\u001b[39m\n"
      ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "2-element Array{EmptyHook,1}:\n",
+       " EmptyHook()\n",
+       " EmptyHook()"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [
@@ -355,7 +358,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [
     {
@@ -364,7 +367,7 @@
        "play (generic function with 1 method)"
       ]
      },
-     "execution_count": 17,
+     "execution_count": 13,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -418,7 +421,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [
     {
@@ -435,48 +438,48 @@
       "___\n",
       "isdone = [false], winner = [nothing]\n",
       "\n",
-      "__O\n",
-      "_X_\n",
       "___\n",
+      "_X_\n",
+      "O__\n",
       "isdone = [false], winner = [nothing]\n",
       "\n",
-      "Your input:stdin> 1\n",
-      "X_O\n",
-      "_X_\n",
+      "Your input:stdin> 6\n",
       "___\n",
+      "_X_\n",
+      "OX_\n",
       "isdone = [false], winner = [nothing]\n",
       "\n",
-      "X_O\n",
+      "_O_\n",
       "_X_\n",
-      "__O\n",
+      "OX_\n",
       "isdone = [false], winner = [nothing]\n",
       "\n",
       "Your input:stdin> 8\n",
-      "X_O\n",
+      "_O_\n",
       "_XX\n",
-      "__O\n",
+      "OX_\n",
       "isdone = [false], winner = [nothing]\n",
       "\n",
-      "X_O\n",
+      "_O_\n",
       "OXX\n",
-      "__O\n",
+      "OX_\n",
       "isdone = [false], winner = [nothing]\n",
       "\n",
-      "Your input:stdin> 6\n",
-      "X_O\n",
+      "Your input:stdin> 1\n",
+      "XO_\n",
       "OXX\n",
-      "_XO\n",
+      "OX_\n",
       "isdone = [false], winner = [nothing]\n",
       "\n",
-      "XOO\n",
+      "XO_\n",
       "OXX\n",
-      "_XO\n",
+      "OXO\n",
       "isdone = [false], winner = [nothing]\n",
       "\n",
-      "Your input:stdin> 3\n",
-      "XOO\n",
+      "Your input:stdin> 7\n",
+      "XOX\n",
       "OXX\n",
-      "XXO\n",
+      "OXO\n",
       "isdone = [true], winner = [nothing]\n",
       "\n",
       "Tie!\n"