Skip to content

Commit 5655e17

Browse files
committed
sync
1 parent a17e25d commit 5655e17

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+22090
-126733
lines changed

Manifest.toml

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -154,9 +154,9 @@ version = "1.0.2"
154154

155155
[[DiffRules]]
156156
deps = ["NaNMath", "Random", "SpecialFunctions"]
157-
git-tree-sha1 = "10dca52cf6d4a62d82528262921daf63b99704a2"
157+
git-tree-sha1 = "eb0c34204c8410888844ada5359ac8b96292cfd1"
158158
uuid = "b552c78f-8df3-52c6-915a-8e097449b14b"
159-
version = "1.0.0"
159+
version = "1.0.1"
160160

161161
[[Distances]]
162162
deps = ["LinearAlgebra", "Statistics"]
@@ -170,9 +170,9 @@ uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
170170

171171
[[Distributions]]
172172
deps = ["FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SpecialFunctions", "Statistics", "StatsBase", "StatsFuns"]
173-
git-tree-sha1 = "71a3f1ae1fca9ed876edfbc2079d7b7c27e2e3d5"
173+
git-tree-sha1 = "6b19601c0e98de3a8964ed33ad73e130c7165b1d"
174174
uuid = "31c24e10-a181-5473-b8eb-7969acd0382f"
175-
version = "0.22.3"
175+
version = "0.22.4"
176176

177177
[[FFMPEG]]
178178
deps = ["BinaryProvider", "Libdl"]
@@ -229,9 +229,9 @@ version = "2.0.1"
229229

230230
[[GR]]
231231
deps = ["Base64", "DelimitedFiles", "LinearAlgebra", "Printf", "Random", "Serialization", "Sockets", "Test"]
232-
git-tree-sha1 = "c690c2ab22ac9ee323d9966deae61a089362b25c"
232+
git-tree-sha1 = "10633436bc2fc836347bda5073b7b6f06dcdc5e6"
233233
uuid = "28b8d3ca-fb5f-59d9-8090-bfdbd6d07a71"
234-
version = "0.44.0"
234+
version = "0.46.0"
235235

236236
[[GeometryTypes]]
237237
deps = ["ColorTypes", "FixedPointNumbers", "LinearAlgebra", "StaticArrays"]
@@ -439,9 +439,9 @@ version = "0.6.3"
439439

440440
[[Plots]]
441441
deps = ["Base64", "Contour", "Dates", "FFMPEG", "FixedPointNumbers", "GR", "GeometryTypes", "JSON", "LinearAlgebra", "Measures", "NaNMath", "Pkg", "PlotThemes", "PlotUtils", "Printf", "REPL", "Random", "RecipesBase", "Reexport", "Requires", "Showoff", "SparseArrays", "Statistics", "StatsBase", "UUIDs"]
442-
git-tree-sha1 = "efbe466a790d7e8a5c4b5ee1601c0c8edc99780b"
442+
git-tree-sha1 = "fd11ab7aec59103217ecc5b5ccc34ce60e61b9ba"
443443
uuid = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
444-
version = "0.28.4"
444+
version = "0.29.1"
445445

446446
[[PositiveFactorizations]]
447447
deps = ["LinearAlgebra", "Test"]
@@ -483,9 +483,9 @@ uuid = "c84ed2f1-dad5-54f0-aa8e-dbefe2724439"
483483
version = "0.4.0"
484484

485485
[[RecipesBase]]
486-
git-tree-sha1 = "7bdce29bc9b2f5660a6e5e64d64d91ec941f6aa2"
486+
git-tree-sha1 = "b4ed4a7f988ea2340017916f7c9e5d7560b52cae"
487487
uuid = "3cdcf5f2-1ef4-517c-9805-6587b60abb01"
488-
version = "0.7.0"
488+
version = "0.8.0"
489489

490490
[[Reexport]]
491491
deps = ["Pkg"]
@@ -494,13 +494,13 @@ uuid = "189a3867-3050-52da-a836-e630ba90ab69"
494494
version = "0.2.0"
495495

496496
[[ReinforcementLearningBase]]
497-
deps = ["Distributions", "Random"]
497+
deps = ["CUDAapi", "CuArrays", "Distributions", "MacroTools", "Random"]
498498
path = "/home/tj/workspace/github/ReinforcementLearningBase.jl/"
499499
uuid = "e575027e-6cd6-5018-9292-cdc6200d2b44"
500500
version = "0.5.0"
501501

502502
[[ReinforcementLearningCore]]
503-
deps = ["CuArrays", "Distributions", "Flux", "MacroTools", "ProgressMeter", "Random", "Reexport", "ReinforcementLearningBase", "StatsBase"]
503+
deps = ["Distributions", "MacroTools", "ProgressMeter", "Random", "ReinforcementLearningBase", "StatsBase"]
504504
path = "/home/tj/workspace/github/ReinforcementLearningCore/"
505505
uuid = "de1b191a-4ae0-4afa-a27b-92d07f46b2d6"
506506
version = "0.1.0"
@@ -548,9 +548,9 @@ uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
548548

549549
[[SpecialFunctions]]
550550
deps = ["OpenSpecFun_jll"]
551-
git-tree-sha1 = "268052ee908b2c086cc0011f528694f02f3e2408"
551+
git-tree-sha1 = "e19b98acb182567bcb7b75bb5d9eedf3a3b5ec6c"
552552
uuid = "276daf66-3868-5448-9aa4-cd146d93841b"
553-
version = "0.9.0"
553+
version = "0.10.0"
554554

555555
[[StaticArrays]]
556556
deps = ["LinearAlgebra", "Random", "Statistics"]
@@ -570,9 +570,9 @@ version = "0.32.0"
570570

571571
[[StatsFuns]]
572572
deps = ["Rmath", "SpecialFunctions"]
573-
git-tree-sha1 = "79982835d2ff3970685cb704500909c94189bde9"
573+
git-tree-sha1 = "f290ddd5fdedeadd10e961eb3f4d3340f09d030a"
574574
uuid = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
575-
version = "0.9.3"
575+
version = "0.9.4"
576576

577577
[[StatsPlots]]
578578
deps = ["Clustering", "DataStructures", "DataValues", "Distributions", "Interpolations", "KernelDensity", "Observables", "Plots", "RecipesBase", "Reexport", "StatsBase", "Tables", "Widgets"]

Project.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name = "RLIntro"
1+
name = "ReinforcementLearningAnIntroduction"
22
uuid = "02c1da58-b9a1-11e8-0212-f9611b8fe936"
33
authors = ["TianJun <tianjun.cpp@gmail.com>"]
44
version = "0.2.0"
@@ -11,6 +11,7 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
1111
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
1212
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
1313
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
14+
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
1415
ReinforcementLearningBase = "e575027e-6cd6-5018-9292-cdc6200d2b44"
1516
ReinforcementLearningCore = "de1b191a-4ae0-4afa-a27b-92d07f46b2d6"
1617
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"

notebooks/Chapter01_Tic_Tac_Toe.ipynb

Lines changed: 50 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,10 @@
1313
"text": [
1414
"\u001b[32m\u001b[1m Status\u001b[22m\u001b[39m `/mnt/E4E0A9C0E0A998F6/github/ReinforcementLearningAnIntroduction.jl/notebooks/Project.toml`\n",
1515
" \u001b[90m [31c24e10]\u001b[39m\u001b[37m Distributions v0.22.4\u001b[39m\n",
16-
" \u001b[90m [91a5bcdd]\u001b[39m\u001b[37m Plots v0.28.4\u001b[39m\n",
17-
" \u001b[90m [02c1da58]\u001b[39m\u001b[37m RLIntro v0.2.0 [`..`]\u001b[39m\n",
18-
" \u001b[90m [e575027e]\u001b[39m\u001b[37m ReinforcementLearningBase v0.5.0 [`~/workspace/github/ReinforcementLearningBase.jl`]\u001b[39m\n",
19-
" \u001b[90m [de1b191a]\u001b[39m\u001b[37m ReinforcementLearningCore v0.1.0 [`~/workspace/github/ReinforcementLearningCore`]\u001b[39m\n",
16+
" \u001b[90m [91a5bcdd]\u001b[39m\u001b[37m Plots v0.29.1\u001b[39m\n",
17+
" \u001b[90m [02c1da58]\u001b[39m\u001b[37m ReinforcementLearningAnIntroduction v0.2.0 [`..`]\u001b[39m\n",
18+
" \u001b[90m [e575027e]\u001b[39m\u001b[37m ReinforcementLearningBase v0.5.0 [`../../ReinforcementLearningBase.jl`]\u001b[39m\n",
19+
" \u001b[90m [de1b191a]\u001b[39m\u001b[37m ReinforcementLearningCore v0.1.0 [`../../ReinforcementLearningCore`]\u001b[39m\n",
2020
" \u001b[90m [2913bbd2]\u001b[39m\u001b[37m StatsBase v0.32.0\u001b[39m\n",
2121
" \u001b[90m [f3b207a7]\u001b[39m\u001b[37m StatsPlots v0.12.0\u001b[39m\n",
2222
" \u001b[90m [2f01184e]\u001b[39m\u001b[37m SparseArrays \u001b[39m\n"
@@ -36,16 +36,8 @@
3636
"name": "stderr",
3737
"output_type": "stream",
3838
"text": [
39-
"┌ Info: Precompiling ReinforcementLearningCore [de1b191a-4ae0-4afa-a27b-92d07f46b2d6]\n",
40-
"└ @ Base loading.jl:1273\n",
41-
"┌ Info: Precompiling RLIntro [02c1da58-b9a1-11e8-0212-f9611b8fe936]\n",
42-
"└ @ Base loading.jl:1273\n",
43-
"┌ Warning: Package RLIntro does not have Flux in its dependencies:\n",
44-
"│ - If you have RLIntro checked out for development and have\n",
45-
"│ added Flux as a dependency but haven't updated your primary\n",
46-
"│ environment's manifest file, try `Pkg.resolve()`.\n",
47-
"│ - Otherwise you may need to report an issue with RLIntro\n",
48-
"└ Loading Flux into RLIntro from project dependency, future warnings for RLIntro are suppressed.\n"
39+
"┌ Info: Precompiling ReinforcementLearningAnIntroduction [02c1da58-b9a1-11e8-0212-f9611b8fe936]\n",
40+
"└ @ Base loading.jl:1273\n"
4941
]
5042
},
5143
{
@@ -63,8 +55,7 @@
6355
}
6456
],
6557
"source": [
66-
"using ReinforcementLearningCore, RLIntro\n",
67-
"using RLIntro.TicTacToe\n",
58+
"using ReinforcementLearningAnIntroduction\n",
6859
"\n",
6960
"env = TicTacToeEnv()"
7061
]
@@ -125,7 +116,7 @@
125116
{
126117
"data": {
127118
"text/plain": [
128-
"(reward = 0.0, terminal = false, state = 4193, legal_actions_mask = Bool[1, 1, 1, 1, 1, 1, 1, 1, 1, 0])"
119+
"(reward = 0.0, terminal = false, state = 4151, legal_actions_mask = Bool[1, 1, 1, 1, 1, 1, 1, 1, 1, 0])"
129120
]
130121
},
131122
"execution_count": 5,
@@ -221,7 +212,7 @@
221212
{
222213
"data": {
223214
"text/plain": [
224-
"MonteCarloLearner{RLIntro.EveryVisit,TabularApproximator{1,Array{Float64,1}},CachedSampleAvg{Float64},RLIntro.NoSampling}(TabularApproximator{1,Array{Float64,1}}([0.5, 1.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 … 0.5, 0.5, 0.5, 0.5, 0.0, 0.5, 0.5, 0.5, 0.5, 0.5]), 1.0, 0.1, CachedSampleAvg{Float64}(Dict{Float64,SampleAvg}()))"
215+
"MonteCarloLearner{ReinforcementLearningAnIntroduction.EveryVisit,TabularApproximator{1,Array{Float64,1}},CachedSampleAvg{Float64},ReinforcementLearningAnIntroduction.NoSampling}(TabularApproximator{1,Array{Float64,1}}([0.5, 0.5, 0.0, 0.5, 0.5, 0.5, 0.0, 0.5, 0.5, 0.5 … 0.5, 0.5, 0.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]), 1.0, 0.1, CachedSampleAvg{Float64}(Dict{Float64,SampleAvg}()))"
225216
]
226217
},
227218
"execution_count": 8,
@@ -261,10 +252,10 @@
261252
],
262253
"source": [
263254
"function create_mapping(role)\n",
264-
" (obs, value_learner) -> begin\n",
255+
" (obs, learner) -> begin\n",
265256
" mask = get_legal_actions_mask(obs)\n",
266257
" [\n",
267-
" mask[a] ? value_learner(StateOverriddenObs(obs=obs, state=TicTacToe.get_next_state_id(get_state(obs), role, a))) : 0. # a dummy value \n",
258+
" mask[a] ? learner(StateOverriddenObs(obs=obs, state=TicTacToe.get_next_state_id(get_state(obs), role, a))) : 0. # a dummy value \n",
268259
" for a in action_space\n",
269260
" ]\n",
270261
" end\n",
@@ -273,20 +264,20 @@
273264
},
274265
{
275266
"cell_type": "code",
276-
"execution_count": 15,
267+
"execution_count": 10,
277268
"metadata": {},
278269
"outputs": [],
279270
"source": [
280271
"ϵ = 0.01\n",
281272
"\n",
282273
"π_1 = VBasedPolicy(\n",
283-
" value_learner = learner_1,\n",
274+
" learner = learner_1,\n",
284275
" mapping = create_mapping(TicTacToe.offensive),\n",
285276
" explorer = EpsilonGreedyExplorer(ϵ),\n",
286277
" )\n",
287278
"\n",
288279
"π_2 = VBasedPolicy(\n",
289-
" value_learner = learner_2,\n",
280+
" learner = learner_2,\n",
290281
" mapping = create_mapping(TicTacToe.defensive),\n",
291282
" explorer = EpsilonGreedyExplorer(ϵ),\n",
292283
" );\n",
@@ -310,15 +301,27 @@
310301
},
311302
{
312303
"cell_type": "code",
313-
"execution_count": null,
304+
"execution_count": 11,
314305
"metadata": {},
315306
"outputs": [
316307
{
317308
"name": "stderr",
318309
"output_type": "stream",
319310
"text": [
320-
"\u001b[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:51\u001b[39mm46\u001b[39m\n"
311+
"\u001b[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:42\u001b[39m8:41\u001b[39m\n"
321312
]
313+
},
314+
{
315+
"data": {
316+
"text/plain": [
317+
"2-element Array{EmptyHook,1}:\n",
318+
" EmptyHook()\n",
319+
" EmptyHook()"
320+
]
321+
},
322+
"execution_count": 11,
323+
"metadata": {},
324+
"output_type": "execute_result"
322325
}
323326
],
324327
"source": [
@@ -355,7 +358,7 @@
355358
},
356359
{
357360
"cell_type": "code",
358-
"execution_count": 17,
361+
"execution_count": 13,
359362
"metadata": {},
360363
"outputs": [
361364
{
@@ -364,7 +367,7 @@
364367
"play (generic function with 1 method)"
365368
]
366369
},
367-
"execution_count": 17,
370+
"execution_count": 13,
368371
"metadata": {},
369372
"output_type": "execute_result"
370373
}
@@ -418,7 +421,7 @@
418421
},
419422
{
420423
"cell_type": "code",
421-
"execution_count": 20,
424+
"execution_count": 14,
422425
"metadata": {},
423426
"outputs": [
424427
{
@@ -435,48 +438,48 @@
435438
"___\n",
436439
"isdone = [false], winner = [nothing]\n",
437440
"\n",
438-
"__O\n",
439-
"_X_\n",
440441
"___\n",
442+
"_X_\n",
443+
"O__\n",
441444
"isdone = [false], winner = [nothing]\n",
442445
"\n",
443-
"Your input:stdin> 1\n",
444-
"X_O\n",
445-
"_X_\n",
446+
"Your input:stdin> 6\n",
446447
"___\n",
448+
"_X_\n",
449+
"OX_\n",
447450
"isdone = [false], winner = [nothing]\n",
448451
"\n",
449-
"X_O\n",
452+
"_O_\n",
450453
"_X_\n",
451-
"__O\n",
454+
"OX_\n",
452455
"isdone = [false], winner = [nothing]\n",
453456
"\n",
454457
"Your input:stdin> 8\n",
455-
"X_O\n",
458+
"_O_\n",
456459
"_XX\n",
457-
"__O\n",
460+
"OX_\n",
458461
"isdone = [false], winner = [nothing]\n",
459462
"\n",
460-
"X_O\n",
463+
"_O_\n",
461464
"OXX\n",
462-
"__O\n",
465+
"OX_\n",
463466
"isdone = [false], winner = [nothing]\n",
464467
"\n",
465-
"Your input:stdin> 6\n",
466-
"X_O\n",
468+
"Your input:stdin> 1\n",
469+
"XO_\n",
467470
"OXX\n",
468-
"_XO\n",
471+
"OX_\n",
469472
"isdone = [false], winner = [nothing]\n",
470473
"\n",
471-
"XOO\n",
474+
"XO_\n",
472475
"OXX\n",
473-
"_XO\n",
476+
"OXO\n",
474477
"isdone = [false], winner = [nothing]\n",
475478
"\n",
476-
"Your input:stdin> 3\n",
477-
"XOO\n",
479+
"Your input:stdin> 7\n",
480+
"XOX\n",
478481
"OXX\n",
479-
"XXO\n",
482+
"OXO\n",
480483
"isdone = [true], winner = [nothing]\n",
481484
"\n",
482485
"Tie!\n"

0 commit comments

Comments
 (0)