
Commit 7ded25e

Adapt to the upcoming new release of RLCore (#16)

* update dependency
* done
* use the latest stable versions
* update dependencies

1 parent b6d5a41 · commit 7ded25e

38 files changed (+124,999 / −22,118 lines)

Manifest.toml

Lines changed: 394 additions & 132 deletions
Large diffs are not rendered by default.

Project.toml

Lines changed: 2 additions & 4 deletions
@@ -19,14 +19,12 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd"
 
 [compat]
-julia = "1.3"
 DataStructures = "0.17"
 Distributions = "0.22"
 Flux = "0.10"
 MacroTools = "0.5"
 Plots = "0.29"
 Reexport = "0.2"
-ReinforcementLearningBase = "0.6"
-ReinforcementLearningCore = "0.2"
 StatsBase = "0.32"
-StatsPlots = "0.12"
+StatsPlots = "0.12"
+julia = "1.4"
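
The [compat] section now requires Julia 1.4 instead of 1.3, and the compat entries for ReinforcementLearningBase and ReinforcementLearningCore are dropped; the exact versions they resolve to (v0.7.1 and v0.3.0, per the notebook's Pkg.status output below) are recorded in the updated Manifest.toml. To re-resolve a local checkout against the new bounds, the standard Pkg workflow applies — a minimal sketch, assuming the working directory is the repository root:

using Pkg
Pkg.activate(".")  # activate the project whose Project.toml is shown above
Pkg.update()       # re-resolve dependencies within the new [compat] bounds
Pkg.status()       # verify the resolved versions match the updated Manifest.toml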

notebooks/Chapter01_Tic_Tac_Toe.ipynb

Lines changed: 52 additions & 39 deletions
@@ -11,11 +11,13 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"\u001b[32m\u001b[1m Status\u001b[22m\u001b[39m `/mnt/E4E0A9C0E0A998F6/github/ReinforcementLearningAnIntroduction.jl/notebooks/Project.toml`\n",
-" \u001b[90m [31c24e10]\u001b[39m\u001b[37m Distributions v0.22.4\u001b[39m\n",
-" \u001b[90m [91a5bcdd]\u001b[39m\u001b[37m Plots v0.29.1\u001b[39m\n",
+"\u001b[32m\u001b[1mStatus\u001b[22m\u001b[39m `/mnt/E4E0A9C0E0A998F6/github/ReinforcementLearningAnIntroduction.jl/notebooks/Project.toml`\n",
+" \u001b[90m [31c24e10]\u001b[39m\u001b[37m Distributions v0.22.6\u001b[39m\n",
+" \u001b[90m [91a5bcdd]\u001b[39m\u001b[37m Plots v0.29.9\u001b[39m\n",
 " \u001b[90m [02c1da58]\u001b[39m\u001b[37m ReinforcementLearningAnIntroduction v0.2.0 [`..`]\u001b[39m\n",
-" \u001b[90m [2913bbd2]\u001b[39m\u001b[37m StatsBase v0.32.1\u001b[39m\n",
+" \u001b[90m [e575027e]\u001b[39m\u001b[37m ReinforcementLearningBase v0.7.1\u001b[39m\n",
+" \u001b[90m [de1b191a]\u001b[39m\u001b[37m ReinforcementLearningCore v0.3.0\u001b[39m\n",
+" \u001b[90m [2913bbd2]\u001b[39m\u001b[37m StatsBase v0.32.2\u001b[39m\n",
 " \u001b[90m [f3b207a7]\u001b[39m\u001b[37m StatsPlots v0.12.0\u001b[39m\n",
 " \u001b[90m [2f01184e]\u001b[39m\u001b[37m SparseArrays \u001b[39m\n"
 ]
@@ -35,7 +37,9 @@
 "output_type": "stream",
 "text": [
 "┌ Info: Precompiling ReinforcementLearningAnIntroduction [02c1da58-b9a1-11e8-0212-f9611b8fe936]\n",
-"└ @ Base loading.jl:1273\n"
+"└ @ Base loading.jl:1260\n",
+"┌ Info: Skipping precompilation since __precompile__(false). Importing ReinforcementLearningAnIntroduction [02c1da58-b9a1-11e8-0212-f9611b8fe936].\n",
+"└ @ Base loading.jl:1033\n"
 ]
 },
 {
@@ -114,7 +118,7 @@
 {
 "data": {
 "text/plain": [
-"(reward = 0.0, terminal = false, state = 4151, legal_actions_mask = Bool[1, 1, 1, 1, 1, 1, 1, 1, 1, 0])"
+"(reward = 0.0, terminal = false, state = 4244, legal_actions_mask = Bool[1, 1, 1, 1, 1, 1, 1, 1, 1, 0])"
 ]
 },
 "execution_count": 5,
@@ -210,7 +214,7 @@
 {
 "data": {
 "text/plain": [
-"MonteCarloLearner{ReinforcementLearningAnIntroduction.EveryVisit,TabularApproximator{1,Array{Float64,1}},CachedSampleAvg{Float64},ReinforcementLearningAnIntroduction.NoSampling}(TabularApproximator{1,Array{Float64,1}}([0.5, 0.5, 0.0, 0.5, 0.5, 0.5, 0.0, 0.5, 0.5, 0.5 … 0.5, 0.5, 0.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]), 1.0, 0.1, CachedSampleAvg{Float64}(Dict{Float64,SampleAvg}()))"
+"MonteCarloLearner{ReinforcementLearningAnIntroduction.EveryVisit,TabularApproximator{1,Array{Float64,1}},CachedSampleAvg{Float64},ReinforcementLearningAnIntroduction.NoSampling}(TabularApproximator{1,Array{Float64,1}}([0.5, 0.5, 0.5, 0.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.0 … 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.0, 0.5, 0.5, 0.5]), 1.0, 0.1, CachedSampleAvg{Float64}(Dict{Float64,SampleAvg}()))"
 ]
 },
 "execution_count": 8,
@@ -271,13 +275,13 @@
 "π_1 = VBasedPolicy(\n",
 " learner = learner_1,\n",
 " mapping = create_mapping(TicTacToe.offensive),\n",
-" explorer = EpsilonGreedyExplorer(ϵ),\n",
+" explorer = EpsilonGreedyExplorer(ϵ;seed=1),\n",
 " )\n",
 "\n",
 "π_2 = VBasedPolicy(\n",
 " learner = learner_2,\n",
 " mapping = create_mapping(TicTacToe.defensive),\n",
-" explorer = EpsilonGreedyExplorer(ϵ),\n",
+" explorer = EpsilonGreedyExplorer(ϵ;seed=2),\n",
 " );\n",
 "\n",
 "agent_1 = Agent(\n",
@@ -306,7 +310,7 @@
 "name": "stderr",
 "output_type": "stream",
 "text": [
-"\u001b[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:42\u001b[39m8:41\u001b[39m\n"
+"\u001b[32mProgress: 100%|█████████████████████████████████████████| Time: 0:07:51\u001b[39m39m\n"
 ]
 },
 {
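
The substantive change in the source cells above is the explicit seed passed to each EpsilonGreedyExplorer, so the two agents draw independent exploration streams and the self-play run is reproducible. A minimal sketch of the seeded construction, assuming learner_1, create_mapping, and ϵ are defined earlier in the notebook as before:

π_1 = VBasedPolicy(
    learner = learner_1,
    mapping = create_mapping(TicTacToe.offensive),
    explorer = EpsilonGreedyExplorer(ϵ; seed = 1),  # fixed seed: reproducible ϵ-greedy draws
)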
@@ -323,7 +327,7 @@
 }
 ],
 "source": [
-"run((agent_1, agent_2), env, StopAfterEpisode(1000000))"
+"run((agent_1, agent_2), env, StopAfterEpisode(1_000_000)) # try adjusting the number of episodes to see the performance difference"
 ]
 },
 {
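
The training call itself is unchanged apart from the underscore separators and the new comment inviting experimentation with the episode budget. Before committing to the full million episodes, a shorter run with the same call shape is a cheap smoke test (a sketch, reusing the agents and env constructed above; per the comment in the diff, fewer episodes yield noticeably weaker play):

run((agent_1, agent_2), env, StopAfterEpisode(10_000))  # quick check before the full run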
@@ -347,6 +351,15 @@
 "agent_2.policy.explorer.ϵ_stable = 0.0"
 ]
 },
+{
+"cell_type": "code",
+"execution_count": 13,
+"metadata": {},
+"outputs": [],
+"source": [
+"reset!(env)"
+]
+},
 {
 "cell_type": "markdown",
 "metadata": {},
@@ -356,7 +369,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 13,
+"execution_count": 14,
 "metadata": {},
 "outputs": [
 {
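
The newly inserted cell calls reset!(env) right after the two cells that zero out ϵ_stable, so evaluation starts from an empty board with exploration switched off; the remaining hunks only renumber the execution counts that shift as a result. Put together, the evaluation setup from the surrounding cells reads (a sketch):

agent_1.policy.explorer.ϵ_stable = 0.0  # act greedily from now on
agent_2.policy.explorer.ϵ_stable = 0.0
reset!(env)                             # start the evaluation game from the initial state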
@@ -365,7 +378,7 @@
 "play (generic function with 1 method)"
 ]
 },
-"execution_count": 13,
+"execution_count": 14,
 "metadata": {},
 "output_type": "execute_result"
 }
@@ -419,7 +432,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 14,
+"execution_count": 15,
 "metadata": {},
 "outputs": [
 {
@@ -438,45 +451,45 @@
 "\n",
 "___\n",
 "_X_\n",
-"O__\n",
+"__O\n",
 "isdone = [false], winner = [nothing]\n",
 "\n",
-"Your input:stdin> 6\n",
+"Your input:stdin> 2\n",
 "___\n",
-"_X_\n",
-"OX_\n",
+"XX_\n",
+"__O\n",
 "isdone = [false], winner = [nothing]\n",
 "\n",
-"_O_\n",
-"_X_\n",
-"OX_\n",
+"___\n",
+"XXO\n",
+"__O\n",
 "isdone = [false], winner = [nothing]\n",
 "\n",
-"Your input:stdin> 8\n",
-"_O_\n",
-"_XX\n",
-"OX_\n",
+"Your input:stdin> 7\n",
+"__X\n",
+"XXO\n",
+"__O\n",
 "isdone = [false], winner = [nothing]\n",
 "\n",
-"_O_\n",
-"OXX\n",
-"OX_\n",
+"__X\n",
+"XXO\n",
+"O_O\n",
 "isdone = [false], winner = [nothing]\n",
 "\n",
-"Your input:stdin> 1\n",
-"XO_\n",
-"OXX\n",
-"OX_\n",
+"Your input:stdin> 6\n",
+"__X\n",
+"XXO\n",
+"OXO\n",
 "isdone = [false], winner = [nothing]\n",
 "\n",
-"XO_\n",
-"OXX\n",
+"_OX\n",
+"XXO\n",
 "OXO\n",
 "isdone = [false], winner = [nothing]\n",
 "\n",
-"Your input:stdin> 7\n",
+"Your input:stdin> 1\n",
 "XOX\n",
-"OXX\n",
+"XXO\n",
 "OXO\n",
 "isdone = [true], winner = [nothing]\n",
 "\n",
@@ -495,15 +508,15 @@
 "lastKernelId": null
 },
 "kernelspec": {
-"display_name": "Julia 1.3.0",
+"display_name": "Julia 1.4.1",
 "language": "julia",
-"name": "julia-1.3"
+"name": "julia-1.4"
 },
 "language_info": {
 "file_extension": ".jl",
 "mimetype": "application/julia",
 "name": "julia",
-"version": "1.3.0"
+"version": "1.4.1"
 }
 },
 "nbformat": 4,
