|
11 | 11 | "name": "stdout", |
12 | 12 | "output_type": "stream", |
13 | 13 | "text": [ |
14 | | - "\u001b[32m\u001b[1m Status\u001b[22m\u001b[39m `/mnt/E4E0A9C0E0A998F6/github/ReinforcementLearningAnIntroduction.jl/notebooks/Project.toml`\n", |
15 | | - " \u001b[90m [31c24e10]\u001b[39m\u001b[37m Distributions v0.22.4\u001b[39m\n", |
16 | | - " \u001b[90m [91a5bcdd]\u001b[39m\u001b[37m Plots v0.29.1\u001b[39m\n", |
| 14 | + "\u001b[32m\u001b[1mStatus\u001b[22m\u001b[39m `/mnt/E4E0A9C0E0A998F6/github/ReinforcementLearningAnIntroduction.jl/notebooks/Project.toml`\n", |
| 15 | + " \u001b[90m [31c24e10]\u001b[39m\u001b[37m Distributions v0.22.6\u001b[39m\n", |
| 16 | + " \u001b[90m [91a5bcdd]\u001b[39m\u001b[37m Plots v0.29.9\u001b[39m\n", |
17 | 17 | " \u001b[90m [02c1da58]\u001b[39m\u001b[37m ReinforcementLearningAnIntroduction v0.2.0 [`..`]\u001b[39m\n", |
18 | | - " \u001b[90m [2913bbd2]\u001b[39m\u001b[37m StatsBase v0.32.1\u001b[39m\n", |
| 18 | + " \u001b[90m [e575027e]\u001b[39m\u001b[37m ReinforcementLearningBase v0.7.1\u001b[39m\n", |
| 19 | + " \u001b[90m [de1b191a]\u001b[39m\u001b[37m ReinforcementLearningCore v0.3.0\u001b[39m\n", |
| 20 | + " \u001b[90m [2913bbd2]\u001b[39m\u001b[37m StatsBase v0.32.2\u001b[39m\n", |
19 | 21 | " \u001b[90m [f3b207a7]\u001b[39m\u001b[37m StatsPlots v0.12.0\u001b[39m\n", |
20 | 22 | " \u001b[90m [2f01184e]\u001b[39m\u001b[37m SparseArrays \u001b[39m\n" |
21 | 23 | ] |
|
35 | 37 | "output_type": "stream", |
36 | 38 | "text": [ |
37 | 39 | "┌ Info: Precompiling ReinforcementLearningAnIntroduction [02c1da58-b9a1-11e8-0212-f9611b8fe936]\n", |
38 | | - "└ @ Base loading.jl:1273\n" |
| 40 | + "└ @ Base loading.jl:1260\n", |
| 41 | + "┌ Info: Skipping precompilation since __precompile__(false). Importing ReinforcementLearningAnIntroduction [02c1da58-b9a1-11e8-0212-f9611b8fe936].\n", |
| 42 | + "└ @ Base loading.jl:1033\n" |
39 | 43 | ] |
40 | 44 | }, |
41 | 45 | { |
|
114 | 118 | { |
115 | 119 | "data": { |
116 | 120 | "text/plain": [ |
117 | | - "(reward = 0.0, terminal = false, state = 4151, legal_actions_mask = Bool[1, 1, 1, 1, 1, 1, 1, 1, 1, 0])" |
| 121 | + "(reward = 0.0, terminal = false, state = 4244, legal_actions_mask = Bool[1, 1, 1, 1, 1, 1, 1, 1, 1, 0])" |
118 | 122 | ] |
119 | 123 | }, |
120 | 124 | "execution_count": 5, |
|
210 | 214 | { |
211 | 215 | "data": { |
212 | 216 | "text/plain": [ |
213 | | - "MonteCarloLearner{ReinforcementLearningAnIntroduction.EveryVisit,TabularApproximator{1,Array{Float64,1}},CachedSampleAvg{Float64},ReinforcementLearningAnIntroduction.NoSampling}(TabularApproximator{1,Array{Float64,1}}([0.5, 0.5, 0.0, 0.5, 0.5, 0.5, 0.0, 0.5, 0.5, 0.5 … 0.5, 0.5, 0.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]), 1.0, 0.1, CachedSampleAvg{Float64}(Dict{Float64,SampleAvg}()))" |
| 217 | + "MonteCarloLearner{ReinforcementLearningAnIntroduction.EveryVisit,TabularApproximator{1,Array{Float64,1}},CachedSampleAvg{Float64},ReinforcementLearningAnIntroduction.NoSampling}(TabularApproximator{1,Array{Float64,1}}([0.5, 0.5, 0.5, 0.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.0 … 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.0, 0.5, 0.5, 0.5]), 1.0, 0.1, CachedSampleAvg{Float64}(Dict{Float64,SampleAvg}()))" |
214 | 218 | ] |
215 | 219 | }, |
216 | 220 | "execution_count": 8, |
|
271 | 275 | "π_1 = VBasedPolicy(\n", |
272 | 276 | " learner = learner_1,\n", |
273 | 277 | " mapping = create_mapping(TicTacToe.offensive),\n", |
274 | | - " explorer = EpsilonGreedyExplorer(ϵ),\n", |
| 278 | + " explorer = EpsilonGreedyExplorer(ϵ;seed=1),\n", |
275 | 279 | " )\n", |
276 | 280 | "\n", |
277 | 281 | "π_2 = VBasedPolicy(\n", |
278 | 282 | " learner = learner_2,\n", |
279 | 283 | " mapping = create_mapping(TicTacToe.defensive),\n", |
280 | | - " explorer = EpsilonGreedyExplorer(ϵ),\n", |
| 284 | + " explorer = EpsilonGreedyExplorer(ϵ;seed=2),\n", |
281 | 285 | " );\n", |
282 | 286 | "\n", |
283 | 287 | "agent_1 = Agent(\n", |
|
306 | 310 | "name": "stderr", |
307 | 311 | "output_type": "stream", |
308 | 312 | "text": [ |
309 | | - "\u001b[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:42\u001b[39m8:41\u001b[39m\n" |
| 313 | + "\u001b[32mProgress: 100%|█████████████████████████████████████████| Time: 0:07:51\u001b[39m39m\n" |
310 | 314 | ] |
311 | 315 | }, |
312 | 316 | { |
|
323 | 327 | } |
324 | 328 | ], |
325 | 329 | "source": [ |
326 | | - "run((agent_1, agent_2), env, StopAfterEpisode(1000000))" |
| 330 | + "run((agent_1, agent_2), env, StopAfterEpisode(1_000_000)) # try adjusting the number of episodes to see the performance difference" |
327 | 331 | ] |
328 | 332 | }, |
329 | 333 | { |
|
347 | 351 | "agent_2.policy.explorer.ϵ_stable = 0.0" |
348 | 352 | ] |
349 | 353 | }, |
| 354 | + { |
| 355 | + "cell_type": "code", |
| 356 | + "execution_count": 13, |
| 357 | + "metadata": {}, |
| 358 | + "outputs": [], |
| 359 | + "source": [ |
| 360 | + "reset!(env)" |
| 361 | + ] |
| 362 | + }, |
350 | 363 | { |
351 | 364 | "cell_type": "markdown", |
352 | 365 | "metadata": {}, |
|
356 | 369 | }, |
357 | 370 | { |
358 | 371 | "cell_type": "code", |
359 | | - "execution_count": 13, |
| 372 | + "execution_count": 14, |
360 | 373 | "metadata": {}, |
361 | 374 | "outputs": [ |
362 | 375 | { |
|
365 | 378 | "play (generic function with 1 method)" |
366 | 379 | ] |
367 | 380 | }, |
368 | | - "execution_count": 13, |
| 381 | + "execution_count": 14, |
369 | 382 | "metadata": {}, |
370 | 383 | "output_type": "execute_result" |
371 | 384 | } |
|
419 | 432 | }, |
420 | 433 | { |
421 | 434 | "cell_type": "code", |
422 | | - "execution_count": 14, |
| 435 | + "execution_count": 15, |
423 | 436 | "metadata": {}, |
424 | 437 | "outputs": [ |
425 | 438 | { |
|
438 | 451 | "\n", |
439 | 452 | "___\n", |
440 | 453 | "_X_\n", |
441 | | - "O__\n", |
| 454 | + "__O\n", |
442 | 455 | "isdone = [false], winner = [nothing]\n", |
443 | 456 | "\n", |
444 | | - "Your input:stdin> 6\n", |
| 457 | + "Your input:stdin> 2\n", |
445 | 458 | "___\n", |
446 | | - "_X_\n", |
447 | | - "OX_\n", |
| 459 | + "XX_\n", |
| 460 | + "__O\n", |
448 | 461 | "isdone = [false], winner = [nothing]\n", |
449 | 462 | "\n", |
450 | | - "_O_\n", |
451 | | - "_X_\n", |
452 | | - "OX_\n", |
| 463 | + "___\n", |
| 464 | + "XXO\n", |
| 465 | + "__O\n", |
453 | 466 | "isdone = [false], winner = [nothing]\n", |
454 | 467 | "\n", |
455 | | - "Your input:stdin> 8\n", |
456 | | - "_O_\n", |
457 | | - "_XX\n", |
458 | | - "OX_\n", |
| 468 | + "Your input:stdin> 7\n", |
| 469 | + "__X\n", |
| 470 | + "XXO\n", |
| 471 | + "__O\n", |
459 | 472 | "isdone = [false], winner = [nothing]\n", |
460 | 473 | "\n", |
461 | | - "_O_\n", |
462 | | - "OXX\n", |
463 | | - "OX_\n", |
| 474 | + "__X\n", |
| 475 | + "XXO\n", |
| 476 | + "O_O\n", |
464 | 477 | "isdone = [false], winner = [nothing]\n", |
465 | 478 | "\n", |
466 | | - "Your input:stdin> 1\n", |
467 | | - "XO_\n", |
468 | | - "OXX\n", |
469 | | - "OX_\n", |
| 479 | + "Your input:stdin> 6\n", |
| 480 | + "__X\n", |
| 481 | + "XXO\n", |
| 482 | + "OXO\n", |
470 | 483 | "isdone = [false], winner = [nothing]\n", |
471 | 484 | "\n", |
472 | | - "XO_\n", |
473 | | - "OXX\n", |
| 485 | + "_OX\n", |
| 486 | + "XXO\n", |
474 | 487 | "OXO\n", |
475 | 488 | "isdone = [false], winner = [nothing]\n", |
476 | 489 | "\n", |
477 | | - "Your input:stdin> 7\n", |
| 490 | + "Your input:stdin> 1\n", |
478 | 491 | "XOX\n", |
479 | | - "OXX\n", |
| 492 | + "XXO\n", |
480 | 493 | "OXO\n", |
481 | 494 | "isdone = [true], winner = [nothing]\n", |
482 | 495 | "\n", |
|
495 | 508 | "lastKernelId": null |
496 | 509 | }, |
497 | 510 | "kernelspec": { |
498 | | - "display_name": "Julia 1.3.0", |
| 511 | + "display_name": "Julia 1.4.1", |
499 | 512 | "language": "julia", |
500 | | - "name": "julia-1.3" |
| 513 | + "name": "julia-1.4" |
501 | 514 | }, |
502 | 515 | "language_info": { |
503 | 516 | "file_extension": ".jl", |
504 | 517 | "mimetype": "application/julia", |
505 | 518 | "name": "julia", |
506 | | - "version": "1.3.0" |
| 519 | + "version": "1.4.1" |
507 | 520 | } |
508 | 521 | }, |
509 | 522 | "nbformat": 4, |
|
0 commit comments