Skip to content

Commit d2b15da

Browse files
authored
chore: small changes on chapter08, mainly: (#57)
- add some labels on plots
- add some notebook titles
1 parent c252d04 commit d2b15da

File tree

3 files changed

+1209
-8
lines changed

3 files changed

+1209
-8
lines changed

notebooks/Chapter08_Expectation_VS_Sample.jl

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ using Statistics
1010
# ╔═╡ 9c2ef32a-522d-11eb-29e6-29da29e4dec7
1111
using Plots
1212

13+
# ╔═╡ 96096b41-fc25-43f9-ab75-50c5196e9604
14+
md"# Chapter 8.5 Expected vs. Sample Updates"
15+
1316
# ╔═╡ 88b47ee6-522d-11eb-1a98-ed4dd43dfd11
1417
begin
1518
Base.@kwdef mutable struct SampleAvg
@@ -37,6 +40,9 @@ function run_once(b)
3740
rms
3841
end
3942

43+
# ╔═╡ 39865f6a-8500-4277-884f-81b7de151fc6
44+
md"## Figure 8.7"
45+
4046
# ╔═╡ 969b48aa-522d-11eb-2039-61916ddf87d4
4147
begin
4248
n_runs = 1000
@@ -45,7 +51,7 @@ begin
4551
for b in [2, 10, 100, 1000]
4652
rms = mean(run_once(b) for _ in 1:n_runs)
4753
xs = (1:2*b) ./ b
48-
plot!(p, xs, rms, label="b=$b")
54+
plot!(p, xs, rms, label="b=$b", ylabel="RMS error in value estimate")
4955
end
5056

5157
p
@@ -864,9 +870,11 @@ version = "0.9.1+5"
864870
"""
865871

866872
# ╔═╡ Cell order:
873+
# ╟─96096b41-fc25-43f9-ab75-50c5196e9604
867874
# ╠═d80342ae-522b-11eb-19c0-cf931fb4ae81
868875
# ╠═88b47ee6-522d-11eb-1a98-ed4dd43dfd11
869876
# ╠═8fde026e-522d-11eb-2aa1-f3e28c238ea6
877+
# ╟─39865f6a-8500-4277-884f-81b7de151fc6
870878
# ╠═9c2ef32a-522d-11eb-29e6-29da29e4dec7
871879
# ╠═969b48aa-522d-11eb-2039-61916ddf87d4
872880
# ╟─00000000-0000-0000-0000-000000000001

notebooks/Chapter08_Maze.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ end
220220

221221
# ╔═╡ d3463894-59a0-11eb-2bcf-0dc30ae600dc
222222
begin
223-
fig_8_4 = plot(legend=:topleft)
223+
fig_8_4 = plot(legend=:topleft, xlabel="Time steps", ylabel="Cumulative reward")
224224
plot!(fig_8_4, mean(cumulative_dyna_reward(ExperienceBasedSamplingModel(), walls(), 1000, change_walls, 2000) for _ in 1:30), label="Dyna-Q")
225225
plot!(fig_8_4, mean(cumulative_dyna_reward(TimeBasedSamplingModel(;n_actions=4), walls(), 1000, change_walls, 2000) for _ in 1:30), label="Dyna-Q+")
226226
fig_8_4
@@ -241,7 +241,7 @@ end
241241

242242
# ╔═╡ b1d8fc48-59a1-11eb-27f0-cdc20954dbb3
243243
begin
244-
fig_8_5 = plot(legend=:topleft)
244+
fig_8_5 = plot(legend=:topleft, ylabel="Cumulative reward", xlabel="Time steps")
245245
plot!(fig_8_5, mean(cumulative_dyna_reward(ExperienceBasedSamplingModel(), new_walls(), 3000, new_change_walls, 3000) for _ in 1:50), label="dyna-Q")
246246
plot!(fig_8_5, mean(cumulative_dyna_reward(TimeBasedSamplingModel(n_actions=4, κ = 1e-3), new_walls(), 3000, new_change_walls, 3000) for _ in 1:50), label="dyna-Q+")
247247
fig_8_5

0 commit comments

Comments
 (0)