@@ -1,10 +1,8 @@
-getgreedystates = ReinforcementLearning.getgreedystates
-for (v, rO, rVO, r, rP) in (([-9., 12., Inf64], [2, 3], [3], [3], [2]),
-                            ([-9., -12.], [1], [1], [1], [1]),
-                            ([Inf64, Inf64], [1, 2], [1, 2], [1, 2], [1, 2]))
-    @test getgreedystates(OptimisticEpsilonGreedyPolicy(0.), v) == rO
-    @test getgreedystates(VeryOptimisticEpsilonGreedyPolicy(0.), v) == rVO
-    @test getgreedystates(PesimisticEpsilonGreedyPolicy(0.), v) == rP
+import ReinforcementLearning: selectaction
+
+function empiricalactionprop(p, v; n = 10^6)
+    res = [selectaction(p, v) for _ in 1:n]
+    map(x -> length(find(i -> i == x, res)), 1:length(v)) ./ n
 end
 
 for (v, rO, rVO, r, rP) in (([-9., 12., Inf64], [0, .5, .5], [0, 0., 1.],
@@ -16,7 +14,15 @@ for (v, rO, rVO, r, rP) in (([-9., 12., Inf64], [0, .5, .5], [0, 0., 1.],
     @test getactionprobabilities(OptimisticEpsilonGreedyPolicy(0.), v) == rO
     @test getactionprobabilities(VeryOptimisticEpsilonGreedyPolicy(0.), v) == rVO
     @test getactionprobabilities(PesimisticEpsilonGreedyPolicy(0.), v) == rP
+    @test isapprox(empiricalactionprop(OptimisticEpsilonGreedyPolicy(0.), v),
+                   rO, atol = .05)
+    @test isapprox(empiricalactionprop(VeryOptimisticEpsilonGreedyPolicy(0.), v),
+                   rVO, atol = .05)
+    @test isapprox(empiricalactionprop(PesimisticEpsilonGreedyPolicy(0.), v),
+                   rP, atol = .05)
+    @test isapprox(empiricalactionprop(OptimisticEpsilonGreedyPolicy(.2), v),
+                   getactionprobabilities(OptimisticEpsilonGreedyPolicy(.2), v),
+                   atol = .05)
 end
 
 
-