k-Armed Bandit 1.0.0
A collection of k-armed bandits and associated agents for reinforcement learning
Loading...
Searching...
No Matches
test_epsilon_greedy.py
Go to the documentation of this file.
1from agent import EpsilonGreedy
2import numpy
3import unittest
4
5
class TestEpsilonGreedy(unittest.TestCase):
    """
    Test case to verify behavior of the epsilon greedy agent implementation.
    """

    def setUp(self) -> None:
        """
        Create an agent to use for tests.
        """
        self.agent = EpsilonGreedy(k=10, epsilon=0.5, start_value=0.0)

    def test_action_selection(self) -> None:
        """
        Test appropriate actions are selected.

        As the agent can select via explore or exploit, no specific action is assumed. So this just tests that all
        selected actions are within the correct range of [0, k).
        """
        # Repeat multiple times so both the explore and exploit paths are likely to be exercised.
        for _ in range(100):
            action = self.agent.act()
            self.assertGreaterEqual(action, 0)
            self.assertLess(action, self.agent.table.size)

    def test_epsilon_bounds(self) -> None:
        """
        Test that epsilon only accepts probability values.
        """
        # These values are all valid probabilities.
        for e in (0.0, 0.25, 0.12345, 1.0, 1, 0):
            # subTest reports which value failed instead of stopping at the first one.
            with self.subTest(epsilon=e):
                agent = EpsilonGreedy(10, e, 0.0)
                self.assertEqual(e, agent.epsilon,
                                 msg='Agent did not store epsilon property.')
        # These are not allowed.
        for e in (-0.01, 100, 1.1, '0.5', (0.0, 0.5, 0.25)):
            with self.subTest(epsilon=e):
                with self.assertRaises(Exception, msg='Agent did not reject invalid epsilon inputs.'):
                    EpsilonGreedy(3, e, 0.0)  # type: ignore

    def test_update(self) -> None:
        """
        Test that the update works correctly.

        The agent uses a weighted average, so use known values to ensure correct calculation.
        """
        # Working out the formula by hand produces the following values.
        rewards = numpy.array(range(15, 26))
        expected_results = numpy.array(
            [15, 15.5, 16, 16.5, 17, 17.5, 18, 18.5, 19, 19.5, 20])
        for reward, expected in zip(rewards, expected_results):
            # Apply the reward first, then check that the table updated correctly.
            self.agent.update(action=0, reward=reward)
            # The estimate is a floating-point average, so compare with a tolerance
            # rather than relying on the exact rounding of the agent's arithmetic.
            self.assertAlmostEqual(self.agent.table[0], expected)
A greedy agent that occasionally explores.
Test case to verify behavior of the epsilon greedy agent implementation.
test_update(self)
Test that the update works correctly.
test_action_selection(self)
Test appropriate actions are selected.
test_epsilon_bounds(self)
Test that epsilon only accepts probability values.
None setUp(self)
Create an agent to use for tests.