13 Test that the class can handle reward values. Acceptable values are
14 any iterable (e.g. a tuple, list, or numpy array) with exactly k
15 elements, each of which is numeric. Alternatively, None can be used to
16 have the class randomly select values.
20 for values
in ((1, 2, 3, 4), [1, 2, 3, 4], numpy.array([1, 2, 3, 4])):
21 with self.subTest(values=values):
24 rewards = bandit.trueValues()
25 for i, value
in enumerate(values):
27 value, rewards[i],
'Stored reward does not match provided.')
28 self.assertIsNotNone(bandit)
33 values = bandit.trueValues()
35 self.assertGreaterEqual(value, 0)
36 self.assertLess(value, 1)
38 with self.assertRaises(ValueError, msg=
'Static bandit did not reject incorrect length rewards.'):
39 Static(k=3, rewards=(1, 2))
42 for values
in ((1, 2), (
'the',
'it', 4, 5), 4,
'1 2 3 4', numpy.array([[1, 2], [3, 4]])):
43 with self.assertRaises(Exception, msg=
'Static bandit did not reject non-numeric rewards'):
48 Test that the right reward is returned when each arm is selected.
49 Selection should accept any index that is valid for a numpy array and
50 reject everything else. Valid indices should return the rewards that
51 match the reward values set by the class.
54 true_rewards = numpy.random.uniform(low=0, high=1, size=10)
55 bandit =
Static(k, true_rewards)
57 for arm
in (-3, 0, 2, 9):
58 expected_reward = bandit.select(arm)
60 expected_reward, true_rewards[arm],
'Static bandit did not provide a correct reward.')
63 expected_rewards = bandit.select(arms)
64 self.assertTrue(numpy.array_equal(
65 true_rewards[arms], expected_rewards))
68 for i
in (0.5, 10,
'1',
'the'):
69 rewards = bandit.trueValues()
70 with self.subTest(i=i):
71 with self.assertRaises(Exception, msg=
'Incorrect indices not rejected.'):
72 reward = bandit.select(i)
74 self.assertIsNone(bandit.select(
None))