k-Armed Bandit 1.0.0
A collection of k-armed bandits and associated agents for reinforcement learning
Loading...
Searching...
No Matches
static.py
Go to the documentation of this file.
1from bandit import BaseBandit
2import numpy
3
4
6 """
7 This class implements a bandit with a constant reward value each time
8 an arm is chosen.
9
10 The reward is not drawn from a distribution, nor does it change over time.
11 The user can specify the reward values at instantiation if they want.
12 """
13
def __init__(self, k, rewards=None):
    """
    Instantiate the class.

    If rewards is provided, it will be used for the reward values.
    Otherwise, a random value on the interval [0, 1) will be chosen for
    each arm.
    @param k An int greater than or equal to one representing the number
    of arms this bandit has.
    @param rewards If provided, the fixed reward for each arm. It can
    be a list, tuple, numpy array, generator, or any other finite iterable
    of numbers, but must supply exactly one value per arm. It can also be
    None to let the bandit pick random rewards from the interval [0, 1).
    @throws ValueError If rewards does not supply exactly one value per
    arm.
    """
    super().__init__(k)
    # NOTE(review): the cross-reference list for this file names the
    # base-class accessor `k`; confirm that `kk` is the intended attribute.
    arm_count = self.kk
    if rewards is None:
        self._rewards = numpy.random.uniform(low=0, high=1, size=arm_count)
        return

    # Use the builtin float: the numpy.float alias was deprecated in
    # NumPy 1.20 and removed in 1.24, where it raises AttributeError.
    # Converting before validating lets unsized iterables (e.g. generators)
    # be accepted, since the length is taken from the resulting array.
    values = numpy.fromiter(rewards, dtype=float)
    if len(values) != arm_count:
        raise ValueError('rewards_value must have a length of {0}, not {1}'.format(
            arm_count, len(values)))
    self._rewards = values
36
@property
def rewards(self):
    """
    Read-only access to the stored reward values.
    @return The object held in self._rewards, handed back as-is (no copy
    is made).
    """
    stored = self._rewards
    return stored
40
def select(self, index):
    """
    Look up the reward for the chosen arm or arms.
    @param index The arm to pick. Anything accepted as an index into the
    rewards container may be used, such as a single integer or a
    collection of integers.
    @return The value found at index in the rewards: a single reward when
    one arm is chosen, or the selected rewards when several arms are
    chosen. If index is None, None is returned.
    """
    # None is a valid numpy index (it acts as newaxis and reshapes the
    # result). This method only hands out reward values, so None is
    # returned to the caller untouched instead of being used as an index.
    return None if index is None else self.rewards[index]
60
def trueValues(self):
    """
    Report the reward associated with every arm.
    @return The rewards exposed by self.rewards, where the entry at each
    index is the reward value of the arm with that index.
    """
    arm_rewards = self.rewards
    return arm_rewards
A base class for the various bandit implementations.
Definition base_bandit.py:4
int k(self)
Return the number of arms this bandit has.
This class implements a bandit with a constant reward value each time an arm is chosen.
Definition static.py:5
trueValues(self)
Provide a numpy array of the rewards for each arm.
Definition static.py:61
select(self, index)
Get a reward from the chosen arm.
Definition static.py:41
__init__(self, k, rewards=None)
Instantiate the class.
Definition static.py:14