k-Armed Bandit 1.0.0
A collection of k-armed bandits and associated agents for reinforcement learning
Loading...
Searching...
No Matches
normal.py
Go to the documentation of this file.
1from bandit import BaseBandit
2import numpy
3
4
6 """
7 This bandit draws a reward from a set normal distribution each time
8 an arm is chosen. Each arm has its own distribution that is fixed upon
9 construction. Each distribution has a standard deviation of 1 and a mean
10 randomly drawn from the uniform range [-1, 1).
11 """
12
def __init__(self, k: int) -> None:
    """
    Construct the class.

    This includes defining the normal distribution parameters for each
    arm. There is a different distribution for each arm. The means are
    sampled from the uniform range [-1, 1). The standard deviations are
    1.0.
    @param k The number of arms this bandit should have. This must be an
    int greater than 0.
    """
    super().__init__(k)
    # The standard deviations are fixed at 1.0 for every arm. The builtin
    # float is used as the dtype because the numpy.float alias was
    # deprecated in NumPy 1.20 and removed in NumPy 1.24; both name the
    # same double precision type.
    self._std = numpy.ones(shape=(k,), dtype=float)
    # The means are drawn from a uniform range.
    self._mean = numpy.random.uniform(low=-1.0, high=1.0, size=(k,))
29
def select(self, index):
    """
    Draw a reward from each of the requested arms.

    @param index Any numpy valid indexing method that picks out the arms
    to draw a reward from. None is also accepted, in which case no reward
    is drawn.
    @return The sampled rewards. A single integer index produces a single
    float, any other index produces a numpy array, and an index of None
    produces None.
    """
    # Nothing was selected, so there is nothing to sample.
    if index is None:
        return None
    # Each chosen arm is sampled from its own normal distribution.
    return numpy.random.normal(
        loc=self._mean[index],
        scale=self._std[index],
    )
47
def trueValues(self):
    """
    Expose the parameters of every arm's distribution.

    @return A two element tuple. The first element is the numpy array of
    means, one per arm. The second element is the numpy array of standard
    deviations, one per arm.
    """
    means = self._mean
    stds = self._std
    return means, stds
A base class for the various bandit implementations.
Definition base_bandit.py:4
This bandit draws a reward from a set normal distribution each time an arm is chosen.
Definition normal.py:5
None __init__(self, int k)
Construct the class.
Definition normal.py:13
select(self, index)
Select one or several arms to obtain a reward from.
Definition normal.py:30
trueValues(self)
Return the distribution parameters for the arms.
Definition normal.py:48