k-Armed Bandit 1.0.0
A collection of k-armed bandits and associated agents for reinforcement learning
Loading...
Searching...
No Matches
epsilon_greedy.py
Go to the documentation of this file.
1import numpy
2from agent import BaseAgent
3
4
6 """
7 A greedy agent that occasionally explores.
8
9 This agent will primarily exploit when deciding its actions. However, it will occasionally choose to explore at a
10 rate of epsilon, which is provided at initialization. This gives it a chance to see if other actions are better
11 options.
12 """
13
14 def __init__(self, k: int, epsilon: float, start_value: float = 0.0) -> None:
15 """
16 Construct the agent.
17
18 @param k The number of actions to consider. This must be an int greater than zero.
19 @param epsilon The rate at which actions should randomly explore. As this is a probability, it should be between
20 0 and 1.
21 @param start_value The initial value to use in the table. All actions start with the same value.
22 @exception ValueError if epsilon is not a valid probability (between 0 and 1).
23 """
24 super().__init__(k, start_value=start_value)
25 self.epsilonepsilonepsilon = epsilon
26 # Track how many selections have been made to use in the update formula.
27 self._n = 0
28 # Per Numpy documentation, this is the preferred way to sample from random distributions.
29 self._rng = numpy.random.default_rng()
30
31 def act(self) -> int:
32 """
33 Determine which action to take.
34
35 This will explore randomly over the actions at a rate of epsilon and inversely will exploit based on table
36 values at a rate of (1.0 - epsilon).
37 @return The index of the selected action to take. Gauranteed to be an int on the range [0, k).
38 """
39 # Decide if the agent should explore or exploit using epsilon
40 samples = self._rng.binomial(n=1, p=self.epsilonepsilonepsilon, size=1)
41 should_explore = (samples[0] == 1)
42 if should_explore:
43 action = self.explore()
44 else:
45 action = self.exploit()
46 return action
47
48 @property
49 def epsilon(self) -> float:
50 return self._epsilon
51
52 @epsilon.setter
53 def epsilon(self, value: float) -> None:
54 if value < 0.0 or value > 1.0:
55 raise ValueError(
56 'Epsilon must be a valid probability, so between 0 and 1 (inclusive)!')
57 self._epsilon = value
58
59 def update(self, action: int, reward: float) -> None:
60 """
61 Update the Q-table based on the last action.
62
63 This will use an incremental formulation of the mean of all rewards obtained so far as the values of the table.
64 @param action An index representing which action on the table was selected. It must be between [0, k).
65 @param reward The reward obtained from this action.
66 """
67 self._n += 1
68 self.table[action] += (reward - self.table[action]) / self._n
A base class used to create a variety of bandit solving agents.
Definition base_agent.py:5
int explore(self)
Explore a new action.
Definition base_agent.py:56
int exploit(self)
Select the best action.
Definition base_agent.py:39
numpy.ndarray table(self)
Return the Q-Table.
Definition base_agent.py:68
A greedy agent that occasionally explores.
int act(self)
Determine which action to take.
None __init__(self, int k, float epsilon, float start_value=0.0)
Construct the agent.
None update(self, int action, float reward)
Update the Q-table based on the last action.
None epsilon(self, float value)