k-Armed Bandit 1.0.0
A collection of k-armed bandits and associated agents for reinforcement learning
Loading...
Searching...
No Matches
base_agent.py
Go to the documentation of this file.
1import abc
2import numpy
3
4
class BaseAgent(abc.ABC):
    """
    A base class used to create a variety of bandit solving agents.

    This class provides a table that can be used to store reward estimates. It also defines the interface that any
    agent must define when implemented. This ensures consistent API across each agent type.
    """

    def __init__(self, k: int, start_value: float = 0.0) -> None:
        """
        Construct the agent.

        @param k The number of possible actions the agent can pick from at any given time. Must be an int greater than
        zero.
        @param start_value An initial value to use for each possible action. This assumes that each action is equally
        likely at start, so all values in the Q-table are set to this value.
        @exception ValueError if k is not an integer greater than 0.
        """
        super().__init__()
        # Enforce both halves of the documented contract: k must actually be an int, not merely positive.
        if not isinstance(k, int) or k <= 0:
            raise ValueError('k must be an integer greater than zero.')
        # Create a Q-table with size k. Note: numpy.float was removed in NumPy 1.24, so the concrete
        # dtype numpy.float64 is used instead.
        self._table = start_value * numpy.ones(shape=(k,), dtype=numpy.float64)

    @abc.abstractmethod
    def act(self) -> int:
        """
        Use a specific algorithm to determine which action to take.

        This method should define how exactly the agent selects an action. It is free to use @ref explore and @ref
        exploit as needed.
        @return An int representing which arm action to take. This int should be between [0, k).
        """

    def exploit(self) -> int:
        """
        Select the best action.

        This will use the Q-table to select the action with the highest likelihood. Ties are broken arbitrarily.
        @return An int representing which arm action to take. This int will be between [0, k).
        """
        # numpy.argmax only ever returns the *first* maximizing index, so it cannot break ties.
        # Instead, collect every index that attains the maximum value and sample uniformly among them.
        best_actions = numpy.flatnonzero(self.table == self.table.max())
        # Default of choice is to pick a single value; cast to a plain int to honor the return type.
        return int(numpy.random.choice(a=best_actions))

    def explore(self) -> int:
        """
        Explore a new action.

        This will select a random action to take from the Q-table, to explore the decision space more.
        @return An int representing which arm action to take. This int will be between [0, k).
        """
        # When provided a single int, choice draws from the range [0, k), so use the size of the
        # table as the input. Omitting size yields a scalar rather than a 1-element array, which is
        # required to satisfy the declared int return type.
        return int(numpy.random.choice(a=self.table.size))

    @property
    def table(self) -> numpy.ndarray:
        """
        Return the Q-Table.
        @return a Numpy array of k elements. The i-th element holds the estimated value for the i-th action/arm.
        """
        return self._table

    @abc.abstractmethod
    def update(self, action: int, reward: float) -> None:
        """
        Update the Q-Table.

        This takes the result of the previous action and the resulting reward and should update the Q-Table. How it
        updates will depend on the specific implementation.
        @param action An int representing which arm action was taken. This should be between [0, k).
        @param reward A float representing the resulting reward obtained from the selected action.
        """
A base class used to create a variety of bandit solving agents.
Definition base_agent.py:5
None update(self, int action, float reward)
Update the Q-Table.
Definition base_agent.py:76
int explore(self)
Explore a new action.
Definition base_agent.py:56
int act(self)
Use a specific algorithm to determine which action to take.
Definition base_agent.py:30
int exploit(self)
Select the best action.
Definition base_agent.py:39
numpy.ndarray table(self)
Return the Q-Table.
Definition base_agent.py:68
None __init__(self, int k, float start_value=0.0)
Construct the agent.
Definition base_agent.py:13