from agent import BaseAgent


class Greedy(BaseAgent):
    """An agent that always exploits, never explores.

    It will always pick the action with the highest value from the Q-table.
    While these values will be updated, it never explores, so it will likely
    quickly converge on a single action.
    """

    def __init__(self, k: int, start_value: float = 0.0) -> None:
        """Construct the agent.

        @param k The number of arms to select from. Should be an int greater than zero.
        @param start_value The starting reward to use for each arm. All arms assume the same value at the start.
        """
        super().__init__(k, start_value=start_value)

    def act(self) -> int:
        """Select an action to take from the available ones.

        Greedy always exploits, so this will always be one of the actions with
        the highest table value.

        @return An int representing the selected action. It will be on the interval [0, k).
        """
        # BaseAgent supplies the Q-table (self.table, a numpy.ndarray) and an
        # exploit() helper that selects the best action.
        return self.exploit()

    def update(self, action: int, reward: float) -> None:
        """Update the table values based on the last action.

        This uses an iterative version of a running average to update table values.

        @param action The index corresponding to the action that was taken.
        @param reward The resulting reward that was earned.
        """
        # self._n (the step count) is assumed to be maintained by BaseAgent.
        self.table[action] += (reward - self.table[action]) / self._n
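For context, here is a minimal runnable sketch of how Greedy behaves, using a hypothetical `_StubBaseAgent` in place of the real `agent.BaseAgent` (which supplies `exploit()` and the numpy `table`). The step-count bookkeeping is folded into `update` here for self-containment; in the real class it presumably lives in BaseAgent:

```python
import numpy as np


class _StubBaseAgent:
    """Hypothetical stand-in for agent.BaseAgent: a k-armed Q-table plus a counter."""

    def __init__(self, k: int, start_value: float = 0.0) -> None:
        self.table = np.full(k, start_value)  # one Q-value per arm
        self._n = 0                           # number of updates so far

    def exploit(self) -> int:
        # Best-valued arm; np.argmax breaks ties toward the lowest index.
        return int(np.argmax(self.table))


class Greedy(_StubBaseAgent):
    def act(self) -> int:
        return self.exploit()

    def update(self, action: int, reward: float) -> None:
        # Iterative running average: Q <- Q + (R - Q) / n.
        self._n += 1
        self.table[action] += (reward - self.table[action]) / self._n


agent = Greedy(k=3)
first = agent.act()              # all arms tie at 0.0, so argmax picks arm 0
agent.update(first, reward=1.0)  # arm 0's value becomes 0.0 + (1.0 - 0.0)/1 = 1.0
print(agent.act())               # → 0: greedy now locks onto arm 0
```

Because the other arms keep their start value forever, the first arm to earn a positive reward wins permanently, which is exactly the fast convergence the class docstring warns about.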