from agent import BaseAgent

import numpy


class EpsilonGreedyAgent(BaseAgent):
    """A greedy agent that occasionally explores.

    This agent will primarily exploit when deciding its actions. However, it will occasionally choose to explore at
    a rate of epsilon, which is provided at initialization. This gives it a chance to see if other actions are
    better.
    """

    def __init__(self, k: int, epsilon: float, start_value: float = 0.0) -> None:
        """Construct the agent.

        @param k The number of actions to consider. This must be an int greater than zero.
        @param epsilon The rate at which actions should randomly explore. As this is a probability, it should be
        between 0 and 1 (inclusive).
        @param start_value The initial value to use in the table. All actions start with the same value.
        @exception ValueError if epsilon is not a valid probability (between 0 and 1).
        """
        super().__init__(k, start_value=start_value)
        # Assign through the property so the value is validated.
        self.epsilon = epsilon
        self._rng = numpy.random.default_rng()

    def act(self) -> int:
        """Determine which action to take.

        This will explore randomly over the actions at a rate of epsilon and inversely will exploit based on table
        values at a rate of (1.0 - epsilon).
        @return The index of the selected action to take. Guaranteed to be an int in the range [0, k).
        """
        # Draw a single Bernoulli sample: 1 means explore, 0 means exploit.
        samples = self._rng.binomial(n=1, p=self.epsilon, size=1)
        should_explore = (samples[0] == 1)
        if should_explore:
            return self.explore()
        return self.exploit()

    @property
    def epsilon(self) -> float:
        """Return the exploration rate."""
        return self._epsilon

    @epsilon.setter
    def epsilon(self, value: float) -> None:
        if value < 0.0 or value > 1.0:
            raise ValueError(
                'Epsilon must be a valid probability, so between 0 and 1 (inclusive)!')
        self._epsilon = value

    def update(self, action: int, reward: float) -> None:
        """Update the Q-table based on the last action.

        This will use an incremental formulation of the mean of all rewards obtained so far as the values of the
        table.
        @param action An index representing which action on the table was selected. It must be between [0, k).
        @param reward The reward obtained from this action.
        """
        self._n += 1  # update count, assumed initialized by BaseAgent; the denominator of the incremental mean
        self.table[action] += (reward - self.table[action]) / self._n
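The rule in update() is the standard incremental form of a running mean: after rewards r_1, ..., r_n, applying
q += (r_n - q) / n at each step leaves q equal to the plain average (r_1 + ... + r_n) / n, without storing the
reward history. A quick stand-alone check of that identity (the names here are illustrative, not part of the agent):

import numpy

rewards = numpy.array([1.0, 0.5, 2.0, -0.25])

q = 0.0
for n, r in enumerate(rewards, start=1):
    q += (r - q) / n  # incremental form of the running mean

assert numpy.isclose(q, rewards.mean())  # both give the same average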
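For context, a minimal usage sketch of the class as reconstructed above. The 10-armed Gaussian testbed below is a
stand-in reward model for illustration only, and the loop assumes BaseAgent provides the explore, exploit, and table
members listed in the summary that follows:

import numpy

rng = numpy.random.default_rng(seed=0)
true_means = rng.normal(0.0, 1.0, size=10)  # hidden mean reward of each arm

agent = EpsilonGreedyAgent(k=10, epsilon=0.1)
for _ in range(1_000):
    action = agent.act()
    reward = rng.normal(true_means[action], 1.0)  # noisy reward for the chosen arm
    agent.update(action, reward)

print('Best arm by true mean:    ', int(numpy.argmax(true_means)))
print('Best arm by learned value:', int(numpy.argmax(agent.table)))

With epsilon = 0.1 the agent exploits about 90% of the time, so the learned table should concentrate on the
best arm while still sampling the others occasionally.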
BaseAgent: A base class used to create a variety of bandit solving agents.
    int explore(self): Explore a new action.
    int exploit(self): Select the best action.
    numpy.ndarray table(self): Return the Q-Table.

EpsilonGreedyAgent: A greedy agent that occasionally explores.
    int act(self): Determine which action to take.
    None __init__(self, int k, float epsilon, float start_value=0.0): Construct the agent.
    None update(self, int action, float reward): Update the Q-table based on the last action.
    None epsilon(self, float value): Set the exploration rate; raises ValueError for values outside [0, 1].
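BaseAgent itself is not shown on this page. A minimal sketch consistent with the member list above might look like
the following; only the listed signatures are grounded, and everything else (the _n counter, the uniform explore,
the argmax exploit, the internal attribute names) is an assumption:

import numpy


class BaseAgent:
    """A base class used to create a variety of bandit solving agents.

    Hypothetical reconstruction: internals beyond the documented member signatures are assumptions.
    """

    def __init__(self, k: int, start_value: float = 0.0) -> None:
        if k <= 0:
            raise ValueError('k must be an int greater than zero!')
        self._k = k
        self._table = numpy.full(k, start_value, dtype=float)
        self._n = 0  # update counter, assumed here so subclasses can maintain a running mean
        self._base_rng = numpy.random.default_rng()

    @property
    def table(self) -> numpy.ndarray:
        """Return the Q-Table."""
        return self._table

    def explore(self) -> int:
        """Explore a new action (assumed: uniform random choice over all k actions)."""
        return int(self._base_rng.integers(self._k))

    def exploit(self) -> int:
        """Select the best action (assumed: argmax over the Q-Table)."""
        return int(numpy.argmax(self._table))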