Source code for keras_gym.policies.special

import numpy as np

from ..base.errors import UnavailableActionError, ActionSpaceError
from ..base.mixins import RandomStateMixin, ActionSpaceMixin
from ..policies.base import BasePolicy

__all__ = (
    'RandomPolicy',
    'UserInputPolicy',
)


class RandomPolicy(BasePolicy, RandomStateMixin, ActionSpaceMixin):
    """
    Policy that picks actions at random by sampling the env's action space.

    The observed state is ignored; every call simply draws a fresh sample
    from ``env.action_space``.

    Parameters
    ----------
    env : gym environment

        The gym environment is used to sample from the action space.

    random_seed : int, optional

        Sets the random state to get reproducible results.

    """
    def __init__(self, env, random_seed=None):
        self.env = env
        self.random_seed = random_seed  # sets self.random in RandomStateMixin

    def __call__(self, s):
        """
        Draw a random action; the state ``s`` is not used.

        Parameters
        ----------
        s : state observation

            Ignored.

        Returns
        -------
        a : action

            Whatever ``self.env.action_space.sample()`` returns.

        """
        # NOTE(review): sampling goes through the action space's own RNG, not
        # through self.random, so ``random_seed`` does not visibly influence
        # this draw -- confirm whether the space is seeded elsewhere.
        return self.env.action_space.sample()

    def greedy(self, s):
        """
        Same as :func:`__call__`; a random policy has no greedy action.

        Parameters
        ----------
        s : state observation

            Ignored.

        Returns
        -------
        a : action

            A random sample from the action space.

        """
        return self(s)

    def dist_params(self, s):
        """
        Return the parameters of a (near-)uniform action distribution.

        Parameters
        ----------
        s : state observation

            Ignored.

        Returns
        -------
        params : 1d array or pair of 1d arrays

            For discrete action spaces, an array of length ``num_actions``
            whose entries all equal ``1 / num_actions``. For box action
            spaces, a tuple ``(mu, logvar)`` of arrays of length
            ``actions_ndim`` holding zeros and tens, respectively.

        Raises
        ------
        ActionSpaceError

            If the action space is neither discrete nor a box.

        """
        # action_space_is_discrete, action_space_is_box, num_actions and
        # actions_ndim are not defined in this class; presumably they are
        # provided by ActionSpaceMixin.
        if self.action_space_is_discrete:
            return np.ones(self.num_actions) / self.num_actions

        if self.action_space_is_box:
            mu = np.zeros(self.actions_ndim)          # zero mean
            logvar = 10 * np.ones(self.actions_ndim)  # large variance
            return mu, logvar

        raise ActionSpaceError(
            "method RandomPolicy.dist_params() is not yet implemented for "
            "action spaces of type: {}"
            .format(self.env.action_space.__class__.__name__))


class UserInputPolicy(BasePolicy, ActionSpaceMixin):
    """
    A policy that prompts the user to take an action.

    Parameters
    ----------
    env : gym environment

        The gym environment whose action space defines the set of actions
        the user may pick from. It is also rendered before each prompt if
        ``render_before_prompt`` is set.

    render_before_prompt : bool, optional

        Whether to render the env before prompting the user to pick an action.

    """
    def __init__(self, env, render_before_prompt=False):
        self.env = env
        self.render_before_prompt = bool(render_before_prompt)

    def __call__(self, s):
        """
        Ask the user (via stdin) which action to take.

        The user gets three attempts to enter a valid action, i.e. an integer
        in ``{0, ..., num_actions - 1}``.

        Parameters
        ----------
        s : state observation

            Not used directly; the user is expected to look at the rendered
            environment instead.

        Returns
        -------
        a : int

            The action picked by the user.

        Raises
        ------
        UnavailableActionError

            If no valid action was entered within three attempts.

        """
        num_actions = self.num_actions
        actions = ", ".join(map(str, range(num_actions)))
        if self.render_before_prompt:
            self.env.render()

        a = None  # keeps the final error message well-defined in all cases
        for attempt in range(1, 4):  # 3 attempts
            try:
                a = input("Pick action from {{{}}}: ".format(actions))
                action = int(a)
                # An integer outside the advertised set is just as invalid as
                # non-numeric input, so route it through the same retry path.
                if not 0 <= action < num_actions:
                    raise ValueError(
                        "action {} is not in {{{}}}".format(action, actions))
                return action
            except ValueError:
                print(
                    "ValueError: invalid action, try again (attempt {:d} of 3)"
                    "...".format(attempt))

        raise UnavailableActionError("a = {}".format(a))

    def greedy(self, s):
        """
        Same as :func:`__call__`; the user is prompted for an action.

        Parameters
        ----------
        s : state observation

            Passed on to :func:`__call__`.

        Returns
        -------
        a : int

            The action picked by the user.

        """
        return self(s)

    def dist_params(self, s):
        raise NotImplementedError('UserInputPolicy.dist_params')
    # Deliberately blank docstring; presumably this keeps the unsupported
    # method from showing inherited documentation in the generated docs.
    dist_params.__doc__ = ""