Source code for keras_gym.policies.value_based

import numpy as np

from ..base.mixins import RandomStateMixin
from ..policies.base import BasePolicy
from ..utils import argmax


# Names exported by a star-import of this module.
__all__ = (
    'EpsilonGreedy',
    # TODO: add 'BoltzmannPolicy' here once it is implemented.
)


class EpsilonGreedy(BasePolicy, RandomStateMixin):
    """
    Value-based policy to select actions using epsilon-greedy strategy.

    Parameters
    ----------
    q_function : callable
        A state-action value function object.

    epsilon : float between 0 and 1
        The probability of selecting an action uniformly at random.

    random_seed : int, optional
        Sets the random state to get reproducible results.

    """
    def __init__(self, q_function, epsilon=0.1, random_seed=None):
        self.q_function = q_function
        self.epsilon = epsilon
        self.random_seed = random_seed  # sets self.random in RandomStateMixin

    def __call__(self, s):
        """
        Select an action for state ``s``.

        With probability ``epsilon`` the action is drawn from
        ``q_function.env.action_space.sample()``; otherwise the greedy
        action from :meth:`greedy` is returned.

        Parameters
        ----------
        s : state observation
            A single state observation; it is passed on unchanged to the
            q-function.

        Returns
        -------
        a : action
            The selected action.

        """
        # Explore: one draw from the policy's own random state decides
        # whether this call ignores the q-function altogether.
        if self.random.rand() < self.epsilon:
            return self.q_function.env.action_space.sample()

        # Exploit: fall back to the greedy action.
        return self.greedy(s)

    def set_epsilon(self, epsilon):
        """
        Change the value for ``epsilon``.

        Parameters
        ----------
        epsilon : float between 0 and 1
            The probability of selecting an action uniformly at random.

        Returns
        -------
        self
            The updated instance.

        """
        self.epsilon = epsilon
        return self

    def greedy(self, s):
        """
        Return the greedy action for state ``s``.

        Parameters
        ----------
        s : state observation
            A single state observation; it is passed on unchanged to the
            q-function.

        Returns
        -------
        a : action
            The result of applying the package's ``argmax`` helper to the
            q-values of ``s``.

        """
        q_values = self.q_function(s)  # shape: [num_actions]
        return argmax(q_values)

    def dist_params(self, s):
        """
        Return the epsilon-greedy action probabilities for state ``s``.

        Every action receives probability ``epsilon / num_actions``; the
        greedy action additionally receives ``1 - epsilon``.

        Parameters
        ----------
        s : state observation
            A single state observation; it is passed on unchanged to the
            q-function.

        Returns
        -------
        p : 1d array, shape: [num_actions]
            The probability assigned to each action.

        """
        # Reuse ``greedy`` so that this distribution always agrees with the
        # action that ``__call__`` picks when it does not explore.
        a = self.greedy(s)
        n = self.q_function.num_actions

        p = np.ones(n) * self.epsilon / n
        p[a] += 1 - self.epsilon

        # Sanity check only. The comparison is tolerant because the sum is
        # mathematically 1 but floating-point rounding can leave it an ulp
        # off, which would make an exact ``== 1`` test fail spuriously.
        total = p.sum()
        assert np.isclose(total, 1.0), (
            "action probabilities must sum to 1, got %r" % total)
        return p