Source code for keras_gym.core.function_approximator

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as K

from ..utils import check_tensor
from ..base.mixins import ActionSpaceMixin
from ..base.errors import ActionSpaceError


__all__ = (
    'FunctionApproximator',
)


class FunctionApproximator(ActionSpaceMixin):
    """
    A generic function approximator.

    This is the central object that provides an interface between a
    gym-type environment and function approximators like :term:`value
    functions <state value function>` and :term:`updateable policies
    <updateable policy>`.

    In order to create a valid function approximator, you need to implement
    the :term:`body` method. For example, to implement a simple multi-layer
    perceptron function approximator you would do something like:

    .. code:: python

        import gym
        import keras_gym as km
        from tensorflow.keras.layers import Flatten, Dense

        class MLP(km.FunctionApproximator):
            \"\"\" multi-layer perceptron with one hidden layer \"\"\"
            def body(self, S):
                X = Flatten()(S)
                X = Dense(units=4)(X)
                return X

        # environment
        env = gym.make(...)

        # generic function approximator
        mlp = MLP(env, lr=0.001)

        # policy and value function
        pi, v = km.SoftmaxPolicy(mlp), km.V(mlp)

    The default :term:`heads <head>` are simple (multi) linear regression
    layers, which can be overridden by your own implementation.

    Parameters
    ----------
    env : environment

        A gym-style environment.

    optimizer : keras.optimizers.Optimizer, optional

        If left unspecified (``optimizer=None``), the function
        approximator's DEFAULT_OPTIMIZER is used. See `keras documentation
        <https://keras.io/optimizers/>`_ for more details.

    **optimizer_kwargs : keyword arguments

        Keyword arguments for the optimizer. This is useful when you want
        to use the default optimizer with a different setting, e.g.
        changing the learning rate.

    """
    DEFAULT_OPTIMIZER = keras.optimizers.Adam
    VALUE_LOSS_FUNCTION = keras.losses.Huber()

    def __init__(self, env, optimizer=None, **optimizer_kwargs):
        self.env = env
        self._init_optimizer(optimizer, optimizer_kwargs)

    def head_v(self, X):
        """
        This is the :term:`state value <state value function>` head. It
        returns a scalar V-value :math:`v(s)\\in\\mathbb{R}`.

        Parameters
        ----------
        X : nd Tensor, shape: [batch_size, ...]

            ``X`` is an intermediate tensor in the full forward-pass of the
            computation graph; it's the output of the last layer of the
            :func:`body` method.

        Returns
        -------
        V : 2d Tensor, shape: [batch_size, 1]

            The output :term:`state values <V>` :math:`v(s)\\in\\mathbb{R}`.

        """
        V = keras.layers.Dense(
            units=1, activation='linear',
            kernel_initializer='zeros', name='value/v')(X)
        return V

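    # Illustrative sketch (not part of the library): a value head is
    # typically wired onto the output of :func:`body` to form a complete
    # keras model. The observation shape [4] and the name ``func`` are
    # hypothetical:
    #
    #     S = keras.Input(shape=[4], name='S')   # state observations
    #     X = func.body(S)                       # shared feature extractor
    #     V = func.head_v(X)                     # shape: [batch_size, 1]
    #     model = keras.Model(inputs=S, outputs=V)
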
    def head_q1(self, X):
        """
        This is the :term:`type-I <type-I state-action value function>`
        Q-value head. It returns a scalar Q-value
        :math:`q(s,a)\\in\\mathbb{R}`.

        Parameters
        ----------
        X : nd Tensor, shape: [batch_size, ...]

            ``X`` is an intermediate tensor in the full forward-pass of the
            computation graph; it's the output of the last layer of the
            :func:`body` method.

        Returns
        -------
        Q_sa : 2d Tensor, shape: [batch_size, 1]

            The output :term:`type-I <Q_sa>` Q-values
            :math:`q(s,a)\\in\\mathbb{R}`.

        """
        Q_sa = keras.layers.Dense(
            units=1, activation='linear',
            kernel_initializer='zeros', name='value/qtype1')(X)
        return Q_sa

    def head_q2(self, X):
        """
        This is the :term:`type-II <type-II state-action value function>`
        Q-value head. It returns a vector of Q-values
        :math:`q(s,.)\\in\\mathbb{R}^n`.

        Parameters
        ----------
        X : nd Tensor, shape: [batch_size, ...]

            ``X`` is an intermediate tensor in the full forward-pass of the
            computation graph; it's the output of the last layer of the
            :func:`body` method.

        Returns
        -------
        Q_s : 2d Tensor, shape: [batch_size, num_actions]

            The output :term:`type-II <Q_s>` Q-values
            :math:`q(s,.)\\in\\mathbb{R}^n`.

        """
        Q_s = keras.layers.Dense(
            units=self.num_actions, activation='linear',
            kernel_initializer='zeros', name='value/qtype2')(X)
        return Q_s

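    # Illustrative sketch (not part of the library): the difference between
    # the two Q-value heads, assuming a discrete action space with
    # ``num_actions == 3`` and a batch of two samples:
    #
    #     Q_sa = func.head_q1(X_sa)   # shape: [2, 1], X_sa from body_q1(S, A)
    #     Q_s = func.head_q2(X_s)     # shape: [2, 3], X_s from body(S)
    #
    # Type-I evaluates a single state-action pair, whereas type-II returns
    # q(s,.) for all actions at once, which is what e.g. the max over
    # actions in Q-learning needs.
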
    def head_pi(self, X):
        """
        This is the policy head. It returns logits, i.e. not probabilities.
        Use a softmax to turn the output into probabilities.

        Parameters
        ----------
        X : nd Tensor, shape: [batch_size, ...]

            ``X`` is an intermediate tensor in the full forward-pass of the
            computation graph; it's the output of the last layer of the
            :func:`body` method.

        Returns
        -------
        \\*params : Tensor or tuple of Tensors, shape: [batch_size, ...]

            These constitute the raw policy distribution parameters.

        """
        if self.action_space_is_discrete:
            logits = keras.layers.Dense(
                units=self.num_actions, activation='linear',
                kernel_initializer='zeros', name='policy/logits')(X)
            return logits

        if self.action_space_is_box:
            mu = keras.layers.Dense(
                units=self.actions_ndim, activation='linear',
                kernel_initializer='zeros', name='policy/mu')(X)
            logvar = keras.layers.Dense(
                units=self.actions_ndim, activation='linear',
                kernel_initializer='zeros', name='policy/logvar')(X)
            return mu, logvar

        raise ActionSpaceError.feature_request(self.env)

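    # Illustrative sketch (not part of the library): for a Box action space
    # the head returns (mu, logvar), which can be read as the parameters of
    # a diagonal Gaussian over actions. Assuming that interpretation,
    # sampling an action with the reparametrization trick would look
    # roughly like:
    #
    #     eps = K.random_normal(K.shape(mu))
    #     A = mu + K.exp(0.5 * logvar) * eps  # shape: [batch_size, actions_ndim]
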
    def body(self, S):
        """
        This is the part of the computation graph that may be shared between
        e.g. policy (actor) and value function (critic). It is typically the
        part of a neural net that does most of the heavy lifting. One may
        think of the :func:`body` as an elaborate automatic feature
        extractor.

        Parameters
        ----------
        S : nd Tensor, shape: [batch_size, ...]

            The input state observation.

        Returns
        -------
        X : nd Tensor, shape: [batch_size, ...]

            The intermediate keras tensor.

        """
        return keras.layers.Lambda(
            lambda x: self._to_vector(x, self.env.observation_space))(S)

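    # Note that this default body does no real feature extraction: it only
    # one-hot encodes integer observations and flattens higher-rank inputs
    # (see ``_to_vector`` below), so the default heads reduce to simple
    # (multi) linear regression on the raw observations.
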
    def body_q1(self, S, A):
        """
        This is similar to :func:`body`, except that it takes a state-action
        pair as input instead of only state observations.

        Parameters
        ----------
        S : nd Tensor, shape: [batch_size, ...]

            The input state observation.

        A : nd Tensor, shape: [batch_size, ...]

            The input actions.

        Returns
        -------
        X : nd Tensor, shape: [batch_size, ...]

            The intermediate keras tensor.

        """
        def kronecker_product(args):
            S, A = args
            S = self._to_vector(S, self.env.observation_space)
            A = self._to_vector(A, self.env.action_space)
            check_tensor(S, ndim=2, dtype=('float32', 'float64'))
            check_tensor(A, ndim=2, dtype=('float32', 'float64'))
            return tf.einsum('ij,ik->ijk', S, A)

        X = keras.layers.Lambda(kronecker_product)([S, A])
        X = keras.layers.Flatten()(X)
        return self.body(X)

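    # Illustrative sketch (not part of the library): the einsum above
    # computes a batchwise outer product. For a single sample with state
    # features s = [s1, s2] and one-hot encoded action a = [0, 1], the
    # flattened result is
    #
    #     vec(s a^T) = [s1*0, s1*1, s2*0, s2*1] = [0, s1, 0, s2]
    #
    # i.e. a state-action feature vector of size ndim(s) * num_actions,
    # which turns the default linear :func:`head_q1` into a separate linear
    # model per action.
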
    def _init_optimizer(self, optimizer, optimizer_kwargs):
        if optimizer is None:
            self.optimizer = self.DEFAULT_OPTIMIZER(**optimizer_kwargs)
        elif isinstance(optimizer, keras.optimizers.Optimizer):
            self.optimizer = optimizer
        else:
            raise ValueError(
                "unknown optimizer, expected a keras.optimizers.Optimizer "
                "or None (which sets the default keras.optimizers.Adam "
                "optimizer)")

    @staticmethod
    def _to_vector(X, space):
        if K.ndim(X) == 1 and K.dtype(X).startswith('int'):
            X = K.one_hot(X, space.n)
        elif K.ndim(X) > 2:
            X = keras.layers.Flatten()(X)
        return X
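

# Illustrative usage sketch (not part of the library), using the
# hypothetical ``MLP`` subclass from the class docstring: pass keyword
# arguments for the default Adam optimizer, or pass a ready-made keras
# optimizer instead (in which case ``**optimizer_kwargs`` is ignored):
#
#     mlp = MLP(env, lr=0.001)  # keras.optimizers.Adam(lr=0.001)
#     mlp = MLP(env, optimizer=keras.optimizers.SGD(lr=0.01, momentum=0.9))
#
# ``_to_vector`` standardizes inputs for the default body and heads: int
# batches of shape [batch_size] become one-hot vectors of shape
# [batch_size, space.n], and tensors of rank > 2 are flattened to
# [batch_size, -1].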