12 #ifndef MLPACK_METHODS_RL_CATEGORICAL_DQN_HPP 13 #define MLPACK_METHODS_RL_CATEGORICAL_DQN_HPP 21 #include "../training_config.hpp" 57 network(), atomSize(0), vMin(0.0), vMax(0.0), isNoisy(false)
77 const bool isNoisy =
false,
78 InitType init = InitType(),
79 OutputLayerType outputLayer = OutputLayerType()):
80 network(outputLayer, init),
81 atomSize(config.AtomSize()),
86 network.Add(
new Linear<>(inputDim, h1));
90 noisyLayerIndex.push_back(network.Model().size());
93 noisyLayerIndex.push_back(network.Model().size());
100 network.Add(
new Linear<>(h2, outputDim * atomSize));
114 const bool isNoisy =
false):
115 network(
std::move(network)),
116 atomSize(config.AtomSize()),
133 void Predict(
const arma::mat state, arma::mat& actionValue)
136 network.Predict(state, q_atoms);
137 activations.copy_size(q_atoms);
138 actionValue.set_size(q_atoms.n_rows / atomSize, q_atoms.n_cols);
139 arma::rowvec support = arma::linspace<arma::rowvec>(vMin, vMax, atomSize);
140 for (
size_t i = 0; i < q_atoms.n_rows; i += atomSize)
142 arma::mat activation = activations.rows(i, i + atomSize - 1);
143 arma::mat input = q_atoms.rows(i, i + atomSize - 1);
144 softMax.Forward(input, activation);
145 activations.rows(i, i + atomSize - 1) = activation;
146 actionValue.row(i/atomSize) = support * activation;
156 void Forward(
const arma::mat state, arma::mat& dist)
159 network.Forward(state, q_atoms);
160 activations.copy_size(q_atoms);
161 for (
size_t i = 0; i < q_atoms.n_rows; i += atomSize)
163 arma::mat activation = activations.rows(i, i + atomSize - 1);
164 arma::mat input = q_atoms.rows(i, i + atomSize - 1);
165 softMax.Forward(input, activation);
166 activations.rows(i, i + atomSize - 1) = activation;
176 network.ResetParameters();
184 for (
size_t i = 0; i < noisyLayerIndex.size(); i++)
186 boost::get<NoisyLinear<>*>
187 (network.Model()[noisyLayerIndex[i]])->ResetNoise();
192 const arma::mat&
Parameters()
const {
return network.Parameters(); }
204 arma::mat& lossGradients,
207 arma::mat activationGradients(arma::size(activations));
208 for (
size_t i = 0; i < activations.n_rows; i += atomSize)
210 arma::mat activationGrad;
211 arma::mat lossGrad = lossGradients.rows(i, i + atomSize - 1);
212 arma::mat activation = activations.rows(i, i + atomSize - 1);
213 softMax.Backward(activation, lossGrad, activationGrad);
214 activationGradients.rows(i, i + atomSize - 1) = activationGrad;
216 network.Backward(state, activationGradients, gradient);
236 std::vector<size_t> noisyLayerIndex;
242 arma::mat activations;
Artificial Neural Network.
void Predict(const arma::mat state, arma::mat &actionValue)
Predict the responses to a given set of predictors.
void ResetNoise()
Resets noise of the network, if the network is of type noisy.
CategoricalDQN(const int inputDim, const int h1, const int h2, const int outputDim, TrainingConfig config, const bool isNoisy=false, InitType init=InitType(), OutputLayerType outputLayer=OutputLayerType())
Construct an instance of CategoricalDQN class.
Linear algebra utility functions, generally performed on matrices or vectors.
CategoricalDQN(NetworkType &network, TrainingConfig config, const bool isNoisy=false)
Construct an instance of CategoricalDQN class from a pre-constructed network.
The core includes that mlpack expects; standard C++ includes and Armadillo.
Implementation of the Linear layer class.
arma::mat & Parameters()
Modify the Parameters.
The empty loss does nothing, letting the user calculate the loss outside the model.
Implementation of the Softmax layer.
void Forward(const arma::mat state, arma::mat &dist)
Perform the forward pass of the states in real batch mode.
const arma::mat & Parameters() const
Return the Parameters.
Implementation of the base layer.
Implementation of the NoisyLinear layer class.
CategoricalDQN()
Default constructor.
Implementation of a standard feed forward network.
Implementation of the Categorical Deep Q-Learning network.
void ResetParameters()
Resets the parameters of the network.
void Backward(const arma::mat state, arma::mat &lossGradients, arma::mat &gradient)
Perform the backward pass of the state in real batch mode.
This class is used to initialize the weight matrix with a Gaussian distribution.