categorical_dqn.hpp
/**
 * @file categorical_dqn.hpp
 *
 * This file contains the implementation of the Categorical Deep Q-Learning
 * network.
 */
#ifndef MLPACK_METHODS_RL_CATEGORICAL_DQN_HPP
#define MLPACK_METHODS_RL_CATEGORICAL_DQN_HPP

#include <mlpack/prereqs.hpp>
#include <mlpack/methods/ann/ffn.hpp>
#include <mlpack/methods/ann/init_rules/gaussian_init.hpp>
#include <mlpack/methods/ann/layer/layer.hpp>
#include <mlpack/methods/ann/loss_functions/empty_loss.hpp>
#include "../training_config.hpp"

namespace mlpack {
namespace rl {

using namespace mlpack::ann;
/**
 * Implementation of the Categorical Deep Q-Learning network.
 *
 * @tparam OutputLayerType The output layer type of the network.
 * @tparam InitType The initialization type of the network.
 * @tparam NetworkType The type of network used for the categorical DQN.
 */
template<
  typename OutputLayerType = EmptyLoss<>,
  typename InitType = GaussianInitialization,
  typename NetworkType = FFN<OutputLayerType, InitType>
>
class CategoricalDQN
{
 public:
  //! Default constructor.
  CategoricalDQN() :
      network(), atomSize(0), vMin(0.0), vMax(0.0), isNoisy(false)
  { /* Nothing to do here. */ }
  /**
   * Construct an instance of the CategoricalDQN class.
   *
   * @param inputDim Number of inputs.
   * @param h1 Number of neurons in the first hidden layer.
   * @param h2 Number of neurons in the second hidden layer.
   * @param outputDim Number of neurons in the output layer, i.e. the number
   *     of actions.
   * @param config Hyper-parameters, providing the atom size and the value
   *     bounds of the support.
   * @param isNoisy Whether to use NoisyLinear layers in place of the hidden
   *     Linear layers.
   * @param init The initialization rule for the network parameters.
   * @param outputLayer The output layer of the network.
   */
  CategoricalDQN(const int inputDim,
                 const int h1,
                 const int h2,
                 const int outputDim,
                 TrainingConfig config,
                 const bool isNoisy = false,
                 InitType init = InitType(),
                 OutputLayerType outputLayer = OutputLayerType()) :
      network(outputLayer, init),
      atomSize(config.AtomSize()),
      vMin(config.VMin()),
      vMax(config.VMax()),
      isNoisy(isNoisy)
  {
    network.Add(new Linear<>(inputDim, h1));
    network.Add(new ReLULayer<>());
    if (isNoisy)
    {
      // Store the indices of the noisy layers so ResetNoise() can find them.
      noisyLayerIndex.push_back(network.Model().size());
      network.Add(new NoisyLinear<>(h1, h2));
      network.Add(new ReLULayer<>());
      noisyLayerIndex.push_back(network.Model().size());
      // The head outputs atomSize logits for each of the outputDim actions.
      network.Add(new NoisyLinear<>(h2, outputDim * atomSize));
    }
    else
    {
      network.Add(new Linear<>(h1, h2));
      network.Add(new ReLULayer<>());
      // The head outputs atomSize logits for each of the outputDim actions.
      network.Add(new Linear<>(h2, outputDim * atomSize));
    }
  }

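  // Resulting architecture, sketched for reference (this sketch is an
  // illustration, not part of the original file):
  //
  //   input(inputDim) -> Linear(h1) -> ReLU
  //                   -> Linear(h2) -> ReLU              (NoisyLinear if isNoisy)
  //                   -> Linear(outputDim * atomSize)    (NoisyLinear if isNoisy)
  //
  // Each action thus owns one block of atomSize logits in the output.
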
  /**
   * Construct an instance of the CategoricalDQN class from a pre-constructed
   * network.
   *
   * @param network The pre-constructed network.
   * @param config Hyper-parameters, providing the atom size and the value
   *     bounds of the support.
   * @param isNoisy Whether the network uses NoisyLinear layers.
   */
  CategoricalDQN(NetworkType& network,
                 TrainingConfig config,
                 const bool isNoisy = false) :
      network(std::move(network)),
      atomSize(config.AtomSize()),
      vMin(config.VMin()),
      vMax(config.VMax()),
      isNoisy(isNoisy)
  { /* Nothing to do here. */ }

  /**
   * Predict the action values for the given state.
   *
   * The network emits atomSize logits per action. Each block of logits is
   * passed through a softmax to obtain a probability distribution over the
   * fixed support z = linspace(vMin, vMax, atomSize), and the action value
   * is the expectation Q(s, a) = sum_i z_i * p_i(s, a).
   *
   * @param state The input state.
   * @param actionValue Matrix of predicted action values, one row per action.
   */
  void Predict(const arma::mat state, arma::mat& actionValue)
  {
    arma::mat q_atoms;
    network.Predict(state, q_atoms);
    activations.copy_size(q_atoms);
    actionValue.set_size(q_atoms.n_rows / atomSize, q_atoms.n_cols);
    arma::rowvec support = arma::linspace<arma::rowvec>(vMin, vMax, atomSize);
    for (size_t i = 0; i < q_atoms.n_rows; i += atomSize)
    {
      arma::mat activation = activations.rows(i, i + atomSize - 1);
      arma::mat input = q_atoms.rows(i, i + atomSize - 1);
      softMax.Forward(input, activation);
      activations.rows(i, i + atomSize - 1) = activation;
      actionValue.row(i / atomSize) = support * activation;
    }
  }

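  // Worked example (illustrative numbers, not part of the original file):
  // with atomSize = 3, vMin = 0, vMax = 10 the support is z = {0, 5, 10}.
  // If the softmax turns one action's three logits into p = {0.2, 0.5, 0.3},
  // the predicted action value is
  //   Q(s, a) = 0.2 * 0 + 0.5 * 5 + 0.3 * 10 = 5.5.
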
  /**
   * Perform the forward pass of the states in real batch mode, producing the
   * softmax-normalized atom probabilities for every action.
   *
   * @param state The input state.
   * @param dist The predicted probability distribution over atoms, one block
   *     of atomSize rows per action.
   */
  void Forward(const arma::mat state, arma::mat& dist)
  {
    arma::mat q_atoms;
    network.Forward(state, q_atoms);
    activations.copy_size(q_atoms);
    for (size_t i = 0; i < q_atoms.n_rows; i += atomSize)
    {
      arma::mat activation = activations.rows(i, i + atomSize - 1);
      arma::mat input = q_atoms.rows(i, i + atomSize - 1);
      softMax.Forward(input, activation);
      activations.rows(i, i + atomSize - 1) = activation;
    }
    dist = activations;
  }

  /**
   * Resets the parameters of the network.
   */
  void ResetParameters()
  {
    network.ResetParameters();
  }

  /**
   * Resets the noise of the network, if the network is of type noisy.
   */
  void ResetNoise()
  {
    for (size_t i = 0; i < noisyLayerIndex.size(); i++)
    {
      boost::get<NoisyLinear<>*>
          (network.Model()[noisyLayerIndex[i]])->ResetNoise();
    }
  }

  //! Return the parameters of the network.
  const arma::mat& Parameters() const { return network.Parameters(); }
  //! Modify the parameters of the network.
  arma::mat& Parameters() { return network.Parameters(); }

  /**
   * Perform the backward pass of the state in real batch mode.
   *
   * The loss gradients with respect to the atom probabilities are first
   * propagated back through the per-action softmax, and the result is then
   * propagated through the underlying network.
   *
   * @param state The input state.
   * @param lossGradients The loss gradients with respect to the softmax
   *     outputs.
   * @param gradient The gradient with respect to the network parameters.
   */
  void Backward(const arma::mat state,
                arma::mat& lossGradients,
                arma::mat& gradient)
  {
    arma::mat activationGradients(arma::size(activations));
    for (size_t i = 0; i < activations.n_rows; i += atomSize)
    {
      arma::mat activationGrad;
      arma::mat lossGrad = lossGradients.rows(i, i + atomSize - 1);
      arma::mat activation = activations.rows(i, i + atomSize - 1);
      softMax.Backward(activation, lossGrad, activationGrad);
      activationGradients.rows(i, i + atomSize - 1) = activationGrad;
    }
    network.Backward(state, activationGradients, gradient);
  }

 private:
  //! Locally-stored network.
  NetworkType network;

  //! Locally-stored number of atoms in the value distribution.
  size_t atomSize;

  //! Locally-stored lower bound of the support.
  double vMin;

  //! Locally-stored upper bound of the support.
  double vMax;

  //! Whether the network uses noisy linear layers.
  bool isNoisy;

  //! Locally-stored indices of the noisy layers in the network.
  std::vector<size_t> noisyLayerIndex;

  //! Locally-stored softmax layer, applied per action over the atoms.
  Softmax<> softMax;

  //! Locally-stored softmax activations from the last forward pass.
  arma::mat activations;
};

} // namespace rl
} // namespace mlpack

#endif
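
For reference, a minimal usage sketch of this class; the include path, the
dimensions, and the hyper-parameter values below are illustrative assumptions,
not part of this file.

// Minimal usage sketch (assumes an mlpack 3.x source tree; the include path
// and all numbers here are illustrative).
#include <mlpack/methods/reinforcement_learning/network/categorical_dqn.hpp>

using namespace mlpack::rl;

int main()
{
  TrainingConfig config;
  config.AtomSize() = 51;  // Number of atoms in the value distribution.
  config.VMin() = 0.0;     // Lower bound of the support.
  config.VMax() = 200.0;   // Upper bound of the support.

  // 4-dimensional state, two hidden layers of 64 units, 2 actions.
  CategoricalDQN<> network(4, 64, 64, 2, config);

  // Expected action values for a random state; actionValue is 2 x 1.
  arma::mat state(4, 1, arma::fill::randu);
  arma::mat actionValue;
  network.Predict(state, actionValue);

  return 0;
}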