lstm.hpp
Go to the documentation of this file.
1 
12 #ifndef MLPACK_METHODS_ANN_LAYER_LSTM_HPP
13 #define MLPACK_METHODS_ANN_LAYER_LSTM_HPP
14 
15 #include <mlpack/prereqs.hpp>
16 #include <limits>
17 
18 namespace mlpack {
19 namespace ann {
20 
// LSTM layer of the mlpack::ann namespace. This is the declaration only: every
// member function that has no inline body here is defined in lstm_impl.hpp,
// which is included at the end of this header.
//
// Template parameters:
//   InputDataType   defaults to arma::mat; not referenced by any declaration
//                   visible inside this class.
//   OutputDataType  defaults to arma::mat; the type of the parameters and of
//                   every data member below that is not a size_t.
58 template <
59  typename InputDataType = arma::mat,
60  typename OutputDataType = arma::mat
61 >
62 class LSTM
63 {
64  public:
    // Create the LSTM object.
66  LSTM();
67 
    // Create the LSTM object with explicit sizes.
    //
    // @param inSize  The number of input units.
    // @param outSize The number of output units.
    // @param rho     The maximum number of steps to backpropagate through time
    //                (BPTT). Defaults to the largest value a size_t can hold.
75  LSTM(const size_t inSize,
76  const size_t outSize,
77  const size_t rho = std::numeric_limits<size_t>::max());
78 
    // Copy constructor.
80  LSTM(const LSTM& layer);
81 
    // Move constructor.
83  LSTM(LSTM&&);
84 
    // Copy assignment operator.
86  LSTM& operator=(const LSTM& layer);
87 
    // Move assignment operator.
89  LSTM& operator=(LSTM&& layer);
90 
    // Ordinary feed-forward pass of a neural network, evaluating the function
    // f(x) for the given input.
    //
    // @param input  Input data used for evaluating the function.
    // @param output Receives the result (passed by non-const reference).
98  template<typename InputType, typename OutputType>
99  void Forward(const InputType& input, OutputType& output);
100 
    // Feed-forward pass that additionally takes a cell state argument.
    //
    // @param input        Input data used for evaluating the function.
    // @param output       Receives the result (passed by non-const reference).
    // @param cellState    Cell state, passed by non-const reference.
    //                     NOTE(review): presumably read when useCellState is
    //                     true and written back afterwards - confirm in
    //                     lstm_impl.hpp.
    // @param useCellState Defaults to false.
110  template<typename InputType, typename OutputType>
111  void Forward(const InputType& input,
112  OutputType& output,
113  OutputType& cellState,
114  bool useCellState = false);
115 
    // Ordinary feed backward pass of a neural network.
    //
    // @param input Input of the pass (const reference).
    // @param gy    Error passed in from the caller (const reference).
    //              NOTE(review): presumably the backpropagated error - confirm.
    // @param g     Receives the result (passed by non-const reference).
125  template<typename InputType, typename ErrorType, typename GradientType>
126  void Backward(const InputType& input,
127  const ErrorType& gy,
128  GradientType& g);
129 
130  /*
131  * Reset the layer parameter.
132  */
133  void Reset();
134 
135  /*
136  * Resets the cell to accept a new input. This breaks the BPTT chain and
137  * starts a new one.
138  *
139  * @param size The current maximum number of steps through time.
140  */
141  void ResetCell(const size_t size);
142 
143  /*
144  * Calculate the gradient using the output delta and the input activation.
145  *
146  * @param input The input parameter used for calculating the gradient.
147  * @param error The calculated error.
148  * @param gradient The calculated gradient.
149  */
150  template<typename InputType, typename ErrorType, typename GradientType>
151  void Gradient(const InputType& input,
152  const ErrorType& error,
153  GradientType& gradient);
154 
    // Get the maximum number of steps to backpropagate through time (BPTT).
156  size_t Rho() const { return rho; }
    // Modify the maximum number of steps to backpropagate through time (BPTT).
158  size_t& Rho() { return rho; }
159 
    // Get the parameters.
161  OutputDataType const& Parameters() const { return weights; }
    // Modify the parameters.
163  OutputDataType& Parameters() { return weights; }
164 
    // Get the output parameter.
166  OutputDataType const& OutputParameter() const { return outputParameter; }
    // Modify the output parameter.
168  OutputDataType& OutputParameter() { return outputParameter; }
169 
    // Get the delta.
171  OutputDataType const& Delta() const { return delta; }
    // Modify the delta.
173  OutputDataType& Delta() { return delta; }
174 
    // Get the gradient.
176  OutputDataType const& Gradient() const { return grad; }
    // Modify the gradient.
178  OutputDataType& Gradient() { return grad; }
179 
    // Get the number of input units.
181  size_t InSize() const { return inSize; }
182 
    // Get the number of output units.
184  size_t OutSize() const { return outSize; }
185 
    // Get the size of the weights, computed from inSize and outSize only.
    // NOTE(review): the 4 / 7 / 4 coefficients presumably correspond to the
    // four input2*Weight members, the four input2*Bias plus the three
    // cell2Gate*Weight members, and the four output2*Weight members declared
    // below - confirm against Reset() in lstm_impl.hpp.
187  size_t WeightSize() const
188  {
189  return (4 * outSize * inSize + 7 * outSize + 4 * outSize * outSize);
190  }
191 
    // Get the shape of the input; this is simply inSize.
193  size_t InputShape() const
194  {
195  return inSize;
196  }
197 
    // Serialize the layer. The version argument is unnamed in this declaration.
201  template<typename Archive>
202  void serialize(Archive& ar, const uint32_t /* version */);
203 
204  private:
    // Number of input units (returned by InSize() and InputShape()).
206  size_t inSize;
207 
    // Number of output units (returned by OutSize()).
209  size_t outSize;
210 
    // Maximum number of steps to backpropagate through time (see Rho()).
212  size_t rho;
213 
    // NOTE(review): forwardStep, backwardStep and gradientStep are presumably
    // the current step counters of the respective passes - confirm in
    // lstm_impl.hpp.
215  size_t forwardStep;
216 
218  size_t backwardStep;
219 
221  size_t gradientStep;
222 
    // The parameters exposed through Parameters().
224  OutputDataType weights;
225 
227  OutputDataType prevOutput;
228 
    // NOTE(review): batchSize, batchStep and gradientStepIdx are bookkeeping
    // values whose exact meaning is not visible in this header - confirm in
    // lstm_impl.hpp.
230  size_t batchSize;
231 
233  size_t batchStep;
234 
237  size_t gradientStepIdx;
238 
240  OutputDataType cellActivationError;
241 
    // The delta exposed through Delta().
243  OutputDataType delta;
244 
    // The gradient exposed through Gradient().
246  OutputDataType grad;
247 
    // The output parameter exposed through OutputParameter().
249  OutputDataType outputParameter;
250 
    // Weight and bias members, grouped by gate in declaration order (input
    // gate, forget gate, output gate). NOTE(review): presumably views into
    // 'weights' set up by Reset() - confirm in lstm_impl.hpp.
252  OutputDataType output2GateInputWeight;
253 
255  OutputDataType input2GateInputWeight;
256 
258  OutputDataType input2GateInputBias;
259 
261  OutputDataType cell2GateInputWeight;
262 
264  OutputDataType output2GateForgetWeight;
265 
267  OutputDataType input2GateForgetWeight;
268 
270  OutputDataType input2GateForgetBias;
271 
273  OutputDataType cell2GateForgetWeight;
274 
276  OutputDataType output2GateOutputWeight;
277 
279  OutputDataType input2GateOutputWeight;
280 
282  OutputDataType input2GateOutputBias;
283 
285  OutputDataType cell2GateOutputWeight;
286 
    // Gate and hidden-layer values followed by their activations.
    // NOTE(review): how these are indexed (per step / per batch) is not
    // visible here - confirm in lstm_impl.hpp.
288  OutputDataType inputGate;
289 
291  OutputDataType forgetGate;
292 
294  OutputDataType hiddenLayer;
295 
297  OutputDataType outputGate;
298 
300  OutputDataType inputGateActivation;
301 
303  OutputDataType forgetGateActivation;
304 
306  OutputDataType outputGateActivation;
307 
309  OutputDataType hiddenLayerActivation;
310 
    // Weight and bias members for the hidden layer (same naming scheme as the
    // gate members above).
312  OutputDataType input2HiddenWeight;
313 
315  OutputDataType input2HiddenBias;
316 
318  OutputDataType output2HiddenWeight;
319 
321  OutputDataType cell;
322 
324  OutputDataType cellActivation;
325 
    // Error terms. NOTE(review): presumably scratch storage used by
    // Backward() - confirm in lstm_impl.hpp.
327  OutputDataType forgetGateError;
328 
330  OutputDataType outputGateError;
331 
333  OutputDataType prevError;
334 
    // Distinct from outputParameter above; its role is not visible in this
    // header.
336  OutputDataType outParameter;
337 
339  OutputDataType inputCellError;
340 
342  OutputDataType inputGateError;
343 
345  OutputDataType hiddenError;
346 
    // NOTE(review): rhoSize and bpttSteps are presumably derived from rho and
    // the size passed to ResetCell() - confirm in lstm_impl.hpp.
348  size_t rhoSize;
349 
351  size_t bpttSteps;
352 }; // class LSTM
353 
354 } // namespace ann
355 } // namespace mlpack
356 
357 // Include implementation.
358 #include "lstm_impl.hpp"
359 
360 #endif
OutputDataType const & OutputParameter() const
Get the output parameter.
Definition: lstm.hpp:166
Linear algebra utility functions, generally performed on matrices or vectors.
size_t OutSize() const
Get the number of output units.
Definition: lstm.hpp:184
void serialize(Archive &ar, const uint32_t)
Serialize the layer.
size_t InputShape() const
Get the shape of the input.
Definition: lstm.hpp:193
The core includes that mlpack expects; standard C++ includes and Armadillo.
OutputDataType & Gradient()
Modify the gradient.
Definition: lstm.hpp:178
OutputDataType & OutputParameter()
Modify the output parameter.
Definition: lstm.hpp:168
size_t InSize() const
Get the number of input units.
Definition: lstm.hpp:181
size_t Rho() const
Get the maximum number of steps to backpropagate through time (BPTT).
Definition: lstm.hpp:156
OutputDataType const & Parameters() const
Get the parameters.
Definition: lstm.hpp:161
void Backward(const InputType &input, const ErrorType &gy, GradientType &g)
Ordinary feed backward pass of a neural network, calculating the function f(x) by propagating x backwards through f, using the results from the feed-forward pass.
OutputDataType const & Delta() const
Get the delta.
Definition: lstm.hpp:171
void Forward(const InputType &input, OutputType &output)
Ordinary feed-forward pass of a neural network, evaluating the function f(x) by propagating the activity forward through f.
LSTM()
Create the LSTM object.
LSTM & operator=(const LSTM &layer)
Copy assignment operator.
size_t WeightSize() const
Get the size of the weights.
Definition: lstm.hpp:187
OutputDataType const & Gradient() const
Get the gradient.
Definition: lstm.hpp:176
void ResetCell(const size_t size)
OutputDataType & Delta()
Modify the delta.
Definition: lstm.hpp:173
OutputDataType & Parameters()
Modify the parameters.
Definition: lstm.hpp:163
size_t & Rho()
Modify the maximum number of steps to backpropagate through time (BPTT).
Definition: lstm.hpp:158