fast_lstm.hpp
Go to the documentation of this file.
1 
13 #ifndef MLPACK_METHODS_ANN_LAYER_FAST_LSTM_HPP
14 #define MLPACK_METHODS_ANN_LAYER_FAST_LSTM_HPP
15 
16 #include <mlpack/prereqs.hpp>
17 #include <limits>
18 
19 namespace mlpack {
20 namespace ann {
21 
62 template <
63  typename InputDataType = arma::mat,
64  typename OutputDataType = arma::mat
65 >
66 class FastLSTM
67 {
68  public:
69  // Convenience typedefs.
70  typedef typename InputDataType::elem_type InputElemType;
71  typedef typename OutputDataType::elem_type ElemType;
72 
74  FastLSTM();
75 
77  FastLSTM(const FastLSTM& layer);
78 
80  FastLSTM(FastLSTM&& layer);
81 
83  FastLSTM& operator=(const FastLSTM& layer);
84 
86  FastLSTM& operator=(FastLSTM&& layer);
87 
95  FastLSTM(const size_t inSize,
96  const size_t outSize,
97  const size_t rho = std::numeric_limits<size_t>::max());
98 
106  template<typename InputType, typename OutputType>
107  void Forward(const InputType& input, OutputType& output);
108 
118  template<typename InputType, typename ErrorType, typename GradientType>
119  void Backward(const InputType& input,
120  const ErrorType& gy,
121  GradientType& g);
122 
123  /*
124  * Reset the layer parameter.
125  */
126  void Reset();
127 
128  /*
129  * Resets the cell to accept a new input. This breaks the BPTT chain starts a
130  * new one.
131  *
132  * @param size The current maximum number of steps through time.
133  */
134  void ResetCell(const size_t size);
135 
136  /*
137  * Calculate the gradient using the output delta and the input activation.
138  *
139  * @param input The input parameter used for calculating the gradient.
140  * @param error The calculated error.
141  * @param gradient The calculated gradient.
142  */
143  template<typename InputType, typename ErrorType, typename GradientType>
144  void Gradient(const InputType& input,
145  const ErrorType& error,
146  GradientType& gradient);
147 
149  size_t Rho() const { return rho; }
151  size_t& Rho() { return rho; }
152 
154  OutputDataType const& Parameters() const { return weights; }
156  OutputDataType& Parameters() { return weights; }
157 
159  OutputDataType const& OutputParameter() const { return outputParameter; }
161  OutputDataType& OutputParameter() { return outputParameter; }
162 
164  OutputDataType const& Delta() const { return delta; }
166  OutputDataType& Delta() { return delta; }
167 
169  OutputDataType const& Gradient() const { return grad; }
171  OutputDataType& Gradient() { return grad; }
172 
174  size_t InSize() const { return inSize; }
175 
177  size_t OutSize() const { return outSize; }
178 
180  size_t WeightSize() const
181  {
182  return 4 * outSize * inSize + 4 * outSize + 4 * outSize * outSize;
183  }
184 
186  size_t InputShape() const
187  {
188  return inSize;
189  }
190 
194  template<typename Archive>
195  void serialize(Archive& ar, const uint32_t /* version */);
196 
197  private:
204  template<typename InputType, typename OutputType>
205  void FastSigmoid(const InputType& input, OutputType& sigmoids)
206  {
207  for (size_t i = 0; i < input.n_elem; ++i)
208  sigmoids(i) = FastSigmoid(input(i));
209  }
210 
217  ElemType FastSigmoid(const InputElemType data)
218  {
219  ElemType x = 0.5 * data;
220  ElemType z;
221  if (x >= 0)
222  {
223  if (x < 1.7)
224  z = (1.5 * x / (1 + x));
225  else if (x < 3)
226  z = (0.935409070603099 + 0.0458812946797165 * (x - 1.7));
227  else
228  z = 0.99505475368673;
229  }
230  else
231  {
232  ElemType xx = -x;
233  if (xx < 1.7)
234  z = -(1.5 * xx / (1 + xx));
235  else if (xx < 3)
236  z = -(0.935409070603099 + 0.0458812946797165 * (xx - 1.7));
237  else
238  z = -0.99505475368673;
239  }
240 
241  return 0.5 * (z + 1.0);
242  }
243 
245  size_t inSize;
246 
248  size_t outSize;
249 
251  size_t rho;
252 
254  size_t forwardStep;
255 
257  size_t backwardStep;
258 
260  size_t gradientStep;
261 
263  OutputDataType weights;
264 
266  OutputDataType prevOutput;
267 
269  size_t batchSize;
270 
272  size_t batchStep;
273 
276  size_t gradientStepIdx;
277 
279  OutputDataType cellActivationError;
280 
282  OutputDataType delta;
283 
285  OutputDataType grad;
286 
288  OutputDataType outputParameter;
289 
291  OutputDataType output2GateWeight;
292 
294  OutputDataType input2GateWeight;
295 
297  OutputDataType input2GateBias;
298 
300  OutputDataType gate;
301 
303  OutputDataType gateActivation;
304 
306  OutputDataType stateActivation;
307 
309  OutputDataType cell;
310 
312  OutputDataType cellActivation;
313 
315  OutputDataType forgetGateError;
316 
318  OutputDataType prevError;
319 
321  OutputDataType outParameter;
322 
324  size_t rhoSize;
325 
327  size_t bpttSteps;
328 }; // class FastLSTM
329 
330 } // namespace ann
331 } // namespace mlpack
332 
333 // Include implementation.
334 #include "fast_lstm_impl.hpp"
335 
336 #endif
OutputDataType & Gradient()
Modify the gradient.
Definition: fast_lstm.hpp:171
OutputDataType & Delta()
Modify the delta.
Definition: fast_lstm.hpp:166
void serialize(Archive &ar, const uint32_t)
Serialize the layer.
void Backward(const InputType &input, const ErrorType &gy, GradientType &g)
Ordinary feed backward pass of a neural network, calculating the function f(x) by propagating x backwards through f, using the results from the feed forward pass.
Linear algebra utility functions, generally performed on matrices or vectors.
The core includes that mlpack expects; standard C++ includes and Armadillo.
size_t InputShape() const
Get the shape of the input.
Definition: fast_lstm.hpp:186
OutputDataType::elem_type ElemType
Definition: fast_lstm.hpp:71
OutputDataType const & Parameters() const
Get the parameters.
Definition: fast_lstm.hpp:154
OutputDataType const & Gradient() const
Get the gradient.
Definition: fast_lstm.hpp:169
size_t & Rho()
Modify the maximum number of steps to backpropagate through time (BPTT).
Definition: fast_lstm.hpp:151
FastLSTM()
Create the Fast LSTM object.
OutputDataType const & Delta() const
Get the delta.
Definition: fast_lstm.hpp:164
OutputDataType const & OutputParameter() const
Get the output parameter.
Definition: fast_lstm.hpp:159
InputDataType::elem_type InputElemType
Definition: fast_lstm.hpp:70
size_t OutSize() const
Get the number of output units.
Definition: fast_lstm.hpp:177
OutputDataType & OutputParameter()
Modify the output parameter.
Definition: fast_lstm.hpp:161
FastLSTM & operator=(const FastLSTM &layer)
Copy assignment operator.
void ResetCell(const size_t size)
size_t Rho() const
Get the maximum number of steps to backpropagate through time (BPTT).
Definition: fast_lstm.hpp:149
OutputDataType & Parameters()
Modify the parameters.
Definition: fast_lstm.hpp:156
size_t InSize() const
Get the number of input units.
Definition: fast_lstm.hpp:174
size_t WeightSize() const
Get the size of the weight matrix.
Definition: fast_lstm.hpp:180
An implementation of a faster version of the LSTM network layer.
Definition: fast_lstm.hpp:66
void Forward(const InputType &input, OutputType &output)
Ordinary feed forward pass of a neural network, evaluating the function f(x) by propagating the activity forward through f.