mean_pooling.hpp
/**
 * @file mean_pooling.hpp
 *
 * Definition of the MeanPooling layer class.
 */
#ifndef MLPACK_METHODS_ANN_LAYER_MEAN_POOLING_HPP
#define MLPACK_METHODS_ANN_LAYER_MEAN_POOLING_HPP

#include <mlpack/prereqs.hpp>

namespace mlpack {
namespace ann {
/**
 * Implementation of the MeanPooling layer.
 *
 * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
 *         arma::sp_mat or arma::cube).
 * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
 *         arma::sp_mat or arma::cube).
 */
template <
    typename InputDataType = arma::mat,
    typename OutputDataType = arma::mat
>
class MeanPooling
{
 public:
  //! Create the MeanPooling object.
  MeanPooling();

  /**
   * Create the MeanPooling object using the specified parameters.
   *
   * @param kernelWidth Width of the pooling window.
   * @param kernelHeight Height of the pooling window.
   * @param strideWidth Width of the stride operation.
   * @param strideHeight Height of the stride operation.
   * @param floor If true the output size is rounded down (partial windows
   *        are ignored); otherwise it is rounded up.
   */
  MeanPooling(const size_t kernelWidth,
              const size_t kernelHeight,
              const size_t strideWidth = 1,
              const size_t strideHeight = 1,
              const bool floor = true);

  /**
   * Ordinary feed forward pass of a neural network, evaluating the function
   * f(x) by propagating the activity forward through f.
   *
   * @param input Input data used for evaluating the specified function.
   * @param output Resulting output activation.
   */
  template<typename eT>
  void Forward(const arma::Mat<eT>& input, arma::Mat<eT>& output);

  /**
   * Ordinary feed backward pass of a neural network, using 3rd-order tensors
   * as input, calculating the function f(x) by propagating x backwards
   * through f, using the results from the feed forward pass.
   *
   * @param input The propagated input activation (unused).
   * @param gy The backpropagated error.
   * @param g The calculated gradient.
   */
  template<typename eT>
  void Backward(const arma::Mat<eT>& /* input */,
                const arma::Mat<eT>& gy,
                arma::Mat<eT>& g);

  //! Get the output parameter.
  OutputDataType const& OutputParameter() const { return outputParameter; }
  //! Modify the output parameter.
  OutputDataType& OutputParameter() { return outputParameter; }

  //! Get the delta.
  OutputDataType const& Delta() const { return delta; }
  //! Modify the delta.
  OutputDataType& Delta() { return delta; }

  //! Get the input width.
  size_t const& InputWidth() const { return inputWidth; }
  //! Modify the input width.
  size_t& InputWidth() { return inputWidth; }

  //! Get the input height.
  size_t const& InputHeight() const { return inputHeight; }
  //! Modify the input height.
  size_t& InputHeight() { return inputHeight; }

  //! Get the output width.
  size_t const& OutputWidth() const { return outputWidth; }
  //! Modify the output width.
  size_t& OutputWidth() { return outputWidth; }

  //! Get the output height.
  size_t const& OutputHeight() const { return outputHeight; }
  //! Modify the output height.
  size_t& OutputHeight() { return outputHeight; }

  //! Get the input size.
  size_t InputSize() const { return inSize; }

  //! Get the output size.
  size_t OutputSize() const { return outSize; }

  //! Get the kernel width.
  size_t KernelWidth() const { return kernelWidth; }
  //! Modify the kernel width.
  size_t& KernelWidth() { return kernelWidth; }

  //! Get the kernel height.
  size_t KernelHeight() const { return kernelHeight; }
  //! Modify the kernel height.
  size_t& KernelHeight() { return kernelHeight; }

  //! Get the stride width.
  size_t StrideWidth() const { return strideWidth; }
  //! Modify the stride width.
  size_t& StrideWidth() { return strideWidth; }

  //! Get the stride height.
  size_t StrideHeight() const { return strideHeight; }
  //! Modify the stride height.
  size_t& StrideHeight() { return strideHeight; }

  //! Get the value of the rounding operation.
  bool const& Floor() const { return floor; }
  //! Modify the value of the rounding operation.
  bool& Floor() { return floor; }

  //! Get the value of the deterministic parameter.
  bool Deterministic() const { return deterministic; }
  //! Modify the value of the deterministic parameter.
  bool& Deterministic() { return deterministic; }

  //! Get the size of the weights.
  size_t WeightSize() const { return 0; }

  /**
   * Serialize the layer.
   */
  template<typename Archive>
  void serialize(Archive& ar, const uint32_t /* version */);

 private:
  /**
   * Apply pooling to the input and store the results.
   *
   * @param input The input on which to apply the pooling rule.
   * @param output The pooled result.
   */
  template<typename eT>
  void Pooling(const arma::Mat<eT>& input, arma::Mat<eT>& output)
  {
    // Build a two-dimensional prefix sum (summed-area table) of the input,
    // so the sum over any rectangular window can be recovered with at most
    // four lookups.
    arma::Mat<eT> inputPre = input;

    for (size_t i = 1; i < input.n_cols; ++i)
      inputPre.col(i) += inputPre.col(i - 1);

    for (size_t i = 1; i < input.n_rows; ++i)
      inputPre.row(i) += inputPre.row(i - 1);

    for (size_t j = 0, colidx = 0; j < output.n_cols;
         ++j, colidx += strideHeight)
    {
      for (size_t i = 0, rowidx = 0; i < output.n_rows;
           ++i, rowidx += strideWidth)
      {
        double val = 0.0;
        size_t rowEnd = rowidx + kernelWidth - 1;
        size_t colEnd = colidx + kernelHeight - 1;

        // Clamp windows that extend past the input boundary.
        if (rowEnd > input.n_rows - 1)
          rowEnd = input.n_rows - 1;
        if (colEnd > input.n_cols - 1)
          colEnd = input.n_cols - 1;

        // Recover the window sum from the prefix sums by inclusion-
        // exclusion, then divide by the (possibly clamped) window area.
        const size_t kernelArea = (rowEnd - rowidx + 1) *
            (colEnd - colidx + 1);
        val += inputPre(rowEnd, colEnd);
        if (rowidx >= 1)
        {
          if (colidx >= 1)
            val += inputPre(rowidx - 1, colidx - 1);
          val -= inputPre(rowidx - 1, colEnd);
        }
        if (colidx >= 1)
          val -= inputPre(rowEnd, colidx - 1);

        output(i, j) = val / kernelArea;
      }
    }
  }
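  // Worked example for Pooling() above: with input = [1 2; 3 4], kernel
  // 2x2 and stride 1, the prefix sums give inputPre = [1 3; 4 10], and the
  // single output element is inputPre(1, 1) / 4 = 10 / 4 = 2.5, the mean of
  // all four inputs.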

  /**
   * Apply unpooling to the input and store the results.
   *
   * @param input The input on which to apply the unpooling rule.
   * @param error The error of the output.
   * @param output The unpooled result.
   */
  template<typename eT>
  void Unpooling(const arma::Mat<eT>& input,
                 const arma::Mat<eT>& error,
                 arma::Mat<eT>& output)
  {
    // This condition comes from comparing the number of operations in the
    // brute-force method with the prefix-sum method. Let the area of `error`
    // be errorArea and the area of the kernel be kernelArea. The brute-force
    // method needs `errorArea * kernelArea` operations in total, since it
    // performs `kernelArea` operations for each element of `error`. The
    // prefix-sum method needs `4 * errorArea + 2 * inputArea` operations,
    // where the `2 * inputArea` term comes from the two prefix sums
    // performed (column-wise and row-wise). We use this comparison to decide
    // which method to use.
    const bool condition = (error.n_elem * kernelHeight * kernelWidth) >
        (4 * error.n_elem + 2 * input.n_elem);

    if (condition)
    {
      // If this condition holds then, in theory, the prefix-sum method of
      // unpooling is faster. The aim of unpooling is to add
      // `error(i, j) / kernelArea` to the corresponding window of the
      // output, which costs `kernelArea` additions per error element, i.e.
      // `error.n_elem * kernelArea` operations in total for the brute-force
      // method.
      //
      // The prefix-sum idea, first in 1-D: let the input be the array
      // `[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]` of size 10, and suppose we want to
      // add 10 to every index from 1 to 5. The brute-force method loops from
      // index 1 to index 5 and adds 10 to each element. The prefix-sum
      // method instead adds +10 at index 1 and -10 at index (5 + 1), so the
      // array becomes `[0, +10, 0, 0, 0, 0, -10, 0, 0, 0]`, and then takes
      // the prefix sum `input[i] += input[i - 1]`, after which the array is
      // `[0, +10, +10, +10, +10, +10, 0, 0, 0, 0]`. So this method costs
      // two additions plus one prefix pass. More generally, for `k` such
      // range updates the brute-force method needs `k * size(subarray)`
      // operations, while the prefix-sum method needs `2 * k` additions plus
      // a single prefix pass performed once at the end.
      //
      // Now for a 2-D matrix: say we want to add `e` to all elements of
      // input(x1 : x2, y1 : y2), so that
      // inputArea = (x2 - x1 + 1) * (y2 - y1 + 1). The prefix-sum method
      // performs the following operations:
      //   1. Add `+e` to input(x1, y1).
      //   2. Add `-e` to input(x2 + 1, y1).
      //   3. Add `-e` to input(x1, y2 + 1).
      //   4. Add `+e` to input(x2 + 1, y2 + 1).
      //   5. Take the prefix sum over columns, i.e.
      //      input(i, j) += input(i, j - 1).
      //   6. Take the prefix sum over rows, i.e.
      //      input(i, j) += input(i - 1, j).
      // For `k` such updates the brute-force method needs `kernelArea * k`
      // operations, while the prefix-sum method needs `4 * k` plus the two
      // prefix passes.
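      // Concrete check (a worked example): to add e = 6 to
      // input(0 : 1, 0 : 1) of a 3x3 zero matrix, place +6 at (0, 0),
      // -6 at (2, 0), -6 at (0, 2) and +6 at (2, 2); after the column-wise
      // and row-wise prefix sums, every element of the top-left 2x2 block
      // is 6 and the rest are 0.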

      for (size_t j = 0, colidx = 0; j < input.n_cols;
           j += strideHeight, ++colidx)
      {
        for (size_t i = 0, rowidx = 0; i < input.n_rows;
             i += strideWidth, ++rowidx)
        {
          // We have to add error(rowidx, colidx) / kernelArea to
          // output(span(i, rowEnd), span(j, colEnd)). With the prefix-sum
          // method this becomes four point updates:
          //
          //   1. Add +error(rowidx, colidx) / kernelArea to output(i, j).
          //   2. Add -error(rowidx, colidx) / kernelArea to
          //      output(rowEnd + 1, j).
          //   3. Add -error(rowidx, colidx) / kernelArea to
          //      output(i, colEnd + 1).
          //   4. Add +error(rowidx, colidx) / kernelArea to
          //      output(rowEnd + 1, colEnd + 1).
          //
          // followed by a row-wise and a column-wise prefix sum, performed
          // once after all updates.

          size_t rowEnd = i + kernelWidth - 1;
          size_t colEnd = j + kernelHeight - 1;

          if (rowEnd > input.n_rows - 1)
          {
            if (floor)
              continue;
            rowEnd = input.n_rows - 1;
          }

          if (colEnd > input.n_cols - 1)
          {
            if (floor)
              continue;
            colEnd = input.n_cols - 1;
          }

          const size_t kernelArea = (rowEnd - i + 1) * (colEnd - j + 1);
          output(i, j) += error(rowidx, colidx) / kernelArea;

          if (rowEnd + 1 < input.n_rows)
          {
            output(rowEnd + 1, j) -= error(rowidx, colidx) / kernelArea;

            if (colEnd + 1 < input.n_cols)
              output(rowEnd + 1, colEnd + 1) += error(rowidx, colidx) /
                  kernelArea;
          }

          if (colEnd + 1 < input.n_cols)
            output(i, colEnd + 1) -= error(rowidx, colidx) / kernelArea;
        }
      }

      // Apply the deferred prefix sums, row-wise then column-wise.
      for (size_t i = 1; i < input.n_rows; ++i)
        output.row(i) += output.row(i - 1);

      for (size_t j = 1; j < input.n_cols; ++j)
        output.col(j) += output.col(j - 1);
    }
    else
    {
      // Brute-force method: spread each error value uniformly over its
      // input window.
      arma::Mat<eT> unpooledError;
      for (size_t j = 0, colidx = 0; j < input.n_cols;
           j += strideHeight, ++colidx)
      {
        for (size_t i = 0, rowidx = 0; i < input.n_rows;
             i += strideWidth, ++rowidx)
        {
          size_t rowEnd = i + kernelWidth - 1;
          size_t colEnd = j + kernelHeight - 1;

          if (rowEnd > input.n_rows - 1)
          {
            if (floor)
              continue;
            rowEnd = input.n_rows - 1;
          }

          if (colEnd > input.n_cols - 1)
          {
            if (floor)
              continue;
            colEnd = input.n_cols - 1;
          }

          arma::Mat<eT> inputArea = input(arma::span(i, rowEnd),
                                          arma::span(j, colEnd));

          unpooledError = arma::Mat<eT>(inputArea.n_rows, inputArea.n_cols);
          unpooledError.fill(error(rowidx, colidx) / inputArea.n_elem);

          output(arma::span(i, i + inputArea.n_rows - 1),
                 arma::span(j, j + inputArea.n_cols - 1)) += unpooledError;
        }
      }
    }
  }

  //! Locally-stored width of the pooling window.
  size_t kernelWidth;

  //! Locally-stored height of the pooling window.
  size_t kernelHeight;

  //! Locally-stored width of the stride operation.
  size_t strideWidth;

  //! Locally-stored height of the stride operation.
  size_t strideHeight;

  //! Rounding operation used to compute the output dimensions.
  bool floor;

  //! Locally-stored number of input units.
  size_t inSize;

  //! Locally-stored number of output units.
  size_t outSize;

  //! Locally-stored input width.
  size_t inputWidth;

  //! Locally-stored input height.
  size_t inputHeight;

  //! Locally-stored output width.
  size_t outputWidth;

  //! Locally-stored output height.
  size_t outputHeight;

  //! Locally-stored reset parameter used to initialize the layer once.
  bool reset;

  //! If true, the layer runs in deterministic (testing) mode.
  bool deterministic;

  //! Locally-stored batch size.
  size_t batchSize;

  //! Locally-stored temporary output, arranged as a cube.
  arma::cube outputTemp;

  //! Locally-stored transformed input parameter.
  arma::cube inputTemp;

  //! Locally-stored transformed gradient parameter.
  arma::cube gTemp;

  //! Locally-stored delta object.
  OutputDataType delta;

  //! Locally-stored gradient object.
  OutputDataType gradient;

  //! Locally-stored output parameter object.
  OutputDataType outputParameter;
}; // class MeanPooling

} // namespace ann
} // namespace mlpack

// Include implementation.
#include "mean_pooling_impl.hpp"

#endif
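
To make the summed-area-table idea used by Pooling() concrete, here is a minimal standalone sketch, assuming only Armadillo; the function name meanPool2x2 and the fixed 2x2 kernel with stride 2 are illustrative choices, not part of this header:

#include <armadillo>

// Minimal mean pooling with a 2x2 kernel and stride 2, using a summed-area
// table (2-D prefix sums), as in MeanPooling::Pooling().
arma::mat meanPool2x2(const arma::mat& input)
{
  // Cumulative sums down each column, then across each row, give the
  // summed-area table: pre(r, c) = sum of input(0:r, 0:c).
  arma::mat pre = arma::cumsum(arma::cumsum(input, 0), 1);

  arma::mat output(input.n_rows / 2, input.n_cols / 2);
  for (arma::uword j = 0; j < output.n_cols; ++j)
  {
    for (arma::uword i = 0; i < output.n_rows; ++i)
    {
      const arma::uword r = 2 * i + 1;  // Bottom row of the window.
      const arma::uword c = 2 * j + 1;  // Rightmost column of the window.
      double sum = pre(r, c);
      if (i > 0)
        sum -= pre(r - 2, c);           // Remove the rows above the window.
      if (j > 0)
        sum -= pre(r, c - 2);           // Remove the columns to the left.
      if (i > 0 && j > 0)
        sum += pre(r - 2, c - 2);       // Add back the doubly-removed block.
      output(i, j) = sum / 4.0;         // Mean over the 2x2 window.
    }
  }
  return output;
}

int main()
{
  arma::mat x = { {  1,  2,  3,  4 },
                  {  5,  6,  7,  8 },
                  {  9, 10, 11, 12 },
                  { 13, 14, 15, 16 } };
  // Expected output: [[3.5, 5.5], [11.5, 13.5]].
  meanPool2x2(x).print("pooled:");
}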