item_mean_normalization.hpp
Go to the documentation of this file.
1 
13 #ifndef MLPACK_METHODS_CF_NORMALIZATION_ITEM_MEAN_NORMALIZATION_HPP
14 #define MLPACK_METHODS_CF_NORMALIZATION_ITEM_MEAN_NORMALIZATION_HPP
15 
16 #include <mlpack/prereqs.hpp>
17 
18 namespace mlpack {
19 namespace cf {
20 
40 {
41  public:
42  // Empty constructor.
44 
50  void Normalize(arma::mat& data)
51  {
52  const size_t itemNum = arma::max(data.row(1)) + 1;
53  itemMean = arma::vec(itemNum, arma::fill::zeros);
54  // Number of ratings for each item.
55  arma::Row<size_t> ratingNum(itemNum, arma::fill::zeros);
56 
57  // Sum ratings for each item.
58  data.each_col([&](arma::vec& datapoint)
59  {
60  const size_t item = (size_t) datapoint(1);
61  const double rating = datapoint(2);
62  itemMean(item) += rating;
63  ratingNum(item) += 1;
64  });
65 
66  // Calculate item mean and subtract item mean from ratings.
67  // Set item mean to 0 if the item has no rating.
68  for (size_t i = 0; i < itemNum; ++i)
69  {
70  if (ratingNum(i) != 0)
71  itemMean(i) /= ratingNum(i);
72  }
73 
74  data.each_col([&](arma::vec& datapoint)
75  {
76  const size_t item = (size_t) datapoint(1);
77  datapoint(2) -= itemMean(item);
78  // The algorithm omits rating of zero. If normalized rating equals zero,
79  // it is set to the smallest positive float value.
80  if (datapoint(2) == 0)
81  datapoint(2) = std::numeric_limits<float>::min();
82  });
83  }
84 
90  void Normalize(arma::sp_mat& cleanedData)
91  {
92  // Calculate itemMean.
93  itemMean = arma::vec(cleanedData.n_rows, arma::fill::zeros);
94  arma::Col<size_t> ratingNum(cleanedData.n_rows, arma::fill::zeros);
95  arma::sp_mat::iterator it = cleanedData.begin();
96  arma::sp_mat::iterator it_end = cleanedData.end();
97  for (; it != it_end; ++it)
98  {
99  itemMean(it.row()) += *it;
100  ratingNum(it.row()) += 1;
101  }
102  for (size_t i = 0; i < itemMean.n_elem; ++i)
103  {
104  if (ratingNum(i) != 0)
105  itemMean(i) /= ratingNum(i);
106  }
107 
108  // Normalize the data.
109  it = cleanedData.begin();
110  for (; it != cleanedData.end(); ++it)
111  {
112  double tmp = *it - itemMean(it.row());
113 
114  // The algorithm omits rating of zero. If normalized rating equals zero,
115  // it is set to the smallest positive double value.
116  if (tmp == 0)
117  tmp = std::numeric_limits<float>::min();
118 
119  *it = tmp;
120  }
121  }
122 
130  double Denormalize(const size_t /* user */,
131  const size_t item,
132  const double rating) const
133  {
134  return rating + itemMean(item);
135  }
136 
143  void Denormalize(const arma::Mat<size_t>& combinations,
144  arma::vec& predictions) const
145  {
146  for (size_t i = 0; i < predictions.n_elem; ++i)
147  {
148  const size_t item = combinations(1, i);
149  predictions(i) += itemMean(item);
150  }
151  }
152 
156  const arma::vec& Mean() const { return itemMean; }
157 
161  template<typename Archive>
162  void serialize(Archive& ar, const uint32_t /* version */)
163  {
164  ar(CEREAL_NVP(itemMean));
165  }
166 
167  private:
//! Mean rating of each item, indexed by item.
169  arma::vec itemMean;
170 };
171 
172 } // namespace cf
173 } // namespace mlpack
174 
175 #endif
const arma::vec & Mean() const
Return item mean.
This normalization class performs item mean normalization on raw ratings.
Linear algebra utility functions, generally performed on matrices or vectors.
The core includes that mlpack expects; standard C++ includes and Armadillo.
void serialize(Archive &ar, const uint32_t)
Serialization.
double Denormalize(const size_t, const size_t item, const double rating) const
Denormalize computed rating by adding item mean.
void Denormalize(const arma::Mat< size_t > &combinations, arma::vec &predictions) const
Denormalize computed rating by adding item mean.
void Normalize(arma::sp_mat &cleanedData)
Normalize the data by subtracting the item mean from each existing rating.
void Normalize(arma::mat &data)
Normalize the data by subtracting the item mean from each existing rating.