user_mean_normalization.hpp
Go to the documentation of this file.
1 
13 #ifndef MLPACK_METHODS_CF_NORMALIZATION_USER_MEAN_NORMALIZATION_HPP
14 #define MLPACK_METHODS_CF_NORMALIZATION_USER_MEAN_NORMALIZATION_HPP
15 
16 #include <mlpack/prereqs.hpp>
17 
18 namespace mlpack {
19 namespace cf {
20 
40 {
41  public:
42  // Empty constructor.
44 
50  void Normalize(arma::mat& data)
51  {
52  const size_t userNum = arma::max(data.row(0)) + 1;
53  userMean = arma::vec(userNum, arma::fill::zeros);
54  // Number of ratings for each user.
55  arma::Row<size_t> ratingNum(userNum, arma::fill::zeros);
56 
57  // Sum ratings for each user.
58  data.each_col([&](arma::vec& datapoint)
59  {
60  const size_t user = (size_t) datapoint(0);
61  const double rating = datapoint(2);
62  userMean(user) += rating;
63  ratingNum(user) += 1;
64  });
65 
66  // Calculate user mean and subtract user mean from ratings.
67  // Set user mean to 0 if the user has no rating.
68  for (size_t i = 0; i < userNum; ++i)
69  {
70  if (ratingNum(i) != 0)
71  userMean(i) /= ratingNum(i);
72  }
73 
74  data.each_col([&](arma::vec& datapoint)
75  {
76  const size_t user = (size_t) datapoint(0);
77  datapoint(2) -= userMean(user);
78  // The algorithm omits rating of zero. If normalized rating equals zero,
79  // it is set to the smallest positive double value.
80  if (datapoint(2) == 0)
81  datapoint(2) = std::numeric_limits<double>::min();
82  });
83  }
84 
90  void Normalize(arma::sp_mat& cleanedData)
91  {
92  // Calculate userMean.
93  userMean = arma::vec(cleanedData.n_cols, arma::fill::zeros);
94  arma::Col<size_t> ratingNum(cleanedData.n_cols, arma::fill::zeros);
95  arma::sp_mat::iterator it = cleanedData.begin();
96  arma::sp_mat::iterator it_end = cleanedData.end();
97  for (; it != it_end; ++it)
98  {
99  userMean(it.col()) += *it;
100  ratingNum(it.col()) += 1;
101  }
102  for (size_t i = 0; i < userMean.n_elem; ++i)
103  {
104  if (ratingNum(i) != 0)
105  userMean(i) /= ratingNum(i);
106  }
107 
108  // Normalize the data.
109  it = cleanedData.begin();
110  for (; it != cleanedData.end(); ++it)
111  {
112  double tmp = *it - userMean(it.col());
113 
114  // The algorithm omits rating of zero. If normalized rating equals zero,
115  // it is set to the smallest positive float value.
116  if (tmp == 0)
117  tmp = std::numeric_limits<float>::min();
118 
119  *it = tmp;
120  }
121  }
122 
130  double Denormalize(const size_t user,
131  const size_t /* item */,
132  const double rating) const
133  {
134  return rating + userMean(user);
135  }
136 
143  void Denormalize(const arma::Mat<size_t>& combinations,
144  arma::vec& predictions) const
145  {
146  for (size_t i = 0; i < predictions.n_elem; ++i)
147  {
148  const size_t user = combinations(0, i);
149  predictions(i) += userMean(user);
150  }
151  }
152 
156  const arma::vec& Mean() const { return userMean; }
157 
161  template<typename Archive>
162  void serialize(Archive& ar, const uint32_t /* version */)
163  {
164  ar(CEREAL_NVP(userMean));
165  }
166 
167  private:
169  arma::vec userMean;
170 };
171 
172 } // namespace cf
173 } // namespace mlpack
174 
175 #endif
void Normalize(arma::mat &data)
Normalize the data by subtracting the user mean from each existing rating.
Linear algebra utility functions, generally performed on matrices or vectors.
The core includes that mlpack expects; standard C++ includes and Armadillo.
void Denormalize(const arma::Mat< size_t > &combinations, arma::vec &predictions) const
Denormalize computed rating by adding user mean.
const arma::vec & Mean() const
Return user mean.
void Normalize(arma::sp_mat &cleanedData)
Normalize the data by subtracting the user mean from each existing rating.
This normalization class performs user mean normalization on raw ratings.
double Denormalize(const size_t user, const size_t, const double rating) const
Denormalize computed rating by adding user mean.
void serialize(Archive &ar, const uint32_t)
Serialization.