z_score_normalization.hpp
Go to the documentation of this file.
1 
12 #ifndef MLPACK_METHODS_CF_NORMALIZATION_Z_SCORE_NORMALIZATION_HPP
13 #define MLPACK_METHODS_CF_NORMALIZATION_Z_SCORE_NORMALIZATION_HPP
14 
15 #include <mlpack/prereqs.hpp>
16 
17 namespace mlpack {
18 namespace cf {
19 
39 {
40  public:
41  // Empty constructor.
42  ZScoreNormalization() : mean(0), stddev(1) { }
43 
49  void Normalize(arma::mat& data)
50  {
51  mean = arma::mean(data.row(2));
52  stddev = arma::stddev(data.row(2));
53 
54  if (std::fabs(stddev) < 1e-14)
55  {
56  Log::Fatal << "Standard deviation of all existing ratings is 0! "
57  << "This may indicate that all existing ratings are the same."
58  << std::endl;
59  }
60 
61  data.row(2) = (data.row(2) - mean) / stddev;
62  // The algorithm omits rating of zero. If normalized rating equals zero,
63  // it is set to the smallest positive float value.
64  data.row(2).for_each([](double& x)
65  {
66  if (x == 0)
67  x = std::numeric_limits<float>::min();
68  });
69  }
70 
76  void Normalize(arma::sp_mat& cleanedData)
77  {
78  // Caculate mean and stdev of all non zero ratings.
79  arma::vec ratings = arma::nonzeros(cleanedData);
80  mean = arma::mean(ratings);
81  stddev = arma::stddev(ratings);
82 
83  if (std::fabs(stddev) < 1e-14)
84  {
85  Log::Fatal << "Standard deviation of all existing ratings is 0! "
86  << "This may indicate that all existing ratings are the same."
87  << std::endl;
88  }
89 
90  // Subtract mean from existing rating and divide it by stddev.
91  // TODO: consider using spmat::transform() instead of spmat iterators
92  // TODO: http://arma.sourceforge.net/docs.html#transform
93  arma::sp_mat::iterator it = cleanedData.begin();
94  arma::sp_mat::iterator it_end = cleanedData.end();
95  for (; it != it_end; ++it)
96  {
97  double tmp = (*it - mean) / stddev;
98 
99  // The algorithm omits rating of zero. If normalized rating equals zero,
100  // it is set to the smallest positive float value.
101  if (tmp == 0)
102  tmp = std::numeric_limits<float>::min();
103 
104  *it = tmp;
105  }
106  }
107 
115  double Denormalize(const size_t /* user */,
116  const size_t /* item */,
117  const double rating) const
118  {
119  return rating * stddev + mean;
120  }
121 
128  void Denormalize(const arma::Mat<size_t>& /* combinations */,
129  arma::vec& predictions) const
130  {
131  predictions = predictions * stddev + mean;
132  }
133 
137  double Mean() const
138  {
139  return mean;
140  }
141 
145  double Stddev() const
146  {
147  return stddev;
148  }
149 
153  template<typename Archive>
154  void serialize(Archive& ar, const uint32_t /* version */)
155  {
156  ar(CEREAL_NVP(mean));
157  ar(CEREAL_NVP(stddev));
158  }
159 
160  private:
162  double mean;
164  double stddev;
165 };
166 
167 } // namespace cf
168 } // namespace mlpack
169 
170 #endif
void Denormalize(const arma::Mat< size_t > &, arma::vec &predictions) const
Denormalize the computed ratings by multiplying by stddev and then adding mean.
Linear algebra utility functions, generally performed on matrices or vectors.
double Stddev() const
Return stddev.
The core includes that mlpack expects; standard C++ includes and Armadillo.
void serialize(Archive &ar, const uint32_t)
Serialization.
static MLPACK_EXPORT util::PrefixedOutStream Fatal
Prints fatal messages prefixed with [FATAL], then terminates the program.
Definition: log.hpp:90
void Normalize(arma::mat &data)
Normalize the data to zero mean and one standard deviation.
double Denormalize(const size_t, const size_t, const double rating) const
Denormalize the computed rating by multiplying by stddev and then adding mean.
double Mean() const
Return mean.
void Normalize(arma::sp_mat &cleanedData)
Normalize the data to zero mean and one standard deviation.
This normalization class performs z-score normalization on raw ratings.