randomized_svd_method.hpp
Go to the documentation of this file.
1 
14 #ifndef MLPACK_METHODS_CF_DECOMPOSITION_POLICIES_RANDOMIZED_SVD_METHOD_HPP
15 #define MLPACK_METHODS_CF_DECOMPOSITION_POLICIES_RANDOMIZED_SVD_METHOD_HPP
16 
17 #include <mlpack/prereqs.hpp>
19 
20 namespace mlpack {
21 namespace cf {
22 
42 {
43  public:
52  RandomizedSVDPolicy(const size_t iteratedPower = 0,
53  const size_t maxIterations = 2) :
54  iteratedPower(iteratedPower),
55  maxIterations(maxIterations)
56  {
57  /* Nothing to do here */
58  }
59 
72  template<typename MatType>
73  void Apply(const MatType& /* data */,
74  const arma::sp_mat& cleanedData,
75  const size_t rank,
76  const size_t maxIterations,
77  const double /* minResidue */,
78  const bool /* mit */)
79  {
80  arma::vec sigma;
81 
82  // Do singular value decomposition using the randomized SVD algorithm.
83  svd::RandomizedSVD rsvd(iteratedPower, maxIterations);
84  rsvd.Apply(cleanedData, w, sigma, h, rank);
85 
86  // Sigma matrix is multiplied to w.
87  w = w * arma::diagmat(sigma);
88 
89  // Take transpose of the matrix h as required by CF class.
90  h = arma::trans(h);
91  }
92 
99  double GetRating(const size_t user, const size_t item) const
100  {
101  double rating = arma::as_scalar(w.row(item) * h.col(user));
102  return rating;
103  }
104 
111  void GetRatingOfUser(const size_t user, arma::vec& rating) const
112  {
113  rating = w * h.col(user);
114  }
115 
128  template<typename NeighborSearchPolicy>
129  void GetNeighborhood(const arma::Col<size_t>& users,
130  const size_t numUsersForSimilarity,
131  arma::Mat<size_t>& neighborhood,
132  arma::mat& similarities) const
133  {
134  // We want to avoid calculating the full rating matrix, so we will do
135  // nearest neighbor search only on the H matrix, using the observation that
136  // if the rating matrix X = W*H, then d(X.col(i), X.col(j)) = d(W H.col(i),
137  // W H.col(j)). This can be seen as nearest neighbor search on the H
138  // matrix with the Mahalanobis distance where M^{-1} = W^T W. So, we'll
139  // decompose M^{-1} = L L^T (the Cholesky decomposition), and then multiply
140  // H by L^T. Then we can perform nearest neighbor search.
141  arma::mat l = arma::chol(w.t() * w);
142  arma::mat stretchedH = l * h; // Due to the Armadillo API, l is L^T.
143 
144  // Temporarily store feature vector of queried users.
145  arma::mat query(stretchedH.n_rows, users.n_elem);
146  // Select feature vectors of queried users.
147  for (size_t i = 0; i < users.n_elem; ++i)
148  query.col(i) = stretchedH.col(users(i));
149 
150  NeighborSearchPolicy neighborSearch(stretchedH);
151  neighborSearch.Search(
152  query, numUsersForSimilarity, neighborhood, similarities);
153  }
154 
156  const arma::mat& W() const { return w; }
158  const arma::mat& H() const { return h; }
159 
161  size_t IteratedPower() const { return iteratedPower; }
163  size_t& IteratedPower() { return iteratedPower; }
164 
166  size_t MaxIterations() const { return maxIterations; }
168  size_t& MaxIterations() { return maxIterations; }
169 
173  template<typename Archive>
174  void serialize(Archive& ar, const uint32_t /* version */)
175  {
176  ar(CEREAL_NVP(w));
177  ar(CEREAL_NVP(h));
178  }
179 
180  private:
182  size_t iteratedPower;
184  size_t maxIterations;
186  arma::mat w;
188  arma::mat h;
189 };
190 
191 } // namespace cf
192 } // namespace mlpack
193 
194 #endif
const arma::mat & W() const
Get the Item Matrix.
void serialize(Archive &ar, const uint32_t)
Serialization.
Linear algebra utility functions, generally performed on matrices or vectors.
double GetRating(const size_t user, const size_t item) const
Return predicted rating given user ID and item ID.
void Apply(const MatType &, const arma::sp_mat &cleanedData, const size_t rank, const size_t maxIterations, const double, const bool)
Apply Collaborative Filtering to the provided data set using the randomized SVD.
size_t IteratedPower() const
Get the size of the normalized power iterations.
The core includes that mlpack expects; standard C++ includes and Armadillo.
void GetRatingOfUser(const size_t user, arma::vec &rating) const
Get predicted ratings for a user.
const arma::mat & H() const
Get the User Matrix.
Randomized SVD is a matrix factorization that is based on randomized matrix approximation techniques...
void GetNeighborhood(const arma::Col< size_t > &users, const size_t numUsersForSimilarity, arma::Mat< size_t > &neighborhood, arma::mat &similarities) const
Get the neighborhood and corresponding similarities for a set of users.
size_t MaxIterations() const
Get the number of iterations.
RandomizedSVDPolicy(const size_t iteratedPower=0, const size_t maxIterations=2)
Use randomized SVD method to perform collaborative filtering.
size_t & IteratedPower()
Modify the size of the normalized power iterations.
Implementation of the Randomized SVD policy to act as a wrapper when accessing Randomized SVD from wi...
size_t & MaxIterations()
Modify the number of iterations.
void Apply(const arma::sp_mat &data, arma::mat &u, arma::vec &s, arma::mat &v, const size_t rank)
Center the data to apply Principal Component Analysis on the given sparse matrix dataset using randomized...