refined_start.hpp
Go to the documentation of this file.
1 
14 #ifndef MLPACK_METHODS_KMEANS_REFINED_START_HPP
15 #define MLPACK_METHODS_KMEANS_REFINED_START_HPP
16 
17 #include <mlpack/prereqs.hpp>
18 
19 namespace mlpack {
20 namespace kmeans {
21 
40 {
41  public:
// Create the RefinedStart object, optionally specifying the number of
// samplings to perform (default 100) and the percentage of the data used by
// each subsampling (default 0.02).  Both arguments are copied into the
// corresponding members unchanged; no validation happens here.
// NOTE(review): 0.02 is presumably a fraction of the dataset (i.e. 2%) rather
// than a value on a 0-100 scale -- confirm against refined_start_impl.hpp.
47  RefinedStart(const size_t samplings = 100,
48  const double percentage = 0.02) :
49  samplings(samplings), percentage(percentage) { }
50 
// Partition the given dataset into the given number of clusters according to
// the random sampling scheme, with the result delivered through `centroids`.
// Declaration only: the definition is expected in refined_start_impl.hpp,
// which this header includes at the bottom.
//
// data      - dataset to partition (any MatType).
// clusters  - number of clusters to produce.
// centroids - non-const reference; presumably overwritten with the computed
//             centroids -- confirm layout (one column per centroid?) in the
//             implementation file.
61  template<typename MatType>
62  void Cluster(const MatType& data,
63  const size_t clusters,
64  arma::mat& centroids) const;
65 
// Overload of Cluster() that reports its result through `assignments` instead
// of a centroid matrix.  Declaration only: the definition is expected in
// refined_start_impl.hpp, which this header includes at the bottom.
//
// data        - dataset to partition (any MatType).
// clusters    - number of clusters to produce.
// assignments - non-const reference; presumably filled with one cluster index
//               per data point -- TODO confirm against the implementation.
77  template<typename MatType>
78  void Cluster(const MatType& data,
79  const size_t clusters,
80  arma::Row<size_t>& assignments) const;
81 
// Get the number of samplings that will be performed (returned by value).
83  size_t Samplings() const { return samplings; }
// Modify the number of samplings that will be performed.  Returns a mutable
// reference to the member, so the caller can assign through it directly.
85  size_t& Samplings() { return samplings; }
86 
// Get the percentage of the data used by each subsampling (returned by value).
88  double Percentage() const { return percentage; }
// Modify the percentage of the data used by each subsampling.  Returns a
// mutable reference to the member, so the caller can assign through it
// directly.
90  double& Percentage() { return percentage; }
91 
// Serialize the object.  Passes both configuration members (`samplings`, then
// `percentage`) to the archive, each wrapped in CEREAL_NVP so it is stored
// under its own name.  The version argument is accepted but unused.
93  template<typename Archive>
94  void serialize(Archive& ar, const uint32_t /* version */)
95  {
96  ar(CEREAL_NVP(samplings));
97  ar(CEREAL_NVP(percentage));
98  }
99 
100  private:
// The number of samplings that will be performed.
102  size_t samplings;
// The percentage of the data used by each subsampling.
104  double percentage;
105 };
106 
107 } // namespace kmeans
108 } // namespace mlpack
109 
110 // Include implementation.
111 #include "refined_start_impl.hpp"
112 
113 #endif
Linear algebra utility functions, generally performed on matrices or vectors.
RefinedStart(const size_t samplings=100, const double percentage=0.02)
Create the RefinedStart object, optionally specifying parameters for the number of samplings to perform and the percentage of the data used by each subsampling.
The core includes that mlpack expects; standard C++ includes and Armadillo.
double & Percentage()
Modify the percentage of the data used by each subsampling.
void serialize(Archive &ar, const uint32_t)
Serialize the object.
double Percentage() const
Get the percentage of the data used by each subsampling.
void Cluster(const MatType &data, const size_t clusters, arma::mat &centroids) const
Partition the given dataset into the given number of clusters according to the random sampling scheme, returning the result through the centroids argument.
size_t & Samplings()
Modify the number of samplings that will be performed.
A refined approach for choosing initial points for k-means clustering.
size_t Samplings() const
Get the number of samplings that will be performed.