7 #ifndef MLPACK_METHODS_KMEANS_KMEANS_PLUS_PLUS_INITIALIZATION_HPP 8 #define MLPACK_METHODS_KMEANS_KMEANS_PLUS_PLUS_INITIALIZATION_HPP 46 template<
typename MatType>
47 inline static void Cluster(
const MatType& data,
48 const size_t clusters,
51 centroids.set_size(data.n_rows, clusters);
55 centroids.col(0) = data.col(firstPoint);
58 arma::vec distribution(data.n_cols);
61 for (
size_t i = 1; i < clusters; ++i)
70 for (
size_t p = 0; p < data.n_cols; ++p)
72 double minDistance = std::numeric_limits<double>::max();
73 for (
size_t j = 0; j < i; ++j)
75 const double distance =
78 minDistance = std::min(distance, minDistance);
81 distribution[p] = minDistance;
86 distribution /= arma::accu(distribution);
89 for (
size_t j = 1; j < distribution.n_elem; ++j)
90 distribution[j] += distribution[j - 1];
94 const double* elem = std::lower_bound(distribution.begin(),
95 distribution.end(), sampleValue);
96 const size_t position = (size_t)
97 (elem - distribution.begin()) /
sizeof(
double);
98 centroids.col(i) = data.col(position);
This class implements the k-means++ initialization, as described in the following paper: ...
The core includes that mlpack expects; standard C++ includes and Armadillo.
KMeansPlusPlusInitialization()
Empty constructor, required by the InitialPartitionPolicy type definition.
static VecTypeA::elem_type Evaluate(const VecTypeA &a, const VecTypeB &b)
Computes the distance between two points.
double Random()
Generates a uniform random number between 0 and 1.
int RandInt(const int hiExclusive)
Generates a uniform random integer.
static void Cluster(const MatType &data, const size_t clusters, arma::mat &centroids)
Initialize the centroids matrix by randomly sampling points from the data matrix. ...