dual_tree_kmeans.hpp
Go to the documentation of this file.
1 
15 #ifndef MLPACK_METHODS_KMEANS_DUAL_TREE_KMEANS_HPP
16 #define MLPACK_METHODS_KMEANS_DUAL_TREE_KMEANS_HPP
17 
21 
23 
24 namespace mlpack {
25 namespace kmeans {
26 
34 template<
35  typename MetricType,
36  typename MatType,
37  template<typename TreeMetricType,
38  typename TreeStatType,
39  typename TreeMatType>
40  class TreeType = tree::KDTree>
42 {
43  public:
45  typedef TreeType<MetricType, DualTreeKMeansStatistic, MatType> Tree;
46 
47  template<typename TreeMetricType,
48  typename IgnoredStatType,
49  typename TreeMatType>
50  using NNSTreeType =
51  TreeType<TreeMetricType, DualTreeKMeansStatistic, TreeMatType>;
52 
57  DualTreeKMeans(const MatType& dataset, MetricType& metric);
58 
63 
72  double Iterate(const arma::mat& centroids,
73  arma::mat& newCentroids,
74  arma::Col<size_t>& counts);
75 
77  size_t DistanceCalculations() const { return distanceCalculations; }
79  size_t& DistanceCalculations() { return distanceCalculations; }
80 
81  private:
83  const MatType& datasetOrig; // Maybe not necessary.
85  Tree* tree;
87  const MatType& dataset;
89  MetricType metric;
90 
92  size_t distanceCalculations;
94  size_t iteration;
95 
97  arma::vec upperBounds;
99  arma::vec lowerBounds;
101  std::vector<bool> prunedPoints;
102 
103  arma::Row<size_t> assignments;
104 
105  std::vector<bool> visited; // Was the point visited this iteration?
106 
107  arma::mat lastIterationCentroids; // For sanity checks.
108 
109  arma::vec clusterDistances; // The amount the clusters moved last iteration.
110 
111  arma::mat interclusterDistances; // Static storage for intercluster distances.
112 
115  void UpdateTree(Tree& node,
116  const arma::mat& centroids,
117  const double parentUpperBound = 0.0,
118  const double adjustedParentUpperBound = DBL_MAX,
119  const double parentLowerBound = DBL_MAX,
120  const double adjustedParentLowerBound = 0.0);
121 
123  void ExtractCentroids(Tree& node,
124  arma::mat& newCentroids,
125  arma::Col<size_t>& newCounts,
126  const arma::mat& centroids);
127 
128  void CoalesceTree(Tree& node, const size_t child = 0);
129  void DecoalesceTree(Tree& node);
130 };
131 
134 template<typename TreeType>
135 void HideChild(TreeType& node,
136  const size_t child,
137  const typename std::enable_if_t<
139 
143 template<typename TreeType>
144 void HideChild(TreeType& node,
145  const size_t child,
146  const typename std::enable_if_t<
148 
150 template<typename TreeType>
151 void RestoreChildren(TreeType& node,
152  const typename std::enable_if_t<!tree::TreeTraits<
153  TreeType>::BinaryTree>* junk = 0);
154 
156 template<typename TreeType>
157 void RestoreChildren(TreeType& node,
158  const typename std::enable_if_t<tree::TreeTraits<
159  TreeType>::BinaryTree>* junk = 0);
160 
163 template<typename MetricType, typename MatType>
165 
168 template<typename MetricType, typename MatType>
169 using CoverTreeDualTreeKMeans = DualTreeKMeans<MetricType, MatType,
171 
172 } // namespace kmeans
173 } // namespace mlpack
174 
175 #include "dual_tree_kmeans_impl.hpp"
176 
177 #endif
void HideChild(TreeType &node, const size_t child, const typename std::enable_if_t< !tree::TreeTraits< TreeType >::BinaryTree > *junk=0)
Utility function for hiding children.
typename enable_if< B, T >::type enable_if_t
Definition: prereqs.hpp:70
Linear algebra utility functions, generally performed on matrices or vectors.
size_t & DistanceCalculations()
Modify the number of distance calculations.
void RestoreChildren(TreeType &node, const typename std::enable_if_t<!tree::TreeTraits< TreeType >::BinaryTree > *junk=0)
Utility function for restoring children to a non-binary tree.
An algorithm for an exact Lloyd iteration which simply uses dual-tree nearest-neighbor search to find...
size_t DistanceCalculations() const
Return the number of distance calculations.
TreeType< MetricType, DualTreeKMeansStatistic, MatType > Tree
Convenience typedef.
TreeType< TreeMetricType, DualTreeKMeansStatistic, TreeMatType > NNSTreeType
The TreeTraits class provides compile-time information on the characteristics of a given tree type...
Definition: tree_traits.hpp:77
~DualTreeKMeans()
Delete the tree constructed by the DualTreeKMeans object.
double Iterate(const arma::mat &centroids, arma::mat &newCentroids, arma::Col< size_t > &counts)
Run a single iteration of the dual-tree nearest neighbor algorithm for k-means, updating the given ce...
BinarySpaceTree< MetricType, StatisticType, MatType, bound::HRectBound, MidpointSplit > KDTree
The standard midpoint-split kd-tree.
Definition: typedef.hpp:63
DualTreeKMeans(const MatType &dataset, MetricType &metric)
Construct the DualTreeKMeans object, which will construct a tree on the points.
A cover tree is a tree specifically designed to speed up nearest-neighbor computation in high-dimensi...
Definition: cover_tree.hpp:99