spill_tree.hpp
Go to the documentation of this file.
1 
11 #ifndef MLPACK_CORE_TREE_SPILL_TREE_SPILL_TREE_HPP
12 #define MLPACK_CORE_TREE_SPILL_TREE_SPILL_TREE_HPP
13 
14 #include <mlpack/prereqs.hpp>
15 #include "../space_split/midpoint_space_split.hpp"
16 #include "../statistic.hpp"
17 
18 namespace mlpack {
19 namespace tree {
20 
66 template<typename MetricType,
67  typename StatisticType = EmptyStatistic,
68  typename MatType = arma::mat,
69  template<typename HyperplaneMetricType>
70  class HyperplaneType = AxisOrthogonalHyperplane,
71  template<typename SplitMetricType, typename SplitMatType>
72  class SplitType = MidpointSpaceSplit>
73 class SpillTree
74 {
75  public:
77  typedef MatType Mat;
79  typedef typename MatType::elem_type ElemType;
81  typedef typename HyperplaneType<MetricType>::BoundType BoundType;
82 
83  private:
85  SpillTree* left;
87  SpillTree* right;
89  SpillTree* parent;
92  size_t count;
95  arma::Col<size_t>* pointsIndex;
97  bool overlappingNode;
99  HyperplaneType<MetricType> hyperplane;
101  BoundType bound;
103  StatisticType stat;
105  ElemType parentDistance;
108  ElemType furthestDescendantDistance;
110  ElemType minimumBoundDistance;
113  const MatType* dataset;
115  bool localDataset;
116 
121  template<typename RuleType, bool Defeatist = false>
123 
128  template<typename RuleType, bool Defeatist = false>
130 
131  public:
133  template<typename RuleType>
135 
137  template<typename RuleType>
139 
141  template<typename RuleType>
143 
145  template<typename RuleType>
147 
158  SpillTree(const MatType& data,
159  const double tau = 0,
160  const size_t maxLeafSize = 20,
161  const double rho = 0.7);
162 
174  SpillTree(MatType&& data,
175  const double tau = 0,
176  const size_t maxLeafSize = 20,
177  const double rho = 0.7);
178 
190  SpillTree(SpillTree* parent,
191  arma::Col<size_t>& points,
192  const double tau = 0,
193  const size_t maxLeafSize = 20,
194  const double rho = 0.7);
195 
202  SpillTree(const SpillTree& other);
203 
210  SpillTree(SpillTree&& other);
211 
217  SpillTree& operator=(const SpillTree& other);
218 
224  SpillTree& operator=(SpillTree&& other);
225 
231  template<typename Archive>
232  SpillTree(
233  Archive& ar,
234  const typename std::enable_if_t<cereal::is_loading<Archive>()>* = 0);
235 
241  ~SpillTree();
242 
244  const BoundType& Bound() const { return bound; }
246  BoundType& Bound() { return bound; }
247 
249  const StatisticType& Stat() const { return stat; }
251  StatisticType& Stat() { return stat; }
252 
254  bool IsLeaf() const;
255 
257  SpillTree* Left() const { return left; }
259  SpillTree*& Left() { return left; }
260 
262  SpillTree* Right() const { return right; }
264  SpillTree*& Right() { return right; }
265 
267  SpillTree* Parent() const { return parent; }
269  SpillTree*& Parent() { return parent; }
270 
272  const MatType& Dataset() const { return *dataset; }
273 
275  bool Overlap() const { return overlappingNode; }
276 
278  const HyperplaneType<MetricType>& Hyperplane() const { return hyperplane; }
279 
281  MetricType Metric() const { return MetricType(); }
282 
284  size_t NumChildren() const;
285 
292  template<typename VecType>
293  size_t GetNearestChild(
294  const VecType& point,
296 
303  template<typename VecType>
304  size_t GetFurthestChild(
305  const VecType& point,
307 
314  size_t GetNearestChild(const SpillTree& queryNode);
315 
322  size_t GetFurthestChild(const SpillTree& queryNode);
323 
328  ElemType FurthestPointDistance() const;
329 
337  ElemType FurthestDescendantDistance() const;
338 
340  ElemType MinimumBoundDistance() const;
341 
344  ElemType ParentDistance() const { return parentDistance; }
347  ElemType& ParentDistance() { return parentDistance; }
348 
355  SpillTree& Child(const size_t child) const;
356 
357  SpillTree*& ChildPtr(const size_t child)
358  { return (child == 0) ? left : right; }
359 
361  size_t NumPoints() const;
362 
368  size_t NumDescendants() const;
369 
377  size_t Descendant(const size_t index) const;
378 
387  size_t Point(const size_t index) const;
388 
390  ElemType MinDistance(const SpillTree& other) const
391  {
392  return bound.MinDistance(other.Bound());
393  }
394 
396  ElemType MaxDistance(const SpillTree& other) const
397  {
398  return bound.MaxDistance(other.Bound());
399  }
400 
403  {
404  return bound.RangeDistance(other.Bound());
405  }
406 
408  template<typename VecType>
409  ElemType MinDistance(const VecType& point,
411  const
412  {
413  return bound.MinDistance(point);
414  }
415 
417  template<typename VecType>
418  ElemType MaxDistance(const VecType& point,
420  const
421  {
422  return bound.MaxDistance(point);
423  }
424 
426  template<typename VecType>
428  RangeDistance(const VecType& point,
429  typename std::enable_if_t<IsVector<VecType>::value>* = 0) const
430  {
431  return bound.RangeDistance(point);
432  }
433 
435  static bool HasSelfChildren() { return false; }
436 
438  void Center(arma::vec& center) { bound.Center(center); }
439 
440  private:
449  void SplitNode(arma::Col<size_t>& points,
450  const size_t maxLeafSize,
451  const double tau,
452  const double rho);
453 
464  bool SplitPoints(const double tau,
465  const double rho,
466  const arma::Col<size_t>& points,
467  arma::Col<size_t>& leftPoints,
468  arma::Col<size_t>& rightPoints);
469  protected:
476  SpillTree();
477 
479  friend class cereal::access;
480 
481  public:
485  template<typename Archive>
486  void serialize(Archive& ar, const uint32_t version);
487 };
488 
489 } // namespace tree
490 } // namespace mlpack
491 
492 // Include implementation.
493 #include "spill_tree_impl.hpp"
494 
495 // Include everything else, if necessary.
496 #include "../spill_tree.hpp"
497 
498 #endif
SpillTree & Child(const size_t child) const
Return the specified child (0 will be left, 1 will be right).
math::RangeType< ElemType > RangeDistance(const SpillTree &other) const
Return the minimum and maximum distance to another node.
Definition: spill_tree.hpp:402
static bool HasSelfChildren()
Returns false: this tree type does not have self children.
Definition: spill_tree.hpp:435
SpillTree & operator=(const SpillTree &other)
Copy the given Spill Tree.
ElemType MinimumBoundDistance() const
Return the minimum distance from the center of the node to any bound edge.
HyperplaneBase< bound::HRectBound< MetricType >, AxisParallelProjVector > AxisOrthogonalHyperplane
AxisOrthogonalHyperplane represents a hyperplane orthogonal to an axis.
Definition: hyperplane.hpp:145
typename enable_if< B, T >::type enable_if_t
Definition: prereqs.hpp:70
ElemType ParentDistance() const
Return the distance from the center of this node to the center of the parent node.
Definition: spill_tree.hpp:344
ElemType FurthestDescendantDistance() const
Return the furthest possible descendant distance.
Linear algebra utility functions, generally performed on matrices or vectors.
MetricType Metric() const
Get the metric that the tree uses.
Definition: spill_tree.hpp:281
ElemType FurthestPointDistance() const
Return the furthest distance to a point held in this node.
The core includes that mlpack expects; standard C++ includes and Armadillo.
A generic single-tree traverser for hybrid spill trees; see spill_single_tree_traverser.hpp for implementation.
size_t NumChildren() const
Return the number of children in this node.
SpillTree()
A default constructor.
math::RangeType< ElemType > RangeDistance(const VecType &point, typename std::enable_if_t< IsVector< VecType >::value > *=0) const
Return the minimum and maximum distance to another point.
Definition: spill_tree.hpp:428
SpillTree * Left() const
Gets the left child of this node.
Definition: spill_tree.hpp:257
SpillTree *& Parent()
Modify the parent of this node.
Definition: spill_tree.hpp:269
size_t Descendant(const size_t index) const
Return the index (with reference to the dataset) of a particular descendant of this node...
StatisticType & Stat()
Return the statistic object for this node.
Definition: spill_tree.hpp:251
A hybrid spill tree is a variant of binary space trees in which the children of a node can "spill ove...
Definition: spill_tree.hpp:73
A generic dual-tree traverser for hybrid spill trees; see spill_dual_tree_traverser.hpp for implementation.
const BoundType & Bound() const
Return the bound object for this node.
Definition: spill_tree.hpp:244
SpillTree * Right() const
Gets the right child of this node.
Definition: spill_tree.hpp:262
MatType Mat
So other classes can use TreeType::Mat.
Definition: spill_tree.hpp:77
~SpillTree()
Deletes this node, deallocating the memory for the children and calling their destructors in turn...
size_t NumPoints() const
Return the number of points in this node (0 if not a leaf).
size_t GetFurthestChild(const VecType &point, typename std::enable_if_t< IsVector< VecType >::value > *=0)
Return the index of the furthest child node to the given query point (this is an efficient estimation...
void Center(arma::vec &center)
Store the center of the bounding region in the given vector.
Definition: spill_tree.hpp:438
SpillTree *& Left()
Modify the left child of this node.
Definition: spill_tree.hpp:259
ElemType MaxDistance(const VecType &point, typename std::enable_if_t< IsVector< VecType >::value > *=0) const
Return the maximum distance to another point.
Definition: spill_tree.hpp:418
const HyperplaneType< MetricType > & Hyperplane() const
Get the Hyperplane instance.
Definition: spill_tree.hpp:278
bool Overlap() const
Distinguish overlapping nodes from non-overlapping nodes.
Definition: spill_tree.hpp:275
ElemType MinDistance(const VecType &point, typename std::enable_if_t< IsVector< VecType >::value > *=0) const
Return the minimum distance to another point.
Definition: spill_tree.hpp:409
ElemType & ParentDistance()
Modify the distance from the center of this node to the center of the parent node.
Definition: spill_tree.hpp:347
const MatType & Dataset() const
Get the dataset which the tree is built on.
Definition: spill_tree.hpp:272
ElemType MinDistance(const SpillTree &other) const
Return the minimum distance to another node.
Definition: spill_tree.hpp:390
ElemType MaxDistance(const SpillTree &other) const
Return the maximum distance to another node.
Definition: spill_tree.hpp:396
void serialize(Archive &ar, const uint32_t version)
Serialize the tree.
BoundType & Bound()
Return the bound object for this node.
Definition: spill_tree.hpp:246
size_t GetNearestChild(const VecType &point, typename std::enable_if_t< IsVector< VecType >::value > *=0)
Return the index of the nearest child node to the given query point (this is an efficient estimation ...
const StatisticType & Stat() const
Return the statistic object for this node.
Definition: spill_tree.hpp:249
bool IsLeaf() const
Return whether or not this node is a leaf (true if it has no children).
size_t NumDescendants() const
Return the number of descendants of this node.
SpillTree * Parent() const
Gets the parent of this node.
Definition: spill_tree.hpp:267
size_t Point(const size_t index) const
Return the index (with reference to the dataset) of a particular point in this node.
HyperplaneType< MetricType >::BoundType BoundType
The bound type.
Definition: spill_tree.hpp:81
SpillTree *& ChildPtr(const size_t child)
Definition: spill_tree.hpp:357
MatType::elem_type ElemType
The type of element held in MatType.
Definition: spill_tree.hpp:79
SpillTree *& Right()
Modify the right child of this node.
Definition: spill_tree.hpp:264
If value == true, then VecType is some sort of Armadillo vector or subview.
Definition: arma_traits.hpp:35